03387abcd5cca234649b9f58796a86992e9b8b00
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / test / java / org / opendaylight / controller / cluster / raft / ReplicationAndSnapshotsWithLaggingFollowerIntegrationTest.java
1 /*
2  * Copyright (c) 2015 Brocade Communications Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.raft;
9
10 import static org.junit.Assert.assertEquals;
11 import akka.persistence.SaveSnapshotSuccess;
12 import com.google.common.collect.ImmutableMap;
13 import java.util.Arrays;
14 import java.util.List;
15 import java.util.Map;
16 import org.junit.Assert;
17 import org.junit.Test;
18 import org.opendaylight.controller.cluster.raft.MockRaftActorContext.MockPayload;
19 import org.opendaylight.controller.cluster.raft.base.messages.ApplyJournalEntries;
20 import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
21 import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
22 import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot;
23 import org.opendaylight.controller.cluster.raft.base.messages.UpdateElectionTerm;
24 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
25 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
26 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
27 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
28 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
29 import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
30 import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
31 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
32
33 /**
34  * Tests replication and snapshots end-to-end using real RaftActors and behavior communication with a
35  * lagging follower.
36  *
37  * @author Thomas Pantelis
38  */
39 public class ReplicationAndSnapshotsWithLaggingFollowerIntegrationTest extends AbstractRaftActorIntegrationTest {
40
41     private void setup() {
42         leaderId = factory.generateActorId("leader");
43         follower1Id = factory.generateActorId("follower");
44         follower2Id = factory.generateActorId("follower");
45
46         // Setup the persistent journal for the leader - just an election term and no journal/snapshots.
47         InMemoryJournal.addEntry(leaderId, 1, new UpdateElectionTerm(initialTerm, leaderId));
48
49         // Create the leader and 2 follower actors.
50         follower1Actor = newTestRaftActor(follower1Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
51                 follower2Id, testActorPath(follower2Id)), newFollowerConfigParams());
52
53         follower2Actor = newTestRaftActor(follower2Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
54                 follower1Id, testActorPath(follower1Id)), newFollowerConfigParams());
55
56         Map<String, String> peerAddresses = ImmutableMap.<String, String>builder().
57                 put(follower1Id, follower1Actor.path().toString()).
58                 put(follower2Id, follower2Actor.path().toString()).build();
59
60         leaderConfigParams = newLeaderConfigParams();
61         leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
62
63         waitUntilLeader(leaderActor);
64
65         leaderContext = leaderActor.underlyingActor().getRaftActorContext();
66         leader = leaderActor.underlyingActor().getCurrentBehavior();
67
68         follower1Context = follower1Actor.underlyingActor().getRaftActorContext();
69         follower1 = follower1Actor.underlyingActor().getCurrentBehavior();
70
71         follower2Context = follower2Actor.underlyingActor().getRaftActorContext();
72         follower2 = follower2Actor.underlyingActor().getCurrentBehavior();
73
74         currentTerm = leaderContext.getTermInformation().getCurrentTerm();
75         assertEquals("Current term > " + initialTerm, true, currentTerm > initialTerm);
76
77         leaderCollectorActor = leaderActor.underlyingActor().collectorActor();
78         follower1CollectorActor = follower1Actor.underlyingActor().collectorActor();
79         follower2CollectorActor = follower2Actor.underlyingActor().collectorActor();
80
81         testLog.info("Leader created and elected");
82     }
83
84     /**
85      * Send 2 payload instances with follower 2 lagging then resume the follower and verifies it gets
86      * caught up via AppendEntries.
87      */
88     @Test
89     public void testReplicationsWithLaggingFollowerCaughtUpViaAppendEntries() throws Exception {
90         testLog.info("testReplicationsWithLaggingFollowerCaughtUpViaAppendEntries starting: sending 2 new payloads");
91
92         setup();
93
94         // Simulate lagging by dropping AppendEntries messages in follower 2.
95         follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
96
97         // Send the payloads.
98         MockPayload payload0 = sendPayloadData(leaderActor, "zero");
99         MockPayload payload1 = sendPayloadData(leaderActor, "one");
100
101         // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
102         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 2);
103         verifyApplyState(applyStates.get(0), leaderCollectorActor, payload0.toString(), currentTerm, 0, payload0);
104         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload1.toString(), currentTerm, 1, payload1);
105
106         // Verify follower 1 applies each log entry.
107         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 2);
108         verifyApplyState(applyStates.get(0), null, null, currentTerm, 0, payload0);
109         verifyApplyState(applyStates.get(1), null, null, currentTerm, 1, payload1);
110
111         // Ensure there's at least 1 more heartbeat.
112         MessageCollectorActor.clearMessages(leaderCollectorActor);
113         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
114
115         // The leader should not have performed fake snapshots to trim the log because the entries have not
116         // been replicated to follower 2.
117         assertEquals("Leader snapshot term", -1, leaderContext.getReplicatedLog().getSnapshotTerm());
118         assertEquals("Leader snapshot index", -1, leaderContext.getReplicatedLog().getSnapshotIndex());
119         assertEquals("Leader journal log size", 2, leaderContext.getReplicatedLog().size());
120         assertEquals("Leader journal last index", 1, leaderContext.getReplicatedLog().lastIndex());
121         assertEquals("Leader commit index", 1, leaderContext.getCommitIndex());
122         assertEquals("Leader last applied", 1, leaderContext.getLastApplied());
123         assertEquals("Leader replicatedToAllIndex", -1, leader.getReplicatedToAllIndex());
124
125         testLog.info("testReplicationsWithLaggingFollowerCaughtUpViaAppendEntries: new entries applied - resuming follower {}", follower2Id);
126
127         // Now stop dropping AppendEntries in follower 2.
128         follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
129
130         // Verify follower 2 applies each log entry.
131         applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 2);
132         verifyApplyState(applyStates.get(0), null, null, currentTerm, 0, payload0);
133         verifyApplyState(applyStates.get(1), null, null, currentTerm, 1, payload1);
134
135         // Ensure there's at least 1 more heartbeat.
136         MessageCollectorActor.clearMessages(leaderCollectorActor);
137         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
138
139         // The leader should now have performed fake snapshots to trim the log.
140         verifyLeadersTrimmedLog(1);
141
142         // Even though follower 2 lagged behind, the leader should not have tried to install a snapshot
143         // to catch it up because no snapshotting was done so the follower's next index was present in the log.
144         InstallSnapshot installSnapshot = MessageCollectorActor.getFirstMatching(follower2CollectorActor,
145                 InstallSnapshot.class);
146         Assert.assertNull("Follower 2 received unexpected InstallSnapshot", installSnapshot);
147
148         testLog.info("testReplicationsWithLaggingFollowerCaughtUpViaAppendEntries complete");
149     }
150
151     /**
152      * Send payloads to trigger a leader snapshot due to snapshotBatchCount reached with follower 2
153      * lagging but not enough for the leader to trim its log from the last applied index. Follower 2's log
154      * will be behind by several entries and, when it is resumed, it should be caught up via AppendEntries
155      * sent by the leader.
156      *
157      * @throws Exception
158      */
159     @Test
160     public void testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries() throws Exception {
161         testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries starting");
162
163         setup();
164
165         sendInitialPayloadsReplicatedToAllFollowers("zero", "one");
166
167         // Configure follower 2 to drop messages and lag.
168         follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
169
170         // Send the first payload and verify it gets applied by the leader and follower 1.
171         MockPayload payload2 = sendPayloadData(leaderActor, "two");
172
173         ApplyState applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
174         verifyApplyState(applyState, leaderCollectorActor, payload2.toString(), currentTerm, 2, payload2);
175
176         applyState = MessageCollectorActor.expectFirstMatching(follower1CollectorActor, ApplyState.class);
177         verifyApplyState(applyState, null, null, currentTerm, 2, payload2);
178
179         expSnapshotState.add(payload2);
180
181         MessageCollectorActor.clearMessages(leaderCollectorActor);
182         MessageCollectorActor.clearMessages(follower1CollectorActor);
183
184         // Send another payload - this should cause a snapshot due to snapshotBatchCount reached.
185         MockPayload payload3 = sendPayloadData(leaderActor, "three");
186
187         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
188
189         testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries: sending 2 more payloads");
190
191         // Send 2 more payloads - not enough to trigger another snapshot.
192         MockPayload payload4 = sendPayloadData(leaderActor, "four");
193         MockPayload payload5 = sendPayloadData(leaderActor, "five");
194
195         // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
196         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
197         verifyApplyState(applyStates.get(0), leaderCollectorActor, payload3.toString(), currentTerm, 3, payload3);
198         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
199         verifyApplyState(applyStates.get(2), leaderCollectorActor, payload5.toString(), currentTerm, 5, payload5);
200
201         // Verify follower 1 applies each log entry.
202         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
203         verifyApplyState(applyStates.get(0), null, null, currentTerm, 3, payload3);
204         verifyApplyState(applyStates.get(1), null, null, currentTerm, 4, payload4);
205         verifyApplyState(applyStates.get(2), null, null, currentTerm, 5, payload5);
206
207         // The snapshot should have caused the leader to advanced the snapshot index to the
208         // last previously applied index (1) that was replicated to all followers at the time of capture.
209         // Note: since the log size (3) did not exceed the snapshot batch count (4), the leader should not
210         // have trimmed the log to the last index actually applied (5).
211         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
212         assertEquals("Leader snapshot index", 1, leaderContext.getReplicatedLog().getSnapshotIndex());
213         assertEquals("Leader journal log size", 4, leaderContext.getReplicatedLog().size());
214         assertEquals("Leader journal last index", 5, leaderContext.getReplicatedLog().lastIndex());
215         assertEquals("Leader commit index", 5, leaderContext.getCommitIndex());
216         assertEquals("Leader last applied", 5, leaderContext.getLastApplied());
217         assertEquals("Leader replicatedToAllIndex", 1, leader.getReplicatedToAllIndex());
218
219         // Now stop dropping AppendEntries in follower 2.
220         follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
221
222         // Verify follower 2 applies each log entry. The leader should not install a snapshot b/c
223         // follower 2's next index (3) is still present in the log.
224         applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 4);
225         verifyApplyState(applyStates.get(0), null, null, currentTerm, 2, payload2);
226         verifyApplyState(applyStates.get(1), null, null, currentTerm, 3, payload3);
227         verifyApplyState(applyStates.get(2), null, null, currentTerm, 4, payload4);
228         verifyApplyState(applyStates.get(3), null, null, currentTerm, 5, payload5);
229
230         // Verify the leader did not try to install a snapshot to catch up follower 2.
231         InstallSnapshot installSnapshot = MessageCollectorActor.getFirstMatching(follower2CollectorActor, InstallSnapshot.class);
232         Assert.assertNull("Follower 2 received unexpected InstallSnapshot", installSnapshot);
233
234         // Ensure there's at least 1 more heartbeat.
235         MessageCollectorActor.clearMessages(leaderCollectorActor);
236         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
237
238         // The leader should now have performed fake snapshots to advance the snapshot index and to trim
239         // the log. In addition replicatedToAllIndex should've advanced.
240         verifyLeadersTrimmedLog(5);
241
242         // Verify the leader's persisted snapshot.
243         List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
244         assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
245         verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 2, currentTerm, 3);
246         List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
247         assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
248         verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 3, payload3);
249
250         // Verify follower 1's log and snapshot indexes.
251         MessageCollectorActor.clearMessages(follower1CollectorActor);
252         MessageCollectorActor.expectFirstMatching(follower1CollectorActor, AppendEntries.class);
253         verifyFollowersTrimmedLog(1, follower1Actor, 5);
254
255         // Verify follower 2's log and snapshot indexes.
256         MessageCollectorActor.clearMessages(follower2CollectorActor);
257         MessageCollectorActor.expectFirstMatching(follower2CollectorActor, AppendEntries.class);
258         verifyFollowersTrimmedLog(2, follower2Actor, 5);
259
260         MessageCollectorActor.clearMessages(leaderCollectorActor);
261         MessageCollectorActor.clearMessages(follower1CollectorActor);
262         MessageCollectorActor.clearMessages(follower2CollectorActor);
263
264         expSnapshotState.add(payload3);
265         expSnapshotState.add(payload4);
266         expSnapshotState.add(payload5);
267
268         testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries complete");
269     }
270
271     /**
272      * Send payloads to trigger a leader snapshot due to snapshotBatchCount reached with follower 2
273      * lagging where the leader trims its log from the last applied index. Follower 2's log
274      * will be behind by several entries and, when it is resumed, it should be caught up via a snapshot
275      * installed by the leader.
276      *
277      * @throws Exception
278      */
279     @Test
280     public void testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot() throws Exception {
281         testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot starting");
282
283         setup();
284
285         sendInitialPayloadsReplicatedToAllFollowers("zero", "one");
286
287         // Configure follower 2 to drop messages and lag.
288         follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
289
290         // Send 5 payloads - the second should cause a leader snapshot.
291         MockPayload payload2 = sendPayloadData(leaderActor, "two");
292         MockPayload payload3 = sendPayloadData(leaderActor, "three");
293         MockPayload payload4 = sendPayloadData(leaderActor, "four");
294         MockPayload payload5 = sendPayloadData(leaderActor, "five");
295         MockPayload payload6 = sendPayloadData(leaderActor, "six");
296
297         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
298
299         // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
300         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 5);
301         verifyApplyState(applyStates.get(0), leaderCollectorActor, payload2.toString(), currentTerm, 2, payload2);
302         verifyApplyState(applyStates.get(2), leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
303         verifyApplyState(applyStates.get(4), leaderCollectorActor, payload6.toString(), currentTerm, 6, payload6);
304
305         MessageCollectorActor.clearMessages(leaderCollectorActor);
306
307         testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries: sending 1 more payload to trigger second snapshot");
308
309         // Send another payload to trigger a second leader snapshot.
310         MockPayload payload7 = sendPayloadData(leaderActor, "seven");
311
312         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
313
314         ApplyState applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
315         verifyApplyState(applyState, leaderCollectorActor, payload7.toString(), currentTerm, 7, payload7);
316
317         // Verify follower 1 applies each log entry.
318         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 6);
319         verifyApplyState(applyStates.get(0), null, null, currentTerm, 2, payload2);
320         verifyApplyState(applyStates.get(2), null, null, currentTerm, 4, payload4);
321         verifyApplyState(applyStates.get(5), null, null, currentTerm, 7, payload7);
322
323         // The snapshot should have caused the leader to advanced the snapshot index to the leader's last
324         // applied index (6) since the log size should have exceed the snapshot batch count (4).
325         // replicatedToAllIndex should remain at 1 since follower 2 is lagging.
326         verifyLeadersTrimmedLog(7, 1);
327
328         expSnapshotState.add(payload2);
329         expSnapshotState.add(payload3);
330         expSnapshotState.add(payload4);
331         expSnapshotState.add(payload5);
332         expSnapshotState.add(payload6);
333
334         // Verify the leader's persisted snapshot.
335         List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
336         assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
337         verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 6, currentTerm, 7);
338         List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
339         assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
340         verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 7, payload7);
341
342         expSnapshotState.add(payload7);
343
344         verifyInstallSnapshotToLaggingFollower(7);
345
346         testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot complete");
347     }
348
349     /**
350      * Send payloads with follower 2 lagging with the last payload having a large enough size to trigger a
351      * leader snapshot such that the leader trims its log from the last applied index.. Follower 2's log will
352      * be behind by several entries and, when it is resumed, it should be caught up via a snapshot installed
353      * by the leader.
354      *
355      * @throws Exception
356      */
357     @Test
358     public void testLeaderSnapshotTriggeredByMemoryThresholdExceededWithLaggingFollower() throws Exception {
359         testLog.info("testLeaderSnapshotTriggeredByMemoryThresholdExceededWithLaggingFollower starting");
360
361         snapshotBatchCount = 5;
362         setup();
363
364         sendInitialPayloadsReplicatedToAllFollowers("zero");
365
366         leaderActor.underlyingActor().setMockTotalMemory(1000);
367
368         // We'll expect a ReplicatedLogImplEntry message and an ApplyJournalEntries message added to the journal.
369         InMemoryJournal.addWriteMessagesCompleteLatch(leaderId, 2);
370
371         follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
372
373         // Send a payload with a large relative size but not enough to trigger a snapshot.
374         MockPayload payload1 = sendPayloadData(leaderActor, "one", 500);
375
376         // Verify the leader got consensus and applies the first log entry even though follower 2 didn't respond.
377         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 1);
378         verifyApplyState(applyStates.get(0), leaderCollectorActor, payload1.toString(), currentTerm, 1, payload1);
379
380         // Wait for all the ReplicatedLogImplEntry and ApplyJournalEntries messages to be added to the journal
381         // before the snapshot so the snapshot sequence # will be higher to ensure the snapshot gets
382         // purged from the snapshot store after subsequent snapshots.
383         InMemoryJournal.waitForWriteMessagesComplete(leaderId);
384
385         // Verify a snapshot is not triggered.
386         CaptureSnapshot captureSnapshot = MessageCollectorActor.getFirstMatching(leaderCollectorActor, CaptureSnapshot.class);
387         Assert.assertNull("Leader received unexpected CaptureSnapshot", captureSnapshot);
388
389         expSnapshotState.add(payload1);
390
391         // Send another payload with a large enough relative size in combination with the last payload
392         // that exceeds the memory threshold (70% * 1000 = 700) - this should do a snapshot.
393         MockPayload payload2 = sendPayloadData(leaderActor, "two", 201);
394
395         // Verify the leader applies the last log entry.
396         applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 2);
397         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload2.toString(), currentTerm, 2, payload2);
398
399         // Verify follower 1 applies each log entry.
400         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 2);
401         verifyApplyState(applyStates.get(0), null, null, currentTerm, 1, payload1);
402         verifyApplyState(applyStates.get(1), null, null, currentTerm, 2, payload2);
403
404         // A snapshot should've occurred - wait for it to complete.
405         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
406
407         // Because the snapshot was triggered by exceeding the memory threshold the leader should've advanced
408         // the snapshot index to the last applied index and trimmed the log even though the entries weren't
409         // replicated to all followers.
410         verifyLeadersTrimmedLog(2, 0);
411
412         // Verify the leader's persisted snapshot.
413         List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
414         assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
415         verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 1, currentTerm, 2);
416         List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
417         assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
418         verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 2, payload2);
419
420         expSnapshotState.add(payload2);
421
422         verifyInstallSnapshotToLaggingFollower(2L);
423
424         // Sends a payload with index 3.
425         verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot();
426
427         // Sends 3 payloads with indexes 4, 5 and 6.
428         verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot();
429
430         // Recover the leader from persistence and verify.
431         long leadersLastIndexOnRecovery = 6;
432
433         // The leader's last snapshot was triggered by index 4 so the last applied index in the snapshot was 3.
434         long leadersSnapshotIndexOnRecovery = 3;
435
436         // The recovered journal should have 3 entries starting at index 4.
437         long leadersFirstJournalEntryIndexOnRecovery = 4;
438
439         verifyLeaderRecoveryAfterReinstatement(leadersLastIndexOnRecovery, leadersSnapshotIndexOnRecovery,
440                 leadersFirstJournalEntryIndexOnRecovery);
441
442         testLog.info("testLeaderSnapshotTriggeredByMemoryThresholdExceeded ending");
443     }
444
445     /**
446      * Send another payload to verify another snapshot is not done since the last snapshot trimmed the
447      * first log entry so the memory threshold should not be exceeded.
448      *
449      * @throws Exception
450      */
451     private void verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot() throws Exception {
452         ApplyState applyState;
453         CaptureSnapshot captureSnapshot;
454
455         MockPayload payload3 = sendPayloadData(leaderActor, "three");
456
457         // Verify the leader applies the state.
458         applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
459         verifyApplyState(applyState, leaderCollectorActor, payload3.toString(), currentTerm, 3, payload3);
460
461         captureSnapshot = MessageCollectorActor.getFirstMatching(leaderCollectorActor, CaptureSnapshot.class);
462         Assert.assertNull("Leader received unexpected CaptureSnapshot", captureSnapshot);
463
464         // Verify the follower 1 applies the state.
465         applyState = MessageCollectorActor.expectFirstMatching(follower1CollectorActor, ApplyState.class);
466         verifyApplyState(applyState, null, null, currentTerm, 3, payload3);
467
468         // Verify the follower 2 applies the state.
469         applyState = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplyState.class);
470         verifyApplyState(applyState, null, null, currentTerm, 3, payload3);
471
472         // Verify the leader's state.
473         verifyLeadersTrimmedLog(3);
474
475         // Verify follower 1's state.
476         verifyFollowersTrimmedLog(1, follower1Actor, 3);
477
478         // Verify follower 2's state.
479         verifyFollowersTrimmedLog(2, follower2Actor, 3);
480
481         // Revert back to JVM total memory.
482         leaderActor.underlyingActor().setMockTotalMemory(0);
483
484         MessageCollectorActor.clearMessages(leaderCollectorActor);
485         MessageCollectorActor.clearMessages(follower1CollectorActor);
486         MessageCollectorActor.clearMessages(follower2CollectorActor);
487
488         expSnapshotState.add(payload3);
489     }
490
491     /**
492      * Resume the lagging follower 2 and verify it receives an install snapshot from the leader.
493      *
494      * @throws Exception
495      */
496     private void verifyInstallSnapshotToLaggingFollower(long lastAppliedIndex) throws Exception {
497         List<Snapshot> persistedSnapshots;
498         List<ReplicatedLogEntry> unAppliedEntry;
499         ApplySnapshot applySnapshot;
500         InstallSnapshot installSnapshot;
501
502         testLog.info("testInstallSnapshotToLaggingFollower starting");
503
504         MessageCollectorActor.clearMessages(leaderCollectorActor);
505
506         // Now stop dropping AppendEntries in follower 2.
507         follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
508
509
510         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
511
512         // Verify the leader's persisted snapshot. The previous snapshot (currently) won't be deleted from
513         // the snapshot store because the second snapshot was initiated by the follower install snapshot and
514         // not because the batch count was reached so the persisted journal sequence number wasn't advanced
515         // far enough to cause the previous snapshot to be deleted. This is because
516         // RaftActor#trimPersistentData subtracts the snapshotBatchCount from the snapshot's sequence number.
517         // This is OK - the next snapshot should delete it. In production, even if the system restarted
518         // before another snapshot, they would both get applied which wouldn't hurt anything.
519         persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
520         Assert.assertTrue("Expected at least 1 persisted snapshots", persistedSnapshots.size() > 0);
521         Snapshot persistedSnapshot = persistedSnapshots.get(persistedSnapshots.size() - 1);
522         verifySnapshot("Persisted", persistedSnapshot, currentTerm, lastAppliedIndex, currentTerm, lastAppliedIndex);
523         unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
524         assertEquals("Persisted Snapshot getUnAppliedEntries size", 0, unAppliedEntry.size());
525
526         int snapshotSize = persistedSnapshot.getState().length;
527         int expTotalChunks = (snapshotSize / SNAPSHOT_CHUNK_SIZE) + ((snapshotSize % SNAPSHOT_CHUNK_SIZE) > 0 ? 1 : 0);
528
529         installSnapshot = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, InstallSnapshot.class);
530         assertEquals("InstallSnapshot getTerm", currentTerm, installSnapshot.getTerm());
531         assertEquals("InstallSnapshot getLeaderId", leaderId, installSnapshot.getLeaderId());
532         assertEquals("InstallSnapshot getChunkIndex", 1, installSnapshot.getChunkIndex());
533         assertEquals("InstallSnapshot getTotalChunks", expTotalChunks, installSnapshot.getTotalChunks());
534         assertEquals("InstallSnapshot getLastIncludedTerm", currentTerm, installSnapshot.getLastIncludedTerm());
535         assertEquals("InstallSnapshot getLastIncludedIndex", lastAppliedIndex, installSnapshot.getLastIncludedIndex());
536         //assertArrayEquals("InstallSnapshot getData", snapshot, installSnapshot.getData().toByteArray());
537
538         List<InstallSnapshotReply> installSnapshotReplies = MessageCollectorActor.expectMatching(
539                 leaderCollectorActor, InstallSnapshotReply.class, expTotalChunks);
540         int index = 1;
541         for(InstallSnapshotReply installSnapshotReply: installSnapshotReplies) {
542             assertEquals("InstallSnapshotReply getTerm", currentTerm, installSnapshotReply.getTerm());
543             assertEquals("InstallSnapshotReply getChunkIndex", index++, installSnapshotReply.getChunkIndex());
544             assertEquals("InstallSnapshotReply getFollowerId", follower2Id, installSnapshotReply.getFollowerId());
545             assertEquals("InstallSnapshotReply isSuccess", true, installSnapshotReply.isSuccess());
546         }
547
548         // Verify follower 2 applies the snapshot.
549         applySnapshot = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplySnapshot.class);
550         verifySnapshot("Follower 2", applySnapshot.getSnapshot(), currentTerm, lastAppliedIndex, currentTerm, lastAppliedIndex);
551         assertEquals("Persisted Snapshot getUnAppliedEntries size", 0, applySnapshot.getSnapshot().getUnAppliedEntries().size());
552
553         // Wait for the snapshot to complete.
554         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
555
556         // Ensure there's at least 1 more heartbeat.
557         MessageCollectorActor.clearMessages(leaderCollectorActor);
558         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
559
560         // The leader should now have performed fake snapshots to advance the snapshot index and to trim
561         // the log. In addition replicatedToAllIndex should've advanced.
562         verifyLeadersTrimmedLog(lastAppliedIndex);
563
564         MessageCollectorActor.clearMessages(leaderCollectorActor);
565         MessageCollectorActor.clearMessages(follower1CollectorActor);
566         MessageCollectorActor.clearMessages(follower2CollectorActor);
567
568         testLog.info("testInstallSnapshotToLaggingFollower complete");
569     }
570
571     /**
572      * Do another round of payloads and snapshot to verify replicatedToAllIndex gets back on track and
573      * snapshots works as expected after doing a follower snapshot. In this step we don't lag a follower.
574      *
575      * @throws Exception
576      */
577     private void verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot() throws Exception {
578         List<ApplyState> applyStates;
579         ApplyState applyState;
580
581         testLog.info("testReplicationsAndSnapshotAfterInstallSnapshot starting: replicatedToAllIndex: {}",
582                 leader.getReplicatedToAllIndex());
583
584         // Send another payload - a snapshot should occur.
585         MockPayload payload4 = sendPayloadData(leaderActor, "four");
586
587         // Wait for the snapshot to complete.
588         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
589
590         applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
591         verifyApplyState(applyState, leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
592
593         // Verify the leader's last persisted snapshot (previous ones may not be purged yet).
594         List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
595         Snapshot persistedSnapshot = persistedSnapshots.get(persistedSnapshots.size() - 1);
596         verifySnapshot("Persisted", persistedSnapshot, currentTerm, 3, currentTerm, 4);
597         List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
598         assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
599         verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 4, payload4);
600
601         // Send a couple more payloads.
602         MockPayload payload5 = sendPayloadData(leaderActor, "five");
603         MockPayload payload6 = sendPayloadData(leaderActor, "six");
604
605         // Verify the leader applies the 2 log entries.
606         applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
607         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload5.toString(), currentTerm, 5, payload5);
608         verifyApplyState(applyStates.get(2), leaderCollectorActor, payload6.toString(), currentTerm, 6, payload6);
609
610         // Verify the leader applies a log entry for at least the last entry index.
611         verifyApplyJournalEntries(leaderCollectorActor, 6);
612
613         // Ensure there's at least 1 more heartbeat to trim the log.
614         MessageCollectorActor.clearMessages(leaderCollectorActor);
615         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
616
617         // Verify the leader's final state.
618         verifyLeadersTrimmedLog(6);
619
620         InMemoryJournal.dumpJournal(leaderId);
621
622         // Verify the leaders's persisted journal log - it should only contain the last 2 ReplicatedLogEntries
623         // added after the snapshot as the persisted journal should've been purged to the snapshot
624         // sequence number.
625         verifyPersistedJournal(leaderId, Arrays.asList(new ReplicatedLogImplEntry(5, currentTerm, payload5),
626                 new ReplicatedLogImplEntry(6, currentTerm, payload6)));
627
628         // Verify the leaders's persisted journal contains an ApplyJournalEntries for at least the last entry index.
629         List<ApplyJournalEntries> persistedApplyJournalEntries = InMemoryJournal.get(leaderId, ApplyJournalEntries.class);
630         boolean found = false;
631         for(ApplyJournalEntries entry: persistedApplyJournalEntries) {
632             if(entry.getToIndex() == 6) {
633                 found = true;
634                 break;
635             }
636         }
637
638         Assert.assertTrue(String.format("ApplyJournalEntries with index %d not found in leader's persisted journal", 6), found);
639
640         // Verify follower 1 applies the 3 log entries.
641         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
642         verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
643         verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
644         verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
645
646         // Verify follower 1's log state.
647         verifyFollowersTrimmedLog(1, follower1Actor, 6);
648
649         // Verify follower 2 applies the 3 log entries.
650         applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
651         verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
652         verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
653         verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
654
655         // Verify follower 2's log state.
656         verifyFollowersTrimmedLog(2, follower2Actor, 6);
657
658         expSnapshotState.add(payload4);
659         expSnapshotState.add(payload5);
660         expSnapshotState.add(payload6);
661
662         testLog.info("testReplicationsAndSnapshotAfterInstallSnapshot ending");
663     }
664
665     /**
666      * Kill the leader actor, reinstate it and verify the recovered journal.
667      */
668     private void verifyLeaderRecoveryAfterReinstatement(long lastIndex, long snapshotIndex, long firstJournalEntryIndex) {
669         testLog.info("testLeaderReinstatement starting");
670
671         killActor(leaderActor);
672
673         leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
674         TestRaftActor testRaftActor = leaderActor.underlyingActor();
675
676         testRaftActor.startDropMessages(RequestVoteReply.class);
677
678         leaderContext = testRaftActor.getRaftActorContext();
679
680         testRaftActor.waitForRecoveryComplete();
681
682         int logSize = (int) (expSnapshotState.size() - firstJournalEntryIndex);
683         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
684         assertEquals("Leader snapshot index", snapshotIndex, leaderContext.getReplicatedLog().getSnapshotIndex());
685         assertEquals("Leader journal log size", logSize, leaderContext.getReplicatedLog().size());
686         assertEquals("Leader journal last index", lastIndex, leaderContext.getReplicatedLog().lastIndex());
687         assertEquals("Leader commit index", lastIndex, leaderContext.getCommitIndex());
688         assertEquals("Leader last applied", lastIndex, leaderContext.getLastApplied());
689
690         for(long i = firstJournalEntryIndex; i < expSnapshotState.size(); i++) {
691             verifyReplicatedLogEntry(leaderContext.getReplicatedLog().get(i), currentTerm, i,
692                     expSnapshotState.get((int) i));
693         }
694
695         assertEquals("Leader applied state", expSnapshotState, testRaftActor.getState());
696
697         testLog.info("testLeaderReinstatement ending");
698     }
699
700     private void sendInitialPayloadsReplicatedToAllFollowers(String... data) {
701
702         // Send the payloads.
703         for(String d: data) {
704             expSnapshotState.add(sendPayloadData(leaderActor, d));
705         }
706
707         int nEntries = data.length;
708
709         // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
710         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, nEntries);
711         for(int i = 0; i < expSnapshotState.size(); i++) {
712             MockPayload payload = expSnapshotState.get(i);
713             verifyApplyState(applyStates.get(i), leaderCollectorActor, payload.toString(), currentTerm, i, payload);
714         }
715
716         // Verify follower 1 applies each log entry.
717         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, nEntries);
718         for(int i = 0; i < expSnapshotState.size(); i++) {
719             MockPayload payload = expSnapshotState.get(i);
720             verifyApplyState(applyStates.get(i), null, null, currentTerm, i, payload);
721         }
722
723         // Verify follower 2 applies each log entry.
724         applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, nEntries);
725         for(int i = 0; i < expSnapshotState.size(); i++) {
726             MockPayload payload = expSnapshotState.get(i);
727             verifyApplyState(applyStates.get(i), null, null, currentTerm, i, payload);
728         }
729
730         // Ensure there's at least 1 more heartbeat.
731         MessageCollectorActor.clearMessages(leaderCollectorActor);
732         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
733
734         // The leader should have performed fake snapshots to trim the log to the last index replicated to
735         // all followers.
736         verifyLeadersTrimmedLog(nEntries - 1);
737
738         MessageCollectorActor.clearMessages(leaderCollectorActor);
739         MessageCollectorActor.clearMessages(follower1CollectorActor);
740         MessageCollectorActor.clearMessages(follower2CollectorActor);
741     }
742 }