2 * Copyright (c) 2015 Brocade Communications Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.controller.cluster.raft;
10 import static org.junit.Assert.assertArrayEquals;
11 import static org.junit.Assert.assertEquals;
12 import akka.persistence.SaveSnapshotSuccess;
13 import com.google.common.collect.ImmutableMap;
14 import java.util.Arrays;
15 import java.util.List;
17 import org.junit.Assert;
18 import org.junit.Test;
19 import org.opendaylight.controller.cluster.raft.MockRaftActorContext.MockPayload;
20 import org.opendaylight.controller.cluster.raft.RaftActor.UpdateElectionTerm;
21 import org.opendaylight.controller.cluster.raft.base.messages.ApplyJournalEntries;
22 import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
23 import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
24 import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot;
25 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
26 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
27 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
28 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
29 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
30 import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
31 import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
32 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
35 * Tests replication and snapshots end-to-end using real RaftActors and behavior communication with a
38 * @author Thomas Pantelis
40 public class ReplicationAndSnapshotsWithLaggingFollowerIntegrationTest extends AbstractRaftActorIntegrationTest {
42 private MockPayload payload9;
43 private MockPayload payload11;
44 private MockPayload payload12;
45 private MockPayload payload13;
48 public void runTest() throws Exception {
49 testLog.info("testReplicationAndSnapshotsWithLaggingFollower starting");
51 leaderId = factory.generateActorId("leader");
52 follower1Id = factory.generateActorId("follower");
53 follower2Id = factory.generateActorId("follower");
55 // Setup the persistent journal for the leader - just an election term and no journal/snapshots.
56 InMemoryJournal.addEntry(leaderId, 1, new UpdateElectionTerm(initialTerm, leaderId));
58 // Create the leader and 2 follower actors.
59 follower1Actor = newTestRaftActor(follower1Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
60 follower2Id, testActorPath(follower2Id)), newFollowerConfigParams());
62 follower2Actor = newTestRaftActor(follower2Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
63 follower1Id, testActorPath(follower1Id)), newFollowerConfigParams());
65 Map<String, String> peerAddresses = ImmutableMap.<String, String>builder().
66 put(follower1Id, follower1Actor.path().toString()).
67 put(follower2Id, follower2Actor.path().toString()).build();
69 leaderConfigParams = newLeaderConfigParams();
70 leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
72 waitUntilLeader(leaderActor);
74 leaderContext = leaderActor.underlyingActor().getRaftActorContext();
75 leader = leaderActor.underlyingActor().getCurrentBehavior();
77 follower1Context = follower1Actor.underlyingActor().getRaftActorContext();
78 follower1 = follower1Actor.underlyingActor().getCurrentBehavior();
80 follower2Context = follower2Actor.underlyingActor().getRaftActorContext();
81 follower2 = follower2Actor.underlyingActor().getCurrentBehavior();
83 currentTerm = leaderContext.getTermInformation().getCurrentTerm();
84 assertEquals("Current term > " + initialTerm, true, currentTerm > initialTerm);
86 leaderCollectorActor = leaderActor.underlyingActor().collectorActor();
87 follower1CollectorActor = follower1Actor.underlyingActor().collectorActor();
88 follower2CollectorActor = follower2Actor.underlyingActor().collectorActor();
90 testLog.info("Leader created and elected");
92 testInitialReplications();
94 testSubsequentReplicationsAndSnapshots();
96 testLeaderSnapshotTriggeredByMemoryThresholdExceeded();
98 testInstallSnapshotToLaggingFollower();
100 verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot();
102 testFinalReplicationsAndSnapshot();
104 testLeaderReinstatement();
106 testLog.info("testReplicationAndSnapshotsWithLaggingFollower ending");
110 * Send 3 payload instances with follower 2 temporarily lagging.
114 private void testInitialReplications() throws Exception {
116 testLog.info("testInitialReplications starting: sending 2 new payloads");
118 // Simulate lagging by dropping AppendEntries messages in follower 2.
119 follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
121 // Send the payloads.
122 MockPayload payload0 = sendPayloadData(leaderActor, "zero");
123 MockPayload payload1 = sendPayloadData(leaderActor, "one");
124 MockPayload payload2 = sendPayloadData(leaderActor, "two");
126 // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
127 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
128 verifyApplyState(applyStates.get(0), leaderCollectorActor, payload0.toString(), currentTerm, 0, payload0);
129 verifyApplyState(applyStates.get(1), leaderCollectorActor, payload1.toString(), currentTerm, 1, payload1);
130 verifyApplyState(applyStates.get(2), leaderCollectorActor, payload2.toString(), currentTerm, 2, payload2);
132 // Verify follower 1 applies each log entry.
133 applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
134 verifyApplyState(applyStates.get(0), null, null, currentTerm, 0, payload0);
135 verifyApplyState(applyStates.get(1), null, null, currentTerm, 1, payload1);
136 verifyApplyState(applyStates.get(2), null, null, currentTerm, 2, payload2);
138 // Ensure there's at least 1 more heartbeat.
139 MessageCollectorActor.clearMessages(leaderCollectorActor);
140 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
142 // The leader should not have performed fake snapshots to trim the log because the entries have not
143 // been replicated to follower 2.
144 assertEquals("Leader snapshot term", -1, leaderContext.getReplicatedLog().getSnapshotTerm());
145 assertEquals("Leader snapshot index", -1, leaderContext.getReplicatedLog().getSnapshotIndex());
146 assertEquals("Leader journal log size", 3, leaderContext.getReplicatedLog().size());
147 assertEquals("Leader journal last index", 2, leaderContext.getReplicatedLog().lastIndex());
148 assertEquals("Leader commit index", 2, leaderContext.getCommitIndex());
149 assertEquals("Leader last applied", 2, leaderContext.getLastApplied());
150 assertEquals("Leader replicatedToAllIndex", -1, leader.getReplicatedToAllIndex());
152 testLog.info("Step 3: new entries applied - re-enabling follower {}", follower2Id);
154 // Now stop dropping AppendEntries in follower 2.
155 follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
157 // Verify follower 2 applies each log entry.
158 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
159 verifyApplyState(applyStates.get(0), null, null, currentTerm, 0, payload0);
160 verifyApplyState(applyStates.get(1), null, null, currentTerm, 1, payload1);
161 verifyApplyState(applyStates.get(2), null, null, currentTerm, 2, payload2);
163 // Ensure there's at least 1 more heartbeat.
164 MessageCollectorActor.clearMessages(leaderCollectorActor);
165 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
167 // The leader should now have performed fake snapshots to trim the log.
168 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
169 assertEquals("Leader snapshot index", 1, leaderContext.getReplicatedLog().getSnapshotIndex());
170 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
171 assertEquals("Leader journal last index", 2, leaderContext.getReplicatedLog().lastIndex());
172 assertEquals("Leader commit index", 2, leaderContext.getCommitIndex());
173 assertEquals("Leader last applied", 2, leaderContext.getLastApplied());
174 // Note - replicatedToAllIndex always lags 1 behind last applied since it trims the log up to the
175 // last applied index. The next entry successfully replicated to followers woild advance it.
176 assertEquals("Leader replicatedToAllIndex", 1, leader.getReplicatedToAllIndex());
178 // Even though follower 2 lagged behind, the leader should not have tried to install a snapshot
179 // to catch it up because no snapshotting was done so the follower's next index was present in the log.
180 InstallSnapshot installSnapshot = MessageCollectorActor.getFirstMatching(follower2CollectorActor,
181 InstallSnapshot.class);
182 Assert.assertNull("Follower 2 received unexpected InstallSnapshot", installSnapshot);
184 MessageCollectorActor.clearMessages(leaderCollectorActor);
185 MessageCollectorActor.clearMessages(follower1CollectorActor);
186 MessageCollectorActor.clearMessages(follower2CollectorActor);
188 testLog.info("testInitialReplications complete");
192 * Send 5 more payloads with follower 2 lagging. Since the snapshotBatch count is 4, this should cause
193 * 2 leader snapshots and follower 2's log will be behind by 5 entries.
197 private void testSubsequentReplicationsAndSnapshots() throws Exception {
198 testLog.info("testSubsequentReplicationsAndSnapshots starting: sending first payload, replicatedToAllIndex: {}",
199 leader.getReplicatedToAllIndex());
201 leaderActor.underlyingActor().setSnapshot(new byte[] {2});
203 follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
205 // Send the first payload - this should cause the first snapshot.
206 MockPayload payload3 = sendPayloadData(leaderActor, "three");
208 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
210 byte[] snapshot = new byte[] {6};
211 leaderActor.underlyingActor().setSnapshot(snapshot);
213 testLog.info("testSubsequentReplicationsAndSnapshots: sending 4 more payloads");
215 // Send the next 4. The last one should cause the second snapshot.
216 MockPayload payload4 = sendPayloadData(leaderActor, "four");
217 MockPayload payload5 = sendPayloadData(leaderActor, "five");
218 MockPayload payload6 = sendPayloadData(leaderActor, "six");
219 MockPayload payload7 = sendPayloadData(leaderActor, "seven");
221 // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
222 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 5);
223 verifyApplyState(applyStates.get(0), leaderCollectorActor, payload3.toString(), currentTerm, 3, payload3);
224 verifyApplyState(applyStates.get(1), leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
225 verifyApplyState(applyStates.get(2), leaderCollectorActor, payload5.toString(), currentTerm, 5, payload5);
226 verifyApplyState(applyStates.get(3), leaderCollectorActor, payload6.toString(), currentTerm, 6, payload6);
227 verifyApplyState(applyStates.get(4), leaderCollectorActor, payload7.toString(), currentTerm, 7, payload7);
229 // Verify follower 1 applies each log entry.
230 applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 5);
231 verifyApplyState(applyStates.get(0), null, null, currentTerm, 3, payload3);
232 verifyApplyState(applyStates.get(1), null, null, currentTerm, 4, payload4);
233 verifyApplyState(applyStates.get(2), null, null, currentTerm, 5, payload5);
234 verifyApplyState(applyStates.get(3), null, null, currentTerm, 6, payload6);
235 verifyApplyState(applyStates.get(4), null, null, currentTerm, 7, payload7);
237 // Wait for snapshot completion.
238 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
240 // The first new entry applied should have caused the leader to advanced the snapshot index to the
241 // last previously applied index (2) that was replicated to all followers.
242 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
243 assertEquals("Leader snapshot index", 2, leaderContext.getReplicatedLog().getSnapshotIndex());
244 assertEquals("Leader journal log size", 5, leaderContext.getReplicatedLog().size());
245 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
246 assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
247 assertEquals("Leader last applied", 7, leaderContext.getLastApplied());
248 assertEquals("Leader replicatedToAllIndex", 2, leader.getReplicatedToAllIndex());
250 // Now stop dropping AppendEntries in follower 2.
251 follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
253 // Verify follower 2 applies each log entry.
254 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 5);
255 verifyApplyState(applyStates.get(0), null, null, currentTerm, 3, payload3);
256 verifyApplyState(applyStates.get(1), null, null, currentTerm, 4, payload4);
257 verifyApplyState(applyStates.get(2), null, null, currentTerm, 5, payload5);
258 verifyApplyState(applyStates.get(3), null, null, currentTerm, 6, payload6);
259 verifyApplyState(applyStates.get(4), null, null, currentTerm, 7, payload7);
261 // Ensure there's at least 1 more heartbeat.
262 MessageCollectorActor.clearMessages(leaderCollectorActor);
263 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
265 // The leader should now have performed fake snapshots to advance the snapshot index and to trim
266 // the log. In addition replicatedToAllIndex should've advanced.
267 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
268 assertEquals("Leader snapshot index", 6, leaderContext.getReplicatedLog().getSnapshotIndex());
269 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
270 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
271 assertEquals("Leader replicatedToAllIndex", 6, leader.getReplicatedToAllIndex());
273 // Verify the leader's persisted snapshot.
274 List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
275 assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
276 verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 3, currentTerm, 7, snapshot);
277 List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
278 assertEquals("Persisted Snapshot getUnAppliedEntries size", 4, unAppliedEntry.size());
279 verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 4, payload4);
280 verifyReplicatedLogEntry(unAppliedEntry.get(1), currentTerm, 5, payload5);
281 verifyReplicatedLogEntry(unAppliedEntry.get(2), currentTerm, 6, payload6);
282 verifyReplicatedLogEntry(unAppliedEntry.get(3), currentTerm, 7, payload7);
284 // Even though follower 2's log was behind by 5 entries and 2 snapshots were done, the leader
285 // should not have tried to install a snapshot to catch it up because replicatedToAllIndex was also
286 // behind. Instead of installing a snapshot the leader would've sent AppendEntries with the log entries.
287 InstallSnapshot installSnapshot = MessageCollectorActor.getFirstMatching(follower2CollectorActor, InstallSnapshot.class);
288 Assert.assertNull("Follower 2 received unexpected InstallSnapshot", installSnapshot);
290 // Verify follower 1's log and snapshot indexes.
291 MessageCollectorActor.clearMessages(follower1CollectorActor);
292 MessageCollectorActor.expectFirstMatching(follower1CollectorActor, AppendEntries.class);
293 assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
294 assertEquals("Follower 1 snapshot index", 6, follower1Context.getReplicatedLog().getSnapshotIndex());
295 assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
296 assertEquals("Follower 1 journal last index", 7, follower1Context.getReplicatedLog().lastIndex());
297 assertEquals("Follower 1 commit index", 7, follower1Context.getCommitIndex());
298 assertEquals("Follower 1 last applied", 7, follower1Context.getLastApplied());
299 assertEquals("Follower 1 replicatedToAllIndex", 6, follower1.getReplicatedToAllIndex());
301 // Verify follower 2's log and snapshot indexes.
302 MessageCollectorActor.clearMessages(follower2CollectorActor);
303 MessageCollectorActor.expectFirstMatching(follower2CollectorActor, AppendEntries.class);
304 assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
305 assertEquals("Follower 2 snapshot index", 6, follower2Context.getReplicatedLog().getSnapshotIndex());
306 assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
307 assertEquals("Follower 2 journal last index", 7, follower2Context.getReplicatedLog().lastIndex());
308 assertEquals("Follower 2 commit index", 7, follower2Context.getCommitIndex());
309 assertEquals("Follower 2 last applied", 7, follower2Context.getLastApplied());
310 assertEquals("Follower 2 replicatedToAllIndex", 6, follower2.getReplicatedToAllIndex());
312 MessageCollectorActor.clearMessages(leaderCollectorActor);
313 MessageCollectorActor.clearMessages(follower1CollectorActor);
314 MessageCollectorActor.clearMessages(follower2CollectorActor);
316 testLog.info("testSubsequentReplicationsAndSnapshots complete");
320 * Send a couple more payloads with follower 2 lagging. The last payload will have a large enough size
321 * to trigger a leader snapshot.
325 private void testLeaderSnapshotTriggeredByMemoryThresholdExceeded() throws Exception {
326 testLog.info("testLeaderSnapshotTriggeredByMemoryThresholdExceeded starting: sending 3 payloads, replicatedToAllIndex: {}",
327 leader.getReplicatedToAllIndex());
329 leaderActor.underlyingActor().setMockTotalMemory(1000);
330 byte[] snapshot = new byte[] {6};
331 leaderActor.underlyingActor().setSnapshot(snapshot);
333 // We'll expect a ReplicatedLogImplEntry message and an ApplyJournalEntries message added to the journal.
334 InMemoryJournal.addWriteMessagesCompleteLatch(leaderId, 2);
336 follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
338 // Send a payload with a large relative size but not enough to trigger a snapshot.
339 MockPayload payload8 = sendPayloadData(leaderActor, "eight", 500);
341 // Verify the leader got consensus and applies the first log entry even though follower 2 didn't respond.
342 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 1);
343 verifyApplyState(applyStates.get(0), leaderCollectorActor, payload8.toString(), currentTerm, 8, payload8);
345 // Wait for all the ReplicatedLogImplEntry and ApplyJournalEntries messages to be added to the journal
346 // before the snapshot so the snapshot sequence # will be higher to ensure the snapshot gets
347 // purged from the snapshot store after subsequent snapshots.
348 InMemoryJournal.waitForWriteMessagesComplete(leaderId);
350 // Verify a snapshot is not triggered.
351 CaptureSnapshot captureSnapshot = MessageCollectorActor.getFirstMatching(leaderCollectorActor, CaptureSnapshot.class);
352 Assert.assertNull("Leader received unexpected CaptureSnapshot", captureSnapshot);
354 // Send another payload with a large enough relative size in combination with the last payload
355 // that exceeds the memory threshold (70% * 1000 = 700) - this should do a snapshot.
356 payload9 = sendPayloadData(leaderActor, "nine", 201);
358 // Verify the leader applies the last log entry.
359 applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 2);
360 verifyApplyState(applyStates.get(1), leaderCollectorActor, payload9.toString(), currentTerm, 9, payload9);
362 // Verify follower 1 applies each log entry.
363 applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 2);
364 verifyApplyState(applyStates.get(0), null, null, currentTerm, 8, payload8);
365 verifyApplyState(applyStates.get(1), null, null, currentTerm, 9, payload9);
367 // A snapshot should've occurred - wait for it to complete.
368 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
370 // Because the snapshot was triggered by exceeding the memory threshold the leader should've advanced
371 // the snapshot index to the last applied index and trimmed the log even though the entries weren't
372 // replicated to all followers.
373 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
374 assertEquals("Leader snapshot index", 8, leaderContext.getReplicatedLog().getSnapshotIndex());
375 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
376 assertEquals("Leader journal last index", 9, leaderContext.getReplicatedLog().lastIndex());
377 assertEquals("Leader commit index", 9, leaderContext.getCommitIndex());
378 assertEquals("Leader last applied", 9, leaderContext.getLastApplied());
379 // Note: replicatedToAllIndex should not be advanced since log entries 8 and 9 haven't yet been
380 // replicated to follower 2.
381 assertEquals("Leader replicatedToAllIndex", 7, leader.getReplicatedToAllIndex());
383 // Verify the leader's persisted snapshot.
384 List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
385 assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
386 verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 8, currentTerm, 9, snapshot);
387 List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
388 assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
389 verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 9, payload9);
391 testLog.info("testLeaderSnapshotTriggeredByMemoryThresholdExceeded ending");
395 * Send another payload to verify another snapshot is not done since the last snapshot trimmed the
396 * first log entry so the memory threshold should not be exceeded.
400 private void verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot() throws Exception {
401 ApplyState applyState;
402 CaptureSnapshot captureSnapshot;
404 MockPayload payload10 = sendPayloadData(leaderActor, "ten");
406 // Verify the leader applies the state.
407 applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
408 verifyApplyState(applyState, leaderCollectorActor, payload10.toString(), currentTerm, 10, payload10);
410 captureSnapshot = MessageCollectorActor.getFirstMatching(leaderCollectorActor, CaptureSnapshot.class);
411 Assert.assertNull("Leader received unexpected CaptureSnapshot", captureSnapshot);
413 // Verify the follower 1 applies the state.
414 applyState = MessageCollectorActor.expectFirstMatching(follower1CollectorActor, ApplyState.class);
415 verifyApplyState(applyState, null, null, currentTerm, 10, payload10);
417 // Verify the follower 2 applies the state.
418 applyState = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplyState.class);
419 verifyApplyState(applyState, null, null, currentTerm, 10, payload10);
421 // Verify the leader's state.
422 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
423 assertEquals("Leader snapshot index", 9, leaderContext.getReplicatedLog().getSnapshotIndex());
424 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
425 assertEquals("Leader journal last index", 10, leaderContext.getReplicatedLog().lastIndex());
426 assertEquals("Leader commit index", 10, leaderContext.getCommitIndex());
427 assertEquals("Leader last applied", 10, leaderContext.getLastApplied());
428 assertEquals("Leader replicatedToAllIndex", 9, leader.getReplicatedToAllIndex());
430 // Verify follower 1's state.
431 assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
432 assertEquals("Follower 1 snapshot index", 9, follower1Context.getReplicatedLog().getSnapshotIndex());
433 assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
434 assertEquals("Follower 1 journal last index", 10, follower1Context.getReplicatedLog().lastIndex());
435 assertEquals("Follower 1 commit index", 10, follower1Context.getCommitIndex());
436 assertEquals("Follower 1 last applied", 10, follower1Context.getLastApplied());
437 assertEquals("Follower 1 replicatedToAllIndex", 9, follower1.getReplicatedToAllIndex());
439 // Verify follower 2's state.
440 assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
441 assertEquals("Follower 2 snapshot index", 9, follower2Context.getReplicatedLog().getSnapshotIndex());
442 assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
443 assertEquals("Follower 2 journal last index", 10, follower2Context.getReplicatedLog().lastIndex());
444 assertEquals("Follower 2 commit index", 10, follower2Context.getCommitIndex());
445 assertEquals("Follower 2 last applied", 10, follower2Context.getLastApplied());
446 assertEquals("Follower 2 replicatedToAllIndex", 9, follower2.getReplicatedToAllIndex());
448 // Revert back to JVM total memory.
449 leaderActor.underlyingActor().setMockTotalMemory(0);
451 MessageCollectorActor.clearMessages(leaderCollectorActor);
452 MessageCollectorActor.clearMessages(follower1CollectorActor);
453 MessageCollectorActor.clearMessages(follower2CollectorActor);
457 * Following a snapshot due memory threshold exceeded, resume the lagging follower and verify it receives
458 * an install snapshot from the leader.
462 private void testInstallSnapshotToLaggingFollower() throws Exception {
463 List<Snapshot> persistedSnapshots;
464 List<ReplicatedLogEntry> unAppliedEntry;
465 ApplyState applyState;
466 ApplySnapshot applySnapshot;
467 InstallSnapshot installSnapshot;
468 InstallSnapshotReply installSnapshotReply;
470 byte[] snapshot = new byte[] {10};
471 leaderActor.underlyingActor().setSnapshot(snapshot);
473 // Now stop dropping AppendEntries in follower 2.
474 follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
476 installSnapshot = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, InstallSnapshot.class);
477 assertEquals("InstallSnapshot getTerm", currentTerm, installSnapshot.getTerm());
478 assertEquals("InstallSnapshot getLeaderId", leaderId, installSnapshot.getLeaderId());
479 assertEquals("InstallSnapshot getChunkIndex", 1, installSnapshot.getChunkIndex());
480 assertEquals("InstallSnapshot getTotalChunks", 1, installSnapshot.getTotalChunks());
481 assertEquals("InstallSnapshot getLastIncludedTerm", currentTerm, installSnapshot.getLastIncludedTerm());
482 assertEquals("InstallSnapshot getLastIncludedIndex", 8, installSnapshot.getLastIncludedIndex());
483 assertArrayEquals("InstallSnapshot getData", snapshot, installSnapshot.getData().toByteArray());
485 installSnapshotReply = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, InstallSnapshotReply.class);
486 assertEquals("InstallSnapshotReply getTerm", currentTerm, installSnapshotReply.getTerm());
487 assertEquals("InstallSnapshotReply getChunkIndex", 1, installSnapshotReply.getChunkIndex());
488 assertEquals("InstallSnapshotReply getFollowerId", follower2Id, installSnapshotReply.getFollowerId());
489 assertEquals("InstallSnapshotReply isSuccess", true, installSnapshotReply.isSuccess());
491 // Verify follower 2 applies the snapshot.
492 applySnapshot = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplySnapshot.class);
493 verifySnapshot("Follower 2", applySnapshot.getSnapshot(), currentTerm, 8, currentTerm, 8, snapshot);
494 assertEquals("Persisted Snapshot getUnAppliedEntries size", 0, applySnapshot.getSnapshot().getUnAppliedEntries().size());
496 // Verify follower 2 only applies the second log entry (9) as the first one (8) was in the snapshot.
497 applyState = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplyState.class);
498 verifyApplyState(applyState, null, null, currentTerm, 9, payload9);
500 // Wait for the snapshot to complete.
501 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
503 // Ensure there's at least 1 more heartbeat.
504 MessageCollectorActor.clearMessages(leaderCollectorActor);
505 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
507 // The leader should now have performed fake snapshots to advance the snapshot index and to trim
508 // the log. In addition replicatedToAllIndex should've advanced.
509 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
510 assertEquals("Leader snapshot index", 8, leaderContext.getReplicatedLog().getSnapshotIndex());
511 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
512 assertEquals("Leader commit index", 9, leaderContext.getCommitIndex());
513 assertEquals("Leader last applied", 9, leaderContext.getLastApplied());
514 assertEquals("Leader replicatedToAllIndex", 8, leader.getReplicatedToAllIndex());
516 // Verify the leader's persisted snapshot. The previous snapshot (currently) won't be deleted from
517 // the snapshot store because the second snapshot was initiated by the follower install snapshot and
518 // not because the batch count was reached so the persisted journal sequence number wasn't advanced
519 // far enough to cause the previous snapshot to be deleted. This is because
520 // RaftActor#trimPersistentData subtracts the snapshotBatchCount from the snapshot's sequence number.
521 // This is OK - the next snapshot should delete it. In production, even if the system restarted
522 // before another snapshot, they would both get applied which wouldn't hurt anything.
523 persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
524 Assert.assertTrue("Expected at least 1 persisted snapshots", persistedSnapshots.size() > 0);
525 Snapshot persistedSnapshot = persistedSnapshots.get(persistedSnapshots.size() - 1);
526 verifySnapshot("Persisted", persistedSnapshot, currentTerm, 9, currentTerm, 9, snapshot);
527 unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
528 assertEquals("Persisted Snapshot getUnAppliedEntries size", 0, unAppliedEntry.size());
530 MessageCollectorActor.clearMessages(leaderCollectorActor);
531 MessageCollectorActor.clearMessages(follower1CollectorActor);
532 MessageCollectorActor.clearMessages(follower2CollectorActor);
536 * Do another round of payloads and snapshot to verify replicatedToAllIndex gets back on track and
537 * snapshots works as expected after doing a follower snapshot. In this step we don't lag a follower.
539 private void testFinalReplicationsAndSnapshot() {
540 List<ApplyState> applyStates;
541 ApplyState applyState;
543 testLog.info("testFinalReplicationsAndSnapshot starting: replicatedToAllIndex: {}", leader.getReplicatedToAllIndex());
545 byte[] snapshot = new byte[] {14};
546 leaderActor.underlyingActor().setSnapshot(snapshot);
548 // Send another payload - a snapshot should occur.
549 payload11 = sendPayloadData(leaderActor, "eleven");
551 // Wait for the snapshot to complete.
552 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
554 applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
555 verifyApplyState(applyState, leaderCollectorActor, payload11.toString(), currentTerm, 11, payload11);
557 // Verify the leader's last persisted snapshot (previous ones may not be purged yet).
558 List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
559 Snapshot persistedSnapshot = persistedSnapshots.get(persistedSnapshots.size() - 1);
560 verifySnapshot("Persisted", persistedSnapshot, currentTerm, 10, currentTerm, 11, snapshot);
561 List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
562 assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
563 verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 11, payload11);
565 // Send a couple more payloads.
566 payload12 = sendPayloadData(leaderActor, "twelve");
567 payload13 = sendPayloadData(leaderActor, "thirteen");
569 // Verify the leader applies the 2 log entries.
570 applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
571 verifyApplyState(applyStates.get(1), leaderCollectorActor, payload12.toString(), currentTerm, 12, payload12);
572 verifyApplyState(applyStates.get(2), leaderCollectorActor, payload13.toString(), currentTerm, 13, payload13);
574 // Verify the leader applies a log entry for at least the last entry index.
575 verifyApplyJournalEntries(leaderCollectorActor, 13);
577 // Ensure there's at least 1 more heartbeat to trim the log.
578 MessageCollectorActor.clearMessages(leaderCollectorActor);
579 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
581 // Verify the leader's final snapshot index et al.
582 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
583 assertEquals("Leader snapshot index", 12, leaderContext.getReplicatedLog().getSnapshotIndex());
584 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
585 assertEquals("Leader journal last index", 13, leaderContext.getReplicatedLog().lastIndex());
586 assertEquals("Leader commit index", 13, leaderContext.getCommitIndex());
587 assertEquals("Leader last applied", 13, leaderContext.getLastApplied());
588 assertEquals("Leader replicatedToAllIndex", 12, leader.getReplicatedToAllIndex());
590 InMemoryJournal.dumpJournal(leaderId);
592 // Verify the leaders's persisted journal log - should only contain the last 2 ReplicatedLogEntries
593 // added after the snapshot as the persisted journal should've been purged to the snapshot
595 verifyPersistedJournal(leaderId, Arrays.asList(new ReplicatedLogImplEntry(12, currentTerm, payload12),
596 new ReplicatedLogImplEntry(13, currentTerm, payload13)));
598 // Verify the leaders's persisted journal contains an ApplyJournalEntries for at least the last entry index.
599 List<ApplyJournalEntries> persistedApplyJournalEntries = InMemoryJournal.get(leaderId, ApplyJournalEntries.class);
600 boolean found = false;
601 for(ApplyJournalEntries entry: persistedApplyJournalEntries) {
602 if(entry.getToIndex() == 13) {
608 Assert.assertTrue(String.format("ApplyJournalEntries with index %d not found in leader's persisted journal", 13), found);
610 // Verify follower 1 applies the 2 log entries.
611 applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
612 verifyApplyState(applyStates.get(0), null, null, currentTerm, 11, payload11);
613 verifyApplyState(applyStates.get(1), null, null, currentTerm, 12, payload12);
614 verifyApplyState(applyStates.get(2), null, null, currentTerm, 13, payload13);
616 // Verify follower 1's log state.
617 assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
618 assertEquals("Follower 1 snapshot index", 12, follower1Context.getReplicatedLog().getSnapshotIndex());
619 assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
620 assertEquals("Follower 1 journal last index", 13, follower1Context.getReplicatedLog().lastIndex());
621 assertEquals("Follower 1 commit index", 13, follower1Context.getCommitIndex());
622 assertEquals("Follower 1 last applied", 13, follower1Context.getLastApplied());
623 assertEquals("Follower 1 replicatedToAllIndex", 12, follower1.getReplicatedToAllIndex());
625 // Verify follower 2 applies the 2 log entries.
626 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
627 verifyApplyState(applyStates.get(0), null, null, currentTerm, 11, payload11);
628 verifyApplyState(applyStates.get(1), null, null, currentTerm, 12, payload12);
629 verifyApplyState(applyStates.get(2), null, null, currentTerm, 13, payload13);
631 // Verify follower 2's log state.
632 assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
633 assertEquals("Follower 2 snapshot index", 12, follower2Context.getReplicatedLog().getSnapshotIndex());
634 assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
635 assertEquals("Follower 2 journal last index", 13, follower2Context.getReplicatedLog().lastIndex());
636 assertEquals("Follower 2 commit index", 13, follower2Context.getCommitIndex());
637 assertEquals("Follower 2 last applied", 13, follower2Context.getLastApplied());
638 assertEquals("Follower 2 replicatedToAllIndex", 12, follower2.getReplicatedToAllIndex());
640 testLog.info("testFinalReplicationsAndSnapshot ending");
644 * Kill the leader actor, reinstate it and verify the recovered journal.
646 private void testLeaderReinstatement() {
647 testLog.info("testLeaderReinstatement starting");
649 killActor(leaderActor);
651 leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
653 leaderActor.underlyingActor().startDropMessages(RequestVoteReply.class);
655 leaderContext = leaderActor.underlyingActor().getRaftActorContext();
657 leaderActor.underlyingActor().waitForRecoveryComplete();
659 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
660 assertEquals("Leader snapshot index", 10, leaderContext.getReplicatedLog().getSnapshotIndex());
661 assertEquals("Leader journal log size", 3, leaderContext.getReplicatedLog().size());
662 assertEquals("Leader journal last index", 13, leaderContext.getReplicatedLog().lastIndex());
663 assertEquals("Leader commit index", 13, leaderContext.getCommitIndex());
664 assertEquals("Leader last applied", 13, leaderContext.getLastApplied());
665 verifyReplicatedLogEntry(leaderContext.getReplicatedLog().get(11), currentTerm, 11, payload11);
666 verifyReplicatedLogEntry(leaderContext.getReplicatedLog().get(12), currentTerm, 12, payload12);
667 verifyReplicatedLogEntry(leaderContext.getReplicatedLog().get(13), currentTerm, 13, payload13);
669 testLog.info("testLeaderReinstatement ending");