/*
 * Copyright (c) 2015 Brocade Communications Systems, Inc. and others. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html
 */
package org.opendaylight.controller.cluster.raft;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import akka.persistence.SaveSnapshotSuccess;
import com.google.common.collect.ImmutableMap;
import java.util.List;
import org.junit.Test;
import org.opendaylight.controller.cluster.raft.MockRaftActorContext.MockPayload;
import org.opendaylight.controller.cluster.raft.RaftActor.UpdateElectionTerm;
import org.opendaylight.controller.cluster.raft.base.messages.ApplyLogEntries;
import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot;
import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshotReply;
import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
28 * Tests replication and snapshots end-to-end using real RaftActors and behavior communication.
30 * @author Thomas Pantelis
32 public class ReplicationAndSnapshotsIntegrationTest extends AbstractRaftActorIntegrationTest {
34 private List<ReplicatedLogImplEntry> origLeaderJournal;
36 private MockPayload recoveredPayload0;
37 private MockPayload recoveredPayload1;
38 private MockPayload recoveredPayload2;
39 private MockPayload payload4;
40 private MockPayload payload5;
41 private MockPayload payload6;
42 private MockPayload payload7;
45 public void runTest() {
46 testLog.info("testReplicationAndSnapshots starting");
48 // Setup the persistent journal for the leader. We'll start up with 3 journal log entries (one less
49 // than the snapshotBatchCount).
51 InMemoryJournal.addEntry(leaderId, seqId++, new UpdateElectionTerm(initialTerm, leaderId));
52 recoveredPayload0 = new MockPayload("zero");
53 InMemoryJournal.addEntry(leaderId, seqId++, new ReplicatedLogImplEntry(0, initialTerm, recoveredPayload0));
54 recoveredPayload1 = new MockPayload("one");
55 InMemoryJournal.addEntry(leaderId, seqId++, new ReplicatedLogImplEntry(1, initialTerm, recoveredPayload1));
56 recoveredPayload2 = new MockPayload("two");
57 InMemoryJournal.addEntry(leaderId, seqId++, new ReplicatedLogImplEntry(2, initialTerm, recoveredPayload2));
58 InMemoryJournal.addEntry(leaderId, seqId++, new ApplyLogEntries(2));
60 origLeaderJournal = InMemoryJournal.get(leaderId, ReplicatedLogImplEntry.class);
62 // Create the leader and 2 follower actors and verify initial syncing of the followers after leader
63 // persistence recovery.
65 follower1Actor = newTestRaftActor(follower1Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
66 follower2Id, testActorPath(follower2Id)), newFollowerConfigParams());
68 follower2Actor = newTestRaftActor(follower2Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
69 follower1Id, testActorPath(follower1Id)), newFollowerConfigParams());
71 peerAddresses = ImmutableMap.<String, String>builder().
72 put(follower1Id, follower1Actor.path().toString()).
73 put(follower2Id, follower2Actor.path().toString()).build();
75 leaderConfigParams = newLeaderConfigParams();
76 leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
78 follower1CollectorActor = follower1Actor.underlyingActor().collectorActor();
79 follower2CollectorActor = follower2Actor.underlyingActor().collectorActor();
80 leaderCollectorActor = leaderActor.underlyingActor().collectorActor();
82 leaderContext = leaderActor.underlyingActor().getRaftActorContext();
84 verifyLeaderRecoveryAndInitialization();
88 testSubsequentReplications();
92 testLeaderReinstatement();
94 testLog.info("testReplicationAndSnapshots ending");
98 * Verify the expected leader is elected as the leader and verify initial syncing of the followers
99 * from the leader's persistence recovery.
101 void verifyLeaderRecoveryAndInitialization() {
102 testLog.info("verifyLeaderRecoveryAndInitialization starting");
104 waitUntilLeader(leaderActor);
106 currentTerm = leaderContext.getTermInformation().getCurrentTerm();
107 assertEquals("Current term > " + initialTerm, true, currentTerm > initialTerm);
109 leader = leaderActor.underlyingActor().getCurrentBehavior();
111 // The followers should receive AppendEntries for each leader log entry that was recovered from
112 // persistence and apply each one.
113 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(
114 follower1CollectorActor, ApplyState.class, 3);
115 verifyApplyState(applyStates.get(0), null, null, initialTerm, 0, recoveredPayload0);
116 verifyApplyState(applyStates.get(1), null, null, initialTerm, 1, recoveredPayload1);
117 verifyApplyState(applyStates.get(2), null, null, initialTerm, 2, recoveredPayload2);
119 // Verify follower 1 applies a log entry for at least the last entry index.
120 verifyApplyJournalEntries(follower1CollectorActor, 2);
122 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
123 verifyApplyState(applyStates.get(0), null, null, initialTerm, 0, recoveredPayload0);
124 verifyApplyState(applyStates.get(1), null, null, initialTerm, 1, recoveredPayload1);
125 verifyApplyState(applyStates.get(2), null, null, initialTerm, 2, recoveredPayload2);
127 // Verify follower 1]2 applies a log entry for at least the last entry index.
128 verifyApplyJournalEntries(follower2CollectorActor, 2);
130 MessageCollectorActor.clearMessages(leaderCollectorActor);
131 MessageCollectorActor.clearMessages(follower1CollectorActor);
132 MessageCollectorActor.clearMessages(follower2CollectorActor);
134 // The leader should have performed fake snapshots due to the follower's AppendEntriesReplies and
135 // trimmed the in-memory log so that only the last entry remains.
136 assertEquals("Leader snapshot term", initialTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
137 assertEquals("Leader snapshot index", 1, leaderContext.getReplicatedLog().getSnapshotIndex());
138 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
139 assertEquals("Leader journal last index", 2, leaderContext.getReplicatedLog().lastIndex());
140 assertEquals("Leader commit index", 2, leaderContext.getCommitIndex());
141 assertEquals("Leader last applied", 2, leaderContext.getLastApplied());
142 assertEquals("Leader replicatedToAllIndex", 1, leader.getReplicatedToAllIndex());
144 // Verify the follower's persisted journal log.
145 verifyPersistedJournal(follower1Id, origLeaderJournal);
146 verifyPersistedJournal(follower2Id, origLeaderJournal);
148 MessageCollectorActor.clearMessages(leaderCollectorActor);
149 MessageCollectorActor.clearMessages(follower1CollectorActor);
150 MessageCollectorActor.clearMessages(follower2CollectorActor);
152 testLog.info("verifyLeaderRecoveryAndInitialization ending");
156 * Send a payload to the TestRaftActor to persist and replicate. Since snapshotBatchCount is set to
157 * 4 and we already have 3 entries in the journal log, this should initiate a snapshot. In this
158 * scenario, the follower consensus and application of state is delayed until after the snapshot
161 private void testFirstSnapshot() {
162 testLog.info("testFirstSnapshot starting");
164 byte[] snapshot = new byte[] {1,2,3,4};
165 leaderActor.underlyingActor().setSnapshot(snapshot);
167 // Delay the consensus by temporarily dropping the AppendEntries to both followers.
168 follower1Actor.underlyingActor().startDropMessages(AppendEntries.class);
169 follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
172 MockPayload payload3 = sendPayloadData(leaderActor, "three");
174 // Wait for snapshot complete.
175 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
177 // The snapshot index should not be advanced nor the log trimmed because replicatedToAllIndex
178 // is behind due the followers not being replicated yet via AppendEntries.
179 assertEquals("Leader snapshot term", initialTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
180 assertEquals("Leader snapshot index", 1, leaderContext.getReplicatedLog().getSnapshotIndex());
181 assertEquals("Leader journal log size", 2, leaderContext.getReplicatedLog().size());
182 assertEquals("Leader journal last index", 3, leaderContext.getReplicatedLog().lastIndex());
184 // Verify the persisted snapshot in the leader. This should reflect the advanced snapshot index as
185 // the last applied log entry (2) even though the leader hasn't yet advanced its cached snapshot index.
186 List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
187 assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
188 verifySnapshot("Persisted", persistedSnapshots.get(0), initialTerm, 2, currentTerm, 3, snapshot);
189 List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
190 assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
191 verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 3, payload3);
193 // The leader's persisted journal log should be cleared since we snapshotted.
194 List<ReplicatedLogImplEntry> persistedLeaderJournal = InMemoryJournal.get(leaderId, ReplicatedLogImplEntry.class);
195 assertEquals("Persisted journal log size", 0, persistedLeaderJournal.size());
197 // Allow AppendEntries to both followers to proceed. This should catch up the followers and cause a
198 // "fake" snapshot in the leader to advance the snapshot index to 2. Also the state should be applied
199 // in all members (via ApplyState).
200 follower1Actor.underlyingActor().stopDropMessages(AppendEntries.class);
201 follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
203 ApplyState applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
204 verifyApplyState(applyState, leaderCollectorActor, payload3.toString(), currentTerm, 3, payload3);
206 verifyApplyJournalEntries(leaderCollectorActor, 3);
208 assertEquals("Leader commit index", 3, leaderContext.getCommitIndex());
210 applyState = MessageCollectorActor.expectFirstMatching(follower1CollectorActor, ApplyState.class);
211 verifyApplyState(applyState, null, null, currentTerm, 3, payload3);
213 verifyApplyJournalEntries(follower1CollectorActor, 3);
215 applyState = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplyState.class);
216 verifyApplyState(applyState, null, null, currentTerm, 3, payload3);
218 verifyApplyJournalEntries(follower2CollectorActor, 3);
220 assertEquals("Leader snapshot term", initialTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
221 assertEquals("Leader snapshot index", 2, leaderContext.getReplicatedLog().getSnapshotIndex());
222 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
223 assertEquals("Leader commit index", 3, leaderContext.getCommitIndex());
224 assertEquals("Leader last applied", 3, leaderContext.getLastApplied());
225 assertEquals("Leader replicatedToAllIndex", 2, leader.getReplicatedToAllIndex());
227 MessageCollectorActor.clearMessages(leaderCollectorActor);
228 MessageCollectorActor.clearMessages(follower1CollectorActor);
229 MessageCollectorActor.clearMessages(follower2CollectorActor);
231 testLog.info("testFirstSnapshot ending");
235 * Send 3 more payload instances and verify they get applied by all members.
237 private void testSubsequentReplications() {
238 testLog.info("testSubsequentReplications starting");
240 payload4 = sendPayloadData(leaderActor, "four");
241 payload5 = sendPayloadData(leaderActor, "five");
242 payload6 = sendPayloadData(leaderActor, "six");
244 // Verify the leader applies the states.
245 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
246 verifyApplyState(applyStates.get(0), leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
247 verifyApplyState(applyStates.get(1), leaderCollectorActor, payload5.toString(), currentTerm, 5, payload5);
248 verifyApplyState(applyStates.get(2), leaderCollectorActor, payload6.toString(), currentTerm, 6, payload6);
250 // Verify the leader applies a log entry for at least the last entry index.
251 verifyApplyJournalEntries(leaderCollectorActor, 6);
253 // The leader should have performed fake snapshots due to the follower's AppendEntriesReplies and
254 // trimmed the in-memory log so that only the last entry remains.
255 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
256 assertEquals("Leader snapshot index", 5, leaderContext.getReplicatedLog().getSnapshotIndex());
257 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
258 assertEquals("Leader journal last index", 6, leaderContext.getReplicatedLog().lastIndex());
259 assertEquals("Leader commit index", 6, leaderContext.getCommitIndex());
260 assertEquals("Leader last applied", 6, leaderContext.getLastApplied());
261 assertEquals("Leader replicatedToAllIndex", 5, leader.getReplicatedToAllIndex());
263 // Verify follower 1 applies the states.
264 applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
265 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
266 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
267 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
269 // Verify follower 1 applies a log entry for at least the last entry index.
270 verifyApplyJournalEntries(follower1CollectorActor, 6);
272 // Verify follower 2 applies the states.
273 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
274 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
275 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
276 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
278 // Verify follower 2 applies a log entry for at least the last entry index.
279 verifyApplyJournalEntries(follower2CollectorActor, 6);
281 MessageCollectorActor.clearMessages(leaderCollectorActor);
283 testLog.info("testSubsequentReplications ending");
287 * Send one more payload to trigger another snapshot. In this scenario, we delay the snapshot until
288 * consensus occurs and the leader applies the state.
290 private void testSecondSnapshot() {
291 testLog.info("testSecondSnapshot starting");
293 byte[] snapshot = new byte[] {5,6,7,8};
294 leaderActor.underlyingActor().setSnapshot(snapshot);
296 // Delay the CaptureSnapshot message to the leader actor.
297 leaderActor.underlyingActor().startDropMessages(CaptureSnapshot.class);
300 payload7 = sendPayloadData(leaderActor, "seven");
302 // Capture the CaptureSnapshot message so we can send it later.
303 CaptureSnapshot captureSnapshot = MessageCollectorActor.expectFirstMatching(
304 leaderCollectorActor, CaptureSnapshot.class);
306 // Wait for the state to be applied in the leader.
307 ApplyState applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
308 verifyApplyState(applyState, leaderCollectorActor, payload7.toString(), currentTerm, 7, payload7);
310 // At this point the leader has applied the new state but the cached snapshot index should not be
311 // advanced by a "fake" snapshot because we're in the middle of a snapshot. We'll wait for at least
312 // one more heartbeat AppendEntriesReply to ensure this does not occur.
313 MessageCollectorActor.clearMessages(leaderCollectorActor);
314 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
316 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
317 assertEquals("Leader snapshot index", 5, leaderContext.getReplicatedLog().getSnapshotIndex());
318 assertEquals("Leader journal log size", 2, leaderContext.getReplicatedLog().size());
319 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
320 assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
321 assertEquals("Leader last applied", 7, leaderContext.getLastApplied());
322 assertEquals("Leader replicatedToAllIndex", 5, leader.getReplicatedToAllIndex());
324 // Now deliver the CaptureSnapshot.
325 leaderActor.underlyingActor().stopDropMessages(CaptureSnapshot.class);
326 leaderActor.tell(captureSnapshot, leaderActor);
328 // Wait for CaptureSnapshotReply to complete.
329 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, CaptureSnapshotReply.class);
331 // Wait for snapshot complete.
332 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
334 // Wait for another heartbeat AppendEntriesReply. This should cause a "fake" snapshot to advance the
335 // snapshot index and trimmed the log since we're no longer in a snapshot.
336 MessageCollectorActor.clearMessages(leaderCollectorActor);
337 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
338 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
339 assertEquals("Leader snapshot index", 6, leaderContext.getReplicatedLog().getSnapshotIndex());
340 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
341 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
342 assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
344 // Verify the persisted snapshot. This should reflect the advanced snapshot index as the last applied
346 List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
347 assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
348 verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 6, currentTerm, 7, snapshot);
349 List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
350 assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
351 verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 7, payload7);
353 // The leader's persisted journal log should be cleared since we did a snapshot.
354 List<ReplicatedLogImplEntry> persistedLeaderJournal = InMemoryJournal.get(
355 leaderId, ReplicatedLogImplEntry.class);
356 assertEquals("Persisted journal log size", 0, persistedLeaderJournal.size());
358 // Verify the followers apply all 4 new log entries.
359 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 4);
360 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
361 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
362 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
363 verifyApplyState(applyStates.get(3), null, null, currentTerm, 7, payload7);
365 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 4);
366 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
367 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
368 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
369 verifyApplyState(applyStates.get(3), null, null, currentTerm, 7, payload7);
371 // Verify the follower's snapshot index has also advanced. (after another AppendEntries heartbeat
374 MessageCollectorActor.clearMessages(follower1CollectorActor);
375 MessageCollectorActor.expectFirstMatching(follower1CollectorActor, AppendEntries.class);
376 RaftActorContext follower1Context = follower1Actor.underlyingActor().getRaftActorContext();
377 assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
378 assertEquals("Follower 1 snapshot index", 6, follower1Context.getReplicatedLog().getSnapshotIndex());
379 assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
380 assertEquals("Follower 1 journal last index", 7, follower1Context.getReplicatedLog().lastIndex());
381 assertEquals("Follower 1 commit index", 7, follower1Context.getCommitIndex());
383 MessageCollectorActor.clearMessages(follower2CollectorActor);
384 MessageCollectorActor.expectFirstMatching(follower2CollectorActor, AppendEntries.class);
385 RaftActorContext follower2Context = follower2Actor.underlyingActor().getRaftActorContext();
386 assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
387 assertEquals("Follower 2 snapshot index", 6, follower2Context.getReplicatedLog().getSnapshotIndex());
388 assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
389 assertEquals("Follower 2 journal last index", 7, follower2Context.getReplicatedLog().lastIndex());
390 assertEquals("Follower 2 commit index", 7, follower2Context.getCommitIndex());
392 testLog.info("testSecondSnapshot ending");
396 * Kill the leader actor, reinstate it and verify the recovered journal.
398 private void testLeaderReinstatement() {
399 testLog.info("testLeaderReinstatement starting");
401 killActor(leaderActor);
403 leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
405 leaderActor.underlyingActor().waitForRecoveryComplete();
407 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
408 assertEquals("Leader snapshot index", 6, leaderContext.getReplicatedLog().getSnapshotIndex());
409 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
410 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
411 assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
412 assertEquals("Leader last applied", 7, leaderContext.getLastApplied());
413 verifyReplicatedLogEntry(leaderContext.getReplicatedLog().last(), currentTerm, 7, payload7);
415 testLog.info("testLeaderReinstatement ending");