/*
 * Copyright (c) 2015 Brocade Communications Systems, Inc. and others. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html
 */
8 package org.opendaylight.controller.cluster.raft;
10 import static org.junit.Assert.assertEquals;
11 import akka.persistence.SaveSnapshotSuccess;
12 import com.google.common.collect.ImmutableMap;
13 import java.util.List;
14 import org.junit.Test;
15 import org.opendaylight.controller.cluster.raft.MockRaftActorContext.MockPayload;
16 import org.opendaylight.controller.cluster.raft.RaftActor.UpdateElectionTerm;
17 import org.opendaylight.controller.cluster.raft.base.messages.ApplyJournalEntries;
18 import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
19 import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshotReply;
20 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
21 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
22 import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
23 import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
24 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
/**
 * Tests replication and snapshots end-to-end using real RaftActors and behavior communication.
 *
 * @author Thomas Pantelis
 */
31 public class ReplicationAndSnapshotsIntegrationTest extends AbstractRaftActorIntegrationTest {
33 private List<ReplicatedLogImplEntry> origLeaderJournal;
35 private MockPayload recoveredPayload0;
36 private MockPayload recoveredPayload1;
37 private MockPayload recoveredPayload2;
38 private MockPayload payload3;
39 private MockPayload payload4;
40 private MockPayload payload5;
41 private MockPayload payload6;
42 private MockPayload payload7;
45 public void runTest() throws Exception {
46 testLog.info("testReplicationAndSnapshots starting");
48 // Setup the persistent journal for the leader. We'll start up with 3 journal log entries (one less
49 // than the snapshotBatchCount).
51 InMemoryJournal.addEntry(leaderId, seqId++, new UpdateElectionTerm(initialTerm, leaderId));
52 recoveredPayload0 = new MockPayload("zero");
53 InMemoryJournal.addEntry(leaderId, seqId++, new ReplicatedLogImplEntry(0, initialTerm, recoveredPayload0));
54 recoveredPayload1 = new MockPayload("one");
55 InMemoryJournal.addEntry(leaderId, seqId++, new ReplicatedLogImplEntry(1, initialTerm, recoveredPayload1));
56 recoveredPayload2 = new MockPayload("two");
57 InMemoryJournal.addEntry(leaderId, seqId++, new ReplicatedLogImplEntry(2, initialTerm, recoveredPayload2));
58 InMemoryJournal.addEntry(leaderId, seqId++, new ApplyJournalEntries(2));
60 origLeaderJournal = InMemoryJournal.get(leaderId, ReplicatedLogImplEntry.class);
62 // Create the leader and 2 follower actors and verify initial syncing of the followers after leader
63 // persistence recovery.
65 follower1Actor = newTestRaftActor(follower1Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
66 follower2Id, testActorPath(follower2Id)), newFollowerConfigParams());
68 follower2Actor = newTestRaftActor(follower2Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
69 follower1Id, testActorPath(follower1Id)), newFollowerConfigParams());
71 peerAddresses = ImmutableMap.<String, String>builder().
72 put(follower1Id, follower1Actor.path().toString()).
73 put(follower2Id, follower2Actor.path().toString()).build();
75 leaderConfigParams = newLeaderConfigParams();
76 leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
78 follower1CollectorActor = follower1Actor.underlyingActor().collectorActor();
79 follower2CollectorActor = follower2Actor.underlyingActor().collectorActor();
80 leaderCollectorActor = leaderActor.underlyingActor().collectorActor();
82 leaderContext = leaderActor.underlyingActor().getRaftActorContext();
84 verifyLeaderRecoveryAndInitialization();
88 testSubsequentReplications();
92 testLeaderReinstatement();
94 testLog.info("testReplicationAndSnapshots ending");
98 * Verify the expected leader is elected as the leader and verify initial syncing of the followers
99 * from the leader's persistence recovery.
101 void verifyLeaderRecoveryAndInitialization() {
102 testLog.info("verifyLeaderRecoveryAndInitialization starting");
104 waitUntilLeader(leaderActor);
106 currentTerm = leaderContext.getTermInformation().getCurrentTerm();
107 assertEquals("Current term > " + initialTerm, true, currentTerm > initialTerm);
109 leader = leaderActor.underlyingActor().getCurrentBehavior();
111 // The followers should receive AppendEntries for each leader log entry that was recovered from
112 // persistence and apply each one.
113 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(
114 follower1CollectorActor, ApplyState.class, 3);
115 verifyApplyState(applyStates.get(0), null, null, initialTerm, 0, recoveredPayload0);
116 verifyApplyState(applyStates.get(1), null, null, initialTerm, 1, recoveredPayload1);
117 verifyApplyState(applyStates.get(2), null, null, initialTerm, 2, recoveredPayload2);
119 // Verify follower 1 applies a log entry for at least the last entry index.
120 verifyApplyJournalEntries(follower1CollectorActor, 2);
122 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
123 verifyApplyState(applyStates.get(0), null, null, initialTerm, 0, recoveredPayload0);
124 verifyApplyState(applyStates.get(1), null, null, initialTerm, 1, recoveredPayload1);
125 verifyApplyState(applyStates.get(2), null, null, initialTerm, 2, recoveredPayload2);
127 // Verify follower 1]2 applies a log entry for at least the last entry index.
128 verifyApplyJournalEntries(follower2CollectorActor, 2);
130 MessageCollectorActor.clearMessages(leaderCollectorActor);
131 MessageCollectorActor.clearMessages(follower1CollectorActor);
132 MessageCollectorActor.clearMessages(follower2CollectorActor);
134 // The leader should have performed fake snapshots due to the follower's AppendEntriesReplies and
135 // trimmed the in-memory log so that only the last entry remains.
136 assertEquals("Leader snapshot term", initialTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
137 assertEquals("Leader snapshot index", 1, leaderContext.getReplicatedLog().getSnapshotIndex());
138 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
139 assertEquals("Leader journal last index", 2, leaderContext.getReplicatedLog().lastIndex());
140 assertEquals("Leader commit index", 2, leaderContext.getCommitIndex());
141 assertEquals("Leader last applied", 2, leaderContext.getLastApplied());
142 assertEquals("Leader replicatedToAllIndex", 1, leader.getReplicatedToAllIndex());
144 // Verify the follower's persisted journal log.
145 verifyPersistedJournal(follower1Id, origLeaderJournal);
146 verifyPersistedJournal(follower2Id, origLeaderJournal);
148 MessageCollectorActor.clearMessages(leaderCollectorActor);
149 MessageCollectorActor.clearMessages(follower1CollectorActor);
150 MessageCollectorActor.clearMessages(follower2CollectorActor);
152 testLog.info("verifyLeaderRecoveryAndInitialization ending");
156 * Send a payload to the TestRaftActor to persist and replicate. Since snapshotBatchCount is set to
157 * 4 and we already have 3 entries in the journal log, this should initiate a snapshot. In this
158 * scenario, the follower consensus and application of state is delayed until after the snapshot
162 private void testFirstSnapshot() throws Exception {
163 testLog.info("testFirstSnapshot starting");
165 expSnapshotState.add(recoveredPayload0);
166 expSnapshotState.add(recoveredPayload1);
167 expSnapshotState.add(recoveredPayload2);
169 // Delay the consensus by temporarily dropping the AppendEntries to both followers.
170 follower1Actor.underlyingActor().startDropMessages(AppendEntries.class);
171 follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
174 payload3 = sendPayloadData(leaderActor, "three");
176 // Wait for snapshot complete.
177 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
179 // The snapshot index should not be advanced nor the log trimmed because replicatedToAllIndex
180 // is behind due the followers not being replicated yet via AppendEntries.
181 assertEquals("Leader snapshot term", initialTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
182 assertEquals("Leader snapshot index", 1, leaderContext.getReplicatedLog().getSnapshotIndex());
183 assertEquals("Leader journal log size", 2, leaderContext.getReplicatedLog().size());
184 assertEquals("Leader journal last index", 3, leaderContext.getReplicatedLog().lastIndex());
186 // Verify the persisted snapshot in the leader. This should reflect the advanced snapshot index as
187 // the last applied log entry (2) even though the leader hasn't yet advanced its cached snapshot index.
188 List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
189 assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
190 verifySnapshot("Persisted", persistedSnapshots.get(0), initialTerm, 2, currentTerm, 3);
191 List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
192 assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
193 verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 3, payload3);
195 // The leader's persisted journal log should be cleared since we snapshotted.
196 List<ReplicatedLogImplEntry> persistedLeaderJournal = InMemoryJournal.get(leaderId, ReplicatedLogImplEntry.class);
197 assertEquals("Persisted journal log size", 0, persistedLeaderJournal.size());
199 // Allow AppendEntries to both followers to proceed. This should catch up the followers and cause a
200 // "fake" snapshot in the leader to advance the snapshot index to 2. Also the state should be applied
201 // in all members (via ApplyState).
202 follower1Actor.underlyingActor().stopDropMessages(AppendEntries.class);
203 follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
205 ApplyState applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
206 verifyApplyState(applyState, leaderCollectorActor, payload3.toString(), currentTerm, 3, payload3);
208 verifyApplyJournalEntries(leaderCollectorActor, 3);
210 assertEquals("Leader commit index", 3, leaderContext.getCommitIndex());
212 applyState = MessageCollectorActor.expectFirstMatching(follower1CollectorActor, ApplyState.class);
213 verifyApplyState(applyState, null, null, currentTerm, 3, payload3);
215 verifyApplyJournalEntries(follower1CollectorActor, 3);
217 applyState = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplyState.class);
218 verifyApplyState(applyState, null, null, currentTerm, 3, payload3);
220 verifyApplyJournalEntries(follower2CollectorActor, 3);
222 assertEquals("Leader snapshot term", initialTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
223 assertEquals("Leader snapshot index", 2, leaderContext.getReplicatedLog().getSnapshotIndex());
224 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
225 assertEquals("Leader commit index", 3, leaderContext.getCommitIndex());
226 assertEquals("Leader last applied", 3, leaderContext.getLastApplied());
227 assertEquals("Leader replicatedToAllIndex", 2, leader.getReplicatedToAllIndex());
229 MessageCollectorActor.clearMessages(leaderCollectorActor);
230 MessageCollectorActor.clearMessages(follower1CollectorActor);
231 MessageCollectorActor.clearMessages(follower2CollectorActor);
233 testLog.info("testFirstSnapshot ending");
237 * Send 3 more payload instances and verify they get applied by all members.
239 private void testSubsequentReplications() {
240 testLog.info("testSubsequentReplications starting");
242 payload4 = sendPayloadData(leaderActor, "four");
243 payload5 = sendPayloadData(leaderActor, "five");
244 payload6 = sendPayloadData(leaderActor, "six");
246 // Verify the leader applies the states.
247 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
248 verifyApplyState(applyStates.get(0), leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
249 verifyApplyState(applyStates.get(1), leaderCollectorActor, payload5.toString(), currentTerm, 5, payload5);
250 verifyApplyState(applyStates.get(2), leaderCollectorActor, payload6.toString(), currentTerm, 6, payload6);
252 // Verify the leader applies a log entry for at least the last entry index.
253 verifyApplyJournalEntries(leaderCollectorActor, 6);
255 // The leader should have performed fake snapshots due to the follower's AppendEntriesReplies and
256 // trimmed the in-memory log so that only the last entry remains.
257 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
258 assertEquals("Leader snapshot index", 5, leaderContext.getReplicatedLog().getSnapshotIndex());
259 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
260 assertEquals("Leader journal last index", 6, leaderContext.getReplicatedLog().lastIndex());
261 assertEquals("Leader commit index", 6, leaderContext.getCommitIndex());
262 assertEquals("Leader last applied", 6, leaderContext.getLastApplied());
263 assertEquals("Leader replicatedToAllIndex", 5, leader.getReplicatedToAllIndex());
265 // Verify follower 1 applies the states.
266 applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
267 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
268 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
269 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
271 // Verify follower 1 applies a log entry for at least the last entry index.
272 verifyApplyJournalEntries(follower1CollectorActor, 6);
274 // Verify follower 2 applies the states.
275 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
276 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
277 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
278 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
280 // Verify follower 2 applies a log entry for at least the last entry index.
281 verifyApplyJournalEntries(follower2CollectorActor, 6);
283 MessageCollectorActor.clearMessages(leaderCollectorActor);
285 testLog.info("testSubsequentReplications ending");
289 * Send one more payload to trigger another snapshot. In this scenario, we delay the snapshot until
290 * consensus occurs and the leader applies the state.
293 private void testSecondSnapshot() throws Exception {
294 testLog.info("testSecondSnapshot starting");
296 expSnapshotState.add(payload3);
297 expSnapshotState.add(payload4);
298 expSnapshotState.add(payload5);
299 expSnapshotState.add(payload6);
301 // Delay the CaptureSnapshot message to the leader actor.
302 leaderActor.underlyingActor().startDropMessages(CaptureSnapshotReply.class);
305 payload7 = sendPayloadData(leaderActor, "seven");
307 // Capture the CaptureSnapshotReply message so we can send it later.
308 CaptureSnapshotReply captureSnapshotReply = MessageCollectorActor.expectFirstMatching(leaderCollectorActor,
309 CaptureSnapshotReply.class);
311 // Wait for the state to be applied in the leader.
312 ApplyState applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
313 verifyApplyState(applyState, leaderCollectorActor, payload7.toString(), currentTerm, 7, payload7);
315 // At this point the leader has applied the new state but the cached snapshot index should not be
316 // advanced by a "fake" snapshot because we're in the middle of a snapshot. We'll wait for at least
317 // one more heartbeat AppendEntriesReply to ensure this does not occur.
318 MessageCollectorActor.clearMessages(leaderCollectorActor);
319 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
321 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
322 assertEquals("Leader snapshot index", 5, leaderContext.getReplicatedLog().getSnapshotIndex());
323 assertEquals("Leader journal log size", 2, leaderContext.getReplicatedLog().size());
324 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
325 assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
326 assertEquals("Leader last applied", 7, leaderContext.getLastApplied());
327 assertEquals("Leader replicatedToAllIndex", 5, leader.getReplicatedToAllIndex());
329 // Now deliver the CaptureSnapshotReply.
330 leaderActor.underlyingActor().stopDropMessages(CaptureSnapshotReply.class);
331 leaderActor.tell(captureSnapshotReply, leaderActor);
333 // Wait for snapshot complete.
334 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
336 // Wait for another heartbeat AppendEntriesReply. This should cause a "fake" snapshot to advance the
337 // snapshot index and trimmed the log since we're no longer in a snapshot.
338 MessageCollectorActor.clearMessages(leaderCollectorActor);
339 MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
340 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
341 assertEquals("Leader snapshot index", 6, leaderContext.getReplicatedLog().getSnapshotIndex());
342 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
343 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
344 assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
346 // Verify the persisted snapshot. This should reflect the snapshot index as the last applied
347 // log entry (7) and shouldn't contain any unapplied entries as we capture persisted the snapshot data
348 // when the snapshot is created (ie when the CaptureSnapshot is processed).
349 List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
350 assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
351 verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 6, currentTerm, 7);
352 List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
353 assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
354 verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 7, payload7);
356 // The leader's persisted journal log should be cleared since we did a snapshot.
357 List<ReplicatedLogImplEntry> persistedLeaderJournal = InMemoryJournal.get(
358 leaderId, ReplicatedLogImplEntry.class);
359 assertEquals("Persisted journal log size", 0, persistedLeaderJournal.size());
361 // Verify the followers apply all 4 new log entries.
362 List<ApplyState> applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 4);
363 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
364 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
365 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
366 verifyApplyState(applyStates.get(3), null, null, currentTerm, 7, payload7);
368 applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 4);
369 verifyApplyState(applyStates.get(0), null, null, currentTerm, 4, payload4);
370 verifyApplyState(applyStates.get(1), null, null, currentTerm, 5, payload5);
371 verifyApplyState(applyStates.get(2), null, null, currentTerm, 6, payload6);
372 verifyApplyState(applyStates.get(3), null, null, currentTerm, 7, payload7);
374 // Verify the follower's snapshot index has also advanced. (after another AppendEntries heartbeat
377 MessageCollectorActor.clearMessages(follower1CollectorActor);
378 MessageCollectorActor.expectFirstMatching(follower1CollectorActor, AppendEntries.class);
379 RaftActorContext follower1Context = follower1Actor.underlyingActor().getRaftActorContext();
380 assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
381 assertEquals("Follower 1 snapshot index", 6, follower1Context.getReplicatedLog().getSnapshotIndex());
382 assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
383 assertEquals("Follower 1 journal last index", 7, follower1Context.getReplicatedLog().lastIndex());
384 assertEquals("Follower 1 commit index", 7, follower1Context.getCommitIndex());
386 MessageCollectorActor.clearMessages(follower2CollectorActor);
387 MessageCollectorActor.expectFirstMatching(follower2CollectorActor, AppendEntries.class);
388 RaftActorContext follower2Context = follower2Actor.underlyingActor().getRaftActorContext();
389 assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
390 assertEquals("Follower 2 snapshot index", 6, follower2Context.getReplicatedLog().getSnapshotIndex());
391 assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
392 assertEquals("Follower 2 journal last index", 7, follower2Context.getReplicatedLog().lastIndex());
393 assertEquals("Follower 2 commit index", 7, follower2Context.getCommitIndex());
395 expSnapshotState.add(payload7);
397 testLog.info("testSecondSnapshot ending");
401 * Kill the leader actor, reinstate it and verify the recovered journal.
403 private void testLeaderReinstatement() {
404 testLog.info("testLeaderReinstatement starting");
406 killActor(leaderActor);
408 leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
410 leaderActor.underlyingActor().waitForRecoveryComplete();
412 leaderContext = leaderActor.underlyingActor().getRaftActorContext();
414 assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
415 assertEquals("Leader snapshot index", 6, leaderContext.getReplicatedLog().getSnapshotIndex());
416 assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
417 assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
418 assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
419 assertEquals("Leader last applied", 7, leaderContext.getLastApplied());
420 verifyReplicatedLogEntry(leaderContext.getReplicatedLog().last(), currentTerm, 7, payload7);
422 testLog.info("testLeaderReinstatement ending");