Merge "Adjust Tx rate limiter for unused transactions"
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / test / java / org / opendaylight / controller / cluster / raft / ReplicationAndSnapshotsWithLaggingFollowerIntegrationTest.java
1 /*
2  * Copyright (c) 2015 Brocade Communications Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.raft;
9
10 import static org.junit.Assert.assertArrayEquals;
11 import static org.junit.Assert.assertEquals;
12 import akka.persistence.SaveSnapshotSuccess;
13 import com.google.common.collect.ImmutableMap;
14 import java.util.Arrays;
15 import java.util.List;
16 import java.util.Map;
17 import org.junit.Assert;
18 import org.junit.Test;
19 import org.opendaylight.controller.cluster.raft.MockRaftActorContext.MockPayload;
20 import org.opendaylight.controller.cluster.raft.RaftActor.UpdateElectionTerm;
21 import org.opendaylight.controller.cluster.raft.base.messages.ApplyJournalEntries;
22 import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
23 import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
24 import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot;
25 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
26 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
27 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
28 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
29 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
30 import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
31 import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
32 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
33
34 /**
35  * Tests replication and snapshots end-to-end using real RaftActors and behavior communication with a
36  * lagging follower.
37  *
38  * @author Thomas Pantelis
39  */
40 public class ReplicationAndSnapshotsWithLaggingFollowerIntegrationTest extends AbstractRaftActorIntegrationTest {
41
42     private MockPayload payload9;
43     private MockPayload payload11;
44     private MockPayload payload12;
45     private MockPayload payload13;
46
47     @Test
48     public void runTest() throws Exception {
49         testLog.info("testReplicationAndSnapshotsWithLaggingFollower starting");
50
51         leaderId = factory.generateActorId("leader");
52         follower1Id = factory.generateActorId("follower");
53         follower2Id = factory.generateActorId("follower");
54
55         // Setup the persistent journal for the leader - just an election term and no journal/snapshots.
56         InMemoryJournal.addEntry(leaderId, 1, new UpdateElectionTerm(initialTerm, leaderId));
57
58         // Create the leader and 2 follower actors.
59         follower1Actor = newTestRaftActor(follower1Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
60                 follower2Id, testActorPath(follower2Id)), newFollowerConfigParams());
61
62         follower2Actor = newTestRaftActor(follower2Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
63                 follower1Id, testActorPath(follower1Id)), newFollowerConfigParams());
64
65         Map<String, String> peerAddresses = ImmutableMap.<String, String>builder().
66                 put(follower1Id, follower1Actor.path().toString()).
67                 put(follower2Id, follower2Actor.path().toString()).build();
68
69         leaderConfigParams = newLeaderConfigParams();
70         leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
71
72         waitUntilLeader(leaderActor);
73
74         leaderContext = leaderActor.underlyingActor().getRaftActorContext();
75         leader = leaderActor.underlyingActor().getCurrentBehavior();
76
77         follower1Context = follower1Actor.underlyingActor().getRaftActorContext();
78         follower1 = follower1Actor.underlyingActor().getCurrentBehavior();
79
80         follower2Context = follower2Actor.underlyingActor().getRaftActorContext();
81         follower2 = follower2Actor.underlyingActor().getCurrentBehavior();
82
83         currentTerm = leaderContext.getTermInformation().getCurrentTerm();
84         assertEquals("Current term > " + initialTerm, true, currentTerm > initialTerm);
85
86         leaderCollectorActor = leaderActor.underlyingActor().collectorActor();
87         follower1CollectorActor = follower1Actor.underlyingActor().collectorActor();
88         follower2CollectorActor = follower2Actor.underlyingActor().collectorActor();
89
90         testLog.info("Leader created and elected");
91
92         testInitialReplications();
93
94         testSubsequentReplicationsAndSnapshots();
95
96         testLeaderSnapshotTriggeredByMemoryThresholdExceeded();
97
98         testInstallSnapshotToLaggingFollower();
99
100         verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot();
101
102         testFinalReplicationsAndSnapshot();
103
104         testLeaderReinstatement();
105
106         testLog.info("testReplicationAndSnapshotsWithLaggingFollower ending");
107     }
108
109     /**
110      * Send 3 payload instances with follower 2 temporarily lagging.
111      *
112      * @throws Exception
113      */
114     private void testInitialReplications() throws Exception {
115
116         testLog.info("testInitialReplications starting: sending 2 new payloads");
117
118         // Simulate lagging by dropping AppendEntries messages in follower 2.
119         follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
120
121         // Send the payloads.
122         MockPayload payload0 = sendPayloadData(leaderActor, "zero");
123         MockPayload payload1 = sendPayloadData(leaderActor, "one");
124         MockPayload payload2 = sendPayloadData(leaderActor, "two");
125
126         // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
127         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
128         verifyApplyState(applyStates.get(0), leaderCollectorActor, payload0.toString(), currentTerm, 0, payload0);
129         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload1.toString(), currentTerm, 1, payload1);
130         verifyApplyState(applyStates.get(2), leaderCollectorActor, payload2.toString(), currentTerm, 2, payload2);
131
132         // Verify follower 1 applies each log entry.
133         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
134         verifyApplyState(applyStates.get(0), null, null, currentTerm, 0, payload0);
135         verifyApplyState(applyStates.get(1), null, null, currentTerm, 1, payload1);
136         verifyApplyState(applyStates.get(2), null, null, currentTerm, 2, payload2);
137
138         // Ensure there's at least 1 more heartbeat.
139         MessageCollectorActor.clearMessages(leaderCollectorActor);
140         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
141
142         // The leader should not have performed fake snapshots to trim the log because the entries have not
143         // been replicated to follower 2.
144         assertEquals("Leader snapshot term", -1, leaderContext.getReplicatedLog().getSnapshotTerm());
145         assertEquals("Leader snapshot index", -1, leaderContext.getReplicatedLog().getSnapshotIndex());
146         assertEquals("Leader journal log size", 3, leaderContext.getReplicatedLog().size());
147         assertEquals("Leader journal last index", 2, leaderContext.getReplicatedLog().lastIndex());
148         assertEquals("Leader commit index", 2, leaderContext.getCommitIndex());
149         assertEquals("Leader last applied", 2, leaderContext.getLastApplied());
150         assertEquals("Leader replicatedToAllIndex", -1, leader.getReplicatedToAllIndex());
151
152         testLog.info("Step 3: new entries applied - re-enabling follower {}", follower2Id);
153
154         // Now stop dropping AppendEntries in follower 2.
155         follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
156
157         // Verify follower 2 applies each log entry.
158         applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
159         verifyApplyState(applyStates.get(0), null, null, currentTerm, 0, payload0);
160         verifyApplyState(applyStates.get(1), null, null, currentTerm, 1, payload1);
161         verifyApplyState(applyStates.get(2), null, null, currentTerm, 2, payload2);
162
163         // Ensure there's at least 1 more heartbeat.
164         MessageCollectorActor.clearMessages(leaderCollectorActor);
165         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
166
167         // The leader should now have performed fake snapshots to trim the log.
168         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
169         assertEquals("Leader snapshot index", 1, leaderContext.getReplicatedLog().getSnapshotIndex());
170         assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
171         assertEquals("Leader journal last index", 2, leaderContext.getReplicatedLog().lastIndex());
172         assertEquals("Leader commit index", 2, leaderContext.getCommitIndex());
173         assertEquals("Leader last applied", 2, leaderContext.getLastApplied());
174         // Note - replicatedToAllIndex always lags 1 behind last applied since it trims the log up to the
175         // last applied index. The next entry successfully replicated to followers woild advance it.
176         assertEquals("Leader replicatedToAllIndex", 1, leader.getReplicatedToAllIndex());
177
178         // Even though follower 2 lagged behind, the leader should not have tried to install a snapshot
179         // to catch it up because no snapshotting was done so the follower's next index was present in the log.
180         InstallSnapshot installSnapshot = MessageCollectorActor.getFirstMatching(follower2CollectorActor,
181                 InstallSnapshot.class);
182         Assert.assertNull("Follower 2 received unexpected InstallSnapshot", installSnapshot);
183
184         MessageCollectorActor.clearMessages(leaderCollectorActor);
185         MessageCollectorActor.clearMessages(follower1CollectorActor);
186         MessageCollectorActor.clearMessages(follower2CollectorActor);
187
188         testLog.info("testInitialReplications complete");
189     }
190
191     /**
192      * Send 5 more payloads with follower 2 lagging. Since the snapshotBatch count is 4, this should cause
193      * 2 leader snapshots and follower 2's log will be behind by 5 entries.
194      *
195      * @throws Exception
196      */
197     private void testSubsequentReplicationsAndSnapshots() throws Exception {
198         testLog.info("testSubsequentReplicationsAndSnapshots starting: sending first payload, replicatedToAllIndex: {}",
199                 leader.getReplicatedToAllIndex());
200
201         leaderActor.underlyingActor().setSnapshot(new byte[] {2});
202
203         follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
204
205         // Send the first payload - this should cause the first snapshot.
206         MockPayload payload3 = sendPayloadData(leaderActor, "three");
207
208         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
209
210         byte[] snapshot = new byte[] {6};
211         leaderActor.underlyingActor().setSnapshot(snapshot);
212
213         testLog.info("testSubsequentReplicationsAndSnapshots: sending 4 more payloads");
214
215         // Send the next 4. The last one should cause the second snapshot.
216         MockPayload payload4 = sendPayloadData(leaderActor, "four");
217         MockPayload payload5 = sendPayloadData(leaderActor, "five");
218         MockPayload payload6 = sendPayloadData(leaderActor, "six");
219         MockPayload payload7 = sendPayloadData(leaderActor, "seven");
220
221         // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
222         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 5);
223         verifyApplyState(applyStates.get(0), leaderCollectorActor, payload3.toString(), currentTerm, 3, payload3);
224         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
225         verifyApplyState(applyStates.get(2), leaderCollectorActor, payload5.toString(), currentTerm, 5, payload5);
226         verifyApplyState(applyStates.get(3), leaderCollectorActor, payload6.toString(), currentTerm, 6, payload6);
227         verifyApplyState(applyStates.get(4), leaderCollectorActor, payload7.toString(), currentTerm, 7, payload7);
228
229         // Verify follower 1 applies each log entry.
230         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 5);
231         verifyApplyState(applyStates.get(0), null, null, currentTerm, 3, payload3);
232         verifyApplyState(applyStates.get(1), null, null, currentTerm, 4, payload4);
233         verifyApplyState(applyStates.get(2), null, null, currentTerm, 5, payload5);
234         verifyApplyState(applyStates.get(3), null, null, currentTerm, 6, payload6);
235         verifyApplyState(applyStates.get(4), null, null, currentTerm, 7, payload7);
236
237         // Wait for snapshot completion.
238         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
239
240         // The first new entry applied should have caused the leader to advanced the snapshot index to the
241         // last previously applied index (2) that was replicated to all followers.
242         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
243         assertEquals("Leader snapshot index", 2, leaderContext.getReplicatedLog().getSnapshotIndex());
244         assertEquals("Leader journal log size", 5, leaderContext.getReplicatedLog().size());
245         assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
246         assertEquals("Leader commit index", 7, leaderContext.getCommitIndex());
247         assertEquals("Leader last applied", 7, leaderContext.getLastApplied());
248         assertEquals("Leader replicatedToAllIndex", 2, leader.getReplicatedToAllIndex());
249
250         // Now stop dropping AppendEntries in follower 2.
251         follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
252
253         // Verify follower 2 applies each log entry.
254         applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 5);
255         verifyApplyState(applyStates.get(0), null, null, currentTerm, 3, payload3);
256         verifyApplyState(applyStates.get(1), null, null, currentTerm, 4, payload4);
257         verifyApplyState(applyStates.get(2), null, null, currentTerm, 5, payload5);
258         verifyApplyState(applyStates.get(3), null, null, currentTerm, 6, payload6);
259         verifyApplyState(applyStates.get(4), null, null, currentTerm, 7, payload7);
260
261         // Ensure there's at least 1 more heartbeat.
262         MessageCollectorActor.clearMessages(leaderCollectorActor);
263         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
264
265         // The leader should now have performed fake snapshots to advance the snapshot index and to trim
266         // the log. In addition replicatedToAllIndex should've advanced.
267         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
268         assertEquals("Leader snapshot index", 6, leaderContext.getReplicatedLog().getSnapshotIndex());
269         assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
270         assertEquals("Leader journal last index", 7, leaderContext.getReplicatedLog().lastIndex());
271         assertEquals("Leader replicatedToAllIndex", 6, leader.getReplicatedToAllIndex());
272
273         // Verify the leader's persisted snapshot.
274         List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
275         assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
276         verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 3, currentTerm, 7, snapshot);
277         List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
278         assertEquals("Persisted Snapshot getUnAppliedEntries size", 4, unAppliedEntry.size());
279         verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 4, payload4);
280         verifyReplicatedLogEntry(unAppliedEntry.get(1), currentTerm, 5, payload5);
281         verifyReplicatedLogEntry(unAppliedEntry.get(2), currentTerm, 6, payload6);
282         verifyReplicatedLogEntry(unAppliedEntry.get(3), currentTerm, 7, payload7);
283
284         // Even though follower 2's log was behind by 5 entries and 2 snapshots were done, the leader
285         // should not have tried to install a snapshot to catch it up because replicatedToAllIndex was also
286         // behind. Instead of installing a snapshot the leader would've sent AppendEntries with the log entries.
287         InstallSnapshot installSnapshot = MessageCollectorActor.getFirstMatching(follower2CollectorActor, InstallSnapshot.class);
288         Assert.assertNull("Follower 2 received unexpected InstallSnapshot", installSnapshot);
289
290         // Verify follower 1's log and snapshot indexes.
291         MessageCollectorActor.clearMessages(follower1CollectorActor);
292         MessageCollectorActor.expectFirstMatching(follower1CollectorActor, AppendEntries.class);
293         assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
294         assertEquals("Follower 1 snapshot index", 6, follower1Context.getReplicatedLog().getSnapshotIndex());
295         assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
296         assertEquals("Follower 1 journal last index", 7, follower1Context.getReplicatedLog().lastIndex());
297         assertEquals("Follower 1 commit index", 7, follower1Context.getCommitIndex());
298         assertEquals("Follower 1 last applied", 7, follower1Context.getLastApplied());
299         assertEquals("Follower 1 replicatedToAllIndex", 6, follower1.getReplicatedToAllIndex());
300
301         // Verify follower 2's log and snapshot indexes.
302         MessageCollectorActor.clearMessages(follower2CollectorActor);
303         MessageCollectorActor.expectFirstMatching(follower2CollectorActor, AppendEntries.class);
304         assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
305         assertEquals("Follower 2 snapshot index", 6, follower2Context.getReplicatedLog().getSnapshotIndex());
306         assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
307         assertEquals("Follower 2 journal last index", 7, follower2Context.getReplicatedLog().lastIndex());
308         assertEquals("Follower 2 commit index", 7, follower2Context.getCommitIndex());
309         assertEquals("Follower 2 last applied", 7, follower2Context.getLastApplied());
310         assertEquals("Follower 2 replicatedToAllIndex", 6, follower2.getReplicatedToAllIndex());
311
312         MessageCollectorActor.clearMessages(leaderCollectorActor);
313         MessageCollectorActor.clearMessages(follower1CollectorActor);
314         MessageCollectorActor.clearMessages(follower2CollectorActor);
315
316         testLog.info("testSubsequentReplicationsAndSnapshots complete");
317     }
318
319     /**
320      * Send a couple more payloads with follower 2 lagging. The last payload will have a large enough size
321      * to trigger a leader snapshot.
322      *
323      * @throws Exception
324      */
325     private void testLeaderSnapshotTriggeredByMemoryThresholdExceeded() throws Exception {
326         testLog.info("testLeaderSnapshotTriggeredByMemoryThresholdExceeded starting: sending 3 payloads, replicatedToAllIndex: {}",
327                 leader.getReplicatedToAllIndex());
328
329         leaderActor.underlyingActor().setMockTotalMemory(1000);
330         byte[] snapshot = new byte[] {6};
331         leaderActor.underlyingActor().setSnapshot(snapshot);
332
333         // We'll expect a ReplicatedLogImplEntry message and an ApplyJournalEntries message added to the journal.
334         InMemoryJournal.addWriteMessagesCompleteLatch(leaderId, 2);
335
336         follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
337
338         // Send a payload with a large relative size but not enough to trigger a snapshot.
339         MockPayload payload8 = sendPayloadData(leaderActor, "eight", 500);
340
341         // Verify the leader got consensus and applies the first log entry even though follower 2 didn't respond.
342         List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 1);
343         verifyApplyState(applyStates.get(0), leaderCollectorActor, payload8.toString(), currentTerm, 8, payload8);
344
345         // Wait for all the ReplicatedLogImplEntry and ApplyJournalEntries messages to be added to the journal
346         // before the snapshot so the snapshot sequence # will be higher to ensure the snapshot gets
347         // purged from the snapshot store after subsequent snapshots.
348         InMemoryJournal.waitForWriteMessagesComplete(leaderId);
349
350         // Verify a snapshot is not triggered.
351         CaptureSnapshot captureSnapshot = MessageCollectorActor.getFirstMatching(leaderCollectorActor, CaptureSnapshot.class);
352         Assert.assertNull("Leader received unexpected CaptureSnapshot", captureSnapshot);
353
354         // Send another payload with a large enough relative size in combination with the last payload
355         // that exceeds the memory threshold (70% * 1000 = 700) - this should do a snapshot.
356         payload9 = sendPayloadData(leaderActor, "nine", 201);
357
358         // Verify the leader applies the last log entry.
359         applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 2);
360         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload9.toString(), currentTerm, 9, payload9);
361
362         // Verify follower 1 applies each log entry.
363         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 2);
364         verifyApplyState(applyStates.get(0), null, null, currentTerm, 8, payload8);
365         verifyApplyState(applyStates.get(1), null, null, currentTerm, 9, payload9);
366
367         // A snapshot should've occurred - wait for it to complete.
368         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
369
370         // Because the snapshot was triggered by exceeding the memory threshold the leader should've advanced
371         // the snapshot index to the last applied index and trimmed the log even though the entries weren't
372         // replicated to all followers.
373         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
374         assertEquals("Leader snapshot index", 8, leaderContext.getReplicatedLog().getSnapshotIndex());
375         assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
376         assertEquals("Leader journal last index", 9, leaderContext.getReplicatedLog().lastIndex());
377         assertEquals("Leader commit index", 9, leaderContext.getCommitIndex());
378         assertEquals("Leader last applied", 9, leaderContext.getLastApplied());
379         // Note: replicatedToAllIndex should not be advanced since log entries 8 and 9 haven't yet been
380         // replicated to follower 2.
381         assertEquals("Leader replicatedToAllIndex", 7, leader.getReplicatedToAllIndex());
382
383         // Verify the leader's persisted snapshot.
384         List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
385         assertEquals("Persisted snapshots size", 1, persistedSnapshots.size());
386         verifySnapshot("Persisted", persistedSnapshots.get(0), currentTerm, 8, currentTerm, 9, snapshot);
387         List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries();
388         assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
389         verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 9, payload9);
390
391         testLog.info("testLeaderSnapshotTriggeredByMemoryThresholdExceeded ending");
392     }
393
394     /**
395      * Send another payload to verify another snapshot is not done since the last snapshot trimmed the
396      * first log entry so the memory threshold should not be exceeded.
397      *
398      * @throws Exception
399      */
400     private void verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot() throws Exception {
401         ApplyState applyState;
402         CaptureSnapshot captureSnapshot;
403
404         MockPayload payload10 = sendPayloadData(leaderActor, "ten");
405
406         // Verify the leader applies the state.
407         applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
408         verifyApplyState(applyState, leaderCollectorActor, payload10.toString(), currentTerm, 10, payload10);
409
410         captureSnapshot = MessageCollectorActor.getFirstMatching(leaderCollectorActor, CaptureSnapshot.class);
411         Assert.assertNull("Leader received unexpected CaptureSnapshot", captureSnapshot);
412
413         // Verify the follower 1 applies the state.
414         applyState = MessageCollectorActor.expectFirstMatching(follower1CollectorActor, ApplyState.class);
415         verifyApplyState(applyState, null, null, currentTerm, 10, payload10);
416
417         // Verify the follower 2 applies the state.
418         applyState = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplyState.class);
419         verifyApplyState(applyState, null, null, currentTerm, 10, payload10);
420
421         // Verify the leader's state.
422         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
423         assertEquals("Leader snapshot index", 9, leaderContext.getReplicatedLog().getSnapshotIndex());
424         assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
425         assertEquals("Leader journal last index", 10, leaderContext.getReplicatedLog().lastIndex());
426         assertEquals("Leader commit index", 10, leaderContext.getCommitIndex());
427         assertEquals("Leader last applied", 10, leaderContext.getLastApplied());
428         assertEquals("Leader replicatedToAllIndex", 9, leader.getReplicatedToAllIndex());
429
430         // Verify follower 1's state.
431         assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
432         assertEquals("Follower 1 snapshot index", 9, follower1Context.getReplicatedLog().getSnapshotIndex());
433         assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
434         assertEquals("Follower 1 journal last index", 10, follower1Context.getReplicatedLog().lastIndex());
435         assertEquals("Follower 1 commit index", 10, follower1Context.getCommitIndex());
436         assertEquals("Follower 1 last applied", 10, follower1Context.getLastApplied());
437         assertEquals("Follower 1 replicatedToAllIndex", 9, follower1.getReplicatedToAllIndex());
438
439         // Verify follower 2's state.
440         assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
441         assertEquals("Follower 2 snapshot index", 9, follower2Context.getReplicatedLog().getSnapshotIndex());
442         assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
443         assertEquals("Follower 2 journal last index", 10, follower2Context.getReplicatedLog().lastIndex());
444         assertEquals("Follower 2 commit index", 10, follower2Context.getCommitIndex());
445         assertEquals("Follower 2 last applied", 10, follower2Context.getLastApplied());
446         assertEquals("Follower 2 replicatedToAllIndex", 9, follower2.getReplicatedToAllIndex());
447
448         // Revert back to JVM total memory.
449         leaderActor.underlyingActor().setMockTotalMemory(0);
450
451         MessageCollectorActor.clearMessages(leaderCollectorActor);
452         MessageCollectorActor.clearMessages(follower1CollectorActor);
453         MessageCollectorActor.clearMessages(follower2CollectorActor);
454     }
455
456     /**
457      * Following a snapshot due memory threshold exceeded, resume the lagging follower and verify it receives
458      * an install snapshot from the leader.
459      *
460      * @throws Exception
461      */
462     private void testInstallSnapshotToLaggingFollower() throws Exception {
463         List<Snapshot> persistedSnapshots;
464         List<ReplicatedLogEntry> unAppliedEntry;
465         ApplyState applyState;
466         ApplySnapshot applySnapshot;
467         InstallSnapshot installSnapshot;
468         InstallSnapshotReply installSnapshotReply;
469
470         byte[] snapshot = new byte[] {10};
471         leaderActor.underlyingActor().setSnapshot(snapshot);
472
473         // Now stop dropping AppendEntries in follower 2.
474         follower2Actor.underlyingActor().stopDropMessages(AppendEntries.class);
475
476         installSnapshot = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, InstallSnapshot.class);
477         assertEquals("InstallSnapshot getTerm", currentTerm, installSnapshot.getTerm());
478         assertEquals("InstallSnapshot getLeaderId", leaderId, installSnapshot.getLeaderId());
479         assertEquals("InstallSnapshot getChunkIndex", 1, installSnapshot.getChunkIndex());
480         assertEquals("InstallSnapshot getTotalChunks", 1, installSnapshot.getTotalChunks());
481         assertEquals("InstallSnapshot getLastIncludedTerm", currentTerm, installSnapshot.getLastIncludedTerm());
482         assertEquals("InstallSnapshot getLastIncludedIndex", 8, installSnapshot.getLastIncludedIndex());
483         assertArrayEquals("InstallSnapshot getData", snapshot, installSnapshot.getData().toByteArray());
484
485         installSnapshotReply = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, InstallSnapshotReply.class);
486         assertEquals("InstallSnapshotReply getTerm", currentTerm, installSnapshotReply.getTerm());
487         assertEquals("InstallSnapshotReply getChunkIndex", 1, installSnapshotReply.getChunkIndex());
488         assertEquals("InstallSnapshotReply getFollowerId", follower2Id, installSnapshotReply.getFollowerId());
489         assertEquals("InstallSnapshotReply isSuccess", true, installSnapshotReply.isSuccess());
490
491         // Verify follower 2 applies the snapshot.
492         applySnapshot = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplySnapshot.class);
493         verifySnapshot("Follower 2", applySnapshot.getSnapshot(), currentTerm, 8, currentTerm, 8, snapshot);
494         assertEquals("Persisted Snapshot getUnAppliedEntries size", 0, applySnapshot.getSnapshot().getUnAppliedEntries().size());
495
496         // Verify follower 2 only applies the second log entry (9) as the first one (8) was in the snapshot.
497         applyState = MessageCollectorActor.expectFirstMatching(follower2CollectorActor, ApplyState.class);
498         verifyApplyState(applyState, null, null, currentTerm, 9, payload9);
499
500         // Wait for the snapshot to complete.
501         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
502
503         // Ensure there's at least 1 more heartbeat.
504         MessageCollectorActor.clearMessages(leaderCollectorActor);
505         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
506
507         // The leader should now have performed fake snapshots to advance the snapshot index and to trim
508         // the log. In addition replicatedToAllIndex should've advanced.
509         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
510         assertEquals("Leader snapshot index", 8, leaderContext.getReplicatedLog().getSnapshotIndex());
511         assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
512         assertEquals("Leader commit index", 9, leaderContext.getCommitIndex());
513         assertEquals("Leader last applied", 9, leaderContext.getLastApplied());
514         assertEquals("Leader replicatedToAllIndex", 8, leader.getReplicatedToAllIndex());
515
516         // Verify the leader's persisted snapshot. The previous snapshot (currently) won't be deleted from
517         // the snapshot store because the second snapshot was initiated by the follower install snapshot and
518         // not because the batch count was reached so the persisted journal sequence number wasn't advanced
519         // far enough to cause the previous snapshot to be deleted. This is because
520         // RaftActor#trimPersistentData subtracts the snapshotBatchCount from the snapshot's sequence number.
521         // This is OK - the next snapshot should delete it. In production, even if the system restarted
522         // before another snapshot, they would both get applied which wouldn't hurt anything.
523         persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
524         Assert.assertTrue("Expected at least 1 persisted snapshots", persistedSnapshots.size() > 0);
525         Snapshot persistedSnapshot = persistedSnapshots.get(persistedSnapshots.size() - 1);
526         verifySnapshot("Persisted", persistedSnapshot, currentTerm, 9, currentTerm, 9, snapshot);
527         unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
528         assertEquals("Persisted Snapshot getUnAppliedEntries size", 0, unAppliedEntry.size());
529
530         MessageCollectorActor.clearMessages(leaderCollectorActor);
531         MessageCollectorActor.clearMessages(follower1CollectorActor);
532         MessageCollectorActor.clearMessages(follower2CollectorActor);
533     }
534
535     /**
536      * Do another round of payloads and snapshot to verify replicatedToAllIndex gets back on track and
537      * snapshots works as expected after doing a follower snapshot. In this step we don't lag a follower.
538      */
539     private void testFinalReplicationsAndSnapshot() {
540         List<ApplyState> applyStates;
541         ApplyState applyState;
542
543         testLog.info("testFinalReplicationsAndSnapshot starting: replicatedToAllIndex: {}", leader.getReplicatedToAllIndex());
544
545         byte[] snapshot = new byte[] {14};
546         leaderActor.underlyingActor().setSnapshot(snapshot);
547
548         // Send another payload - a snapshot should occur.
549         payload11 = sendPayloadData(leaderActor, "eleven");
550
551         // Wait for the snapshot to complete.
552         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
553
554         applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
555         verifyApplyState(applyState, leaderCollectorActor, payload11.toString(), currentTerm, 11, payload11);
556
557         // Verify the leader's last persisted snapshot (previous ones may not be purged yet).
558         List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
559         Snapshot persistedSnapshot = persistedSnapshots.get(persistedSnapshots.size() - 1);
560         verifySnapshot("Persisted", persistedSnapshot, currentTerm, 10, currentTerm, 11, snapshot);
561         List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
562         assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
563         verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 11, payload11);
564
565         // Send a couple more payloads.
566         payload12 = sendPayloadData(leaderActor, "twelve");
567         payload13 = sendPayloadData(leaderActor, "thirteen");
568
569         // Verify the leader applies the 2 log entries.
570         applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 3);
571         verifyApplyState(applyStates.get(1), leaderCollectorActor, payload12.toString(), currentTerm, 12, payload12);
572         verifyApplyState(applyStates.get(2), leaderCollectorActor, payload13.toString(), currentTerm, 13, payload13);
573
574         // Verify the leader applies a log entry for at least the last entry index.
575         verifyApplyJournalEntries(leaderCollectorActor, 13);
576
577         // Ensure there's at least 1 more heartbeat to trim the log.
578         MessageCollectorActor.clearMessages(leaderCollectorActor);
579         MessageCollectorActor.expectFirstMatching(leaderCollectorActor, AppendEntriesReply.class);
580
581         // Verify the leader's final snapshot index et al.
582         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
583         assertEquals("Leader snapshot index", 12, leaderContext.getReplicatedLog().getSnapshotIndex());
584         assertEquals("Leader journal log size", 1, leaderContext.getReplicatedLog().size());
585         assertEquals("Leader journal last index", 13, leaderContext.getReplicatedLog().lastIndex());
586         assertEquals("Leader commit index", 13, leaderContext.getCommitIndex());
587         assertEquals("Leader last applied", 13, leaderContext.getLastApplied());
588         assertEquals("Leader replicatedToAllIndex", 12, leader.getReplicatedToAllIndex());
589
590         InMemoryJournal.dumpJournal(leaderId);
591
592         // Verify the leaders's persisted journal log - should only contain the last 2 ReplicatedLogEntries
593         // added after the snapshot as the persisted journal should've been purged to the snapshot
594         // sequence number.
595         verifyPersistedJournal(leaderId, Arrays.asList(new ReplicatedLogImplEntry(12, currentTerm, payload12),
596                 new ReplicatedLogImplEntry(13, currentTerm, payload13)));
597
598         // Verify the leaders's persisted journal contains an ApplyJournalEntries for at least the last entry index.
599         List<ApplyJournalEntries> persistedApplyJournalEntries = InMemoryJournal.get(leaderId, ApplyJournalEntries.class);
600         boolean found = false;
601         for(ApplyJournalEntries entry: persistedApplyJournalEntries) {
602             if(entry.getToIndex() == 13) {
603                 found = true;
604                 break;
605             }
606         }
607
608         Assert.assertTrue(String.format("ApplyJournalEntries with index %d not found in leader's persisted journal", 13), found);
609
610         // Verify follower 1 applies the 2 log entries.
611         applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 3);
612         verifyApplyState(applyStates.get(0), null, null, currentTerm, 11, payload11);
613         verifyApplyState(applyStates.get(1), null, null, currentTerm, 12, payload12);
614         verifyApplyState(applyStates.get(2), null, null, currentTerm, 13, payload13);
615
616         // Verify follower 1's log state.
617         assertEquals("Follower 1 snapshot term", currentTerm, follower1Context.getReplicatedLog().getSnapshotTerm());
618         assertEquals("Follower 1 snapshot index", 12, follower1Context.getReplicatedLog().getSnapshotIndex());
619         assertEquals("Follower 1 journal log size", 1, follower1Context.getReplicatedLog().size());
620         assertEquals("Follower 1 journal last index", 13, follower1Context.getReplicatedLog().lastIndex());
621         assertEquals("Follower 1 commit index", 13, follower1Context.getCommitIndex());
622         assertEquals("Follower 1 last applied", 13, follower1Context.getLastApplied());
623         assertEquals("Follower 1 replicatedToAllIndex", 12, follower1.getReplicatedToAllIndex());
624
625         // Verify follower 2 applies the 2 log entries.
626         applyStates = MessageCollectorActor.expectMatching(follower2CollectorActor, ApplyState.class, 3);
627         verifyApplyState(applyStates.get(0), null, null, currentTerm, 11, payload11);
628         verifyApplyState(applyStates.get(1), null, null, currentTerm, 12, payload12);
629         verifyApplyState(applyStates.get(2), null, null, currentTerm, 13, payload13);
630
631         // Verify follower 2's log state.
632         assertEquals("Follower 2 snapshot term", currentTerm, follower2Context.getReplicatedLog().getSnapshotTerm());
633         assertEquals("Follower 2 snapshot index", 12, follower2Context.getReplicatedLog().getSnapshotIndex());
634         assertEquals("Follower 2 journal log size", 1, follower2Context.getReplicatedLog().size());
635         assertEquals("Follower 2 journal last index", 13, follower2Context.getReplicatedLog().lastIndex());
636         assertEquals("Follower 2 commit index", 13, follower2Context.getCommitIndex());
637         assertEquals("Follower 2 last applied", 13, follower2Context.getLastApplied());
638         assertEquals("Follower 2 replicatedToAllIndex", 12, follower2.getReplicatedToAllIndex());
639
640         testLog.info("testFinalReplicationsAndSnapshot ending");
641     }
642
643     /**
644      * Kill the leader actor, reinstate it and verify the recovered journal.
645      */
646     private void testLeaderReinstatement() {
647         testLog.info("testLeaderReinstatement starting");
648
649         killActor(leaderActor);
650
651         leaderActor = newTestRaftActor(leaderId, peerAddresses, leaderConfigParams);
652
653         leaderActor.underlyingActor().startDropMessages(RequestVoteReply.class);
654
655         leaderContext = leaderActor.underlyingActor().getRaftActorContext();
656
657         leaderActor.underlyingActor().waitForRecoveryComplete();
658
659         assertEquals("Leader snapshot term", currentTerm, leaderContext.getReplicatedLog().getSnapshotTerm());
660         assertEquals("Leader snapshot index", 10, leaderContext.getReplicatedLog().getSnapshotIndex());
661         assertEquals("Leader journal log size", 3, leaderContext.getReplicatedLog().size());
662         assertEquals("Leader journal last index", 13, leaderContext.getReplicatedLog().lastIndex());
663         assertEquals("Leader commit index", 13, leaderContext.getCommitIndex());
664         assertEquals("Leader last applied", 13, leaderContext.getLastApplied());
665         verifyReplicatedLogEntry(leaderContext.getReplicatedLog().get(11), currentTerm, 11, payload11);
666         verifyReplicatedLogEntry(leaderContext.getReplicatedLog().get(12), currentTerm, 12, payload12);
667         verifyReplicatedLogEntry(leaderContext.getReplicatedLog().get(13), currentTerm, 13, payload13);
668
669         testLog.info("testLeaderReinstatement ending");
670     }
671 }