import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
-import javax.annotation.Nullable;
+import org.apache.commons.lang3.SerializationUtils;
+import org.eclipse.jdt.annotation.Nullable;
import org.junit.Assert;
import org.junit.Test;
import org.opendaylight.controller.cluster.raft.MockRaftActorContext.MockPayload;
import org.opendaylight.controller.cluster.raft.persisted.ApplyJournalEntries;
import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
import org.opendaylight.controller.cluster.raft.persisted.ServerInfo;
+import org.opendaylight.controller.cluster.raft.persisted.SimpleReplicatedLogEntry;
+import org.opendaylight.controller.cluster.raft.persisted.Snapshot;
import org.opendaylight.controller.cluster.raft.persisted.UpdateElectionTerm;
import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
testLog.info("Leader created and elected");
}
+ private void setupFollower2() {
+ follower2Actor = newTestRaftActor(follower2Id, ImmutableMap.of(leaderId, testActorPath(leaderId),
+ follower1Id, testActorPath(follower1Id)), newFollowerConfigParams());
+
+ follower2Context = follower2Actor.underlyingActor().getRaftActorContext();
+ follower2 = follower2Actor.underlyingActor().getCurrentBehavior();
+
+ follower2CollectorActor = follower2Actor.underlyingActor().collectorActor();
+ }
+
/**
* Send 2 payload instances with follower 2 lagging then resume the follower and verifies it gets
* caught up via AppendEntries.
*/
@Test
- public void testReplicationsWithLaggingFollowerCaughtUpViaAppendEntries() throws Exception {
+ public void testReplicationsWithLaggingFollowerCaughtUpViaAppendEntries() {
testLog.info("testReplicationsWithLaggingFollowerCaughtUpViaAppendEntries starting: sending 2 new payloads");
setup();
* sent by the leader.
*/
@Test
- public void testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries() throws Exception {
+ public void testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries() {
testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaAppendEntries starting");
setup();
* installed by the leader.
*/
@Test
- public void testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot() throws Exception {
+ public void testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot() {
testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot starting");
setup();
// Configure follower 2 to drop messages and lag.
follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
+ // Sleep for at least the election timeout interval so follower 2 is deemed inactive by the leader.
+ Uninterruptibles.sleepUninterruptibly(leaderConfigParams.getElectionTimeOutInterval().toMillis() + 5,
+ TimeUnit.MILLISECONDS);
+
// Send 5 payloads - the second should cause a leader snapshot.
final MockPayload payload2 = sendPayloadData(leaderActor, "two");
final MockPayload payload3 = sendPayloadData(leaderActor, "three");
testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot: "
+ "sending 1 more payload to trigger second snapshot");
- // Sleep for at least the election timeout interval so follower 2 is deemed inactive by the leader.
- Uninterruptibles.sleepUninterruptibly(leaderConfigParams.getElectionTimeOutInterval().toMillis() + 5,
- TimeUnit.MILLISECONDS);
-
// Send another payload to trigger a second leader snapshot.
MockPayload payload7 = sendPayloadData(leaderActor, "seven");
testLog.info("testLeaderSnapshotWithLaggingFollowerCaughtUpViaInstallSnapshot complete");
}
+ /**
+ * Tests whether the leader reattempts to send a snapshot when a follower crashes before replying with
+ * InstallSnapshotReply after the last chunk has been sent.
+ */
+ @Test
+ public void testLeaderInstallsSnapshotWithRestartedFollowerDuringSnapshotInstallation() throws Exception {
+ testLog.info("testLeaderInstallsSnapshotWithRestartedFollowerDuringSnapshotInstallation starting");
+
+ setup();
+
+ sendInitialPayloadsReplicatedToAllFollowers("zero", "one");
+
+ // Configure follower 2 to drop messages and lag.
+ follower2Actor.stop();
+
+ // Sleep for at least the election timeout interval so follower 2 is deemed inactive by the leader.
+ Uninterruptibles.sleepUninterruptibly(leaderConfigParams.getElectionTimeOutInterval().toMillis() + 5,
+ TimeUnit.MILLISECONDS);
+
+ // Send 5 payloads - the second should cause a leader snapshot.
+ final MockPayload payload2 = sendPayloadData(leaderActor, "two");
+ final MockPayload payload3 = sendPayloadData(leaderActor, "three");
+ final MockPayload payload4 = sendPayloadData(leaderActor, "four");
+ final MockPayload payload5 = sendPayloadData(leaderActor, "five");
+ final MockPayload payload6 = sendPayloadData(leaderActor, "six");
+
+ MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
+
+ // Verify the leader got consensus and applies each log entry even though follower 2 didn't respond.
+ List<ApplyState> applyStates = MessageCollectorActor.expectMatching(leaderCollectorActor, ApplyState.class, 5);
+ verifyApplyState(applyStates.get(0), leaderCollectorActor, payload2.toString(), currentTerm, 2, payload2);
+ verifyApplyState(applyStates.get(2), leaderCollectorActor, payload4.toString(), currentTerm, 4, payload4);
+ verifyApplyState(applyStates.get(4), leaderCollectorActor, payload6.toString(), currentTerm, 6, payload6);
+
+ MessageCollectorActor.clearMessages(leaderCollectorActor);
+
+ testLog.info("testLeaderInstallsSnapshotWithRestartedFollowerDuringSnapshotInstallation: "
+ + "sending 1 more payload to trigger second snapshot");
+
+ // Send another payload to trigger a second leader snapshot.
+ MockPayload payload7 = sendPayloadData(leaderActor, "seven");
+
+ MessageCollectorActor.expectFirstMatching(leaderCollectorActor, SaveSnapshotSuccess.class);
+
+
+ ApplyState applyState = MessageCollectorActor.expectFirstMatching(leaderCollectorActor, ApplyState.class);
+ verifyApplyState(applyState, leaderCollectorActor, payload7.toString(), currentTerm, 7, payload7);
+
+ // Verify follower 1 applies each log entry.
+ applyStates = MessageCollectorActor.expectMatching(follower1CollectorActor, ApplyState.class, 6);
+ verifyApplyState(applyStates.get(0), null, null, currentTerm, 2, payload2);
+ verifyApplyState(applyStates.get(2), null, null, currentTerm, 4, payload4);
+ verifyApplyState(applyStates.get(5), null, null, currentTerm, 7, payload7);
+
+ leaderActor.underlyingActor()
+ .startDropMessages(InstallSnapshotReply.class, reply -> reply.getChunkIndex() == 5);
+
+ setupFollower2();
+
+ MessageCollectorActor.expectMatching(follower2CollectorActor, InstallSnapshot.class, 5);
+
+ follower2Actor.stop();
+
+ // need to get rid of persistence for follower2
+ InMemorySnapshotStore.clearSnapshotsFor(follower2Id);
+
+ leaderActor.underlyingActor().stopDropMessages(InstallSnapshotReply.class);
+
+ MessageCollectorActor.clearMessages(follower2CollectorActor);
+ setupFollower2();
+
+ MessageCollectorActor.expectMatching(follower2CollectorActor, SaveSnapshotSuccess.class, 1);
+ }
+
/**
* Send payloads with follower 2 lagging with the last payload having a large enough size to trigger a
* leader snapshot such that the leader trims its log from the last applied index.. Follower 2's log will
* by the leader.
*/
@Test
- public void testLeaderSnapshotTriggeredByMemoryThresholdExceededWithLaggingFollower() throws Exception {
+ public void testLeaderSnapshotTriggeredByMemoryThresholdExceededWithLaggingFollower() {
testLog.info("testLeaderSnapshotTriggeredByMemoryThresholdExceededWithLaggingFollower starting");
snapshotBatchCount = 5;
follower2Actor.underlyingActor().startDropMessages(AppendEntries.class);
+ // Sleep for at least the election timeout interval so follower 2 is deemed inactive by the leader.
+ Uninterruptibles.sleepUninterruptibly(leaderConfigParams.getElectionTimeOutInterval().toMillis() + 5,
+ TimeUnit.MILLISECONDS);
+
// Send a payload with a large relative size but not enough to trigger a snapshot.
MockPayload payload1 = sendPayloadData(leaderActor, "one", 500);
verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot();
// Sends 3 payloads with indexes 4, 5 and 6.
- verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot();
+ long leadersSnapshotIndexOnRecovery = verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot();
// Recover the leader from persistence and verify.
long leadersLastIndexOnRecovery = 6;
- // The leader's last snapshot was triggered by index 4 so the last applied index in the snapshot was 3.
- long leadersSnapshotIndexOnRecovery = 3;
-
- // The recovered journal should have 3 entries starting at index 4.
- long leadersFirstJournalEntryIndexOnRecovery = 4;
+ long leadersFirstJournalEntryIndexOnRecovery = leadersSnapshotIndexOnRecovery + 1;
verifyLeaderRecoveryAfterReinstatement(leadersLastIndexOnRecovery, leadersSnapshotIndexOnRecovery,
leadersFirstJournalEntryIndexOnRecovery);
* Send another payload to verify another snapshot is not done since the last snapshot trimmed the
* first log entry so the memory threshold should not be exceeded.
*/
- private void verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot() throws Exception {
+ private void verifyNoSubsequentSnapshotAfterMemoryThresholdExceededSnapshot() {
ApplyState applyState;
CaptureSnapshot captureSnapshot;
* Resume the lagging follower 2 and verify it receives an install snapshot from the leader.
*/
private void verifyInstallSnapshotToLaggingFollower(long lastAppliedIndex,
- @Nullable ServerConfigurationPayload expServerConfig) throws Exception {
- testLog.info("testInstallSnapshotToLaggingFollower starting");
+ @Nullable ServerConfigurationPayload expServerConfig) {
+ testLog.info("verifyInstallSnapshotToLaggingFollower starting");
MessageCollectorActor.clearMessages(leaderCollectorActor);
List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
assertEquals("Persisted Snapshot getUnAppliedEntries size", 0, unAppliedEntry.size());
- int snapshotSize = persistedSnapshot.getState().length;
+ int snapshotSize = SerializationUtils.serialize(persistedSnapshot.getState()).length;
final int expTotalChunks = snapshotSize / SNAPSHOT_CHUNK_SIZE
+ (snapshotSize % SNAPSHOT_CHUNK_SIZE > 0 ? 1 : 0);
MessageCollectorActor.clearMessages(follower1CollectorActor);
MessageCollectorActor.clearMessages(follower2CollectorActor);
- testLog.info("testInstallSnapshotToLaggingFollower complete");
+ testLog.info("verifyInstallSnapshotToLaggingFollower complete");
}
/**
* Do another round of payloads and snapshot to verify replicatedToAllIndex gets back on track and
* snapshots works as expected after doing a follower snapshot. In this step we don't lag a follower.
*/
- private void verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot() throws Exception {
- testLog.info("testReplicationsAndSnapshotAfterInstallSnapshot starting: replicatedToAllIndex: {}",
+ private long verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot() {
+ testLog.info(
+ "verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot starting: replicatedToAllIndex: {}",
leader.getReplicatedToAllIndex());
// Send another payload - a snapshot should occur.
// Verify the leader's last persisted snapshot (previous ones may not be purged yet).
List<Snapshot> persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class);
Snapshot persistedSnapshot = persistedSnapshots.get(persistedSnapshots.size() - 1);
- verifySnapshot("Persisted", persistedSnapshot, currentTerm, 3, currentTerm, 4);
+ // The last (fourth) payload may or may not have been applied when the snapshot is captured depending on the
+ // timing when the async persistence completes.
List<ReplicatedLogEntry> unAppliedEntry = persistedSnapshot.getUnAppliedEntries();
- assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
- verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 4, payload4);
+ long leadersSnapshotIndex;
+ if (unAppliedEntry.isEmpty()) {
+ leadersSnapshotIndex = 4;
+ expSnapshotState.add(payload4);
+ verifySnapshot("Persisted", persistedSnapshot, currentTerm, 4, currentTerm, 4);
+ } else {
+ leadersSnapshotIndex = 3;
+ verifySnapshot("Persisted", persistedSnapshot, currentTerm, 3, currentTerm, 4);
+ assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size());
+ verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 4, payload4);
+ expSnapshotState.add(payload4);
+ }
// Send a couple more payloads.
MockPayload payload5 = sendPayloadData(leaderActor, "five");
// Verify the leaders's persisted journal log - it should only contain the last 2 ReplicatedLogEntries
// added after the snapshot as the persisted journal should've been purged to the snapshot
// sequence number.
- verifyPersistedJournal(leaderId, Arrays.asList(new ReplicatedLogImplEntry(5, currentTerm, payload5),
- new ReplicatedLogImplEntry(6, currentTerm, payload6)));
+ verifyPersistedJournal(leaderId, Arrays.asList(new SimpleReplicatedLogEntry(5, currentTerm, payload5),
+ new SimpleReplicatedLogEntry(6, currentTerm, payload6)));
// Verify the leaders's persisted journal contains an ApplyJournalEntries for at least the last entry index.
List<ApplyJournalEntries> persistedApplyJournalEntries =
// Verify follower 2's log state.
verifyFollowersTrimmedLog(2, follower2Actor, 6);
- expSnapshotState.add(payload4);
expSnapshotState.add(payload5);
expSnapshotState.add(payload6);
- testLog.info("testReplicationsAndSnapshotAfterInstallSnapshot ending");
+ testLog.info("verifyReplicationsAndSnapshotWithNoLaggingAfterInstallSnapshot ending");
+
+ return leadersSnapshotIndex;
}
/**
*/
private void verifyLeaderRecoveryAfterReinstatement(long lastIndex, long snapshotIndex,
long firstJournalEntryIndex) {
- testLog.info("testLeaderReinstatement starting");
+ testLog.info("verifyLeaderRecoveryAfterReinstatement starting: lastIndex: {}, snapshotIndex: {}, "
+ + "firstJournalEntryIndex: {}", lastIndex, snapshotIndex, firstJournalEntryIndex);
killActor(leaderActor);
assertEquals("Leader applied state", expSnapshotState, testRaftActor.getState());
- testLog.info("testLeaderReinstatement ending");
+ testLog.info("verifyLeaderRecoveryAfterReinstatement ending");
}
private void sendInitialPayloadsReplicatedToAllFollowers(String... data) {