From f9fc8cd613d183de5b1cf226b66649777b9f4ad8 Mon Sep 17 00:00:00 2001 From: Tom Pantelis Date: Fri, 3 Jun 2016 16:04:28 -0400 Subject: [PATCH] Remove snapshot after startup and fix related bug Fixed an issue in the follower out-of-sync integrity checking where is needs to take into account that the previous index may be in the snapshot. A similar issue was seen with other inegrity checks. These issues were indirectly related to the snapshot after startup that was introduced in Be. I think this snapshot is unsafe b/c the replicatedToAllIndex hasn't been determined yet which I think may cause other issues with the trimming after snapshot completion, as the logic takes replicatedToAllIndex into account. And there may be other lurking bugs. I thinks it's safer to let the normal snapshot logic handle it. The reason for the snapshot after startup was to avoid having to recover the same journal entries again on restart that were just recovered. However in reality, in production, servers aren't commonly restarted and typically go weeks/months in between restarts. By the time of the next restart there would likely have been another snapshot and an arbitrary amount of new journal entries to recover so it really doesn't add much value. Change-Id: Ie14148e5dbde3e93deafc5943278aea8c9bb3e75 Signed-off-by: Tom Pantelis --- .../controller/cluster/raft/RaftActor.java | 7 ---- .../cluster/raft/behaviors/Follower.java | 8 ++-- .../cluster/raft/RaftActorTest.java | 37 ------------------- ...eplicationAndSnapshotsIntegrationTest.java | 6 +-- 4 files changed, 6 insertions(+), 52 deletions(-) diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java index f85fa9bfe3..dd986fbbac 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java @@ -182,13 +182,6 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor { initializeBehavior(); raftRecovery = null; - - if (context.getReplicatedLog().size() > 0) { - self().tell(new InitiateCaptureSnapshot(), self()); - LOG.info("{}: Snapshot capture initiated after recovery", persistenceId()); - } else { - LOG.info("{}: Snapshot capture NOT initiated after recovery, journal empty", persistenceId()); - } } } diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java index 82c6bafedd..1290abdff3 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java @@ -72,7 +72,7 @@ public class Follower extends AbstractRaftActorBehavior { } private boolean isLogEntryPresent(long index){ - if(index == context.getReplicatedLog().getSnapshotIndex()){ + if(context.getReplicatedLog().isInSnapshot(index)) { return true; } @@ -290,8 +290,7 @@ public class Follower extends AbstractRaftActorBehavior { logName(), prevLogTerm, appendEntries.getPrevLogTerm()); } else if(appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1 && appendEntries.getReplicatedToAllIndex() != -1 - && !isLogEntryPresent(appendEntries.getReplicatedToAllIndex()) - && !context.getReplicatedLog().isInSnapshot(appendEntries.getReplicatedToAllIndex())) { + && !isLogEntryPresent(appendEntries.getReplicatedToAllIndex())) { // This append entry comes from a leader who has it's log aggressively trimmed and so does not have // the previous entry in it's in-memory journal @@ -300,8 +299,7 @@ public class Follower extends AbstractRaftActorBehavior { logName(), appendEntries.getReplicatedToAllIndex()); } else if(appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1 && appendEntries.getReplicatedToAllIndex() != -1 && numLogEntries > 0 - && !isLogEntryPresent(appendEntries.getEntries().get(0).getIndex() - 1) - && !context.getReplicatedLog().isInSnapshot(appendEntries.getEntries().get(0).getIndex() - 1)) { + && !isLogEntryPresent(appendEntries.getEntries().get(0).getIndex() - 1)) { LOG.debug( "{}: Cannot append entries because the calculated previousIndex {} was not found in the in-memory journal", logName(), appendEntries.getEntries().get(0).getIndex() - 1); diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorTest.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorTest.java index f4846d2589..1ec197de13 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorTest.java +++ b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorTest.java @@ -962,40 +962,6 @@ public class RaftActorTest extends AbstractActorTest { }}; } - @Test - public void testRaftActorOnRecoverySnapshot() throws Exception { - TEST_LOG.info("testRaftActorOnRecoverySnapshot"); - - new JavaTestKit(getSystem()) {{ - String persistenceId = factory.generateActorId("follower-"); - - DefaultConfigParamsImpl config = new DefaultConfigParamsImpl(); - - // Set the heartbeat interval high to essentially disable election otherwise the test - // may fail if the actor is switched to Leader - config.setHeartBeatInterval(new FiniteDuration(1, TimeUnit.DAYS)); - - ImmutableMap peerAddresses = ImmutableMap.builder().put("member1", "address").build(); - - // Create mock ReplicatedLogEntry - ReplicatedLogEntry replLogEntry = new MockRaftActorContext.MockReplicatedLogEntry(1,1, - new MockRaftActorContext.MockPayload("F", 1)); - - InMemoryJournal.addEntry(persistenceId, 1, replLogEntry); - - TestActorRef ref = factory.createTestActor( - MockRaftActor.props(persistenceId, peerAddresses, config)); - - MockRaftActor mockRaftActor = ref.underlyingActor(); - - mockRaftActor.waitForRecoveryComplete(); - - mockRaftActor.waitForInitializeBehaviorComplete(); - - verify(mockRaftActor.snapshotCohortDelegate, timeout(5000)).createSnapshot(any(ActorRef.class)); - }}; - } - @Test public void testSwitchBehavior(){ String persistenceId = factory.generateActorId("leader-"); @@ -1133,9 +1099,6 @@ public class RaftActorTest extends AbstractActorTest { mockRaftActor.waitForRecoveryComplete(); - // Wait for snapshot after recovery - verify(mockRaftActor.snapshotCohortDelegate, timeout(5000)).createSnapshot(any(ActorRef.class)); - mockRaftActor.snapshotCohortDelegate = mock(RaftActorSnapshotCohort.class); raftActorRef.tell(GetSnapshot.INSTANCE, kit.getRef()); diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/ReplicationAndSnapshotsIntegrationTest.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/ReplicationAndSnapshotsIntegrationTest.java index 6334173cc6..c37aee71d3 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/ReplicationAndSnapshotsIntegrationTest.java +++ b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/ReplicationAndSnapshotsIntegrationTest.java @@ -188,9 +188,9 @@ public class ReplicationAndSnapshotsIntegrationTest extends AbstractRaftActorInt // Verify the persisted snapshot in the leader. This should reflect the advanced snapshot index as // the last applied log entry (2) even though the leader hasn't yet advanced its cached snapshot index. List persistedSnapshots = InMemorySnapshotStore.getSnapshots(leaderId, Snapshot.class); - assertEquals("Persisted snapshots size", 2, persistedSnapshots.size()); - verifySnapshot("Persisted", persistedSnapshots.get(1), initialTerm, 2, currentTerm, 3); - List unAppliedEntry = persistedSnapshots.get(1).getUnAppliedEntries(); + assertEquals("Persisted snapshots size", 1, persistedSnapshots.size()); + verifySnapshot("Persisted", persistedSnapshots.get(0), initialTerm, 2, currentTerm, 3); + List unAppliedEntry = persistedSnapshots.get(0).getUnAppliedEntries(); assertEquals("Persisted Snapshot getUnAppliedEntries size", 1, unAppliedEntry.size()); verifyReplicatedLogEntry(unAppliedEntry.get(0), currentTerm, 3, payload3); -- 2.36.6