From: Tom Pantelis Date: Wed, 11 Mar 2015 23:30:59 +0000 (-0400) Subject: Changes in RaftActor#handleCaptureSnapshotReply X-Git-Tag: release/lithium~406^2 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=commitdiff_plain;h=8b3aa5263a221b9c11f67c03f6816c512902d760 Changes in RaftActor#handleCaptureSnapshotReply captureSnapshot.getReplicatedToAllIndex() is usually -1 (in fact I haven't yet been able to come up with a test scenario where it isn't) and we shouldn't reset the behavior's replicatedToAllIndex history when we snapshot due to memory threshold exceeded. Resetting it prevents log trimming when a lagging follower is caught up via install snapshot. Eventually log trimming would catch up on subsequent replicates but I don't see a reason why we should reset it to -1. Also made a couple of other logging changes. Change-Id: I6b9eafc84455a88c3bc1fc91608fe257c03b4093 Signed-off-by: Tom Pantelis --- diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java index ec3f375bde..01787a8497 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java @@ -371,7 +371,7 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor { context.getReplicatedLog().size()); } else if (message instanceof CaptureSnapshot) { - LOG.info("{}: CaptureSnapshot received by actor", persistenceId()); + LOG.debug("{}: CaptureSnapshot received by actor: {}", persistenceId(), message); if(captureSnapshot == null) { captureSnapshot = (CaptureSnapshot)message; @@ -675,7 +675,7 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor { } private void handleCaptureSnapshotReply(byte[] snapshotBytes) { - LOG.info("{}: 
CaptureSnapshotReply received by actor: snapshot size {}", persistenceId(), snapshotBytes.length); + LOG.debug("{}: CaptureSnapshotReply received by actor: snapshot size {}", persistenceId(), snapshotBytes.length); // create a snapshot object from the state provided and save it // when snapshot is saved async, SaveSnapshotSuccess is raised. @@ -692,13 +692,24 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor { long dataThreshold = Runtime.getRuntime().totalMemory() * getRaftActorContext().getConfigParams().getSnapshotDataThresholdPercentage() / 100; if (context.getReplicatedLog().dataSize() > dataThreshold) { + + if(LOG.isDebugEnabled()) { + LOG.debug("{}: dataSize {} exceeds dataThreshold {} - doing snapshotPreCommit with index {}", + persistenceId(), context.getReplicatedLog().dataSize(), dataThreshold, + captureSnapshot.getLastAppliedIndex()); + } + // if memory is less, clear the log based on lastApplied. // this could/should only happen if one of the followers is down // as normally we keep removing from the log when its replicated to all. context.getReplicatedLog().snapshotPreCommit(captureSnapshot.getLastAppliedIndex(), captureSnapshot.getLastAppliedTerm()); - getCurrentBehavior().setReplicatedToAllIndex(captureSnapshot.getReplicatedToAllIndex()); + // Don't reset replicatedToAllIndex to -1 as this may prevent us from trimming the log after an + // install snapshot to a follower. 
+ if(captureSnapshot.getReplicatedToAllIndex() >= 0) { + getCurrentBehavior().setReplicatedToAllIndex(captureSnapshot.getReplicatedToAllIndex()); + } } else if(captureSnapshot.getReplicatedToAllIndex() != -1){ // clear the log based on replicatedToAllIndex context.getReplicatedLog().snapshotPreCommit(captureSnapshot.getReplicatedToAllIndex(), @@ -715,9 +726,9 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor { } - LOG.info("{}: Removed in-memory snapshotted entries, adjusted snaphsotIndex:{} " + - "and term:{}", persistenceId(), captureSnapshot.getLastAppliedIndex(), - captureSnapshot.getLastAppliedTerm()); + LOG.info("{}: Removed in-memory snapshotted entries, adjusted snaphsotIndex: {} " + + "and term: {}", persistenceId(), replicatedLog.getSnapshotIndex(), + replicatedLog.getSnapshotTerm()); if (isLeader() && captureSnapshot.isInstallSnapshotInitiated()) { // this would be call straight to the leader and won't initiate in serialization