Changes in RaftActor#handleCaptureSnapshotReply 68/16368/3
author: Tom Pantelis <tpanteli@brocade.com>
Wed, 11 Mar 2015 23:30:59 +0000 (19:30 -0400)
committer: Tom Pantelis <tpanteli@brocade.com>
Thu, 12 Mar 2015 00:30:23 +0000 (20:30 -0400)
captureSnapshot.getReplicatedToAllIndex() is usually -1 (in fact I
haven't yet been able to come up with a test scenario where it isn't), and
we shouldn't reset the behavior's replicatedToAllIndex history when
we snapshot because the memory threshold was exceeded. Resetting it prevents
log trimming after a lagging follower is caught up via install snapshot.
Eventually log trimming would catch up on subsequent replicates, but I don't
see a reason why we should reset it to -1.

Also made a couple of other logging changes.

Change-Id: I6b9eafc84455a88c3bc1fc91608fe257c03b4093
Signed-off-by: Tom Pantelis <tpanteli@brocade.com>
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java

index ec3f375bdeb0fb940e4d08d14397ebd03f957853..01787a849759d1462f138b2f866be1ae4240e019 100644 (file)
@@ -371,7 +371,7 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor {
                 context.getReplicatedLog().size());
 
         } else if (message instanceof CaptureSnapshot) {
                 context.getReplicatedLog().size());
 
         } else if (message instanceof CaptureSnapshot) {
-            LOG.info("{}: CaptureSnapshot received by actor", persistenceId());
+            LOG.debug("{}: CaptureSnapshot received by actor: {}", persistenceId(), message);
 
             if(captureSnapshot == null) {
                 captureSnapshot = (CaptureSnapshot)message;
 
             if(captureSnapshot == null) {
                 captureSnapshot = (CaptureSnapshot)message;
@@ -675,7 +675,7 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor {
     }
 
     private void handleCaptureSnapshotReply(byte[] snapshotBytes) {
     }
 
     private void handleCaptureSnapshotReply(byte[] snapshotBytes) {
-        LOG.info("{}: CaptureSnapshotReply received by actor: snapshot size {}", persistenceId(), snapshotBytes.length);
+        LOG.debug("{}: CaptureSnapshotReply received by actor: snapshot size {}", persistenceId(), snapshotBytes.length);
 
         // create a snapshot object from the state provided and save it
         // when snapshot is saved async, SaveSnapshotSuccess is raised.
 
         // create a snapshot object from the state provided and save it
         // when snapshot is saved async, SaveSnapshotSuccess is raised.
@@ -692,13 +692,24 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor {
         long dataThreshold = Runtime.getRuntime().totalMemory() *
                 getRaftActorContext().getConfigParams().getSnapshotDataThresholdPercentage() / 100;
         if (context.getReplicatedLog().dataSize() > dataThreshold) {
         long dataThreshold = Runtime.getRuntime().totalMemory() *
                 getRaftActorContext().getConfigParams().getSnapshotDataThresholdPercentage() / 100;
         if (context.getReplicatedLog().dataSize() > dataThreshold) {
+
+            if(LOG.isDebugEnabled()) {
+                LOG.debug("{}: dataSize {} exceeds dataThreshold {} - doing snapshotPreCommit with index {}",
+                        persistenceId(), context.getReplicatedLog().dataSize(), dataThreshold,
+                        captureSnapshot.getLastAppliedIndex());
+            }
+
             // if memory is less, clear the log based on lastApplied.
             // this could/should only happen if one of the followers is down
             // as normally we keep removing from the log when its replicated to all.
             context.getReplicatedLog().snapshotPreCommit(captureSnapshot.getLastAppliedIndex(),
                     captureSnapshot.getLastAppliedTerm());
 
             // if memory is less, clear the log based on lastApplied.
             // this could/should only happen if one of the followers is down
             // as normally we keep removing from the log when its replicated to all.
             context.getReplicatedLog().snapshotPreCommit(captureSnapshot.getLastAppliedIndex(),
                     captureSnapshot.getLastAppliedTerm());
 
-            getCurrentBehavior().setReplicatedToAllIndex(captureSnapshot.getReplicatedToAllIndex());
+            // Don't reset replicatedToAllIndex to -1 as this may prevent us from trimming the log after an
+            // install snapshot to a follower.
+            if(captureSnapshot.getReplicatedToAllIndex() >= 0) {
+                getCurrentBehavior().setReplicatedToAllIndex(captureSnapshot.getReplicatedToAllIndex());
+            }
         } else if(captureSnapshot.getReplicatedToAllIndex() != -1){
             // clear the log based on replicatedToAllIndex
             context.getReplicatedLog().snapshotPreCommit(captureSnapshot.getReplicatedToAllIndex(),
         } else if(captureSnapshot.getReplicatedToAllIndex() != -1){
             // clear the log based on replicatedToAllIndex
             context.getReplicatedLog().snapshotPreCommit(captureSnapshot.getReplicatedToAllIndex(),
@@ -715,9 +726,9 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor {
         }
 
 
         }
 
 
-        LOG.info("{}: Removed in-memory snapshotted entries, adjusted snaphsotIndex:{} " +
-            "and term:{}", persistenceId(), captureSnapshot.getLastAppliedIndex(),
-            captureSnapshot.getLastAppliedTerm());
+        LOG.info("{}: Removed in-memory snapshotted entries, adjusted snaphsotIndex: {} " +
+            "and term: {}", persistenceId(), replicatedLog.getSnapshotIndex(),
+            replicatedLog.getSnapshotTerm());
 
         if (isLeader() && captureSnapshot.isInstallSnapshotInitiated()) {
             // this would be call straight to the leader and won't initiate in serialization
 
         if (isLeader() && captureSnapshot.isInstallSnapshotInitiated()) {
             // this would be call straight to the leader and won't initiate in serialization