Add more info logging in sal-akka-raft 77/51877/1
authorTom Pantelis <tpanteli@brocade.com>
Sat, 11 Feb 2017 15:52:17 +0000 (10:52 -0500)
committerTom Pantelis <tpanteli@brocade.com>
Tue, 14 Feb 2017 21:41:02 +0000 (16:41 -0500)
Added more info logging for abnormal and infrequent code paths in
the raft behaviors to help in troubleshooting as these paths are
sometimes involved when something goes wrong.

Change-Id: I3017c81c2ef7100ca8a9477285ca637355c05e87
Signed-off-by: Tom Pantelis <tpanteli@brocade.com>
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractRaftActorBehavior.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Candidate.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java

index 180f492082ee05bde4477cd82add5a0c4d846f69..6c88548eaa285b93d61dd0a645e37be29037687a 100644 (file)
@@ -161,7 +161,7 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
         //2 peers = 2 votesRequired , minIsolatedLeaderPeerCount = 1
         //4 peers = 3 votesRequired, minIsolatedLeaderPeerCount = 2
 
-        return minReplicationCount > 0 ? (minReplicationCount - 1) : 0;
+        return minReplicationCount > 0 ? minReplicationCount - 1 : 0;
     }
 
     @VisibleForTesting
@@ -231,9 +231,10 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
             // snapshot. It's also possible that the follower's last log index is behind the leader's.
             // However in this case the log terms won't match and the logs will conflict - this is handled
             // elsewhere.
-            LOG.debug("{}: handleAppendEntriesReply: follower {} lastIndex {} is ahead of our lastIndex {} - forcing install snaphot",
-                    logName(), followerLogInformation.getId(), appendEntriesReply.getLogLastIndex(),
-                    context.getReplicatedLog().lastIndex());
+            LOG.info("{}: handleAppendEntriesReply: follower {} lastIndex {} is ahead of our lastIndex {} "
+                    + "(snapshotIndex {}) - forcing install snaphot", logName(), followerLogInformation.getId(),
+                    appendEntriesReply.getLogLastIndex(), context.getReplicatedLog().lastIndex(),
+                    context.getReplicatedLog().getSnapshotIndex());
 
             followerLogInformation.setMatchIndex(-1);
             followerLogInformation.setNextIndex(-1);
@@ -255,7 +256,7 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 followerLogInformation.setNextIndex(followerLastLogIndex - 1);
                 updated = true;
 
-                LOG.debug("{}: handleAppendEntriesReply: follower {} last log term {} for index {} conflicts with the " +
+                LOG.info("{}: handleAppendEntriesReply: follower {} last log term {} for index {} conflicts with the " +
                           "leader's {} - set the follower's next index to {}",
                         logName(), followerId, appendEntriesReply.getLogLastTerm(), appendEntriesReply.getLogLastIndex(),
                         followersLastLogTermInLeadersLog, followerLogInformation.getNextIndex());
@@ -263,7 +264,8 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 updated = updateFollowerLogInformation(followerLogInformation, appendEntriesReply);
             }
         } else {
-            LOG.debug("{}: handleAppendEntriesReply: received unsuccessful reply: {}", logName(), appendEntriesReply);
+            LOG.info("{}: handleAppendEntriesReply - received unsuccessful reply: {}, leader snapshotIndex: {}",
+                    logName(), appendEntriesReply, context.getReplicatedLog().getSnapshotIndex());
 
             if(appendEntriesReply.isForceInstallSnapshot()) {
                 // Reset the followers match and next index. This is to signal that this follower has nothing
@@ -281,6 +283,11 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 // follower's last log index.
 
                 updated = updateFollowerLogInformation(followerLogInformation, appendEntriesReply);
+
+                LOG.info("{}: follower {} appears to be behind the leader from the last snapshot - "
+                    + "updated: matchIndex: {}, nextIndex: {}", logName(), followerId,
+                    appendEntriesReply.getLogLastTerm(), followerLogInformation.getMatchIndex(),
+                    followerLogInformation.getNextIndex());
             } else {
                 // The follower's log conflicts with leader's log so decrement follower's next index by 1
                 // in an attempt to find where the logs match.
@@ -288,8 +295,8 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 followerLogInformation.decrNextIndex();
                 updated = true;
 
-                LOG.debug("{}: follower's last log term {} conflicts with the leader's {} - dec next index to {}",
-                        logName(), appendEntriesReply.getLogLastTerm(), followersLastLogTermInLeadersLog,
+                LOG.info("{}: follower {} last log term {} conflicts with the leader's {} - dec next index to {}",
+                        logName(), followerId, appendEntriesReply.getLogLastTerm(), followersLastLogTermInLeadersLog,
                         followerLogInformation.getNextIndex());
             }
         }
@@ -425,7 +432,7 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
             // set currentTerm = T, convert to follower (§5.1)
             // This applies to all RPC messages and responses
             if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
-                LOG.debug("{}: Term {} in \"{}\" message is greater than leader's term {} - switching to Follower",
+                LOG.info("{}: Term {} in \"{}\" message is greater than leader's term {} - switching to Follower",
                         logName(), rpc.getTerm(), rpc, context.getTermInformation().getCurrentTerm());
 
                 context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
@@ -479,22 +486,15 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
             if (reply.isSuccess()) {
                 if(installSnapshotState.isLastChunk(reply.getChunkIndex())) {
                     //this was the last chunk reply
-                    if(LOG.isDebugEnabled()) {
-                        LOG.debug("{}: InstallSnapshotReply received, " +
-                                "last chunk received, Chunk: {}. Follower: {} Setting nextIndex: {}",
-                                logName(), reply.getChunkIndex(), followerId,
-                            context.getReplicatedLog().getSnapshotIndex() + 1
-                        );
-                    }
 
                     long followerMatchIndex = snapshot.get().getLastIncludedIndex();
                     followerLogInformation.setMatchIndex(followerMatchIndex);
                     followerLogInformation.setNextIndex(followerMatchIndex + 1);
                     followerLogInformation.clearLeaderInstallSnapshotState();
 
-                    LOG.debug("{}: follower: {}, matchIndex set to {}, nextIndex set to {}",
-                        logName(), followerId, followerLogInformation.getMatchIndex(),
-                        followerLogInformation.getNextIndex());
+                    LOG.info("{}: Snapshot successfully installed on follower {} (last chunk {}) - "
+                        + "matchIndex set to {}, nextIndex set to {}", logName(), followerId, reply.getChunkIndex(),
+                        followerLogInformation.getMatchIndex(), followerLogInformation.getNextIndex());
 
                     if (!anyFollowersInstallingSnapshot()) {
                         // once there are no pending followers receiving snapshots
@@ -513,8 +513,7 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                     installSnapshotState.markSendStatus(true);
                 }
             } else {
-                LOG.info("{}: InstallSnapshotReply received sending snapshot chunk failed, Will retry, Chunk: {}",
-                        logName(), reply.getChunkIndex());
+                LOG.warn("{}: Received failed InstallSnapshotReply - will retry: {}", logName(), reply);
 
                 installSnapshotState.markSendStatus(false);
             }
@@ -622,7 +621,7 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 long leaderLastIndex = context.getReplicatedLog().lastIndex();
                 long leaderSnapShotIndex = context.getReplicatedLog().getSnapshotIndex();
 
-                if((!isHeartbeat && LOG.isDebugEnabled()) || LOG.isTraceEnabled()) {
+                if(!isHeartbeat && LOG.isDebugEnabled() || LOG.isTraceEnabled()) {
                     LOG.debug("{}: Checking sendAppendEntries for follower {}: active: {}, followerNextIndex: {}, leaderLastIndex: {}, leaderSnapShotIndex: {}",
                             logName(), followerId, isFollowerActive, followerNextIndex, leaderLastIndex, leaderSnapShotIndex);
                 }
@@ -646,17 +645,22 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                     // if the follower is just not starting and if leader's index is more than followers index
                     // then snapshot should be sent
 
-                    if (LOG.isDebugEnabled()) {
-                        LOG.debug(String.format("%s: InitiateInstallSnapshot to follower: %s, " +
-                                    "follower-nextIndex: %d, leader-snapshot-index: %d,  " +
-                                    "leader-last-index: %d", logName(), followerId,
-                                    followerNextIndex, leaderSnapShotIndex, leaderLastIndex));
-                    }
-
                     // Send heartbeat to follower whenever install snapshot is initiated.
                     sendAppendEntries = true;
                     if (canInstallSnapshot(followerNextIndex)) {
+                        LOG.info("{}: Initiating install snapshot to follower {}: follower nextIndex: {}, leader "
+                                + "snapshotIndex: {}, leader lastIndex: {}, leader log size: {}", logName(), followerId,
+                                followerNextIndex, leaderSnapShotIndex, leaderLastIndex,
+                                context.getReplicatedLog().size());
+
                         initiateCaptureSnapshot(followerId);
+                    } else {
+                        // It doesn't seem like we should ever reach here - most likely indicates sonething is
+                        // wrong.
+                        LOG.info("{}: Follower {} is behind but cannot install snapshot: follower nextIndex: {}, "
+                                + "leader snapshotIndex: {}, leader lastIndex: {}, leader log size: {}", logName(),
+                                followerId, followerNextIndex, leaderSnapShotIndex, leaderLastIndex,
+                                context.getReplicatedLog().size());
                     }
 
                 } else if(sendHeartbeat) {
@@ -745,8 +749,8 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
         // Otherwise send it a snapshot only if the nextIndex is not present in the log but is present
         // in the snapshot
         return nextIndex == -1 ||
-                (!context.getReplicatedLog().isPresent(nextIndex)
-                        && context.getReplicatedLog().isInSnapshot(nextIndex));
+                !context.getReplicatedLog().isPresent(nextIndex)
+                        && context.getReplicatedLog().isInSnapshot(nextIndex);
 
     }
 
index 375b3779b891dd94e1a3b702f9396853478de265..c03152811a98ac38c411b73fb1099460f60e5966 100644 (file)
@@ -132,10 +132,8 @@ public abstract class AbstractRaftActorBehavior implements RaftActorBehavior {
 
         // 1. Reply false if term < currentTerm (§5.1)
         if (appendEntries.getTerm() < currentTerm()) {
-            if(LOG.isDebugEnabled()) {
-                LOG.debug("{}: Cannot append entries because sender term {} is less than {}",
-                        logName(), appendEntries.getTerm(), currentTerm());
-            }
+            LOG.info("{}: Cannot append entries because sender's term {} is less than {}", logName(),
+                    appendEntries.getTerm(), currentTerm());
 
             sender.tell(
                 new AppendEntriesReply(context.getId(), currentTerm(), false,
@@ -213,7 +211,7 @@ public abstract class AbstractRaftActorBehavior implements RaftActorBehavior {
             // more up-to-date.
             if (requestVote.getLastLogTerm() > lastTerm()) {
                 candidateLatest = true;
-            } else if ((requestVote.getLastLogTerm() == lastTerm())
+            } else if (requestVote.getLastLogTerm() == lastTerm()
                     && requestVote.getLastLogIndex() >= lastIndex()) {
                 candidateLatest = true;
             }
@@ -427,7 +425,8 @@ public abstract class AbstractRaftActorBehavior implements RaftActorBehavior {
             return this;
         }
 
-        LOG.info("{} :- Switching from behavior {} to {}", logName(), this.state(), newBehavior.state());
+        LOG.info("{} :- Switching from behavior {} to {}, election term: {}", logName(), this.state(),
+                newBehavior.state(), context.getTermInformation().getCurrentTerm());
         try {
             close();
         } catch (Exception e) {
index 176704f3d377323a962d5a171cfe53c84beb494e..da1c9c14d6f279ccdcdd6cd72025544bc5602cea 100644 (file)
@@ -92,8 +92,8 @@ public class Candidate extends AbstractRaftActorBehavior {
 
         // Some other candidate for the same term became a leader and sent us an append entry
         if(currentTerm() == appendEntries.getTerm()){
-            LOG.debug("{}: New Leader sent an append entry to Candidate for term {} will switch to Follower",
-                    logName(), currentTerm());
+            LOG.info("{}: New Leader {} sent an AppendEntries to Candidate for term {} - will switch to Follower",
+                    logName(), appendEntries.getLeaderId(), currentTerm());
 
             return switchBehavior(new Follower(context));
         }
@@ -116,8 +116,8 @@ public class Candidate extends AbstractRaftActorBehavior {
 
         if (voteCount >= votesRequired) {
             if(context.getLastApplied() < context.getReplicatedLog().lastIndex()) {
-                LOG.debug("{}: LastApplied index {} is behind last index {}", logName(), context.getLastApplied(),
-                        context.getReplicatedLog().lastIndex());
+                LOG.info("{}: LastApplied index {} is behind last index {} - switching to PreLeader",
+                        logName(), context.getLastApplied(), context.getReplicatedLog().lastIndex());
                 return internalSwitchBehavior(RaftState.PreLeader);
             } else {
                 return internalSwitchBehavior(RaftState.Leader);
@@ -160,6 +160,9 @@ public class Candidate extends AbstractRaftActorBehavior {
             // set currentTerm = T, convert to follower (§5.1)
             // This applies to all RPC messages and responses
             if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
+                LOG.info("{}: Term {} in \"{}\" message is greater than Candidate's term {} - switching to Follower",
+                        logName(), rpc.getTerm(), rpc, context.getTermInformation().getCurrentTerm());
+
                 context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
 
                 // The raft paper does not say whether or not a Candidate can/should process a RequestVote in
@@ -187,7 +190,7 @@ public class Candidate extends AbstractRaftActorBehavior {
         long newTerm = currentTerm + 1;
         context.getTermInformation().updateAndPersist(newTerm, context.getId());
 
-        LOG.debug("{}: Starting new term {}", logName(), newTerm);
+        LOG.info("{}: Starting new election term {}", logName(), newTerm);
 
         // Request for a vote
         // TODO: Retry request for vote if replies do not arrive in a reasonable
index 5c9c0b749c8dcfcf772b5723bd10c0e6923ee872..006d05e28cbc9f784c3bfdaa4c85faff30112298 100644 (file)
@@ -169,11 +169,12 @@ public class Follower extends AbstractRaftActorBehavior {
             // We found that the log was out of sync so just send a negative
             // reply and return
 
-            LOG.debug("{}: Follower is out-of-sync, so sending negative reply, lastIndex: {}, lastTerm: {}",
-                        logName(), lastIndex, lastTerm());
+            final AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
+                    lastTerm(), context.getPayloadVersion());
 
-            sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
-                    lastTerm(), context.getPayloadVersion()), actor());
+            LOG.info("{}: Follower is out-of-sync so sending negative reply: {}", logName(), reply);
+
+            sender.tell(reply, actor());
             return this;
         }
 
@@ -210,8 +211,7 @@ public class Follower extends AbstractRaftActorBehavior {
 
                     if(!context.getRaftPolicy().applyModificationToStateBeforeConsensus()) {
 
-                        LOG.debug("{}: Removing entries from log starting at {}", logName(),
-                                matchEntry.getIndex());
+                        LOG.info("{}: Removing entries from log starting at {}", logName(), matchEntry.getIndex());
 
                         // Entries do not match so remove all subsequent entries
                         if(!context.getReplicatedLog().removeFromAndPersist(matchEntry.getIndex())) {
@@ -220,7 +220,7 @@ public class Follower extends AbstractRaftActorBehavior {
                             // so we must send back a reply to force a snapshot to completely re-sync the
                             // follower's log and state.
 
-                            LOG.debug("{}: Could not remove entries - sending reply to force snapshot", logName());
+                            LOG.info("{}: Could not remove entries - sending reply to force snapshot", logName());
                             sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
                                     lastTerm(), context.getPayloadVersion(), true), actor());
                             return this;
@@ -316,14 +316,14 @@ public class Follower extends AbstractRaftActorBehavior {
             // an entry at prevLogIndex and this follower has no entries in
             // it's log.
 
-            LOG.debug("{}: The followers log is empty and the senders prevLogIndex is {}",
+            LOG.info("{}: The followers log is empty and the senders prevLogIndex is {}",
                         logName(), appendEntries.getPrevLogIndex());
         } else if (lastIndex > -1 && appendEntries.getPrevLogIndex() != -1 && !prevEntryPresent) {
 
             // The follower's log is out of sync because the Leader's
             // prevLogIndex entry was not found in it's log
 
-            LOG.debug("{}: The log is not empty but the prevLogIndex {} was not found in it - lastIndex: {}, snapshotIndex: {}",
+            LOG.info("{}: The log is not empty but the prevLogIndex {} was not found in it - lastIndex: {}, snapshotIndex: {}",
                         logName(), appendEntries.getPrevLogIndex(), lastIndex, context.getReplicatedLog().getSnapshotIndex());
         } else if (lastIndex > -1 && prevEntryPresent && prevLogTerm != appendEntries.getPrevLogTerm()) {
 
@@ -331,7 +331,7 @@ public class Follower extends AbstractRaftActorBehavior {
             // prevLogIndex entry does exist in the follower's log but it has
             // a different term in it
 
-            LOG.debug("{}: The prevLogIndex {} was found in the log but the term {} is not equal to the append entries " +
+            LOG.info("{}: The prevLogIndex {} was found in the log but the term {} is not equal to the append entries " +
                       "prevLogTerm {} - lastIndex: {}, snapshotIndex: {}", logName(), appendEntries.getPrevLogIndex(),
                       prevLogTerm, appendEntries.getPrevLogTerm(), lastIndex, context.getReplicatedLog().getSnapshotIndex());
         } else if(appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1
@@ -340,13 +340,13 @@ public class Follower extends AbstractRaftActorBehavior {
             // This append entry comes from a leader who has it's log aggressively trimmed and so does not have
             // the previous entry in it's in-memory journal
 
-            LOG.debug(
+            LOG.info(
                     "{}: Cannot append entries because the replicatedToAllIndex {} does not appear to be in the in-memory journal",
                     logName(), appendEntries.getReplicatedToAllIndex());
         } else if(appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1
                 && appendEntries.getReplicatedToAllIndex() != -1 && numLogEntries > 0
                 && !isLogEntryPresent(appendEntries.getEntries().get(0).getIndex() - 1)) {
-            LOG.debug(
+            LOG.info(
                     "{}: Cannot append entries because the calculated previousIndex {} was not found in the in-memory journal",
                     logName(), appendEntries.getEntries().get(0).getIndex() - 1);
         } else {
@@ -383,7 +383,7 @@ public class Follower extends AbstractRaftActorBehavior {
         // set currentTerm = T, convert to follower (§5.1)
         // This applies to all RPC messages and responses
         if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
-            LOG.debug("{}: Term {} in \"{}\" message is greater than follower's term {} - updating term",
+            LOG.info("{}: Term {} in \"{}\" message is greater than follower's term {} - updating term",
                 logName(), rpc.getTerm(), rpc, context.getTermInformation().getCurrentTerm());
 
             context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
@@ -515,6 +515,9 @@ public class Follower extends AbstractRaftActorBehavior {
 
             if(snapshotTracker.addChunk(installSnapshot.getChunkIndex(), installSnapshot.getData(),
                     installSnapshot.getLastChunkHashCode())){
+
+                LOG.info("{}: Snapshot installed from leader: {}", logName(), installSnapshot.getLeaderId());
+
                 Snapshot snapshot = Snapshot.create(snapshotTracker.getSnapshot(),
                         new ArrayList<ReplicatedLogEntry>(),
                         installSnapshot.getLastIncludedIndex(),