BUG-8618: improve debug logs
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / behaviors / AbstractLeader.java
index 4382cfec3237075087837b5f1402cadab249188f..097f0ec677ea52355d99a1256c3a219ccf522869 100644 (file)
@@ -45,6 +45,7 @@ import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
 import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
+import org.opendaylight.controller.cluster.raft.messages.RequestVote;
 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
 import org.opendaylight.controller.cluster.raft.messages.UnInitializedFollowerSnapshotReply;
 import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
@@ -448,6 +449,19 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
 
                 context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
 
+                // This is a special case. Normally when stepping down as leader we don't process and reply to the
+                // RaftRPC as per raft. But if we're in the process of transferring leadership and we get a
+                // RequestVote, process the RequestVote before switching to Follower. This enables the requesting
+                // candidate node to be elected the leader faster and avoids us possibly timing out in the Follower
+                // state and starting a new election and grabbing leadership back before the other candidate node can
+                // start a new election due to lack of responses. This case would only occur if there isn't a majority
+                // of other nodes available that can elect the requesting candidate. Since we're transferring
+                // leadership, we should make every effort to get the requesting node elected.
+                if (message instanceof RequestVote && context.getRaftActorLeadershipTransferCohort() != null) {
+                    log.debug("{}: Leadership transfer in progress - processing RequestVote", logName());
+                    super.handleMessage(sender, message);
+                }
+
                 return internalSwitchBehavior(RaftState.Follower);
             }
         }
@@ -531,10 +545,12 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 installSnapshotState.markSendStatus(false);
             }
 
-            if (wasLastChunk && !context.getSnapshotManager().isCapturing()) {
-                // Since the follower is now caught up try to purge the log.
-                purgeInMemoryLog();
-            } else if (!wasLastChunk && installSnapshotState.canSendNextChunk()) {
+            if (wasLastChunk) {
+                if (!context.getSnapshotManager().isCapturing()) {
+                    // Since the follower is now caught up try to purge the log.
+                    purgeInMemoryLog();
+                }
+            } else {
                 ActorSelection followerActor = context.getPeerActorSelection(followerId);
                 if (followerActor != null) {
                     sendSnapshotChunk(followerActor, followerLogInformation);
@@ -802,6 +818,10 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 // Ensure the snapshot bytes are set - this is a no-op.
                 installSnapshotState.setSnapshotBytes(snapshotHolder.get().getSnapshotBytes());
 
+                if (!installSnapshotState.canSendNextChunk()) {
+                    return;
+                }
+
                 byte[] nextSnapshotChunk = installSnapshotState.getNextChunk();
 
                 log.debug("{}: next snapshot chunk size for follower {}: {}", logName(), followerLogInfo.getId(),