Fix intermittent PreLeaderScenarioTest failure
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / behaviors / AbstractLeader.java
index 74e8b2049f1506d148c4c4782c17091266209778..097f0ec677ea52355d99a1256c3a219ccf522869 100644 (file)
@@ -45,6 +45,7 @@ import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
 import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
+import org.opendaylight.controller.cluster.raft.messages.RequestVote;
 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
 import org.opendaylight.controller.cluster.raft.messages.UnInitializedFollowerSnapshotReply;
 import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
@@ -288,12 +289,13 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 // The follower's log conflicts with leader's log so decrement follower's next index by 1
                 // in an attempt to find where the logs match.
 
-                followerLogInformation.decrNextIndex();
-                updated = true;
+                if (followerLogInformation.decrNextIndex()) {
+                    updated = true;
 
-                log.info("{}: follower {} last log term {} conflicts with the leader's {} - dec next index to {}",
-                        logName(), followerId, appendEntriesReply.getLogLastTerm(), followersLastLogTermInLeadersLog,
-                        followerLogInformation.getNextIndex());
+                    log.info("{}: follower {} last log term {} conflicts with the leader's {} - dec next index to {}",
+                            logName(), followerId, appendEntriesReply.getLogLastTerm(),
+                            followersLastLogTermInLeadersLog, followerLogInformation.getNextIndex());
+                }
             }
         }
 
@@ -447,6 +449,19 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
 
                 context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
 
+                // This is a special case. Normally when stepping down as leader we don't process and reply to the
+                // RaftRPC as per Raft. But if we're in the process of transferring leadership and we get a
+                // RequestVote, process the RequestVote before switching to Follower. This enables the requesting
+                // candidate node to be elected the leader faster and avoids us possibly timing out in the Follower
+                // state and starting a new election and grabbing leadership back before the other candidate node can
+                // start a new election due to lack of responses. This case would only occur if there isn't a majority
+                // of other nodes available that can elect the requesting candidate. Since we're transferring
+                // leadership, we should make every effort to get the requesting node elected.
+                if (message instanceof RequestVote && context.getRaftActorLeadershipTransferCohort() != null) {
+                    log.debug("{}: Leadership transfer in progress - processing RequestVote", logName());
+                    super.handleMessage(sender, message);
+                }
+
                 return internalSwitchBehavior(RaftState.Follower);
             }
         }
@@ -530,10 +545,12 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 installSnapshotState.markSendStatus(false);
             }
 
-            if (wasLastChunk && !context.getSnapshotManager().isCapturing()) {
-                // Since the follower is now caught up try to purge the log.
-                purgeInMemoryLog();
-            } else if (!wasLastChunk && installSnapshotState.canSendNextChunk()) {
+            if (wasLastChunk) {
+                if (!context.getSnapshotManager().isCapturing()) {
+                    // Since the follower is now caught up try to purge the log.
+                    purgeInMemoryLog();
+                }
+            } else {
                 ActorSelection followerActor = context.getPeerActorSelection(followerId);
                 if (followerActor != null) {
                     sendSnapshotChunk(followerActor, followerLogInformation);
@@ -801,6 +818,10 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                 // Ensure the snapshot bytes are set - this is a no-op.
                 installSnapshotState.setSnapshotBytes(snapshotHolder.get().getSnapshotBytes());
 
+                if (!installSnapshotState.canSendNextChunk()) {
+                    return;
+                }
+
                 byte[] nextSnapshotChunk = installSnapshotState.getNextChunk();
 
                 log.debug("{}: next snapshot chunk size for follower {}: {}", logName(), followerLogInfo.getId(),