Fix intermittent PreLeaderScenarioTest failure

[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / behaviors / AbstractLeader.java
diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java

index 74e8b2049f1506d148c4c4782c17091266209778..097f0ec677ea52355d99a1256c3a219ccf522869 100644 (file)
--- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java
+++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java
@@ -45,6 +45,7 @@ import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
  import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
  import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
  import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
+import org.opendaylight.controller.cluster.raft.messages.RequestVote;
  import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
  import org.opendaylight.controller.cluster.raft.messages.UnInitializedFollowerSnapshotReply;
  import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
@@ -288,12 +289,13 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                  // The follower's log conflicts with leader's log so decrement follower's next index by 1
                  // in an attempt to find where the logs match.
  
-                followerLogInformation.decrNextIndex();
-                updated = true;
+                if (followerLogInformation.decrNextIndex()) {
+                    updated = true;
  
-                log.info("{}: follower {} last log term {} conflicts with the leader's {} - dec next index to {}",
-                        logName(), followerId, appendEntriesReply.getLogLastTerm(), followersLastLogTermInLeadersLog,
-                        followerLogInformation.getNextIndex());
+                    log.info("{}: follower {} last log term {} conflicts with the leader's {} - dec next index to {}",
+                            logName(), followerId, appendEntriesReply.getLogLastTerm(),
+                            followersLastLogTermInLeadersLog, followerLogInformation.getNextIndex());
+                }
              }
          }
  
@@ -447,6 +449,19 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
  
                  context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
  
+                // This is a special case. Normally when stepping down as leader we don't process and reply to the
+                // RaftRPC as per Raft. But if we're in the process of transferring leadership and we get a
+                // RequestVote, process the RequestVote before switching to Follower. This enables the requesting
+                // candidate node to be elected the leader faster and avoids us possibly timing out in the Follower
+                // state and starting a new election and grabbing leadership back before the other candidate node can
+                // start a new election due to lack of responses. This case would only occur if there isn't a majority
+                // of other nodes available that can elect the requesting candidate. Since we're transferring
+                // leadership, we should make every effort to get the requesting node elected.
+                if (message instanceof RequestVote && context.getRaftActorLeadershipTransferCohort() != null) {
+                    log.debug("{}: Leadership transfer in progress - processing RequestVote", logName());
+                    super.handleMessage(sender, message);
+                }
+
                  return internalSwitchBehavior(RaftState.Follower);
              }
          }
@@ -530,10 +545,12 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                  installSnapshotState.markSendStatus(false);
              }
  
-            if (wasLastChunk && !context.getSnapshotManager().isCapturing()) {
-                // Since the follower is now caught up try to purge the log.
-                purgeInMemoryLog();
-            } else if (!wasLastChunk && installSnapshotState.canSendNextChunk()) {
+            if (wasLastChunk) {
+                if (!context.getSnapshotManager().isCapturing()) {
+                    // Since the follower is now caught up try to purge the log.
+                    purgeInMemoryLog();
+                }
+            } else {
                  ActorSelection followerActor = context.getPeerActorSelection(followerId);
                  if (followerActor != null) {
                      sendSnapshotChunk(followerActor, followerLogInformation);
@@ -801,6 +818,10 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
                  // Ensure the snapshot bytes are set - this is a no-op.
                  installSnapshotState.setSnapshotBytes(snapshotHolder.get().getSnapshotBytes());
  
+                if (!installSnapshotState.canSendNextChunk()) {
+                    return;
+                }
+
                  byte[] nextSnapshotChunk = installSnapshotState.getNextChunk();
  
                  log.debug("{}: next snapshot chunk size for follower {}: {}", logName(), followerLogInfo.getId(),