BUG-8618: introduce RaftActor.unpauseLeader()
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / RaftActorLeadershipTransferCohort.java
index 85980e2ca371fe752d1743cc0f3aab6df186628a..5b27d50130a980891dbbbc2f072d0705fa3b8c9b 100644 (file)
@@ -16,6 +16,7 @@ import com.google.common.base.Stopwatch;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
+import javax.annotation.Nullable;
 import org.opendaylight.controller.cluster.raft.base.messages.LeaderTransitioning;
 import org.opendaylight.controller.cluster.raft.behaviors.Leader;
 import org.opendaylight.controller.cluster.raft.behaviors.RaftActorBehavior;
@@ -26,6 +27,7 @@ import scala.concurrent.duration.FiniteDuration;
 /**
  * A raft actor support class that participates in leadership transfer. An instance is created upon
  * initialization of leadership transfer.
+ *
  * <p>
  * The transfer process is as follows:
  * <ol>
@@ -35,13 +37,14 @@ import scala.concurrent.duration.FiniteDuration;
  *     their local RoleChangeNotifiers.</li>
  * <li>Call {@link RaftActor#pauseLeader} passing this RaftActorLeadershipTransferCohort
  *     instance. This allows derived classes to perform work prior to transferring leadership.</li>
- * <li>When the pause is complete, the {@link #run} method is called which in turn calls
- *     {@link Leader#transferLeadership}.</li>
+ * <li>When the pause is complete, the run method is called which in turn calls
+ *     {@link Leader#transferLeadership(RaftActorLeadershipTransferCohort)}.</li>
  * <li>The Leader calls {@link #transferComplete} on successful completion.</li>
  * <li>Wait a short period of time for the new leader to be elected to give the derived class a chance to
  *     possibly complete work that was suspended while we were transferring.</li>
  * <li>On notification of the new leader from the RaftActor or on time out, notify {@link OnComplete} callbacks.</li>
  * </ol>
+ *
  * <p>
  * NOTE: All methods on this class must be called on the actor's thread dispatcher as they may access/modify
  * internal state.
@@ -51,15 +54,31 @@ import scala.concurrent.duration.FiniteDuration;
 public class RaftActorLeadershipTransferCohort {
     private static final Logger LOG = LoggerFactory.getLogger(RaftActorLeadershipTransferCohort.class);
 
-    private final RaftActor raftActor;
-    private Cancellable newLeaderTimer;
+    static final long USE_DEFAULT_LEADER_TIMEOUT = -1;
+
     private final List<OnComplete> onCompleteCallbacks = new ArrayList<>();
-    private long newLeaderTimeoutInMillis = 2000;
     private final Stopwatch transferTimer = Stopwatch.createUnstarted();
+    private final RaftActor raftActor;
+    private final String requestedFollowerId;
+
+    private long newLeaderTimeoutInMillis = 2000;
+    private Cancellable newLeaderTimer;
     private boolean isTransferring;
 
-    RaftActorLeadershipTransferCohort(RaftActor raftActor) {
+    RaftActorLeadershipTransferCohort(final RaftActor raftActor) {
+        this(raftActor, null);
+    }
+
+    RaftActorLeadershipTransferCohort(final RaftActor raftActor, @Nullable final String requestedFollowerId) {
         this.raftActor = raftActor;
+        this.requestedFollowerId = requestedFollowerId;
+
+        // We'll wait up to twice the sum of the election timeout interval and its variance for a new leader to
+        // be elected, which leaves some cushion.
+        final long electionTimeout = raftActor.getRaftActorContext().getConfigParams()
+                .getElectionTimeOutInterval().toMillis();
+        final int variance = raftActor.getRaftActorContext().getConfigParams().getElectionTimeVariance();
+        newLeaderTimeoutInMillis = 2 * (electionTimeout + variance);
     }
 
     void init() {
@@ -69,28 +88,29 @@ public class RaftActorLeadershipTransferCohort {
         transferTimer.start();
 
         Optional<ActorRef> roleChangeNotifier = raftActor.getRoleChangeNotifier();
-        if(roleChangeNotifier.isPresent()) {
+        if (roleChangeNotifier.isPresent()) {
             roleChangeNotifier.get().tell(raftActor.newLeaderStateChanged(context.getId(), null,
                     currentBehavior.getLeaderPayloadVersion()), raftActor.self());
         }
 
-        for(String peerId: context.getPeerIds()) {
+        for (String peerId: context.getPeerIds()) {
             ActorSelection followerActor = context.getPeerActorSelection(peerId);
-            if(followerActor != null) {
-                followerActor.tell(LeaderTransitioning.INSTANCE, context.getActor());
+            if (followerActor != null) {
+                followerActor.tell(new LeaderTransitioning(context.getId()), context.getActor());
             }
         }
 
         raftActor.pauseLeader(new TimedRunnable(context.getConfigParams().getElectionTimeOutInterval(), raftActor) {
             @Override
             protected void doRun() {
+                LOG.debug("{}: pauseLeader successfully completed - doing transfer", raftActor.persistenceId());
                 doTransfer();
             }
 
             @Override
             protected void doCancel() {
-                LOG.debug("{}: pauseLeader timed out - aborting transfer", raftActor.persistenceId());
-                abortTransfer();
+                LOG.debug("{}: pauseLeader timed out - continuing with transfer", raftActor.persistenceId());
+                doTransfer();
             }
         });
     }
@@ -102,7 +122,7 @@ public class RaftActorLeadershipTransferCohort {
     void doTransfer() {
         RaftActorBehavior behavior = raftActor.getCurrentBehavior();
         // Sanity check...
-        if(behavior instanceof Leader) {
+        if (behavior instanceof Leader) {
             isTransferring = true;
             ((Leader)behavior).transferLeadership(this);
         } else {
@@ -131,42 +151,39 @@ public class RaftActorLeadershipTransferCohort {
         // and convert to follower due to higher term. We should then get an AppendEntries heart
         // beat with the new leader id.
 
-        // Add a timer in case we don't get a leader change - 2 sec should be plenty of time if a new
-        // leader is elected. Note: the Runnable is sent as a message to the raftActor which executes it
-        // safely run on the actor's thread dispatcher.
+        // Add a timer in case we don't get a leader change. Note: the Runnable is sent as a message to the raftActor
+        // which safely executes it on the actor's thread dispatcher.
         FiniteDuration timeout = FiniteDuration.create(newLeaderTimeoutInMillis, TimeUnit.MILLISECONDS);
         newLeaderTimer = raftActor.getContext().system().scheduler().scheduleOnce(timeout, raftActor.self(),
-                new Runnable() {
-                    @Override
-                    public void run() {
-                        LOG.debug("{}: leader not elected in time", raftActor.persistenceId());
-                        finish(true);
-                    }
-                }, raftActor.getContext().system().dispatcher(), raftActor.self());
+            (Runnable) () -> {
+                LOG.debug("{}: leader not elected in time", raftActor.persistenceId());
+                finish(true);
+            }, raftActor.getContext().system().dispatcher(), raftActor.self());
     }
 
-    void onNewLeader(String newLeader) {
-        if(newLeader != null && newLeaderTimer != null) {
+    void onNewLeader(final String newLeader) {
+        if (newLeader != null && newLeaderTimer != null) {
             LOG.debug("{}: leader changed to {}", raftActor.persistenceId(), newLeader);
             newLeaderTimer.cancel();
             finish(true);
         }
     }
 
-    private void finish(boolean success) {
+    private void finish(final boolean success) {
         isTransferring = false;
-        if(transferTimer.isRunning()) {
+        if (transferTimer.isRunning()) {
             transferTimer.stop();
-            if(success) {
+            if (success) {
                 LOG.info("{}: Successfully transferred leadership to {} in {}", raftActor.persistenceId(),
                         raftActor.getLeaderId(), transferTimer);
             } else {
                 LOG.warn("{}: Failed to transfer leadership in {}", raftActor.persistenceId(), transferTimer);
+                raftActor.unpauseLeader();
             }
         }
 
-        for(OnComplete onComplete: onCompleteCallbacks) {
-            if(success) {
+        for (OnComplete onComplete: onCompleteCallbacks) {
+            if (success) {
                 onComplete.onSuccess(raftActor.self());
             } else {
                 onComplete.onFailure(raftActor.self());
@@ -174,7 +191,7 @@ public class RaftActorLeadershipTransferCohort {
         }
     }
 
-    void addOnComplete(OnComplete onComplete) {
+    void addOnComplete(final OnComplete onComplete) {
         onCompleteCallbacks.add(onComplete);
     }
 
@@ -182,13 +199,19 @@ public class RaftActorLeadershipTransferCohort {
         return isTransferring;
     }
 
-    @VisibleForTesting
-    void setNewLeaderTimeoutInMillis(long newLeaderTimeoutInMillis) {
-        this.newLeaderTimeoutInMillis = newLeaderTimeoutInMillis;
+    void setNewLeaderTimeoutInMillis(final long newLeaderTimeoutInMillis) {
+        if (newLeaderTimeoutInMillis != USE_DEFAULT_LEADER_TIMEOUT) {
+            this.newLeaderTimeoutInMillis = newLeaderTimeoutInMillis;
+        }
+    }
+
+    public Optional<String> getRequestedFollowerId() {
+        return Optional.fromNullable(requestedFollowerId);
     }
 
     interface OnComplete {
         void onSuccess(ActorRef raftActorRef);
+
         void onFailure(ActorRef raftActorRef);
     }
 }