Bug 8606: Continue leadership transfer on pauseLeader timeout 94/58794/2
author Tom Pantelis <tompantelis@gmail.com>
Mon, 12 Jun 2017 13:42:38 +0000 (09:42 -0400)
committer Robert Varga <nite@hq.sk>
Tue, 13 Jun 2017 10:19:25 +0000 (10:19 +0000)
Modified RaftActorLeadershipTransferCohort to continue with leadership
transfer if pauseLeader times out instead of aborting. The shard may have
a lot of transactions queued up which it can't finish in time, but there
may still be a follower that is caught up (i.e. whose matchIndex equals
the leader's lastIndex) or would be caught up if leadership transfer
continued. The worst case is that no follower is available and the
"catch up" phase of leadership transfer also times out, which would
lengthen shutdown time, but that should be fine.

Change-Id: I1ec1ef43bb556e50416bb7239ce3c267265db9b3
Signed-off-by: Tom Pantelis <tompantelis@gmail.com>
(cherry picked from commit dac16f0d464eff3325b3800a803e81b303964e4b)

opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java

index 45e497de46cc4bd2c29a3ffd0fd403df1f0d902c..0a22abb9657ff5813ce27a34736e78e5b7e3bc3a 100644 (file)
@@ -103,13 +103,14 @@ public class RaftActorLeadershipTransferCohort {
         raftActor.pauseLeader(new TimedRunnable(context.getConfigParams().getElectionTimeOutInterval(), raftActor) {
             @Override
             protected void doRun() {
+                LOG.debug("{}: pauseLeader successfully completed - doing transfer", raftActor.persistenceId());
                 doTransfer();
             }
 
             @Override
             protected void doCancel() {
-                LOG.debug("{}: pauseLeader timed out - aborting transfer", raftActor.persistenceId());
-                abortTransfer();
+                LOG.debug("{}: pauseLeader timed out - continuing with transfer", raftActor.persistenceId());
+                doTransfer();
             }
         });
     }
index ed96159f6b6dbfb43580e3d83cf6a3297a99a99b..44b0d2be6b5ca86a7135a297f4ef56428d7c081b 100644 (file)
@@ -17,6 +17,7 @@ import com.google.common.base.Function;
 import org.junit.After;
 import org.junit.Test;
 import org.opendaylight.controller.cluster.raft.RaftActorLeadershipTransferCohort.OnComplete;
+import org.opendaylight.controller.cluster.raft.behaviors.Leader;
 import org.opendaylight.controller.cluster.raft.policy.DisableElectionsRaftPolicy;
 
 /**
@@ -39,6 +40,7 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest {
 
     private void setup(String testName) {
         String persistenceId = factory.generateActorId(testName + "-leader-");
+        config.setCustomRaftPolicyImplementationClass(DisableElectionsRaftPolicy.class.getName());
         mockRaftActor = factory.<MockRaftActor>createTestActor(MockRaftActor.builder().id(persistenceId).config(config)
                 .pauseLeaderFunction(pauseLeaderFunction).props().withDispatcher(Dispatchers.DefaultDispatcherId()),
                 persistenceId).underlyingActor();
@@ -74,7 +76,6 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest {
 
     @Test
     public void testNotLeaderOnDoTransfer() {
-        config.setCustomRaftPolicyImplementationClass(DisableElectionsRaftPolicy.class.getName());
         setup("testNotLeaderOnDoTransfer");
         cohort.doTransfer();
         verify(onComplete).onSuccess(mockRaftActor.self());
@@ -90,9 +91,17 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest {
     @Test
     public void testPauseLeaderTimeout() {
         pauseLeaderFunction = input -> null;
-
         setup("testPauseLeaderTimeout");
+
+        Leader leader = new Leader(mockRaftActor.getRaftActorContext()) {
+            @Override
+            public void transferLeadership(RaftActorLeadershipTransferCohort leadershipTransferCohort) {
+                leadershipTransferCohort.transferComplete();
+            }
+        };
+        mockRaftActor.setCurrentBehavior(leader);
+
         cohort.init();
-        verify(onComplete, timeout(2000)).onFailure(mockRaftActor.self());
+        verify(onComplete, timeout(2000)).onSuccess(mockRaftActor.self());
     }
 }