From: Tom Pantelis Date: Mon, 12 Jun 2017 13:42:38 +0000 (-0400) Subject: Bug 8606: Continue leadership transfer on pauseLeader timeout X-Git-Tag: release/nitrogen~118 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=commitdiff_plain;h=dac16f0d464eff3325b3800a803e81b303964e4b;hp=9dd99568ffecade36887a5d4266e4acfe8c70ff4 Bug 8606: Continue leadership transfer on pauseLeader timeout Modified RaftActorLeadershipTransferCohort to continue with leadership transfer if pauseLeader times out, instead of aborting. The shard may have a lot of transactions queued up that it can't finish in time, but there may still be a follower that is caught up (i.e., whose matchIndex equals the leader's lastIndex) or would be caught up if leadership transfer continued. In the worst case, no follower is available and the "catch up" phase of leadership transfer also times out, which would lengthen shutdown time, but that should be fine. Change-Id: I1ec1ef43bb556e50416bb7239ce3c267265db9b3 Signed-off-by: Tom Pantelis --- diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java index 45e497de46..0a22abb965 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java @@ -103,13 +103,14 @@ public class RaftActorLeadershipTransferCohort { raftActor.pauseLeader(new TimedRunnable(context.getConfigParams().getElectionTimeOutInterval(), raftActor) { @Override protected void doRun() { + LOG.debug("{}: pauseLeader successfully completed - doing transfer", raftActor.persistenceId()); doTransfer(); } @Override protected void doCancel() { - LOG.debug("{}: pauseLeader timed out - aborting transfer", 
raftActor.persistenceId()); - abortTransfer(); + LOG.debug("{}: pauseLeader timed out - continuing with transfer", raftActor.persistenceId()); + doTransfer(); } }); } diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java index ed96159f6b..44b0d2be6b 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java +++ b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java @@ -17,6 +17,7 @@ import com.google.common.base.Function; import org.junit.After; import org.junit.Test; import org.opendaylight.controller.cluster.raft.RaftActorLeadershipTransferCohort.OnComplete; +import org.opendaylight.controller.cluster.raft.behaviors.Leader; import org.opendaylight.controller.cluster.raft.policy.DisableElectionsRaftPolicy; /** @@ -39,6 +40,7 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest { private void setup(String testName) { String persistenceId = factory.generateActorId(testName + "-leader-"); + config.setCustomRaftPolicyImplementationClass(DisableElectionsRaftPolicy.class.getName()); mockRaftActor = factory.createTestActor(MockRaftActor.builder().id(persistenceId).config(config) .pauseLeaderFunction(pauseLeaderFunction).props().withDispatcher(Dispatchers.DefaultDispatcherId()), persistenceId).underlyingActor(); @@ -74,7 +76,6 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest { @Test public void testNotLeaderOnDoTransfer() { - config.setCustomRaftPolicyImplementationClass(DisableElectionsRaftPolicy.class.getName()); setup("testNotLeaderOnDoTransfer"); cohort.doTransfer(); verify(onComplete).onSuccess(mockRaftActor.self()); @@ -90,9 
+91,17 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest { @Test public void testPauseLeaderTimeout() { pauseLeaderFunction = input -> null; - setup("testPauseLeaderTimeout"); + + Leader leader = new Leader(mockRaftActor.getRaftActorContext()) { + @Override + public void transferLeadership(RaftActorLeadershipTransferCohort leadershipTransferCohort) { + leadershipTransferCohort.transferComplete(); + } + }; + mockRaftActor.setCurrentBehavior(leader); + cohort.init(); - verify(onComplete, timeout(2000)).onFailure(mockRaftActor.self()); + verify(onComplete, timeout(2000)).onSuccess(mockRaftActor.self()); } }