From dac16f0d464eff3325b3800a803e81b303964e4b Mon Sep 17 00:00:00 2001 From: Tom Pantelis Date: Mon, 12 Jun 2017 09:42:38 -0400 Subject: [PATCH 1/1] Bug 8606: Continue leadership transfer on pauseLeader timeout Modified RaftActorLeadershipTransferCohort to continue with leadership transfer if pauseLeader times out instead of aborting. The shard may have a lot of transactions queued up which it can't finish in time, but there may still be a follower that is caught up (i.e. whose matchIndex equals the leader's lastIndex) or would be caught up if leadership transfer continued. In the worst case, no follower is available and the "catch up" phase of leadership transfer also times out, which would lengthen shutdown time, but that should be fine. Change-Id: I1ec1ef43bb556e50416bb7239ce3c267265db9b3 Signed-off-by: Tom Pantelis --- .../raft/RaftActorLeadershipTransferCohort.java | 5 +++-- .../RaftActorLeadershipTransferCohortTest.java | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java index 45e497de46..0a22abb965 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohort.java @@ -103,13 +103,14 @@ public class RaftActorLeadershipTransferCohort { raftActor.pauseLeader(new TimedRunnable(context.getConfigParams().getElectionTimeOutInterval(), raftActor) { @Override protected void doRun() { + LOG.debug("{}: pauseLeader successfully completed - doing transfer", raftActor.persistenceId()); doTransfer(); } @Override protected void doCancel() { - LOG.debug("{}: pauseLeader timed out - aborting transfer", 
raftActor.persistenceId()); - abortTransfer(); + LOG.debug("{}: pauseLeader timed out - continuing with transfer", raftActor.persistenceId()); + doTransfer(); } }); } diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java index ed96159f6b..44b0d2be6b 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java +++ b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorLeadershipTransferCohortTest.java @@ -17,6 +17,7 @@ import com.google.common.base.Function; import org.junit.After; import org.junit.Test; import org.opendaylight.controller.cluster.raft.RaftActorLeadershipTransferCohort.OnComplete; +import org.opendaylight.controller.cluster.raft.behaviors.Leader; import org.opendaylight.controller.cluster.raft.policy.DisableElectionsRaftPolicy; /** @@ -39,6 +40,7 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest { private void setup(String testName) { String persistenceId = factory.generateActorId(testName + "-leader-"); + config.setCustomRaftPolicyImplementationClass(DisableElectionsRaftPolicy.class.getName()); mockRaftActor = factory.createTestActor(MockRaftActor.builder().id(persistenceId).config(config) .pauseLeaderFunction(pauseLeaderFunction).props().withDispatcher(Dispatchers.DefaultDispatcherId()), persistenceId).underlyingActor(); @@ -74,7 +76,6 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest { @Test public void testNotLeaderOnDoTransfer() { - config.setCustomRaftPolicyImplementationClass(DisableElectionsRaftPolicy.class.getName()); setup("testNotLeaderOnDoTransfer"); cohort.doTransfer(); verify(onComplete).onSuccess(mockRaftActor.self()); @@ -90,9 
+91,17 @@ public class RaftActorLeadershipTransferCohortTest extends AbstractActorTest { @Test public void testPauseLeaderTimeout() { pauseLeaderFunction = input -> null; - setup("testPauseLeaderTimeout"); + + Leader leader = new Leader(mockRaftActor.getRaftActorContext()) { + @Override + public void transferLeadership(RaftActorLeadershipTransferCohort leadershipTransferCohort) { + leadershipTransferCohort.transferComplete(); + } + }; + mockRaftActor.setCurrentBehavior(leader); + cohort.init(); - verify(onComplete, timeout(2000)).onFailure(mockRaftActor.self()); + verify(onComplete, timeout(2000)).onSuccess(mockRaftActor.self()); } } -- 2.36.6