From: Tom Pantelis Date: Tue, 2 Aug 2016 02:23:33 +0000 (-0400) Subject: Improve leader election convergence X-Git-Tag: release/boron~30 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=commitdiff_plain;ds=sidebyside;h=refs%2Fchanges%2F69%2F42969%2F3;hp=364229dd715facec8ef8c73d6c60546c5f38b103;p=controller.git Improve leader election convergence When 2 nodes start up with the first node's log behind the second node's, it usually takes several election rounds to converge - I've seen anywhere from 40 s to 3 min, depending on timing. What happens is that the first node goes to Candidate first but its RequestVote is rejected by the second node. Shortly after, the second node goes to Candidate - its term is higher than the first node's, which causes the first node to go back to Follower. However, the first node doesn't respond to the RequestVote. Then the first node goes to Candidate and the cycle repeats. Eventually, due to the election variance, the second node times out first and the first node processes the RequestVote and grants it. But it can take more than 10 cycles. We can improve the convergence by allowing a Candidate to process and respond to RequestVote when the sender's term is greater. It still transitions to Follower as per the raft rules. The raft paper does not say whether or not a Candidate can/should process a RequestVote in this case but it seems to make sense. With this change, the first RequestVote sent by the second node is granted and it converges quickly. 
Change-Id: If9416ddf7bf0dfc1220a169be4174f440626a0dd Signed-off-by: Tom Pantelis --- diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Candidate.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Candidate.java index 4d51922bc2..1205c4bad6 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Candidate.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Candidate.java @@ -157,6 +157,12 @@ public class Candidate extends AbstractRaftActorBehavior { if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) { context.getTermInformation().updateAndPersist(rpc.getTerm(), null); + // The raft paper does not say whether or not a Candidate can/should process a RequestVote in + // this case but doing so gains quicker convergence when the sender's log is more up-to-date. 
+ if (message instanceof RequestVote) { + super.handleMessage(sender, message); + } + return internalSwitchBehavior(RaftState.Follower); } } diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/CandidateTest.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/CandidateTest.java index 6e5c931502..40080a8c72 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/CandidateTest.java +++ b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/CandidateTest.java @@ -313,7 +313,7 @@ public class CandidateTest extends AbstractRaftActorBehaviorTest { context.getTermInformation().update(2, "test"); // Send an unknown message so that the state of the RaftActor remains unchanged - RaftActorBehavior expected = behavior.handleMessage(candidateActor, "unknown"); + behavior.handleMessage(candidateActor, "unknown"); RaftActorBehavior raftBehavior = behavior.handleMessage(candidateActor, appendEntries); @@ -333,6 +333,7 @@ public class CandidateTest extends AbstractRaftActorBehaviorTest { return new MockRaftActorContext("candidate", getSystem(), candidateActor); } + @SuppressWarnings("unchecked") private Map setupPeers(final int count) { Map peerMap = new HashMap<>(); peerActors = new TestActorRef[count]; @@ -349,6 +350,10 @@ public class CandidateTest extends AbstractRaftActorBehaviorTest { protected void assertStateChangesToFollowerWhenRaftRPCHasNewerTerm(final MockRaftActorContext actorContext, final ActorRef actorRef, final RaftRPC rpc) throws Exception { super.assertStateChangesToFollowerWhenRaftRPCHasNewerTerm(actorContext, actorRef, rpc); - assertEquals("New votedFor", null, actorContext.getTermInformation().getVotedFor()); + if(rpc instanceof RequestVote) { + assertEquals("New votedFor", ((RequestVote)rpc).getCandidateId(), 
actorContext.getTermInformation().getVotedFor()); + } else { + assertEquals("New votedFor", null, actorContext.getTermInformation().getVotedFor()); + } } }