package org.opendaylight.controller.cluster.raft.behaviors;
import akka.actor.ActorRef;
-import akka.actor.Cancellable;
+import akka.actor.ActorSelection;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
+import com.google.common.base.Stopwatch;
+import java.util.concurrent.TimeUnit;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import org.opendaylight.controller.cluster.raft.FollowerLogInformation;
import org.opendaylight.controller.cluster.raft.RaftActorContext;
-import org.opendaylight.controller.cluster.raft.base.messages.InitiateInstallSnapshot;
-import org.opendaylight.controller.cluster.raft.base.messages.IsolatedLeaderCheck;
-import scala.concurrent.duration.FiniteDuration;
+import org.opendaylight.controller.cluster.raft.RaftActorLeadershipTransferCohort;
+import org.opendaylight.controller.cluster.raft.RaftState;
+import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow;
+import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
/**
- * The behavior of a RaftActor when it is in the Leader state
- * <p/>
+ * The behavior of a RaftActor when it is in the Leader state.
+ *
+ * <p>
* Leaders:
* <ul>
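* <li> Upon election: send initial empty AppendEntries RPCs
* (heartbeat) to each server; repeat during idle periods to
* prevent election timeouts (§5.2)
* <li> If command received from client: append entry to local log,
* respond after entry applied to state machine (§5.3)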
* <li> If last log index ≥ nextIndex for a follower: send
* AppendEntries RPC with log entries starting at nextIndex
- * <ul>
* <li> If successful: update nextIndex and matchIndex for
* follower (§5.3)
* <li> If AppendEntries fails because of log inconsistency:
* decrement nextIndex and retry (§5.3)
- * </ul>
- * <li> If there exists an N such that N > commitIndex, a majority
+ * <li> If there exists an N such that N > commitIndex, a majority
* of matchIndex[i] ≥ N, and log[N].term == currentTerm:
* set commitIndex = N (§5.3, §5.4).
+ * </ul>
*/
public class Leader extends AbstractLeader {
- private Cancellable installSnapshotSchedule = null;
- private Cancellable isolatedLeaderCheckSchedule = null;
+ /**
+ * Internal message sent to periodically check if this leader has become isolated and should transition
+ * to {@link IsolatedLeader}.
+ *
+ * <p>
+ * {@link #beforeSendHeartbeat()} tells this object to the context's actor once the configured isolated check
+ * interval has elapsed, and {@link #handleMessage} recognizes it via {@code equals}.
+ */
+ @VisibleForTesting
+ static final Object ISOLATED_LEADER_CHECK = new Object();
- public Leader(RaftActorContext context) {
- super(context);
+ private final Stopwatch isolatedLeaderCheck = Stopwatch.createStarted(); // restarted each time the check is sent
+ @Nullable private LeadershipTransferContext leadershipTransferContext; // non-null only while a transfer is pending
- scheduleInstallSnapshotCheck(context.getConfigParams().getIsolatedCheckInterval());
+ Leader(RaftActorContext context, @Nullable AbstractLeader initializeFromLeader) { // package-private
+ super(context, RaftState.Leader, initializeFromLeader); // initializeFromLeader is passed through as-is
+ }
- scheduleIsolatedLeaderCheck(
- new FiniteDuration(context.getConfigParams().getHeartBeatInterval().length() * 10,
- context.getConfigParams().getHeartBeatInterval().unit()));
+ public Leader(RaftActorContext context) {
+ this(context, null); // delegates with a null initializeFromLeader
}
- @Override public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) {
+ @Override // intercepts the isolated-leader self-check; every other message goes to the superclass
+ public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) {
Preconditions.checkNotNull(sender, "sender should not be null");
- if (originalMessage instanceof IsolatedLeaderCheck) {
+ if (ISOLATED_LEADER_CHECK.equals(originalMessage)) {
if (isLeaderIsolated()) {
- LOG.info("{}: At least {} followers need to be active, Switching {} from Leader to IsolatedLeader",
- context.getId(), minIsolatedLeaderPeerCount, leaderId);
- return switchBehavior(new IsolatedLeader(context));
+ log.warn("{}: At least {} followers need to be active, Switching {} from Leader to IsolatedLeader",
+ context.getId(), getMinIsolatedLeaderPeerCount(), getLeaderId());
+ return internalSwitchBehavior(new IsolatedLeader(context, this)); // this instance is passed to the new behavior
+ } else {
+ return this; // not isolated: stay in the Leader behavior
}
+ } else {
+ return super.handleMessage(sender, originalMessage);
}
-
- return super.handleMessage(sender, originalMessage);
}
- protected void stopInstallSnapshotSchedule() {
- if (installSnapshotSchedule != null && !installSnapshotSchedule.isCancelled()) {
- installSnapshotSchedule.cancel();
+ @Override // runs the time-based checks below using stopwatches rather than scheduler tasks
+ protected void beforeSendHeartbeat() {
+ if (isolatedLeaderCheck.elapsed(TimeUnit.MILLISECONDS)
+ > context.getConfigParams().getIsolatedCheckIntervalInMillis()) {
+ context.getActor().tell(ISOLATED_LEADER_CHECK, context.getActor()); // self-message, see handleMessage
+ isolatedLeaderCheck.reset().start(); // begin measuring the next interval
}
- }
- protected void scheduleInstallSnapshotCheck(FiniteDuration interval) {
- if (getFollowerIds().isEmpty()) {
- // Optimization - do not bother scheduling a heartbeat as there are
- // no followers
- return;
+ if (leadershipTransferContext != null && leadershipTransferContext.isExpired(
+ context.getConfigParams().getElectionTimeOutInterval().toMillis())) { // isExpired() also aborts the cohort
+ log.debug("{}: Leadership transfer expired", logName());
+ leadershipTransferContext = null; // stop tracking the abandoned transfer
}
+ }
- stopInstallSnapshotSchedule();
+ @Override // lets the superclass process the reply, then checks whether a pending transfer can complete
+ protected RaftActorBehavior handleAppendEntriesReply(ActorRef sender, AppendEntriesReply appendEntriesReply) {
+ RaftActorBehavior returnBehavior = super.handleAppendEntriesReply(sender, appendEntriesReply);
+ tryToCompleteLeadershipTransfer(appendEntriesReply.getFollowerId()); // returns at once if no transfer is pending
+ return returnBehavior; // the superclass's result is returned unchanged
+ }
- // Schedule a message to send append entries to followers that can
- // accept an append entries with some data in it
- installSnapshotSchedule =
- context.getActorSystem().scheduler().scheduleOnce(
- interval,
- context.getActor(), new InitiateInstallSnapshot(),
- context.getActorSystem().dispatcher(), context.getActor());
+ /**
+ * Attempts to transfer leadership to a follower as per the raft paper (§3.10) as follows:
+ * <ul>
+ * <li>Start a timer (Stopwatch).</li>
+ * <li>Send an initial AppendEntries heartbeat to all followers.</li>
+ * <li>On AppendEntriesReply, check if the follower's new match Index matches the leader's last index</li>
+ * <li>If it matches and the follower is a voting member,
+ * <ul>
+ * <li>Send an additional AppendEntries to ensure the follower has applied all its log entries to its state.</li>
+ * <li>Send a {@link TimeoutNow} message to the follower to immediately start an election.</li>
+ * <li>Notify {@link RaftActorLeadershipTransferCohort#transferComplete}.</li>
+ * </ul></li>
+ * <li>Otherwise if the election time out period elapses, notify
+ * {@link RaftActorLeadershipTransferCohort#abortTransfer}.</li>
+ * </ul>
+ *
+ * <p>
+ * If the cohort names a requested follower, replies from any other follower are ignored. This method itself
+ * only records the transfer context and sends the initial heartbeat; the remaining steps run later, as
+ * AppendEntriesReply messages arrive and as {@link #beforeSendHeartbeat()} checks for expiry. Closing this
+ * behavior while a transfer is still pending also notifies
+ * {@link RaftActorLeadershipTransferCohort#abortTransfer}.
+ *
+ * @param leadershipTransferCohort the cohort participating in the leadership transfer
+ */
+ public void transferLeadership(@Nonnull RaftActorLeadershipTransferCohort leadershipTransferCohort) {
+ log.debug("{}: Attempting to transfer leadership", logName());
+
+ leadershipTransferContext = new LeadershipTransferContext(leadershipTransferCohort); // starts the expiry timer
+
+ // Send an immediate heart beat to the followers.
+ sendAppendEntries(0, false);
}
- protected void stopIsolatedLeaderCheckSchedule() {
- if (isolatedLeaderCheckSchedule != null && !isolatedLeaderCheckSchedule.isCancelled()) {
- isolatedLeaderCheckSchedule.cancel();
+ private void tryToCompleteLeadershipTransfer(String followerId) { // called for every AppendEntriesReply
+ if (leadershipTransferContext == null) {
+ return; // no transfer in progress
+ }
+
+ final Optional<String> requestedFollowerIdOptional
+ = leadershipTransferContext.transferCohort.getRequestedFollowerId();
+ if (requestedFollowerIdOptional.isPresent() && !requestedFollowerIdOptional.get().equals(followerId)) {
+ // The cohort asked for a specific follower and this reply came from a different one - keep waiting.
+ return;
}
- }
- protected void scheduleIsolatedLeaderCheck(FiniteDuration isolatedCheckInterval) {
- isolatedLeaderCheckSchedule = context.getActorSystem().scheduler().schedule(isolatedCheckInterval, isolatedCheckInterval,
- context.getActor(), new IsolatedLeaderCheck(),
- context.getActorSystem().dispatcher(), context.getActor());
+ FollowerLogInformation followerInfo = getFollower(followerId);
+ if (followerInfo == null) {
+ return; // reply from a follower this leader does not track
+ }
+
+ long lastIndex = context.getReplicatedLog().lastIndex();
+ boolean isVoting = context.getPeerInfo(followerId).isVoting(); // NOTE(review): getPeerInfo() result is not null-checked
+
+ log.debug("{}: tryToCompleteLeadershipTransfer: followerId: {}, matchIndex: {}, lastIndex: {}, isVoting: {}",
+ logName(), followerId, followerInfo.getMatchIndex(), lastIndex, isVoting);
+
+ if (isVoting && followerInfo.getMatchIndex() == lastIndex) { // only a fully caught-up voting follower qualifies
+ log.debug("{}: Follower's log matches - sending ElectionTimeout", logName());
+
+ // The follower's log matches but it may not have applied every entry to its state yet, so send one
+ // more AppendEntries before asking it to start an election.
+ sendAppendEntries(0, false);
+
+ // Now send a TimeoutNow message to the matching follower to immediately start an election.
+ ActorSelection followerActor = context.getPeerActorSelection(followerId);
+ followerActor.tell(TimeoutNow.INSTANCE, context.getActor());
+
+ log.debug("{}: Leader transfer complete", logName());
+
+ leadershipTransferContext.transferCohort.transferComplete();
+ leadershipTransferContext = null; // later replies hit the null check at the top and return
+ }
}
@Override
- public void close() throws Exception {
- stopInstallSnapshotSchedule();
- stopIsolatedLeaderCheckSchedule();
+ public void close() { // aborts any pending leadership transfer, then closes the superclass
+ if (leadershipTransferContext != null) {
+ LeadershipTransferContext localLeadershipTransferContext = leadershipTransferContext;
+ leadershipTransferContext = null; // cleared before notifying; presumably guards against re-entrant calls
+ localLeadershipTransferContext.transferCohort.abortTransfer();
+ }
+
super.close();
}
void markFollowerInActive(String followerId) { // package-private: marks the tracked follower as inactive
getFollower(followerId).markFollowerInActive(); // NOTE(review): getFollower() can return null; not guarded here
}
+
+ private static class LeadershipTransferContext { // state of one pending leadership transfer
+ RaftActorLeadershipTransferCohort transferCohort;
+ Stopwatch timer = Stopwatch.createStarted(); // started when the context is created
+
+ LeadershipTransferContext(RaftActorLeadershipTransferCohort transferCohort) {
+ this.transferCohort = transferCohort;
+ }
+
+ boolean isExpired(long timeout) { // timeout is compared against elapsed milliseconds
+ if (timer.elapsed(TimeUnit.MILLISECONDS) >= timeout) {
+ transferCohort.abortTransfer(); // side effect: expiry also notifies the cohort
+ return true;
+ }
+
+ return false;
+ }
+ }
}