X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-akka-raft%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fraft%2Fbehaviors%2FLeader.java;h=3534ac5cf142eda058ceb1e39b90d311b096b20b;hp=d83362b58081c0e4c4576a848bf10ca29d8fc7da;hb=refs%2Fchanges%2F09%2F83009%2F6;hpb=b17a51ecb983331f0e521e40f9dd2474f268de13 diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java index d83362b580..3534ac5cf1 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java @@ -5,49 +5,29 @@ * terms of the Eclipse Public License v1.0 which accompanies this distribution, * and is available at http://www.eclipse.org/legal/epl-v10.html */ - package org.opendaylight.controller.cluster.raft.behaviors; +import static java.util.Objects.requireNonNull; + import akka.actor.ActorRef; import akka.actor.ActorSelection; -import akka.actor.Cancellable; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; -import com.google.common.base.Preconditions; -import com.google.protobuf.ByteString; -import org.opendaylight.controller.cluster.raft.ClientRequestTracker; -import org.opendaylight.controller.cluster.raft.ClientRequestTrackerImpl; +import com.google.common.base.Stopwatch; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import org.eclipse.jdt.annotation.NonNull; +import org.eclipse.jdt.annotation.Nullable; import org.opendaylight.controller.cluster.raft.FollowerLogInformation; -import org.opendaylight.controller.cluster.raft.FollowerLogInformationImpl; import 
org.opendaylight.controller.cluster.raft.RaftActorContext; +import org.opendaylight.controller.cluster.raft.RaftActorLeadershipTransferCohort; import org.opendaylight.controller.cluster.raft.RaftState; -import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry; -import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot; -import org.opendaylight.controller.cluster.raft.base.messages.InitiateInstallSnapshot; -import org.opendaylight.controller.cluster.raft.base.messages.Replicate; -import org.opendaylight.controller.cluster.raft.base.messages.SendHeartBeat; -import org.opendaylight.controller.cluster.raft.base.messages.SendInstallSnapshot; -import org.opendaylight.controller.cluster.raft.messages.AppendEntries; +import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow; import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply; -import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot; -import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply; -import org.opendaylight.controller.cluster.raft.messages.RaftRPC; -import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply; -import scala.concurrent.duration.FiniteDuration; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; /** - * The behavior of a RaftActor when it is in the Leader state - *

+ * The behavior of a RaftActor when it is in the Leader state. + * + *

* Leaders: *

*/ -public class Leader extends AbstractRaftActorBehavior { - - - protected final Map followerToLog = new HashMap<>(); - protected final Map mapFollowerToSnapshot = new HashMap<>(); - - private final Set followers; - - private Cancellable heartbeatSchedule = null; - private Cancellable installSnapshotSchedule = null; - - private List trackerList = new ArrayList<>(); - - private final int minReplicationCount; - - private Optional snapshot; - - public Leader(RaftActorContext context) { - super(context); - - followers = context.getPeerAddresses().keySet(); - - for (String followerId : followers) { - FollowerLogInformation followerLogInformation = - new FollowerLogInformationImpl(followerId, - new AtomicLong(context.getCommitIndex()), - new AtomicLong(-1), - context.getConfigParams().getElectionTimeOutInterval()); - - followerToLog.put(followerId, followerLogInformation); - } - - if(LOG.isDebugEnabled()) { - LOG.debug("Election:Leader has following peers: {}", followers); - } - - if (followers.size() > 0) { - minReplicationCount = (followers.size() + 1) / 2 + 1; - } else { - minReplicationCount = 0; - } - - snapshot = Optional.absent(); - - // Immediately schedule a heartbeat - // Upon election: send initial empty AppendEntries RPCs - // (heartbeat) to each server; repeat during idle periods to - // prevent election timeouts (§5.2) - scheduleHeartBeat(new FiniteDuration(0, TimeUnit.SECONDS)); - - scheduleInstallSnapshotCheck( - new FiniteDuration(context.getConfigParams().getHeartBeatInterval().length() * 1000, - context.getConfigParams().getHeartBeatInterval().unit()) - ); - - } - - private Optional getSnapshot() { - return snapshot; - } - +public class Leader extends AbstractLeader { + /** + * Internal message sent to periodically check if this leader has become isolated and should transition + * to {@link IsolatedLeader}. 
+ */ @VisibleForTesting - void setSnapshot(Optional snapshot) { - this.snapshot = snapshot; - } + static final Object ISOLATED_LEADER_CHECK = new Object(); - @Override protected RaftActorBehavior handleAppendEntries(ActorRef sender, - AppendEntries appendEntries) { - - if(LOG.isDebugEnabled()) { - LOG.debug(appendEntries.toString()); - } + private final Stopwatch isolatedLeaderCheck = Stopwatch.createStarted(); + private @Nullable LeadershipTransferContext leadershipTransferContext; - return this; + Leader(final RaftActorContext context, @Nullable final AbstractLeader initializeFromLeader) { + super(context, RaftState.Leader, initializeFromLeader); } - @Override protected RaftActorBehavior handleAppendEntriesReply(ActorRef sender, - AppendEntriesReply appendEntriesReply) { - - if(! appendEntriesReply.isSuccess()) { - if(LOG.isDebugEnabled()) { - LOG.debug(appendEntriesReply.toString()); - } - } - - // Update the FollowerLogInformation - String followerId = appendEntriesReply.getFollowerId(); - FollowerLogInformation followerLogInformation = - followerToLog.get(followerId); - - if(followerLogInformation == null){ - LOG.error("Unknown follower {}", followerId); - return this; - } - - followerLogInformation.markFollowerActive(); - - if (appendEntriesReply.isSuccess()) { - followerLogInformation - .setMatchIndex(appendEntriesReply.getLogLastIndex()); - followerLogInformation - .setNextIndex(appendEntriesReply.getLogLastIndex() + 1); - } else { - - // TODO: When we find that the follower is out of sync with the - // Leader we simply decrement that followers next index by 1. - // Would it be possible to do better than this? 
The RAFT spec - // does not explicitly deal with it but may be something for us to - // think about - - followerLogInformation.decrNextIndex(); - } - - // Now figure out if this reply warrants a change in the commitIndex - // If there exists an N such that N > commitIndex, a majority - // of matchIndex[i] ≥ N, and log[N].term == currentTerm: - // set commitIndex = N (§5.3, §5.4). - for (long N = context.getCommitIndex() + 1; ; N++) { - int replicatedCount = 1; - - for (FollowerLogInformation info : followerToLog.values()) { - if (info.getMatchIndex().get() >= N) { - replicatedCount++; - } - } - - if (replicatedCount >= minReplicationCount) { - ReplicatedLogEntry replicatedLogEntry = - context.getReplicatedLog().get(N); - if (replicatedLogEntry != null - && replicatedLogEntry.getTerm() - == currentTerm()) { - context.setCommitIndex(N); - } - } else { - break; - } - } - - // Apply the change to the state machine - if (context.getCommitIndex() > context.getLastApplied()) { - applyLogToStateMachine(context.getCommitIndex()); - } - - return this; - } - - protected ClientRequestTracker removeClientRequestTracker(long logIndex) { - - ClientRequestTracker toRemove = findClientRequestTracker(logIndex); - if(toRemove != null) { - trackerList.remove(toRemove); - } - - return toRemove; + public Leader(final RaftActorContext context) { + this(context, null); } - protected ClientRequestTracker findClientRequestTracker(long logIndex) { - for (ClientRequestTracker tracker : trackerList) { - if (tracker.getIndex() == logIndex) { - return tracker; - } - } - - return null; - } - - @Override protected RaftActorBehavior handleRequestVoteReply(ActorRef sender, - RequestVoteReply requestVoteReply) { - return this; - } - - @Override public RaftState state() { - return RaftState.Leader; - } - - @Override public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) { - Preconditions.checkNotNull(sender, "sender should not be null"); - - Object message = 
fromSerializableMessage(originalMessage); - - if (message instanceof RaftRPC) { - RaftRPC rpc = (RaftRPC) message; - // If RPC request or response contains term T > currentTerm: - // set currentTerm = T, convert to follower (§5.1) - // This applies to all RPC messages and responses - if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) { - context.getTermInformation().updateAndPersist(rpc.getTerm(), null); + @Override + public RaftActorBehavior handleMessage(final ActorRef sender, final Object originalMessage) { + requireNonNull(sender, "sender should not be null"); - return switchBehavior(new Follower(context)); - } - } - - try { - if (message instanceof SendHeartBeat) { - sendHeartBeat(); - return this; - - } else if(message instanceof InitiateInstallSnapshot) { - installSnapshotIfNeeded(); - - } else if(message instanceof SendInstallSnapshot) { - // received from RaftActor - setSnapshot(Optional.of(((SendInstallSnapshot) message).getSnapshot())); - sendInstallSnapshot(); - - } else if (message instanceof Replicate) { - replicate((Replicate) message); - - } else if (message instanceof InstallSnapshotReply){ - handleInstallSnapshotReply( - (InstallSnapshotReply) message); - } - } finally { - scheduleHeartBeat(context.getConfigParams().getHeartBeatInterval()); - } - - return super.handleMessage(sender, message); - } - - private void handleInstallSnapshotReply(InstallSnapshotReply reply) { - String followerId = reply.getFollowerId(); - FollowerToSnapshot followerToSnapshot = mapFollowerToSnapshot.get(followerId); - FollowerLogInformation followerLogInformation = followerToLog.get(followerId); - followerLogInformation.markFollowerActive(); - - if (followerToSnapshot != null && - followerToSnapshot.getChunkIndex() == reply.getChunkIndex()) { - - if (reply.isSuccess()) { - if(followerToSnapshot.isLastChunk(reply.getChunkIndex())) { - //this was the last chunk reply - if(LOG.isDebugEnabled()) { - LOG.debug("InstallSnapshotReply received, " + - "last chunk 
received, Chunk:{}. Follower:{} Setting nextIndex:{}", - reply.getChunkIndex(), followerId, - context.getReplicatedLog().getSnapshotIndex() + 1 - ); - } - - followerLogInformation.setMatchIndex( - context.getReplicatedLog().getSnapshotIndex()); - followerLogInformation.setNextIndex( - context.getReplicatedLog().getSnapshotIndex() + 1); - mapFollowerToSnapshot.remove(followerId); - - if(LOG.isDebugEnabled()) { - LOG.debug("followerToLog.get(followerId).getNextIndex().get()=" + - followerToLog.get(followerId).getNextIndex().get()); - } - - if (mapFollowerToSnapshot.isEmpty()) { - // once there are no pending followers receiving snapshots - // we can remove snapshot from the memory - setSnapshot(Optional.absent()); - } - - } else { - followerToSnapshot.markSendStatus(true); - } + if (ISOLATED_LEADER_CHECK.equals(originalMessage)) { + if (isLeaderIsolated()) { + log.warn("{}: At least {} followers need to be active, Switching {} from Leader to IsolatedLeader", + context.getId(), getMinIsolatedLeaderPeerCount(), getLeaderId()); + return internalSwitchBehavior(new IsolatedLeader(context, this)); } else { - LOG.info("InstallSnapshotReply received, " + - "sending snapshot chunk failed, Will retry, Chunk:{}", - reply.getChunkIndex() - ); - followerToSnapshot.markSendStatus(false); + return this; } - } else { - LOG.error("ERROR!!" 
+ - "FollowerId in InstallSnapshotReply not known to Leader" + - " or Chunk Index in InstallSnapshotReply not matching {} != {}", - followerToSnapshot.getChunkIndex(), reply.getChunkIndex() - ); + return super.handleMessage(sender, originalMessage); } } - private void replicate(Replicate replicate) { - long logIndex = replicate.getReplicatedLogEntry().getIndex(); - - if(LOG.isDebugEnabled()) { - LOG.debug("Replicate message {}", logIndex); + @Override + protected void beforeSendHeartbeat() { + if (isolatedLeaderCheck.elapsed(TimeUnit.MILLISECONDS) + > context.getConfigParams().getIsolatedCheckIntervalInMillis()) { + context.getActor().tell(ISOLATED_LEADER_CHECK, context.getActor()); + isolatedLeaderCheck.reset().start(); } - // Create a tracker entry we will use this later to notify the - // client actor - trackerList.add( - new ClientRequestTrackerImpl(replicate.getClientActor(), - replicate.getIdentifier(), - logIndex) - ); - - if (followers.size() == 0) { - context.setCommitIndex(logIndex); - applyLogToStateMachine(logIndex); - } else { - sendAppendEntries(); + if (leadershipTransferContext != null && leadershipTransferContext.isExpired( + context.getConfigParams().getElectionTimeOutInterval().toMillis())) { + log.debug("{}: Leadership transfer expired", logName()); + leadershipTransferContext = null; } } - private void sendAppendEntries() { - // Send an AppendEntries to all followers - for (String followerId : followers) { - ActorSelection followerActor = context.getPeerActorSelection(followerId); - - if (followerActor != null) { - FollowerLogInformation followerLogInformation = followerToLog.get(followerId); - long followerNextIndex = followerLogInformation.getNextIndex().get(); - boolean isFollowerActive = followerLogInformation.isFollowerActive(); - List entries = null; - - if (mapFollowerToSnapshot.get(followerId) != null) { - // if install snapshot is in process , then sent next chunk if possible - if (isFollowerActive && 
mapFollowerToSnapshot.get(followerId).canSendNextChunk()) { - sendSnapshotChunk(followerActor, followerId); - } else { - // we send a heartbeat even if we have not received a reply for the last chunk - sendAppendEntriesToFollower(followerActor, followerNextIndex, - Collections.emptyList()); - } - - } else { - long leaderLastIndex = context.getReplicatedLog().lastIndex(); - long leaderSnapShotIndex = context.getReplicatedLog().getSnapshotIndex(); - - if (isFollowerActive && - context.getReplicatedLog().isPresent(followerNextIndex)) { - // FIXME : Sending one entry at a time - entries = context.getReplicatedLog().getFrom(followerNextIndex, 1); - - } else if (isFollowerActive && followerNextIndex >= 0 && - leaderLastIndex >= followerNextIndex ) { - // if the followers next index is not present in the leaders log, and - // if the follower is just not starting and if leader's index is more than followers index - // then snapshot should be sent - - if(LOG.isDebugEnabled()) { - LOG.debug("InitiateInstallSnapshot to follower:{}," + - "follower-nextIndex:{}, leader-snapshot-index:{}, " + - "leader-last-index:{}", followerId, - followerNextIndex, leaderSnapShotIndex, leaderLastIndex - ); - } - actor().tell(new InitiateInstallSnapshot(), actor()); - - // we would want to sent AE as the capture snapshot might take time - entries = Collections.emptyList(); - - } else { - //we send an AppendEntries, even if the follower is inactive - // in-order to update the followers timestamp, in case it becomes active again - entries = Collections.emptyList(); - } - - sendAppendEntriesToFollower(followerActor, followerNextIndex, entries); - - } - } - } - } - - private void sendAppendEntriesToFollower(ActorSelection followerActor, long followerNextIndex, - List entries) { - followerActor.tell( - new AppendEntries(currentTerm(), context.getId(), - prevLogIndex(followerNextIndex), - prevLogTerm(followerNextIndex), entries, - context.getCommitIndex()).toSerializable(), - actor() - ); + @Override 
+ protected RaftActorBehavior handleAppendEntriesReply(final ActorRef sender, + final AppendEntriesReply appendEntriesReply) { + RaftActorBehavior returnBehavior = super.handleAppendEntriesReply(sender, appendEntriesReply); + tryToCompleteLeadershipTransfer(appendEntriesReply.getFollowerId()); + return returnBehavior; } /** - * An installSnapshot is scheduled at a interval that is a multiple of - * a HEARTBEAT_INTERVAL. This is to avoid the need to check for installing - * snapshots at every heartbeat. - * - * Install Snapshot works as follows - * 1. Leader sends a InitiateInstallSnapshot message to self - * 2. Leader then initiates the capture snapshot by sending a CaptureSnapshot message to actor - * 3. RaftActor on receipt of the CaptureSnapshotReply (from Shard), stores the received snapshot in the replicated log - * and makes a call to Leader's handleMessage , with SendInstallSnapshot message. - * 4. Leader , picks the snapshot from im-mem ReplicatedLog and sends it in chunks to the Follower - * 5. On complete, Follower sends back a InstallSnapshotReply. - * 6. On receipt of the InstallSnapshotReply for the last chunk, Leader marks the install complete for that follower - * and replenishes the memory by deleting the snapshot in Replicated log. + * Attempts to transfer leadership to a follower as per the raft paper (§3.10) as follows: + *
    + *
  • Start a timer (Stopwatch).
  • + *
  • Send an initial AppendEntries heartbeat to all followers.
  • + *
  • On AppendEntriesReply, check if the follower's new match index matches the leader's last index.
  • + *
  • If it matches, + *
      + *
    • Send an additional AppendEntries to ensure the follower has applied all its log entries to its state.
    • + *
    • Send a TimeoutNow message to the follower to immediately start an election.
    • + *
    • Notify {@link RaftActorLeadershipTransferCohort#transferComplete}.
    • + *
  • + *
  • Otherwise if the election time out period elapses, notify + * {@link RaftActorLeadershipTransferCohort#abortTransfer}.
  • + *
* + * @param leadershipTransferCohort the cohort participating in the leadership transfer */ - private void installSnapshotIfNeeded() { - for (String followerId : followers) { - ActorSelection followerActor = - context.getPeerActorSelection(followerId); - - if(followerActor != null) { - FollowerLogInformation followerLogInformation = - followerToLog.get(followerId); - - long nextIndex = followerLogInformation.getNextIndex().get(); - - if (!context.getReplicatedLog().isPresent(nextIndex) && - context.getReplicatedLog().isInSnapshot(nextIndex)) { - LOG.info("{} follower needs a snapshot install", followerId); - if (snapshot.isPresent()) { - // if a snapshot is present in the memory, most likely another install is in progress - // no need to capture snapshot - sendSnapshotChunk(followerActor, followerId); - - } else { - initiateCaptureSnapshot(); - //we just need 1 follower who would need snapshot to be installed. - // when we have the snapshot captured, we would again check (in SendInstallSnapshot) - // who needs an install and send to all who need - break; - } - - } - } - } - } - - // on every install snapshot, we try to capture the snapshot. - // Once a capture is going on, another one issued will get ignored by RaftActor. 
- private void initiateCaptureSnapshot() { - LOG.info("Initiating Snapshot Capture to Install Snapshot, Leader:{}", getLeaderId()); - ReplicatedLogEntry lastAppliedEntry = context.getReplicatedLog().get(context.getLastApplied()); - long lastAppliedIndex = -1; - long lastAppliedTerm = -1; - - if (lastAppliedEntry != null) { - lastAppliedIndex = lastAppliedEntry.getIndex(); - lastAppliedTerm = lastAppliedEntry.getTerm(); - } else if (context.getReplicatedLog().getSnapshotIndex() > -1) { - lastAppliedIndex = context.getReplicatedLog().getSnapshotIndex(); - lastAppliedTerm = context.getReplicatedLog().getSnapshotTerm(); - } - - boolean isInstallSnapshotInitiated = true; - actor().tell(new CaptureSnapshot(lastIndex(), lastTerm(), - lastAppliedIndex, lastAppliedTerm, isInstallSnapshotInitiated), - actor()); - } - - - private void sendInstallSnapshot() { - for (String followerId : followers) { - ActorSelection followerActor = context.getPeerActorSelection(followerId); - - if(followerActor != null) { - FollowerLogInformation followerLogInformation = followerToLog.get(followerId); - long nextIndex = followerLogInformation.getNextIndex().get(); - - if (!context.getReplicatedLog().isPresent(nextIndex) && - context.getReplicatedLog().isInSnapshot(nextIndex)) { - sendSnapshotChunk(followerActor, followerId); - } - } - } - } - - /** - * Sends a snapshot chunk to a given follower - * InstallSnapshot should qualify as a heartbeat too. 
- */ - private void sendSnapshotChunk(ActorSelection followerActor, String followerId) { - try { - if (snapshot.isPresent()) { - followerActor.tell( - new InstallSnapshot(currentTerm(), context.getId(), - context.getReplicatedLog().getSnapshotIndex(), - context.getReplicatedLog().getSnapshotTerm(), - getNextSnapshotChunk(followerId,snapshot.get()), - mapFollowerToSnapshot.get(followerId).incrementChunkIndex(), - mapFollowerToSnapshot.get(followerId).getTotalChunks() - ).toSerializable(), - actor() - ); - LOG.info("InstallSnapshot sent to follower {}, Chunk: {}/{}", - followerActor.path(), mapFollowerToSnapshot.get(followerId).getChunkIndex(), - mapFollowerToSnapshot.get(followerId).getTotalChunks()); - } - } catch (IOException e) { - LOG.error(e, "InstallSnapshot failed for Leader."); - } - } - - /** - * Acccepts snaphot as ByteString, enters into map for future chunks - * creates and return a ByteString chunk - */ - private ByteString getNextSnapshotChunk(String followerId, ByteString snapshotBytes) throws IOException { - FollowerToSnapshot followerToSnapshot = mapFollowerToSnapshot.get(followerId); - if (followerToSnapshot == null) { - followerToSnapshot = new FollowerToSnapshot(snapshotBytes); - mapFollowerToSnapshot.put(followerId, followerToSnapshot); - } - ByteString nextChunk = followerToSnapshot.getNextChunk(); - if (LOG.isDebugEnabled()) { - LOG.debug("Leader's snapshot nextChunk size:{}", nextChunk.size()); - } - return nextChunk; - } + public void transferLeadership(@NonNull final RaftActorLeadershipTransferCohort leadershipTransferCohort) { + log.debug("{}: Attempting to transfer leadership", logName()); - private void sendHeartBeat() { - if (followers.size() > 0) { - sendAppendEntries(); - } - } + leadershipTransferContext = new LeadershipTransferContext(leadershipTransferCohort); - private void stopHeartBeat() { - if (heartbeatSchedule != null && !heartbeatSchedule.isCancelled()) { - heartbeatSchedule.cancel(); - } + // Send an immediate heart beat to 
the followers. + sendAppendEntries(0, false); } - private void stopInstallSnapshotSchedule() { - if (installSnapshotSchedule != null && !installSnapshotSchedule.isCancelled()) { - installSnapshotSchedule.cancel(); + private void tryToCompleteLeadershipTransfer(final String followerId) { + if (leadershipTransferContext == null) { + return; } - } - private void scheduleHeartBeat(FiniteDuration interval) { - if(followers.size() == 0){ - // Optimization - do not bother scheduling a heartbeat as there are - // no followers + final Optional requestedFollowerIdOptional + = leadershipTransferContext.transferCohort.getRequestedFollowerId(); + if (requestedFollowerIdOptional.isPresent() && !requestedFollowerIdOptional.get().equals(followerId)) { + // we want to transfer leadership to specific follower return; } - stopHeartBeat(); - - // Schedule a heartbeat. When the scheduler triggers a SendHeartbeat - // message is sent to itself. - // Scheduling the heartbeat only once here because heartbeats do not - // need to be sent if there are other messages being sent to the remote - // actor. 
- heartbeatSchedule = context.getActorSystem().scheduler().scheduleOnce( - interval, context.getActor(), new SendHeartBeat(), - context.getActorSystem().dispatcher(), context.getActor()); - } - - private void scheduleInstallSnapshotCheck(FiniteDuration interval) { - if(followers.size() == 0){ - // Optimization - do not bother scheduling a heartbeat as there are - // no followers + FollowerLogInformation followerInfo = getFollower(followerId); + if (followerInfo == null) { return; } - stopInstallSnapshotSchedule(); + long lastIndex = context.getReplicatedLog().lastIndex(); + boolean isVoting = context.getPeerInfo(followerId).isVoting(); - // Schedule a message to send append entries to followers that can - // accept an append entries with some data in it - installSnapshotSchedule = - context.getActorSystem().scheduler().scheduleOnce( - interval, - context.getActor(), new InitiateInstallSnapshot(), - context.getActorSystem().dispatcher(), context.getActor()); - } + log.debug("{}: tryToCompleteLeadershipTransfer: followerId: {}, matchIndex: {}, lastIndex: {}, isVoting: {}", + logName(), followerId, followerInfo.getMatchIndex(), lastIndex, isVoting); + if (isVoting && followerInfo.getMatchIndex() == lastIndex) { + log.debug("{}: Follower's log matches - sending ElectionTimeout", logName()); + // We can't be sure if the follower has applied all its log entries to its state so send an + // additional AppendEntries with the latest commit index. 
+ sendAppendEntries(0, false); - @Override public void close() throws Exception { - stopHeartBeat(); - } - - @Override public String getLeaderId() { - return context.getId(); - } - - /** - * Encapsulates the snapshot bytestring and handles the logic of sending - * snapshot chunks - */ - protected class FollowerToSnapshot { - private ByteString snapshotBytes; - private int offset = 0; - // the next snapshot chunk is sent only if the replyReceivedForOffset matches offset - private int replyReceivedForOffset; - // if replyStatus is false, the previous chunk is attempted - private boolean replyStatus = false; - private int chunkIndex; - private int totalChunks; - - public FollowerToSnapshot(ByteString snapshotBytes) { - this.snapshotBytes = snapshotBytes; - replyReceivedForOffset = -1; - chunkIndex = 1; - int size = snapshotBytes.size(); - totalChunks = ( size / context.getConfigParams().getSnapshotChunkSize()) + - ((size % context.getConfigParams().getSnapshotChunkSize()) > 0 ? 1 : 0); - if(LOG.isDebugEnabled()) { - LOG.debug("Snapshot {} bytes, total chunks to send:{}", - size, totalChunks); - } - } + // Now send a TimeoutNow message to the matching follower to immediately start an election. 
+ ActorSelection followerActor = context.getPeerActorSelection(followerId); + followerActor.tell(TimeoutNow.INSTANCE, context.getActor()); - public ByteString getSnapshotBytes() { - return snapshotBytes; - } + log.debug("{}: Leader transfer complete", logName()); - public int incrementOffset() { - if(replyStatus) { - // if prev chunk failed, we would want to sent the same chunk again - offset = offset + context.getConfigParams().getSnapshotChunkSize(); - } - return offset; + leadershipTransferContext.transferCohort.transferComplete(); + leadershipTransferContext = null; } + } - public int incrementChunkIndex() { - if (replyStatus) { - // if prev chunk failed, we would want to sent the same chunk again - chunkIndex = chunkIndex + 1; - } - return chunkIndex; + @Override + public void close() { + if (leadershipTransferContext != null) { + LeadershipTransferContext localLeadershipTransferContext = leadershipTransferContext; + leadershipTransferContext = null; + localLeadershipTransferContext.transferCohort.abortTransfer(); } - public int getChunkIndex() { - return chunkIndex; - } + super.close(); + } - public int getTotalChunks() { - return totalChunks; - } + @VisibleForTesting + void markFollowerActive(final String followerId) { + getFollower(followerId).markFollowerActive(); + } - public boolean canSendNextChunk() { - // we only send a false if a chunk is sent but we have not received a reply yet - return replyReceivedForOffset == offset; - } + @VisibleForTesting + void markFollowerInActive(final String followerId) { + getFollower(followerId).markFollowerInActive(); + } - public boolean isLastChunk(int chunkIndex) { - return totalChunks == chunkIndex; - } + private static class LeadershipTransferContext { + RaftActorLeadershipTransferCohort transferCohort; + Stopwatch timer = Stopwatch.createStarted(); - public void markSendStatus(boolean success) { - if (success) { - // if the chunk sent was successful - replyReceivedForOffset = offset; - replyStatus = true; - } 
else { - // if the chunk sent was failure - replyReceivedForOffset = offset; - replyStatus = false; - } + LeadershipTransferContext(final RaftActorLeadershipTransferCohort transferCohort) { + this.transferCohort = transferCohort; } - public ByteString getNextChunk() { - int snapshotLength = getSnapshotBytes().size(); - int start = incrementOffset(); - int size = context.getConfigParams().getSnapshotChunkSize(); - if (context.getConfigParams().getSnapshotChunkSize() > snapshotLength) { - size = snapshotLength; - } else { - if ((start + context.getConfigParams().getSnapshotChunkSize()) > snapshotLength) { - size = snapshotLength - start; - } - } - - if(LOG.isDebugEnabled()) { - LOG.debug("length={}, offset={},size={}", - snapshotLength, start, size); + boolean isExpired(final long timeout) { + if (timer.elapsed(TimeUnit.MILLISECONDS) >= timeout) { + transferCohort.abortTransfer(); + return true; } - return getSnapshotBytes().substring(start, start + size); - - } - } - - // called from example-actor for printing the follower-states - public String printFollowerStates() { - StringBuilder sb = new StringBuilder(); - for(FollowerLogInformation followerLogInformation : followerToLog.values()) { - boolean isFollowerActive = followerLogInformation.isFollowerActive(); - sb.append("{"+followerLogInformation.getId() + " state:" + isFollowerActive + "},"); + return false; } - return "[" + sb.toString() + "]"; - } - - @VisibleForTesting - void markFollowerActive(String followerId) { - followerToLog.get(followerId).markFollowerActive(); } }