X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-akka-raft%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fraft%2Fbehaviors%2FLeader.java;h=eb49abc17aea4a0ebefacde0a5925312082e1405;hb=4ecb8ecaf04594b3312a44d801423f515ea445b3;hp=d83362b58081c0e4c4576a848bf10ca29d8fc7da;hpb=4206a55cd7724e15e3c5b7b5b7c12f78481384cf;p=controller.git
diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java
index d83362b580..eb49abc17a 100644
--- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java
+++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java
@@ -5,49 +5,28 @@
* terms of the Eclipse Public License v1.0 which accompanies this distribution,
* and is available at http://www.eclipse.org/legal/epl-v10.html
*/
-
package org.opendaylight.controller.cluster.raft.behaviors;
import akka.actor.ActorRef;
import akka.actor.ActorSelection;
-import akka.actor.Cancellable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
-import com.google.protobuf.ByteString;
-import org.opendaylight.controller.cluster.raft.ClientRequestTracker;
-import org.opendaylight.controller.cluster.raft.ClientRequestTrackerImpl;
+import com.google.common.base.Stopwatch;
+import java.util.concurrent.TimeUnit;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
import org.opendaylight.controller.cluster.raft.FollowerLogInformation;
-import org.opendaylight.controller.cluster.raft.FollowerLogInformationImpl;
import org.opendaylight.controller.cluster.raft.RaftActorContext;
+import org.opendaylight.controller.cluster.raft.RaftActorLeadershipTransferCohort;
import org.opendaylight.controller.cluster.raft.RaftState;
-import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
-import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot;
-import org.opendaylight.controller.cluster.raft.base.messages.InitiateInstallSnapshot;
-import org.opendaylight.controller.cluster.raft.base.messages.Replicate;
-import org.opendaylight.controller.cluster.raft.base.messages.SendHeartBeat;
-import org.opendaylight.controller.cluster.raft.base.messages.SendInstallSnapshot;
-import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
+import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow;
import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
-import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
-import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
-import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
-import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicLong;
/**
- * The behavior of a RaftActor when it is in the Leader state
- *
+ * The behavior of a RaftActor when it is in the Leader state.
+ *
+ *
* Leaders:
*
* - Upon election: send initial empty AppendEntries RPCs
@@ -57,693 +36,177 @@ import java.util.concurrent.atomic.AtomicLong;
* respond after entry applied to state machine (§5.3)
*
- If last log index ⥠nextIndex for a follower: send
* AppendEntries RPC with log entries starting at nextIndex
- *
* - If successful: update nextIndex and matchIndex for
* follower (§5.3)
*
- If AppendEntries fails because of log inconsistency:
* decrement nextIndex and retry (§5.3)
- *
- * - If there exists an N such that N > commitIndex, a majority
+ *
- If there exists an N such that N > commitIndex, a majority
* of matchIndex[i] ⥠N, and log[N].term == currentTerm:
* set commitIndex = N (§5.3, §5.4).
+ *
*/
-public class Leader extends AbstractRaftActorBehavior {
-
-
- protected final Map followerToLog = new HashMap<>();
- protected final Map mapFollowerToSnapshot = new HashMap<>();
-
- private final Set followers;
-
- private Cancellable heartbeatSchedule = null;
- private Cancellable installSnapshotSchedule = null;
-
- private List trackerList = new ArrayList<>();
-
- private final int minReplicationCount;
-
- private Optional snapshot;
-
- public Leader(RaftActorContext context) {
- super(context);
-
- followers = context.getPeerAddresses().keySet();
-
- for (String followerId : followers) {
- FollowerLogInformation followerLogInformation =
- new FollowerLogInformationImpl(followerId,
- new AtomicLong(context.getCommitIndex()),
- new AtomicLong(-1),
- context.getConfigParams().getElectionTimeOutInterval());
-
- followerToLog.put(followerId, followerLogInformation);
- }
-
- if(LOG.isDebugEnabled()) {
- LOG.debug("Election:Leader has following peers: {}", followers);
- }
-
- if (followers.size() > 0) {
- minReplicationCount = (followers.size() + 1) / 2 + 1;
- } else {
- minReplicationCount = 0;
- }
-
- snapshot = Optional.absent();
-
- // Immediately schedule a heartbeat
- // Upon election: send initial empty AppendEntries RPCs
- // (heartbeat) to each server; repeat during idle periods to
- // prevent election timeouts (§5.2)
- scheduleHeartBeat(new FiniteDuration(0, TimeUnit.SECONDS));
-
- scheduleInstallSnapshotCheck(
- new FiniteDuration(context.getConfigParams().getHeartBeatInterval().length() * 1000,
- context.getConfigParams().getHeartBeatInterval().unit())
- );
-
- }
-
- private Optional getSnapshot() {
- return snapshot;
- }
-
+public class Leader extends AbstractLeader {
+ /**
+ * Internal message sent to periodically check if this leader has become isolated and should transition
+ * to {@link IsolatedLeader}.
+ */
@VisibleForTesting
- void setSnapshot(Optional snapshot) {
- this.snapshot = snapshot;
- }
-
- @Override protected RaftActorBehavior handleAppendEntries(ActorRef sender,
- AppendEntries appendEntries) {
-
- if(LOG.isDebugEnabled()) {
- LOG.debug(appendEntries.toString());
- }
-
- return this;
- }
-
- @Override protected RaftActorBehavior handleAppendEntriesReply(ActorRef sender,
- AppendEntriesReply appendEntriesReply) {
-
- if(! appendEntriesReply.isSuccess()) {
- if(LOG.isDebugEnabled()) {
- LOG.debug(appendEntriesReply.toString());
- }
- }
-
- // Update the FollowerLogInformation
- String followerId = appendEntriesReply.getFollowerId();
- FollowerLogInformation followerLogInformation =
- followerToLog.get(followerId);
-
- if(followerLogInformation == null){
- LOG.error("Unknown follower {}", followerId);
- return this;
- }
+ static final Object ISOLATED_LEADER_CHECK = new Object();
- followerLogInformation.markFollowerActive();
+ private final Stopwatch isolatedLeaderCheck = Stopwatch.createStarted();
+ @Nullable private LeadershipTransferContext leadershipTransferContext;
- if (appendEntriesReply.isSuccess()) {
- followerLogInformation
- .setMatchIndex(appendEntriesReply.getLogLastIndex());
- followerLogInformation
- .setNextIndex(appendEntriesReply.getLogLastIndex() + 1);
- } else {
-
- // TODO: When we find that the follower is out of sync with the
- // Leader we simply decrement that followers next index by 1.
- // Would it be possible to do better than this? The RAFT spec
- // does not explicitly deal with it but may be something for us to
- // think about
-
- followerLogInformation.decrNextIndex();
- }
-
- // Now figure out if this reply warrants a change in the commitIndex
- // If there exists an N such that N > commitIndex, a majority
- // of matchIndex[i] ⥠N, and log[N].term == currentTerm:
- // set commitIndex = N (§5.3, §5.4).
- for (long N = context.getCommitIndex() + 1; ; N++) {
- int replicatedCount = 1;
-
- for (FollowerLogInformation info : followerToLog.values()) {
- if (info.getMatchIndex().get() >= N) {
- replicatedCount++;
- }
- }
-
- if (replicatedCount >= minReplicationCount) {
- ReplicatedLogEntry replicatedLogEntry =
- context.getReplicatedLog().get(N);
- if (replicatedLogEntry != null
- && replicatedLogEntry.getTerm()
- == currentTerm()) {
- context.setCommitIndex(N);
- }
- } else {
- break;
- }
- }
-
- // Apply the change to the state machine
- if (context.getCommitIndex() > context.getLastApplied()) {
- applyLogToStateMachine(context.getCommitIndex());
- }
-
- return this;
- }
-
- protected ClientRequestTracker removeClientRequestTracker(long logIndex) {
-
- ClientRequestTracker toRemove = findClientRequestTracker(logIndex);
- if(toRemove != null) {
- trackerList.remove(toRemove);
- }
-
- return toRemove;
+ Leader(RaftActorContext context, @Nullable AbstractLeader initializeFromLeader) {
+ super(context, RaftState.Leader, initializeFromLeader);
}
- protected ClientRequestTracker findClientRequestTracker(long logIndex) {
- for (ClientRequestTracker tracker : trackerList) {
- if (tracker.getIndex() == logIndex) {
- return tracker;
- }
- }
-
- return null;
- }
-
- @Override protected RaftActorBehavior handleRequestVoteReply(ActorRef sender,
- RequestVoteReply requestVoteReply) {
- return this;
- }
-
- @Override public RaftState state() {
- return RaftState.Leader;
+ public Leader(RaftActorContext context) {
+ this(context, null);
}
- @Override public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) {
+ @Override
+ public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) {
Preconditions.checkNotNull(sender, "sender should not be null");
- Object message = fromSerializableMessage(originalMessage);
-
- if (message instanceof RaftRPC) {
- RaftRPC rpc = (RaftRPC) message;
- // If RPC request or response contains term T > currentTerm:
- // set currentTerm = T, convert to follower (§5.1)
- // This applies to all RPC messages and responses
- if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
- context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
-
- return switchBehavior(new Follower(context));
- }
- }
-
- try {
- if (message instanceof SendHeartBeat) {
- sendHeartBeat();
- return this;
-
- } else if(message instanceof InitiateInstallSnapshot) {
- installSnapshotIfNeeded();
-
- } else if(message instanceof SendInstallSnapshot) {
- // received from RaftActor
- setSnapshot(Optional.of(((SendInstallSnapshot) message).getSnapshot()));
- sendInstallSnapshot();
-
- } else if (message instanceof Replicate) {
- replicate((Replicate) message);
-
- } else if (message instanceof InstallSnapshotReply){
- handleInstallSnapshotReply(
- (InstallSnapshotReply) message);
- }
- } finally {
- scheduleHeartBeat(context.getConfigParams().getHeartBeatInterval());
- }
-
- return super.handleMessage(sender, message);
- }
-
- private void handleInstallSnapshotReply(InstallSnapshotReply reply) {
- String followerId = reply.getFollowerId();
- FollowerToSnapshot followerToSnapshot = mapFollowerToSnapshot.get(followerId);
- FollowerLogInformation followerLogInformation = followerToLog.get(followerId);
- followerLogInformation.markFollowerActive();
-
- if (followerToSnapshot != null &&
- followerToSnapshot.getChunkIndex() == reply.getChunkIndex()) {
-
- if (reply.isSuccess()) {
- if(followerToSnapshot.isLastChunk(reply.getChunkIndex())) {
- //this was the last chunk reply
- if(LOG.isDebugEnabled()) {
- LOG.debug("InstallSnapshotReply received, " +
- "last chunk received, Chunk:{}. Follower:{} Setting nextIndex:{}",
- reply.getChunkIndex(), followerId,
- context.getReplicatedLog().getSnapshotIndex() + 1
- );
- }
-
- followerLogInformation.setMatchIndex(
- context.getReplicatedLog().getSnapshotIndex());
- followerLogInformation.setNextIndex(
- context.getReplicatedLog().getSnapshotIndex() + 1);
- mapFollowerToSnapshot.remove(followerId);
-
- if(LOG.isDebugEnabled()) {
- LOG.debug("followerToLog.get(followerId).getNextIndex().get()=" +
- followerToLog.get(followerId).getNextIndex().get());
- }
-
- if (mapFollowerToSnapshot.isEmpty()) {
- // once there are no pending followers receiving snapshots
- // we can remove snapshot from the memory
- setSnapshot(Optional.absent());
- }
-
- } else {
- followerToSnapshot.markSendStatus(true);
- }
+ if (ISOLATED_LEADER_CHECK.equals(originalMessage)) {
+ if (isLeaderIsolated()) {
+ log.warn("{}: At least {} followers need to be active, Switching {} from Leader to IsolatedLeader",
+ context.getId(), getMinIsolatedLeaderPeerCount(), getLeaderId());
+ return internalSwitchBehavior(new IsolatedLeader(context, this));
} else {
- LOG.info("InstallSnapshotReply received, " +
- "sending snapshot chunk failed, Will retry, Chunk:{}",
- reply.getChunkIndex()
- );
- followerToSnapshot.markSendStatus(false);
+ return this;
}
-
} else {
- LOG.error("ERROR!!" +
- "FollowerId in InstallSnapshotReply not known to Leader" +
- " or Chunk Index in InstallSnapshotReply not matching {} != {}",
- followerToSnapshot.getChunkIndex(), reply.getChunkIndex()
- );
+ return super.handleMessage(sender, originalMessage);
}
}
- private void replicate(Replicate replicate) {
- long logIndex = replicate.getReplicatedLogEntry().getIndex();
-
- if(LOG.isDebugEnabled()) {
- LOG.debug("Replicate message {}", logIndex);
+ @Override
+ protected void beforeSendHeartbeat() {
+ if (isolatedLeaderCheck.elapsed(TimeUnit.MILLISECONDS)
+ > context.getConfigParams().getIsolatedCheckIntervalInMillis()) {
+ context.getActor().tell(ISOLATED_LEADER_CHECK, context.getActor());
+ isolatedLeaderCheck.reset().start();
}
- // Create a tracker entry we will use this later to notify the
- // client actor
- trackerList.add(
- new ClientRequestTrackerImpl(replicate.getClientActor(),
- replicate.getIdentifier(),
- logIndex)
- );
-
- if (followers.size() == 0) {
- context.setCommitIndex(logIndex);
- applyLogToStateMachine(logIndex);
- } else {
- sendAppendEntries();
+ if (leadershipTransferContext != null && leadershipTransferContext.isExpired(
+ context.getConfigParams().getElectionTimeOutInterval().toMillis())) {
+ log.debug("{}: Leadership transfer expired", logName());
+ leadershipTransferContext = null;
}
}
- private void sendAppendEntries() {
- // Send an AppendEntries to all followers
- for (String followerId : followers) {
- ActorSelection followerActor = context.getPeerActorSelection(followerId);
-
- if (followerActor != null) {
- FollowerLogInformation followerLogInformation = followerToLog.get(followerId);
- long followerNextIndex = followerLogInformation.getNextIndex().get();
- boolean isFollowerActive = followerLogInformation.isFollowerActive();
- List entries = null;
-
- if (mapFollowerToSnapshot.get(followerId) != null) {
- // if install snapshot is in process , then sent next chunk if possible
- if (isFollowerActive && mapFollowerToSnapshot.get(followerId).canSendNextChunk()) {
- sendSnapshotChunk(followerActor, followerId);
- } else {
- // we send a heartbeat even if we have not received a reply for the last chunk
- sendAppendEntriesToFollower(followerActor, followerNextIndex,
- Collections.emptyList());
- }
-
- } else {
- long leaderLastIndex = context.getReplicatedLog().lastIndex();
- long leaderSnapShotIndex = context.getReplicatedLog().getSnapshotIndex();
-
- if (isFollowerActive &&
- context.getReplicatedLog().isPresent(followerNextIndex)) {
- // FIXME : Sending one entry at a time
- entries = context.getReplicatedLog().getFrom(followerNextIndex, 1);
-
- } else if (isFollowerActive && followerNextIndex >= 0 &&
- leaderLastIndex >= followerNextIndex ) {
- // if the followers next index is not present in the leaders log, and
- // if the follower is just not starting and if leader's index is more than followers index
- // then snapshot should be sent
-
- if(LOG.isDebugEnabled()) {
- LOG.debug("InitiateInstallSnapshot to follower:{}," +
- "follower-nextIndex:{}, leader-snapshot-index:{}, " +
- "leader-last-index:{}", followerId,
- followerNextIndex, leaderSnapShotIndex, leaderLastIndex
- );
- }
- actor().tell(new InitiateInstallSnapshot(), actor());
-
- // we would want to sent AE as the capture snapshot might take time
- entries = Collections.emptyList();
-
- } else {
- //we send an AppendEntries, even if the follower is inactive
- // in-order to update the followers timestamp, in case it becomes active again
- entries = Collections.emptyList();
- }
-
- sendAppendEntriesToFollower(followerActor, followerNextIndex, entries);
-
- }
- }
- }
- }
-
- private void sendAppendEntriesToFollower(ActorSelection followerActor, long followerNextIndex,
- List entries) {
- followerActor.tell(
- new AppendEntries(currentTerm(), context.getId(),
- prevLogIndex(followerNextIndex),
- prevLogTerm(followerNextIndex), entries,
- context.getCommitIndex()).toSerializable(),
- actor()
- );
+ @Override
+ protected RaftActorBehavior handleAppendEntriesReply(ActorRef sender, AppendEntriesReply appendEntriesReply) {
+ RaftActorBehavior returnBehavior = super.handleAppendEntriesReply(sender, appendEntriesReply);
+ tryToCompleteLeadershipTransfer(appendEntriesReply.getFollowerId());
+ return returnBehavior;
}
/**
- * An installSnapshot is scheduled at a interval that is a multiple of
- * a HEARTBEAT_INTERVAL. This is to avoid the need to check for installing
- * snapshots at every heartbeat.
- *
- * Install Snapshot works as follows
- * 1. Leader sends a InitiateInstallSnapshot message to self
- * 2. Leader then initiates the capture snapshot by sending a CaptureSnapshot message to actor
- * 3. RaftActor on receipt of the CaptureSnapshotReply (from Shard), stores the received snapshot in the replicated log
- * and makes a call to Leader's handleMessage , with SendInstallSnapshot message.
- * 4. Leader , picks the snapshot from im-mem ReplicatedLog and sends it in chunks to the Follower
- * 5. On complete, Follower sends back a InstallSnapshotReply.
- * 6. On receipt of the InstallSnapshotReply for the last chunk, Leader marks the install complete for that follower
- * and replenishes the memory by deleting the snapshot in Replicated log.
+ * Attempts to transfer leadership to a follower as per the raft paper (§3.10) as follows:
+ *
+ * - Start a timer (Stopwatch).
+ * - Send an initial AppendEntries heartbeat to all followers.
+ * - On AppendEntriesReply, check if the follower's new match Index matches the leader's last index
+ * - If it matches,
+ *
+ * - Send an additional AppendEntries to ensure the follower has applied all its log entries to its state.
+ * - Send an ElectionTimeout to the follower to immediately start an election.
+ * - Notify {@link RaftActorLeadershipTransferCohort#transferComplete}.
+ *
+ * - Otherwise if the election time out period elapses, notify
+ * {@link RaftActorLeadershipTransferCohort#abortTransfer}.
+ *
*
+ * @param leadershipTransferCohort the cohort participating in the leadership transfer
*/
- private void installSnapshotIfNeeded() {
- for (String followerId : followers) {
- ActorSelection followerActor =
- context.getPeerActorSelection(followerId);
-
- if(followerActor != null) {
- FollowerLogInformation followerLogInformation =
- followerToLog.get(followerId);
-
- long nextIndex = followerLogInformation.getNextIndex().get();
+ public void transferLeadership(@Nonnull RaftActorLeadershipTransferCohort leadershipTransferCohort) {
+ log.debug("{}: Attempting to transfer leadership", logName());
- if (!context.getReplicatedLog().isPresent(nextIndex) &&
- context.getReplicatedLog().isInSnapshot(nextIndex)) {
- LOG.info("{} follower needs a snapshot install", followerId);
- if (snapshot.isPresent()) {
- // if a snapshot is present in the memory, most likely another install is in progress
- // no need to capture snapshot
- sendSnapshotChunk(followerActor, followerId);
+ leadershipTransferContext = new LeadershipTransferContext(leadershipTransferCohort);
- } else {
- initiateCaptureSnapshot();
- //we just need 1 follower who would need snapshot to be installed.
- // when we have the snapshot captured, we would again check (in SendInstallSnapshot)
- // who needs an install and send to all who need
- break;
- }
-
- }
- }
- }
+ // Send an immediate heart beat to the followers.
+ sendAppendEntries(0, false);
}
- // on every install snapshot, we try to capture the snapshot.
- // Once a capture is going on, another one issued will get ignored by RaftActor.
- private void initiateCaptureSnapshot() {
- LOG.info("Initiating Snapshot Capture to Install Snapshot, Leader:{}", getLeaderId());
- ReplicatedLogEntry lastAppliedEntry = context.getReplicatedLog().get(context.getLastApplied());
- long lastAppliedIndex = -1;
- long lastAppliedTerm = -1;
-
- if (lastAppliedEntry != null) {
- lastAppliedIndex = lastAppliedEntry.getIndex();
- lastAppliedTerm = lastAppliedEntry.getTerm();
- } else if (context.getReplicatedLog().getSnapshotIndex() > -1) {
- lastAppliedIndex = context.getReplicatedLog().getSnapshotIndex();
- lastAppliedTerm = context.getReplicatedLog().getSnapshotTerm();
+ private void tryToCompleteLeadershipTransfer(String followerId) {
+ if (leadershipTransferContext == null) {
+ return;
}
- boolean isInstallSnapshotInitiated = true;
- actor().tell(new CaptureSnapshot(lastIndex(), lastTerm(),
- lastAppliedIndex, lastAppliedTerm, isInstallSnapshotInitiated),
- actor());
- }
-
-
- private void sendInstallSnapshot() {
- for (String followerId : followers) {
- ActorSelection followerActor = context.getPeerActorSelection(followerId);
-
- if(followerActor != null) {
- FollowerLogInformation followerLogInformation = followerToLog.get(followerId);
- long nextIndex = followerLogInformation.getNextIndex().get();
-
- if (!context.getReplicatedLog().isPresent(nextIndex) &&
- context.getReplicatedLog().isInSnapshot(nextIndex)) {
- sendSnapshotChunk(followerActor, followerId);
- }
- }
+ final Optional requestedFollowerIdOptional
+ = leadershipTransferContext.transferCohort.getRequestedFollowerId();
+ if (requestedFollowerIdOptional.isPresent() && !requestedFollowerIdOptional.get().equals(followerId)) {
+ // we want to transfer leadership to specific follower
+ return;
}
- }
- /**
- * Sends a snapshot chunk to a given follower
- * InstallSnapshot should qualify as a heartbeat too.
- */
- private void sendSnapshotChunk(ActorSelection followerActor, String followerId) {
- try {
- if (snapshot.isPresent()) {
- followerActor.tell(
- new InstallSnapshot(currentTerm(), context.getId(),
- context.getReplicatedLog().getSnapshotIndex(),
- context.getReplicatedLog().getSnapshotTerm(),
- getNextSnapshotChunk(followerId,snapshot.get()),
- mapFollowerToSnapshot.get(followerId).incrementChunkIndex(),
- mapFollowerToSnapshot.get(followerId).getTotalChunks()
- ).toSerializable(),
- actor()
- );
- LOG.info("InstallSnapshot sent to follower {}, Chunk: {}/{}",
- followerActor.path(), mapFollowerToSnapshot.get(followerId).getChunkIndex(),
- mapFollowerToSnapshot.get(followerId).getTotalChunks());
- }
- } catch (IOException e) {
- LOG.error(e, "InstallSnapshot failed for Leader.");
+ FollowerLogInformation followerInfo = getFollower(followerId);
+ if (followerInfo == null) {
+ return;
}
- }
- /**
- * Acccepts snaphot as ByteString, enters into map for future chunks
- * creates and return a ByteString chunk
- */
- private ByteString getNextSnapshotChunk(String followerId, ByteString snapshotBytes) throws IOException {
- FollowerToSnapshot followerToSnapshot = mapFollowerToSnapshot.get(followerId);
- if (followerToSnapshot == null) {
- followerToSnapshot = new FollowerToSnapshot(snapshotBytes);
- mapFollowerToSnapshot.put(followerId, followerToSnapshot);
- }
- ByteString nextChunk = followerToSnapshot.getNextChunk();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Leader's snapshot nextChunk size:{}", nextChunk.size());
- }
- return nextChunk;
- }
+ long lastIndex = context.getReplicatedLog().lastIndex();
+ boolean isVoting = context.getPeerInfo(followerId).isVoting();
- private void sendHeartBeat() {
- if (followers.size() > 0) {
- sendAppendEntries();
- }
- }
+ log.debug("{}: tryToCompleteLeadershipTransfer: followerId: {}, matchIndex: {}, lastIndex: {}, isVoting: {}",
+ logName(), followerId, followerInfo.getMatchIndex(), lastIndex, isVoting);
- private void stopHeartBeat() {
- if (heartbeatSchedule != null && !heartbeatSchedule.isCancelled()) {
- heartbeatSchedule.cancel();
- }
- }
+ if (isVoting && followerInfo.getMatchIndex() == lastIndex) {
+ log.debug("{}: Follower's log matches - sending ElectionTimeout", logName());
- private void stopInstallSnapshotSchedule() {
- if (installSnapshotSchedule != null && !installSnapshotSchedule.isCancelled()) {
- installSnapshotSchedule.cancel();
- }
- }
+ // We can't be sure if the follower has applied all its log entries to its state so send an
+ // additional AppendEntries with the latest commit index.
+ sendAppendEntries(0, false);
- private void scheduleHeartBeat(FiniteDuration interval) {
- if(followers.size() == 0){
- // Optimization - do not bother scheduling a heartbeat as there are
- // no followers
- return;
- }
+ // Now send a TimeoutNow message to the matching follower to immediately start an election.
+ ActorSelection followerActor = context.getPeerActorSelection(followerId);
+ followerActor.tell(TimeoutNow.INSTANCE, context.getActor());
- stopHeartBeat();
+ log.debug("{}: Leader transfer complete", logName());
- // Schedule a heartbeat. When the scheduler triggers a SendHeartbeat
- // message is sent to itself.
- // Scheduling the heartbeat only once here because heartbeats do not
- // need to be sent if there are other messages being sent to the remote
- // actor.
- heartbeatSchedule = context.getActorSystem().scheduler().scheduleOnce(
- interval, context.getActor(), new SendHeartBeat(),
- context.getActorSystem().dispatcher(), context.getActor());
+ leadershipTransferContext.transferCohort.transferComplete();
+ leadershipTransferContext = null;
+ }
}
- private void scheduleInstallSnapshotCheck(FiniteDuration interval) {
- if(followers.size() == 0){
- // Optimization - do not bother scheduling a heartbeat as there are
- // no followers
- return;
+ @Override
+ public void close() {
+ if (leadershipTransferContext != null) {
+ LeadershipTransferContext localLeadershipTransferContext = leadershipTransferContext;
+ leadershipTransferContext = null;
+ localLeadershipTransferContext.transferCohort.abortTransfer();
}
- stopInstallSnapshotSchedule();
-
- // Schedule a message to send append entries to followers that can
- // accept an append entries with some data in it
- installSnapshotSchedule =
- context.getActorSystem().scheduler().scheduleOnce(
- interval,
- context.getActor(), new InitiateInstallSnapshot(),
- context.getActorSystem().dispatcher(), context.getActor());
+ super.close();
}
-
-
- @Override public void close() throws Exception {
- stopHeartBeat();
+ @VisibleForTesting
+ void markFollowerActive(String followerId) {
+ getFollower(followerId).markFollowerActive();
}
- @Override public String getLeaderId() {
- return context.getId();
+ @VisibleForTesting
+ void markFollowerInActive(String followerId) {
+ getFollower(followerId).markFollowerInActive();
}
- /**
- * Encapsulates the snapshot bytestring and handles the logic of sending
- * snapshot chunks
- */
- protected class FollowerToSnapshot {
- private ByteString snapshotBytes;
- private int offset = 0;
- // the next snapshot chunk is sent only if the replyReceivedForOffset matches offset
- private int replyReceivedForOffset;
- // if replyStatus is false, the previous chunk is attempted
- private boolean replyStatus = false;
- private int chunkIndex;
- private int totalChunks;
-
- public FollowerToSnapshot(ByteString snapshotBytes) {
- this.snapshotBytes = snapshotBytes;
- replyReceivedForOffset = -1;
- chunkIndex = 1;
- int size = snapshotBytes.size();
- totalChunks = ( size / context.getConfigParams().getSnapshotChunkSize()) +
- ((size % context.getConfigParams().getSnapshotChunkSize()) > 0 ? 1 : 0);
- if(LOG.isDebugEnabled()) {
- LOG.debug("Snapshot {} bytes, total chunks to send:{}",
- size, totalChunks);
- }
- }
-
- public ByteString getSnapshotBytes() {
- return snapshotBytes;
- }
-
- public int incrementOffset() {
- if(replyStatus) {
- // if prev chunk failed, we would want to sent the same chunk again
- offset = offset + context.getConfigParams().getSnapshotChunkSize();
- }
- return offset;
- }
-
- public int incrementChunkIndex() {
- if (replyStatus) {
- // if prev chunk failed, we would want to sent the same chunk again
- chunkIndex = chunkIndex + 1;
- }
- return chunkIndex;
- }
-
- public int getChunkIndex() {
- return chunkIndex;
- }
-
- public int getTotalChunks() {
- return totalChunks;
- }
-
- public boolean canSendNextChunk() {
- // we only send a false if a chunk is sent but we have not received a reply yet
- return replyReceivedForOffset == offset;
- }
+ private static class LeadershipTransferContext {
+ RaftActorLeadershipTransferCohort transferCohort;
+ Stopwatch timer = Stopwatch.createStarted();
- public boolean isLastChunk(int chunkIndex) {
- return totalChunks == chunkIndex;
- }
-
- public void markSendStatus(boolean success) {
- if (success) {
- // if the chunk sent was successful
- replyReceivedForOffset = offset;
- replyStatus = true;
- } else {
- // if the chunk sent was failure
- replyReceivedForOffset = offset;
- replyStatus = false;
- }
+ LeadershipTransferContext(RaftActorLeadershipTransferCohort transferCohort) {
+ this.transferCohort = transferCohort;
}
- public ByteString getNextChunk() {
- int snapshotLength = getSnapshotBytes().size();
- int start = incrementOffset();
- int size = context.getConfigParams().getSnapshotChunkSize();
- if (context.getConfigParams().getSnapshotChunkSize() > snapshotLength) {
- size = snapshotLength;
- } else {
- if ((start + context.getConfigParams().getSnapshotChunkSize()) > snapshotLength) {
- size = snapshotLength - start;
- }
- }
-
- if(LOG.isDebugEnabled()) {
- LOG.debug("length={}, offset={},size={}",
- snapshotLength, start, size);
+ boolean isExpired(long timeout) {
+ if (timer.elapsed(TimeUnit.MILLISECONDS) >= timeout) {
+ transferCohort.abortTransfer();
+ return true;
}
- return getSnapshotBytes().substring(start, start + size);
-
- }
- }
-
- // called from example-actor for printing the follower-states
- public String printFollowerStates() {
- StringBuilder sb = new StringBuilder();
- for(FollowerLogInformation followerLogInformation : followerToLog.values()) {
- boolean isFollowerActive = followerLogInformation.isFollowerActive();
- sb.append("{"+followerLogInformation.getId() + " state:" + isFollowerActive + "},");
+ return false;
}
- return "[" + sb.toString() + "]";
- }
-
- @VisibleForTesting
- void markFollowerActive(String followerId) {
- followerToLog.get(followerId).markFollowerActive();
}
}