* terms of the Eclipse Public License v1.0 which accompanies this distribution,
* and is available at http://www.eclipse.org/legal/epl-v10.html
*/
-
package org.opendaylight.controller.cluster.raft.behaviors;
import akka.actor.ActorRef;
import akka.actor.ActorSelection;
-import akka.actor.Cancellable;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-import org.opendaylight.controller.cluster.raft.ClientRequestTracker;
-import org.opendaylight.controller.cluster.raft.ClientRequestTrackerImpl;
+import com.google.common.base.Stopwatch;
+import java.util.concurrent.TimeUnit;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
import org.opendaylight.controller.cluster.raft.FollowerLogInformation;
-import org.opendaylight.controller.cluster.raft.FollowerLogInformationImpl;
import org.opendaylight.controller.cluster.raft.RaftActorContext;
+import org.opendaylight.controller.cluster.raft.RaftActorLeadershipTransferCohort;
import org.opendaylight.controller.cluster.raft.RaftState;
-import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
-import org.opendaylight.controller.cluster.raft.internal.messages.ApplyState;
-import org.opendaylight.controller.cluster.raft.internal.messages.Replicate;
-import org.opendaylight.controller.cluster.raft.internal.messages.SendHeartBeat;
-import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
+import org.opendaylight.controller.cluster.raft.base.messages.ElectionTimeout;
+import org.opendaylight.controller.cluster.raft.base.messages.IsolatedLeaderCheck;
import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
-import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicLong;
/**
* The behavior of a RaftActor when it is in the Leader state
* of matchIndex[i] ≥ N, and log[N].term == currentTerm:
* set commitIndex = N (§5.3, §5.4).
*/
-public class Leader extends AbstractRaftActorBehavior {
-
-
- private final Map<String, FollowerLogInformation> followerToLog =
- new HashMap();
-
- private final Map<String, ActorSelection> followerToActor = new HashMap<>();
-
- private Cancellable heartbeatCancel = null;
-
- private List<ClientRequestTracker> trackerList = new ArrayList<>();
-
- private final int minReplicationCount;
+public class Leader extends AbstractLeader {
+ private static final IsolatedLeaderCheck ISOLATED_LEADER_CHECK = new IsolatedLeaderCheck();
+ private final Stopwatch isolatedLeaderCheck;
+ private @Nullable LeadershipTransferContext leadershipTransferContext;
public Leader(RaftActorContext context) {
super(context);
+ isolatedLeaderCheck = Stopwatch.createStarted();
+ }
- if(lastIndex() >= 0) {
- context.setCommitIndex(lastIndex());
- }
+ @Override public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) {
+ Preconditions.checkNotNull(sender, "sender should not be null");
- for (String followerId : context.getPeerAddresses().keySet()) {
- FollowerLogInformation followerLogInformation =
- new FollowerLogInformationImpl(followerId,
- new AtomicLong(lastIndex()),
- new AtomicLong(-1));
+ if (originalMessage instanceof IsolatedLeaderCheck) {
+ if (isLeaderIsolated()) {
+ LOG.warn("{}: At least {} followers need to be active, Switching {} from Leader to IsolatedLeader",
+ context.getId(), getMinIsolatedLeaderPeerCount(), leaderId);
- followerToActor.put(followerId,
- context.actorSelection(context.getPeerAddress(followerId)));
+ return internalSwitchBehavior(RaftState.IsolatedLeader);
+ }
+ }
- followerToLog.put(followerId, followerLogInformation);
+ return super.handleMessage(sender, originalMessage);
+ }
+ @Override
+ protected void beforeSendHeartbeat(){
+ if(isolatedLeaderCheck.elapsed(TimeUnit.MILLISECONDS) > context.getConfigParams().getIsolatedCheckIntervalInMillis()){
+ context.getActor().tell(ISOLATED_LEADER_CHECK, context.getActor());
+ isolatedLeaderCheck.reset().start();
}
- if (followerToActor.size() > 0) {
- minReplicationCount = (followerToActor.size() + 1) / 2 + 1;
- } else {
- minReplicationCount = 0;
+ if(leadershipTransferContext != null && leadershipTransferContext.isExpired(
+ context.getConfigParams().getElectionTimeOutInterval().toMillis())) {
+ LOG.debug("{}: Leadership transfer expired", logName());
+ leadershipTransferContext = null;
}
+ }
-
- // Immediately schedule a heartbeat
- // Upon election: send initial empty AppendEntries RPCs
- // (heartbeat) to each server; repeat during idle periods to
- // prevent election timeouts (§5.2)
- scheduleHeartBeat(new FiniteDuration(0, TimeUnit.SECONDS));
-
-
+ @Override
+ protected RaftActorBehavior handleAppendEntriesReply(ActorRef sender, AppendEntriesReply appendEntriesReply) {
+ RaftActorBehavior returnBehavior = super.handleAppendEntriesReply(sender, appendEntriesReply);
+ tryToCompleteLeadershipTransfer(appendEntriesReply.getFollowerId());
+ return returnBehavior;
}
- @Override protected RaftState handleAppendEntries(ActorRef sender,
- AppendEntries appendEntries, RaftState suggestedState) {
+ /**
+ * Attempts to transfer leadership to a follower as per the Raft dissertation (§3.10, leadership transfer
+ * extension) as follows:
+ * <ul>
+ * <li>Start a timer (Stopwatch).</li>
+ * <li>Send an initial AppendEntries heartbeat to all followers.</li>
+ * <li>On AppendEntriesReply, check if the replying follower is a voting member and its match index equals
+ * the leader's last log index.</li>
+ * <li>If it matches,
+ * <ul>
+ * <li>Send an additional AppendEntries to ensure the follower has applied all its log entries to its state.</li>
+ * <li>Send an ElectionTimeout to the follower to immediately start an election.</li>
+ * <li>Notify {@link RaftActorLeadershipTransferCohort#transferComplete}.</li>
+ * </ul>
+ * </li>
+ * <li>Otherwise if the election time out period elapses, notify
+ * {@link RaftActorLeadershipTransferCohort#abortTransfer}.</li>
+ * </ul>
+ *
+ * <p>The time out is evaluated in {@code beforeSendHeartbeat()}; a transfer that is still pending when this
+ * behavior is closed is also aborted.
+ *
+ * @param leadershipTransferCohort the cohort to notify when the transfer completes or is aborted
+ */
+ public void transferLeadership(@Nonnull RaftActorLeadershipTransferCohort leadershipTransferCohort) {
+ LOG.debug("{}: Attempting to transfer leadership", logName());
+
+ leadershipTransferContext = new LeadershipTransferContext(leadershipTransferCohort);
+
+ // Send an immediate heart beat to the followers.
+ sendAppendEntries(0, false);
+ }
- context.getLogger()
- .error("An unexpected AppendEntries received in state " + state());
+ private void tryToCompleteLeadershipTransfer(String followerId) {
+ if(leadershipTransferContext == null) {
+ return;
+ }
- return suggestedState;
- }
+ FollowerLogInformation followerInfo = getFollower(followerId);
+ if(followerInfo == null) {
+ return;
+ }
- @Override protected RaftState handleAppendEntriesReply(ActorRef sender,
- AppendEntriesReply appendEntriesReply, RaftState suggestedState) {
+ long lastIndex = context.getReplicatedLog().lastIndex();
+ boolean isVoting = context.getPeerInfo(followerId).isVoting();
- // Do not take any other action since a behavior change is coming
- if (suggestedState != state())
- return suggestedState;
+ LOG.debug("{}: tryToCompleteLeadershipTransfer: followerId: {}, matchIndex: {}, lastIndex: {}, isVoting: {}",
+ logName(), followerId, followerInfo.getMatchIndex(), lastIndex, isVoting);
- // Update the FollowerLogInformation
- String followerId = appendEntriesReply.getFollowerId();
- FollowerLogInformation followerLogInformation =
- followerToLog.get(followerId);
- if (appendEntriesReply.isSuccess()) {
- followerLogInformation
- .setMatchIndex(appendEntriesReply.getLogLastIndex());
- followerLogInformation
- .setNextIndex(appendEntriesReply.getLogLastIndex() + 1);
- } else {
- followerLogInformation.decrNextIndex();
- }
+ if(isVoting && followerInfo.getMatchIndex() == lastIndex) {
+ LOG.debug("{}: Follower's log matches - sending ElectionTimeout", logName());
- // Now figure out if this reply warrants a change in the commitIndex
- // If there exists an N such that N > commitIndex, a majority
- // of matchIndex[i] ≥ N, and log[N].term == currentTerm:
- // set commitIndex = N (§5.3, §5.4).
- for (long N = context.getCommitIndex() + 1; ; N++) {
- int replicatedCount = 1;
+ // We can't be sure if the follower has applied all its log entries to its state so send an
+ // additional AppendEntries with the latest commit index.
+ sendAppendEntries(0, false);
- for (FollowerLogInformation info : followerToLog.values()) {
- if (info.getMatchIndex().get() >= N) {
- replicatedCount++;
- }
- }
+ // Now send an ElectionTimeout to the matching follower to immediately start an election.
+ ActorSelection followerActor = context.getPeerActorSelection(followerId);
+ followerActor.tell(new ElectionTimeout(), context.getActor());
- if (replicatedCount >= minReplicationCount){
- ReplicatedLogEntry replicatedLogEntry =
- context.getReplicatedLog().get(N);
- if (replicatedLogEntry != null
- && replicatedLogEntry.getTerm()
- == currentTerm()) {
- context.setCommitIndex(N);
- }
- } else {
- break;
- }
- }
+ LOG.debug("{}: Leader transfer complete", logName());
- if(context.getCommitIndex() > context.getLastApplied()){
- applyLogToStateMachine(context.getCommitIndex());
+ leadershipTransferContext.transferCohort.transferComplete();
+ leadershipTransferContext = null;
}
-
- return suggestedState;
}
- protected ClientRequestTracker findClientRequestTracker(long logIndex) {
- for (ClientRequestTracker tracker : trackerList) {
- if (tracker.getIndex() == logIndex) {
- return tracker;
- }
+ @Override
+ public void close() throws Exception {
+ if(leadershipTransferContext != null) {
+ leadershipTransferContext.transferCohort.abortTransfer();
}
- return null;
+ super.close();
}
- @Override protected RaftState handleRequestVoteReply(ActorRef sender,
- RequestVoteReply requestVoteReply, RaftState suggestedState) {
- return suggestedState;
+ @VisibleForTesting
+ void markFollowerActive(String followerId) {
+ getFollower(followerId).markFollowerActive();
}
- @Override public RaftState state() {
- return RaftState.Leader;
+ @VisibleForTesting
+ void markFollowerInActive(String followerId) {
+ getFollower(followerId).markFollowerInActive();
}
- @Override public RaftState handleMessage(ActorRef sender, Object message) {
- Preconditions.checkNotNull(sender, "sender should not be null");
-
- try {
- if (message instanceof SendHeartBeat) {
- return sendHeartBeat();
- } else if (message instanceof Replicate) {
-
- Replicate replicate = (Replicate) message;
- long logIndex = replicate.getReplicatedLogEntry().getIndex();
-
- context.getLogger().debug("Replicate message " + logIndex);
+ private static class LeadershipTransferContext {
+ RaftActorLeadershipTransferCohort transferCohort;
+ Stopwatch timer = Stopwatch.createStarted();
- if (followerToActor.size() == 0) {
- context.setCommitIndex(
- replicate.getReplicatedLogEntry().getIndex());
-
- context.getActor()
- .tell(new ApplyState(replicate.getClientActor(),
- replicate.getIdentifier(),
- replicate.getReplicatedLogEntry()),
- context.getActor()
- );
- } else {
-
- trackerList.add(
- new ClientRequestTrackerImpl(replicate.getClientActor(),
- replicate.getIdentifier(),
- logIndex)
- );
-
- ReplicatedLogEntry prevEntry =
- context.getReplicatedLog().get(lastIndex() - 1);
- long prevLogIndex = -1;
- long prevLogTerm = -1;
- if (prevEntry != null) {
- prevLogIndex = prevEntry.getIndex();
- prevLogTerm = prevEntry.getTerm();
- }
- // Send an AppendEntries to all followers
- for (String followerId : followerToActor.keySet()) {
- ActorSelection followerActor =
- followerToActor.get(followerId);
- FollowerLogInformation followerLogInformation =
- followerToLog.get(followerId);
- followerActor.tell(
- new AppendEntries(currentTerm(), context.getId(),
- prevLogIndex, prevLogTerm,
- context.getReplicatedLog().getFrom(
- followerLogInformation.getNextIndex()
- .get()
- ), context.getCommitIndex()
- ),
- actor()
- );
- }
- }
- }
- } finally {
- scheduleHeartBeat(HEART_BEAT_INTERVAL);
+ LeadershipTransferContext(RaftActorLeadershipTransferCohort transferCohort) {
+ this.transferCohort = transferCohort;
}
- return super.handleMessage(sender, message);
- }
-
- private RaftState sendHeartBeat() {
- if (followerToActor.size() > 0) {
- for (String follower : followerToActor.keySet()) {
-
- FollowerLogInformation followerLogInformation =
- followerToLog.get(follower);
-
- AtomicLong nextIndex =
- followerLogInformation.getNextIndex();
-
- List<ReplicatedLogEntry> entries =
- context.getReplicatedLog().getFrom(nextIndex.get());
-
- followerToActor.get(follower).tell(new AppendEntries(
- context.getTermInformation().getCurrentTerm(),
- context.getId(),
- context.getReplicatedLog().lastIndex(),
- context.getReplicatedLog().lastTerm(),
- entries, context.getCommitIndex()),
- context.getActor()
- );
+ boolean isExpired(long timeout) {
+ if(timer.elapsed(TimeUnit.MILLISECONDS) >= timeout) {
+ transferCohort.abortTransfer();
+ return true;
}
- }
- return state();
- }
- private void stopHeartBeat() {
- if (heartbeatCancel != null && !heartbeatCancel.isCancelled()) {
- heartbeatCancel.cancel();
+ return false;
}
}
-
- private void scheduleHeartBeat(FiniteDuration interval) {
- stopHeartBeat();
-
- // Schedule a heartbeat. When the scheduler triggers a SendHeartbeat
- // message is sent to itself.
- // Scheduling the heartbeat only once here because heartbeats do not
- // need to be sent if there are other messages being sent to the remote
- // actor.
- heartbeatCancel =
- context.getActorSystem().scheduler().scheduleOnce(
- interval,
- context.getActor(), new SendHeartBeat(),
- context.getActorSystem().dispatcher(), context.getActor());
- }
-
- @Override public void close() throws Exception {
- stopHeartBeat();
- }
-
- @Override public String getLeaderId() {
- return context.getId();
- }
-
}