X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-akka-raft%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fraft%2Fbehaviors%2FFollower.java;h=4913d98c36e9090253cf0ea4ece40873a2d655cd;hb=703c260bebba0a8f7efa85edef4ad4d359c59cda;hp=4fa4dbfb57b34f620eecd6b82710dffc754cd88f;hpb=b4bf55727093657662d8c16a50fa85f87978a586;p=controller.git diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java index 4fa4dbfb57..4913d98c36 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java @@ -14,22 +14,24 @@ import akka.cluster.Cluster; import akka.cluster.ClusterEvent.CurrentClusterState; import akka.cluster.Member; import akka.cluster.MemberStatus; -import akka.japi.Procedure; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.IOException; -import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; import org.eclipse.jdt.annotation.Nullable; import org.opendaylight.controller.cluster.messaging.MessageAssembler; import org.opendaylight.controller.cluster.raft.RaftActorContext; import org.opendaylight.controller.cluster.raft.RaftState; import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry; import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot; +import org.opendaylight.controller.cluster.raft.base.messages.ApplyState; import org.opendaylight.controller.cluster.raft.base.messages.ElectionTimeout; import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow; import org.opendaylight.controller.cluster.raft.messages.AppendEntries; @@ -51,6 +53,7 @@ import org.opendaylight.controller.cluster.raft.persisted.Snapshot; * convert to candidate * */ +// Non-final for testing public class Follower extends AbstractRaftActorBehavior { private static final long MAX_ELECTION_TIMEOUT_FACTOR = 18; @@ -67,11 +70,13 @@ public class Follower extends AbstractRaftActorBehavior { this(context, null, (short)-1); } + @SuppressFBWarnings(value = "MC_OVERRIDABLE_METHOD_CALL_IN_CONSTRUCTOR", + justification = "electionDuration() is not final for Candidate override") public Follower(final RaftActorContext context, final String initialLeaderId, final short initialLeaderPayloadVersion) { super(context, RaftState.Follower); - this.leaderId = initialLeaderId; - this.leaderPayloadVersion = initialLeaderPayloadVersion; + leaderId = initialLeaderId; + leaderPayloadVersion = initialLeaderPayloadVersion; initialSyncStatusTracker = new SyncStatusTracker(context.getActor(), getId(), context.getConfigParams() .getSyncIndexThreshold()); @@ -303,7 +308,7 @@ public class Follower extends AbstractRaftActorBehavior { // applied to the state already, as the persistence callback occurs async, and we want those entries // purged from the persisted log as well. final AtomicBoolean shouldCaptureSnapshot = new AtomicBoolean(false); - final Procedure appendAndPersistCallback = logEntry -> { + final Consumer appendAndPersistCallback = logEntry -> { final List entries = appendEntries.getEntries(); final ReplicatedLogEntry lastEntryToAppend = entries.get(entries.size() - 1); if (shouldCaptureSnapshot.get() && logEntry == lastEntryToAppend) { @@ -322,8 +327,8 @@ public class Follower extends AbstractRaftActorBehavior { shouldCaptureSnapshot.compareAndSet(false, context.getReplicatedLog().shouldCaptureSnapshot(entry.getIndex())); - if (entry.getData() instanceof ServerConfigurationPayload) { - context.updatePeerIds((ServerConfigurationPayload)entry.getData()); + if (entry.getData() instanceof ServerConfigurationPayload serverConfiguration) { + context.updatePeerIds(serverConfiguration); } } @@ -408,8 +413,8 @@ public class Follower extends AbstractRaftActorBehavior { return false; } - private void sendOutOfSyncAppendEntriesReply(final ActorRef sender, boolean forceInstallSnapshot, - short leaderRaftVersion) { + private void sendOutOfSyncAppendEntriesReply(final ActorRef sender, final boolean forceInstallSnapshot, + final short leaderRaftVersion) { // We found that the log was out of sync so just send a negative reply. final AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex(), lastTerm(), context.getPayloadVersion(), forceInstallSnapshot, needsLeaderAddress(), @@ -435,6 +440,11 @@ public class Follower extends AbstractRaftActorBehavior { return this; } + @Override + final ApplyState getApplyStateFor(final ReplicatedLogEntry entry) { + return new ApplyState(null, null, entry); + } + @Override public RaftActorBehavior handleMessage(final ActorRef sender, final Object message) { if (message instanceof ElectionTimeout || message instanceof TimeoutNow) { @@ -445,30 +455,29 @@ public class Follower extends AbstractRaftActorBehavior { return this; } - if (!(message instanceof RaftRPC)) { + if (!(message instanceof RaftRPC rpc)) { // The rest of the processing requires the message to be a RaftRPC return null; } - final RaftRPC rpc = (RaftRPC) message; // If RPC request or response contains term T > currentTerm: // set currentTerm = T, convert to follower (§5.1) // This applies to all RPC messages and responses - if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) { + if (rpc.getTerm() > context.getTermInformation().getCurrentTerm() && shouldUpdateTerm(rpc)) { log.info("{}: Term {} in \"{}\" message is greater than follower's term {} - updating term", logName(), rpc.getTerm(), rpc, context.getTermInformation().getCurrentTerm()); context.getTermInformation().updateAndPersist(rpc.getTerm(), null); } - if (rpc instanceof InstallSnapshot) { - handleInstallSnapshot(sender, (InstallSnapshot) rpc); + if (rpc instanceof InstallSnapshot installSnapshot) { + handleInstallSnapshot(sender, installSnapshot); restartLastLeaderMessageTimer(); scheduleElection(electionDuration()); return this; } - if (!(rpc instanceof RequestVote) || canGrantVote((RequestVote) rpc)) { + if (!(rpc instanceof RequestVote requestVote) || canGrantVote(requestVote)) { restartLastLeaderMessageTimer(); scheduleElection(electionDuration()); } @@ -500,6 +509,10 @@ public class Follower extends AbstractRaftActorBehavior { if (isLeaderAvailabilityKnown() && lastLeaderMessageInterval < maxElectionTimeout) { log.debug("{}: Received ElectionTimeout but leader appears to be available", logName()); scheduleElection(electionDuration()); + } else if (isThisFollowerIsolated()) { + log.debug("{}: this follower is isolated. Do not switch to Candidate for now.", logName()); + setLeaderId(null); + scheduleElection(electionDuration()); } else { log.debug("{}: Received ElectionTimeout - switching to Candidate", logName()); return internalSwitchBehavior(RaftState.Candidate); @@ -569,6 +582,36 @@ public class Follower extends AbstractRaftActorBehavior { return false; } + private boolean isThisFollowerIsolated() { + final Optional maybeCluster = context.getCluster(); + if (!maybeCluster.isPresent()) { + return false; + } + + final Cluster cluster = maybeCluster.get(); + final Member selfMember = cluster.selfMember(); + + final CurrentClusterState state = cluster.state(); + final Set unreachable = state.getUnreachable(); + final Iterable members = state.getMembers(); + + log.debug("{}: Checking if this node is isolated in the cluster unreachable set {}," + + "all members {} self member: {}", logName(), unreachable, members, selfMember); + + // no unreachable peers means we cannot be isolated + if (unreachable.isEmpty()) { + return false; + } + + final Set membersToCheck = new HashSet<>(); + members.forEach(membersToCheck::add); + + membersToCheck.removeAll(unreachable); + + // check if the only member not unreachable is us + return membersToCheck.size() == 1 && membersToCheck.iterator().next().equals(selfMember); + } + private void handleInstallSnapshot(final ActorRef sender, final InstallSnapshot installSnapshot) { log.debug("{}: handleInstallSnapshot: {}", logName(), installSnapshot); @@ -593,14 +636,14 @@ public class Follower extends AbstractRaftActorBehavior { Snapshot snapshot = Snapshot.create( context.getSnapshotManager().convertSnapshot(snapshotTracker.getSnapshotBytes()), - new ArrayList<>(), + List.of(), installSnapshot.getLastIncludedIndex(), installSnapshot.getLastIncludedTerm(), installSnapshot.getLastIncludedIndex(), installSnapshot.getLastIncludedTerm(), context.getTermInformation().getCurrentTerm(), context.getTermInformation().getVotedFor(), - installSnapshot.getServerConfig().orNull()); + installSnapshot.getServerConfig().orElse(null)); ApplySnapshot.Callback applySnapshotCallback = new ApplySnapshot.Callback() { @Override @@ -627,8 +670,7 @@ public class Follower extends AbstractRaftActorBehavior { } catch (IOException e) { log.debug("{}: Exception in InstallSnapshot of follower", logName(), e); - sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(), - -1, false), actor()); + sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(), -1, false), actor()); closeSnapshotTracker(); }