Improve segmented journal actor metrics
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / behaviors / Follower.java
index d88c30db333bcf040b20febc76884c9bc13e9147..9dd630aade8a5e2e0a8265aa69564ceaace63cd4 100644 (file)
@@ -16,8 +16,9 @@ import akka.cluster.Member;
 import akka.cluster.MemberStatus;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Stopwatch;
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 import java.io.IOException;
-import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Optional;
 import java.util.Set;
@@ -30,6 +31,7 @@ import org.opendaylight.controller.cluster.raft.RaftActorContext;
 import org.opendaylight.controller.cluster.raft.RaftState;
 import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
 import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
+import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
 import org.opendaylight.controller.cluster.raft.base.messages.ElectionTimeout;
 import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow;
 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
@@ -51,6 +53,7 @@ import org.opendaylight.controller.cluster.raft.persisted.Snapshot;
  * convert to candidate
  * </ul>
  */
+// Non-final for testing
 public class Follower extends AbstractRaftActorBehavior {
     private static final long MAX_ELECTION_TIMEOUT_FACTOR = 18;
 
@@ -67,11 +70,13 @@ public class Follower extends AbstractRaftActorBehavior {
         this(context, null, (short)-1);
     }
 
+    @SuppressFBWarnings(value = "MC_OVERRIDABLE_METHOD_CALL_IN_CONSTRUCTOR",
+        justification = "electionDuration() is not final for Candidate override")
     public Follower(final RaftActorContext context, final String initialLeaderId,
             final short initialLeaderPayloadVersion) {
         super(context, RaftState.Follower);
-        this.leaderId = initialLeaderId;
-        this.leaderPayloadVersion = initialLeaderPayloadVersion;
+        leaderId = initialLeaderId;
+        leaderPayloadVersion = initialLeaderPayloadVersion;
 
         initialSyncStatusTracker = new SyncStatusTracker(context.getActor(), getId(), context.getConfigParams()
             .getSyncIndexThreshold());
@@ -160,12 +165,11 @@ public class Follower extends AbstractRaftActorBehavior {
         leaderId = appendEntries.getLeaderId();
         leaderPayloadVersion = appendEntries.getPayloadVersion();
 
-        if (appendEntries.getLeaderAddress().isPresent()) {
-            final String address = appendEntries.getLeaderAddress().get();
-            log.debug("New leader address: {}", address);
-
-            context.setPeerAddress(leaderId, address);
-            context.getConfigParams().getPeerAddressResolver().setResolved(leaderId, address);
+        final var leaderAddress = appendEntries.leaderAddress();
+        if (leaderAddress != null) {
+            log.debug("New leader address: {}", leaderAddress);
+            context.setPeerAddress(leaderId, leaderAddress);
+            context.getConfigParams().getPeerAddressResolver().setResolved(leaderId, leaderAddress);
         }
 
         // First check if the logs are in sync or not
@@ -322,8 +326,8 @@ public class Follower extends AbstractRaftActorBehavior {
             shouldCaptureSnapshot.compareAndSet(false,
                     context.getReplicatedLog().shouldCaptureSnapshot(entry.getIndex()));
 
-            if (entry.getData() instanceof ServerConfigurationPayload) {
-                context.updatePeerIds((ServerConfigurationPayload)entry.getData());
+            if (entry.getData() instanceof ServerConfigurationPayload serverConfiguration) {
+                context.updatePeerIds(serverConfiguration);
             }
         }
 
@@ -435,6 +439,11 @@ public class Follower extends AbstractRaftActorBehavior {
         return this;
     }
 
+    @Override
+    final ApplyState getApplyStateFor(final ReplicatedLogEntry entry) {
+        return new ApplyState(null, null, entry);
+    }
+
     @Override
     public RaftActorBehavior handleMessage(final ActorRef sender, final Object message) {
         if (message instanceof ElectionTimeout || message instanceof TimeoutNow) {
@@ -445,12 +454,11 @@ public class Follower extends AbstractRaftActorBehavior {
             return this;
         }
 
-        if (!(message instanceof RaftRPC)) {
+        if (!(message instanceof RaftRPC rpc)) {
             // The rest of the processing requires the message to be a RaftRPC
             return null;
         }
 
-        final RaftRPC rpc = (RaftRPC) message;
         // If RPC request or response contains term T > currentTerm:
         // set currentTerm = T, convert to follower (§5.1)
         // This applies to all RPC messages and responses
@@ -461,14 +469,14 @@ public class Follower extends AbstractRaftActorBehavior {
             context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
         }
 
-        if (rpc instanceof InstallSnapshot) {
-            handleInstallSnapshot(sender, (InstallSnapshot) rpc);
+        if (rpc instanceof InstallSnapshot installSnapshot) {
+            handleInstallSnapshot(sender, installSnapshot);
             restartLastLeaderMessageTimer();
             scheduleElection(electionDuration());
             return this;
         }
 
-        if (!(rpc instanceof RequestVote) || canGrantVote((RequestVote) rpc)) {
+        if (!(rpc instanceof RequestVote requestVote) || canGrantVote(requestVote)) {
             restartLastLeaderMessageTimer();
             scheduleElection(electionDuration());
         }
@@ -500,6 +508,10 @@ public class Follower extends AbstractRaftActorBehavior {
                 if (isLeaderAvailabilityKnown() && lastLeaderMessageInterval < maxElectionTimeout) {
                     log.debug("{}: Received ElectionTimeout but leader appears to be available", logName());
                     scheduleElection(electionDuration());
+                } else if (isThisFollowerIsolated()) {
+                    log.debug("{}: this follower is isolated. Do not switch to Candidate for now.", logName());
+                    setLeaderId(null);
+                    scheduleElection(electionDuration());
                 } else {
                     log.debug("{}: Received ElectionTimeout - switching to Candidate", logName());
                     return internalSwitchBehavior(RaftState.Candidate);
@@ -537,7 +549,7 @@ public class Follower extends AbstractRaftActorBehavior {
 
         Address leaderAddress = leaderActor.anchorPath().address();
 
-        CurrentClusterState state = cluster.get().state();
+        CurrentClusterState state = cluster.orElseThrow().state();
         Set<Member> unreachable = state.getUnreachable();
 
         log.debug("{}: Checking for leader {} in the cluster unreachable set {}", logName(), leaderAddress,
@@ -569,6 +581,36 @@ public class Follower extends AbstractRaftActorBehavior {
         return false;
     }
 
+    private boolean isThisFollowerIsolated() {
+        final Optional<Cluster> maybeCluster = context.getCluster();
+        if (!maybeCluster.isPresent()) {
+            return false;
+        }
+
+        final Cluster cluster = maybeCluster.orElseThrow();
+        final Member selfMember = cluster.selfMember();
+
+        final CurrentClusterState state = cluster.state();
+        final Set<Member> unreachable = state.getUnreachable();
+        final Iterable<Member> members = state.getMembers();
+
+        log.debug("{}: Checking if this node is isolated in the cluster unreachable set {},"
+                        + "all members {} self member: {}", logName(), unreachable, members, selfMember);
+
+        // no unreachable peers means we cannot be isolated
+        if (unreachable.isEmpty()) {
+            return false;
+        }
+
+        final Set<Member> membersToCheck = new HashSet<>();
+        members.forEach(membersToCheck::add);
+
+        membersToCheck.removeAll(unreachable);
+
+        // check if the only member not unreachable is us
+        return membersToCheck.size() == 1 && membersToCheck.iterator().next().equals(selfMember);
+    }
+
     private void handleInstallSnapshot(final ActorRef sender, final InstallSnapshot installSnapshot) {
 
         log.debug("{}: handleInstallSnapshot: {}", logName(), installSnapshot);
@@ -593,7 +635,7 @@ public class Follower extends AbstractRaftActorBehavior {
 
                 Snapshot snapshot = Snapshot.create(
                         context.getSnapshotManager().convertSnapshot(snapshotTracker.getSnapshotBytes()),
-                        new ArrayList<>(),
+                        List.of(),
                         installSnapshot.getLastIncludedIndex(),
                         installSnapshot.getLastIncludedTerm(),
                         installSnapshot.getLastIncludedIndex(),
@@ -627,8 +669,7 @@ public class Follower extends AbstractRaftActorBehavior {
         } catch (IOException e) {
             log.debug("{}: Exception in InstallSnapshot of follower", logName(), e);
 
-            sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(),
-                    -1, false), actor());
+            sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(), -1, false), actor());
 
             closeSnapshotTracker();
         }