X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-akka-raft%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fraft%2Fbehaviors%2FAbstractRaftActorBehavior.java;h=b1560a5648b283e028ae0dc96e4267d92c6f438f;hb=2a31c2cacb9ad8f015a49708261ea93d256f0f60;hp=d7a8d5abb35bdb258633b65a783aed9e0aa9677d;hpb=26cd54f2cbe0737db6e82aa96cd31671c6f6bf7e;p=controller.git

diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractRaftActorBehavior.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractRaftActorBehavior.java
index d7a8d5abb3..45671ea31e 100644
--- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractRaftActorBehavior.java
+++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractRaftActorBehavior.java
@@ -9,13 +9,23 @@
 package org.opendaylight.controller.cluster.raft.behaviors;
 
 import akka.actor.ActorRef;
+import akka.actor.Cancellable;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.opendaylight.controller.cluster.raft.ClientRequestTracker;
 import org.opendaylight.controller.cluster.raft.RaftActorContext;
 import org.opendaylight.controller.cluster.raft.RaftState;
+import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
+import org.opendaylight.controller.cluster.raft.SerializationUtils;
+import org.opendaylight.controller.cluster.raft.base.messages.ApplyJournalEntries;
+import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
+import org.opendaylight.controller.cluster.raft.base.messages.ElectionTimeout;
 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
-import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
 import org.opendaylight.controller.cluster.raft.messages.RequestVote;
 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
+import org.slf4j.Logger;
+import scala.concurrent.duration.FiniteDuration;
 
 /**
  * Abstract class that represents the behavior of a RaftActor
@@ -29,14 +39,59 @@ import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
  */
 public abstract class AbstractRaftActorBehavior implements RaftActorBehavior {
 
+    protected static final ElectionTimeout ELECTION_TIMEOUT = new ElectionTimeout();
+
     /**
      * Information about the RaftActor whose behavior this class represents
      */
     protected final RaftActorContext context;
 
+    /**
+     *
+     */
+    protected final Logger LOG;
 
-    protected AbstractRaftActorBehavior(RaftActorContext context) {
+    /**
+     *
+     */
+    private Cancellable electionCancel = null;
+
+    /**
+     *
+     */
+    protected String leaderId = null;
+
+    private long replicatedToAllIndex = -1;
+
+    private final String logName;
+
+    private final RaftState state;
+
+    protected AbstractRaftActorBehavior(RaftActorContext context, RaftState state) {
         this.context = context;
+        this.state = state;
+        this.LOG = context.getLogger();
+
+        logName = String.format("%s (%s)", context.getId(), state);
+    }
+
+    @Override
+    public RaftState state() {
+        return state;
+    }
+
+    public String logName() {
+        return logName;
+    }
+
+    @Override
+    public void setReplicatedToAllIndex(long replicatedToAllIndex) {
+        this.replicatedToAllIndex = replicatedToAllIndex;
+    }
+
+    @Override
+    public long getReplicatedToAllIndex() {
+        return replicatedToAllIndex;
     }
 
     /**
@@ -49,13 +104,40 @@ public abstract class AbstractRaftActorBehavior implements RaftActorBehavior {
      *
      * @param sender         The actor that sent this message
      * @param appendEntries  The AppendEntries message
-     * @param suggestedState The state that the RaftActor should be in based
-     *                       on the base class's processing of the AppendEntries
-     *                       message
      * @return
      */
-    protected abstract RaftState handleAppendEntries(ActorRef sender,
-        AppendEntries appendEntries, RaftState suggestedState);
+    protected abstract RaftActorBehavior handleAppendEntries(ActorRef sender,
+        AppendEntries appendEntries);
+
+
+    /**
+     * appendEntries first processes the AppendEntries message and then
+     * delegates handling to a specific behavior
+     *
+     * @param sender
+     * @param appendEntries
+     * @return
+     */
+    protected RaftActorBehavior appendEntries(ActorRef sender,
+        AppendEntries appendEntries) {
+
+        // 1. Reply false if term < currentTerm (Â§5.1)
+        if (appendEntries.getTerm() < currentTerm()) {
+            if(LOG.isDebugEnabled()) {
+                LOG.debug("{}: Cannot append entries because sender term {} is less than {}",
+                        logName(), appendEntries.getTerm(), currentTerm());
+            }
+
+            sender.tell(
+                new AppendEntriesReply(context.getId(), currentTerm(), false,
+                    lastIndex(), lastTerm()), actor()
+            );
+            return this;
+        }
+
+
+        return handleAppendEntries(sender, appendEntries);
+    }
 
     /**
      * Derived classes should not directly handle AppendEntriesReply messages it
@@ -67,32 +149,65 @@ public abstract class AbstractRaftActorBehavior implements RaftActorBehavior {
      *
      * @param sender             The actor that sent this message
      * @param appendEntriesReply The AppendEntriesReply message
-     * @param suggestedState     The state that the RaftActor should be in based
-     *                           on the base class's processing of the
-     *                           AppendEntriesReply message
      * @return
      */
-
-    protected abstract RaftState handleAppendEntriesReply(ActorRef sender,
-        AppendEntriesReply appendEntriesReply, RaftState suggestedState);
+    protected abstract RaftActorBehavior handleAppendEntriesReply(ActorRef sender,
+        AppendEntriesReply appendEntriesReply);
 
     /**
-     * Derived classes should not directly handle RequestVote messages it
-     * should let the base class handle it first. Once the base class handles
-     * the RequestVote message and does the common actions that are applicable
-     * in all RaftState's it will delegate the handling of the RequestVote
-     * message to the derived class to do more state specific handling by calling
-     * this method
+     * requestVote handles the RequestVote message. This logic is common
+     * for all behaviors
      *
-     * @param sender         The actor that sent this message
-     * @param requestVote    The RequestVote message
-     * @param suggestedState The state that the RaftActor should be in based
-     *                       on the base class's processing of the RequestVote
-     *                       message
+     * @param sender
+     * @param requestVote
      * @return
      */
-    protected abstract RaftState handleRequestVote(ActorRef sender,
-        RequestVote requestVote, RaftState suggestedState);
+    protected RaftActorBehavior requestVote(ActorRef sender, RequestVote requestVote) {
+
+        LOG.debug("{}: In requestVote:  {}", logName(), requestVote);
+
+        boolean grantVote = false;
+
+        //  Reply false if term < currentTerm (Â§5.1)
+        if (requestVote.getTerm() < currentTerm()) {
+            grantVote = false;
+
+            // If votedFor is null or candidateId, and candidateâs log is at
+            // least as up-to-date as receiverâs log, grant vote (Â§5.2, Â§5.4)
+        } else if (votedFor() == null || votedFor()
+            .equals(requestVote.getCandidateId())) {
+
+            boolean candidateLatest = false;
+
+            // From Â§5.4.1
+            // Raft determines which of two logs is more up-to-date
+            // by comparing the index and term of the last entries in the
+            // logs. If the logs have last entries with different terms, then
+            // the log with the later term is more up-to-date. If the logs
+            // end with the same term, then whichever log is longer is
+            // more up-to-date.
+            if (requestVote.getLastLogTerm() > lastTerm()) {
+                candidateLatest = true;
+            } else if ((requestVote.getLastLogTerm() == lastTerm())
+                && requestVote.getLastLogIndex() >= lastIndex()) {
+                candidateLatest = true;
+            }
+
+            if (candidateLatest) {
+                grantVote = true;
+                context.getTermInformation().updateAndPersist(requestVote.getTerm(),
+                    requestVote.getCandidateId());
+            }
+        }
+
+        RequestVoteReply reply = new RequestVoteReply(currentTerm(), grantVote);
+
+        LOG.debug("{}: requestVote returning: {}", logName(), reply);
+
+        sender.tell(reply, actor());
+
+        return this;
+    }
 
     /**
      * Derived classes should not directly handle RequestVoteReply messages it
@@ -104,59 +219,279 @@ public abstract class AbstractRaftActorBehavior implements RaftActorBehavior {
      *
      * @param sender           The actor that sent this message
      * @param requestVoteReply The RequestVoteReply message
-     * @param suggestedState   The state that the RaftActor should be in based
-     *                         on the base class's processing of the RequestVote
-     *                         message
      * @return
      */
+    protected abstract RaftActorBehavior handleRequestVoteReply(ActorRef sender,
+        RequestVoteReply requestVoteReply);
+
+    /**
+     * Creates a random election duration
+     *
+     * @return
+     */
+    protected FiniteDuration electionDuration() {
+        long variance = new Random().nextInt(context.getConfigParams().getElectionTimeVariance());
+        return context.getConfigParams().getElectionTimeOutInterval().$plus(
+                new FiniteDuration(variance, TimeUnit.MILLISECONDS));
+    }
+
+    /**
+     * stop the scheduled election
+     */
+    protected void stopElection() {
+        if (electionCancel != null && !electionCancel.isCancelled()) {
+            electionCancel.cancel();
+        }
+    }
+
+    /**
+     * schedule a new election
+     *
+     * @param interval
+     */
+    protected void scheduleElection(FiniteDuration interval) {
+        stopElection();
 
-    protected abstract RaftState handleRequestVoteReply(ActorRef sender,
-        RequestVoteReply requestVoteReply, RaftState suggestedState);
+        // Schedule an election. When the scheduler triggers an ElectionTimeout
+        // message is sent to itself
+        electionCancel =
+            context.getActorSystem().scheduler().scheduleOnce(interval,
+                context.getActor(), ELECTION_TIMEOUT,
+                context.getActorSystem().dispatcher(), context.getActor());
+    }
 
     /**
-     * @return The derived class should return the state that corresponds to
-     * it's behavior
+     * Get the current term
+     * @return
      */
-    protected abstract RaftState state();
+    protected long currentTerm() {
+        return context.getTermInformation().getCurrentTerm();
+    }
 
-    @Override
-    public RaftState handleMessage(ActorRef sender, Object message) {
-        RaftState raftState = state();
-        if (message instanceof RaftRPC) {
-            raftState = applyTerm((RaftRPC) message);
+    /**
+     * Get the candidate for whom we voted in the current term
+     * @return
+     */
+    protected String votedFor() {
+        return context.getTermInformation().getVotedFor();
+    }
+
+    /**
+     * Get the actor associated with this behavior
+     * @return
+     */
+    protected ActorRef actor() {
+        return context.getActor();
+    }
+
+    /**
+     * Get the term from the last entry in the log
+     *
+     * @return
+     */
+    protected long lastTerm() {
+        return context.getReplicatedLog().lastTerm();
+    }
+
+    /**
+     * Get the index from the last entry in the log
+     *
+     * @return
+     */
+    protected long lastIndex() {
+        return context.getReplicatedLog().lastIndex();
+    }
+
+    /**
+     * Find the client request tracker for a specific logIndex
+     *
+     * @param logIndex
+     * @return
+     */
+    protected ClientRequestTracker findClientRequestTracker(long logIndex) {
+        return null;
+    }
+
+    /**
+     * Find the client request tracker for a specific logIndex
+     *
+     * @param logIndex
+     * @return
+     */
+    protected ClientRequestTracker removeClientRequestTracker(long logIndex) {
+        return null;
+    }
+
+
+    /**
+     * Find the log index from the previous to last entry in the log
+     *
+     * @return
+     */
+    protected long prevLogIndex(long index){
+        ReplicatedLogEntry prevEntry =
+            context.getReplicatedLog().get(index - 1);
+        if (prevEntry != null) {
+            return prevEntry.getIndex();
         }
-        if (message instanceof AppendEntries) {
-            AppendEntries appendEntries = (AppendEntries) message;
-            if (appendEntries.getLeaderCommit() > context.getLastApplied()
-                .get()) {
-                applyLogToStateMachine(appendEntries.getLeaderCommit());
+        return -1;
+    }
+
+    /**
+     * Find the log term from the previous to last entry in the log
+     * @return
+     */
+    protected long prevLogTerm(long index){
+        ReplicatedLogEntry prevEntry =
+            context.getReplicatedLog().get(index - 1);
+        if (prevEntry != null) {
+            return prevEntry.getTerm();
+        }
+        return -1;
+    }
+
+    /**
+     * Apply the provided index to the state machine
+     *
+     * @param index a log index that is known to be committed
+     */
+    protected void applyLogToStateMachine(final long index) {
+        long newLastApplied = context.getLastApplied();
+        // Now maybe we apply to the state machine
+        for (long i = context.getLastApplied() + 1;
+             i < index + 1; i++) {
+            ActorRef clientActor = null;
+            String identifier = null;
+            ClientRequestTracker tracker = removeClientRequestTracker(i);
+
+            if (tracker != null) {
+                clientActor = tracker.getClientActor();
+                identifier = tracker.getIdentifier();
+            }
+            ReplicatedLogEntry replicatedLogEntry =
+                context.getReplicatedLog().get(i);
+
+            if (replicatedLogEntry != null) {
+                // Send a local message to the local RaftActor (it's derived class to be
+                // specific to apply the log to it's index)
+                actor().tell(new ApplyState(clientActor, identifier,
+                    replicatedLogEntry), actor());
+                newLastApplied = i;
+            } else {
+                //if one index is not present in the log, no point in looping
+                // around as the rest wont be present either
+                LOG.warn(
+                        "{}: Missing index {} from log. Cannot apply state. Ignoring {} to {}",
+                        logName(), i, i, index);
+                break;
             }
-            raftState = handleAppendEntries(sender, appendEntries, raftState);
+        }
+        if(LOG.isDebugEnabled()) {
+            LOG.debug("{}: Setting last applied to {}", logName(), newLastApplied);
+        }
+        context.setLastApplied(newLastApplied);
+
+        // send a message to persist a ApplyLogEntries marker message into akka's persistent journal
+        // will be used during recovery
+        //in case if the above code throws an error and this message is not sent, it would be fine
+        // as the  append entries received later would initiate add this message to the journal
+        actor().tell(new ApplyJournalEntries(context.getLastApplied()), actor());
+    }
+
+    protected Object fromSerializableMessage(Object serializable){
+        return SerializationUtils.fromSerializable(serializable);
+    }
+
+    @Override
+    public RaftActorBehavior handleMessage(ActorRef sender, Object message) {
+        if (message instanceof AppendEntries) {
+            return appendEntries(sender, (AppendEntries) message);
         } else if (message instanceof AppendEntriesReply) {
-            raftState =
-                handleAppendEntriesReply(sender, (AppendEntriesReply) message,
-                    raftState);
+            return handleAppendEntriesReply(sender, (AppendEntriesReply) message);
         } else if (message instanceof RequestVote) {
-            raftState =
-                handleRequestVote(sender, (RequestVote) message, raftState);
+            return requestVote(sender, (RequestVote) message);
         } else if (message instanceof RequestVoteReply) {
-            raftState =
-                handleRequestVoteReply(sender, (RequestVoteReply) message,
-                    raftState);
+            return handleRequestVoteReply(sender, (RequestVoteReply) message);
         }
-        return raftState;
+        return this;
     }
 
-    private RaftState applyTerm(RaftRPC rpc) {
-        if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()
-            .get()) {
-            context.getTermInformation().update(rpc.getTerm(), null);
-            return RaftState.Follower;
+    @Override public String getLeaderId() {
+        return leaderId;
+    }
+
+    protected RaftActorBehavior switchBehavior(RaftActorBehavior behavior) {
+        LOG.info("{} :- Switching from behavior {} to {}", logName(), this.state(), behavior.state());
+        try {
+            close();
+        } catch (Exception e) {
+            LOG.error("{}: Failed to close behavior : {}", logName(), this.state(), e);
+        }
+
+        return behavior;
+    }
+
+    protected int getMajorityVoteCount(int numPeers) {
+        // Votes are required from a majority of the peers including self.
+        // The numMajority field therefore stores a calculated value
+        // of the number of votes required for this candidate to win an
+        // election based on it's known peers.
+        // If a peer was added during normal operation and raft replicas
+        // came to know about them then the new peer would also need to be
+        // taken into consideration when calculating this value.
+        // Here are some examples for what the numMajority would be for n
+        // peers
+        // 0 peers = 1 numMajority -: (0 + 1) / 2 + 1 = 1
+        // 2 peers = 2 numMajority -: (2 + 1) / 2 + 1 = 2
+        // 4 peers = 3 numMajority -: (4 + 1) / 2 + 1 = 3
+
+        int numMajority = 0;
+        if (numPeers > 0) {
+            int self = 1;
+            numMajority = (numPeers + self) / 2 + 1;
+        }
+        return numMajority;
+
+    }
+
+
+    /**
+     * Performs a snapshot with no capture on the replicated log.
+     * It clears the log from the supplied index or last-applied-1 which ever is minimum.
+     *
+     * @param snapshotCapturedIndex
+     */
+    protected void performSnapshotWithoutCapture(final long snapshotCapturedIndex) {
+        //  we would want to keep the lastApplied as its used while capturing snapshots
+        long lastApplied = context.getLastApplied();
+        long tempMin = Math.min(snapshotCapturedIndex, (lastApplied > -1 ? lastApplied - 1 : -1));
+
+        if(LOG.isTraceEnabled()) {
+            LOG.trace("{}: performSnapshotWithoutCapture: snapshotCapturedIndex: {}, lastApplied: {}, tempMin: {}",
+                    logName, snapshotCapturedIndex, lastApplied, tempMin);
+        }
+
+        if (tempMin > -1 && context.getReplicatedLog().isPresent(tempMin))  {
+            LOG.debug("{}: fakeSnapshot purging log to {} for term {}", logName(), tempMin,
+                    context.getTermInformation().getCurrentTerm());
+
+            //use the term of the temp-min, since we check for isPresent, entry will not be null
+            ReplicatedLogEntry entry = context.getReplicatedLog().get(tempMin);
+            context.getReplicatedLog().snapshotPreCommit(tempMin, entry.getTerm());
+            context.getReplicatedLog().snapshotCommit();
+            setReplicatedToAllIndex(tempMin);
+        } else if(tempMin > getReplicatedToAllIndex()) {
+            // It's possible a follower was lagging and an install snapshot advanced its match index past
+            // the current replicatedToAllIndex. Since the follower is now caught up we should advance the
+            // replicatedToAllIndex (to tempMin). The fact that tempMin wasn't found in the log is likely
+            // due to a previous snapshot triggered by the memory threshold exceeded, in that case we
+            // trim the log to the last applied index even if previous entries weren't replicated to all followers.
+            setReplicatedToAllIndex(tempMin);
         }
-        return state();
     }
 
-    private void applyLogToStateMachine(long index) {
-        context.getLastApplied().set(index);
+    protected String getId(){
+        return context.getId();
     }
+
 }