Snapshot changes
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / RaftActor.java
index 7814bad00b4d1423cea9847b06882d36138e0238..36c86f542d398f43b58b6236d8095b8f1ddcf4a9 100644 (file)
@@ -19,6 +19,9 @@ import akka.persistence.SaveSnapshotSuccess;
 import akka.persistence.SnapshotOffer;
 import akka.persistence.SnapshotSelectionCriteria;
 import akka.persistence.UntypedPersistentActor;
+import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
+import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
+import org.opendaylight.controller.cluster.raft.base.messages.Replicate;
 import org.opendaylight.controller.cluster.raft.behaviors.Candidate;
 import org.opendaylight.controller.cluster.raft.behaviors.Follower;
 import org.opendaylight.controller.cluster.raft.behaviors.Leader;
@@ -26,13 +29,10 @@ import org.opendaylight.controller.cluster.raft.behaviors.RaftActorBehavior;
 import org.opendaylight.controller.cluster.raft.client.messages.AddRaftPeer;
 import org.opendaylight.controller.cluster.raft.client.messages.FindLeader;
 import org.opendaylight.controller.cluster.raft.client.messages.FindLeaderReply;
-import org.opendaylight.controller.cluster.raft.internal.messages.ApplySnapshot;
 import org.opendaylight.controller.cluster.raft.client.messages.RemoveRaftPeer;
-import org.opendaylight.controller.cluster.raft.internal.messages.ApplyState;
-import org.opendaylight.controller.cluster.raft.internal.messages.Replicate;
+import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload;
 
 import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
@@ -80,6 +80,8 @@ public abstract class RaftActor extends UntypedPersistentActor {
     protected final LoggingAdapter LOG =
         Logging.getLogger(getContext().system(), this);
 
+    private static final int SNAPSHOT_ENTRY_COUNT = 100000;
+
     /**
      * The current state determines the current behavior of a RaftActor
      * A Raft Actor always starts off in the Follower State
@@ -99,7 +101,6 @@ public abstract class RaftActor extends UntypedPersistentActor {
 
 
     public RaftActor(String id, Map<String, String> peerAddresses) {
-        final String id1 = getSelf().path().toString();
         context = new RaftActorContextImpl(this.getSelf(),
             this.getContext(),
             id, new ElectionTermImpl(),
@@ -108,6 +109,7 @@ public abstract class RaftActor extends UntypedPersistentActor {
 
     @Override public void onReceiveRecover(Object message) {
         if (message instanceof SnapshotOffer) {
+            LOG.debug("SnapshotOffer called..");
             SnapshotOffer offer = (SnapshotOffer) message;
             Snapshot snapshot = (Snapshot) offer.snapshot();
 
@@ -116,6 +118,13 @@ public abstract class RaftActor extends UntypedPersistentActor {
             // when we need to install it on a peer
             replicatedLog = new ReplicatedLogImpl(snapshot);
 
+            context.setReplicatedLog(replicatedLog);
+
+            LOG.debug("Applied snapshot to replicatedLog. " +
+                "snapshotIndex={}, snapshotTerm={}, journal-size={}",
+                replicatedLog.snapshotIndex, replicatedLog.snapshotTerm,
+                replicatedLog.size());
+
             // Apply the snapshot to the actors state
             applySnapshot(snapshot.getState());
 
@@ -127,7 +136,11 @@ public abstract class RaftActor extends UntypedPersistentActor {
             context.getTermInformation().update(((UpdateElectionTerm) message).getCurrentTerm(), ((UpdateElectionTerm) message).getVotedFor());
         } else if (message instanceof RecoveryCompleted) {
             LOG.debug(
-                "Last index in log : " + replicatedLog.lastIndex());
+                "RecoveryCompleted - Switching actor to Follower - " +
+                    "Last index in log:{}, snapshotIndex={}, snapshotTerm={}, " +
+                    "journal-size={}",
+                replicatedLog.lastIndex(), replicatedLog.snapshotIndex,
+                replicatedLog.snapshotTerm, replicatedLog.size());
             currentBehavior = switchBehavior(RaftState.Follower);
         }
     }
@@ -142,14 +155,17 @@ public abstract class RaftActor extends UntypedPersistentActor {
 
             applyState(applyState.getClientActor(), applyState.getIdentifier(),
                 applyState.getReplicatedLogEntry().getData());
+
         } else if(message instanceof ApplySnapshot ) {
             applySnapshot(((ApplySnapshot) message).getSnapshot());
+
         } else if (message instanceof FindLeader) {
             getSender().tell(
                 new FindLeaderReply(
                     context.getPeerAddress(currentBehavior.getLeaderId())),
                 getSelf()
             );
+
         } else if (message instanceof SaveSnapshotSuccess) {
             SaveSnapshotSuccess success = (SaveSnapshotSuccess) message;
 
@@ -157,26 +173,46 @@ public abstract class RaftActor extends UntypedPersistentActor {
             trimPersistentData(success.metadata().sequenceNr());
 
         } else if (message instanceof SaveSnapshotFailure) {
+
             // TODO: Handle failure in saving the snapshot
+
         } else if (message instanceof FindLeader){
+
             getSender().tell(new FindLeaderReply(
                 context.getPeerAddress(currentBehavior.getLeaderId())),
                 getSelf());
 
         } else if (message instanceof AddRaftPeer){
+
+            // FIXME : Do not add raft peers like this.
+            // When adding a new Peer we have to ensure that a majority of
+            // the peers know about the new Peer. Doing it this way may cause
+            // a situation where multiple Leaders may emerge
             AddRaftPeer arp = (AddRaftPeer)message;
            context.addToPeers(arp.getName(), arp.getAddress());
 
         } else if (message instanceof RemoveRaftPeer){
+
             RemoveRaftPeer rrp = (RemoveRaftPeer)message;
             context.removePeer(rrp.getName());
+
         } else {
+
             RaftState state =
                 currentBehavior.handleMessage(getSender(), message);
             currentBehavior = switchBehavior(state);
         }
     }
 
+    /**
+     * Returns the key set of the peer-address map held by this actor's context.
+     * <p>
+     * NOTE(review): the keys are presumably peer ids (the AddRaftPeer handling
+     * registers peers as name/address pairs) - confirm against the context's
+     * getPeerAddresses implementation.
+     *
+     * @return the keys of {@code context.getPeerAddresses()}
+     */
+    public java.util.Set<String> getPeers() {
+        return context.getPeerAddresses().keySet();
+    }
+
+    /**
+     * Summarises the replicated log held by this actor's context as a single
+     * string: its snapshot index, its snapshot term and its reported size.
+     *
+     * @return the formatted state description
+     */
+    protected String getReplicatedLogState() {
+        final ReplicatedLog log = context.getReplicatedLog();
+        final StringBuilder state = new StringBuilder("snapshotIndex=");
+        state.append(log.getSnapshotIndex());
+        state.append(", snapshotTerm=").append(log.getSnapshotTerm());
+        state.append(", im-mem journal size=").append(log.size());
+        return state.toString();
+    }
 
 
     /**
@@ -188,7 +224,7 @@ public abstract class RaftActor extends UntypedPersistentActor {
      * @param data
      */
     protected void persistData(ActorRef clientActor, String identifier,
-        Object data) {
+        Payload data) {
 
         ReplicatedLogEntry replicatedLogEntry = new ReplicatedLogImplEntry(
             context.getReplicatedLog().lastIndex() + 1,
@@ -236,6 +272,24 @@ public abstract class RaftActor extends UntypedPersistentActor {
         return currentBehavior.state();
     }
 
+    /**
+     * setPeerAddress sets the address of a known peer at a later time.
+     * <p>
+     * This is to account for situations where we know that a peer
+     * exists but we do not know an address up-front. This may also be used in
+     * situations where a known peer starts off in a different location and we
+     * need to change its address.
+     * <p>
+     * Note that if the peerId does not match the list of peers passed to
+     * this actor during construction an IllegalStateException will be thrown.
+     * NOTE(review): that check is not visible here; this method only delegates
+     * to {@code context.setPeerAddress} - confirm the exception is raised there.
+     *
+     * @param peerId id of the already-known peer whose address is being set
+     * @param peerAddress the address to record for that peer
+     */
+    protected void setPeerAddress(String peerId, String peerAddress){
+        context.setPeerAddress(peerId, peerAddress);
+    }
+
 
 
     /**
@@ -311,85 +365,33 @@ public abstract class RaftActor extends UntypedPersistentActor {
     }
 
     private void trimPersistentData(long sequenceNumber) {
-        // Trim snapshots
+        // Trim akka snapshots
         // FIXME : Not sure how exactly the SnapshotSelectionCriteria is applied
         // For now guessing that it is ANDed.
         deleteSnapshots(new SnapshotSelectionCriteria(
-            sequenceNumber - 100000, 43200000));
+            sequenceNumber - SNAPSHOT_ENTRY_COUNT, 43200000));
 
-        // Trim journal
+        // Trim akka journal
         deleteMessages(sequenceNumber);
     }
 
 
-    private class ReplicatedLogImpl implements ReplicatedLog {
-        private final List<ReplicatedLogEntry> journal;
-        private final Object snapshot;
-        private long snapshotIndex = -1;
-        private long snapshotTerm = -1;
+    private class ReplicatedLogImpl extends AbstractReplicatedLogImpl {
 
         public ReplicatedLogImpl(Snapshot snapshot) {
-            this.snapshot = snapshot.getState();
-            this.snapshotIndex = snapshot.getLastAppliedIndex();
-            this.snapshotTerm = snapshot.getLastAppliedTerm();
-
-            this.journal = new ArrayList<>(snapshot.getUnAppliedEntries());
+            super(snapshot.getState(),
+                snapshot.getLastAppliedIndex(), snapshot.getLastAppliedTerm(),
+                snapshot.getUnAppliedEntries());
         }
 
         public ReplicatedLogImpl() {
-            this.snapshot = null;
-            this.journal = new ArrayList<>();
-        }
-
-        @Override public ReplicatedLogEntry get(long index) {
-            int adjustedIndex = adjustedIndex(index);
-
-            if (adjustedIndex < 0 || adjustedIndex >= journal.size()) {
-                return null;
-            }
-
-            return journal.get(adjustedIndex);
-        }
-
-        @Override public ReplicatedLogEntry last() {
-            if (journal.size() == 0) {
-                return null;
-            }
-            return get(journal.size() - 1);
-        }
-
-        @Override public long lastIndex() {
-            if (journal.size() == 0) {
-                return -1;
-            }
-
-            return last().getIndex();
-        }
-
-        @Override public long lastTerm() {
-            if (journal.size() == 0) {
-                return -1;
-            }
-
-            return last().getTerm();
+            super();
         }
 
+        @Override public void removeFromAndPersist(long logEntryIndex) {
+            int adjustedIndex = adjustedIndex(logEntryIndex);
 
-        @Override public void removeFrom(long index) {
-            int adjustedIndex = adjustedIndex(index);
-
-            if (adjustedIndex < 0 || adjustedIndex >= journal.size()) {
-                return;
-            }
-
-            journal.subList(adjustedIndex , journal.size()).clear();
-        }
-
-
-        @Override public void removeFromAndPersist(long index) {
-            int adjustedIndex = adjustedIndex(index);
-
-            if (adjustedIndex < 0 || adjustedIndex >= journal.size()) {
+            if (adjustedIndex < 0) {
                 return;
             }
 
@@ -403,29 +405,6 @@ public abstract class RaftActor extends UntypedPersistentActor {
                     //FIXME : Doing nothing for now
                 }
             });
-
-
-        }
-
-        @Override public void append(
-            final ReplicatedLogEntry replicatedLogEntry) {
-            journal.add(replicatedLogEntry);
-        }
-
-        @Override public List<ReplicatedLogEntry> getFrom(long index) {
-            int adjustedIndex = adjustedIndex(index);
-
-            List<ReplicatedLogEntry> entries = new ArrayList<>(100);
-            if (adjustedIndex < 0 || adjustedIndex >= journal.size()) {
-                return entries;
-            }
-
-
-            for (int i = adjustedIndex;
-                 i < journal.size(); i++) {
-                entries.add(journal.get(i));
-            }
-            return entries;
         }
 
         @Override public void appendAndPersist(
@@ -437,7 +416,7 @@ public abstract class RaftActor extends UntypedPersistentActor {
             final String identifier,
             final ReplicatedLogEntry replicatedLogEntry) {
             context.getLogger().debug(
-                "Append log entry and persist " + replicatedLogEntry);
+                "Append log entry and persist {} ", replicatedLogEntry);
             // FIXME : By adding the replicated log entry to the in-memory journal we are not truly ensuring durability of the logs
             journal.add(replicatedLogEntry);
 
@@ -450,20 +429,42 @@ public abstract class RaftActor extends UntypedPersistentActor {
                 new Procedure<ReplicatedLogEntry>() {
                     public void apply(ReplicatedLogEntry evt) throws Exception {
                         // FIXME : Tentatively create a snapshot every hundred thousand entries. To be tuned.
-                        if (size() > 100000) {
-                            ReplicatedLogEntry lastAppliedEntry =
-                                get(context.getLastApplied());
+                        if (journal.size() > SNAPSHOT_ENTRY_COUNT) {
+                            LOG.info("Initiating Snapshot Capture..");
                             long lastAppliedIndex = -1;
                             long lastAppliedTerm = -1;
+
+                            ReplicatedLogEntry lastAppliedEntry = get(context.getLastApplied());
                             if (lastAppliedEntry != null) {
                                 lastAppliedIndex = lastAppliedEntry.getIndex();
                                 lastAppliedTerm = lastAppliedEntry.getTerm();
                             }
 
-                            saveSnapshot(Snapshot.create(createSnapshot(),
+                            LOG.debug("Snapshot Capture logSize: {}", journal.size());
+                            LOG.debug("Snapshot Capture lastApplied:{} ", context.getLastApplied());
+                            LOG.debug("Snapshot Capture lastAppliedIndex:{}", lastAppliedIndex);
+                            LOG.debug("Snapshot Capture lastAppliedTerm:{}", lastAppliedTerm);
+
+                            // create a snapshot object from the state provided and save it
+                            // when snapshot is saved async, SaveSnapshotSuccess is raised.
+                            Snapshot sn = Snapshot.create(createSnapshot(),
                                 getFrom(context.getLastApplied() + 1),
                                 lastIndex(), lastTerm(), lastAppliedIndex,
-                                lastAppliedTerm));
+                                lastAppliedTerm);
+                            saveSnapshot(sn);
+
+                            LOG.info("Persisting of snapshot done:{}", sn.getLogMessage());
+
+                            //be greedy and remove entries from in-mem journal which are in the snapshot
+                            // and update snapshotIndex and snapshotTerm without waiting for the success,
+                            // TODO: damage-recovery to be done on failure
+                            journal.subList(0, (int) (lastAppliedIndex - snapshotIndex)).clear();
+                            snapshotIndex = lastAppliedIndex;
+                            snapshotTerm = lastAppliedTerm;
+
+                            LOG.info("Removed in-memory snapshotted entries, " +
+                                "adjusted snaphsotIndex:{}" +
+                                "and term:{}", snapshotIndex, lastAppliedTerm);
                         }
                         // Send message for replication
                         if (clientActor != null) {
@@ -477,76 +478,6 @@ public abstract class RaftActor extends UntypedPersistentActor {
             );
         }
 
-        @Override public long size() {
-            return journal.size() + snapshotIndex + 1;
-        }
-
-        @Override public boolean isPresent(long index) {
-            int adjustedIndex = adjustedIndex(index);
-
-            if (adjustedIndex < 0 || adjustedIndex >= journal.size()) {
-                return false;
-            }
-            return true;
-        }
-
-        @Override public boolean isInSnapshot(long index) {
-            return index <= snapshotIndex;
-        }
-
-        @Override public Object getSnapshot() {
-            return snapshot;
-        }
-
-        @Override public long getSnapshotIndex() {
-            return snapshotIndex;
-        }
-
-        @Override public long getSnapshotTerm() {
-            return snapshotTerm;
-        }
-
-        private int adjustedIndex(long index) {
-            if(snapshotIndex < 0){
-                return (int) index;
-            }
-            return (int) (index - snapshotIndex);
-        }
-    }
-
-
-    private static class ReplicatedLogImplEntry implements ReplicatedLogEntry,
-        Serializable {
-
-        private final long index;
-        private final long term;
-        private final Object payload;
-
-        public ReplicatedLogImplEntry(long index, long term, Object payload) {
-
-            this.index = index;
-            this.term = term;
-            this.payload = payload;
-        }
-
-        @Override public Object getData() {
-            return payload;
-        }
-
-        @Override public long getTerm() {
-            return term;
-        }
-
-        @Override public long getIndex() {
-            return index;
-        }
-
-        @Override public String toString() {
-            return "Entry{" +
-                "index=" + index +
-                ", term=" + term +
-                '}';
-        }
     }
 
     private static class DeleteEntries implements Serializable {
@@ -609,6 +540,17 @@ public abstract class RaftActor extends UntypedPersistentActor {
         public long getLastAppliedTerm() {
             return lastAppliedTerm;
         }
+
+        /**
+         * Builds a one-line summary of this snapshot for log output: last term,
+         * last applied index and term, and the number of unapplied entries.
+         *
+         * @return the summary, in the form {@code Snapshot={lastTerm:..., ...}}
+         */
+        public String getLogMessage() {
+            // Append each piece separately rather than concatenating inside
+            // append(), which would build a throwaway String for every call.
+            return new StringBuilder("Snapshot={")
+                .append("lastTerm:").append(getLastTerm()).append(", ")
+                .append("LastAppliedIndex:").append(getLastAppliedIndex()).append(", ")
+                .append("LastAppliedTerm:").append(getLastAppliedTerm()).append(", ")
+                .append("UnAppliedEntries size:").append(getUnAppliedEntries().size()).append("}")
+                .toString();
+        }
     }
 
     private class ElectionTermImpl implements ElectionTerm {