Bug 2787: Batch AppendEntries to speed up follower sync
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / AbstractReplicatedLogImpl.java
index a2c9d660ad8d2472f7b9a83ab3f175b1883ad1f6..c245206f641f3a4ff31da8608076f1c3d68cb4f6 100644 (file)
@@ -7,7 +7,10 @@
  */
 package org.opendaylight.controller.cluster.raft;
 
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 /**
@@ -17,29 +20,34 @@ import java.util.List;
 public abstract class AbstractReplicatedLogImpl implements ReplicatedLog {
 
     // We define this as ArrayList so we can use ensureCapacity.
-    protected ArrayList<ReplicatedLogEntry> journal;
+    private ArrayList<ReplicatedLogEntry> journal;
 
-    protected long snapshotIndex = -1;
-    protected long snapshotTerm = -1;
+    private long snapshotIndex = -1;
+    private long snapshotTerm = -1;
 
     // to be used for rollback during save snapshot failure
-    protected ArrayList<ReplicatedLogEntry> snapshottedJournal;
-    protected long previousSnapshotIndex = -1;
-    protected long previousSnapshotTerm = -1;
+    private ArrayList<ReplicatedLogEntry> snapshottedJournal;
+    private long previousSnapshotIndex = -1;
+    private long previousSnapshotTerm = -1;
+    private int dataSize = 0;
 
     public AbstractReplicatedLogImpl(long snapshotIndex,
         long snapshotTerm, List<ReplicatedLogEntry> unAppliedEntries) {
         this.snapshotIndex = snapshotIndex;
         this.snapshotTerm = snapshotTerm;
         this.journal = new ArrayList<>(unAppliedEntries);
+
+        for(ReplicatedLogEntry entry: journal) {
+            dataSize += entry.size();
+        }
     }
 
     public AbstractReplicatedLogImpl() {
-        this.journal = new ArrayList<>();
+        this(-1L, -1L, Collections.<ReplicatedLogEntry>emptyList());
     }
 
     protected int adjustedIndex(long logEntryIndex) {
-        if(snapshotIndex < 0){
+        if (snapshotIndex < 0) {
             return (int) logEntryIndex;
         }
         return (int) (logEntryIndex - (snapshotIndex + 1));
@@ -87,18 +95,26 @@ public abstract class AbstractReplicatedLogImpl implements ReplicatedLog {
     }
 
     @Override
-    public void removeFrom(long logEntryIndex) {
+    public long removeFrom(long logEntryIndex) {
         int adjustedIndex = adjustedIndex(logEntryIndex);
         if (adjustedIndex < 0 || adjustedIndex >= journal.size()) {
             // physical index should be less than list size and >= 0
-            return;
+            return -1;
         }
+
+        for(int i = adjustedIndex; i < journal.size(); i++) {
+            dataSize -= journal.get(i).size();
+        }
+
         journal.subList(adjustedIndex , journal.size()).clear();
+
+        return adjustedIndex;
     }
 
     @Override
     public void append(ReplicatedLogEntry replicatedLogEntry) {
         journal.add(replicatedLogEntry);
+        dataSize += replicatedLogEntry.size();
     }
 
     @Override
@@ -108,31 +124,58 @@ public abstract class AbstractReplicatedLogImpl implements ReplicatedLog {
 
     @Override
     public List<ReplicatedLogEntry> getFrom(long logEntryIndex) {
-        return getFrom(logEntryIndex, journal.size());
+        return getFrom(logEntryIndex, journal.size(), NO_MAX_SIZE);
     }
 
     @Override
-    public List<ReplicatedLogEntry> getFrom(long logEntryIndex, int max) {
+    public List<ReplicatedLogEntry> getFrom(long logEntryIndex, int maxEntries, long maxDataSize) {
         int adjustedIndex = adjustedIndex(logEntryIndex);
         int size = journal.size();
-        List<ReplicatedLogEntry> entries = new ArrayList<>(100);
         if (adjustedIndex >= 0 && adjustedIndex < size) {
             // physical index should be less than list size and >= 0
-            int maxIndex = adjustedIndex + max;
+            int maxIndex = adjustedIndex + maxEntries;
             if(maxIndex > size){
                 maxIndex = size;
             }
-            entries.addAll(journal.subList(adjustedIndex, maxIndex));
+
+            if(maxDataSize == NO_MAX_SIZE) {
+                return new ArrayList<>(journal.subList(adjustedIndex, maxIndex));
+            } else {
+                List<ReplicatedLogEntry> retList = new ArrayList<>(maxIndex - adjustedIndex);
+                long totalSize = 0;
+                for(int i = adjustedIndex; i < maxIndex; i++) {
+                    ReplicatedLogEntry entry = journal.get(i);
+                    totalSize += entry.size();
+                    if(totalSize <= maxDataSize) {
+                        retList.add(entry);
+                    } else {
+                        if(retList.isEmpty()) {
+                            // Edge case - the first entry's size exceeds the threshold. We need to return
+                            // at least the first entry so add it here.
+                            retList.add(entry);
+                        }
+
+                        break;
+                    }
+                }
+
+                return retList;
+            }
+        } else {
+            return Collections.emptyList();
         }
-        return entries;
     }
 
-
     @Override
     public long size() {
        return journal.size();
     }
 
+    @Override
+    public int dataSize() {
+        return dataSize;
+    }
+
     @Override
     public boolean isPresent(long logEntryIndex) {
         if (logEntryIndex > lastIndex()) {
@@ -181,10 +224,15 @@ public abstract class AbstractReplicatedLogImpl implements ReplicatedLog {
 
     @Override
     public void snapshotPreCommit(long snapshotCapturedIndex, long snapshotCapturedTerm) {
+        Preconditions.checkArgument(snapshotCapturedIndex >= snapshotIndex,
+                "snapshotCapturedIndex must be greater than or equal to snapshotIndex");
+
         snapshottedJournal = new ArrayList<>(journal.size());
 
-        snapshottedJournal.addAll(journal.subList(0, (int)(snapshotCapturedIndex - snapshotIndex)));
-        clear(0, (int) (snapshotCapturedIndex - snapshotIndex));
+        List<ReplicatedLogEntry> snapshotJournalEntries = journal.subList(0, (int) (snapshotCapturedIndex - snapshotIndex));
+
+        snapshottedJournal.addAll(snapshotJournalEntries);
+        snapshotJournalEntries.clear();
 
         previousSnapshotIndex = snapshotIndex;
         setSnapshotIndex(snapshotCapturedIndex);
@@ -198,6 +246,12 @@ public abstract class AbstractReplicatedLogImpl implements ReplicatedLog {
         snapshottedJournal = null;
         previousSnapshotIndex = -1;
         previousSnapshotTerm = -1;
+        dataSize = 0;
+        // need to recalc the datasize based on the entries left after precommit.
+        for(ReplicatedLogEntry logEntry : journal) {
+            dataSize += logEntry.size();
+        }
+
     }
 
     @Override
@@ -212,4 +266,9 @@ public abstract class AbstractReplicatedLogImpl implements ReplicatedLog {
         snapshotTerm = previousSnapshotTerm;
         previousSnapshotTerm = -1;
     }
+
+    @VisibleForTesting
+    ReplicatedLogEntry getAtPhysicalIndex(int index) {
+        return journal.get(index);
+    }
 }