Bug 6540: EOS - Prune pending owner change commits on leader change
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / entityownership / EntityOwnershipShardCommitCoordinator.java
index 6c15ef6ed05cb2fb41b7b837e3655b0fef167beb..c792cf1dda996370b52002fa952b5689088d73df 100644 (file)
@@ -7,18 +7,28 @@
  */
 package org.opendaylight.controller.cluster.datastore.entityownership;
 
+import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_OWNER_QNAME;
+
 import akka.actor.ActorRef;
 import akka.actor.Cancellable;
 import akka.actor.Status.Failure;
+import com.google.common.base.Preconditions;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.Queue;
+import javax.annotation.Nullable;
+import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
+import org.opendaylight.controller.cluster.access.concepts.FrontendIdentifier;
+import org.opendaylight.controller.cluster.access.concepts.FrontendType;
+import org.opendaylight.controller.cluster.access.concepts.LocalHistoryIdentifier;
+import org.opendaylight.controller.cluster.access.concepts.MemberName;
+import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
 import org.opendaylight.controller.cluster.datastore.DataStoreVersions;
 import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException;
-import org.opendaylight.controller.cluster.datastore.identifiers.TransactionIdentifier;
 import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
 import org.opendaylight.controller.cluster.datastore.messages.CommitTransactionReply;
 import org.opendaylight.controller.cluster.datastore.modification.Modification;
+import org.opendaylight.controller.cluster.datastore.modification.WriteModification;
 import org.slf4j.Logger;
 import scala.concurrent.duration.FiniteDuration;
 
@@ -28,29 +38,37 @@ import scala.concurrent.duration.FiniteDuration;
  * @author Thomas Pantelis
  */
 class EntityOwnershipShardCommitCoordinator {
-    private static final Object COMMIT_RETRY_MESSAGE = "entityCommitRetry";
+    private static final Object COMMIT_RETRY_MESSAGE = new Object() {
+        @Override
+        public String toString() {
+            return "entityCommitRetry";
+        }
+    };
+    private static final FrontendType FRONTEND_TYPE = FrontendType.forName("entity-ownership-internal");
 
-    private final Logger log;
-    private int transactionIDCounter = 0;
-    private final String localMemberName;
     private final Queue<Modification> pendingModifications = new LinkedList<>();
+    private final LocalHistoryIdentifier historyId;
+    private final Logger log;
+
     private BatchedModifications inflightCommit;
     private Cancellable retryCommitSchedule;
+    private long transactionIDCounter = 0;
 
-    EntityOwnershipShardCommitCoordinator(String localMemberName, Logger log) {
-        this.localMemberName = localMemberName;
-        this.log = log;
+    EntityOwnershipShardCommitCoordinator(MemberName localMemberName, Logger log) {
+        this.log = Preconditions.checkNotNull(log);
+        historyId = new LocalHistoryIdentifier(
+                ClientIdentifier.create(FrontendIdentifier.create(localMemberName, FRONTEND_TYPE), 0), 0);
     }
 
     boolean handleMessage(Object message, EntityOwnershipShard shard) {
         boolean handled = true;
-        if(CommitTransactionReply.SERIALIZABLE_CLASS.isInstance(message)) {
+        if(CommitTransactionReply.isSerializedType(message)) {
             // Successful reply from a local commit.
             inflightCommitSucceeded(shard);
         } else if(message instanceof akka.actor.Status.Failure) {
             // Failure reply from a local commit.
             inflightCommitFailure(((Failure)message).cause(), shard);
-        } else if(message.equals(COMMIT_RETRY_MESSAGE)) {
+        } else if(COMMIT_RETRY_MESSAGE.equals(message)) {
             retryInflightCommit(shard);
         } else {
             handled = false;
@@ -140,23 +158,35 @@ class EntityOwnershipShardCommitCoordinator {
     }
 
     void commitModification(Modification modification, EntityOwnershipShard shard) {
+        BatchedModifications modifications = newBatchedModifications();
+        modifications.addModification(modification);
+        commitModifications(modifications, shard);
+    }
+
+    void commitModifications(BatchedModifications modifications, EntityOwnershipShard shard) {
+        if(modifications.getModifications().isEmpty()) {
+            return;
+        }
+
         boolean hasLeader = shard.hasLeader();
         if(inflightCommit != null || !hasLeader) {
             if(log.isDebugEnabled()) {
-                log.debug("{} - adding modification to pending",
-                        (inflightCommit != null ? "A commit is inflight" : "No shard leader"));
+                log.debug("{} - adding modifications to pending",
+                        inflightCommit != null ? "A commit is inflight" : "No shard leader");
             }
 
-            pendingModifications.add(modification);
+            pendingModifications.addAll(modifications.getModifications());
         } else {
-            inflightCommit = newBatchedModifications();
-            inflightCommit.addModification(modification);
-
+            inflightCommit = modifications;
             shard.tryCommitModifications(inflightCommit);
         }
     }
 
     void onStateChanged(EntityOwnershipShard shard, boolean isLeader) {
+        shard.possiblyRemoveAllInitialCandidates(shard.getLeader());
+
+        possiblyPrunePendingCommits(shard, isLeader);
+
         if(!isLeader && inflightCommit != null) {
             // We're no longer the leader but we have an inflight local commit. This likely means we didn't get
             // consensus for the commit and switched to follower due to another node with a higher term. We
@@ -173,16 +203,75 @@ class EntityOwnershipShardCommitCoordinator {
         }
     }
 
+    private void possiblyPrunePendingCommits(EntityOwnershipShard shard, boolean isLeader) {
+        // If we were the leader and transitioned to follower, we'll try to forward pending commits to the new leader.
+        // However certain commits, e.g. entity owner changes, should only be committed by a valid leader as the
+        // criteria used to determine the commit may be stale. Since we're no longer a valid leader, we should not
+        // forward such commits thus we prune the pending modifications. We still should forward local candidate change
+        // commits.
+        if (shard.hasLeader() && !isLeader) {
+            // We may have already submitted a transaction for replication and commit. We don't need the base Shard to
+            // forward it since we also have it stored in the inflightCommit and handle retries. So we just clear
+            // pending transactions and drop them.
+            shard.convertPendingTransactionsToMessages();
+
+            // Prune the inflightCommit.
+            if(inflightCommit != null) {
+                inflightCommit = pruneModifications(inflightCommit);
+            }
+
+            // Prune the subsequent pending modifications.
+            Iterator<Modification> iter = pendingModifications.iterator();
+            while(iter.hasNext()) {
+                Modification mod = iter.next();
+                if(!canForwardModificationToNewLeader(mod)) {
+                    iter.remove();
+                }
+            }
+        }
+    }
+
+    @Nullable
+    private BatchedModifications pruneModifications(BatchedModifications toPrune) {
+        BatchedModifications prunedModifications = new BatchedModifications(toPrune.getTransactionID(), toPrune.getVersion());
+        prunedModifications.setDoCommitOnReady(toPrune.isDoCommitOnReady());
+        prunedModifications.setReady(toPrune.isReady());
+        prunedModifications.setTotalMessagesSent(toPrune.getTotalMessagesSent());
+        for(Modification mod: toPrune.getModifications()) {
+            if(canForwardModificationToNewLeader(mod)) {
+                prunedModifications.addModification(mod);
+            }
+        }
+
+        return !prunedModifications.getModifications().isEmpty() ? prunedModifications : null;
+    }
+
+    private boolean canForwardModificationToNewLeader(Modification mod) {
+        // If this is a WRITE of entity owner we don't want to forward it to a new leader since the criteria used
+        // to determine the new owner might be stale.
+        if (mod instanceof WriteModification) {
+            WriteModification writeMod = (WriteModification)mod;
+            boolean canForward = !writeMod.getPath().getLastPathArgument().getNodeType().equals(ENTITY_OWNER_QNAME);
+
+            if (!canForward) {
+                log.debug("Not forwarding WRITE modification for {} to new leader", writeMod.getPath());
+            }
+
+            return canForward;
+        }
+
+        return true;
+    }
+
     private void newInflightCommitWithDifferentTransactionID() {
         BatchedModifications newBatchedModifications = newBatchedModifications();
         newBatchedModifications.getModifications().addAll(inflightCommit.getModifications());
         inflightCommit = newBatchedModifications;
     }
 
-    private BatchedModifications newBatchedModifications() {
+    BatchedModifications newBatchedModifications() {
         BatchedModifications modifications = new BatchedModifications(
-                TransactionIdentifier.create(localMemberName, ++transactionIDCounter).toString(),
-                DataStoreVersions.CURRENT_VERSION, "");
+            new TransactionIdentifier(historyId, ++transactionIDCounter), DataStoreVersions.CURRENT_VERSION);
         modifications.setDoCommitOnReady(true);
         modifications.setReady(true);
         modifications.setTotalMessagesSent(1);