Fix shard commit deadlock
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / TransactionProxy.java
index f645608dd9b96c327153c804f544a0b2eacb16b3..5a1cb6740d0229b4e9918f8280f73299eab82727 100644 (file)
@@ -12,19 +12,21 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
 import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
 import com.google.common.collect.Iterables;
 import com.google.common.util.concurrent.CheckedFuture;
 import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.SettableFuture;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
-import org.opendaylight.controller.cluster.datastore.identifiers.TransactionIdentifier;
+import java.util.TreeMap;
+import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
 import org.opendaylight.controller.cluster.datastore.messages.AbstractRead;
 import org.opendaylight.controller.cluster.datastore.messages.DataExists;
 import org.opendaylight.controller.cluster.datastore.messages.ReadData;
@@ -34,10 +36,10 @@ import org.opendaylight.controller.cluster.datastore.modification.MergeModificat
 import org.opendaylight.controller.cluster.datastore.modification.WriteModification;
 import org.opendaylight.controller.cluster.datastore.utils.ActorContext;
 import org.opendaylight.controller.cluster.datastore.utils.NormalizedNodeAggregator;
-import org.opendaylight.controller.md.sal.common.api.data.ReadFailedException;
-import org.opendaylight.controller.sal.core.spi.data.AbstractDOMStoreTransaction;
-import org.opendaylight.controller.sal.core.spi.data.DOMStoreReadWriteTransaction;
-import org.opendaylight.yangtools.util.concurrent.MappingCheckedFuture;
+import org.opendaylight.mdsal.common.api.MappingCheckedFuture;
+import org.opendaylight.mdsal.common.api.ReadFailedException;
+import org.opendaylight.mdsal.dom.spi.store.AbstractDOMStoreTransaction;
+import org.opendaylight.mdsal.dom.spi.store.DOMStoreReadWriteTransaction;
 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
 import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode;
 import org.opendaylight.yangtools.yang.data.api.schema.tree.DataValidationFailedException;
@@ -49,15 +51,39 @@ import scala.concurrent.Promise;
 /**
  * A transaction potentially spanning multiple backend shards.
  */
-public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIdentifier> implements DOMStoreReadWriteTransaction {
-    private static enum TransactionState {
+public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIdentifier>
+        implements DOMStoreReadWriteTransaction {
+    private enum TransactionState {
         OPEN,
         READY,
         CLOSED,
     }
+
     private static final Logger LOG = LoggerFactory.getLogger(TransactionProxy.class);
 
-    private final Map<String, TransactionContextWrapper> txContextWrappers = new HashMap<>();
+    // Global lock used for transactions spanning multiple shards - synchronizes sending of the ready messages
+    // for atomicity to avoid potential deadlock with concurrent transactions spanning the same shards as outlined
+    // in the following scenario:
+    //
+    //  - Tx1 sends ready message to shard A
+    //  - Tx2 sends ready message to shard A
+    //  - Tx2 sends ready message to shard B
+    //  - Tx1 sends ready message to shard B
+    //
+    // This scenario results in deadlock: after Tx1 canCommits to shard A, it can't proceed with shard B until Tx2
+    // completes as Tx2 was readied first on shard B. However Tx2 cannot make progress because it's waiting to canCommit
+    // on shard A which is blocked by Tx1.
+    //
+    // The global lock avoids this as it forces the ready messages to be sent in a predictable order:
+    //
+    //  - Tx1 sends ready message to shard A
+    //  - Tx1 sends ready message to shard B
+    //  - Tx2 sends ready message to shard A
+    //  - Tx2 sends ready message to shard B
+    //
+    private static final Object GLOBAL_TX_READY_LOCK = new Object();
+
+    private final Map<String, TransactionContextWrapper> txContextWrappers = new TreeMap<>();
     private final AbstractTransactionContextFactory<?> txContextFactory;
     private final TransactionType type;
     private TransactionState state = TransactionState.OPEN;
@@ -77,19 +103,19 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
         return executeRead(shardNameFromIdentifier(path), new DataExists(path, DataStoreVersions.CURRENT_VERSION));
     }
 
-    private <T> CheckedFuture<T, ReadFailedException> executeRead(String shardName, final AbstractRead<T> readCmd) {
-        Preconditions.checkState(type != TransactionType.WRITE_ONLY, "Reads from write-only transactions are not allowed");
+    private <T> CheckedFuture<T, ReadFailedException> executeRead(final String shardName,
+            final AbstractRead<T> readCmd) {
+        Preconditions.checkState(type != TransactionType.WRITE_ONLY,
+                "Reads from write-only transactions are not allowed");
 
-        if(LOG.isDebugEnabled()) {
-            LOG.debug("Tx {} {} {}", getIdentifier(), readCmd.getClass().getSimpleName(), readCmd.getPath());
-        }
+        LOG.trace("Tx {} {} {}", getIdentifier(), readCmd.getClass().getSimpleName(), readCmd.getPath());
 
         final SettableFuture<T> proxyFuture = SettableFuture.create();
         TransactionContextWrapper contextWrapper = getContextWrapper(shardName);
         contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
             @Override
-            public void invoke(TransactionContext transactionContext) {
-                transactionContext.executeRead(readCmd, proxyFuture);
+            public void invoke(final TransactionContext transactionContext, final Boolean havePermit) {
+                transactionContext.executeRead(readCmd, proxyFuture, havePermit);
             }
         });
 
@@ -98,15 +124,12 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
 
     @Override
     public CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> read(final YangInstanceIdentifier path) {
-        Preconditions.checkState(type != TransactionType.WRITE_ONLY, "Reads from write-only transactions are not allowed");
+        Preconditions.checkState(type != TransactionType.WRITE_ONLY,
+                "Reads from write-only transactions are not allowed");
+        Preconditions.checkNotNull(path, "path should not be null");
 
-        LOG.debug("Tx {} read {}", getIdentifier(), path);
-
-        if (YangInstanceIdentifier.EMPTY.equals(path)) {
-            return readAllData();
-        } else {
-            return singleShardRead(shardNameFromIdentifier(path), path);
-        }
+        LOG.trace("Tx {} read {}", getIdentifier(), path);
+        return path.isEmpty() ? readAllData() :  singleShardRead(shardNameFromIdentifier(path), path);
     }
 
     private CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> singleShardRead(
@@ -116,7 +139,8 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
 
     private CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> readAllData() {
         final Set<String> allShardNames = txContextFactory.getActorContext().getConfiguration().getAllShardNames();
-        final Collection<CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException>> futures = new ArrayList<>(allShardNames.size());
+        final Collection<CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException>> futures =
+                new ArrayList<>(allShardNames.size());
 
         for (String shardName : allShardNames) {
             futures.add(singleShardRead(shardName, YangInstanceIdentifier.EMPTY));
@@ -125,16 +149,16 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
         final ListenableFuture<List<Optional<NormalizedNode<?, ?>>>> listFuture = Futures.allAsList(futures);
         final ListenableFuture<Optional<NormalizedNode<?, ?>>> aggregateFuture;
 
-        aggregateFuture = Futures.transform(listFuture, new Function<List<Optional<NormalizedNode<?, ?>>>, Optional<NormalizedNode<?, ?>>>() {
-            @Override
-            public Optional<NormalizedNode<?, ?>> apply(final List<Optional<NormalizedNode<?, ?>>> input) {
+        aggregateFuture = Futures.transform(listFuture,
+            (Function<List<Optional<NormalizedNode<?, ?>>>, Optional<NormalizedNode<?, ?>>>) input -> {
                 try {
-                    return NormalizedNodeAggregator.aggregate(YangInstanceIdentifier.EMPTY, input, txContextFactory.getActorContext().getSchemaContext());
+                    return NormalizedNodeAggregator.aggregate(YangInstanceIdentifier.EMPTY, input,
+                            txContextFactory.getActorContext().getSchemaContext(),
+                            txContextFactory.getActorContext().getDatastoreContext().getLogicalStoreType());
                 } catch (DataValidationFailedException e) {
                     throw new IllegalArgumentException("Failed to aggregate", e);
                 }
-            }
-        });
+            }, MoreExecutors.directExecutor());
 
         return MappingCheckedFuture.create(aggregateFuture, ReadFailedException.MAPPER);
     }
@@ -157,16 +181,14 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
     private void executeModification(final AbstractModification modification) {
         checkModificationState();
 
-        if(LOG.isDebugEnabled()) {
-            LOG.debug("Tx {} executeModification {} {}", getIdentifier(), modification.getClass().getSimpleName(),
-                    modification.getPath());
-        }
+        LOG.trace("Tx {} executeModification {} {}", getIdentifier(), modification.getClass().getSimpleName(),
+                modification.getPath());
 
         TransactionContextWrapper contextWrapper = getContextWrapper(modification.getPath());
         contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
             @Override
-            protected void invoke(TransactionContext transactionContext) {
-                transactionContext.executeModification(modification);
+            protected void invoke(final TransactionContext transactionContext, final Boolean havePermit) {
+                transactionContext.executeModification(modification, havePermit);
             }
         });
     }
@@ -199,7 +221,7 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
         for (TransactionContextWrapper contextWrapper : txContextWrappers.values()) {
             contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
                 @Override
-                public void invoke(TransactionContext transactionContext) {
+                public void invoke(final TransactionContext transactionContext, final Boolean havePermit) {
                     transactionContext.closeTransaction();
                 }
             });
@@ -220,15 +242,16 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
 
         final AbstractThreePhaseCommitCohort<?> ret;
         switch (txContextWrappers.size()) {
-        case 0:
-            ret = NoOpDOMStoreThreePhaseCommitCohort.INSTANCE;
-            break;
-        case 1:
-            final Entry<String, TransactionContextWrapper> e = Iterables.getOnlyElement(txContextWrappers.entrySet());
-            ret = createSingleCommitCohort(e.getKey(), e.getValue());
-            break;
-        default:
-            ret = createMultiCommitCohort(txContextWrappers.entrySet());
+            case 0:
+                ret = NoOpDOMStoreThreePhaseCommitCohort.INSTANCE;
+                break;
+            case 1:
+                final Entry<String, TransactionContextWrapper> e = Iterables.getOnlyElement(
+                        txContextWrappers.entrySet());
+                ret = createSingleCommitCohort(e.getKey(), e.getValue());
+                break;
+            default:
+                ret = createMultiCommitCohort(txContextWrappers.entrySet());
         }
 
         txContextFactory.onTransactionReady(getIdentifier(), ret.getCohortFutures());
@@ -237,6 +260,7 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
         return debugContext == null ? ret : new DebugThreePhaseCommitCohort(getIdentifier(), ret, debugContext);
     }
 
+    @SuppressWarnings({ "rawtypes", "unchecked" })
     private AbstractThreePhaseCommitCohort<?> createSingleCommitCohort(final String shardName,
             final TransactionContextWrapper contextWrapper) {
 
@@ -251,44 +275,51 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
             final Promise promise = akka.dispatch.Futures.promise();
             contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
                 @Override
-                public void invoke(TransactionContext transactionContext) {
-                    promise.completeWith(getReadyOrDirectCommitFuture(transactionContext, operationCallbackRef));
+                public void invoke(final TransactionContext newTransactionContext, final Boolean havePermit) {
+                    promise.completeWith(getDirectCommitFuture(newTransactionContext, operationCallbackRef,
+                        havePermit));
                 }
             });
             future = promise.future();
         } else {
             // avoid the creation of a promise and a TransactionOperation
-            future = getReadyOrDirectCommitFuture(transactionContext, operationCallbackRef);
+            future = getDirectCommitFuture(transactionContext, operationCallbackRef, null);
         }
 
-        return new SingleCommitCohortProxy(txContextFactory.getActorContext(), future, getIdentifier().toString(),
-                operationCallbackRef);
+        return new SingleCommitCohortProxy(txContextFactory.getActorContext(), future, getIdentifier(),
+            operationCallbackRef);
     }
 
-    private Future<?> getReadyOrDirectCommitFuture(TransactionContext transactionContext,
-            OperationCallback.Reference operationCallbackRef) {
-        if (transactionContext.supportsDirectCommit()) {
-            TransactionRateLimitingCallback rateLimitingCallback = new TransactionRateLimitingCallback(
-                    txContextFactory.getActorContext());
-            operationCallbackRef.set(rateLimitingCallback);
-            rateLimitingCallback.run();
-            return transactionContext.directCommit();
-        } else {
-            return transactionContext.readyTransaction();
-        }
+    private Future<?> getDirectCommitFuture(final TransactionContext transactionContext,
+            final OperationCallback.Reference operationCallbackRef, final Boolean havePermit) {
+        TransactionRateLimitingCallback rateLimitingCallback = new TransactionRateLimitingCallback(
+                txContextFactory.getActorContext());
+        operationCallbackRef.set(rateLimitingCallback);
+        rateLimitingCallback.run();
+        return transactionContext.directCommit(havePermit);
     }
 
     private AbstractThreePhaseCommitCohort<ActorSelection> createMultiCommitCohort(
             final Set<Entry<String, TransactionContextWrapper>> txContextWrapperEntries) {
 
-        final List<Future<ActorSelection>> cohortFutures = new ArrayList<>(txContextWrapperEntries.size());
-        for (Entry<String, TransactionContextWrapper> e : txContextWrapperEntries) {
-            LOG.debug("Tx {} Readying transaction for shard {}", getIdentifier(), e.getKey());
+        final List<ThreePhaseCommitCohortProxy.CohortInfo> cohorts = new ArrayList<>(txContextWrapperEntries.size());
+
+        synchronized (GLOBAL_TX_READY_LOCK) {
+            for (Entry<String, TransactionContextWrapper> e : txContextWrapperEntries) {
+                LOG.debug("Tx {} Readying transaction for shard {}", getIdentifier(), e.getKey());
 
-            cohortFutures.add(e.getValue().readyTransaction());
+                final TransactionContextWrapper wrapper = e.getValue();
+
+                // The remote tx version is obtained the via TransactionContext which may not be available yet so
+                // we pass a Supplier to dynamically obtain it. Once the ready Future is resolved the
+                // TransactionContext is available.
+                Supplier<Short> txVersionSupplier = () -> wrapper.getTransactionContext().getTransactionVersion();
+
+                cohorts.add(new ThreePhaseCommitCohortProxy.CohortInfo(wrapper.readyTransaction(), txVersionSupplier));
+            }
         }
 
-        return new ThreePhaseCommitCohortProxy(txContextFactory.getActorContext(), cohortFutures, getIdentifier().toString());
+        return new ThreePhaseCommitCohortProxy(txContextFactory.getActorContext(), cohorts, getIdentifier());
     }
 
     private String shardNameFromIdentifier(final YangInstanceIdentifier path) {