Fix shard commit deadlock
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / TransactionProxy.java
index e7a00042e4146c5ddc03a013e6eeda3198a38ea3..5a1cb6740d0229b4e9918f8280f73299eab82727 100644 (file)
@@ -12,28 +12,34 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
 import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
 import com.google.common.collect.Iterables;
 import com.google.common.util.concurrent.CheckedFuture;
 import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.SettableFuture;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
-import java.util.concurrent.Semaphore;
-import java.util.concurrent.TimeUnit;
-import org.opendaylight.controller.cluster.datastore.identifiers.TransactionIdentifier;
-import org.opendaylight.controller.cluster.datastore.shardstrategy.ShardStrategyFactory;
+import java.util.TreeMap;
+import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
+import org.opendaylight.controller.cluster.datastore.messages.AbstractRead;
+import org.opendaylight.controller.cluster.datastore.messages.DataExists;
+import org.opendaylight.controller.cluster.datastore.messages.ReadData;
+import org.opendaylight.controller.cluster.datastore.modification.AbstractModification;
+import org.opendaylight.controller.cluster.datastore.modification.DeleteModification;
+import org.opendaylight.controller.cluster.datastore.modification.MergeModification;
+import org.opendaylight.controller.cluster.datastore.modification.WriteModification;
 import org.opendaylight.controller.cluster.datastore.utils.ActorContext;
 import org.opendaylight.controller.cluster.datastore.utils.NormalizedNodeAggregator;
-import org.opendaylight.controller.md.sal.common.api.data.ReadFailedException;
-import org.opendaylight.controller.sal.core.spi.data.AbstractDOMStoreTransaction;
-import org.opendaylight.controller.sal.core.spi.data.DOMStoreReadWriteTransaction;
-import org.opendaylight.yangtools.util.concurrent.MappingCheckedFuture;
+import org.opendaylight.mdsal.common.api.MappingCheckedFuture;
+import org.opendaylight.mdsal.common.api.ReadFailedException;
+import org.opendaylight.mdsal.dom.spi.store.AbstractDOMStoreTransaction;
+import org.opendaylight.mdsal.dom.spi.store.DOMStoreReadWriteTransaction;
 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
 import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode;
 import org.opendaylight.yangtools.yang.data.api.schema.tree.DataValidationFailedException;
@@ -45,20 +51,42 @@ import scala.concurrent.Promise;
 /**
  * A transaction potentially spanning multiple backend shards.
  */
-public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIdentifier> implements DOMStoreReadWriteTransaction {
-    private static enum TransactionState {
+public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIdentifier>
+        implements DOMStoreReadWriteTransaction {
+    private enum TransactionState {
         OPEN,
         READY,
         CLOSED,
     }
+
     private static final Logger LOG = LoggerFactory.getLogger(TransactionProxy.class);
 
-    private final Map<String, TransactionContextWrapper> txContextAdapters = new HashMap<>();
+    // Global lock used for transactions spanning multiple shards - synchronizes sending of the ready messages
+    // for atomicity to avoid potential deadlock with concurrent transactions spanning the same shards as outlined
+    // in the following scenario:
+    //
+    //  - Tx1 sends ready message to shard A
+    //  - Tx2 sends ready message to shard A
+    //  - Tx2 sends ready message to shard B
+    //  - Tx1 sends ready message to shard B
+    //
+    // This scenario results in deadlock: after Tx1 canCommits to shard A, it can't proceed with shard B until Tx2
+    // completes, as Tx2 was readied first on shard B. However, Tx2 cannot make progress because it is waiting to
+    // canCommit on shard A, which is blocked by Tx1.
+    //
+    // The global lock avoids this as it forces the ready messages to be sent in a predictable order:
+    //
+    //  - Tx1 sends ready message to shard A
+    //  - Tx1 sends ready message to shard B
+    //  - Tx2 sends ready message to shard A
+    //  - Tx2 sends ready message to shard B
+    //
+    private static final Object GLOBAL_TX_READY_LOCK = new Object();
+
+    private final Map<String, TransactionContextWrapper> txContextWrappers = new TreeMap<>();
     private final AbstractTransactionContextFactory<?> txContextFactory;
     private final TransactionType type;
     private TransactionState state = TransactionState.OPEN;
-    private volatile OperationCompleter operationCompleter;
-    private volatile Semaphore operationLimiter;
 
     @VisibleForTesting
     public TransactionProxy(final AbstractTransactionContextFactory<?> txContextFactory, final TransactionType type) {
@@ -72,18 +100,22 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
 
     @Override
     public CheckedFuture<Boolean, ReadFailedException> exists(final YangInstanceIdentifier path) {
-        Preconditions.checkState(type != TransactionType.WRITE_ONLY, "Reads from write-only transactions are not allowed");
+        return executeRead(shardNameFromIdentifier(path), new DataExists(path, DataStoreVersions.CURRENT_VERSION));
+    }
 
-        LOG.debug("Tx {} exists {}", getIdentifier(), path);
+    private <T> CheckedFuture<T, ReadFailedException> executeRead(final String shardName,
+            final AbstractRead<T> readCmd) {
+        Preconditions.checkState(type != TransactionType.WRITE_ONLY,
+                "Reads from write-only transactions are not allowed");
 
-        throttleOperation();
+        LOG.trace("Tx {} {} {}", getIdentifier(), readCmd.getClass().getSimpleName(), readCmd.getPath());
 
-        final SettableFuture<Boolean> proxyFuture = SettableFuture.create();
-        TransactionContextWrapper contextAdapter = getContextAdapter(path);
-        contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
+        final SettableFuture<T> proxyFuture = SettableFuture.create();
+        TransactionContextWrapper contextWrapper = getContextWrapper(shardName);
+        contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
             @Override
-            public void invoke(TransactionContext transactionContext) {
-                transactionContext.dataExists(path, proxyFuture);
+            public void invoke(final TransactionContext transactionContext, final Boolean havePermit) {
+                transactionContext.executeRead(readCmd, proxyFuture, havePermit);
             }
         });
 
@@ -92,36 +124,23 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
 
     @Override
     public CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> read(final YangInstanceIdentifier path) {
-        Preconditions.checkState(type != TransactionType.WRITE_ONLY, "Reads from write-only transactions are not allowed");
+        Preconditions.checkState(type != TransactionType.WRITE_ONLY,
+                "Reads from write-only transactions are not allowed");
+        Preconditions.checkNotNull(path, "path should not be null");
 
-        LOG.debug("Tx {} read {}", getIdentifier(), path);
-
-        if (YangInstanceIdentifier.EMPTY.equals(path)) {
-            return readAllData();
-        } else {
-            throttleOperation();
-
-            return singleShardRead(shardNameFromIdentifier(path), path);
-        }
+        LOG.trace("Tx {} read {}", getIdentifier(), path);
+        return path.isEmpty() ? readAllData() :  singleShardRead(shardNameFromIdentifier(path), path);
     }
 
     private CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> singleShardRead(
             final String shardName, final YangInstanceIdentifier path) {
-        final SettableFuture<Optional<NormalizedNode<?, ?>>> proxyFuture = SettableFuture.create();
-        TransactionContextWrapper contextAdapter = getContextAdapter(shardName);
-        contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
-            @Override
-            public void invoke(TransactionContext transactionContext) {
-                transactionContext.readData(path, proxyFuture);
-            }
-        });
-
-        return MappingCheckedFuture.create(proxyFuture, ReadFailedException.MAPPER);
+        return executeRead(shardName, new ReadData(path, DataStoreVersions.CURRENT_VERSION));
     }
 
     private CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> readAllData() {
         final Set<String> allShardNames = txContextFactory.getActorContext().getConfiguration().getAllShardNames();
-        final Collection<CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException>> futures = new ArrayList<>(allShardNames.size());
+        final Collection<CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException>> futures =
+                new ArrayList<>(allShardNames.size());
 
         for (String shardName : allShardNames) {
             futures.add(singleShardRead(shardName, YangInstanceIdentifier.EMPTY));
@@ -130,67 +149,46 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
         final ListenableFuture<List<Optional<NormalizedNode<?, ?>>>> listFuture = Futures.allAsList(futures);
         final ListenableFuture<Optional<NormalizedNode<?, ?>>> aggregateFuture;
 
-        aggregateFuture = Futures.transform(listFuture, new Function<List<Optional<NormalizedNode<?, ?>>>, Optional<NormalizedNode<?, ?>>>() {
-            @Override
-            public Optional<NormalizedNode<?, ?>> apply(final List<Optional<NormalizedNode<?, ?>>> input) {
+        aggregateFuture = Futures.transform(listFuture,
+            (Function<List<Optional<NormalizedNode<?, ?>>>, Optional<NormalizedNode<?, ?>>>) input -> {
                 try {
-                    return NormalizedNodeAggregator.aggregate(YangInstanceIdentifier.EMPTY, input, txContextFactory.getActorContext().getSchemaContext());
+                    return NormalizedNodeAggregator.aggregate(YangInstanceIdentifier.EMPTY, input,
+                            txContextFactory.getActorContext().getSchemaContext(),
+                            txContextFactory.getActorContext().getDatastoreContext().getLogicalStoreType());
                 } catch (DataValidationFailedException e) {
                     throw new IllegalArgumentException("Failed to aggregate", e);
                 }
-            }
-        });
+            }, MoreExecutors.directExecutor());
 
         return MappingCheckedFuture.create(aggregateFuture, ReadFailedException.MAPPER);
     }
 
     @Override
     public void delete(final YangInstanceIdentifier path) {
-        checkModificationState();
-
-        LOG.debug("Tx {} delete {}", getIdentifier(), path);
-
-        throttleOperation();
-
-        TransactionContextWrapper contextAdapter = getContextAdapter(path);
-        contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
-            @Override
-            public void invoke(TransactionContext transactionContext) {
-                transactionContext.deleteData(path);
-            }
-        });
+        executeModification(new DeleteModification(path));
     }
 
     @Override
     public void merge(final YangInstanceIdentifier path, final NormalizedNode<?, ?> data) {
-        checkModificationState();
-
-        LOG.debug("Tx {} merge {}", getIdentifier(), path);
-
-        throttleOperation();
-
-        TransactionContextWrapper contextAdapter = getContextAdapter(path);
-        contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
-            @Override
-            public void invoke(TransactionContext transactionContext) {
-                transactionContext.mergeData(path, data);
-            }
-        });
+        executeModification(new MergeModification(path, data));
     }
 
     @Override
     public void write(final YangInstanceIdentifier path, final NormalizedNode<?, ?> data) {
-        checkModificationState();
+        executeModification(new WriteModification(path, data));
+    }
 
-        LOG.debug("Tx {} write {}", getIdentifier(), path);
+    private void executeModification(final AbstractModification modification) {
+        checkModificationState();
 
-        throttleOperation();
+        LOG.trace("Tx {} executeModification {} {}", getIdentifier(), modification.getClass().getSimpleName(),
+                modification.getPath());
 
-        TransactionContextWrapper contextAdapter = getContextAdapter(path);
-        contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
+        TransactionContextWrapper contextWrapper = getContextWrapper(modification.getPath());
+        contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
             @Override
-            public void invoke(TransactionContext transactionContext) {
-                transactionContext.writeData(path, data);
+            protected void invoke(final TransactionContext transactionContext, final Boolean havePermit) {
+                transactionContext.executeModification(modification, havePermit);
             }
         });
     }
@@ -220,17 +218,17 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
             return;
         }
 
-        for (TransactionContextWrapper contextAdapter : txContextAdapters.values()) {
-            contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
+        for (TransactionContextWrapper contextWrapper : txContextWrappers.values()) {
+            contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
                 @Override
-                public void invoke(TransactionContext transactionContext) {
+                public void invoke(final TransactionContext transactionContext, final Boolean havePermit) {
                     transactionContext.closeTransaction();
                 }
             });
         }
 
 
-        txContextAdapters.clear();
+        txContextWrappers.clear();
     }
 
     @Override
@@ -240,19 +238,20 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
         final boolean success = seal(TransactionState.READY);
         Preconditions.checkState(success, "Transaction %s is %s, it cannot be readied", getIdentifier(), state);
 
-        LOG.debug("Tx {} Readying {} components for commit", getIdentifier(), txContextAdapters.size());
+        LOG.debug("Tx {} Readying {} components for commit", getIdentifier(), txContextWrappers.size());
 
         final AbstractThreePhaseCommitCohort<?> ret;
-        switch (txContextAdapters.size()) {
-        case 0:
-            ret = NoOpDOMStoreThreePhaseCommitCohort.INSTANCE;
-            break;
-        case 1:
-            final Entry<String, TransactionContextWrapper> e = Iterables.getOnlyElement(txContextAdapters.entrySet());
-            ret = createSingleCommitCohort(e.getKey(), e.getValue());
-            break;
-        default:
-            ret = createMultiCommitCohort(txContextAdapters.entrySet());
+        switch (txContextWrappers.size()) {
+            case 0:
+                ret = NoOpDOMStoreThreePhaseCommitCohort.INSTANCE;
+                break;
+            case 1:
+                final Entry<String, TransactionContextWrapper> e = Iterables.getOnlyElement(
+                        txContextWrappers.entrySet());
+                ret = createSingleCommitCohort(e.getKey(), e.getValue());
+                break;
+            default:
+                ret = createMultiCommitCohort(txContextWrappers.entrySet());
         }
 
         txContextFactory.onTransactionReady(getIdentifier(), ret.getCohortFutures());
@@ -261,96 +260,84 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
         return debugContext == null ? ret : new DebugThreePhaseCommitCohort(getIdentifier(), ret, debugContext);
     }
 
+    @SuppressWarnings({ "rawtypes", "unchecked" })
     private AbstractThreePhaseCommitCohort<?> createSingleCommitCohort(final String shardName,
-            final TransactionContextWrapper contextAdapter) {
-        throttleOperation();
+            final TransactionContextWrapper contextWrapper) {
 
         LOG.debug("Tx {} Readying transaction for shard {}", getIdentifier(), shardName);
 
         final OperationCallback.Reference operationCallbackRef =
                 new OperationCallback.Reference(OperationCallback.NO_OP_CALLBACK);
 
-        final TransactionContext transactionContext = contextAdapter.getTransactionContext();
+        final TransactionContext transactionContext = contextWrapper.getTransactionContext();
         final Future future;
         if (transactionContext == null) {
             final Promise promise = akka.dispatch.Futures.promise();
-            contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
+            contextWrapper.maybeExecuteTransactionOperation(new TransactionOperation() {
                 @Override
-                public void invoke(TransactionContext transactionContext) {
-                    promise.completeWith(getReadyOrDirectCommitFuture(transactionContext, operationCallbackRef));
+                public void invoke(final TransactionContext newTransactionContext, final Boolean havePermit) {
+                    promise.completeWith(getDirectCommitFuture(newTransactionContext, operationCallbackRef,
+                        havePermit));
                 }
             });
             future = promise.future();
         } else {
             // avoid the creation of a promise and a TransactionOperation
-            future = getReadyOrDirectCommitFuture(transactionContext, operationCallbackRef);
+            future = getDirectCommitFuture(transactionContext, operationCallbackRef, null);
         }
 
-        return new SingleCommitCohortProxy(txContextFactory.getActorContext(), future, getIdentifier().toString(),
-                operationCallbackRef);
+        return new SingleCommitCohortProxy(txContextFactory.getActorContext(), future, getIdentifier(),
+            operationCallbackRef);
     }
 
-    private Future<?> getReadyOrDirectCommitFuture(TransactionContext transactionContext,
-            OperationCallback.Reference operationCallbackRef) {
-        if (transactionContext.supportsDirectCommit()) {
-            TransactionRateLimitingCallback rateLimitingCallback = new TransactionRateLimitingCallback(
-                    txContextFactory.getActorContext());
-            operationCallbackRef.set(rateLimitingCallback);
-            rateLimitingCallback.run();
-            return transactionContext.directCommit();
-        } else {
-            return transactionContext.readyTransaction();
-        }
+    private Future<?> getDirectCommitFuture(final TransactionContext transactionContext,
+            final OperationCallback.Reference operationCallbackRef, final Boolean havePermit) {
+        TransactionRateLimitingCallback rateLimitingCallback = new TransactionRateLimitingCallback(
+                txContextFactory.getActorContext());
+        operationCallbackRef.set(rateLimitingCallback);
+        rateLimitingCallback.run();
+        return transactionContext.directCommit(havePermit);
     }
 
     private AbstractThreePhaseCommitCohort<ActorSelection> createMultiCommitCohort(
-            final Set<Entry<String, TransactionContextWrapper>> txContextAdapterEntries) {
-
-        throttleOperation();
-        final List<Future<ActorSelection>> cohortFutures = new ArrayList<>(txContextAdapterEntries.size());
-        for (Entry<String, TransactionContextWrapper> e : txContextAdapterEntries) {
-            LOG.debug("Tx {} Readying transaction for shard {}", getIdentifier(), e.getKey());
-
-            TransactionContextWrapper contextAdapter = e.getValue();
-            final TransactionContext transactionContext = contextAdapter.getTransactionContext();
-            Future<ActorSelection> future;
-            if (transactionContext != null) {
-                // avoid the creation of a promise and a TransactionOperation
-                future = transactionContext.readyTransaction();
-            } else {
-                final Promise<ActorSelection> promise = akka.dispatch.Futures.promise();
-                contextAdapter.maybeExecuteTransactionOperation(new TransactionOperation() {
-                    @Override
-                    public void invoke(TransactionContext transactionContext) {
-                        promise.completeWith(transactionContext.readyTransaction());
-                    }
-                });
-
-                future = promise.future();
-            }
+            final Set<Entry<String, TransactionContextWrapper>> txContextWrapperEntries) {
 
-            cohortFutures.add(future);
+        final List<ThreePhaseCommitCohortProxy.CohortInfo> cohorts = new ArrayList<>(txContextWrapperEntries.size());
+
+        synchronized (GLOBAL_TX_READY_LOCK) {
+            for (Entry<String, TransactionContextWrapper> e : txContextWrapperEntries) {
+                LOG.debug("Tx {} Readying transaction for shard {}", getIdentifier(), e.getKey());
+
+                final TransactionContextWrapper wrapper = e.getValue();
+
+                // The remote tx version is obtained via the TransactionContext, which may not be available yet,
+                // so we pass a Supplier to dynamically obtain it. Once the ready Future is resolved, the
+                // TransactionContext is available.
+                Supplier<Short> txVersionSupplier = () -> wrapper.getTransactionContext().getTransactionVersion();
+
+                cohorts.add(new ThreePhaseCommitCohortProxy.CohortInfo(wrapper.readyTransaction(), txVersionSupplier));
+            }
         }
 
-        return new ThreePhaseCommitCohortProxy(txContextFactory.getActorContext(), cohortFutures, getIdentifier().toString());
+        return new ThreePhaseCommitCohortProxy(txContextFactory.getActorContext(), cohorts, getIdentifier());
     }
 
-    private static String shardNameFromIdentifier(final YangInstanceIdentifier path) {
-        return ShardStrategyFactory.getStrategy(path).findShard(path);
+    private String shardNameFromIdentifier(final YangInstanceIdentifier path) {
+        return txContextFactory.getActorContext().getShardStrategyFactory().getStrategy(path).findShard(path);
     }
 
-    private TransactionContextWrapper getContextAdapter(final YangInstanceIdentifier path) {
-        return getContextAdapter(shardNameFromIdentifier(path));
+    private TransactionContextWrapper getContextWrapper(final YangInstanceIdentifier path) {
+        return getContextWrapper(shardNameFromIdentifier(path));
     }
 
-    private TransactionContextWrapper getContextAdapter(final String shardName) {
-        final TransactionContextWrapper existing = txContextAdapters.get(shardName);
+    private TransactionContextWrapper getContextWrapper(final String shardName) {
+        final TransactionContextWrapper existing = txContextWrappers.get(shardName);
         if (existing != null) {
             return existing;
         }
 
-        final TransactionContextWrapper fresh = txContextFactory.newTransactionAdapter(this, shardName);
-        txContextAdapters.put(shardName, fresh);
+        final TransactionContextWrapper fresh = txContextFactory.newTransactionContextWrapper(this, shardName);
+        txContextWrappers.put(shardName, fresh);
         return fresh;
     }
 
@@ -365,44 +352,4 @@ public class TransactionProxy extends AbstractDOMStoreTransaction<TransactionIde
     ActorContext getActorContext() {
         return txContextFactory.getActorContext();
     }
-
-    OperationCompleter getCompleter() {
-        OperationCompleter ret = operationCompleter;
-        if (ret == null) {
-            final Semaphore s = getLimiter();
-            ret = new OperationCompleter(s);
-            operationCompleter = ret;
-        }
-
-        return ret;
-    }
-
-    Semaphore getLimiter() {
-        Semaphore ret = operationLimiter;
-        if (ret == null) {
-            // Note : Currently mailbox-capacity comes from akka.conf and not from the config-subsystem
-            ret = new Semaphore(getActorContext().getTransactionOutstandingOperationLimit());
-            operationLimiter = ret;
-        }
-        return ret;
-    }
-
-    void throttleOperation() {
-        throttleOperation(1);
-    }
-
-    private void throttleOperation(int acquirePermits) {
-        try {
-            if (!getLimiter().tryAcquire(acquirePermits,
-                getActorContext().getDatastoreContext().getOperationTimeoutInSeconds(), TimeUnit.SECONDS)){
-                LOG.warn("Failed to acquire operation permit for transaction {}", getIdentifier());
-            }
-        } catch (InterruptedException e) {
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Interrupted when trying to acquire operation permit for transaction {}", getIdentifier(), e);
-            } else {
-                LOG.warn("Interrupted when trying to acquire operation permit for transaction {}", getIdentifier());
-            }
-        }
-    }
 }