X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FShardDataTree.java;h=e1c12cd489c3cb6f14164380cbff005512c50431;hp=e8b41d97474f49951b8b67846d22c7a06b73d185;hb=a6af137c30470b86d4bc624d4c48cb686495a182;hpb=52725324973f22ac0c85ed4fd8459cf0ef504407 diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java index e8b41d9747..e1c12cd489 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java @@ -27,11 +27,14 @@ import java.io.IOException; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.Deque; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import java.util.Queue; +import java.util.SortedSet; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Consumer; @@ -58,8 +61,6 @@ import org.opendaylight.controller.cluster.datastore.persisted.ShardDataTreeSnap import org.opendaylight.controller.cluster.datastore.utils.DataTreeModificationOutput; import org.opendaylight.controller.cluster.datastore.utils.PruningDataTreeModification; import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload; -import org.opendaylight.controller.md.sal.common.api.data.AsyncDataBroker.DataChangeScope; -import org.opendaylight.controller.md.sal.common.api.data.AsyncDataChangeListener; import org.opendaylight.mdsal.common.api.OptimisticLockFailedException; import org.opendaylight.mdsal.common.api.TransactionCommitFailedException; import org.opendaylight.mdsal.dom.api.DOMDataTreeChangeListener; @@ -102,6 +103,11 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { this.cohort = Preconditions.checkNotNull(cohort); lastAccess = now; } + + @Override + public String toString() { + return "CommitEntry [tx=" + cohort.getIdentifier() + ", state=" + cohort.getState() + "]"; + } } private static final Timeout COMMIT_STEP_TIMEOUT = new Timeout(Duration.create(5, TimeUnit.SECONDS)); @@ -117,7 +123,7 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { private final Map transactionChains = new HashMap<>(); private final DataTreeCohortActorRegistry cohortRegistry = new DataTreeCohortActorRegistry(); - private final Queue pendingTransactions = new ArrayDeque<>(); + private final Deque pendingTransactions = new ArrayDeque<>(); private final Queue pendingCommits = new ArrayDeque<>(); private final Queue pendingFinishCommits = new ArrayDeque<>(); @@ -127,7 +133,6 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { private final Map replicationCallbacks = new HashMap<>(); private final ShardDataTreeChangeListenerPublisher treeChangeListenerPublisher; - private final ShardDataChangeListenerPublisher dataChangeListenerPublisher; private final Collection> metadata; private final DataTree dataTree; private final String logContext; @@ -148,14 +153,13 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { ShardDataTree(final Shard shard, final SchemaContext schemaContext, final DataTree dataTree, final ShardDataTreeChangeListenerPublisher treeChangeListenerPublisher, - final ShardDataChangeListenerPublisher dataChangeListenerPublisher, final String logContext, + final String logContext, final ShardDataTreeMetadata... metadata) { this.dataTree = Preconditions.checkNotNull(dataTree); updateSchemaContext(schemaContext); this.shard = Preconditions.checkNotNull(shard); this.treeChangeListenerPublisher = Preconditions.checkNotNull(treeChangeListenerPublisher); - this.dataChangeListenerPublisher = Preconditions.checkNotNull(dataChangeListenerPublisher); this.logContext = Preconditions.checkNotNull(logContext); this.metadata = ImmutableList.copyOf(metadata); tip = dataTree; @@ -164,10 +168,9 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { ShardDataTree(final Shard shard, final SchemaContext schemaContext, final TreeType treeType, final YangInstanceIdentifier root, final ShardDataTreeChangeListenerPublisher treeChangeListenerPublisher, - final ShardDataChangeListenerPublisher dataChangeListenerPublisher, final String logContext, + final String logContext, final ShardDataTreeMetadata... metadata) { - this(shard, schemaContext, createDataTree(treeType, root), treeChangeListenerPublisher, - dataChangeListenerPublisher, logContext, metadata); + this(shard, schemaContext, createDataTree(treeType, root), treeChangeListenerPublisher, logContext, metadata); } private static DataTree createDataTree(final TreeType treeType, final YangInstanceIdentifier root) { @@ -182,8 +185,7 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { @VisibleForTesting public ShardDataTree(final Shard shard, final SchemaContext schemaContext, final TreeType treeType) { this(shard, schemaContext, treeType, YangInstanceIdentifier.EMPTY, - new DefaultShardDataTreeChangeListenerPublisher(""), - new DefaultShardDataChangeListenerPublisher(""), ""); + new DefaultShardDataTreeChangeListenerPublisher(""), ""); } final String logContext() { @@ -313,7 +315,7 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { } @SuppressWarnings("checkstyle:IllegalCatch") - private void applyRecoveryCandidate(final DataTreeCandidate candidate) throws DataValidationFailedException { + private void applyRecoveryCandidate(final DataTreeCandidate candidate) { final PruningDataTreeModification mod = wrapWithPruning(dataTree.takeSnapshot().newModification()); DataTreeCandidates.applyToModification(mod, candidate); mod.ready(); @@ -342,7 +344,7 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { * @throws IOException when the snapshot fails to deserialize * @throws DataValidationFailedException when the snapshot fails to apply */ - void applyRecoveryPayload(@Nonnull final Payload payload) throws IOException, DataValidationFailedException { + void applyRecoveryPayload(@Nonnull final Payload payload) throws IOException { if (payload instanceof CommitTransactionPayload) { final Entry e = ((CommitTransactionPayload) payload).getCandidate(); @@ -564,7 +566,6 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { @VisibleForTesting public void notifyListeners(final DataTreeCandidate candidate) { treeChangeListenerPublisher.publishChanges(candidate); - dataChangeListenerPublisher.publishChanges(candidate); } /** @@ -619,14 +620,6 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { replicatePayload(id, PurgeLocalHistoryPayload.create(id), callback); } - void registerDataChangeListener(final YangInstanceIdentifier path, - final AsyncDataChangeListener> listener, - final DataChangeScope scope, final Optional initialState, - final Consumer>>> - onRegistration) { - dataChangeListenerPublisher.registerDataChangeListener(path, listener, scope, initialState, onRegistration); - } - Optional readCurrentData() { final java.util.Optional> currentState = dataTree.takeSnapshot().readNode(YangInstanceIdentifier.EMPTY); @@ -658,11 +651,12 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { } @Override - ShardDataTreeCohort finishTransaction(final ReadWriteShardDataTreeTransaction transaction) { + ShardDataTreeCohort finishTransaction(final ReadWriteShardDataTreeTransaction transaction, + final java.util.Optional> participatingShardNames) { final DataTreeModification snapshot = transaction.getSnapshot(); snapshot.ready(); - return createReadyCohort(transaction.getIdentifier(), snapshot); + return createReadyCohort(transaction.getIdentifier(), snapshot, participatingShardNames); } void purgeTransaction(final TransactionIdentifier id, final Runnable callback) { @@ -801,13 +795,108 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { return; } if (!cohort.equals(head.cohort)) { - LOG.debug("{}: Transaction {} scheduled for canCommit step", logContext, cohort.getIdentifier()); - return; + // The tx isn't at the head of the queue so we can't start canCommit at this point. Here we check if this + // tx should be moved ahead of other tx's in the READY state in the pendingTransactions queue. If this tx + // has other participating shards, it could deadlock with other tx's accessing the same shards + // depending on the order the tx's are readied on each shard + // (see https://jira.opendaylight.org/browse/CONTROLLER-1836). Therefore, if the preceding participating + // shard names for a preceding pending tx, call it A, in the queue matches that of this tx, then this tx + // is allowed to be moved ahead of tx A in the queue so it is processed first to avoid potential deadlock + // if tx A is behind this tx in the pendingTransactions queue for a preceding shard. In other words, since + // canCommmit for this tx was requested before tx A, honor that request. If this tx is moved to the head of + // the queue as a result, then proceed with canCommit. + + Collection precedingShardNames = extractPrecedingShardNames(cohort.getParticipatingShardNames()); + if (precedingShardNames.isEmpty()) { + LOG.debug("{}: Tx {} is scheduled for canCommit step", logContext, cohort.getIdentifier()); + return; + } + + LOG.debug("{}: Evaluating tx {} for canCommit - preceding participating shard names {}", + logContext, cohort.getIdentifier(), precedingShardNames); + final Iterator iter = pendingTransactions.iterator(); + int index = -1; + int moveToIndex = -1; + while (iter.hasNext()) { + final CommitEntry entry = iter.next(); + ++index; + + if (cohort.equals(entry.cohort)) { + if (moveToIndex < 0) { + LOG.debug("{}: Not moving tx {} - cannot proceed with canCommit", + logContext, cohort.getIdentifier()); + return; + } + + LOG.debug("{}: Moving {} to index {} in the pendingTransactions queue", + logContext, cohort.getIdentifier(), moveToIndex); + iter.remove(); + insertEntry(pendingTransactions, entry, moveToIndex); + + if (!cohort.equals(pendingTransactions.peek().cohort)) { + LOG.debug("{}: Tx {} is not at the head of the queue - cannot proceed with canCommit", + logContext, cohort.getIdentifier()); + return; + } + + LOG.debug("{}: Tx {} is now at the head of the queue - proceeding with canCommit", + logContext, cohort.getIdentifier()); + break; + } + + if (entry.cohort.getState() != State.READY) { + LOG.debug("{}: Skipping pending transaction {} in state {}", + logContext, entry.cohort.getIdentifier(), entry.cohort.getState()); + continue; + } + + final Collection pendingPrecedingShardNames = extractPrecedingShardNames( + entry.cohort.getParticipatingShardNames()); + + if (precedingShardNames.equals(pendingPrecedingShardNames)) { + if (moveToIndex < 0) { + LOG.debug("{}: Preceding shard names {} for pending tx {} match - saving moveToIndex {}", + logContext, pendingPrecedingShardNames, entry.cohort.getIdentifier(), index); + moveToIndex = index; + } else { + LOG.debug( + "{}: Preceding shard names {} for pending tx {} match but moveToIndex already set to {}", + logContext, pendingPrecedingShardNames, entry.cohort.getIdentifier(), moveToIndex); + } + } else { + LOG.debug("{}: Preceding shard names {} for pending tx {} differ - skipping", + logContext, pendingPrecedingShardNames, entry.cohort.getIdentifier()); + } + } } processNextPendingTransaction(); } + private void insertEntry(Deque queue, CommitEntry entry, int atIndex) { + if (atIndex == 0) { + queue.addFirst(entry); + return; + } + + LOG.trace("Inserting into Deque at index {}", atIndex); + + Deque tempStack = new ArrayDeque<>(atIndex); + for (int i = 0; i < atIndex; i++) { + tempStack.push(queue.poll()); + } + + queue.addFirst(entry); + + tempStack.forEach(queue::addFirst); + } + + private Collection extractPrecedingShardNames( + java.util.Optional> participatingShardNames) { + return participatingShardNames.map((Function, Collection>) + set -> set.headSet(shard.getShardName())).orElse(Collections.emptyList()); + } + private void failPreCommit(final Throwable cause) { shard.getShardMBean().incrementFailedTransactionsCount(); pendingTransactions.poll().cohort.failedPreCommit(cause); @@ -965,22 +1054,24 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { } @Override - ShardDataTreeCohort createReadyCohort(final TransactionIdentifier txId, final DataTreeModification mod) { + ShardDataTreeCohort createReadyCohort(final TransactionIdentifier txId, final DataTreeModification mod, + final java.util.Optional> participatingShardNames) { SimpleShardDataTreeCohort cohort = new SimpleShardDataTreeCohort(this, mod, txId, - cohortRegistry.createCohort(schemaContext, txId, runnable -> shard.executeInSelf(runnable), - COMMIT_STEP_TIMEOUT)); + cohortRegistry.createCohort(schemaContext, txId, shard::executeInSelf, + COMMIT_STEP_TIMEOUT), participatingShardNames); pendingTransactions.add(new CommitEntry(cohort, readTime())); return cohort; } // Exposed for ShardCommitCoordinator so it does not have deal with local histories (it does not care), this mimics // the newReadWriteTransaction() - ShardDataTreeCohort newReadyCohort(final TransactionIdentifier txId, final DataTreeModification mod) { + ShardDataTreeCohort newReadyCohort(final TransactionIdentifier txId, final DataTreeModification mod, + final java.util.Optional> participatingShardNames) { if (txId.getHistoryId().getHistoryId() == 0) { - return createReadyCohort(txId, mod); + return createReadyCohort(txId, mod, participatingShardNames); } - return ensureTransactionChain(txId.getHistoryId(), null).createReadyCohort(txId, mod); + return ensureTransactionChain(txId.getHistoryId(), null).createReadyCohort(txId, mod, participatingShardNames); } @SuppressFBWarnings(value = "DB_DUPLICATE_SWITCH_CLAUSES", justification = "See inline comments below.")