X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FShardCommitCoordinator.java;h=739321b06876ca6331bd785acb6074944ac86daf;hb=b30c169bfb4e94d7a519be55dd1f97ccdec5327c;hp=3431755b5109d8b30a2e5137e626b1d565751c71;hpb=98a12fd8256bf8feee9bb364ade1ee5f21079d61;p=controller.git diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java index 3431755b51..739321b068 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java @@ -8,28 +8,37 @@ package org.opendaylight.controller.cluster.datastore; import akka.actor.ActorRef; -import akka.actor.Status; +import akka.actor.Status.Failure; import akka.serialization.Serialization; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Queue; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import org.opendaylight.controller.cluster.datastore.compat.BackwardsCompatibleThreePhaseCommitCohort; +import org.opendaylight.controller.cluster.datastore.ShardCommitCoordinator.CohortEntry.State; +import org.opendaylight.controller.cluster.datastore.messages.AbortTransactionReply; import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications; import org.opendaylight.controller.cluster.datastore.messages.BatchedModificationsReply; +import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransaction; import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransactionReply; +import org.opendaylight.controller.cluster.datastore.messages.CommitTransaction; import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction; import org.opendaylight.controller.cluster.datastore.messages.ReadyLocalTransaction; import org.opendaylight.controller.cluster.datastore.messages.ReadyTransactionReply; import org.opendaylight.controller.cluster.datastore.modification.Modification; -import org.opendaylight.controller.cluster.datastore.modification.MutableCompositeModification; +import org.opendaylight.controller.cluster.datastore.utils.AbstractBatchedModificationsCursor; import org.opendaylight.controller.md.sal.common.api.data.TransactionCommitFailedException; +import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTreeCandidate; +import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTreeModification; import org.slf4j.Logger; /** @@ -67,8 +76,10 @@ class ShardCommitCoordinator { private ReadyTransactionReply readyTransactionReply; + private Runnable runOnPendingTransactionsComplete; + ShardCommitCoordinator(ShardDataTree dataTree, - long cacheExpiryTimeoutInMillis, int queueCapacity, ActorRef shardActor, Logger log, String name) { + long cacheExpiryTimeoutInMillis, int queueCapacity, Logger log, String name) { this.queueCapacity = queueCapacity; this.log = log; @@ -81,6 +92,10 @@ class ShardCommitCoordinator { return queuedCohortEntries.size(); } + int getCohortCacheSize() { + return cohortCache.size(); + } + void setQueueCapacity(int queueCapacity) { this.queueCapacity = queueCapacity; } @@ -96,6 +111,10 @@ class ShardCommitCoordinator { private boolean queueCohortEntry(CohortEntry cohortEntry, ActorRef sender, Shard shard) { if(queuedCohortEntries.size() < queueCapacity) { queuedCohortEntries.offer(cohortEntry); + + log.debug("{}: Enqueued transaction {}, queue size {}", name, cohortEntry.getTransactionID(), + queuedCohortEntries.size()); + return true; } else { cohortCache.remove(cohortEntry.getTransactionID()); @@ -105,7 +124,7 @@ class ShardCommitCoordinator { " capacity %d has been reached.", name, cohortEntry.getTransactionID(), queueCapacity)); log.error(ex.getMessage()); - sender.tell(new Status.Failure(ex), shard.self()); + sender.tell(new Failure(ex), shard.self()); return false; } } @@ -122,42 +141,23 @@ class ShardCommitCoordinator { log.debug("{}: Readying transaction {}, client version {}", name, ready.getTransactionID(), ready.getTxnClientVersion()); - CohortEntry cohortEntry = new CohortEntry(ready.getTransactionID(), ready.getCohort(), - (MutableCompositeModification) ready.getModification()); + ShardDataTreeCohort cohort = ready.getTransaction().ready(); + CohortEntry cohortEntry = new CohortEntry(ready.getTransactionID(), cohort, ready.getTxnClientVersion()); cohortCache.put(ready.getTransactionID(), cohortEntry); if(!queueCohortEntry(cohortEntry, sender, shard)) { return; } - if(ready.getTxnClientVersion() < DataStoreVersions.LITHIUM_VERSION) { - // Return our actor path as we'll handle the three phase commit except if the Tx client - // version < Helium-1 version which means the Tx was initiated by a base Helium version node. - // In that case, the subsequent 3-phase commit messages won't contain the transactionId so to - // maintain backwards compatibility, we create a separate cohort actor to provide the compatible behavior. - ActorRef replyActorPath = shard.self(); - if(ready.getTxnClientVersion() < DataStoreVersions.HELIUM_1_VERSION) { - log.debug("{}: Creating BackwardsCompatibleThreePhaseCommitCohort", name); - replyActorPath = shard.getContext().actorOf(BackwardsCompatibleThreePhaseCommitCohort.props( - ready.getTransactionID())); - } - - ReadyTransactionReply readyTransactionReply = - new ReadyTransactionReply(Serialization.serializedActorPath(replyActorPath), - ready.getTxnClientVersion()); - sender.tell(ready.isReturnSerialized() ? readyTransactionReply.toSerializable() : - readyTransactionReply, shard.self()); + if(ready.isDoImmediateCommit()) { + cohortEntry.setDoImmediateCommit(true); + cohortEntry.setReplySender(sender); + cohortEntry.setShard(shard); + handleCanCommit(cohortEntry); } else { - if(ready.isDoImmediateCommit()) { - cohortEntry.setDoImmediateCommit(true); - cohortEntry.setReplySender(sender); - cohortEntry.setShard(shard); - handleCanCommit(cohortEntry); - } else { - // The caller does not want immediate commit - the 3-phase commit will be coordinated by the - // front-end so send back a ReadyTransactionReply with our actor path. - sender.tell(readyTransactionReply(shard), shard.self()); - } + // The caller does not want immediate commit - the 3-phase commit will be coordinated by the + // front-end so send back a ReadyTransactionReply with our actor path. + sender.tell(readyTransactionReply(shard), shard.self()); } } @@ -176,7 +176,7 @@ class ShardCommitCoordinator { if(cohortEntry == null) { cohortEntry = new CohortEntry(batched.getTransactionID(), dataTree.newReadWriteTransaction(batched.getTransactionID(), - batched.getTransactionChainID())); + batched.getTransactionChainID()), batched.getVersion()); cohortCache.put(batched.getTransactionID(), cohortEntry); } @@ -234,7 +234,8 @@ class ShardCommitCoordinator { void handleReadyLocalTransaction(ReadyLocalTransaction message, ActorRef sender, Shard shard) { final ShardDataTreeCohort cohort = new SimpleShardDataTreeCohort(dataTree, message.getModification(), message.getTransactionID()); - final CohortEntry cohortEntry = new CohortEntry(message.getTransactionID(), cohort); + final CohortEntry cohortEntry = new CohortEntry(message.getTransactionID(), cohort, + DataStoreVersions.CURRENT_VERSION); cohortCache.put(message.getTransactionID(), cohortEntry); cohortEntry.setDoImmediateCommit(message.isDoCommitOnReady()); @@ -253,6 +254,36 @@ class ShardCommitCoordinator { } } + Collection createForwardedBatchedModifications(final BatchedModifications from, + final int maxModificationsPerBatch) { + CohortEntry cohortEntry = getAndRemoveCohortEntry(from.getTransactionID()); + if(cohortEntry == null || cohortEntry.getTransaction() == null) { + return Collections.singletonList(from); + } + + cohortEntry.applyModifications(from.getModifications()); + + final LinkedList newModifications = new LinkedList<>(); + cohortEntry.getTransaction().getSnapshot().applyToCursor(new AbstractBatchedModificationsCursor() { + @Override + protected BatchedModifications getModifications() { + if(newModifications.isEmpty() || + newModifications.getLast().getModifications().size() >= maxModificationsPerBatch) { + newModifications.add(new BatchedModifications(from.getTransactionID(), + from.getVersion(), from.getTransactionChainID())); + } + + return newModifications.getLast(); + } + }); + + BatchedModifications last = newModifications.getLast(); + last.setDoCommitOnReady(from.isDoCommitOnReady()); + last.setReady(from.isReady()); + last.setTotalMessagesSent(newModifications.size()); + return newModifications; + } + private void handleCanCommit(CohortEntry cohortEntry) { String transactionID = cohortEntry.getTransactionID(); @@ -278,8 +309,9 @@ class ShardCommitCoordinator { doCanCommit(currentCohortEntry); } else { if(log.isDebugEnabled()) { - log.debug("{}: Tx {} is the next pending canCommit - skipping {} for now", - name, queuedCohortEntries.peek().getTransactionID(), transactionID); + log.debug("{}: Tx {} is the next pending canCommit - skipping {} for now", name, + queuedCohortEntries.peek() != null ? queuedCohortEntries.peek().getTransactionID() : "???", + transactionID); } } } @@ -301,7 +333,7 @@ class ShardCommitCoordinator { IllegalStateException ex = new IllegalStateException( String.format("%s: No cohort entry found for transaction %s", name, transactionID)); log.error(ex.getMessage()); - sender.tell(new Status.Failure(ex), shard.self()); + sender.tell(new Failure(ex), shard.self()); return; } @@ -314,10 +346,7 @@ class ShardCommitCoordinator { private void doCanCommit(final CohortEntry cohortEntry) { boolean canCommit = false; try { - // We block on the future here so we don't have to worry about possibly accessing our - // state on a different thread outside of our dispatcher. Also, the data store - // currently uses a same thread executor anyway. - canCommit = cohortEntry.getCohort().canCommit().get(); + canCommit = cohortEntry.canCommit(); log.debug("{}: canCommit for {}: {}", name, cohortEntry.getTransactionID(), canCommit); @@ -325,13 +354,14 @@ class ShardCommitCoordinator { if(canCommit) { doCommit(cohortEntry); } else { - cohortEntry.getReplySender().tell(new Status.Failure(new TransactionCommitFailedException( + cohortEntry.getReplySender().tell(new Failure(new TransactionCommitFailedException( "Can Commit failed, no detailed cause available.")), cohortEntry.getShard().self()); } } else { cohortEntry.getReplySender().tell( - canCommit ? CanCommitTransactionReply.YES.toSerializable() : - CanCommitTransactionReply.NO.toSerializable(), cohortEntry.getShard().self()); + canCommit ? CanCommitTransactionReply.yes(cohortEntry.getClientVersion()).toSerializable() : + CanCommitTransactionReply.no(cohortEntry.getClientVersion()).toSerializable(), + cohortEntry.getShard().self()); } } catch (Exception e) { log.debug("{}: An exception occurred during canCommit", name, e); @@ -341,7 +371,7 @@ class ShardCommitCoordinator { failure = e.getCause(); } - cohortEntry.getReplySender().tell(new Status.Failure(failure), cohortEntry.getShard().self()); + cohortEntry.getReplySender().tell(new Failure(failure), cohortEntry.getShard().self()); } finally { if(!canCommit) { // Remove the entry from the cache now. @@ -361,10 +391,7 @@ class ShardCommitCoordinator { // normally fail since we ensure only one concurrent 3-phase commit. try { - // We block on the future here so we don't have to worry about possibly accessing our - // state on a different thread outside of our dispatcher. Also, the data store - // currently uses a same thread executor anyway. - cohortEntry.getCohort().preCommit().get(); + cohortEntry.preCommit(); cohortEntry.getShard().continueCommit(cohortEntry); @@ -374,7 +401,7 @@ class ShardCommitCoordinator { } catch (Exception e) { log.error("{} An exception occurred while preCommitting transaction {}", name, cohortEntry.getTransactionID(), e); - cohortEntry.getReplySender().tell(new akka.actor.Status.Failure(e), cohortEntry.getShard().self()); + cohortEntry.getReplySender().tell(new Failure(e), cohortEntry.getShard().self()); currentTransactionComplete(cohortEntry.getTransactionID(), true); } @@ -401,7 +428,7 @@ class ShardCommitCoordinator { String.format("%s: Cannot commit transaction %s - it is not the current transaction", name, transactionID)); log.error(ex.getMessage()); - sender.tell(new akka.actor.Status.Failure(ex), shard.self()); + sender.tell(new Failure(ex), shard.self()); return false; } @@ -409,6 +436,136 @@ class ShardCommitCoordinator { return doCommit(cohortEntry); } + void handleAbort(final String transactionID, final ActorRef sender, final Shard shard) { + CohortEntry cohortEntry = getCohortEntryIfCurrent(transactionID); + if(cohortEntry != null) { + // We don't remove the cached cohort entry here (ie pass false) in case the Tx was + // aborted during replication in which case we may still commit locally if replication + // succeeds. + currentTransactionComplete(transactionID, false); + } else { + cohortEntry = getAndRemoveCohortEntry(transactionID); + } + + if(cohortEntry == null) { + return; + } + + log.debug("{}: Aborting transaction {}", name, transactionID); + + final ActorRef self = shard.getSelf(); + try { + cohortEntry.abort(); + + shard.getShardMBean().incrementAbortTransactionsCount(); + + if(sender != null) { + sender.tell(AbortTransactionReply.instance(cohortEntry.getClientVersion()).toSerializable(), self); + } + } catch (Exception e) { + log.error("{}: An exception happened during abort", name, e); + + if(sender != null) { + sender.tell(new Failure(e), self); + } + } + } + + void checkForExpiredTransactions(final long timeout, final Shard shard) { + CohortEntry cohortEntry = getCurrentCohortEntry(); + if(cohortEntry != null) { + if(cohortEntry.isExpired(timeout)) { + log.warn("{}: Current transaction {} has timed out after {} ms - aborting", + name, cohortEntry.getTransactionID(), timeout); + + handleAbort(cohortEntry.getTransactionID(), null, shard); + } + } + + cleanupExpiredCohortEntries(); + } + + void abortPendingTransactions(final String reason, final Shard shard) { + if(currentCohortEntry == null && queuedCohortEntries.isEmpty()) { + return; + } + + List cohortEntries = getAndClearPendingCohortEntries(); + + log.debug("{}: Aborting {} pending queued transactions", name, cohortEntries.size()); + + for(CohortEntry cohortEntry: cohortEntries) { + if(cohortEntry.getReplySender() != null) { + cohortEntry.getReplySender().tell(new Failure(new RuntimeException(reason)), shard.self()); + } + } + } + + private List getAndClearPendingCohortEntries() { + List cohortEntries = new ArrayList<>(); + if(currentCohortEntry != null) { + cohortEntries.add(currentCohortEntry); + cohortCache.remove(currentCohortEntry.getTransactionID()); + currentCohortEntry = null; + } + + for(CohortEntry cohortEntry: queuedCohortEntries) { + cohortEntries.add(cohortEntry); + cohortCache.remove(cohortEntry.getTransactionID()); + } + + queuedCohortEntries.clear(); + return cohortEntries; + } + + Collection convertPendingTransactionsToMessages(final int maxModificationsPerBatch) { + if(currentCohortEntry == null && queuedCohortEntries.isEmpty()) { + return Collections.emptyList(); + } + + Collection messages = new ArrayList<>(); + List cohortEntries = getAndClearPendingCohortEntries(); + for(CohortEntry cohortEntry: cohortEntries) { + if(cohortEntry.isExpired(cacheExpiryTimeoutInMillis) || cohortEntry.isAborted()) { + continue; + } + + final LinkedList newModifications = new LinkedList<>(); + cohortEntry.getDataTreeModification().applyToCursor(new AbstractBatchedModificationsCursor() { + @Override + protected BatchedModifications getModifications() { + if(newModifications.isEmpty() || + newModifications.getLast().getModifications().size() >= maxModificationsPerBatch) { + newModifications.add(new BatchedModifications(cohortEntry.getTransactionID(), + cohortEntry.getClientVersion(), "")); + } + + return newModifications.getLast(); + } + }); + + if(!newModifications.isEmpty()) { + BatchedModifications last = newModifications.getLast(); + last.setDoCommitOnReady(cohortEntry.isDoImmediateCommit()); + last.setReady(true); + last.setTotalMessagesSent(newModifications.size()); + messages.addAll(newModifications); + + if(!cohortEntry.isDoImmediateCommit() && cohortEntry.getState() == State.CAN_COMMITTED) { + messages.add(new CanCommitTransaction(cohortEntry.getTransactionID(), + cohortEntry.getClientVersion())); + } + + if(!cohortEntry.isDoImmediateCommit() && cohortEntry.getState() == State.PRE_COMMITTED) { + messages.add(new CommitTransaction(cohortEntry.getTransactionID(), + cohortEntry.getClientVersion())); + } + } + } + + return messages; + } + /** * Returns the cohort entry for the Tx commit currently in progress if the given transaction ID * matches the current entry. @@ -417,7 +574,7 @@ class ShardCommitCoordinator { * @return the current CohortEntry or null if the given transaction ID does not match the * current entry. */ - public CohortEntry getCohortEntryIfCurrent(String transactionID) { + CohortEntry getCohortEntryIfCurrent(String transactionID) { if(isCurrentTransaction(transactionID)) { return currentCohortEntry; } @@ -425,15 +582,15 @@ class ShardCommitCoordinator { return null; } - public CohortEntry getCurrentCohortEntry() { + CohortEntry getCurrentCohortEntry() { return currentCohortEntry; } - public CohortEntry getAndRemoveCohortEntry(String transactionID) { + CohortEntry getAndRemoveCohortEntry(String transactionID) { return cohortCache.remove(transactionID); } - public boolean isCurrentTransaction(String transactionID) { + boolean isCurrentTransaction(String transactionID) { return currentCohortEntry != null && currentCohortEntry.getTransactionID().equals(transactionID); } @@ -447,7 +604,7 @@ class ShardCommitCoordinator { * @param removeCohortEntry if true the CohortEntry for the transaction is also removed from * the cache. */ - public void currentTransactionComplete(String transactionID, boolean removeCohortEntry) { + void currentTransactionComplete(String transactionID, boolean removeCohortEntry) { if(removeCohortEntry) { cohortCache.remove(transactionID); } @@ -483,25 +640,49 @@ class ShardCommitCoordinator { } else if(next.isExpired(cacheExpiryTimeoutInMillis)) { log.warn("{}: canCommit for transaction {} was not received within {} ms - entry removed from cache", name, next.getTransactionID(), cacheExpiryTimeoutInMillis); - - iter.remove(); - cohortCache.remove(next.getTransactionID()); - } else { + } else if(!next.isAborted()) { break; } + + iter.remove(); + cohortCache.remove(next.getTransactionID()); } + + maybeRunOperationOnPendingTransactionsComplete(); } void cleanupExpiredCohortEntries() { maybeProcessNextCohortEntry(); } + void setRunOnPendingTransactionsComplete(Runnable operation) { + runOnPendingTransactionsComplete = operation; + maybeRunOperationOnPendingTransactionsComplete(); + } + + private void maybeRunOperationOnPendingTransactionsComplete() { + if(runOnPendingTransactionsComplete != null && currentCohortEntry == null && queuedCohortEntries.isEmpty()) { + log.debug("{}: Pending transactions complete - running operation {}", name, runOnPendingTransactionsComplete); + + runOnPendingTransactionsComplete.run(); + runOnPendingTransactionsComplete = null; + } + } + @VisibleForTesting void setCohortDecorator(CohortDecorator cohortDecorator) { this.cohortDecorator = cohortDecorator; } static class CohortEntry { + enum State { + PENDING, + CAN_COMMITTED, + PRE_COMMITTED, + COMMITTED, + ABORTED + } + private final String transactionID; private ShardDataTreeCohort cohort; private final ReadWriteShardDataTreeTransaction transaction; @@ -511,23 +692,20 @@ class ShardCommitCoordinator { private boolean doImmediateCommit; private final Stopwatch lastAccessTimer = Stopwatch.createStarted(); private int totalBatchedModificationsReceived; + private State state = State.PENDING; + private final short clientVersion; - CohortEntry(String transactionID, ReadWriteShardDataTreeTransaction transaction) { + CohortEntry(String transactionID, ReadWriteShardDataTreeTransaction transaction, short clientVersion) { this.transaction = Preconditions.checkNotNull(transaction); this.transactionID = transactionID; + this.clientVersion = clientVersion; } - CohortEntry(String transactionID, ShardDataTreeCohort cohort, - MutableCompositeModification compositeModification) { - this.transactionID = transactionID; - this.cohort = cohort; - this.transaction = null; - } - - CohortEntry(String transactionID, ShardDataTreeCohort cohort) { + CohortEntry(String transactionID, ShardDataTreeCohort cohort, short clientVersion) { this.transactionID = transactionID; this.cohort = cohort; this.transaction = null; + this.clientVersion = clientVersion; } void updateLastAccessTime() { @@ -539,8 +717,24 @@ class ShardCommitCoordinator { return transactionID; } - ShardDataTreeCohort getCohort() { - return cohort; + short getClientVersion() { + return clientVersion; + } + + State getState() { + return state; + } + + DataTreeCandidate getCandidate() { + return cohort.getCandidate(); + } + + DataTreeModification getDataTreeModification() { + return cohort.getDataTreeModification(); + } + + ReadWriteShardDataTreeTransaction getTransaction() { + return transaction; } int getTotalBatchedModificationsReceived() { @@ -565,6 +759,32 @@ class ShardCommitCoordinator { } } + boolean canCommit() throws InterruptedException, ExecutionException { + state = State.CAN_COMMITTED; + + // We block on the future here (and also preCommit(), commit(), abort()) so we don't have to worry + // about possibly accessing our state on a different thread outside of our dispatcher. + // TODO: the ShardDataTreeCohort returns immediate Futures anyway which begs the question - why + // bother even returning Futures from ShardDataTreeCohort if we have to treat them synchronously + // anyway?. The Futures are really a remnant from when we were using the InMemoryDataBroker. + return cohort.canCommit().get(); + } + + void preCommit() throws InterruptedException, ExecutionException { + state = State.PRE_COMMITTED; + cohort.preCommit().get(); + } + + void commit() throws InterruptedException, ExecutionException { + state = State.COMMITTED; + cohort.commit().get(); + } + + void abort() throws InterruptedException, ExecutionException { + state = State.ABORTED; + cohort.abort().get(); + } + void ready(CohortDecorator cohortDecorator, boolean doImmediateCommit) { Preconditions.checkState(cohort == null, "cohort was already set"); @@ -610,6 +830,11 @@ class ShardCommitCoordinator { this.shard = shard; } + + boolean isAborted() { + return state == State.ABORTED; + } + @Override public String toString() { StringBuilder builder = new StringBuilder();