X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FShardCommitCoordinator.java;h=0d63115754698688f5c64fba9d46d63b93336f87;hp=30947fa6662b4a56d5b091cfe3133d019c9f9a24;hb=c98e417bc397d599f7e5f8a56af91e479e0a6e5f;hpb=559c2b6afa7714572e01b52029acaa4d5a7315e2 diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java index 30947fa666..0d63115754 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java @@ -12,24 +12,25 @@ import akka.actor.Status; import akka.serialization.Serialization; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.RemovalCause; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; +import com.google.common.base.Stopwatch; +import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; +import java.util.Map; import java.util.Queue; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import org.opendaylight.controller.cluster.datastore.compat.BackwardsCompatibleThreePhaseCommitCohort; +import org.opendaylight.controller.cluster.datastore.messages.AbortTransactionReply; import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications; import org.opendaylight.controller.cluster.datastore.messages.BatchedModificationsReply; import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransactionReply; import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction; +import org.opendaylight.controller.cluster.datastore.messages.ReadyLocalTransaction; import org.opendaylight.controller.cluster.datastore.messages.ReadyTransactionReply; import org.opendaylight.controller.cluster.datastore.modification.Modification; -import org.opendaylight.controller.cluster.datastore.modification.MutableCompositeModification; import org.opendaylight.controller.md.sal.common.api.data.TransactionCommitFailedException; +import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTreeCandidate; import org.slf4j.Logger; /** @@ -37,20 +38,22 @@ import org.slf4j.Logger; * * @author Thomas Pantelis */ -public class ShardCommitCoordinator { +class ShardCommitCoordinator { // Interface hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts. public interface CohortDecorator { ShardDataTreeCohort decorate(String transactionID, ShardDataTreeCohort actual); } - private final Cache cohortCache; + private final Map cohortCache = new HashMap<>(); private CohortEntry currentCohortEntry; private final ShardDataTree dataTree; - private final Queue queuedCohortEntries; + // We use a LinkedList here to avoid synchronization overhead with concurrent queue impls + // since this should only be accessed on the shard's dispatcher. + private final Queue queuedCohortEntries = new LinkedList<>(); private int queueCapacity; @@ -58,38 +61,28 @@ public class ShardCommitCoordinator { private final String name; - private final RemovalListener cacheRemovalListener = - new RemovalListener() { - @Override - public void onRemoval(RemovalNotification notification) { - if(notification.getCause() == RemovalCause.EXPIRED) { - log.warn("{}: Transaction {} was timed out of the cache", name, notification.getKey()); - } - } - }; + private final long cacheExpiryTimeoutInMillis; // This is a hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts. private CohortDecorator cohortDecorator; private ReadyTransactionReply readyTransactionReply; - public ShardCommitCoordinator(ShardDataTree dataTree, - long cacheExpiryTimeoutInSec, int queueCapacity, ActorRef shardActor, Logger log, String name) { + ShardCommitCoordinator(ShardDataTree dataTree, + long cacheExpiryTimeoutInMillis, int queueCapacity, Logger log, String name) { this.queueCapacity = queueCapacity; this.log = log; this.name = name; this.dataTree = Preconditions.checkNotNull(dataTree); + this.cacheExpiryTimeoutInMillis = cacheExpiryTimeoutInMillis; + } - cohortCache = CacheBuilder.newBuilder().expireAfterAccess(cacheExpiryTimeoutInSec, TimeUnit.SECONDS). - removalListener(cacheRemovalListener).build(); - - // We use a LinkedList here to avoid synchronization overhead with concurrent queue impls - // since this should only be accessed on the shard's dispatcher. - queuedCohortEntries = new LinkedList<>(); + int getQueueSize() { + return queuedCohortEntries.size(); } - public void setQueueCapacity(int queueCapacity) { + void setQueueCapacity(int queueCapacity) { this.queueCapacity = queueCapacity; } @@ -101,18 +94,46 @@ public class ShardCommitCoordinator { return readyTransactionReply; } + private boolean queueCohortEntry(CohortEntry cohortEntry, ActorRef sender, Shard shard) { + if(queuedCohortEntries.size() < queueCapacity) { + queuedCohortEntries.offer(cohortEntry); + + log.debug("{}: Enqueued transaction {}, queue size {}", name, cohortEntry.getTransactionID(), + queuedCohortEntries.size()); + + return true; + } else { + cohortCache.remove(cohortEntry.getTransactionID()); + + RuntimeException ex = new RuntimeException( + String.format("%s: Could not enqueue transaction %s - the maximum commit queue"+ + " capacity %d has been reached.", + name, cohortEntry.getTransactionID(), queueCapacity)); + log.error(ex.getMessage()); + sender.tell(new Status.Failure(ex), shard.self()); + return false; + } + } + /** * This method is called to ready a transaction that was prepared by ShardTransaction actor. It caches * the prepared cohort entry for the given transactions ID in preparation for the subsequent 3-phase commit. + * + * @param ready the ForwardedReadyTransaction message to process + * @param sender the sender of the message + * @param shard the transaction's shard actor */ - public void handleForwardedReadyTransaction(ForwardedReadyTransaction ready, ActorRef sender, Shard shard) { + void handleForwardedReadyTransaction(ForwardedReadyTransaction ready, ActorRef sender, Shard shard) { log.debug("{}: Readying transaction {}, client version {}", name, ready.getTransactionID(), ready.getTxnClientVersion()); - CohortEntry cohortEntry = new CohortEntry(ready.getTransactionID(), ready.getCohort(), - (MutableCompositeModification) ready.getModification()); + CohortEntry cohortEntry = new CohortEntry(ready.getTransactionID(), ready.getCohort()); cohortCache.put(ready.getTransactionID(), cohortEntry); + if(!queueCohortEntry(cohortEntry, sender, shard)) { + return; + } + if(ready.getTxnClientVersion() < DataStoreVersions.LITHIUM_VERSION) { // Return our actor path as we'll handle the three phase commit except if the Tx client // version < Helium-1 version which means the Tx was initiated by a base Helium version node. @@ -150,14 +171,12 @@ public class ShardCommitCoordinator { * DOMStoreWriteTransaction, one is created. The batched modifications are applied to the write Tx. If * the BatchedModifications is ready to commit then a DOMStoreThreePhaseCommitCohort is created. * - * @param batched the BatchedModifications - * @param shardActor the transaction's shard actor - * - * @throws ExecutionException if an error occurs loading the cache + * @param batched the BatchedModifications message to process + * @param sender the sender of the message + * @param shard the transaction's shard actor */ - boolean handleBatchedModifications(BatchedModifications batched, ActorRef sender, Shard shard) - throws ExecutionException { - CohortEntry cohortEntry = cohortCache.getIfPresent(batched.getTransactionID()); + void handleBatchedModifications(BatchedModifications batched, ActorRef sender, Shard shard) { + CohortEntry cohortEntry = cohortCache.get(batched.getTransactionID()); if(cohortEntry == null) { cohortEntry = new CohortEntry(batched.getTransactionID(), dataTree.newReadWriteTransaction(batched.getTransactionID(), @@ -173,6 +192,22 @@ public class ShardCommitCoordinator { cohortEntry.applyModifications(batched.getModifications()); if(batched.isReady()) { + if(cohortEntry.getLastBatchedModificationsException() != null) { + cohortCache.remove(cohortEntry.getTransactionID()); + throw cohortEntry.getLastBatchedModificationsException(); + } + + if(cohortEntry.getTotalBatchedModificationsReceived() != batched.getTotalMessagesSent()) { + cohortCache.remove(cohortEntry.getTransactionID()); + throw new IllegalStateException(String.format( + "The total number of batched messages received %d does not match the number sent %d", + cohortEntry.getTotalBatchedModificationsReceived(), batched.getTotalMessagesSent())); + } + + if(!queueCohortEntry(cohortEntry, sender, shard)) { + return; + } + if(log.isDebugEnabled()) { log.debug("{}: Readying Tx {}, client version {}", name, batched.getTransactionID(), batched.getVersion()); @@ -190,57 +225,80 @@ public class ShardCommitCoordinator { } else { sender.tell(new BatchedModificationsReply(batched.getModifications().size()), shard.self()); } + } + + /** + * This method handles {@link ReadyLocalTransaction} message. All transaction modifications have + * been prepared beforehand by the sender and we just need to drive them through into the dataTree. + * + * @param message the ReadyLocalTransaction message to process + * @param sender the sender of the message + * @param shard the transaction's shard actor + */ + void handleReadyLocalTransaction(ReadyLocalTransaction message, ActorRef sender, Shard shard) { + final ShardDataTreeCohort cohort = new SimpleShardDataTreeCohort(dataTree, message.getModification(), + message.getTransactionID()); + final CohortEntry cohortEntry = new CohortEntry(message.getTransactionID(), cohort); + cohortCache.put(message.getTransactionID(), cohortEntry); + cohortEntry.setDoImmediateCommit(message.isDoCommitOnReady()); + + if(!queueCohortEntry(cohortEntry, sender, shard)) { + return; + } - return batched.isReady(); + log.debug("{}: Applying local modifications for Tx {}", name, message.getTransactionID()); + + if (message.isDoCommitOnReady()) { + cohortEntry.setReplySender(sender); + cohortEntry.setShard(shard); + handleCanCommit(cohortEntry); + } else { + sender.tell(readyTransactionReply(shard), shard.self()); + } } private void handleCanCommit(CohortEntry cohortEntry) { String transactionID = cohortEntry.getTransactionID(); - if(log.isDebugEnabled()) { - log.debug("{}: Processing canCommit for transaction {} for shard {}", - name, transactionID, cohortEntry.getShard().self().path()); - } + cohortEntry.updateLastAccessTime(); if(currentCohortEntry != null) { - // There's already a Tx commit in progress - attempt to queue this entry to be - // committed after the current Tx completes. - log.debug("{}: Transaction {} is already in progress - queueing transaction {}", - name, currentCohortEntry.getTransactionID(), transactionID); + // There's already a Tx commit in progress so we can't process this entry yet - but it's in the + // queue and will get processed after all prior entries complete. - if(queuedCohortEntries.size() < queueCapacity) { - queuedCohortEntries.offer(cohortEntry); - } else { - removeCohortEntry(transactionID); - - RuntimeException ex = new RuntimeException( - String.format("%s: Could not enqueue transaction %s - the maximum commit queue"+ - " capacity %d has been reached.", - name, transactionID, queueCapacity)); - log.error(ex.getMessage()); - cohortEntry.getReplySender().tell(new Status.Failure(ex), cohortEntry.getShard().self()); + if(log.isDebugEnabled()) { + log.debug("{}: Commit for Tx {} already in progress - skipping canCommit for {} for now", + name, currentCohortEntry.getTransactionID(), transactionID); } - } else { - // No Tx commit currently in progress - make this the current entry and proceed with - // canCommit. - cohortEntry.updateLastAccessTime(); - currentCohortEntry = cohortEntry; - doCanCommit(cohortEntry); + return; + } + + // No Tx commit currently in progress - check if this entry is the next one in the queue, If so make + // it the current entry and proceed with canCommit. + // Purposely checking reference equality here. + if(queuedCohortEntries.peek() == cohortEntry) { + currentCohortEntry = queuedCohortEntries.poll(); + doCanCommit(currentCohortEntry); + } else { + if(log.isDebugEnabled()) { + log.debug("{}: Tx {} is the next pending canCommit - skipping {} for now", + name, queuedCohortEntries.peek().getTransactionID(), transactionID); + } } } /** * This method handles the canCommit phase for a transaction. * - * @param canCommit the CanCommitTransaction message - * @param sender the actor that sent the message + * @param transactionID the ID of the transaction to canCommit + * @param sender the actor to which to send the response * @param shard the transaction's shard actor */ - public void handleCanCommit(String transactionID, final ActorRef sender, final Shard shard) { + void handleCanCommit(String transactionID, final ActorRef sender, final Shard shard) { // Lookup the cohort entry that was cached previously (or should have been) by // transactionReady (via the ForwardedReadyTransaction message). - final CohortEntry cohortEntry = cohortCache.getIfPresent(transactionID); + final CohortEntry cohortEntry = cohortCache.get(transactionID); if(cohortEntry == null) { // Either canCommit was invoked before ready(shouldn't happen) or a long time passed // between canCommit and ready and the entry was expired from the cache. @@ -258,13 +316,11 @@ public class ShardCommitCoordinator { } private void doCanCommit(final CohortEntry cohortEntry) { - boolean canCommit = false; try { - // We block on the future here so we don't have to worry about possibly accessing our - // state on a different thread outside of our dispatcher. Also, the data store - // currently uses a same thread executor anyway. - canCommit = cohortEntry.getCohort().canCommit().get(); + canCommit = cohortEntry.canCommit(); + + log.debug("{}: canCommit for {}: {}", name, cohortEntry.getTransactionID(), canCommit); if(cohortEntry.isDoImmediateCommit()) { if(canCommit) { @@ -279,7 +335,7 @@ public class ShardCommitCoordinator { CanCommitTransactionReply.NO.toSerializable(), cohortEntry.getShard().self()); } } catch (Exception e) { - log.debug("{}: An exception occurred during canCommit: {}", name, e); + log.debug("{}: An exception occurred during canCommit", name, e); Throwable failure = e; if(e instanceof ExecutionException) { @@ -306,10 +362,7 @@ public class ShardCommitCoordinator { // normally fail since we ensure only one concurrent 3-phase commit. try { - // We block on the future here so we don't have to worry about possibly accessing our - // state on a different thread outside of our dispatcher. Also, the data store - // currently uses a same thread executor anyway. - cohortEntry.getCohort().preCommit().get(); + cohortEntry.preCommit(); cohortEntry.getShard().continueCommit(cohortEntry); @@ -327,6 +380,14 @@ public class ShardCommitCoordinator { return success; } + /** + * This method handles the preCommit and commit phases for a transaction. + * + * @param transactionID the ID of the transaction to commit + * @param sender the actor to which to send the response + * @param shard the transaction's shard actor + * @return true if the transaction was successfully prepared, false otherwise. + */ boolean handleCommit(final String transactionID, final ActorRef sender, final Shard shard) { // Get the current in-progress cohort entry in the commitCoordinator if it corresponds to // this transaction. @@ -342,9 +403,45 @@ public class ShardCommitCoordinator { return false; } + cohortEntry.setReplySender(sender); return doCommit(cohortEntry); } + void handleAbort(final String transactionID, final ActorRef sender, final Shard shard) { + CohortEntry cohortEntry = getCohortEntryIfCurrent(transactionID); + if(cohortEntry != null) { + // We don't remove the cached cohort entry here (ie pass false) in case the Tx was + // aborted during replication in which case we may still commit locally if replication + // succeeds. + currentTransactionComplete(transactionID, false); + } else { + cohortEntry = getAndRemoveCohortEntry(transactionID); + } + + if(cohortEntry == null) { + return; + } + + log.debug("{}: Aborting transaction {}", name, transactionID); + + final ActorRef self = shard.getSelf(); + try { + cohortEntry.abort(); + + shard.getShardMBean().incrementAbortTransactionsCount(); + + if(sender != null) { + sender.tell(new AbortTransactionReply().toSerializable(), self); + } + } catch (Exception e) { + log.error("{}: An exception happened during abort", name, e); + + if(sender != null) { + sender.tell(new akka.actor.Status.Failure(e), self); + } + } + } + /** * Returns the cohort entry for the Tx commit currently in progress if the given transaction ID * matches the current entry. @@ -366,13 +463,7 @@ public class ShardCommitCoordinator { } public CohortEntry getAndRemoveCohortEntry(String transactionID) { - CohortEntry cohortEntry = cohortCache.getIfPresent(transactionID); - cohortCache.invalidate(transactionID); - return cohortEntry; - } - - public void removeCohortEntry(String transactionID) { - cohortCache.invalidate(transactionID); + return cohortCache.remove(transactionID); } public boolean isCurrentTransaction(String transactionID) { @@ -391,68 +482,138 @@ public class ShardCommitCoordinator { */ public void currentTransactionComplete(String transactionID, boolean removeCohortEntry) { if(removeCohortEntry) { - removeCohortEntry(transactionID); + cohortCache.remove(transactionID); } if(isCurrentTransaction(transactionID)) { - // Dequeue the next cohort entry waiting in the queue. - currentCohortEntry = queuedCohortEntries.poll(); - if(currentCohortEntry != null) { - currentCohortEntry.updateLastAccessTime(); - doCanCommit(currentCohortEntry); + currentCohortEntry = null; + + log.debug("{}: currentTransactionComplete: {}", name, transactionID); + + maybeProcessNextCohortEntry(); + } + } + + private void maybeProcessNextCohortEntry() { + // Check if there's a next cohort entry waiting in the queue and if it is ready to commit. Also + // clean out expired entries. + Iterator iter = queuedCohortEntries.iterator(); + while(iter.hasNext()) { + CohortEntry next = iter.next(); + if(next.isReadyToCommit()) { + if(currentCohortEntry == null) { + if(log.isDebugEnabled()) { + log.debug("{}: Next entry to canCommit {}", name, next); + } + + iter.remove(); + currentCohortEntry = next; + currentCohortEntry.updateLastAccessTime(); + doCanCommit(currentCohortEntry); + } + + break; + } else if(next.isExpired(cacheExpiryTimeoutInMillis)) { + log.warn("{}: canCommit for transaction {} was not received within {} ms - entry removed from cache", + name, next.getTransactionID(), cacheExpiryTimeoutInMillis); + } else if(!next.isAborted()) { + break; } + + iter.remove(); + cohortCache.remove(next.getTransactionID()); } } + void cleanupExpiredCohortEntries() { + maybeProcessNextCohortEntry(); + } + @VisibleForTesting void setCohortDecorator(CohortDecorator cohortDecorator) { this.cohortDecorator = cohortDecorator; } - static class CohortEntry { private final String transactionID; private ShardDataTreeCohort cohort; private final ReadWriteShardDataTreeTransaction transaction; + private RuntimeException lastBatchedModificationsException; private ActorRef replySender; private Shard shard; - private long lastAccessTime; private boolean doImmediateCommit; + private final Stopwatch lastAccessTimer = Stopwatch.createStarted(); + private int totalBatchedModificationsReceived; + private boolean aborted; CohortEntry(String transactionID, ReadWriteShardDataTreeTransaction transaction) { this.transaction = Preconditions.checkNotNull(transaction); this.transactionID = transactionID; } - CohortEntry(String transactionID, ShardDataTreeCohort cohort, - MutableCompositeModification compositeModification) { + CohortEntry(String transactionID, ShardDataTreeCohort cohort) { this.transactionID = transactionID; this.cohort = cohort; this.transaction = null; } void updateLastAccessTime() { - lastAccessTime = System.currentTimeMillis(); - } - - long getLastAccessTime() { - return lastAccessTime; + lastAccessTimer.reset(); + lastAccessTimer.start(); } String getTransactionID() { return transactionID; } - ShardDataTreeCohort getCohort() { - return cohort; + DataTreeCandidate getCandidate() { + return cohort.getCandidate(); + } + + int getTotalBatchedModificationsReceived() { + return totalBatchedModificationsReceived; + } + + RuntimeException getLastBatchedModificationsException() { + return lastBatchedModificationsException; } void applyModifications(Iterable modifications) { - for (Modification modification : modifications) { - modification.apply(transaction.getSnapshot()); + totalBatchedModificationsReceived++; + if(lastBatchedModificationsException == null) { + for (Modification modification : modifications) { + try { + modification.apply(transaction.getSnapshot()); + } catch (RuntimeException e) { + lastBatchedModificationsException = e; + throw e; + } + } } } + boolean canCommit() throws InterruptedException, ExecutionException { + // We block on the future here (and also preCommit(), commit(), abort()) so we don't have to worry + // about possibly accessing our state on a different thread outside of our dispatcher. + // TODO: the ShardDataTreeCohort returns immediate Futures anyway which begs the question - why + // bother even returning Futures from ShardDataTreeCohort if we have to treat them synchronously + // anyway?. The Futures are really a remnant from when we were using the InMemoryDataBroker. + return cohort.canCommit().get(); + } + + void preCommit() throws InterruptedException, ExecutionException { + cohort.preCommit().get(); + } + + void commit() throws InterruptedException, ExecutionException { + cohort.commit().get(); + } + + void abort() throws InterruptedException, ExecutionException { + aborted = true; + cohort.abort().get(); + } + void ready(CohortDecorator cohortDecorator, boolean doImmediateCommit) { Preconditions.checkState(cohort == null, "cohort was already set"); @@ -466,6 +627,14 @@ public class ShardCommitCoordinator { } } + boolean isReadyToCommit() { + return replySender != null; + } + + boolean isExpired(long expireTimeInMillis) { + return lastAccessTimer.elapsed(TimeUnit.MILLISECONDS) >= expireTimeInMillis; + } + boolean isDoImmediateCommit() { return doImmediateCommit; } @@ -489,5 +658,18 @@ public class ShardCommitCoordinator { void setShard(Shard shard) { this.shard = shard; } + + + boolean isAborted() { + return aborted; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("CohortEntry [transactionID=").append(transactionID).append(", doImmediateCommit=") + .append(doImmediateCommit).append("]"); + return builder.toString(); + } } }