X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FShardCommitCoordinator.java;h=30947fa6662b4a56d5b091cfe3133d019c9f9a24;hp=5d0ca38d6a2f1a1398161c7e73246a47f7b216f6;hb=107324809285bfbb9890cba38ffa18390f8de4bd;hpb=3e77d4e181b0024936084e10d55ae0d7f285b5ad diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java index 5d0ca38d6a..30947fa666 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardCommitCoordinator.java @@ -9,17 +9,27 @@ package org.opendaylight.controller.cluster.datastore; import akka.actor.ActorRef; import akka.actor.Status; +import akka.serialization.Serialization; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; +import com.google.common.cache.RemovalCause; +import com.google.common.cache.RemovalListener; +import com.google.common.cache.RemovalNotification; import java.util.LinkedList; import java.util.Queue; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransaction; +import org.opendaylight.controller.cluster.datastore.compat.BackwardsCompatibleThreePhaseCommitCohort; +import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications; +import org.opendaylight.controller.cluster.datastore.messages.BatchedModificationsReply; import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransactionReply; -import org.opendaylight.controller.cluster.datastore.modification.CompositeModification; +import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction; +import org.opendaylight.controller.cluster.datastore.messages.ReadyTransactionReply; import org.opendaylight.controller.cluster.datastore.modification.Modification; -import org.opendaylight.controller.sal.core.spi.data.DOMStoreThreePhaseCommitCohort; +import org.opendaylight.controller.cluster.datastore.modification.MutableCompositeModification; +import org.opendaylight.controller.md.sal.common.api.data.TransactionCommitFailedException; import org.slf4j.Logger; /** @@ -29,10 +39,17 @@ import org.slf4j.Logger; */ public class ShardCommitCoordinator { + // Interface hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts. + public interface CohortDecorator { + ShardDataTreeCohort decorate(String transactionID, ShardDataTreeCohort actual); + } + private final Cache cohortCache; private CohortEntry currentCohortEntry; + private final ShardDataTree dataTree; + private final Queue queuedCohortEntries; private int queueCapacity; @@ -41,14 +58,31 @@ public class ShardCommitCoordinator { private final String name; - public ShardCommitCoordinator(long cacheExpiryTimeoutInSec, int queueCapacity, Logger log, - String name) { - cohortCache = CacheBuilder.newBuilder().expireAfterAccess( - cacheExpiryTimeoutInSec, TimeUnit.SECONDS).build(); + private final RemovalListener cacheRemovalListener = + new RemovalListener() { + @Override + public void onRemoval(RemovalNotification notification) { + if(notification.getCause() == RemovalCause.EXPIRED) { + log.warn("{}: Transaction {} was timed out of the cache", name, notification.getKey()); + } + } + }; + + // This is a hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts. + private CohortDecorator cohortDecorator; + + private ReadyTransactionReply readyTransactionReply; + + public ShardCommitCoordinator(ShardDataTree dataTree, + long cacheExpiryTimeoutInSec, int queueCapacity, ActorRef shardActor, Logger log, String name) { this.queueCapacity = queueCapacity; this.log = log; this.name = name; + this.dataTree = Preconditions.checkNotNull(dataTree); + + cohortCache = CacheBuilder.newBuilder().expireAfterAccess(cacheExpiryTimeoutInSec, TimeUnit.SECONDS). + removalListener(cacheRemovalListener).build(); // We use a LinkedList here to avoid synchronization overhead with concurrent queue impls // since this should only be accessed on the shard's dispatcher. @@ -59,50 +93,114 @@ public class ShardCommitCoordinator { this.queueCapacity = queueCapacity; } + private ReadyTransactionReply readyTransactionReply(Shard shard) { + if(readyTransactionReply == null) { + readyTransactionReply = new ReadyTransactionReply(Serialization.serializedActorPath(shard.self())); + } + + return readyTransactionReply; + } + /** - * This method caches a cohort entry for the given transactions ID in preparation for the - * subsequent 3-phase commit. - * - * @param transactionID the ID of the transaction - * @param cohort the cohort to participate in the transaction commit - * @param modification the modification made by the transaction + * This method is called to ready a transaction that was prepared by ShardTransaction actor. It caches + * the prepared cohort entry for the given transactions ID in preparation for the subsequent 3-phase commit. */ - public void transactionReady(String transactionID, DOMStoreThreePhaseCommitCohort cohort, - Modification modification) { + public void handleForwardedReadyTransaction(ForwardedReadyTransaction ready, ActorRef sender, Shard shard) { + log.debug("{}: Readying transaction {}, client version {}", name, + ready.getTransactionID(), ready.getTxnClientVersion()); + + CohortEntry cohortEntry = new CohortEntry(ready.getTransactionID(), ready.getCohort(), + (MutableCompositeModification) ready.getModification()); + cohortCache.put(ready.getTransactionID(), cohortEntry); + + if(ready.getTxnClientVersion() < DataStoreVersions.LITHIUM_VERSION) { + // Return our actor path as we'll handle the three phase commit except if the Tx client + // version < Helium-1 version which means the Tx was initiated by a base Helium version node. + // In that case, the subsequent 3-phase commit messages won't contain the transactionId so to + // maintain backwards compatibility, we create a separate cohort actor to provide the compatible behavior. + ActorRef replyActorPath = shard.self(); + if(ready.getTxnClientVersion() < DataStoreVersions.HELIUM_1_VERSION) { + log.debug("{}: Creating BackwardsCompatibleThreePhaseCommitCohort", name); + replyActorPath = shard.getContext().actorOf(BackwardsCompatibleThreePhaseCommitCohort.props( + ready.getTransactionID())); + } - cohortCache.put(transactionID, new CohortEntry(transactionID, cohort, modification)); + ReadyTransactionReply readyTransactionReply = + new ReadyTransactionReply(Serialization.serializedActorPath(replyActorPath), + ready.getTxnClientVersion()); + sender.tell(ready.isReturnSerialized() ? readyTransactionReply.toSerializable() : + readyTransactionReply, shard.self()); + } else { + if(ready.isDoImmediateCommit()) { + cohortEntry.setDoImmediateCommit(true); + cohortEntry.setReplySender(sender); + cohortEntry.setShard(shard); + handleCanCommit(cohortEntry); + } else { + // The caller does not want immediate commit - the 3-phase commit will be coordinated by the + // front-end so send back a ReadyTransactionReply with our actor path. + sender.tell(readyTransactionReply(shard), shard.self()); + } + } } /** - * This method handles the canCommit phase for a transaction. + * This method handles a BatchedModifications message for a transaction being prepared directly on the + * Shard actor instead of via a ShardTransaction actor. If there's no currently cached + * DOMStoreWriteTransaction, one is created. The batched modifications are applied to the write Tx. If + * the BatchedModifications is ready to commit then a DOMStoreThreePhaseCommitCohort is created. * - * @param canCommit the CanCommitTransaction message - * @param sender the actor that sent the message - * @param shard the transaction's shard actor + * @param batched the BatchedModifications + * @param shardActor the transaction's shard actor + * + * @throws ExecutionException if an error occurs loading the cache */ - public void handleCanCommit(CanCommitTransaction canCommit, final ActorRef sender, - final ActorRef shard) { - String transactionID = canCommit.getTransactionID(); + boolean handleBatchedModifications(BatchedModifications batched, ActorRef sender, Shard shard) + throws ExecutionException { + CohortEntry cohortEntry = cohortCache.getIfPresent(batched.getTransactionID()); + if(cohortEntry == null) { + cohortEntry = new CohortEntry(batched.getTransactionID(), + dataTree.newReadWriteTransaction(batched.getTransactionID(), + batched.getTransactionChainID())); + cohortCache.put(batched.getTransactionID(), cohortEntry); + } + if(log.isDebugEnabled()) { - log.debug("{}: Processing canCommit for transaction {} for shard {}", - name, transactionID, shard.path()); + log.debug("{}: Applying {} batched modifications for Tx {}", name, + batched.getModifications().size(), batched.getTransactionID()); } - // Lookup the cohort entry that was cached previously (or should have been) by - // transactionReady (via the ForwardedReadyTransaction message). - final CohortEntry cohortEntry = cohortCache.getIfPresent(transactionID); - if(cohortEntry == null) { - // Either canCommit was invoked before ready(shouldn't happen) or a long time passed - // between canCommit and ready and the entry was expired from the cache. - IllegalStateException ex = new IllegalStateException( - String.format("%s: No cohort entry found for transaction %s", name, transactionID)); - log.error(ex.getMessage()); - sender.tell(new Status.Failure(ex), shard); - return; + cohortEntry.applyModifications(batched.getModifications()); + + if(batched.isReady()) { + if(log.isDebugEnabled()) { + log.debug("{}: Readying Tx {}, client version {}", name, + batched.getTransactionID(), batched.getVersion()); + } + + cohortEntry.ready(cohortDecorator, batched.isDoCommitOnReady()); + + if(batched.isDoCommitOnReady()) { + cohortEntry.setReplySender(sender); + cohortEntry.setShard(shard); + handleCanCommit(cohortEntry); + } else { + sender.tell(readyTransactionReply(shard), shard.self()); + } + } else { + sender.tell(new BatchedModificationsReply(batched.getModifications().size()), shard.self()); } - cohortEntry.setCanCommitSender(sender); - cohortEntry.setShard(shard); + return batched.isReady(); + } + + private void handleCanCommit(CohortEntry cohortEntry) { + String transactionID = cohortEntry.getTransactionID(); + + if(log.isDebugEnabled()) { + log.debug("{}: Processing canCommit for transaction {} for shard {}", + name, transactionID, cohortEntry.getShard().self().path()); + } if(currentCohortEntry != null) { // There's already a Tx commit in progress - attempt to queue this entry to be @@ -120,7 +218,7 @@ public class ShardCommitCoordinator { " capacity %d has been reached.", name, transactionID, queueCapacity)); log.error(ex.getMessage()); - sender.tell(new Status.Failure(ex), shard); + cohortEntry.getReplySender().tell(new Status.Failure(ex), cohortEntry.getShard().self()); } } else { // No Tx commit currently in progress - make this the current entry and proceed with @@ -132,29 +230,119 @@ public class ShardCommitCoordinator { } } + /** + * This method handles the canCommit phase for a transaction. + * + * @param canCommit the CanCommitTransaction message + * @param sender the actor that sent the message + * @param shard the transaction's shard actor + */ + public void handleCanCommit(String transactionID, final ActorRef sender, final Shard shard) { + // Lookup the cohort entry that was cached previously (or should have been) by + // transactionReady (via the ForwardedReadyTransaction message). + final CohortEntry cohortEntry = cohortCache.getIfPresent(transactionID); + if(cohortEntry == null) { + // Either canCommit was invoked before ready(shouldn't happen) or a long time passed + // between canCommit and ready and the entry was expired from the cache. + IllegalStateException ex = new IllegalStateException( + String.format("%s: No cohort entry found for transaction %s", name, transactionID)); + log.error(ex.getMessage()); + sender.tell(new Status.Failure(ex), shard.self()); + return; + } + + cohortEntry.setReplySender(sender); + cohortEntry.setShard(shard); + + handleCanCommit(cohortEntry); + } + private void doCanCommit(final CohortEntry cohortEntry) { + boolean canCommit = false; try { // We block on the future here so we don't have to worry about possibly accessing our // state on a different thread outside of our dispatcher. Also, the data store // currently uses a same thread executor anyway. - Boolean canCommit = cohortEntry.getCohort().canCommit().get(); + canCommit = cohortEntry.getCohort().canCommit().get(); + + if(cohortEntry.isDoImmediateCommit()) { + if(canCommit) { + doCommit(cohortEntry); + } else { + cohortEntry.getReplySender().tell(new Status.Failure(new TransactionCommitFailedException( + "Can Commit failed, no detailed cause available.")), cohortEntry.getShard().self()); + } + } else { + cohortEntry.getReplySender().tell( + canCommit ? CanCommitTransactionReply.YES.toSerializable() : + CanCommitTransactionReply.NO.toSerializable(), cohortEntry.getShard().self()); + } + } catch (Exception e) { + log.debug("{}: An exception occurred during canCommit: {}", name, e); - cohortEntry.getCanCommitSender().tell( - canCommit ? CanCommitTransactionReply.YES.toSerializable() : - CanCommitTransactionReply.NO.toSerializable(), cohortEntry.getShard()); + Throwable failure = e; + if(e instanceof ExecutionException) { + failure = e.getCause(); + } + cohortEntry.getReplySender().tell(new Status.Failure(failure), cohortEntry.getShard().self()); + } finally { if(!canCommit) { - // Remove the entry from the cache now since the Tx will be aborted. - removeCohortEntry(cohortEntry.getTransactionID()); + // Remove the entry from the cache now. + currentTransactionComplete(cohortEntry.getTransactionID(), true); } - } catch (InterruptedException | ExecutionException e) { - log.debug("{}: An exception occurred during canCommit: {}", name, e); + } + } + + private boolean doCommit(CohortEntry cohortEntry) { + log.debug("{}: Committing transaction {}", name, cohortEntry.getTransactionID()); + + boolean success = false; + + // We perform the preCommit phase here atomically with the commit phase. This is an + // optimization to eliminate the overhead of an extra preCommit message. We lose front-end + // coordination of preCommit across shards in case of failure but preCommit should not + // normally fail since we ensure only one concurrent 3-phase commit. + + try { + // We block on the future here so we don't have to worry about possibly accessing our + // state on a different thread outside of our dispatcher. Also, the data store + // currently uses a same thread executor anyway. + cohortEntry.getCohort().preCommit().get(); + + cohortEntry.getShard().continueCommit(cohortEntry); + + cohortEntry.updateLastAccessTime(); + + success = true; + } catch (Exception e) { + log.error("{} An exception occurred while preCommitting transaction {}", + name, cohortEntry.getTransactionID(), e); + cohortEntry.getReplySender().tell(new akka.actor.Status.Failure(e), cohortEntry.getShard().self()); - // Remove the entry from the cache now since the Tx will be aborted. - removeCohortEntry(cohortEntry.getTransactionID()); - cohortEntry.getCanCommitSender().tell(new Status.Failure(e), cohortEntry.getShard()); + currentTransactionComplete(cohortEntry.getTransactionID(), true); } + + return success; + } + + boolean handleCommit(final String transactionID, final ActorRef sender, final Shard shard) { + // Get the current in-progress cohort entry in the commitCoordinator if it corresponds to + // this transaction. + final CohortEntry cohortEntry = getCohortEntryIfCurrent(transactionID); + if(cohortEntry == null) { + // We're not the current Tx - the Tx was likely expired b/c it took too long in + // between the canCommit and commit messages. + IllegalStateException ex = new IllegalStateException( + String.format("%s: Cannot commit transaction %s - it is not the current transaction", + name, transactionID)); + log.error(ex.getMessage()); + sender.tell(new akka.actor.Status.Failure(ex), shard.self()); + return false; + } + + return doCommit(cohortEntry); } /** @@ -216,19 +404,31 @@ public class ShardCommitCoordinator { } } + @VisibleForTesting + void setCohortDecorator(CohortDecorator cohortDecorator) { + this.cohortDecorator = cohortDecorator; + } + + static class CohortEntry { private final String transactionID; - private final DOMStoreThreePhaseCommitCohort cohort; - private final Modification modification; - private ActorRef canCommitSender; - private ActorRef shard; + private ShardDataTreeCohort cohort; + private final ReadWriteShardDataTreeTransaction transaction; + private ActorRef replySender; + private Shard shard; private long lastAccessTime; + private boolean doImmediateCommit; + + CohortEntry(String transactionID, ReadWriteShardDataTreeTransaction transaction) { + this.transaction = Preconditions.checkNotNull(transaction); + this.transactionID = transactionID; + } - CohortEntry(String transactionID, DOMStoreThreePhaseCommitCohort cohort, - Modification modification) { + CohortEntry(String transactionID, ShardDataTreeCohort cohort, + MutableCompositeModification compositeModification) { this.transactionID = transactionID; this.cohort = cohort; - this.modification = modification; + this.transaction = null; } void updateLastAccessTime() { @@ -243,35 +443,51 @@ public class ShardCommitCoordinator { return transactionID; } - DOMStoreThreePhaseCommitCohort getCohort() { + ShardDataTreeCohort getCohort() { return cohort; } - Modification getModification() { - return modification; + void applyModifications(Iterable modifications) { + for (Modification modification : modifications) { + modification.apply(transaction.getSnapshot()); + } } - ActorRef getCanCommitSender() { - return canCommitSender; + void ready(CohortDecorator cohortDecorator, boolean doImmediateCommit) { + Preconditions.checkState(cohort == null, "cohort was already set"); + + setDoImmediateCommit(doImmediateCommit); + + cohort = transaction.ready(); + + if(cohortDecorator != null) { + // Call the hook for unit tests. + cohort = cohortDecorator.decorate(transactionID, cohort); + } } - void setCanCommitSender(ActorRef canCommitSender) { - this.canCommitSender = canCommitSender; + boolean isDoImmediateCommit() { + return doImmediateCommit; } - ActorRef getShard() { - return shard; + void setDoImmediateCommit(boolean doImmediateCommit) { + this.doImmediateCommit = doImmediateCommit; } - void setShard(ActorRef shard) { - this.shard = shard; + ActorRef getReplySender() { + return replySender; } - boolean hasModifications(){ - if(modification instanceof CompositeModification){ - return ((CompositeModification) modification).getModifications().size() > 0; - } - return true; + void setReplySender(ActorRef replySender) { + this.replySender = replySender; + } + + Shard getShard() { + return shard; + } + + void setShard(Shard shard) { + this.shard = shard; } } }