X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FRemoteTransactionContextSupport.java;h=4a031fa9160d55ae341e2ea50a4539513dc7be96;hp=984d650a32fcd52db6749e48aa820d7b47b6975e;hb=8ec73bf853a9b6708b455c0321a585992e02b125;hpb=d71b6614d6cdb5a98f086edeb56f5c52f365c61c diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/RemoteTransactionContextSupport.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/RemoteTransactionContextSupport.java index 984d650a32..4a031fa916 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/RemoteTransactionContextSupport.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/RemoteTransactionContextSupport.java @@ -10,13 +10,16 @@ package org.opendaylight.controller.cluster.datastore; import akka.actor.ActorSelection; import akka.dispatch.OnComplete; +import akka.pattern.AskTimeoutException; +import akka.util.Timeout; import com.google.common.base.Preconditions; import java.util.concurrent.TimeUnit; -import org.opendaylight.controller.cluster.datastore.compat.PreLithiumTransactionContextImpl; import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException; +import org.opendaylight.controller.cluster.datastore.exceptions.ShardLeaderNotRespondingException; import org.opendaylight.controller.cluster.datastore.identifiers.TransactionIdentifier; import org.opendaylight.controller.cluster.datastore.messages.CreateTransaction; import org.opendaylight.controller.cluster.datastore.messages.CreateTransactionReply; +import org.opendaylight.controller.cluster.datastore.messages.PrimaryShardInfo; import 
org.opendaylight.controller.cluster.datastore.utils.ActorContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,10 +39,8 @@ import scala.concurrent.duration.FiniteDuration; final class RemoteTransactionContextSupport { private static final Logger LOG = LoggerFactory.getLogger(RemoteTransactionContextSupport.class); - /** - * Time interval in between transaction create retries. - */ - private static final FiniteDuration CREATE_TX_TRY_INTERVAL = FiniteDuration.create(1, TimeUnit.SECONDS); + private static final long CREATE_TX_TRY_INTERVAL_IN_MS = 1000; + private static final long MAX_CREATE_TX_MSG_TIMEOUT_IN_MS = 5000; private final TransactionProxy parent; private final String shardName; @@ -47,19 +48,34 @@ final class RemoteTransactionContextSupport { /** * The target primary shard. */ - private volatile ActorSelection primaryShard; - private volatile int createTxTries; + private volatile PrimaryShardInfo primaryShardInfo; - private final TransactionContextWrapper transactionContextAdapter; + /** + * The total timeout for creating a tx on the primary shard. + */ + private volatile long totalCreateTxTimeout; - RemoteTransactionContextSupport(final TransactionContextWrapper transactionContextAdapter, final TransactionProxy parent, + private final Timeout createTxMessageTimeout; + + private final TransactionContextWrapper transactionContextWrapper; + + RemoteTransactionContextSupport(final TransactionContextWrapper transactionContextWrapper, final TransactionProxy parent, final String shardName) { this.parent = Preconditions.checkNotNull(parent); this.shardName = shardName; - this.transactionContextAdapter = transactionContextAdapter; - createTxTries = (int) (parent.getActorContext().getDatastoreContext(). - getShardLeaderElectionTimeout().duration().toMillis() / - CREATE_TX_TRY_INTERVAL.toMillis()); + this.transactionContextWrapper = transactionContextWrapper; + + // For the total create tx timeout, use 2 times the election timeout. 
This should be enough time for + // a leader re-election to occur if we happen to hit it in transition. + totalCreateTxTimeout = parent.getActorContext().getDatastoreContext().getShardRaftConfig() + .getElectionTimeOutInterval().toMillis() * 2; + + // We'll use the operationTimeout for the create Tx message timeout so it can be set appropriately + // for unit tests but cap it at MAX_CREATE_TX_MSG_TIMEOUT_IN_MS. The operationTimeout could be set + // larger than the totalCreateTxTimeout in production which we don't want. + long operationTimeout = parent.getActorContext().getOperationTimeout().duration().toMillis(); + createTxMessageTimeout = new Timeout(Math.min(operationTimeout, MAX_CREATE_TX_MSG_TIMEOUT_IN_MS), + TimeUnit.MILLISECONDS); } String getShardName() { @@ -74,10 +90,6 @@ final class RemoteTransactionContextSupport { return parent.getActorContext(); } - private OperationLimiter getOperationLimiter() { - return parent.getLimiter(); - } - private TransactionIdentifier getIdentifier() { return parent.getIdentifier(); } @@ -85,19 +97,20 @@ final class RemoteTransactionContextSupport { /** * Sets the target primary shard and initiates a CreateTransaction try. */ - void setPrimaryShard(ActorSelection primaryShard) { - this.primaryShard = primaryShard; + void setPrimaryShard(PrimaryShardInfo primaryShardInfo) { + this.primaryShardInfo = primaryShardInfo; - if (getTransactionType() == TransactionType.WRITE_ONLY && + if (getTransactionType() == TransactionType.WRITE_ONLY && getActorContext().getDatastoreContext().isWriteOnlyTransactionOptimizationsEnabled()) { + ActorSelection primaryShard = primaryShardInfo.getPrimaryShardActor(); + LOG.debug("Tx {} Primary shard {} found - creating WRITE_ONLY transaction context", getIdentifier(), primaryShard); // For write-only Tx's we prepare the transaction modifications directly on the shard actor // to avoid the overhead of creating a separate transaction actor. 
- // FIXME: can't assume the shard version is LITHIUM_VERSION - need to obtain it somehow. - transactionContextAdapter.executePriorTransactionOperations(createValidTransactionContext(this.primaryShard, - this.primaryShard.path().toString(), DataStoreVersions.LITHIUM_VERSION)); + transactionContextWrapper.executePriorTransactionOperations(createValidTransactionContext( + primaryShard, String.valueOf(primaryShard.path()), primaryShardInfo.getPrimaryShardVersion())); } else { tryCreateTransaction(); } @@ -108,13 +121,16 @@ final class RemoteTransactionContextSupport { */ private void tryCreateTransaction() { if(LOG.isDebugEnabled()) { - LOG.debug("Tx {} Primary shard {} found - trying create transaction", getIdentifier(), primaryShard); + LOG.debug("Tx {} Primary shard {} found - trying create transaction", getIdentifier(), + primaryShardInfo.getPrimaryShardActor()); } Object serializedCreateMessage = new CreateTransaction(getIdentifier().toString(), - getTransactionType().ordinal(), getIdentifier().getChainId()).toSerializable(); + getTransactionType().ordinal(), getIdentifier().getChainId(), + primaryShardInfo.getPrimaryShardVersion()).toSerializable(); - Future createTxFuture = getActorContext().executeOperationAsync(primaryShard, serializedCreateMessage); + Future createTxFuture = getActorContext().executeOperationAsync( + primaryShardInfo.getPrimaryShardActor(), serializedCreateMessage, createTxMessageTimeout); createTxFuture.onComplete(new OnComplete() { @Override @@ -124,21 +140,60 @@ final class RemoteTransactionContextSupport { }, getActorContext().getClientDispatcher()); } + private void tryFindPrimaryShard() { + LOG.debug("Tx {} Retrying findPrimaryShardAsync for shard {}", getIdentifier(), shardName); + + this.primaryShardInfo = null; + Future findPrimaryFuture = getActorContext().findPrimaryShardAsync(shardName); + findPrimaryFuture.onComplete(new OnComplete() { + @Override + public void onComplete(final Throwable failure, final PrimaryShardInfo 
primaryShardInfo) { + onFindPrimaryShardComplete(failure, primaryShardInfo); + } + }, getActorContext().getClientDispatcher()); + } + + private void onFindPrimaryShardComplete(final Throwable failure, final PrimaryShardInfo primaryShardInfo) { + if (failure == null) { + this.primaryShardInfo = primaryShardInfo; + tryCreateTransaction(); + } else { + LOG.debug("Tx {}: Find primary for shard {} failed", getIdentifier(), shardName, failure); + + onCreateTransactionComplete(failure, null); + } + } + private void onCreateTransactionComplete(Throwable failure, Object response) { - if(failure instanceof NoShardLeaderException) { - // There's no leader for the shard yet - schedule and try again, unless we're out - // of retries. Note: createTxTries is volatile as it may be written by different - // threads however not concurrently, therefore decrementing it non-atomically here - // is ok. - if(--createTxTries > 0) { - LOG.debug("Tx {} Shard {} has no leader yet - scheduling create Tx retry", - getIdentifier(), shardName); - - getActorContext().getActorSystem().scheduler().scheduleOnce(CREATE_TX_TRY_INTERVAL, + // An AskTimeoutException will occur if the local shard forwards to an unavailable remote leader or + // the cached remote leader actor is no longer available. + boolean retryCreateTransaction = primaryShardInfo != null && + (failure instanceof NoShardLeaderException || failure instanceof AskTimeoutException); + if(retryCreateTransaction) { + // Schedule a retry unless we're out of retries. Note: totalCreateTxTimeout is volatile as it may + // be written by different threads however not concurrently, therefore decrementing it + // non-atomically here is ok. + if(totalCreateTxTimeout > 0) { + long scheduleInterval = CREATE_TX_TRY_INTERVAL_IN_MS; + if(failure instanceof AskTimeoutException) { + // Since we use the createTxMessageTimeout for the CreateTransaction request and it timed + // out, subtract it from the total timeout. 
Also since the createTxMessageTimeout period + // has already elapsed, we can immediately schedule the retry (10 ms is virtually immediate). + totalCreateTxTimeout -= createTxMessageTimeout.duration().toMillis(); + scheduleInterval = 10; + } + + totalCreateTxTimeout -= scheduleInterval; + + LOG.debug("Tx {}: create tx on shard {} failed with exception \"{}\" - scheduling retry in {} ms", + getIdentifier(), shardName, failure, scheduleInterval); + + getActorContext().getActorSystem().scheduler().scheduleOnce( + FiniteDuration.create(scheduleInterval, TimeUnit.MILLISECONDS), new Runnable() { @Override public void run() { - tryCreateTransaction(); + tryFindPrimaryShard(); } }, getActorContext().getClientDispatcher()); return; @@ -161,44 +216,44 @@ final class RemoteTransactionContextSupport { if(failure != null) { LOG.debug("Tx {} Creating NoOpTransaction because of error", getIdentifier(), failure); - localTransactionContext = new NoOpTransactionContext(failure, getOperationLimiter()); - } else if (CreateTransactionReply.SERIALIZABLE_CLASS.equals(response.getClass())) { + Throwable resultingEx = failure; + if(failure instanceof AskTimeoutException) { + resultingEx = new ShardLeaderNotRespondingException(String.format( + "Could not create a %s transaction on shard %s. 
The shard leader isn't responding.", + parent.getType(), shardName), failure); + } else if(!(failure instanceof NoShardLeaderException)) { + resultingEx = new Exception(String.format( + "Error creating %s transaction on shard %s", parent.getType(), shardName), failure); + } + + localTransactionContext = new NoOpTransactionContext(resultingEx, getIdentifier()); + } else if (CreateTransactionReply.isSerializedType(response)) { localTransactionContext = createValidTransactionContext( CreateTransactionReply.fromSerializable(response)); } else { IllegalArgumentException exception = new IllegalArgumentException(String.format( "Invalid reply type %s for CreateTransaction", response.getClass())); - localTransactionContext = new NoOpTransactionContext(exception, getOperationLimiter()); + localTransactionContext = new NoOpTransactionContext(exception, getIdentifier()); } - transactionContextAdapter.executePriorTransactionOperations(localTransactionContext); + transactionContextWrapper.executePriorTransactionOperations(localTransactionContext); } private TransactionContext createValidTransactionContext(CreateTransactionReply reply) { LOG.debug("Tx {} Received {}", getIdentifier(), reply); return createValidTransactionContext(getActorContext().actorSelection(reply.getTransactionPath()), - reply.getTransactionPath(), reply.getVersion()); + reply.getTransactionPath(), primaryShardInfo.getPrimaryShardVersion()); } private TransactionContext createValidTransactionContext(ActorSelection transactionActor, String transactionPath, short remoteTransactionVersion) { - // TxActor is always created where the leader of the shard is. 
- // Check if TxActor is created in the same node - boolean isTxActorLocal = getActorContext().isPathLocal(transactionPath); - final TransactionContext ret; - - if (remoteTransactionVersion < DataStoreVersions.LITHIUM_VERSION) { - ret = new PreLithiumTransactionContextImpl(transactionPath, transactionActor, - getActorContext(), isTxActorLocal, remoteTransactionVersion, parent.getLimiter()); - } else { - ret = new RemoteTransactionContext(transactionActor, getActorContext(), - isTxActorLocal, remoteTransactionVersion, parent.getLimiter()); - } + final TransactionContext ret = new RemoteTransactionContext(transactionContextWrapper.getIdentifier(), + transactionActor, getActorContext(), remoteTransactionVersion, transactionContextWrapper.getLimiter()); if(parent.getType() == TransactionType.READ_ONLY) { - TransactionContextCleanup.track(this, ret); + TransactionContextCleanup.track(parent, ret); } return ret;