import akka.actor.ActorSelection;
import akka.dispatch.OnComplete;
+import akka.pattern.AskTimeoutException;
+import akka.util.Timeout;
import com.google.common.base.Preconditions;
import java.util.concurrent.TimeUnit;
import org.opendaylight.controller.cluster.datastore.compat.PreLithiumTransactionContextImpl;
import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException;
+import org.opendaylight.controller.cluster.datastore.exceptions.ShardLeaderNotRespondingException;
import org.opendaylight.controller.cluster.datastore.identifiers.TransactionIdentifier;
import org.opendaylight.controller.cluster.datastore.messages.CreateTransaction;
import org.opendaylight.controller.cluster.datastore.messages.CreateTransactionReply;
+import org.opendaylight.controller.cluster.datastore.messages.PrimaryShardInfo;
import org.opendaylight.controller.cluster.datastore.utils.ActorContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
final class RemoteTransactionContextSupport {
private static final Logger LOG = LoggerFactory.getLogger(RemoteTransactionContextSupport.class);
- /**
- * Time interval in between transaction create retries.
- */
- private static final FiniteDuration CREATE_TX_TRY_INTERVAL = FiniteDuration.create(1, TimeUnit.SECONDS);
+ private static final long CREATE_TX_TRY_INTERVAL_IN_MS = 1000;
+ private static final long MAX_CREATE_TX_MSG_TIMEOUT_IN_MS = 5000;
private final TransactionProxy parent;
private final String shardName;
* The target primary shard.
*/
private volatile ActorSelection primaryShard;
- private volatile int createTxTries;
- private final TransactionContextWrapper transactionContextAdapter;
+ /**
+ * The total timeout for creating a tx on the primary shard.
+ */
+ private volatile long totalCreateTxTimeout;
+
+ private final Timeout createTxMessageTimeout;
+
+ private final TransactionContextWrapper transactionContextWrapper;
- RemoteTransactionContextSupport(final TransactionContextWrapper transactionContextAdapter, final TransactionProxy parent,
+ RemoteTransactionContextSupport(final TransactionContextWrapper transactionContextWrapper, final TransactionProxy parent,
final String shardName) {
this.parent = Preconditions.checkNotNull(parent);
this.shardName = shardName;
- this.transactionContextAdapter = transactionContextAdapter;
- createTxTries = (int) (parent.getActorContext().getDatastoreContext().
- getShardLeaderElectionTimeout().duration().toMillis() /
- CREATE_TX_TRY_INTERVAL.toMillis());
+ this.transactionContextWrapper = transactionContextWrapper;
+
+ // For the total create tx timeout, use 2 times the election timeout. This should be enough time for
+ // a leader re-election to occur if we happen to hit it in transition.
+ totalCreateTxTimeout = parent.getActorContext().getDatastoreContext().getShardRaftConfig()
+ .getElectionTimeOutInterval().toMillis() * 2;
+
+ // We'll use the operationTimeout for the the create Tx message timeout so it can be set appropriately
+ // for unit tests but cap it at MAX_CREATE_TX_MSG_TIMEOUT_IN_MS. The operationTimeout could be set
+ // larger than the totalCreateTxTimeout in production which we don't want.
+ long operationTimeout = parent.getActorContext().getOperationTimeout().duration().toMillis();
+ createTxMessageTimeout = new Timeout(Math.min(operationTimeout, MAX_CREATE_TX_MSG_TIMEOUT_IN_MS),
+ TimeUnit.MILLISECONDS);
}
String getShardName() {
return parent.getActorContext();
}
- private OperationLimiter getOperationLimiter() {
- return parent.getLimiter();
- }
-
private TransactionIdentifier getIdentifier() {
return parent.getIdentifier();
}
// For write-only Tx's we prepare the transaction modifications directly on the shard actor
// to avoid the overhead of creating a separate transaction actor.
- transactionContextAdapter.executePriorTransactionOperations(createValidTransactionContext(this.primaryShard,
+ transactionContextWrapper.executePriorTransactionOperations(createValidTransactionContext(this.primaryShard,
this.primaryShard.path().toString(), primaryVersion));
} else {
tryCreateTransaction();
Object serializedCreateMessage = new CreateTransaction(getIdentifier().toString(),
getTransactionType().ordinal(), getIdentifier().getChainId()).toSerializable();
- Future<Object> createTxFuture = getActorContext().executeOperationAsync(primaryShard, serializedCreateMessage);
+ Future<Object> createTxFuture = getActorContext().executeOperationAsync(primaryShard,
+ serializedCreateMessage, createTxMessageTimeout);
createTxFuture.onComplete(new OnComplete<Object>() {
@Override
}, getActorContext().getClientDispatcher());
}
+ private void tryFindPrimaryShard() {
+ LOG.debug("Tx {} Retrying findPrimaryShardAsync for shard {}", getIdentifier(), shardName);
+
+ this.primaryShard = null;
+ Future<PrimaryShardInfo> findPrimaryFuture = getActorContext().findPrimaryShardAsync(shardName);
+ findPrimaryFuture.onComplete(new OnComplete<PrimaryShardInfo>() {
+ @Override
+ public void onComplete(final Throwable failure, final PrimaryShardInfo primaryShardInfo) {
+ onFindPrimaryShardComplete(failure, primaryShardInfo);
+ }
+ }, getActorContext().getClientDispatcher());
+ }
+
+ private void onFindPrimaryShardComplete(final Throwable failure, final PrimaryShardInfo primaryShardInfo) {
+ if (failure == null) {
+ this.primaryShard = primaryShardInfo.getPrimaryShardActor();
+ tryCreateTransaction();
+ } else {
+ LOG.debug("Tx {}: Find primary for shard {} failed", getIdentifier(), shardName, failure);
+
+ onCreateTransactionComplete(failure, null);
+ }
+ }
+
private void onCreateTransactionComplete(Throwable failure, Object response) {
- if(failure instanceof NoShardLeaderException) {
- // There's no leader for the shard yet - schedule and try again, unless we're out
- // of retries. Note: createTxTries is volatile as it may be written by different
- // threads however not concurrently, therefore decrementing it non-atomically here
- // is ok.
- if(--createTxTries > 0) {
- LOG.debug("Tx {} Shard {} has no leader yet - scheduling create Tx retry",
- getIdentifier(), shardName);
-
- getActorContext().getActorSystem().scheduler().scheduleOnce(CREATE_TX_TRY_INTERVAL,
+ // An AskTimeoutException will occur if the local shard forwards to an unavailable remote leader or
+ // the cached remote leader actor is no longer available.
+ boolean retryCreateTransaction = this.primaryShard != null &&
+ (failure instanceof NoShardLeaderException || failure instanceof AskTimeoutException);
+ if(retryCreateTransaction) {
+ // Schedule a retry unless we're out of retries. Note: totalCreateTxTimeout is volatile as it may
+ // be written by different threads however not concurrently, therefore decrementing it
+ // non-atomically here is ok.
+ if(totalCreateTxTimeout > 0) {
+ long scheduleInterval = CREATE_TX_TRY_INTERVAL_IN_MS;
+ if(failure instanceof AskTimeoutException) {
+ // Since we use the createTxMessageTimeout for the CreateTransaction request and it timed
+ // out, subtract it from the total timeout. Also since the createTxMessageTimeout period
+ // has already elapsed, we can immediately schedule the retry (10 ms is virtually immediate).
+ totalCreateTxTimeout -= createTxMessageTimeout.duration().toMillis();
+ scheduleInterval = 10;
+ }
+
+ totalCreateTxTimeout -= scheduleInterval;
+
+ LOG.debug("Tx {}: create tx on shard {} failed with exception \"{}\" - scheduling retry in {} ms",
+ getIdentifier(), shardName, failure, scheduleInterval);
+
+ getActorContext().getActorSystem().scheduler().scheduleOnce(
+ FiniteDuration.create(scheduleInterval, TimeUnit.MILLISECONDS),
new Runnable() {
@Override
public void run() {
- tryCreateTransaction();
+ tryFindPrimaryShard();
}
}, getActorContext().getClientDispatcher());
return;
if(failure != null) {
LOG.debug("Tx {} Creating NoOpTransaction because of error", getIdentifier(), failure);
- localTransactionContext = new NoOpTransactionContext(failure, getOperationLimiter());
+ Throwable resultingEx = failure;
+ if(failure instanceof AskTimeoutException) {
+ resultingEx = new ShardLeaderNotRespondingException(String.format(
+ "Could not create a %s transaction on shard %s. The shard leader isn't responding.",
+ parent.getType(), shardName), failure);
+ } else if(!(failure instanceof NoShardLeaderException)) {
+ resultingEx = new Exception(String.format(
+ "Error creating %s transaction on shard %s", parent.getType(), shardName), failure);
+ }
+
+ localTransactionContext = new NoOpTransactionContext(resultingEx, getIdentifier());
} else if (CreateTransactionReply.SERIALIZABLE_CLASS.equals(response.getClass())) {
localTransactionContext = createValidTransactionContext(
CreateTransactionReply.fromSerializable(response));
IllegalArgumentException exception = new IllegalArgumentException(String.format(
"Invalid reply type %s for CreateTransaction", response.getClass()));
- localTransactionContext = new NoOpTransactionContext(exception, getOperationLimiter());
+ localTransactionContext = new NoOpTransactionContext(exception, getIdentifier());
}
- transactionContextAdapter.executePriorTransactionOperations(localTransactionContext);
+ transactionContextWrapper.executePriorTransactionOperations(localTransactionContext);
}
private TransactionContext createValidTransactionContext(CreateTransactionReply reply) {
final TransactionContext ret;
if (remoteTransactionVersion < DataStoreVersions.LITHIUM_VERSION) {
- ret = new PreLithiumTransactionContextImpl(transactionPath, transactionActor,
- getActorContext(), isTxActorLocal, remoteTransactionVersion, parent.getLimiter());
+ ret = new PreLithiumTransactionContextImpl(transactionContextWrapper.getIdentifier(), transactionPath, transactionActor,
+ getActorContext(), isTxActorLocal, remoteTransactionVersion, transactionContextWrapper.getLimiter());
} else {
- ret = new RemoteTransactionContext(transactionActor, getActorContext(),
- isTxActorLocal, remoteTransactionVersion, parent.getLimiter());
+ ret = new RemoteTransactionContext(transactionContextWrapper.getIdentifier(), transactionActor, getActorContext(),
+ isTxActorLocal, remoteTransactionVersion, transactionContextWrapper.getLimiter());
}
if(parent.getType() == TransactionType.READ_ONLY) {