- if(failure instanceof NoShardLeaderException) {
- // There's no leader for the shard yet - schedule and try again, unless we're out
- // of retries. Note: createTxTries is volatile as it may be written by different
- // threads however not concurrently, therefore decrementing it non-atomically here
- // is ok.
- if(--createTxTries > 0) {
- LOG.debug("Tx {} Shard {} has no leader yet - scheduling create Tx retry",
- getIdentifier(), shardName);
-
- getActorContext().getActorSystem().scheduler().scheduleOnce(CREATE_TX_TRY_INTERVAL,
- new Runnable() {
- @Override
- public void run() {
- tryCreateTransaction();
- }
- }, getActorContext().getClientDispatcher());
+ // An AskTimeoutException will occur if the local shard forwards to an unavailable remote leader or
+ // the cached remote leader actor is no longer available.
+ boolean retryCreateTransaction = primaryShardInfo != null
+ && (failure instanceof NoShardLeaderException || failure instanceof AskTimeoutException);
+ if (retryCreateTransaction) {
+ // Schedule a retry unless we're out of retries. Note: totalCreateTxTimeout is volatile as it may
+ // be written by different threads however not concurrently, therefore decrementing it
+ // non-atomically here is ok.
+ if (totalCreateTxTimeout > 0) {
+ long scheduleInterval = CREATE_TX_TRY_INTERVAL_IN_MS;
+ if (failure instanceof AskTimeoutException) {
+ // Since we use the createTxMessageTimeout for the CreateTransaction request and it timed
+ // out, subtract it from the total timeout. Also since the createTxMessageTimeout period
+ // has already elapsed, we can immediately schedule the retry (10 ms is virtually immediate).
+ totalCreateTxTimeout -= createTxMessageTimeout.duration().toMillis();
+ scheduleInterval = 10;
+ }
+
+ totalCreateTxTimeout -= scheduleInterval;
+
+ LOG.debug("Tx {}: create tx on shard {} failed with exception \"{}\" - scheduling retry in {} ms",
+ getIdentifier(), shardName, failure, scheduleInterval);
+
+ getActorContext().getActorSystem().scheduler().scheduleOnce(
+ FiniteDuration.create(scheduleInterval, TimeUnit.MILLISECONDS),
+ () -> tryFindPrimaryShard(), getActorContext().getClientDispatcher());