From bde90693f8326e284cd4bb024fe5ed34d4fa4556 Mon Sep 17 00:00:00 2001 From: Robert Varga Date: Mon, 3 Jul 2017 18:55:18 +0200 Subject: [PATCH] BUG-8618: turn timeouts in READY state into canCommit failures This patch adds more details to the TimeoutException reported when we prune a transaction while it is in the queue. It also peels the READY case from the defaults and makes sure we send an authoritative reply back to the frontend when it requests the transaction to be committed. Change-Id: I21364ff7e7103af8be6988b8483adc112c3c1d25 Signed-off-by: Robert Varga (cherry picked from commit 0d5408c4babc902d270d9f81ed53c6af93bb2867) --- .../cluster/datastore/ShardDataTree.java | 20 ++++++++++++------- .../datastore/SimpleShardDataTreeCohort.java | 7 ++++++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java index 7b317faaee..4c226aaa9c 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java @@ -975,21 +975,25 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { !pendingCommits.isEmpty() ? pendingCommits : pendingTransactions; final CommitEntry currentTx = currentQueue.peek(); if (currentTx != null && currentTx.lastAccess + timeout < now) { + final State state = currentTx.cohort.getState(); LOG.warn("{}: Current transaction {} has timed out after {} ms in state {}", logContext, - currentTx.cohort.getIdentifier(), transactionCommitTimeoutMillis, currentTx.cohort.getState()); + currentTx.cohort.getIdentifier(), transactionCommitTimeoutMillis, state); boolean processNext = true; - switch (currentTx.cohort.getState()) { + final TimeoutException cohortFailure = new TimeoutException("Backend timeout in state " + state + " after " + + transactionCommitTimeoutMillis + "ms"); + + switch (state) { case CAN_COMMIT_PENDING: - currentQueue.remove().cohort.failedCanCommit(new TimeoutException()); + currentQueue.remove().cohort.failedCanCommit(cohortFailure); break; case CAN_COMMIT_COMPLETE: // The suppression of the FindBugs "DB_DUPLICATE_SWITCH_CLAUSES" warning pertains to this clause // whose code is duplicated with PRE_COMMIT_COMPLETE. The clauses aren't combined in case the code // in PRE_COMMIT_COMPLETE is changed. - currentQueue.remove().cohort.reportFailure(new TimeoutException()); + currentQueue.remove().cohort.reportFailure(cohortFailure); break; case PRE_COMMIT_PENDING: - currentQueue.remove().cohort.failedPreCommit(new TimeoutException()); + currentQueue.remove().cohort.failedPreCommit(cohortFailure); break; case PRE_COMMIT_COMPLETE: // FIXME: this is a legacy behavior problem. Three-phase commit protocol specifies that after we @@ -1009,7 +1013,7 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { // In order to make the pre-commit timer working across failovers, though, we need // a per-shard cluster-wide monotonic time, so a follower becoming the leader can accurately // restart the timer. - currentQueue.remove().cohort.reportFailure(new TimeoutException()); + currentQueue.remove().cohort.reportFailure(cohortFailure); break; case COMMIT_PENDING: LOG.warn("{}: Transaction {} is still committing, cannot abort", logContext, @@ -1017,10 +1021,12 @@ public class ShardDataTree extends ShardDataTreeTransactionParent { currentTx.lastAccess = now; processNext = false; return; + case READY: + currentQueue.remove().cohort.reportFailure(cohortFailure); + break; case ABORTED: case COMMITTED: case FAILED: - case READY: default: currentQueue.remove(); } diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/SimpleShardDataTreeCohort.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/SimpleShardDataTreeCohort.java index 6c159b1f5e..f73d343bce 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/SimpleShardDataTreeCohort.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/SimpleShardDataTreeCohort.java @@ -108,7 +108,12 @@ abstract class SimpleShardDataTreeCohort extends ShardDataTreeCohort { checkState(State.READY); this.callback = Preconditions.checkNotNull(newCallback); state = State.CAN_COMMIT_PENDING; - dataTree.startCanCommit(this); + + if (nextFailure == null) { + dataTree.startCanCommit(this); + } else { + failedCanCommit(nextFailure); + } } @Override -- 2.36.6