BUG-8618: turn timeouts in READY state into canCommit failures 95/59895/2
authorRobert Varga <robert.varga@pantheon.tech>
Mon, 3 Jul 2017 16:55:18 +0000 (18:55 +0200)
committerRobert Varga <nite@hq.sk>
Fri, 14 Jul 2017 17:16:30 +0000 (17:16 +0000)
This patch adds more details to the TimeoutException reported when
we prune a transaction while it is in the queue. It also peels the
READY case from the defaults and makes sure we send an authoritative
reply back to the frontend when it requests the transaction to be
committed.

Change-Id: I21364ff7e7103af8be6988b8483adc112c3c1d25
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/SimpleShardDataTreeCohort.java

index 8b6df53a1ab1040a80eea43c6601f31d41db28b6..f832c1adfa5205af556609844e6a36f7392268d7 100644 (file)
@@ -994,21 +994,25 @@ public class ShardDataTree extends ShardDataTreeTransactionParent {
             !pendingCommits.isEmpty() ? pendingCommits : pendingTransactions;
         final CommitEntry currentTx = currentQueue.peek();
         if (currentTx != null && currentTx.lastAccess + timeout < now) {
+            final State state = currentTx.cohort.getState();
             LOG.warn("{}: Current transaction {} has timed out after {} ms in state {}", logContext,
-                    currentTx.cohort.getIdentifier(), transactionCommitTimeoutMillis, currentTx.cohort.getState());
+                    currentTx.cohort.getIdentifier(), transactionCommitTimeoutMillis, state);
             boolean processNext = true;
-            switch (currentTx.cohort.getState()) {
+            final TimeoutException cohortFailure = new TimeoutException("Backend timeout in state " + state + " after "
+                    + transactionCommitTimeoutMillis + "ms");
+
+            switch (state) {
                 case CAN_COMMIT_PENDING:
-                    currentQueue.remove().cohort.failedCanCommit(new TimeoutException());
+                    currentQueue.remove().cohort.failedCanCommit(cohortFailure);
                     break;
                 case CAN_COMMIT_COMPLETE:
                     // The suppression of the FindBugs "DB_DUPLICATE_SWITCH_CLAUSES" warning pertains to this clause
                     // whose code is duplicated with PRE_COMMIT_COMPLETE. The clauses aren't combined in case the code
                     // in PRE_COMMIT_COMPLETE is changed.
-                    currentQueue.remove().cohort.reportFailure(new TimeoutException());
+                    currentQueue.remove().cohort.reportFailure(cohortFailure);
                     break;
                 case PRE_COMMIT_PENDING:
-                    currentQueue.remove().cohort.failedPreCommit(new TimeoutException());
+                    currentQueue.remove().cohort.failedPreCommit(cohortFailure);
                     break;
                 case PRE_COMMIT_COMPLETE:
                     // FIXME: this is a legacy behavior problem. Three-phase commit protocol specifies that after we
@@ -1028,7 +1032,7 @@ public class ShardDataTree extends ShardDataTreeTransactionParent {
                     //        In order to make the pre-commit timer working across failovers, though, we need
                     //        a per-shard cluster-wide monotonic time, so a follower becoming the leader can accurately
                     //        restart the timer.
-                    currentQueue.remove().cohort.reportFailure(new TimeoutException());
+                    currentQueue.remove().cohort.reportFailure(cohortFailure);
                     break;
                 case COMMIT_PENDING:
                     LOG.warn("{}: Transaction {} is still committing, cannot abort", logContext,
@@ -1036,10 +1040,12 @@ public class ShardDataTree extends ShardDataTreeTransactionParent {
                     currentTx.lastAccess = now;
                     processNext = false;
                     return;
+                case READY:
+                    currentQueue.remove().cohort.reportFailure(cohortFailure);
+                    break;
                 case ABORTED:
                 case COMMITTED:
                 case FAILED:
-                case READY:
                 default:
                     currentQueue.remove();
             }
index 6c159b1f5e1ce65c211b4a0ea2442ec964c319b3..f73d343bcec2ea4a0785ec84fa967b274345c3c6 100644 (file)
@@ -108,7 +108,12 @@ abstract class SimpleShardDataTreeCohort extends ShardDataTreeCohort {
         checkState(State.READY);
         this.callback = Preconditions.checkNotNull(newCallback);
         state = State.CAN_COMMIT_PENDING;
-        dataTree.startCanCommit(this);
+
+        if (nextFailure == null) {
+            dataTree.startCanCommit(this);
+        } else {
+            failedCanCommit(nextFailure);
+        }
     }
 
     @Override