BUG-8618: turn timeouts in READY state into canCommit failures 15/60415/2
authorRobert Varga <robert.varga@pantheon.tech>
Mon, 3 Jul 2017 16:55:18 +0000 (18:55 +0200)
committerRobert Varga <nite@hq.sk>
Sat, 15 Jul 2017 08:47:57 +0000 (08:47 +0000)
This patch adds more details to the TimeoutException reported when
we prune a transaction while it is in the queue. It also peels the
READY case from the defaults and makes sure we send an authoritative
reply back to the frontend when it requests the transaction to be
committed.

Change-Id: I21364ff7e7103af8be6988b8483adc112c3c1d25
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
(cherry picked from commit 0d5408c4babc902d270d9f81ed53c6af93bb2867)

opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardDataTree.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/SimpleShardDataTreeCohort.java

index 7b317faaee8619bdcd590cbf11ea59ca6e014b61..4c226aaa9ca918f7ed981e52e26301d3f6a229a0 100644 (file)
@@ -975,21 +975,25 @@ public class ShardDataTree extends ShardDataTreeTransactionParent {
             !pendingCommits.isEmpty() ? pendingCommits : pendingTransactions;
         final CommitEntry currentTx = currentQueue.peek();
         if (currentTx != null && currentTx.lastAccess + timeout < now) {
             !pendingCommits.isEmpty() ? pendingCommits : pendingTransactions;
         final CommitEntry currentTx = currentQueue.peek();
         if (currentTx != null && currentTx.lastAccess + timeout < now) {
+            final State state = currentTx.cohort.getState();
             LOG.warn("{}: Current transaction {} has timed out after {} ms in state {}", logContext,
             LOG.warn("{}: Current transaction {} has timed out after {} ms in state {}", logContext,
-                    currentTx.cohort.getIdentifier(), transactionCommitTimeoutMillis, currentTx.cohort.getState());
+                    currentTx.cohort.getIdentifier(), transactionCommitTimeoutMillis, state);
             boolean processNext = true;
             boolean processNext = true;
-            switch (currentTx.cohort.getState()) {
+            final TimeoutException cohortFailure = new TimeoutException("Backend timeout in state " + state + " after "
+                    + transactionCommitTimeoutMillis + "ms");
+
+            switch (state) {
                 case CAN_COMMIT_PENDING:
                 case CAN_COMMIT_PENDING:
-                    currentQueue.remove().cohort.failedCanCommit(new TimeoutException());
+                    currentQueue.remove().cohort.failedCanCommit(cohortFailure);
                     break;
                 case CAN_COMMIT_COMPLETE:
                     // The suppression of the FindBugs "DB_DUPLICATE_SWITCH_CLAUSES" warning pertains to this clause
                     // whose code is duplicated with PRE_COMMIT_COMPLETE. The clauses aren't combined in case the code
                     // in PRE_COMMIT_COMPLETE is changed.
                     break;
                 case CAN_COMMIT_COMPLETE:
                     // The suppression of the FindBugs "DB_DUPLICATE_SWITCH_CLAUSES" warning pertains to this clause
                     // whose code is duplicated with PRE_COMMIT_COMPLETE. The clauses aren't combined in case the code
                     // in PRE_COMMIT_COMPLETE is changed.
-                    currentQueue.remove().cohort.reportFailure(new TimeoutException());
+                    currentQueue.remove().cohort.reportFailure(cohortFailure);
                     break;
                 case PRE_COMMIT_PENDING:
                     break;
                 case PRE_COMMIT_PENDING:
-                    currentQueue.remove().cohort.failedPreCommit(new TimeoutException());
+                    currentQueue.remove().cohort.failedPreCommit(cohortFailure);
                     break;
                 case PRE_COMMIT_COMPLETE:
                     // FIXME: this is a legacy behavior problem. Three-phase commit protocol specifies that after we
                     break;
                 case PRE_COMMIT_COMPLETE:
                     // FIXME: this is a legacy behavior problem. Three-phase commit protocol specifies that after we
@@ -1009,7 +1013,7 @@ public class ShardDataTree extends ShardDataTreeTransactionParent {
                     //        In order to make the pre-commit timer working across failovers, though, we need
                     //        a per-shard cluster-wide monotonic time, so a follower becoming the leader can accurately
                     //        restart the timer.
                     //        In order to make the pre-commit timer working across failovers, though, we need
                     //        a per-shard cluster-wide monotonic time, so a follower becoming the leader can accurately
                     //        restart the timer.
-                    currentQueue.remove().cohort.reportFailure(new TimeoutException());
+                    currentQueue.remove().cohort.reportFailure(cohortFailure);
                     break;
                 case COMMIT_PENDING:
                     LOG.warn("{}: Transaction {} is still committing, cannot abort", logContext,
                     break;
                 case COMMIT_PENDING:
                     LOG.warn("{}: Transaction {} is still committing, cannot abort", logContext,
@@ -1017,10 +1021,12 @@ public class ShardDataTree extends ShardDataTreeTransactionParent {
                     currentTx.lastAccess = now;
                     processNext = false;
                     return;
                     currentTx.lastAccess = now;
                     processNext = false;
                     return;
+                case READY:
+                    currentQueue.remove().cohort.reportFailure(cohortFailure);
+                    break;
                 case ABORTED:
                 case COMMITTED:
                 case FAILED:
                 case ABORTED:
                 case COMMITTED:
                 case FAILED:
-                case READY:
                 default:
                     currentQueue.remove();
             }
                 default:
                     currentQueue.remove();
             }
index 6c159b1f5e1ce65c211b4a0ea2442ec964c319b3..f73d343bcec2ea4a0785ec84fa967b274345c3c6 100644 (file)
@@ -108,7 +108,12 @@ abstract class SimpleShardDataTreeCohort extends ShardDataTreeCohort {
         checkState(State.READY);
         this.callback = Preconditions.checkNotNull(newCallback);
         state = State.CAN_COMMIT_PENDING;
         checkState(State.READY);
         this.callback = Preconditions.checkNotNull(newCallback);
         state = State.CAN_COMMIT_PENDING;
-        dataTree.startCanCommit(this);
+
+        if (nextFailure == null) {
+            dataTree.startCanCommit(this);
+        } else {
+            failedCanCommit(nextFailure);
+        }
     }
 
     @Override
     }
 
     @Override