BUG-8704: rework seal mechanics to not wait during replay
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / databroker / actors / dds / AbstractProxyTransaction.java
index cc1651a2edcf33a6ba4779b7bd37038a99db5ea0..07b89e09230949da6c4849b3fb5dc03d4c3c36d8 100644 (file)
@@ -8,13 +8,30 @@
 package org.opendaylight.controller.cluster.databroker.actors.dds;
 
 import akka.actor.ActorRef;
+import com.google.common.base.MoreObjects;
 import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
 import com.google.common.base.Verify;
+import com.google.common.collect.Iterables;
 import com.google.common.util.concurrent.CheckedFuture;
 import com.google.common.util.concurrent.ListenableFuture;
 import com.google.common.util.concurrent.SettableFuture;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
+import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
 import java.util.function.Consumer;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import javax.annotation.concurrent.GuardedBy;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.opendaylight.controller.cluster.access.client.ConnectionEntry;
+import org.opendaylight.controller.cluster.access.commands.AbstractLocalTransactionRequest;
+import org.opendaylight.controller.cluster.access.commands.ClosedTransactionException;
+import org.opendaylight.controller.cluster.access.commands.IncrementTransactionSequenceRequest;
 import org.opendaylight.controller.cluster.access.commands.TransactionAbortRequest;
 import org.opendaylight.controller.cluster.access.commands.TransactionAbortSuccess;
 import org.opendaylight.controller.cluster.access.commands.TransactionCanCommitSuccess;
@@ -22,14 +39,18 @@ import org.opendaylight.controller.cluster.access.commands.TransactionCommitSucc
 import org.opendaylight.controller.cluster.access.commands.TransactionDoCommitRequest;
 import org.opendaylight.controller.cluster.access.commands.TransactionPreCommitRequest;
 import org.opendaylight.controller.cluster.access.commands.TransactionPreCommitSuccess;
+import org.opendaylight.controller.cluster.access.commands.TransactionPurgeRequest;
 import org.opendaylight.controller.cluster.access.commands.TransactionRequest;
+import org.opendaylight.controller.cluster.access.concepts.Request;
 import org.opendaylight.controller.cluster.access.concepts.RequestFailure;
 import org.opendaylight.controller.cluster.access.concepts.Response;
 import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
-import org.opendaylight.controller.md.sal.common.api.data.ReadFailedException;
+import org.opendaylight.mdsal.common.api.ReadFailedException;
 import org.opendaylight.yangtools.concepts.Identifiable;
 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
 import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Class translating transaction operations towards a particular backend shard.
@@ -45,63 +66,361 @@ import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode;
  * @author Robert Varga
  */
 abstract class AbstractProxyTransaction implements Identifiable<TransactionIdentifier> {
-    private final DistributedDataStoreClientBehavior client;
+    /**
+     * Marker object used instead of read-type of requests, which are satisfied only once. This has a lower footprint
+     * and allows compressing multiple requests into a single entry.
+     */
+    @NotThreadSafe
+    private static final class IncrementSequence {
+        private final long sequence;
+        private long delta = 0;
+
+        IncrementSequence(final long sequence) {
+            this.sequence = sequence;
+        }
+
+        long getDelta() {
+            return delta;
+        }
+
+        long getSequence() {
+            return sequence;
+        }
+
+        void incrementDelta() {
+            delta++;
+        }
+    }
+
+    /**
+     * Base class for representing logical state of this proxy. See individual instantiations and {@link SuccessorState}
+     * for details.
+     */
+    private static class State {
+        private final String string;
+
+        State(final String string) {
+            this.string = Preconditions.checkNotNull(string);
+        }
+
+        @Override
+        public final String toString() {
+            return string;
+        }
+    }
+
+    /**
+     * State class used when a successor has interfered. Contains coordinator latch, the successor and previous state.
+     * This is a temporary state introduced during reconnection process and is necessary for correct state hand-off
+     * between the old connection (potentially being accessed by the user) and the new connection (being cleaned up
+     * by the actor).
+     *
+     * <p>
+     * When a user operation encounters this state, it synchronizes on it and waits until reconnection completes,
+     * at which point the request is routed to the successor transaction. This is a relatively heavy-weight solution
+     * to the problem of state transfer, but the user will observe it only if the race condition is hit.
+     */
+    private static class SuccessorState extends State {
+        private final CountDownLatch latch = new CountDownLatch(1);
+        private AbstractProxyTransaction successor;
+        private State prevState;
+
+        // SUCCESSOR + DONE
+        private boolean done;
+
+        SuccessorState() {
+            super("SUCCESSOR");
+        }
+
+        // Synchronize with succession process and return the successor
+        AbstractProxyTransaction await() {
+            try {
+                latch.await();
+            } catch (InterruptedException e) {
+                LOG.warn("Interrupted while waiting for latch of {}", successor);
+                throw Throwables.propagate(e);
+            }
+            return successor;
+        }
+
+        void finish() {
+            latch.countDown();
+        }
+
+        State getPrevState() {
+            return Verify.verifyNotNull(prevState, "Attempted to access previous state, which was not set");
+        }
+
+        void setPrevState(final State prevState) {
+            Verify.verify(this.prevState == null, "Attempted to set previous state to %s when we already have %s",
+                    prevState, this.prevState);
+            this.prevState = Preconditions.checkNotNull(prevState);
+            // We cannot have duplicate successor states, so this check is sufficient
+            this.done = DONE.equals(prevState);
+        }
+
+        // To be called from safe contexts, where successor is known to be completed
+        AbstractProxyTransaction getSuccessor() {
+            return Verify.verifyNotNull(successor);
+        }
+
+        void setSuccessor(final AbstractProxyTransaction successor) {
+            Verify.verify(this.successor == null, "Attempted to set successor to %s when we already have %s",
+                    successor, this.successor);
+            this.successor = Preconditions.checkNotNull(successor);
+        }
+
+        boolean isDone() {
+            return done;
+        }
+
+        void setDone() {
+            done = true;
+        }
+    }
+
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractProxyTransaction.class);
+    private static final AtomicIntegerFieldUpdater<AbstractProxyTransaction> SEALED_UPDATER =
+            AtomicIntegerFieldUpdater.newUpdater(AbstractProxyTransaction.class, "sealed");
+    private static final AtomicReferenceFieldUpdater<AbstractProxyTransaction, State> STATE_UPDATER =
+            AtomicReferenceFieldUpdater.newUpdater(AbstractProxyTransaction.class, State.class, "state");
 
+    /**
+     * Transaction has been open and is being actively worked on.
+     */
+    private static final State OPEN = new State("OPEN");
+
+    /**
+     * Transaction has been sealed by the user, but it has not completed flushing to the backend yet. This is
+     * a transition state, as we are waiting for the user to initiate commit procedures.
+     *
+     * <p>
+     * Since the reconnect mechanics relies on state replay for transactions, this state needs to be flushed into the
+     * queue to re-create state in successor transaction (which may be based on different messages as locality may have
+     * changed). Hence the transition to {@link #FLUSHED} state needs to be handled in a thread-safe manner.
+     */
+    private static final State SEALED = new State("SEALED");
+
+    /**
+     * Transaction state has been flushed into the queue, i.e. it is visible by the successor and potentially
+     * the backend. At this point the transaction does not hold any state besides successful requests, all other state
+     * is held either in the connection's queue or the successor object.
+     *
+     * <p>
+     * Transition to this state indicates we have all input from the user we need to initiate the correct commit
+     * protocol.
+     */
+    private static final State FLUSHED = new State("FLUSHED");
+
+    /**
+     * Transaction state has been completely resolved, we have received confirmation of the transaction fate from
+     * the backend. The only remaining task left to do is finishing up the state cleanup, which is done via purge
+     * request. We need to hang on to the transaction until that is done, as we have to make sure backend completes
+     * purging its state -- otherwise we could have a leak on the backend.
+     */
+    private static final State DONE = new State("DONE");
+
+    // Touched from client actor thread only
+    private final Deque<Object> successfulRequests = new ArrayDeque<>();
+    private final ProxyHistory parent;
+
+    // Accessed from user thread only, which may not access this object concurrently
     private long sequence;
-    private boolean sealed;
 
-    AbstractProxyTransaction(final DistributedDataStoreClientBehavior client) {
-        this.client = Preconditions.checkNotNull(client);
+    /*
+     * Atomic state-keeping is required to synchronize the process of propagating completed transaction state towards
+     * the backend -- which may include a successor.
+     *
+     * Successor, unlike {@link AbstractProxyTransaction#seal()} is triggered from the client actor thread, which means
+     * the successor placement needs to be atomic with regard to the application thread.
+     *
+     * In the common case, the application thread performs the seal operations and then "immediately" sends
+     * the corresponding message. The uncommon case is when the seal and send operations race with a connect completion
+     * or timeout, when a successor is injected.
+     *
+     * This leaves the problem of needing to completely transfer state just after all queued messages are replayed
+     * after a successor was injected, so that it can be properly sealed if we are racing. Further complication comes
+     * from lock ordering, where the successor injection works with a locked queue and locks proxy objects -- leading
+     * to a potential AB-BA deadlock in case of a naive implementation.
+     *
+     * For tracking user-visible state we use a single volatile int, which is flipped atomically from 0 to 1 exactly
+     * once in {@link AbstractProxyTransaction#seal()}. That keeps common operations fast, as they need to perform
+     * only a single volatile read to assert state correctness.
+     *
+     * For synchronizing client actor (successor-injecting) and user (commit-driving) thread, we keep a separate state
+     * variable. It uses pre-allocated objects for fast paths (i.e. no successor present) and a per-transition object
+     * for slow paths (when successor is injected/present).
+     */
+    private volatile int sealed;
+    private volatile State state;
+
+    AbstractProxyTransaction(final ProxyHistory parent, final boolean isDone) {
+        this.parent = Preconditions.checkNotNull(parent);
+        if (isDone) {
+            state = DONE;
+            // DONE implies previous seal operation completed
+            sealed = 1;
+        } else {
+            state = OPEN;
+        }
+    }
+
+    final void executeInActor(final Runnable command) {
+        parent.context().executeInActor(behavior -> {
+            command.run();
+            return behavior;
+        });
     }
 
     final ActorRef localActor() {
-        return client.self();
+        return parent.localActor();
+    }
+
+    final void incrementSequence(final long delta) {
+        sequence += delta;
+        LOG.debug("Transaction {} incremented sequence to {}", this, sequence);
     }
 
     final long nextSequence() {
-        return sequence++;
+        final long ret = sequence++;
+        LOG.debug("Transaction {} allocated sequence {}", this, ret);
+        return ret;
     }
 
     final void delete(final YangInstanceIdentifier path) {
-        checkSealed();
+        checkReadWrite();
+        checkNotSealed();
         doDelete(path);
     }
 
     final void merge(final YangInstanceIdentifier path, final NormalizedNode<?, ?> data) {
-        checkSealed();
+        checkReadWrite();
+        checkNotSealed();
         doMerge(path, data);
     }
 
     final void write(final YangInstanceIdentifier path, final NormalizedNode<?, ?> data) {
-        checkSealed();
+        checkReadWrite();
+        checkNotSealed();
         doWrite(path, data);
     }
 
     final CheckedFuture<Boolean, ReadFailedException> exists(final YangInstanceIdentifier path) {
-        checkSealed();
+        checkNotSealed();
         return doExists(path);
     }
 
     final CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> read(final YangInstanceIdentifier path) {
-        checkSealed();
+        checkNotSealed();
         return doRead(path);
     }
 
-    final void sendRequest(final TransactionRequest<?> request, final Consumer<Response<?, ?>> completer) {
-        client.sendRequest(request, completer);
+    final void enqueueRequest(final TransactionRequest<?> request, final Consumer<Response<?, ?>> callback,
+            final long enqueuedTicks) {
+        LOG.debug("Transaction proxy {} enqueing request {} callback {}", this, request, callback);
+        parent.enqueueRequest(request, callback, enqueuedTicks);
+    }
+
+    final void sendRequest(final TransactionRequest<?> request, final Consumer<Response<?, ?>> callback) {
+        LOG.debug("Transaction proxy {} sending request {} callback {}", this, request, callback);
+        parent.sendRequest(request, callback);
     }
 
     /**
-     * Seals this transaction when ready.
+     * Seal this transaction before it is either committed or aborted. This method should only be invoked from
+     * application thread.
      */
     final void seal() {
-        checkSealed();
-        doSeal();
-        sealed = true;
+        // Transition user-visible state first
+        final boolean success = markSealed();
+        Preconditions.checkState(success, "Proxy %s was already sealed", getIdentifier());
+
+        if (!sealAndSend(Optional.absent())) {
+            sealSuccessor();
+        }
+    }
+
+    /**
+     * Internal seal propagation method, invoked when we have raced with reconnection thread. Note that there may have
+     * been multiple reconnects, so we have to make sure the action is propagated through all intermediate instances.
+     */
+    private void sealSuccessor() {
+        // Slow path: wait for the successor to complete
+        final AbstractProxyTransaction successor = awaitSuccessor();
+
+        // At this point the successor has completed transition and is possibly visible by the user thread, which is
+        // still stuck here. The successor has not seen final part of our state, nor the fact it is sealed.
+        // Propagate state and seal the successor.
+        flushState(successor);
+        successor.predecessorSealed();
+    }
+
+    private void predecessorSealed() {
+        if (markSealed() && !sealAndSend(Optional.absent())) {
+            sealSuccessor();
+        }
+    }
+
+    void sealOnly() { // seal without sending: notify the parent history, then move OPEN -> SEALED
+        parent.onTransactionSealed(this);
+        final boolean success = STATE_UPDATER.compareAndSet(this, OPEN, SEALED); // fails unless still OPEN
+        Verify.verify(success, "Attempted to replay seal on %s", this); // Guava Verify substitutes %s, not {}
+    }
+
+    /**
+     * Seal this transaction and potentially send it out towards the backend. If this method reports false, the caller
+     * needs to deal with propagating the seal operation towards the successor.
+     *
+     * @param enqueuedTicks Enqueue ticks when this is invoked from replay path.
+     * @return True if seal operation was successful, false if this proxy has a successor.
+     */
+    boolean sealAndSend(final Optional<Long> enqueuedTicks) {
+        parent.onTransactionSealed(this);
+
+        // Transition internal state to sealed and detect presence of a successor
+        return STATE_UPDATER.compareAndSet(this, OPEN, SEALED);
+    }
+
+    /**
+     * Mark this proxy as having been sealed.
+     *
+     * @return True if this call has transitioned to sealed state.
+     */
+    final boolean markSealed() {
+        return SEALED_UPDATER.compareAndSet(this, 0, 1);
+    }
+
+    private void checkNotSealed() {
+        Preconditions.checkState(sealed == 0, "Transaction %s has already been sealed", getIdentifier());
     }
 
     private void checkSealed() {
-        Preconditions.checkState(sealed, "Transaction %s has not been sealed yet", getIdentifier());
+        Preconditions.checkState(sealed != 0, "Transaction %s has not been sealed yet", getIdentifier());
+    }
+
+    private SuccessorState getSuccessorState() {
+        final State local = state;
+        Verify.verify(local instanceof SuccessorState, "State %s has unexpected class", local);
+        return (SuccessorState) local;
+    }
+
+    private void checkReadWrite() {
+        if (isSnapshotOnly()) {
+            throw new UnsupportedOperationException("Transaction " + getIdentifier() + " is a read-only snapshot");
+        }
+    }
+
+    final void recordSuccessfulRequest(final @Nonnull TransactionRequest<?> req) {
+        successfulRequests.add(Verify.verifyNotNull(req));
+    }
+
+    final void recordFinishedRequest(final Response<?, ?> response) {
+        final Object last = successfulRequests.peekLast();
+        if (last instanceof IncrementSequence) {
+            ((IncrementSequence) last).incrementDelta();
+        } else {
+            successfulRequests.addLast(new IncrementSequence(response.getSequence()));
+        }
     }
 
     /**
@@ -109,24 +428,55 @@ abstract class AbstractProxyTransaction implements Identifiable<TransactionIdent
      * being sent to the backend.
      */
     final void abort() {
-        checkSealed();
-        doAbort();
+        checkNotSealed();
+        parent.abortTransaction(this);
+
+        sendRequest(abortRequest(), resp -> {
+            LOG.debug("Transaction {} abort completed with {}", getIdentifier(), resp);
+            enqueuePurge();
+        });
     }
 
-    void abort(final VotingFuture<Void> ret) {
+    final void abort(final VotingFuture<Void> ret) {
         checkSealed();
 
-        sendRequest(new TransactionAbortRequest(getIdentifier(), nextSequence(), localActor()), t -> {
+        sendDoAbort(t -> {
             if (t instanceof TransactionAbortSuccess) {
                 ret.voteYes();
             } else if (t instanceof RequestFailure) {
-                ret.voteNo(((RequestFailure<?, ?>) t).getCause());
+                ret.voteNo(((RequestFailure<?, ?>) t).getCause().unwrap());
             } else {
                 ret.voteNo(new IllegalStateException("Unhandled response " + t.getClass()));
             }
+
+            // This is a terminal request, hence we do not need to record it
+            LOG.debug("Transaction {} abort completed", this);
+            enqueuePurge();
         });
     }
 
+    final void enqueueAbort(final Consumer<Response<?, ?>> callback, final long enqueuedTicks) {
+        checkNotSealed();
+        parent.abortTransaction(this);
+
+        enqueueRequest(abortRequest(), resp -> {
+            LOG.debug("Transaction {} abort completed with {}", getIdentifier(), resp);
+            // Purge will be sent by the predecessor's callback
+            if (callback != null) {
+                callback.accept(resp);
+            }
+        }, enqueuedTicks);
+    }
+
+    final void enqueueDoAbort(final Consumer<Response<?, ?>> callback, final long enqueuedTicks) {
+        enqueueRequest(new TransactionAbortRequest(getIdentifier(), nextSequence(), localActor()), callback,
+            enqueuedTicks);
+    }
+
+    final void sendDoAbort(final Consumer<Response<?, ?>> callback) {
+        sendRequest(new TransactionAbortRequest(getIdentifier(), nextSequence(), localActor()), callback);
+    }
+
     /**
      * Commit this transaction, possibly in a coordinated fashion.
      *
@@ -134,77 +484,363 @@ abstract class AbstractProxyTransaction implements Identifiable<TransactionIdent
      * @return Future completion
      */
     final ListenableFuture<Boolean> directCommit() {
+        checkReadWrite();
         checkSealed();
 
-        final SettableFuture<Boolean> ret = SettableFuture.create();
-        sendRequest(Verify.verifyNotNull(doCommit(false)), t -> {
-            if (t instanceof TransactionCommitSuccess) {
-                ret.set(Boolean.TRUE);
-            } else if (t instanceof RequestFailure) {
-                ret.setException(((RequestFailure<?, ?>) t).getCause());
-            } else {
-                ret.setException(new IllegalStateException("Unhandled response " + t.getClass()));
+        // Precludes startReconnect() from interfering with the fast path
+        synchronized (this) {
+            if (STATE_UPDATER.compareAndSet(this, SEALED, FLUSHED)) {
+                final SettableFuture<Boolean> ret = SettableFuture.create();
+                sendRequest(Verify.verifyNotNull(commitRequest(false)), t -> {
+                    if (t instanceof TransactionCommitSuccess) {
+                        ret.set(Boolean.TRUE);
+                    } else if (t instanceof RequestFailure) {
+                        final Throwable cause = ((RequestFailure<?, ?>) t).getCause().unwrap();
+                        if (cause instanceof ClosedTransactionException) {
+                            // This is okay, as it indicates the transaction has been completed. It can happen
+                            // when we lose connectivity with the backend after it has received the request.
+                            ret.set(Boolean.TRUE);
+                        } else {
+                            ret.setException(cause);
+                        }
+                    } else {
+                        ret.setException(new IllegalStateException("Unhandled response " + t.getClass()));
+                    }
+
+                    // This is a terminal request, hence we do not need to record it
+                    LOG.debug("Transaction {} directCommit completed", this);
+                    enqueuePurge();
+                });
+
+                return ret;
             }
-        });
-        return ret;
+        }
+
+        // We have had some interference with successor injection, wait for it to complete and defer to the successor.
+        return awaitSuccessor().directCommit();
     }
 
-    void canCommit(final VotingFuture<?> ret) {
+    final void canCommit(final VotingFuture<?> ret) {
+        checkReadWrite();
         checkSealed();
 
-        sendRequest(Verify.verifyNotNull(doCommit(true)), t -> {
-            if (t instanceof TransactionCanCommitSuccess) {
-                ret.voteYes();
-            } else if (t instanceof RequestFailure) {
-                ret.voteNo(((RequestFailure<?, ?>) t).getCause());
-            } else {
-                ret.voteNo(new IllegalStateException("Unhandled response " + t.getClass()));
+        // Precludes startReconnect() from interfering with the fast path
+        synchronized (this) {
+            if (STATE_UPDATER.compareAndSet(this, SEALED, FLUSHED)) {
+                final TransactionRequest<?> req = Verify.verifyNotNull(commitRequest(true));
+
+                sendRequest(req, t -> {
+                    if (t instanceof TransactionCanCommitSuccess) {
+                        ret.voteYes();
+                    } else if (t instanceof RequestFailure) {
+                        ret.voteNo(((RequestFailure<?, ?>) t).getCause().unwrap());
+                    } else {
+                        ret.voteNo(new IllegalStateException("Unhandled response " + t.getClass()));
+                    }
+
+                    recordSuccessfulRequest(req);
+                    LOG.debug("Transaction {} canCommit completed", this);
+                });
+
+                return;
             }
-        });
+        }
+
+        // We have had some interference with successor injection, wait for it to complete and defer to the successor.
+        awaitSuccessor().canCommit(ret);
+    }
+
+    private AbstractProxyTransaction awaitSuccessor() {
+        return getSuccessorState().await();
     }
 
-    void preCommit(final VotingFuture<?> ret) {
+    final void preCommit(final VotingFuture<?> ret) {
+        checkReadWrite();
         checkSealed();
 
-        sendRequest(new TransactionPreCommitRequest(getIdentifier(), nextSequence(), localActor()), t -> {
+        final TransactionRequest<?> req = new TransactionPreCommitRequest(getIdentifier(), nextSequence(),
+            localActor());
+        sendRequest(req, t -> {
             if (t instanceof TransactionPreCommitSuccess) {
                 ret.voteYes();
             } else if (t instanceof RequestFailure) {
-                ret.voteNo(((RequestFailure<?, ?>) t).getCause());
+                ret.voteNo(((RequestFailure<?, ?>) t).getCause().unwrap());
             } else {
                 ret.voteNo(new IllegalStateException("Unhandled response " + t.getClass()));
             }
+
+            onPreCommitComplete(req);
         });
     }
 
-    void doCommit(final VotingFuture<?> ret) {
+    private void onPreCommitComplete(final TransactionRequest<?> req) {
+        /*
+         * The backend has agreed that the transaction has entered PRE_COMMIT phase, meaning it will be committed
+         * to storage after the timeout completes.
+         *
+         * All state has been replicated to the backend, hence we do not need to keep it around. Retain only
+         * the precommit request, so we know which request to use for resync.
+         */
+        LOG.debug("Transaction {} preCommit completed, clearing successfulRequests", this);
+        successfulRequests.clear();
+
+        // TODO: this works, but can contain some useless state (like batched operations). Create an empty
+        //       equivalent of this request and store that.
+        recordSuccessfulRequest(req);
+    }
+
+    final void doCommit(final VotingFuture<?> ret) {
+        checkReadWrite();
         checkSealed();
 
         sendRequest(new TransactionDoCommitRequest(getIdentifier(), nextSequence(), localActor()), t -> {
             if (t instanceof TransactionCommitSuccess) {
                 ret.voteYes();
             } else if (t instanceof RequestFailure) {
-                ret.voteNo(((RequestFailure<?, ?>) t).getCause());
+                ret.voteNo(((RequestFailure<?, ?>) t).getCause().unwrap());
             } else {
                 ret.voteNo(new IllegalStateException("Unhandled response " + t.getClass()));
             }
+
+            LOG.debug("Transaction {} doCommit completed", this);
+
+            // Needed for ProxyHistory$Local data tree rebase points.
+            parent.completeTransaction(this);
+
+            enqueuePurge();
         });
     }
 
-    abstract TransactionRequest<?> doCommit(boolean coordinated);
+    private void enqueuePurge() {
+        enqueuePurge(null);
+    }
+
+    final void enqueuePurge(final Consumer<Response<?, ?>> callback) {
+        // Purge requests are dispatched internally, hence should not wait
+        enqueuePurge(callback, parent.currentTime());
+    }
+
+    final void enqueuePurge(final Consumer<Response<?, ?>> callback, final long enqueuedTicks) {
+        LOG.debug("{}: initiating purge", this);
+
+        final State prev = state;
+        if (prev instanceof SuccessorState) {
+            ((SuccessorState) prev).setDone();
+        } else {
+            final boolean success = STATE_UPDATER.compareAndSet(this, prev, DONE);
+            if (!success) {
+                LOG.warn("{}: moved from state {} while we were purging it", this, prev);
+            }
+        }
+
+        successfulRequests.clear();
+
+        enqueueRequest(new TransactionPurgeRequest(getIdentifier(), nextSequence(), localActor()), resp -> {
+            LOG.debug("{}: purge completed", this);
+            parent.purgeTransaction(this);
+
+            if (callback != null) {
+                callback.accept(resp);
+            }
+        }, enqueuedTicks);
+    }
+
+    // Called with the connection unlocked
+    final synchronized void startReconnect() {
+        // At this point canCommit/directCommit are blocked, we assert a new successor state, retrieving the previous
+        // state. This method is called with the queue still unlocked.
+        final SuccessorState nextState = new SuccessorState();
+        final State prevState = STATE_UPDATER.getAndSet(this, nextState);
+
+        LOG.debug("Start reconnect of proxy {} previous state {}", this, prevState);
+        Verify.verify(!(prevState instanceof SuccessorState), "Proxy %s duplicate reconnect attempt after %s", this,
+            prevState);
+
+        // We have asserted a slow-path state, seal(), canCommit(), directCommit() are forced to slow paths, which will
+        // wait until we unblock nextState's latch before accessing state. Now we record prevState for later use and we
+        // are done.
+        nextState.setPrevState(prevState);
+    }
+
+    /**
+     * Create this proxy's successor in {@code successorHistory} and replay this transaction's requests to it.
+     *
+     * <p>
+     * Requests recorded in {@code successfulRequests} are replayed first, each with a no-op callback, after which
+     * that collection is cleared. Then every entry of {@code enqueuedEntries} whose request targets this
+     * transaction is replayed with its original callback and enqueue time, and removed from
+     * {@code enqueuedEntries}. Finally, if this proxy was SEALED when the reconnect attempt started, its state is
+     * flushed to the successor and the successor is sealed as well.
+     *
+     * <p>
+     * Called with the connection locked.
+     *
+     * @param successorHistory history used to create the successor transaction proxy
+     * @param enqueuedEntries entries queued on the connection; entries replayed here are removed through the iterator
+     */
+    final void replayMessages(final ProxyHistory successorHistory, final Iterable<ConnectionEntry> enqueuedEntries) {
+        final SuccessorState local = getSuccessorState();
+        final State prevState = local.getPrevState();
+
+        final AbstractProxyTransaction successor = successorHistory.createTransactionProxy(getIdentifier(),
+            isSnapshotOnly(), local.isDone());
+        LOG.debug("{} created successor {}", this, successor);
+        local.setSuccessor(successor);
+
+        // Replay successful requests first
+        if (!successfulRequests.isEmpty()) {
+            // We need to find a good timestamp to use for successful requests, as we do not want to time them out
+            // nor create timing inconsistencies in the queue -- requests are expected to be ordered by their enqueue
+            // time. We will pick the time of the first entry available. If there is none, we will just use current
+            // time, as all other requests will get enqueued afterwards.
+            final ConnectionEntry firstInQueue = Iterables.getFirst(enqueuedEntries, null);
+            final long now = firstInQueue != null ? firstInQueue.getEnqueuedTicks() : parent.currentTime();
+
+            for (Object obj : successfulRequests) {
+                if (obj instanceof TransactionRequest) {
+                    LOG.debug("Forwarding successful request {} to successor {}", obj, successor);
+                    successor.doReplayRequest((TransactionRequest<?>) obj, resp -> { }, now);
+                } else {
+                    Verify.verify(obj instanceof IncrementSequence);
+                    final IncrementSequence increment = (IncrementSequence) obj;
+                    successor.doReplayRequest(new IncrementTransactionSequenceRequest(getIdentifier(),
+                        increment.getSequence(), localActor(), isSnapshotOnly(), increment.getDelta()), resp -> { },
+                        now);
+                    LOG.debug("Incrementing sequence {} to successor {}", obj, successor);
+                }
+            }
+            LOG.debug("{} replayed {} successful requests", getIdentifier(), successfulRequests.size());
+            successfulRequests.clear();
+        }
+
+        // Now replay whatever is in the connection and targets this transaction, removing it from the queue
+        final Iterator<ConnectionEntry> it = enqueuedEntries.iterator();
+        while (it.hasNext()) {
+            final ConnectionEntry e = it.next();
+            final Request<?, ?> req = e.getRequest();
+
+            if (getIdentifier().equals(req.getTarget())) {
+                Verify.verify(req instanceof TransactionRequest, "Unhandled request %s", req);
+                LOG.debug("Replaying queued request {} to successor {}", req, successor);
+                successor.doReplayRequest((TransactionRequest<?>) req, e.getCallback(), e.getEnqueuedTicks());
+                it.remove();
+            }
+        }
+
+        /*
+         * Check the state at which we have started the reconnect attempt. State transitions triggered while we were
+         * reconnecting have been forced to slow paths, which remain blocked until finishReconnect() releases
+         * the state latch -- that does not happen in this method.
+         */
+        if (SEALED.equals(prevState)) {
+            LOG.debug("Proxy {} reconnected while being sealed, propagating state to successor {}", this, successor);
+            flushState(successor);
+            if (successor.markSealed()) {
+                successor.sealAndSend(Optional.of(parent.currentTime()));
+            }
+        }
+    }
+
+    /**
+     * Invoked from {@link #replayMessages(AbstractProxyTransaction, Iterable)} to have successor adopt an in-flight
+     * request.
+     *
+     * <p>
+     * Note: this method is invoked by the predecessor on the successor.
+     *
+     * @param request Request which needs to be forwarded
+     * @param callback Callback to be invoked once the request completes
+     * @param enqueuedTicks ticker-based time stamp when the request was enqueued
+     */
+    private void doReplayRequest(final TransactionRequest<?> request, final Consumer<Response<?, ?>> callback,
+            final long enqueuedTicks) {
+        if (request instanceof AbstractLocalTransactionRequest) {
+            handleReplayedLocalRequest((AbstractLocalTransactionRequest<?>) request, callback, enqueuedTicks);
+        } else {
+            handleReplayedRemoteRequest(request, callback, enqueuedTicks);
+        }
+    }
+
+    // Called with the connection locked
+    final void finishReconnect() {
+        final SuccessorState local = getSuccessorState();
+        LOG.debug("Finishing reconnect of proxy {}", this);
+
+        // All done, release the latch, unblocking seal() and canCommit() slow paths
+        local.finish();
+    }
+
+    /**
+     * Invoked from a retired connection for requests which have been in-flight and need to be re-adjusted
+     * and forwarded to the successor connection.
+     *
+     * @param request Request to be forwarded
+     * @param callback Original callback
+     */
+    final void forwardRequest(final TransactionRequest<?> request, final Consumer<Response<?, ?>> callback) {
+        forwardToSuccessor(getSuccessorState().getSuccessor(), request, callback);
+    }
+
+    final void forwardToSuccessor(final AbstractProxyTransaction successor, final TransactionRequest<?> request,
+            final Consumer<Response<?, ?>> callback) {
+        if (successor instanceof LocalProxyTransaction) {
+            forwardToLocal((LocalProxyTransaction)successor, request, callback);
+        } else if (successor instanceof RemoteProxyTransaction) {
+            forwardToRemote((RemoteProxyTransaction)successor, request, callback);
+        } else {
+            throw new IllegalStateException("Unhandled successor " + successor);
+        }
+    }
+
+    final void replayRequest(final TransactionRequest<?> request, final Consumer<Response<?, ?>> callback,
+            final long enqueuedTicks) {
+        getSuccessorState().getSuccessor().doReplayRequest(request, callback, enqueuedTicks);
+    }
+
+    /**
+     * Report the snapshot-only flag of this transaction. During replay the value is passed on when the successor
+     * proxy is created and when an {@link IncrementTransactionSequenceRequest} is built.
+     * NOTE(review): presumably true for transactions which only read from a snapshot -- confirm against subclasses.
+     *
+     * @return the snapshot-only flag
+     */
+    abstract boolean isSnapshotOnly();
+
+    /**
+     * Subclass hook for the delete operation. NOTE(review): implementations are not visible here; presumably
+     * deletes the node at {@code path} within this transaction -- confirm against the subclasses.
+     *
+     * @param path path the operation applies to
+     */
+    abstract void doDelete(YangInstanceIdentifier path);
 
-    abstract void doDelete(final YangInstanceIdentifier path);
+    /**
+     * Subclass hook for the merge operation. NOTE(review): implementations are not visible here; presumably
+     * merges {@code data} into the node at {@code path} within this transaction -- confirm against the subclasses.
+     *
+     * @param path path the operation applies to
+     * @param data data supplied to the operation
+     */
+    abstract void doMerge(YangInstanceIdentifier path, NormalizedNode<?, ?> data);
 
-    abstract void doMerge(final YangInstanceIdentifier path, final NormalizedNode<?, ?> data);
+    /**
+     * Subclass hook for the write operation. NOTE(review): implementations are not visible here; presumably
+     * writes {@code data} at {@code path} within this transaction -- confirm against the subclasses.
+     *
+     * @param path path the operation applies to
+     * @param data data supplied to the operation
+     */
+    abstract void doWrite(YangInstanceIdentifier path, NormalizedNode<?, ?> data);
 
-    abstract void doWrite(final YangInstanceIdentifier path, final NormalizedNode<?, ?> data);
+    /**
+     * Subclass hook for the exists check. NOTE(review): implementations are not visible here; presumably the
+     * future completes with whether a node exists at {@code path} -- confirm against the subclasses.
+     *
+     * @param path path the check applies to
+     * @return future producing a {@link Boolean}, failing with {@link ReadFailedException}
+     */
+    abstract CheckedFuture<Boolean, ReadFailedException> doExists(YangInstanceIdentifier path);
 
-    abstract CheckedFuture<Boolean, ReadFailedException> doExists(final YangInstanceIdentifier path);
+    /**
+     * Subclass hook for the read operation. NOTE(review): implementations are not visible here; presumably the
+     * future completes with the node at {@code path}, or absent if there is none -- confirm against the subclasses.
+     *
+     * @param path path the read applies to
+     * @return future producing an optional {@link NormalizedNode}, failing with {@link ReadFailedException}
+     */
+    abstract CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> doRead(YangInstanceIdentifier path);
 
-    abstract CheckedFuture<Optional<NormalizedNode<?, ?>>, ReadFailedException> doRead(
-            final YangInstanceIdentifier path);
+    /**
+     * Propagate this proxy's state to its successor. Invoked during message replay when this proxy was in SEALED
+     * state at the start of the reconnect attempt, before the successor itself is marked sealed.
+     *
+     * @param successor successor proxy which should receive the state
+     */
+    @GuardedBy("this")
+    abstract void flushState(AbstractProxyTransaction successor);
 
-    abstract void doSeal();
+    /**
+     * Subclass hook producing a {@link TransactionRequest}. NOTE(review): callers are not visible here; presumably
+     * the request which aborts this transaction -- confirm against the subclasses.
+     *
+     * @return request produced by the subclass
+     */
+    abstract TransactionRequest<?> abortRequest();
 
-    abstract void doAbort();
+    /**
+     * Subclass hook producing a {@link TransactionRequest}. NOTE(review): callers are not visible here; presumably
+     * the request which commits this transaction, with {@code coordinated} selecting between a coordinated
+     * and a direct commit -- confirm against the subclasses.
+     *
+     * @param coordinated flag passed through to the subclass
+     * @return request produced by the subclass
+     */
+    abstract TransactionRequest<?> commitRequest(boolean coordinated);
+
+    /**
+     * Replay a request originating in this proxy to a successor remote proxy. Invoked from
+     * {@link #forwardToSuccessor(AbstractProxyTransaction, TransactionRequest, Consumer)} when the successor is
+     * a {@link RemoteProxyTransaction}.
+     *
+     * @param successor Successor remote proxy
+     * @param request Request to be forwarded
+     * @param callback Original callback
+     */
+    abstract void forwardToRemote(RemoteProxyTransaction successor, TransactionRequest<?> request,
+            Consumer<Response<?, ?>> callback);
+
+    /**
+     * Replay a request originating in this proxy to a successor local proxy. Invoked from
+     * {@link #forwardToSuccessor(AbstractProxyTransaction, TransactionRequest, Consumer)} when the successor is
+     * a {@link LocalProxyTransaction}.
+     *
+     * @param successor Successor local proxy
+     * @param request Request to be forwarded
+     * @param callback Original callback
+     */
+    abstract void forwardToLocal(LocalProxyTransaction successor, TransactionRequest<?> request,
+            Consumer<Response<?, ?>> callback);
+
+    /**
+     * Invoked from {@code doReplayRequest()} for every replayed request which is
+     * an {@link AbstractLocalTransactionRequest}. This covers both previously-successful requests and requests
+     * still queued on the connection. NOTE(review): such requests presumably originate from
+     * a {@link LocalProxyTransaction} predecessor -- confirm.
+     *
+     * <p>
+     * Note: this method is invoked by the predecessor on the successor.
+     *
+     * @param request Request which needs to be forwarded
+     * @param callback Callback to be invoked once the request completes, may be null
+     * @param enqueuedTicks Time stamp to use for enqueue time
+     */
+    abstract void handleReplayedLocalRequest(AbstractLocalTransactionRequest<?> request,
+            @Nullable Consumer<Response<?, ?>> callback, long enqueuedTicks);
+
+    /**
+     * Invoked from {@code doReplayRequest()} for every replayed request which is not
+     * an {@link AbstractLocalTransactionRequest}. This covers both previously-successful requests and requests
+     * still queued on the connection. NOTE(review): such requests presumably originate from
+     * a {@link RemoteProxyTransaction} predecessor -- confirm.
+     *
+     * <p>
+     * Note: this method is invoked by the predecessor on the successor.
+     *
+     * @param request Request which needs to be forwarded
+     * @param callback Callback to be invoked once the request completes, may be null
+     * @param enqueuedTicks Time stamp to use for enqueue time
+     */
+    abstract void handleReplayedRemoteRequest(TransactionRequest<?> request,
+            @Nullable Consumer<Response<?, ?>> callback, long enqueuedTicks);
+
+    @Override
+    public final String toString() {
+        return MoreObjects.toStringHelper(this).add("identifier", getIdentifier()).add("state", state).toString();
+    }
 }