BUG-5280: Fix deadlock with TransmitQueue
[controller.git] / opendaylight / md-sal / cds-access-client / src / main / java / org / opendaylight / controller / cluster / access / client / AbstractClientConnection.java
index 0e9382dbba8a644c9317c8518ae05ed8c179b02f..7dc150e403dc283a2181aad081fb1757afc690ea 100644 (file)
@@ -10,12 +10,11 @@ package org.opendaylight.controller.cluster.access.client;
 import akka.actor.ActorRef;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
-import com.google.common.base.Verify;
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
-import java.util.ArrayDeque;
 import java.util.Optional;
-import java.util.Queue;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.Consumer;
 import javax.annotation.Nonnull;
 import javax.annotation.concurrent.GuardedBy;
@@ -45,30 +44,32 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
     @VisibleForTesting
     static final long REQUEST_TIMEOUT_NANOS = TimeUnit.SECONDS.toNanos(30);
 
-    private final Queue<ConnectionEntry> pending;
+    private final Lock lock = new ReentrantLock();
     private final ClientActorContext context;
+    @GuardedBy("lock")
+    private final TransmitQueue queue;
     private final Long cookie;
 
-    private volatile ReconnectForwarder successor;
-    private volatile RequestException poisoned;
+    // Updated from actor thread only
     private long lastProgress;
 
-    private AbstractClientConnection(final ClientActorContext context, final Long cookie,
-            final Queue<ConnectionEntry> pending) {
+    private volatile RequestException poisoned;
+
+    // Do not allow subclassing outside of this package
+    AbstractClientConnection(final ClientActorContext context, final Long cookie,
+            final TransmitQueue queue) {
         this.context = Preconditions.checkNotNull(context);
         this.cookie = Preconditions.checkNotNull(cookie);
-        this.pending = Preconditions.checkNotNull(pending);
+        this.queue = Preconditions.checkNotNull(queue);
         this.lastProgress = readTime();
     }
 
-    // Do not allow subclassing outside of this package
-    AbstractClientConnection(final ClientActorContext context, final Long cookie) {
-        this(context, cookie, new ArrayDeque<>(1));
-    }
-
     // Do not allow subclassing outside of this package
     AbstractClientConnection(final AbstractClientConnection<T> oldConnection) {
-        this(oldConnection.context, oldConnection.cookie, oldConnection.pending);
+        this.context = oldConnection.context;
+        this.cookie = oldConnection.cookie;
+        this.lastProgress = oldConnection.lastProgress;
+        this.queue = new TransmitQueue.Halted();
     }
 
     public final ClientActorContext context() {
@@ -83,14 +84,6 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         return context.self();
     }
 
-    final long readTime() {
-        return context.ticker().read();
-    }
-
-    final Queue<ConnectionEntry> pending() {
-        return pending;
-    }
-
     /**
      * Send a request to the backend and invoke a specified callback when it finishes. This method is safe to invoke
      * from any thread.
@@ -99,51 +92,53 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      * @param callback Callback to invoke
      */
     public final void sendRequest(final Request<?, ?> request, final Consumer<Response<?, ?>> callback) {
-        Preconditions.checkState(poisoned == null, "Connection %s has been poisoned", this);
-
-        final ReconnectForwarder beforeQueue = successor;
-        final ConnectionEntry entry = new ConnectionEntry(request, callback, readTime());
-        if (beforeQueue != null) {
-            LOG.trace("Forwarding entry {} from {} to {}", entry, this, beforeQueue);
-            beforeQueue.forwardEntry(entry);
-            return;
+        final RequestException maybePoison = poisoned;
+        if (maybePoison != null) {
+            throw new IllegalStateException("Connection " + this + " has been poisoned", maybePoison);
         }
 
-        enqueueEntry(entry);
+        final ConnectionEntry entry = new ConnectionEntry(request, callback, readTime());
 
-        final ReconnectForwarder afterQueue = successor;
-        if (afterQueue != null) {
-            synchronized (this) {
-                spliceToSuccessor(afterQueue);
-            }
+        lock.lock();
+        try {
+            queue.enqueue(entry, entry.getEnqueuedTicks());
+        } finally {
+            lock.unlock();
         }
     }
 
-    public final synchronized void setForwarder(final ReconnectForwarder forwarder) {
-        Verify.verify(successor == null, "Successor {} already set on connection {}", successor, this);
-        successor = Preconditions.checkNotNull(forwarder);
-        LOG.debug("Connection {} superseded by {}, splicing queue", this, successor);
-        spliceToSuccessor(forwarder);
+    public abstract Optional<T> getBackendInfo();
+
+    final Iterable<ConnectionEntry> startReplay() {
+        lock.lock();
+        return queue.asIterable();
     }
 
-    public abstract Optional<T> getBackendInfo();
+    @GuardedBy("lock")
+    final void finishReplay(final ReconnectForwarder forwarder) {
+        queue.setForwarder(forwarder, readTime());
+        lock.unlock();
+    }
 
-    @GuardedBy("this")
-    void spliceToSuccessor(final ReconnectForwarder successor) {
-        ConnectionEntry entry = pending.poll();
-        while (entry != null) {
-            successor.forwardEntry(entry);
-            entry = pending.poll();
-        }
+    @GuardedBy("lock")
+    final void setForwarder(final ReconnectForwarder forwarder) {
+        queue.setForwarder(forwarder, readTime());
     }
 
-    final ConnectionEntry dequeEntry() {
-        lastProgress = readTime();
-        return pending.poll();
+    @GuardedBy("lock")
+    abstract ClientActorBehavior<T> reconnectConnection(ClientActorBehavior<T> current);
+
+    private long readTime() {
+        return context.ticker().read();
     }
 
-    void enqueueEntry(final ConnectionEntry entry) {
-        pending.add(entry);
+    final void enqueueEntry(final ConnectionEntry entry, final long now) {
+        lock.lock();
+        try {
+            queue.enqueue(entry, now);
+        } finally {
+            lock.unlock();
+        }
     }
 
     /**
@@ -165,27 +160,33 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      */
     @VisibleForTesting
     final ClientActorBehavior<T> runTimer(final ClientActorBehavior<T> current) {
-        final long now = readTime();
-
-        if (!isEmpty()) {
-            final long ticksSinceProgress = now - lastProgress;
-            if (ticksSinceProgress >= NO_PROGRESS_TIMEOUT_NANOS) {
-                LOG.error("Queue {} has not seen progress in {} seconds, failing all requests", this,
-                    TimeUnit.NANOSECONDS.toSeconds(ticksSinceProgress));
-
-                poison(new NoProgressException(ticksSinceProgress));
-                current.removeConnection(this);
-                return current;
+        final Optional<FiniteDuration> delay;
+
+        lock.lock();
+        try {
+            final long now = readTime();
+            if (!queue.isEmpty()) {
+                final long ticksSinceProgress = now - lastProgress;
+                if (ticksSinceProgress >= NO_PROGRESS_TIMEOUT_NANOS) {
+                    LOG.error("Queue {} has not seen progress in {} seconds, failing all requests", this,
+                        TimeUnit.NANOSECONDS.toSeconds(ticksSinceProgress));
+
+                    lockedPoison(new NoProgressException(ticksSinceProgress));
+                    current.removeConnection(this);
+                    return current;
+                }
             }
-        }
 
-        // Requests are always scheduled in sequence, hence checking for timeout is relatively straightforward.
-        // Note we use also inquire about the delay, so we can re-schedule if needed, hence the unusual tri-state
-        // return convention.
-        final Optional<FiniteDuration> delay = checkTimeout(now);
-        if (delay == null) {
-            // We have timed out. There is no point in scheduling a timer
-            return reconnectConnection(current);
+            // Requests are always scheduled in sequence, hence checking for timeout is relatively straightforward.
+            // Note we also inquire about the delay, so we can re-schedule if needed, hence the unusual tri-state
+            // return convention.
+            delay = lockedCheckTimeout(now);
+            if (delay == null) {
+                // We have timed out. There is no point in scheduling a timer
+                return reconnectConnection(current);
+            }
+        } finally {
+            lock.unlock();
         }
 
         if (delay.isPresent()) {
@@ -196,8 +197,14 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         return current;
     }
 
-    boolean isEmpty() {
-        return pending.isEmpty();
+    @VisibleForTesting
+    final Optional<FiniteDuration> checkTimeout(final long now) {
+        lock.lock();
+        try {
+            return lockedCheckTimeout(now);
+        } finally {
+            lock.unlock();
+        }
     }
 
     /*
@@ -208,7 +215,9 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      */
     @SuppressFBWarnings(value = "NP_OPTIONAL_RETURN_NULL",
             justification = "Returning null Optional is documented in the API contract.")
-    final Optional<FiniteDuration> checkTimeout(final ConnectionEntry head, final long now) {
+    @GuardedBy("lock")
+    private Optional<FiniteDuration> lockedCheckTimeout(final long now) {
+        final ConnectionEntry head = queue.peek();
         if (head == null) {
             return Optional.empty();
         }
@@ -222,30 +231,19 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         return Optional.of(FiniteDuration.apply(delay, TimeUnit.NANOSECONDS));
     }
 
-    /*
-     * We are using tri-state return here to indicate one of three conditions:
-     * - if there is no timeout to schedule, return Optional.empty()
-     * - if there is a timeout to schedule, return a non-empty optional
-     * - if this connections has timed out, return null
-     */
-    @SuppressFBWarnings(value = "NP_OPTIONAL_RETURN_NULL",
-            justification = "Returning null Optional is documented in the API contract.")
-    Optional<FiniteDuration> checkTimeout(final long now) {
-        return checkTimeout(pending.peek(), now);
-    }
-
-    static void poisonQueue(final Queue<? extends ConnectionEntry> queue, final RequestException cause) {
-        for (ConnectionEntry e : queue) {
-            final Request<?, ?> request = e.getRequest();
-            LOG.trace("Poisoning request {}", request, cause);
-            e.complete(request.toRequestFailure(cause));
+    final void poison(final RequestException cause) {
+        lock.lock();
+        try {
+            lockedPoison(cause);
+        } finally {
+            lock.unlock();
         }
-        queue.clear();
     }
 
-    void poison(final RequestException cause) {
+    @GuardedBy("lock")
+    private void lockedPoison(final RequestException cause) {
         poisoned = cause;
-        poisonQueue(pending, cause);
+        queue.poison(cause);
     }
 
     @VisibleForTesting
@@ -253,7 +251,23 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         return poisoned;
     }
 
-    abstract ClientActorBehavior<T> reconnectConnection(ClientActorBehavior<T> current);
+    final void receiveResponse(final ResponseEnvelope<?> envelope) {
+        final long now = readTime();
 
-    abstract void receiveResponse(final ResponseEnvelope<?> envelope);
+        final Optional<TransmittedConnectionEntry> maybeEntry;
+        lock.lock();
+        try {
+            maybeEntry = queue.complete(envelope, now);
+        } finally {
+            lock.unlock();
+        }
+
+        if (maybeEntry.isPresent()) {
+            final TransmittedConnectionEntry entry = maybeEntry.get();
+            LOG.debug("Completing {} with {}", entry, envelope);
+            entry.complete(envelope.getMessage());
+        }
+
+        lastProgress = readTime();
+    }
 }