BUG-5280: handle NotLeaderException
[controller.git] / opendaylight / md-sal / cds-access-client / src / main / java / org / opendaylight / controller / cluster / access / client / AbstractClientConnection.java
index 0366e7ace2496c0f5edd5433f237ca09db569a01..28d8a1b42228690177ae9498e7c77dcf518558ce 100644 (file)
@@ -44,14 +44,19 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
     @VisibleForTesting
     static final long REQUEST_TIMEOUT_NANOS = TimeUnit.SECONDS.toNanos(30);
 
+    private static final FiniteDuration REQUEST_TIMEOUT_DURATION = FiniteDuration.apply(REQUEST_TIMEOUT_NANOS,
+        TimeUnit.NANOSECONDS);
+
     private final Lock lock = new ReentrantLock();
     private final ClientActorContext context;
     @GuardedBy("lock")
     private final TransmitQueue queue;
     private final Long cookie;
 
+    @GuardedBy("lock")
+    private boolean haveTimer;
+
     private volatile RequestException poisoned;
-    private long lastProgress;
 
     // Do not allow subclassing outside of this package
     AbstractClientConnection(final ClientActorContext context, final Long cookie,
@@ -59,15 +64,13 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         this.context = Preconditions.checkNotNull(context);
         this.cookie = Preconditions.checkNotNull(cookie);
         this.queue = Preconditions.checkNotNull(queue);
-        this.lastProgress = readTime();
     }
 
     // Do not allow subclassing outside of this package
-    AbstractClientConnection(final AbstractClientConnection<T> oldConnection) {
+    AbstractClientConnection(final AbstractClientConnection<T> oldConnection, final int targetQueueSize) {
         this.context = oldConnection.context;
         this.cookie = oldConnection.cookie;
-        this.lastProgress = oldConnection.lastProgress;
-        this.queue = new TransmitQueue.Halted();
+        this.queue = new TransmitQueue.Halted(targetQueueSize);
     }
 
     public final ClientActorContext context() {
@@ -86,6 +89,9 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      * Send a request to the backend and invoke a specified callback when it finishes. This method is safe to invoke
      * from any thread.
      *
+     * <p>This method may put the caller thread to sleep in order to throttle the request rate.
+     * The callback may be called before the sleep finishes.
+     *
      * @param request Request to send
      * @param callback Callback to invoke
      */
@@ -96,13 +102,7 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         }
 
         final ConnectionEntry entry = new ConnectionEntry(request, callback, readTime());
-
-        lock.lock();
-        try {
-            queue.enqueue(entry, entry.getEnqueuedTicks());
-        } finally {
-            lock.unlock();
-        }
+        enqueueAndWait(entry, entry.getEnqueuedTicks());
     }
 
     public abstract Optional<T> getBackendInfo();
@@ -114,7 +114,7 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
 
     @GuardedBy("lock")
     final void finishReplay(final ReconnectForwarder forwarder) {
-        queue.setForwarder(forwarder, readTime());
+        setForwarder(forwarder);
         lock.unlock();
     }
 
@@ -124,16 +124,38 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
     }
 
     @GuardedBy("lock")
-    abstract ClientActorBehavior<T> reconnectConnection(ClientActorBehavior<T> current);
+    abstract ClientActorBehavior<T> lockedReconnect(ClientActorBehavior<T> current);
 
     private long readTime() {
         return context.ticker().read();
     }
 
-    final void enqueueEntry(final ConnectionEntry entry, final long now) {
+    final long enqueueEntry(final ConnectionEntry entry, final long now) {
         lock.lock();
         try {
-            queue.enqueue(entry, now);
+            if (queue.isEmpty()) {
+                // The queue is becoming non-empty, schedule a timer
+                scheduleTimer(REQUEST_TIMEOUT_DURATION);
+            }
+            return queue.enqueue(entry, now);
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    final void enqueueAndWait(final ConnectionEntry entry, final long now) {
+        final long delay = enqueueEntry(entry, now);
+        try {
+            TimeUnit.NANOSECONDS.sleep(delay);
+        } catch (InterruptedException e) {
+            LOG.debug("Interrupted while sleeping", e);
+        }
+    }
+
+    final ClientActorBehavior<T> reconnect(final ClientActorBehavior<T> current) {
+        lock.lock();
+        try {
+            return lockedReconnect(current);
         } finally {
             lock.unlock();
         }
@@ -144,9 +166,19 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      *
      * @param delay Delay, in nanoseconds
      */
+    @GuardedBy("lock")
     private void scheduleTimer(final FiniteDuration delay) {
+        if (haveTimer) {
+            LOG.debug("{}: timer already scheduled", context.persistenceId());
+            return;
+        }
+        if (queue.hasSuccessor()) {
+            LOG.debug("{}: connection has successor, not scheduling timer", context.persistenceId());
+            return;
+        }
         LOG.debug("{}: scheduling timeout in {}", context.persistenceId(), delay);
         context.executeInActor(this::runTimer, delay);
+        haveTimer = true;
     }
 
     /**
@@ -162,17 +194,17 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
 
         lock.lock();
         try {
+            haveTimer = false;
             final long now = readTime();
-            if (!queue.isEmpty()) {
-                final long ticksSinceProgress = now - lastProgress;
-                if (ticksSinceProgress >= NO_PROGRESS_TIMEOUT_NANOS) {
-                    LOG.error("Queue {} has not seen progress in {} seconds, failing all requests", this,
-                        TimeUnit.NANOSECONDS.toSeconds(ticksSinceProgress));
-
-                    lockedPoison(new NoProgressException(ticksSinceProgress));
-                    current.removeConnection(this);
-                    return current;
-                }
+            // The following line is only reliable when queue is not forwarding, but such state should not last long.
+            final long ticksSinceProgress = queue.ticksStalling(now);
+            if (ticksSinceProgress >= NO_PROGRESS_TIMEOUT_NANOS) {
+                LOG.error("Queue {} has not seen progress in {} seconds, failing all requests", this,
+                    TimeUnit.NANOSECONDS.toSeconds(ticksSinceProgress));
+
+                lockedPoison(new NoProgressException(ticksSinceProgress));
+                current.removeConnection(this);
+                return current;
             }
 
             // Requests are always scheduled in sequence, hence checking for timeout is relatively straightforward.
@@ -181,17 +213,17 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
             delay = lockedCheckTimeout(now);
             if (delay == null) {
                 // We have timed out. There is no point in scheduling a timer
-                return reconnectConnection(current);
+                return lockedReconnect(current);
+            }
+
+            if (delay.isPresent()) {
+                // If there is new delay, schedule a timer
+                scheduleTimer(delay.get());
             }
         } finally {
             lock.unlock();
         }
 
-        if (delay.isPresent()) {
-            // If there is new delay, schedule a timer
-            scheduleTimer(delay.get());
-        }
-
         return current;
     }
 
@@ -220,13 +252,13 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
             return Optional.empty();
         }
 
-        final long delay = head.getEnqueuedTicks() - now + REQUEST_TIMEOUT_NANOS;
-        if (delay <= 0) {
-            LOG.debug("Connection {} timed out", this);
+        final long beenOpen = now - head.getEnqueuedTicks();
+        if (beenOpen >= REQUEST_TIMEOUT_NANOS) {
+            LOG.debug("Connection {} has a request not completed for {} nanoseconds, timing out", this, beenOpen);
             return null;
         }
 
-        return Optional.of(FiniteDuration.apply(delay, TimeUnit.NANOSECONDS));
+        return Optional.of(FiniteDuration.apply(REQUEST_TIMEOUT_NANOS - beenOpen, TimeUnit.NANOSECONDS));
     }
 
     final void poison(final RequestException cause) {
@@ -252,13 +284,18 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
     final void receiveResponse(final ResponseEnvelope<?> envelope) {
         final long now = readTime();
 
+        final Optional<TransmittedConnectionEntry> maybeEntry;
         lock.lock();
         try {
-            queue.complete(envelope, now);
+            maybeEntry = queue.complete(envelope, now);
         } finally {
             lock.unlock();
         }
 
-        lastProgress = readTime();
+        if (maybeEntry.isPresent()) {
+            final TransmittedConnectionEntry entry = maybeEntry.get();
+            LOG.debug("Completing {} with {}", entry, envelope);
+            entry.complete(envelope.getMessage());
+        }
     }
 }