BUG-5280: handle NotLeaderException
[controller.git] / opendaylight / md-sal / cds-access-client / src / main / java / org / opendaylight / controller / cluster / access / client / AbstractClientConnection.java
index 7dc150e403dc283a2181aad081fb1757afc690ea..28d8a1b42228690177ae9498e7c77dcf518558ce 100644 (file)
@@ -44,14 +44,17 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
     @VisibleForTesting
     static final long REQUEST_TIMEOUT_NANOS = TimeUnit.SECONDS.toNanos(30);
 
+    private static final FiniteDuration REQUEST_TIMEOUT_DURATION = FiniteDuration.apply(REQUEST_TIMEOUT_NANOS,
+        TimeUnit.NANOSECONDS);
+
     private final Lock lock = new ReentrantLock();
     private final ClientActorContext context;
     @GuardedBy("lock")
     private final TransmitQueue queue;
     private final Long cookie;
 
-    // Updated from actor thread only
-    private long lastProgress;
+    @GuardedBy("lock")
+    private boolean haveTimer;
 
     private volatile RequestException poisoned;
 
@@ -61,15 +64,13 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         this.context = Preconditions.checkNotNull(context);
         this.cookie = Preconditions.checkNotNull(cookie);
         this.queue = Preconditions.checkNotNull(queue);
-        this.lastProgress = readTime();
     }
 
     // Do not allow subclassing outside of this package
-    AbstractClientConnection(final AbstractClientConnection<T> oldConnection) {
+    AbstractClientConnection(final AbstractClientConnection<T> oldConnection, final int targetQueueSize) {
         this.context = oldConnection.context;
         this.cookie = oldConnection.cookie;
-        this.lastProgress = oldConnection.lastProgress;
-        this.queue = new TransmitQueue.Halted();
+        this.queue = new TransmitQueue.Halted(targetQueueSize);
     }
 
     public final ClientActorContext context() {
@@ -88,6 +89,9 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      * Send a request to the backend and invoke a specified callback when it finishes. This method is safe to invoke
      * from any thread.
      *
+     * <p>This method may put the caller thread to sleep in order to throttle the request rate.
+     * The callback may be called before the sleep finishes.
+     *
      * @param request Request to send
      * @param callback Callback to invoke
      */
@@ -98,13 +102,7 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
         }
 
         final ConnectionEntry entry = new ConnectionEntry(request, callback, readTime());
-
-        lock.lock();
-        try {
-            queue.enqueue(entry, entry.getEnqueuedTicks());
-        } finally {
-            lock.unlock();
-        }
+        enqueueAndWait(entry, entry.getEnqueuedTicks());
     }
 
     public abstract Optional<T> getBackendInfo();
@@ -116,7 +114,7 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
 
     @GuardedBy("lock")
     final void finishReplay(final ReconnectForwarder forwarder) {
-        queue.setForwarder(forwarder, readTime());
+        setForwarder(forwarder);
         lock.unlock();
     }
 
@@ -126,16 +124,38 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
     }
 
     @GuardedBy("lock")
-    abstract ClientActorBehavior<T> reconnectConnection(ClientActorBehavior<T> current);
+    abstract ClientActorBehavior<T> lockedReconnect(ClientActorBehavior<T> current);
 
     private long readTime() {
         return context.ticker().read();
     }
 
-    final void enqueueEntry(final ConnectionEntry entry, final long now) {
+    final long enqueueEntry(final ConnectionEntry entry, final long now) {
+        lock.lock();
+        try {
+            if (queue.isEmpty()) {
+                // First entry into an empty queue: arm the request timeout timer
+                scheduleTimer(REQUEST_TIMEOUT_DURATION);
+            }
+            return queue.enqueue(entry, now); // enqueueAndWait() sleeps for this many nanoseconds
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    final void enqueueAndWait(final ConnectionEntry entry, final long now) {
+        try {
+            TimeUnit.NANOSECONDS.sleep(enqueueEntry(entry, now)); // throttle the caller by the returned delay
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt(); // restore interrupt status so callers can still observe it
+            LOG.debug("Interrupted while sleeping", e);
+        }
+    }
+
+    final ClientActorBehavior<T> reconnect(final ClientActorBehavior<T> current) {
         lock.lock();
         try {
-            queue.enqueue(entry, now);
+            return lockedReconnect(current);
         } finally {
             lock.unlock();
         }
@@ -146,9 +166,19 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      *
      * @param delay Delay, in nanoseconds
      */
+    @GuardedBy("lock")
     private void scheduleTimer(final FiniteDuration delay) {
+        if (haveTimer) { // at most one outstanding timer per connection
+            LOG.debug("{}: timer already scheduled", context.persistenceId());
+            return;
+        }
+        if (queue.hasSuccessor()) {
+            LOG.debug("{}: connection has successor, not scheduling timer", context.persistenceId());
+            return;
+        }
         LOG.debug("{}: scheduling timeout in {}", context.persistenceId(), delay);
         context.executeInActor(this::runTimer, delay);
+        haveTimer = true; // blocks further scheduling until the flag is cleared again
     }
 
     /**
@@ -164,17 +194,17 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
 
         lock.lock();
         try {
+            haveTimer = false;
             final long now = readTime();
-            if (!queue.isEmpty()) {
-                final long ticksSinceProgress = now - lastProgress;
-                if (ticksSinceProgress >= NO_PROGRESS_TIMEOUT_NANOS) {
-                    LOG.error("Queue {} has not seen progress in {} seconds, failing all requests", this,
-                        TimeUnit.NANOSECONDS.toSeconds(ticksSinceProgress));
-
-                    lockedPoison(new NoProgressException(ticksSinceProgress));
-                    current.removeConnection(this);
-                    return current;
-                }
+            // The following line is only reliable while the queue is not forwarding; that state should not last long.
+            final long ticksSinceProgress = queue.ticksStalling(now);
+            if (ticksSinceProgress >= NO_PROGRESS_TIMEOUT_NANOS) {
+                LOG.error("Queue {} has not seen progress in {} seconds, failing all requests", this,
+                    TimeUnit.NANOSECONDS.toSeconds(ticksSinceProgress));
+
+                lockedPoison(new NoProgressException(ticksSinceProgress));
+                current.removeConnection(this);
+                return current;
             }
 
             // Requests are always scheduled in sequence, hence checking for timeout is relatively straightforward.
@@ -183,17 +213,17 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
             delay = lockedCheckTimeout(now);
             if (delay == null) {
                 // We have timed out. There is no point in scheduling a timer
-                return reconnectConnection(current);
+                return lockedReconnect(current);
+            }
+
+            if (delay.isPresent()) {
+                // If there is new delay, schedule a timer
+                scheduleTimer(delay.get());
             }
         } finally {
             lock.unlock();
         }
 
-        if (delay.isPresent()) {
-            // If there is new delay, schedule a timer
-            scheduleTimer(delay.get());
-        }
-
         return current;
     }
 
@@ -222,13 +252,13 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
             return Optional.empty();
         }
 
-        final long delay = head.getEnqueuedTicks() - now + REQUEST_TIMEOUT_NANOS;
-        if (delay <= 0) {
-            LOG.debug("Connection {} timed out", this);
+        final long beenOpen = now - head.getEnqueuedTicks();
+        if (beenOpen >= REQUEST_TIMEOUT_NANOS) {
+            LOG.debug("Connection {} has a request not completed for {} nanoseconds, timing out", this, beenOpen);
             return null;
         }
 
-        return Optional.of(FiniteDuration.apply(delay, TimeUnit.NANOSECONDS));
+        return Optional.of(FiniteDuration.apply(REQUEST_TIMEOUT_NANOS - beenOpen, TimeUnit.NANOSECONDS));
     }
 
     final void poison(final RequestException cause) {
@@ -267,7 +297,5 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
             LOG.debug("Completing {} with {}", entry, envelope);
             entry.complete(envelope.getMessage());
         }
-
-        lastProgress = readTime();
     }
 }