BUG-8494: Cap queue sleep time

author Robert Varga <robert.varga@pantheon.tech>

Tue, 30 May 2017 09:41:58 +0000 (11:41 +0200)

committer Tom Pantelis <tompantelis@gmail.com>

Wed, 31 May 2017 14:31:43 +0000 (14:31 +0000)
author Robert Varga <robert.varga@pantheon.tech>
Tue, 30 May 2017 09:41:58 +0000 (11:41 +0200)
committer Tom Pantelis <tompantelis@gmail.com>
Wed, 31 May 2017 14:31:43 +0000 (14:31 +0000)
diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java

index 32becc040d4b594cd5657889e9268ec8469597cf..4425c4336263399ff6818f477c8e0916f0fb5e5b 100644 (file)
--- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java
+++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java
@@ -72,6 +72,10 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      // Emit a debug entry if we sleep for more that this amount
      private static final long DEBUG_DELAY_NANOS = TimeUnit.MILLISECONDS.toNanos(100);
  
+    // Upper bound on the time a thread is forced to sleep to keep queue size under control
+    private static final long MAX_DELAY_SECONDS = 5;
+    private static final long MAX_DELAY_NANOS = TimeUnit.SECONDS.toNanos(MAX_DELAY_SECONDS);
+
      private final Lock lock = new ReentrantLock();
      private final ClientActorContext context;
      @GuardedBy("lock")
@@ -133,10 +137,17 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
       */
      public final void sendRequest(final Request<?, ?> request, final Consumer<Response<?, ?>> callback) {
          final long now = currentTime();
-        final long delay = enqueueEntry(new ConnectionEntry(request, callback, now), now);
+        long delay = enqueueEntry(new ConnectionEntry(request, callback, now), now);
          try {
-            if (delay >= DEBUG_DELAY_NANOS && LOG.isDebugEnabled()) {
-                LOG.debug("Sleeping for {}ms", TimeUnit.NANOSECONDS.toMillis(delay));
+            if (delay >= DEBUG_DELAY_NANOS) {
+                if (delay > MAX_DELAY_NANOS) {
+                    LOG.info("Capping {} throttle delay from {} to {} seconds", this,
+                        TimeUnit.NANOSECONDS.toSeconds(delay), MAX_DELAY_SECONDS);
+                    delay = MAX_DELAY_NANOS;
+                }
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("Sleeping for {}ms", TimeUnit.NANOSECONDS.toMillis(delay));
+                }
              }
              TimeUnit.NANOSECONDS.sleep(delay);
          } catch (InterruptedException e) {
@@ -172,6 +183,20 @@ public abstract class AbstractClientConnection<T extends BackendInfo> {
      @GuardedBy("lock")
      final void finishReplay(final ReconnectForwarder forwarder) {
          setForwarder(forwarder);
+
+        /*
+         * The process of replaying all messages may have taken a significant chunk of time, depending on type
+         * of messages, queue depth and available processing power. In extreme situations this may have already
+         * exceeded BACKEND_ALIVE_TIMEOUT_NANOS, in which case we are running the risk of not making reasonable forward
+         * progress before we start a reconnect cycle.
+         *
+         * Note that the timer is armed after we have sent the first message, hence we should be seeing a response
+         * from the backend before we see a timeout, simply due to how the mailbox operates.
+         *
+         * At any rate, reset the timestamp once we complete reconnection (which an atomic transition from the
+         * perspective of outside world), as that makes it a bit easier to reason about timing of events.
+         */
+        lastReceivedTicks = currentTime();
          lock.unlock();
      }
  
diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java

index 9ba118ed6b9c95a1d5c914a4b681413af4cce95d..554ffe97c77ae7c3011daca2a28c22f660a20e78 100644 (file)
--- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java
+++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java
@@ -9,6 +9,7 @@ package org.opendaylight.controller.cluster.access.client;
  
  import com.google.common.annotations.Beta;
  import com.google.common.base.Preconditions;
+import com.google.common.base.Stopwatch;
  import com.google.common.base.Verify;
  import java.util.Collection;
  import java.util.Map;
@@ -278,6 +279,8 @@ public abstract class ClientActorBehavior<T extends BackendInfo> extends
          LOG.info("{}: resolved shard {} to {}", persistenceId(), shard, backend);
          final long stamp = connectionsLock.writeLock();
          try {
+            final Stopwatch sw = Stopwatch.createStarted();
+
              // Create a new connected connection
              final ConnectedClientConnection<T> newConn = new ConnectedClientConnection<>(conn.context(),
                      conn.cookie(), backend);
@@ -301,7 +304,7 @@ public abstract class ClientActorBehavior<T extends BackendInfo> extends
                  LOG.warn("{}: old connection {} does not match existing {}, new connection {} in limbo",
                      persistenceId(), conn, existing, newConn);
              } else {
-                LOG.info("{}: replaced connection {} with {}", persistenceId(), conn, newConn);
+                LOG.info("{}: replaced connection {} with {} in {}", persistenceId(), conn, newConn, sw);
              }
          } finally {
              connectionsLock.unlockWrite(stamp);
author	Robert Varga <robert.varga@pantheon.tech>
	Tue, 30 May 2017 09:41:58 +0000 (11:41 +0200)
committer	Tom Pantelis <tompantelis@gmail.com>
	Wed, 31 May 2017 14:31:43 +0000 (14:31 +0000)
opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java		patch \| blob \| history
opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java		patch \| blob \| history