Expose AskTimeoutException counter from DatastoreInfoMXBean 83/93283/1
author Robert Varga <robert.varga@pantheon.tech>
Thu, 22 Oct 2020 21:38:04 +0000 (23:38 +0200)
committer Robert Varga <robert.varga@pantheon.tech>
Thu, 22 Oct 2020 21:48:50 +0000 (23:48 +0200)
This is a natural place to keep track of all exceptions which
happen in the datastore context. Hook an onComplete() onto each
future which goes by and update the counter.

JIRA: CONTROLLER-1963
Change-Id: Ib208b377e1ab533993597660228b5fc87919e75b
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBean.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBeanImpl.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/utils/ActorUtils.java

index d393d64601874be47915f1cfba38b782089e8d1e..53bc70cb8091b28a0aa117dc439861dfc01e133d 100644 (file)
@@ -7,11 +7,26 @@
  */
 package org.opendaylight.controller.cluster.datastore.jmx.mbeans;
 
+import akka.pattern.AskTimeoutException;
+
 /**
  * JMX bean for general datastore info.
  *
  * @author Thomas Pantelis
  */
 public interface DatastoreInfoMXBean {
+
     double getTransactionCreationRateLimit();
+
+    /**
+     * Return the number of {@link AskTimeoutException}s encountered by the datastore.
+     *
+     * @return Number of exceptions encountered
+     */
+    long getAskTimeoutExceptionCount();
+
+    /**
+     * Reset the number of {@link AskTimeoutException}s encountered by the datastore.
+     */
+    void resetAskTimeoutExceptionCount();
 }
index a2bf871d10619993df37da573fc5ffed4cc711be..00ecb72703710fac9fe54598f1599f2a3dc873b0 100644 (file)
@@ -16,17 +16,25 @@ import org.opendaylight.controller.md.sal.common.util.jmx.AbstractMXBean;
  * @author Thomas Pantelis
  */
 public class DatastoreInfoMXBeanImpl extends AbstractMXBean implements DatastoreInfoMXBean {
-
     private final ActorUtils actorUtils;
 
-    public DatastoreInfoMXBeanImpl(String mxBeanType, ActorUtils actorUtils) {
+    public DatastoreInfoMXBeanImpl(final String mxBeanType, final ActorUtils actorUtils) {
         super("GeneralRuntimeInfo", mxBeanType, null);
         this.actorUtils = actorUtils;
     }
 
-
     @Override
     public double getTransactionCreationRateLimit() {
         return actorUtils.getTxCreationLimit();
     }
+
+    @Override
+    public long getAskTimeoutExceptionCount() {
+        return actorUtils.getAskTimeoutExceptionCount();
+    }
+
+    @Override
+    public void resetAskTimeoutExceptionCount() {
+        actorUtils.resetAskTimeoutExceptionCount();
+    }
 }
index aaf3d33db078b78a7d944724844612bb04926265..fcfa9296aeb554e0da8057c8168c8d98ac34c06b 100644 (file)
@@ -26,6 +26,7 @@ import com.google.common.base.Strings;
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.LongAdder;
 import java.util.function.Function;
 import org.opendaylight.controller.cluster.access.concepts.MemberName;
 import org.opendaylight.controller.cluster.common.actor.Dispatchers;
@@ -67,11 +68,40 @@ import scala.concurrent.duration.FiniteDuration;
  * not be passed to actors especially remote actors.
  */
 public class ActorUtils {
+    private static final class AskTimeoutCounter extends OnComplete<Object> implements ExecutionContext {
+        private final LongAdder ateExceptions = new LongAdder();
+
+        @Override
+        public void onComplete(final Throwable failure, final Object success) throws Throwable {
+            if (failure instanceof AskTimeoutException) {
+                ateExceptions.increment();
+            }
+        }
+
+        void reset() {
+            ateExceptions.reset(); // in place: a reassigned plain field is not safely published to other threads
+        }
+
+        long sum() {
+            return ateExceptions.sum();
+        }
+
+        @Override
+        public void execute(final Runnable runnable) {
+            // Yes, we are this ugly, but then we are just doing a check + an increment
+            runnable.run();
+        }
+
+        @Override
+        public void reportFailure(final Throwable cause) {
+            LOG.warn("Unexpected failure updating counters", cause);
+        }
+    }
+
     private static final Logger LOG = LoggerFactory.getLogger(ActorUtils.class);
     private static final String DISTRIBUTED_DATA_STORE_METRIC_REGISTRY = "distributed-data-store";
     private static final String METRIC_RATE = "rate";
-    private static final Mapper<Throwable, Throwable> FIND_PRIMARY_FAILURE_TRANSFORMER =
-                                                              new Mapper<>() {
+    private static final Mapper<Throwable, Throwable> FIND_PRIMARY_FAILURE_TRANSFORMER = new Mapper<>() {
         @Override
         public Throwable apply(final Throwable failure) {
             Throwable actualFailure = failure;
@@ -88,18 +118,20 @@ public class ActorUtils {
     public static final String BOUNDED_MAILBOX = "bounded-mailbox";
     public static final String COMMIT = "commit";
 
+    private final AskTimeoutCounter askTimeoutCounter = new AskTimeoutCounter();
     private final ActorSystem actorSystem;
     private final ActorRef shardManager;
     private final ClusterWrapper clusterWrapper;
     private final Configuration configuration;
+    private final String selfAddressHostPort;
+    private final Dispatchers dispatchers;
+
     private DatastoreContext datastoreContext;
     private FiniteDuration operationDuration;
     private Timeout operationTimeout;
-    private final String selfAddressHostPort;
     private TransactionRateLimiter txRateLimiter;
     private Timeout transactionCommitOperationTimeout;
     private Timeout shardInitializationTimeout;
-    private final Dispatchers dispatchers;
 
     private volatile EffectiveModelContext schemaContext;
 
@@ -142,7 +174,6 @@ public class ActorUtils {
         } else {
             selfAddressHostPort = null;
         }
-
     }
 
     private void setCachedProperties() {
@@ -503,6 +534,14 @@ public class ActorUtils {
         return txRateLimiter.getTxCreationLimit();
     }
 
+    public long getAskTimeoutExceptionCount() {
+        return askTimeoutCounter.sum();
+    }
+
+    public void resetAskTimeoutExceptionCount() {
+        askTimeoutCounter.reset();
+    }
+
     /**
      * Try to acquire a transaction creation permit. Will block if no permits are available.
      */
@@ -546,7 +585,9 @@ public class ActorUtils {
     }
 
     protected Future<Object> doAsk(final ActorSelection actorRef, final Object message, final Timeout timeout) {
-        return ask(actorRef, message, timeout);
+        final Future<Object> ret = ask(actorRef, message, timeout);
+        ret.onComplete(askTimeoutCounter, askTimeoutCounter);
+        return ret;
     }
 
     public PrimaryShardInfoFutureCache getPrimaryShardInfoCache() {