From 9045f6fc943e9794055463442dcfa9bce64b68d3 Mon Sep 17 00:00:00 2001 From: Robert Varga Date: Thu, 22 Oct 2020 23:38:04 +0200 Subject: [PATCH] Expose AskTimeoutException counter from DatastoreInfoMXBean This is a natural place to keep track of all exceptions which happen in datastore context. Hook an onComplete() to each future which goes by and update the counter. JIRA: CONTROLLER-1963 Change-Id: Ib208b377e1ab533993597660228b5fc87919e75b Signed-off-by: Robert Varga --- .../jmx/mbeans/DatastoreInfoMXBean.java | 15 ++++++ .../jmx/mbeans/DatastoreInfoMXBeanImpl.java | 14 +++-- .../cluster/datastore/utils/ActorUtils.java | 53 ++++++++++++++++--- 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBean.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBean.java index d393d64601..53bc70cb80 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBean.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBean.java @@ -7,11 +7,26 @@ */ package org.opendaylight.controller.cluster.datastore.jmx.mbeans; +import akka.pattern.AskTimeoutException; + /** * JMX bean for general datastore info. * * @author Thomas Pantelis */ public interface DatastoreInfoMXBean { + double getTransactionCreationRateLimit(); + + /** + * Return the number of {@link AskTimeoutException}s encountered by the datastore. + * + * @return Number of exceptions encountered + */ + long getAskTimeoutExceptionCount(); + + /** + * Reset the number of {@link AskTimeoutException}s encountered by the datastore. 
+ */ + void resetAskTimeoutExceptionCount(); } diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBeanImpl.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBeanImpl.java index a2bf871d10..00ecb72703 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBeanImpl.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/jmx/mbeans/DatastoreInfoMXBeanImpl.java @@ -16,17 +16,25 @@ import org.opendaylight.controller.md.sal.common.util.jmx.AbstractMXBean; * @author Thomas Pantelis */ public class DatastoreInfoMXBeanImpl extends AbstractMXBean implements DatastoreInfoMXBean { - private final ActorUtils actorUtils; - public DatastoreInfoMXBeanImpl(String mxBeanType, ActorUtils actorUtils) { + public DatastoreInfoMXBeanImpl(final String mxBeanType, final ActorUtils actorUtils) { super("GeneralRuntimeInfo", mxBeanType, null); this.actorUtils = actorUtils; } - @Override public double getTransactionCreationRateLimit() { return actorUtils.getTxCreationLimit(); } + + @Override + public long getAskTimeoutExceptionCount() { + return actorUtils.getAskTimeoutExceptionCount(); + } + + @Override + public void resetAskTimeoutExceptionCount() { + actorUtils.resetAskTimeoutExceptionCount(); + } } diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/utils/ActorUtils.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/utils/ActorUtils.java index aaf3d33db0..fcfa9296ae 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/utils/ActorUtils.java +++ 
b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/utils/ActorUtils.java @@ -26,6 +26,7 @@ import com.google.common.base.Strings; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.LongAdder; import java.util.function.Function; import org.opendaylight.controller.cluster.access.concepts.MemberName; import org.opendaylight.controller.cluster.common.actor.Dispatchers; @@ -67,11 +68,40 @@ import scala.concurrent.duration.FiniteDuration; * not be passed to actors especially remote actors. */ public class ActorUtils { + private static final class AskTimeoutCounter extends OnComplete implements ExecutionContext { + private volatile LongAdder ateExceptions = new LongAdder(); // replaced wholesale by reset(), hence volatile + + @Override + public void onComplete(final Throwable failure, final Object success) throws Throwable { + if (failure instanceof AskTimeoutException) { + ateExceptions.increment(); + } + } + + void reset() { + ateExceptions = new LongAdder(); + } + + long sum() { + return ateExceptions.sum(); + } + + @Override + public void execute(final Runnable runnable) { + // Yes, we are this ugly, but then we are just doing a check + an increment + runnable.run(); + } + + @Override + public void reportFailure(final Throwable cause) { + LOG.warn("Unexpected failure updating counters", cause); + } + } + private static final Logger LOG = LoggerFactory.getLogger(ActorUtils.class); private static final String DISTRIBUTED_DATA_STORE_METRIC_REGISTRY = "distributed-data-store"; private static final String METRIC_RATE = "rate"; - private static final Mapper FIND_PRIMARY_FAILURE_TRANSFORMER = - new Mapper<>() { + private static final Mapper FIND_PRIMARY_FAILURE_TRANSFORMER = new Mapper<>() { @Override public Throwable apply(final Throwable failure) { Throwable actualFailure = failure; @@ -88,18 +118,20 @@ public class ActorUtils { public static final String 
BOUNDED_MAILBOX = "bounded-mailbox"; public static final String COMMIT = "commit"; + private final AskTimeoutCounter askTimeoutCounter = new AskTimeoutCounter(); private final ActorSystem actorSystem; private final ActorRef shardManager; private final ClusterWrapper clusterWrapper; private final Configuration configuration; + private final String selfAddressHostPort; + private final Dispatchers dispatchers; + private DatastoreContext datastoreContext; private FiniteDuration operationDuration; private Timeout operationTimeout; - private final String selfAddressHostPort; private TransactionRateLimiter txRateLimiter; private Timeout transactionCommitOperationTimeout; private Timeout shardInitializationTimeout; - private final Dispatchers dispatchers; private volatile EffectiveModelContext schemaContext; @@ -142,7 +174,6 @@ public class ActorUtils { } else { selfAddressHostPort = null; } - } private void setCachedProperties() { @@ -503,6 +534,14 @@ public class ActorUtils { return txRateLimiter.getTxCreationLimit(); } + public long getAskTimeoutExceptionCount() { + return askTimeoutCounter.sum(); + } + + public void resetAskTimeoutExceptionCount() { + askTimeoutCounter.reset(); + } + /** * Try to acquire a transaction creation permit. Will block if no permits are available. */ @@ -546,7 +585,9 @@ public class ActorUtils { } protected Future doAsk(final ActorSelection actorRef, final Object message, final Timeout timeout) { - return ask(actorRef, message, timeout); + final Future ret = ask(actorRef, message, timeout); + ret.onComplete(askTimeoutCounter, askTimeoutCounter); + return ret; } public PrimaryShardInfoFutureCache getPrimaryShardInfoCache() { -- 2.36.6