From: Tom Pantelis Date: Mon, 4 Apr 2016 05:40:06 +0000 (-0400) Subject: Fix ApplyState elapsed time check X-Git-Tag: release/boron~244 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=commitdiff_plain;h=57d7e4788a488d992b9868d44ebc392b06e317c5 Fix ApplyState elapsed time check On ApplyState, there's a check if the elapsed time exceeds a 50ms threshold and it logs a warning. However the start time is captured when the message is created prior to queueing. So if there's many ApplyState or other messages already queued, the elapsed time also includes the time spent in the queue, ie as a side effect includes the cumulative processing time of each prior message in the queue. When a follower starts up, there can be hundreds to thousands of catchup ApplyState messages and, eventually, the cumulative processing times can add up to more than 50 ms, in which case every subsequent ApplyState message trips the threshold with increasing elapsed times, even though none of them actually took 50 ms to process. Seeing hundreds to thousands of warnings with misleading elapsed times looks ominous and leads users to think something is wrong. Therefore I changed it to capture the start time just prior to calling applyState so it captures just the processing time for that message. I also removed the startTime field from ApplyState. This class is Serializable but it is only ever sent locally to self and is never serialized so there's no backwards compatibility concerns. Change-Id: I9493734b5307d6dd5d723e5fe416ba97915dfc63 Signed-off-by: Tom Pantelis --- diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java index a5b5d2b81a..1c1f9114ae 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java @@ -217,11 +217,7 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor { if (message instanceof ApplyState) { ApplyState applyState = (ApplyState) message; - long elapsedTime = (System.nanoTime() - applyState.getStartTime()); - if(elapsedTime >= APPLY_STATE_DELAY_THRESHOLD_IN_NANOS){ - LOG.warn("ApplyState took more time than expected. Elapsed Time = {} ms ApplyState = {}", - TimeUnit.NANOSECONDS.toMillis(elapsedTime), applyState); - } + long startTime = System.nanoTime(); if(LOG.isDebugEnabled()) { LOG.debug("{}: Applying state for log index {} data {}", @@ -232,6 +228,12 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor { applyState(applyState.getClientActor(), applyState.getIdentifier(), applyState.getReplicatedLogEntry().getData()); + long elapsedTime = System.nanoTime() - startTime; + if(elapsedTime >= APPLY_STATE_DELAY_THRESHOLD_IN_NANOS){ + LOG.debug("ApplyState took more time than expected. Elapsed Time = {} ms ApplyState = {}", + TimeUnit.NANOSECONDS.toMillis(elapsedTime), applyState); + } + if (!hasFollowers()) { // for single node, the capture should happen after the apply state // as we delete messages from the persistent journal which have made it to the snapshot diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/ApplyState.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/ApplyState.java index 9299e752d1..9477eb2c52 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/ApplyState.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/ApplyState.java @@ -17,14 +17,12 @@ public class ApplyState implements Serializable { private final ActorRef clientActor; private final String identifier; private final ReplicatedLogEntry replicatedLogEntry; - private final long startTime; public ApplyState(ActorRef clientActor, String identifier, ReplicatedLogEntry replicatedLogEntry) { this.clientActor = clientActor; this.identifier = identifier; this.replicatedLogEntry = replicatedLogEntry; - this.startTime = System.nanoTime(); } public ActorRef getClientActor() { @@ -39,16 +37,11 @@ public class ApplyState implements Serializable { return replicatedLogEntry; } - public long getStartTime() { - return startTime; - } - @Override public String toString() { return "ApplyState{" + "identifier='" + identifier + '\'' + ", replicatedLogEntry.index =" + replicatedLogEntry.getIndex() + - ", startTime=" + startTime + '}'; } }