From 5e8721fd675825ec5c9f826aed61c97e22188960 Mon Sep 17 00:00:00 2001 From: Tom Pantelis Date: Thu, 11 Feb 2016 03:24:53 -0500 Subject: [PATCH] Fix intermittent LeaderTest/CandidateTest failures The test cases in LeaderTest and CandidateTest have been failing intermittently. A particular test in CandidateTest has recently started failing fairly regularly on jenkins for some reason. The common denominator is that an initial message to an actor isn't received and goes to dead letters instead, even though the actor was just created. This seems related to the use of ActorSelection in the raft behavior classes, I suspect a timing issue where the underlying actor isn't actually created/available yet via actorSelection. I had seen this in the past and attempted to alleviate it by adding a verifyActorReady to TestActorFactory to verify with retries that the actor can be obtained via actorSelection.resolveOne. However it doesn't appear resolveOne works as advertised or maybe a successful call doesn't mean a message will succeed. I changed verifyActorReady to send an Identify message to the actorSelection and verify successful response. On my system LeaderTest would usually fail within 30 test runs. After the change it ran successfully 400 times. Change-Id: I2da7d4a4d14c68810e87fc64b711b5c80608f5d7 Signed-off-by: Tom Pantelis --- .../cluster/raft/TestActorFactory.java | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/TestActorFactory.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/TestActorFactory.java index 4ca018c5e2..0b709aa038 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/TestActorFactory.java +++ b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/TestActorFactory.java @@ -17,16 +17,23 @@ package org.opendaylight.controller.cluster.raft; */ import akka.actor.Actor; +import akka.actor.ActorIdentity; import akka.actor.ActorRef; +import akka.actor.ActorSelection; import akka.actor.ActorSystem; +import akka.actor.Identify; import akka.actor.PoisonPill; import akka.actor.Props; +import akka.pattern.Patterns; import akka.testkit.JavaTestKit; import akka.testkit.TestActorRef; import akka.util.Timeout; +import com.google.common.base.Stopwatch; +import com.google.common.util.concurrent.Uninterruptibles; import java.util.LinkedList; import java.util.List; import java.util.concurrent.TimeUnit; +import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import scala.concurrent.Await; @@ -98,22 +105,26 @@ public class TestActorFactory implements AutoCloseable { private void verifyActorReady(ActorRef actorRef) { // Sometimes we see messages go to dead letters soon after creation - it seems the actor isn't quite // in a state yet to receive messages or isn't actually created yet. This seems to happen with - // actorSelection so, to alleviate it, we use an actorSelection and call resolveOne with retries to - // ensure it's ready. + // actorSelection so, to alleviate it, we use an actorSelection and send an Identify message with + // retries to ensure it's ready. - int tries = 1; - while(true) { + Timeout timeout = new Timeout(100, TimeUnit.MILLISECONDS); + Throwable lastError = null; + Stopwatch sw = Stopwatch.createStarted(); + while(sw.elapsed(TimeUnit.SECONDS) <= 10) { try { - Timeout timeout = new Timeout(100, TimeUnit.MILLISECONDS); - Future future = system.actorSelection(actorRef.path()).resolveOne(timeout); - Await.ready(future, timeout.duration()); - break; - } catch (Exception e) { - if(tries++ > 20) { - throw new RuntimeException(e); - } + ActorSelection actorSelection = system.actorSelection(actorRef.path().toString()); + Future future = Patterns.ask(actorSelection, new Identify(""), timeout); + ActorIdentity reply = (ActorIdentity)Await.result(future, timeout.duration()); + Assert.assertNotNull("Identify returned null", reply.getRef()); + return; + } catch (Exception | AssertionError e) { + Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + lastError = e; } } + + throw new RuntimeException(lastError); } /** -- 2.36.6