Fix intermittent failures in FollowerTest 14/54814/2
author Tom Pantelis <tompantelis@gmail.com>
Tue, 11 Apr 2017 21:39:38 +0000 (17:39 -0400)
committer Tom Pantelis <tompantelis@gmail.com>
Fri, 14 Apr 2017 15:00:35 +0000 (15:00 +0000)
FollowerTest.testCaptureSnapshotOnLastEntryInAppendEntries:1152 Persisted journal entries size: [] expected:<1> but was:<0>

The test waits for the deletion of journal entries, which occurs after the
snapshot is saved, and then checks the persistent journal for the remaining
ApplyJournalEntries. But occasionally the persisting of the ApplyJournalEntries
message occurs after the deletion, so the assertion fails b/c the
ApplyJournalEntries wasn't persisted yet. This is a little odd b/c the
sequencing in the raft code is that the ApplyJournalEntries write is done
before the delete, so it should also be observed in the same order in the
InMemoryJournal, even though it doesn't really matter either way.

To alleviate the problem I added a wait for the ApplyJournalEntries
message in the journal in the 3 similar tests.

I also made a couple of other minor changes to address issues that I observed
while running the tests.

Change-Id: I67cbb8fd79c91cd1cc23c363b78e7f5e9b9f2bbe
Signed-off-by: Tom Pantelis <tompantelis@gmail.com>
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/TestActorFactory.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractRaftActorBehaviorTest.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/FollowerTest.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/utils/InMemoryJournal.java

index e387584..639436d 100644 (file)
@@ -22,6 +22,7 @@ import akka.actor.ActorRef;
 import akka.actor.ActorSelection;
 import akka.actor.ActorSystem;
 import akka.actor.Identify;
+import akka.actor.InvalidActorNameException;
 import akka.actor.PoisonPill;
 import akka.actor.Props;
 import akka.pattern.Patterns;
@@ -108,8 +109,18 @@ public class TestActorFactory implements AutoCloseable {
      */
     @SuppressWarnings("unchecked")
     public <T extends Actor> TestActorRef<T> createTestActor(Props props, String actorId) {
-        TestActorRef<T> actorRef = TestActorRef.create(system, props, actorId);
-        return (TestActorRef<T>) addActor(actorRef, true);
+        InvalidActorNameException lastError = null;
+        for (int i = 0; i < 10; i++) {
+            try {
+                TestActorRef<T> actorRef = TestActorRef.create(system, props, actorId);
+                return (TestActorRef<T>) addActor(actorRef, true);
+            } catch (InvalidActorNameException e) {
+                lastError = e;
+                Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+            }
+        }
+
+        throw lastError;
     }
 
     /**
index df21943..3dba8d2 100644 (file)
@@ -39,6 +39,8 @@ import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
 import org.opendaylight.controller.cluster.raft.persisted.SimpleReplicatedLogEntry;
 import org.opendaylight.controller.cluster.raft.policy.RaftPolicy;
 import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload;
+import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
+import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
 import org.slf4j.LoggerFactory;
 
@@ -58,6 +60,9 @@ public abstract class AbstractRaftActorBehaviorTest<T extends RaftActorBehavior>
         }
 
         actorFactory.close();
+
+        InMemoryJournal.clear();
+        InMemorySnapshotStore.clear();
     }
 
     /**
@@ -350,7 +355,7 @@ public abstract class AbstractRaftActorBehaviorTest<T extends RaftActorBehavior>
     }
 
     protected void logStart(String name) {
-        LoggerFactory.getLogger(LeaderTest.class).info("Starting " + name);
+        LoggerFactory.getLogger(getClass()).info("Starting " + name);
     }
 
     protected RaftPolicy createRaftPolicy(final boolean automaticElectionsEnabled,
index a6df34a..15fca0c 100644 (file)
@@ -1131,6 +1131,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
 
         InMemorySnapshotStore.addSnapshotSavedLatch(id);
         InMemoryJournal.addDeleteMessagesCompleteLatch(id);
+        InMemoryJournal.addWriteMessagesCompleteLatch(id, 1, ApplyJournalEntries.class);
 
         List<ReplicatedLogEntry> entries = Arrays.asList(
                 newReplicatedLogEntry(1, 0, "one"), newReplicatedLogEntry(1, 1, "two"));
@@ -1145,6 +1146,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
         final Snapshot snapshot = InMemorySnapshotStore.waitForSavedSnapshot(id, Snapshot.class);
 
         InMemoryJournal.waitForDeleteMessagesComplete(id);
+        InMemoryJournal.waitForWriteMessagesComplete(id);
         // We expect the ApplyJournalEntries for index 1 to remain in the persisted log b/c it's still queued for
         // persistence by the time we initiate capture so the last persisted journal sequence number doesn't include it.
         // This is OK - on recovery it will be a no-op since index 1 has already been applied.
@@ -1184,6 +1186,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
 
         InMemorySnapshotStore.addSnapshotSavedLatch(id);
         InMemoryJournal.addDeleteMessagesCompleteLatch(id);
+        InMemoryJournal.addWriteMessagesCompleteLatch(id, 1, ApplyJournalEntries.class);
 
         List<ReplicatedLogEntry> entries = Arrays.asList(
                 newReplicatedLogEntry(1, 0, "one"), newReplicatedLogEntry(1, 1, "two"),
@@ -1199,6 +1202,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
         final Snapshot snapshot = InMemorySnapshotStore.waitForSavedSnapshot(id, Snapshot.class);
 
         InMemoryJournal.waitForDeleteMessagesComplete(id);
+        InMemoryJournal.waitForWriteMessagesComplete(id);
         // We expect the ApplyJournalEntries for index 2 to remain in the persisted log b/c it's still queued for
         // persistence by the time we initiate capture so the last persisted journal sequence number doesn't include it.
         // This is OK - on recovery it will be a no-op since index 2 has already been applied.
@@ -1257,6 +1261,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
 
         InMemorySnapshotStore.addSnapshotSavedLatch(id);
         InMemoryJournal.addDeleteMessagesCompleteLatch(id);
+        InMemoryJournal.addWriteMessagesCompleteLatch(id, 1, ApplyJournalEntries.class);
 
         List<ReplicatedLogEntry> entries = Arrays.asList(
                 newReplicatedLogEntry(1, 0, "one"), newReplicatedLogEntry(1, 1, "two"),
@@ -1272,6 +1277,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
         final Snapshot snapshot = InMemorySnapshotStore.waitForSavedSnapshot(id, Snapshot.class);
 
         InMemoryJournal.waitForDeleteMessagesComplete(id);
+        InMemoryJournal.waitForWriteMessagesComplete(id);
         // We expect the ApplyJournalEntries for index 0 to remain in the persisted log b/c it's still queued for
         // persistence by the time we initiate capture so the last persisted journal sequence number doesn't include it.
         // This is OK - on recovery it will be a no-op since index 0 has already been applied.
index 8c6cf09..c17c29c 100644 (file)
@@ -76,6 +76,9 @@ public class InMemoryJournal extends AsyncWriteJournal {
 
     public static void clear() {
         JOURNALS.clear();
+        DELETE_MESSAGES_COMPLETE_LATCHES.clear();
+        WRITE_MESSAGES_COMPLETE.clear();
+        BLOCK_READ_MESSAGES_LATCHES.clear();
     }
 
     @SuppressWarnings("unchecked")

©2013 OpenDaylight, A Linux Foundation Collaborative Project. All Rights Reserved.
OpenDaylight is a registered trademark of The OpenDaylight Project, Inc.
Linux Foundation and OpenDaylight are registered trademarks of the Linux Foundation.
Linux is a registered trademark of Linus Torvalds.