Fix intermittent failures in FollowerTest 15/54815/2 15/54815/3
authorTom Pantelis <tompantelis@gmail.com>
Tue, 11 Apr 2017 21:39:38 +0000 (17:39 -0400)
committerTom Pantelis <tompantelis@gmail.com>
Wed, 12 Apr 2017 06:52:41 +0000 (06:52 +0000)
FollowerTest.testCaptureSnapshotOnLastEntryInAppendEntries:1152 Persisted journal entries size: [] expected:<1> but was:<0>

The test waits on the deletion of journal entries after the snapshot is saved
to occur and then checks the persistent journal for the remaining
ApplyJournalEntries. But occasionally the persisting of the ApplyJournalEntries
message occurs after the deletion so the assertion fails b/c the
ApplyJournalEntries wasn;t persisted yet. This is a little odd b/c the
sequencing in the raft code is that the ApplyJournalEntries write is done
before the delete so it should also be observed the same way in the
InMemoryJournal, even though it doesn't really matter either way.

To alleviate the problem I added a wait for the ApplyJournalEntries
message in the journal in the 3 similar tests.

I also made a couple other minor changes that I observed while running the
tests.

Change-Id: I67cbb8fd79c91cd1cc23c363b78e7f5e9b9f2bbe
Signed-off-by: Tom Pantelis <tompantelis@gmail.com>
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/TestActorFactory.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractRaftActorBehaviorTest.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/FollowerTest.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/utils/InMemoryJournal.java

index e3875843515c8fca320443196d31e89ff02b0008..639436d026c04c5c96169b3fc330a120484896ba 100644 (file)
@@ -22,6 +22,7 @@ import akka.actor.ActorRef;
 import akka.actor.ActorSelection;
 import akka.actor.ActorSystem;
 import akka.actor.Identify;
+import akka.actor.InvalidActorNameException;
 import akka.actor.PoisonPill;
 import akka.actor.Props;
 import akka.pattern.Patterns;
@@ -108,8 +109,18 @@ public class TestActorFactory implements AutoCloseable {
      */
     @SuppressWarnings("unchecked")
     public <T extends Actor> TestActorRef<T> createTestActor(Props props, String actorId) {
-        TestActorRef<T> actorRef = TestActorRef.create(system, props, actorId);
-        return (TestActorRef<T>) addActor(actorRef, true);
+        InvalidActorNameException lastError = null;
+        for (int i = 0; i < 10; i++) {
+            try {
+                TestActorRef<T> actorRef = TestActorRef.create(system, props, actorId);
+                return (TestActorRef<T>) addActor(actorRef, true);
+            } catch (InvalidActorNameException e) {
+                lastError = e;
+                Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+            }
+        }
+
+        throw lastError;
     }
 
     /**
index df219436007be602f73946e178f8bf510577df84..3dba8d240fe1f4eb7f0d12967a1ce817455eae1f 100644 (file)
@@ -39,6 +39,8 @@ import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
 import org.opendaylight.controller.cluster.raft.persisted.SimpleReplicatedLogEntry;
 import org.opendaylight.controller.cluster.raft.policy.RaftPolicy;
 import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload;
+import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
+import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
 import org.slf4j.LoggerFactory;
 
@@ -58,6 +60,9 @@ public abstract class AbstractRaftActorBehaviorTest<T extends RaftActorBehavior>
         }
 
         actorFactory.close();
+
+        InMemoryJournal.clear();
+        InMemorySnapshotStore.clear();
     }
 
     /**
@@ -350,7 +355,7 @@ public abstract class AbstractRaftActorBehaviorTest<T extends RaftActorBehavior>
     }
 
     protected void logStart(String name) {
-        LoggerFactory.getLogger(LeaderTest.class).info("Starting " + name);
+        LoggerFactory.getLogger(getClass()).info("Starting " + name);
     }
 
     protected RaftPolicy createRaftPolicy(final boolean automaticElectionsEnabled,
index a6df34aa5968f88e4bc346d38a61104d00ec89c5..15fca0cb8d80696393e30ea465d5263ba339fa91 100644 (file)
@@ -1131,6 +1131,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
 
         InMemorySnapshotStore.addSnapshotSavedLatch(id);
         InMemoryJournal.addDeleteMessagesCompleteLatch(id);
+        InMemoryJournal.addWriteMessagesCompleteLatch(id, 1, ApplyJournalEntries.class);
 
         List<ReplicatedLogEntry> entries = Arrays.asList(
                 newReplicatedLogEntry(1, 0, "one"), newReplicatedLogEntry(1, 1, "two"));
@@ -1145,6 +1146,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
         final Snapshot snapshot = InMemorySnapshotStore.waitForSavedSnapshot(id, Snapshot.class);
 
         InMemoryJournal.waitForDeleteMessagesComplete(id);
+        InMemoryJournal.waitForWriteMessagesComplete(id);
         // We expect the ApplyJournalEntries for index 1 to remain in the persisted log b/c it's still queued for
         // persistence by the time we initiate capture so the last persisted journal sequence number doesn't include it.
         // This is OK - on recovery it will be a no-op since index 1 has already been applied.
@@ -1184,6 +1186,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
 
         InMemorySnapshotStore.addSnapshotSavedLatch(id);
         InMemoryJournal.addDeleteMessagesCompleteLatch(id);
+        InMemoryJournal.addWriteMessagesCompleteLatch(id, 1, ApplyJournalEntries.class);
 
         List<ReplicatedLogEntry> entries = Arrays.asList(
                 newReplicatedLogEntry(1, 0, "one"), newReplicatedLogEntry(1, 1, "two"),
@@ -1199,6 +1202,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
         final Snapshot snapshot = InMemorySnapshotStore.waitForSavedSnapshot(id, Snapshot.class);
 
         InMemoryJournal.waitForDeleteMessagesComplete(id);
+        InMemoryJournal.waitForWriteMessagesComplete(id);
         // We expect the ApplyJournalEntries for index 2 to remain in the persisted log b/c it's still queued for
         // persistence by the time we initiate capture so the last persisted journal sequence number doesn't include it.
         // This is OK - on recovery it will be a no-op since index 2 has already been applied.
@@ -1257,6 +1261,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
 
         InMemorySnapshotStore.addSnapshotSavedLatch(id);
         InMemoryJournal.addDeleteMessagesCompleteLatch(id);
+        InMemoryJournal.addWriteMessagesCompleteLatch(id, 1, ApplyJournalEntries.class);
 
         List<ReplicatedLogEntry> entries = Arrays.asList(
                 newReplicatedLogEntry(1, 0, "one"), newReplicatedLogEntry(1, 1, "two"),
@@ -1272,6 +1277,7 @@ public class FollowerTest extends AbstractRaftActorBehaviorTest<Follower> {
         final Snapshot snapshot = InMemorySnapshotStore.waitForSavedSnapshot(id, Snapshot.class);
 
         InMemoryJournal.waitForDeleteMessagesComplete(id);
+        InMemoryJournal.waitForWriteMessagesComplete(id);
         // We expect the ApplyJournalEntries for index 0 to remain in the persisted log b/c it's still queued for
         // persistence by the time we initiate capture so the last persisted journal sequence number doesn't include it.
         // This is OK - on recovery it will be a no-op since index 0 has already been applied.
index 8c6cf0945713ed0cbae3d5f7fbb2870220985dc5..c17c29c0fd1846ff0ad76a5299fe5bb10cd8c68e 100644 (file)
@@ -76,6 +76,9 @@ public class InMemoryJournal extends AsyncWriteJournal {
 
     public static void clear() {
         JOURNALS.clear();
+        DELETE_MESSAGES_COMPLETE_LATCHES.clear();
+        WRITE_MESSAGES_COMPLETE.clear();
+        BLOCK_READ_MESSAGES_LATCHES.clear();
     }
 
     @SuppressWarnings("unchecked")