Fix intermittent failure in testRemoveShardReplica 53/44953/2
author: Tom Pantelis <tpanteli@brocade.com>
Wed, 31 Aug 2016 16:48:38 +0000 (12:48 -0400)
committer: Tom Pantelis <tpanteli@brocade.com>
Thu, 1 Sep 2016 02:56:45 +0000 (02:56 +0000)
This test has failed on Jenkins a few times:

java.lang.AssertionError: Shard cars is present
at org.junit.Assert.fail(Assert.java:88)
at org.opendaylight.controller.cluster.datastore.MemberNode.verifyNoShardPresent(MemberNode.java:174)
at org.opendaylight.controller.cluster.datastore.admin.ClusterAdminRpcServiceTest.testRemoveShardReplica(ClusterAdminRpcServiceTest.java:373)

The log output indicates that member-2 had not yet re-joined member-1 after it was
restarted. So when RemoveServer was sent to member-1 to remove member-2, member-1 tried
to send the ServerRemoved message to the member-2 shard, but the message wasn't delivered
and thus the shard wasn't shut down and removed. To alleviate this, I added a
waitTillReady call on member-2's config data store to ensure it has synced with the
shard leader on member-1.

Change-Id: I8de9e585998d9f7b2ab8e4fd3f23c1ab222886cc
Signed-off-by: Tom Pantelis <tpanteli@brocade.com>
opendaylight/md-sal/sal-cluster-admin/src/test/java/org/opendaylight/controller/cluster/datastore/admin/ClusterAdminRpcServiceTest.java

index e1872933e210285b252f7159ed668147d4540195..62c1a2eb722dabf4caf0ec15b7ed2ad6005e5b05 100644 (file)
@@ -60,7 +60,6 @@ import org.opendaylight.controller.cluster.raft.ReplicatedLogImplEntry;
 import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
 import org.opendaylight.controller.cluster.raft.persisted.ServerInfo;
 import org.opendaylight.controller.cluster.raft.base.messages.UpdateElectionTerm;
-import org.opendaylight.controller.cluster.raft.client.messages.OnDemandRaftState;
 import org.opendaylight.controller.cluster.raft.utils.InMemoryJournal;
 import org.opendaylight.controller.cluster.raft.utils.InMemorySnapshotStore;
 import org.opendaylight.controller.md.cluster.datastore.model.CarsModel;
@@ -197,12 +196,9 @@ public class ClusterAdminRpcServiceTest {
         // Verify all data has been replicated. We expect 3 log entries and thus last applied index of 2 -
         // 2 ServerConfigurationPayload entries and the transaction payload entry.
 
-        RaftStateVerifier verifier = new RaftStateVerifier() {
-            @Override
-            public void verify(OnDemandRaftState raftState) {
-                assertEquals("Commit index", 2, raftState.getCommitIndex());
-                assertEquals("Last applied index", 2, raftState.getLastApplied());
-            }
+        RaftStateVerifier verifier = raftState -> {
+            assertEquals("Commit index", 2, raftState.getCommitIndex());
+            assertEquals("Last applied index", 2, raftState.getLastApplied());
         };
 
         verifyRaftState(leaderNode1.configDataStore(), "cars", verifier);
@@ -317,7 +313,7 @@ public class ClusterAdminRpcServiceTest {
 
     @Test
     public void testRemoveShardReplica() throws Exception {
-        String name = "testRemoveShardReplicaLocal";
+        String name = "testRemoveShardReplica";
         String moduleShardsConfig = "module-shards-member1-and-2-and-3.conf";
         MemberNode leaderNode1 = MemberNode.builder(memberNodes).akkaConfig("Member1").testName(name ).
                 moduleShardsConfig(moduleShardsConfig).
@@ -331,6 +327,7 @@ public class ClusterAdminRpcServiceTest {
                 moduleShardsConfig(moduleShardsConfig).build();
 
         leaderNode1.configDataStore().waitTillReady();
+        replicaNode3.configDataStore().waitTillReady();
         verifyRaftPeersPresent(leaderNode1.configDataStore(), "cars", "member-2", "member-3");
         verifyRaftPeersPresent(replicaNode2.configDataStore(), "cars", "member-1", "member-3");
         verifyRaftPeersPresent(replicaNode3.configDataStore(), "cars", "member-1", "member-2");
@@ -357,6 +354,7 @@ public class ClusterAdminRpcServiceTest {
         replicaNode2 = MemberNode.builder(memberNodes).akkaConfig("Member2").testName(name).
                 moduleShardsConfig(moduleShardsConfig).build();
 
+        replicaNode2.configDataStore().waitTillReady();
         verifyRaftPeersPresent(replicaNode2.configDataStore(), "cars", "member-1");
 
         // Invoke RPC service on member-1 to remove member-2
@@ -406,13 +404,8 @@ public class ClusterAdminRpcServiceTest {
                         get(10, TimeUnit.SECONDS);
         verifySuccessfulRpcResult(rpcResult);
 
-        verifyRaftState(replicaNode2.configDataStore(), "cars", new RaftStateVerifier() {
-            @Override
-            public void verify(OnDemandRaftState raftState) {
-                assertThat("Leader Id", raftState.getLeader(), anyOf(containsString("member-2"),
-                        containsString("member-3")));
-            }
-        });
+        verifyRaftState(replicaNode2.configDataStore(), "cars", raftState ->
+                assertThat("Leader Id", raftState.getLeader(), anyOf(containsString("member-2"), containsString("member-3"))));
 
         verifyRaftPeersPresent(replicaNode2.configDataStore(), "cars", "member-3");
         verifyRaftPeersPresent(replicaNode3.configDataStore(), "cars", "member-2");