Fix intermittent test failures in CDS 28/41028/3
author Tom Pantelis <tpanteli@brocade.com>
Wed, 29 Jun 2016 07:04:47 +0000 (03:04 -0400)
committer Tom Pantelis <tpanteli@brocade.com>
Wed, 29 Jun 2016 20:00:50 +0000 (16:00 -0400)
Seeing intermittent failures on Jenkins, e.g.:

Failed tests:
  PartitionedLeadersElectionScenarioTest.runTest1:37->setupInitialMemberBehaviors:313->AbstractLeaderElectionScenarioTest.initializeLeaderBehavior:207
Missing messages of type class
org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply

Sometimes the initial AppendEntries messages go to dead letters,
probably because the follower actors haven't been fully created/initialized by Akka.
So, as a workaround, the test now retries creating the Leader up to 3 times.

Failed tests:
  ClusterAdminRpcServiceTest.testChangeMemberVotingStatesForShard:555->verifySuccessfulRpcResult:296
Rpc failed with error: RpcError [message=Failed to change member voting
states for shard cars: Shard
member-3-shard-cars-config_testChangeMemberVotingStatusForShard
currently has no leader. Try again later., severity=ERROR,
errorType=RPC, tag=operation-failed, applicationTag=null, info=null,
cause=null]

The tests need to ensure node3's datastore shards are ready with leaders,
so they now also call waitTillReady() on replicaNode3's datastores.

Change-Id: I5031c2a7b3e6eeddbf80b8eb346492acd11d664c
Signed-off-by: Tom Pantelis <tpanteli@brocade.com>
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderElectionScenarioTest.java
opendaylight/md-sal/sal-cluster-admin/src/test/java/org/opendaylight/controller/cluster/datastore/admin/ClusterAdminRpcServiceTest.java
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/shardmanager/ShardManagerTest.java

index 36f5fd5..348b3d9 100644 (file)
@@ -203,13 +203,31 @@ public class AbstractLeaderElectionScenarioTest {
 
     void initializeLeaderBehavior(MemberActor actor, MockRaftActorContext context, int numActiveFollowers) throws Exception {
         // Leader sends immediate heartbeats - we don't care about it so ignore it.
+        // Sometimes the initial AppendEntries messages go to dead letters, probably b/c the follower actors
+        // haven't been fully created/initialized by akka. So we try up to 3 times to create the Leader as
+        // a workaround.
 
-        actor.expectMessageClass(AppendEntriesReply.class, numActiveFollowers);
+        Leader leader = null;
+        AssertionError lastAssertError = null;
+        for(int i = 1; i <= 3; i++) {
+            actor.expectMessageClass(AppendEntriesReply.class, numActiveFollowers);
+
+            leader = new Leader(context);
+            try {
+                actor.waitForExpectedMessages(AppendEntriesReply.class);
+                lastAssertError = null;
+                break;
+            } catch (AssertionError e) {
+                lastAssertError = e;
+            }
+        }
+
+        if(lastAssertError != null) {
+            throw lastAssertError;
+        }
 
-        Leader leader = new Leader(context);
         context.setCurrentBehavior(leader);
 
-        actor.waitForExpectedMessages(AppendEntriesReply.class);
         // Delay assignment here so the AppendEntriesReply isn't forwarded to the behavior.
         actor.behavior = leader;
 
index af2b4a9..38368db 100644 (file)
@@ -543,6 +543,7 @@ public class ClusterAdminRpcServiceTest {
                 moduleShardsConfig(moduleShardsConfig).build();
 
         leaderNode1.configDataStore().waitTillReady();
+        replicaNode3.configDataStore().waitTillReady();
         verifyRaftPeersPresent(leaderNode1.configDataStore(), "cars", "member-2", "member-3");
         verifyRaftPeersPresent(replicaNode2.configDataStore(), "cars", "member-1", "member-3");
         verifyRaftPeersPresent(replicaNode3.configDataStore(), "cars", "member-1", "member-2");
@@ -609,6 +610,8 @@ public class ClusterAdminRpcServiceTest {
 
         leaderNode1.configDataStore().waitTillReady();
         leaderNode1.operDataStore().waitTillReady();
+        replicaNode3.configDataStore().waitTillReady();
+        replicaNode3.operDataStore().waitTillReady();
         verifyRaftPeersPresent(leaderNode1.configDataStore(), "cars", "member-2", "member-3");
         verifyRaftPeersPresent(replicaNode2.configDataStore(), "cars", "member-1", "member-3");
         verifyRaftPeersPresent(replicaNode3.configDataStore(), "cars", "member-1", "member-2");
@@ -661,6 +664,8 @@ public class ClusterAdminRpcServiceTest {
 
         leaderNode1.configDataStore().waitTillReady();
         leaderNode1.operDataStore().waitTillReady();
+        replicaNode3.configDataStore().waitTillReady();
+        replicaNode3.operDataStore().waitTillReady();
         verifyVotingStates(leaderNode1.configDataStore(), "cars", new SimpleEntry<>("member-1", true),
                 new SimpleEntry<>("member-2", true), new SimpleEntry<>("member-3", false));
 
index 3b81412..2164450 100644 (file)
@@ -238,7 +238,8 @@ public class ShardManagerTest extends AbstractActorTest {
 
 
     private Props newPropsShardMgrWithMockShardActor() {
-        return newTestShardMgrBuilderWithMockShardActor().props();
+        return newTestShardMgrBuilderWithMockShardActor().props().withDispatcher(
+                Dispatchers.DefaultDispatcherId());
     }
 
     private Props newPropsShardMgrWithMockShardActor(ActorRef shardActor) {