From 0c165317c86dff04fdca1bba3412a1f4ec292e0a Mon Sep 17 00:00:00 2001 From: Tom Pantelis Date: Wed, 29 Jun 2016 03:04:47 -0400 Subject: [PATCH] Fix intermittent test failures in CDS Seeing intermittent failures on Jenkins, e.g.: Failed tests: PartitionedLeadersElectionScenarioTest.runTest1:37->setupInitialMemberBehaviors:313->AbstractLeaderElectionScenarioTest.initializeLeaderBehavior:207 Missing messages of type class org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply Sometimes the initial AppendEntries messages go to dead letters, probably because the follower actors haven't been fully created/initialized by Akka. Retries were added as a workaround. Failed tests: ClusterAdminRpcServiceTest.testChangeMemberVotingStatesForShard:555->verifySuccessfulRpcResult:296 Rpc failed with error: RpcError [message=Failed to change member voting states for shard cars: Shard member-3-shard-cars-config_testChangeMemberVotingStatusForShard currently has no leader. Try again later., severity=ERROR, errorType=RPC, tag=operation-failed, applicationTag=null, info=null, cause=null] The test needs to ensure that node3's datastore shards are ready with leaders. 
Change-Id: I5031c2a7b3e6eeddbf80b8eb346492acd11d664c Signed-off-by: Tom Pantelis --- .../AbstractLeaderElectionScenarioTest.java | 24 ++++++++++++++++--- .../admin/ClusterAdminRpcServiceTest.java | 5 ++++ .../shardmanager/ShardManagerTest.java | 3 ++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderElectionScenarioTest.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderElectionScenarioTest.java index 36f5fd502e..348b3d9acd 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderElectionScenarioTest.java +++ b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderElectionScenarioTest.java @@ -203,13 +203,31 @@ public class AbstractLeaderElectionScenarioTest { void initializeLeaderBehavior(MemberActor actor, MockRaftActorContext context, int numActiveFollowers) throws Exception { // Leader sends immediate heartbeats - we don't care about it so ignore it. + // Sometimes the initial AppendEntries messages go to dead letters, probably b/c the follower actors + // haven't been fully created/initialized by akka. So we try up to 3 times to create the Leader as + // a workaround. 
- actor.expectMessageClass(AppendEntriesReply.class, numActiveFollowers); + Leader leader = null; + AssertionError lastAssertError = null; + for(int i = 1; i <= 3; i++) { + actor.expectMessageClass(AppendEntriesReply.class, numActiveFollowers); + + leader = new Leader(context); + try { + actor.waitForExpectedMessages(AppendEntriesReply.class); + lastAssertError = null; + break; + } catch (AssertionError e) { + lastAssertError = e; + } + } + + if(lastAssertError != null) { + throw lastAssertError; + } - Leader leader = new Leader(context); context.setCurrentBehavior(leader); - actor.waitForExpectedMessages(AppendEntriesReply.class); // Delay assignment here so the AppendEntriesReply isn't forwarded to the behavior. actor.behavior = leader; diff --git a/opendaylight/md-sal/sal-cluster-admin/src/test/java/org/opendaylight/controller/cluster/datastore/admin/ClusterAdminRpcServiceTest.java b/opendaylight/md-sal/sal-cluster-admin/src/test/java/org/opendaylight/controller/cluster/datastore/admin/ClusterAdminRpcServiceTest.java index af2b4a9df6..38368dbd52 100644 --- a/opendaylight/md-sal/sal-cluster-admin/src/test/java/org/opendaylight/controller/cluster/datastore/admin/ClusterAdminRpcServiceTest.java +++ b/opendaylight/md-sal/sal-cluster-admin/src/test/java/org/opendaylight/controller/cluster/datastore/admin/ClusterAdminRpcServiceTest.java @@ -543,6 +543,7 @@ public class ClusterAdminRpcServiceTest { moduleShardsConfig(moduleShardsConfig).build(); leaderNode1.configDataStore().waitTillReady(); + replicaNode3.configDataStore().waitTillReady(); verifyRaftPeersPresent(leaderNode1.configDataStore(), "cars", "member-2", "member-3"); verifyRaftPeersPresent(replicaNode2.configDataStore(), "cars", "member-1", "member-3"); verifyRaftPeersPresent(replicaNode3.configDataStore(), "cars", "member-1", "member-2"); @@ -609,6 +610,8 @@ public class ClusterAdminRpcServiceTest { leaderNode1.configDataStore().waitTillReady(); leaderNode1.operDataStore().waitTillReady(); + 
replicaNode3.configDataStore().waitTillReady(); + replicaNode3.operDataStore().waitTillReady(); verifyRaftPeersPresent(leaderNode1.configDataStore(), "cars", "member-2", "member-3"); verifyRaftPeersPresent(replicaNode2.configDataStore(), "cars", "member-1", "member-3"); verifyRaftPeersPresent(replicaNode3.configDataStore(), "cars", "member-1", "member-2"); @@ -661,6 +664,8 @@ public class ClusterAdminRpcServiceTest { leaderNode1.configDataStore().waitTillReady(); leaderNode1.operDataStore().waitTillReady(); + replicaNode3.configDataStore().waitTillReady(); + replicaNode3.operDataStore().waitTillReady(); verifyVotingStates(leaderNode1.configDataStore(), "cars", new SimpleEntry<>("member-1", true), new SimpleEntry<>("member-2", true), new SimpleEntry<>("member-3", false)); diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/shardmanager/ShardManagerTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/shardmanager/ShardManagerTest.java index 3b81412bac..2164450ed5 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/shardmanager/ShardManagerTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/shardmanager/ShardManagerTest.java @@ -238,7 +238,8 @@ public class ShardManagerTest extends AbstractActorTest { private Props newPropsShardMgrWithMockShardActor() { - return newTestShardMgrBuilderWithMockShardActor().props(); + return newTestShardMgrBuilderWithMockShardActor().props().withDispatcher( + Dispatchers.DefaultDispatcherId()); } private Props newPropsShardMgrWithMockShardActor(ActorRef shardActor) { -- 2.36.6