From: Tom Pantelis Date: Wed, 10 May 2017 07:00:09 +0000 (-0400) Subject: Fix testTransactionForwardedToLeaderAfterRetry failure X-Git-Tag: release/nitrogen~279 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=commitdiff_plain;h=e345c2a17f737d537cda45b0f737dff417e3b359;hp=e12b3d81b61a3be12d22afcb7a190611a6895d11;ds=sidebyside Fix testTransactionForwardedToLeaderAfterRetry failure java.util.concurrent.ExecutionException: ReadFailedException{message=Error executeRead ReadData for path /(urn:opendaylight:params:xml:ns:yang:controller:md:sal:dom:store:test:cars?revision=2014-03-13)cars/car, errorList=[RpcError [message=Error executeRead ReadData for path /(urn:opendaylight:params:xml:ns:yang:controller:md:sal:dom:store:test:cars?revision=2014-03-13)cars/car, severity=ERROR, errorType=APPLICATION, tag=operation-failed, applicationTag=null, info=null, cause=org.opendaylight.controller.md.sal.common.api.data.DataStoreUnavailableException: Shard member-1-shard-cars-testTransactionForwardedToLeaderAfterRetry currently has no leader. Try again later.]]} The test submits transactions and deposes the current leader so it forwards the pending transactions to the other member-2 that assumes leadership. However it calls Cluster.get(followerSystem).leave(MEMBER_1_ADDRESS); which may result in an untimely MemberExited message sent to the ShardManager that clears the peer address, causing the FindPrimary message to fail to find the leader. I'm not clear why this call was put in but it's unnecessary and may cause a failure if the timing is right. I also saw a failure due to a timeout when forwarding a pending transaction. This is because it takes some time for member-2 to switch to candidate and become leader due to the checking of current leader availability via the akka cluster on ElectionTimeout. If it takes too long the pending transaction forwarding may time out. 
To alleviate this, I forced the switch to candidate by sending an immediate TimeoutNow message. Change-Id: I2dd228964779e2b755b1740a518e2c400b5cb88d Signed-off-by: Tom Pantelis --- diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java index 63d785c984..c52e4a39a2 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java @@ -68,6 +68,7 @@ import org.opendaylight.controller.cluster.datastore.modification.MergeModificat import org.opendaylight.controller.cluster.datastore.modification.WriteModification; import org.opendaylight.controller.cluster.datastore.persisted.MetadataShardDataTreeSnapshot; import org.opendaylight.controller.cluster.datastore.persisted.ShardSnapshotState; +import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow; import org.opendaylight.controller.cluster.raft.client.messages.Shutdown; import org.opendaylight.controller.cluster.raft.persisted.ApplyJournalEntries; import org.opendaylight.controller.cluster.raft.persisted.Snapshot; @@ -845,7 +846,6 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest { .customRaftPolicyImplementation(DisableElectionsRaftPolicy.class.getName()) .shardElectionTimeoutFactor(10)); - Cluster.get(followerSystem).leave(MEMBER_1_ADDRESS); leaderTestKit.waitUntilNoLeader(leaderDistributedDataStore.getActorContext(), "cars"); // Submit all tx's - the messages should get queued for retry. 
@@ -860,6 +860,10 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest { sendDatastoreContextUpdate(followerDistributedDataStore, followerDatastoreContextBuilder .customRaftPolicyImplementation(null).shardElectionTimeoutFactor(1)); + IntegrationTestKit.findLocalShard(followerDistributedDataStore.getActorContext(), "cars") + .tell(TimeoutNow.INSTANCE, ActorRef.noSender()); + IntegrationTestKit.findLocalShard(followerDistributedDataStore.getActorContext(), "people") + .tell(TimeoutNow.INSTANCE, ActorRef.noSender()); followerTestKit.doCommit(writeTx1CanCommit, writeTx1Cohort); followerTestKit.doCommit(writeTx2CanCommit, writeTx2Cohort);