From e345c2a17f737d537cda45b0f737dff417e3b359 Mon Sep 17 00:00:00 2001 From: Tom Pantelis Date: Wed, 10 May 2017 03:00:09 -0400 Subject: [PATCH 1/1] Fix testTransactionForwardedToLeaderAfterRetry failure java.util.concurrent.ExecutionException: ReadFailedException{message=Error executeRead ReadData for path /(urn:opendaylight:params:xml:ns:yang:controller:md:sal:dom:store:test:cars?revision=2014-03-13)cars/car, errorList=[RpcError [message=Error executeRead ReadData for path /(urn:opendaylight:params:xml:ns:yang:controller:md:sal:dom:store:test:cars?revision=2014-03-13)cars/car, severity=ERROR, errorType=APPLICATION, tag=operation-failed, applicationTag=null, info=null, cause=org.opendaylight.controller.md.sal.common.api.data.DataStoreUnavailableException: Shard member-1-shard-cars-testTransactionForwardedToLeaderAfterRetry currently has no leader. Try again later.]]} The test submits transactions and deposes the current leader so it forwards the pending transactions to the other member-2 that assumes leadership. However it calls Cluster.get(followerSystem).leave(MEMBER_1_ADDRESS); which may result in an untimely MemberExited message sent to the ShardManager that clears the peer address, causing the FindPrimary message to fail to find the leader. I'm not clear why this was call was put in but it's unnecessary and may cause a failure if the timing is right. I also saw a failure due to a timeout when forwarding a pending transaction. This is b/c it takes some time for member-2 to switch to candidate and become leader due to the checking of current leader availability via the akka cluster on ElectionTimout. If it takes too long the pending transaction forwarding may time out. To alleviate this, I forced the swicth to candidate by sending an immediate TimeoutNow message. Change-Id: I2dd228964779e2b755b1740a518e2c400b5cb88d Signed-off-by: Tom Pantelis --- .../DistributedDataStoreRemotingIntegrationTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java index 63d785c984..c52e4a39a2 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java @@ -68,6 +68,7 @@ import org.opendaylight.controller.cluster.datastore.modification.MergeModificat import org.opendaylight.controller.cluster.datastore.modification.WriteModification; import org.opendaylight.controller.cluster.datastore.persisted.MetadataShardDataTreeSnapshot; import org.opendaylight.controller.cluster.datastore.persisted.ShardSnapshotState; +import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow; import org.opendaylight.controller.cluster.raft.client.messages.Shutdown; import org.opendaylight.controller.cluster.raft.persisted.ApplyJournalEntries; import org.opendaylight.controller.cluster.raft.persisted.Snapshot; @@ -845,7 +846,6 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest { .customRaftPolicyImplementation(DisableElectionsRaftPolicy.class.getName()) .shardElectionTimeoutFactor(10)); - Cluster.get(followerSystem).leave(MEMBER_1_ADDRESS); leaderTestKit.waitUntilNoLeader(leaderDistributedDataStore.getActorContext(), "cars"); // Submit all tx's - the messages should get queued for retry. @@ -860,6 +860,10 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest { sendDatastoreContextUpdate(followerDistributedDataStore, followerDatastoreContextBuilder .customRaftPolicyImplementation(null).shardElectionTimeoutFactor(1)); + IntegrationTestKit.findLocalShard(followerDistributedDataStore.getActorContext(), "cars") + .tell(TimeoutNow.INSTANCE, ActorRef.noSender()); + IntegrationTestKit.findLocalShard(followerDistributedDataStore.getActorContext(), "people") + .tell(TimeoutNow.INSTANCE, ActorRef.noSender()); followerTestKit.doCommit(writeTx1CanCommit, writeTx1Cohort); followerTestKit.doCommit(writeTx2CanCommit, writeTx2Cohort); -- 2.36.6