Allow incremental recovery
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / test / java / org / opendaylight / controller / cluster / datastore / DistributedDataStoreRemotingIntegrationTest.java
index 5abadccd500d42745dde4c37998c2ee885a2a11e..0c74c71d832988b5959f1119da2c5e9016b98985 100644 (file)
@@ -8,6 +8,7 @@
 package org.opendaylight.controller.cluster.datastore;
 
 import static org.awaitility.Awaitility.await;
+import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
@@ -24,6 +25,7 @@ import akka.actor.ActorSystem;
 import akka.actor.Address;
 import akka.actor.AddressFromURIString;
 import akka.cluster.Cluster;
+import akka.cluster.Member;
 import akka.dispatch.Futures;
 import akka.pattern.Patterns;
 import akka.testkit.javadsl.TestKit;
@@ -36,7 +38,6 @@ import com.google.common.util.concurrent.ListenableFuture;
 import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.Uninterruptibles;
 import com.typesafe.config.ConfigFactory;
-import java.math.BigInteger;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
@@ -46,7 +47,10 @@ import java.util.List;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Supplier;
 import org.junit.After;
@@ -67,6 +71,8 @@ import org.opendaylight.controller.cluster.databroker.ConcurrentDOMDataBroker;
 import org.opendaylight.controller.cluster.databroker.TestClientBackedDataStore;
 import org.opendaylight.controller.cluster.datastore.DatastoreContext.Builder;
 import org.opendaylight.controller.cluster.datastore.TestShard.RequestFrontendMetadata;
+import org.opendaylight.controller.cluster.datastore.TestShard.StartDropMessages;
+import org.opendaylight.controller.cluster.datastore.TestShard.StopDropMessages;
 import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException;
 import org.opendaylight.controller.cluster.datastore.exceptions.ShardLeaderNotRespondingException;
 import org.opendaylight.controller.cluster.datastore.messages.CommitTransactionReply;
@@ -81,7 +87,11 @@ import org.opendaylight.controller.cluster.datastore.persisted.FrontendShardData
 import org.opendaylight.controller.cluster.datastore.persisted.MetadataShardDataTreeSnapshot;
 import org.opendaylight.controller.cluster.datastore.persisted.ShardSnapshotState;
 import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow;
+import org.opendaylight.controller.cluster.raft.client.messages.GetOnDemandRaftState;
+import org.opendaylight.controller.cluster.raft.client.messages.OnDemandRaftState;
 import org.opendaylight.controller.cluster.raft.client.messages.Shutdown;
+import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
+import org.opendaylight.controller.cluster.raft.messages.RequestVote;
 import org.opendaylight.controller.cluster.raft.persisted.ApplyJournalEntries;
 import org.opendaylight.controller.cluster.raft.persisted.Snapshot;
 import org.opendaylight.controller.cluster.raft.policy.DisableElectionsRaftPolicy;
@@ -101,6 +111,7 @@ import org.opendaylight.mdsal.dom.spi.store.DOMStoreReadWriteTransaction;
 import org.opendaylight.mdsal.dom.spi.store.DOMStoreThreePhaseCommitCohort;
 import org.opendaylight.mdsal.dom.spi.store.DOMStoreTransactionChain;
 import org.opendaylight.mdsal.dom.spi.store.DOMStoreWriteTransaction;
+import org.opendaylight.yangtools.yang.common.Uint64;
 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
 import org.opendaylight.yangtools.yang.data.api.schema.ContainerNode;
 import org.opendaylight.yangtools.yang.data.api.schema.MapEntryNode;
@@ -265,11 +276,11 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         writeTx.write(CarsModel.BASE_PATH, CarsModel.emptyContainer());
         writeTx.write(CarsModel.CAR_LIST_PATH, CarsModel.newCarMapNode());
 
-        final MapEntryNode car1 = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+        final MapEntryNode car1 = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
         final YangInstanceIdentifier car1Path = CarsModel.newCarPath("optima");
         writeTx.merge(car1Path, car1);
 
-        final MapEntryNode car2 = CarsModel.newCarEntry("sportage", BigInteger.valueOf(25000));
+        final MapEntryNode car2 = CarsModel.newCarEntry("sportage", Uint64.valueOf(25000));
         final YangInstanceIdentifier car2Path = CarsModel.newCarPath("sportage");
         writeTx.merge(car2Path, car2);
 
@@ -354,7 +365,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         for (int i = 0; i < numCars; i++) {
             writeTx = txChain.newWriteOnlyTransaction();
             writeTx.write(CarsModel.newCarPath("car" + i),
-                    CarsModel.newCarEntry("car" + i, BigInteger.valueOf(20000)));
+                    CarsModel.newCarEntry("car" + i, Uint64.valueOf(20000)));
 
             followerTestKit.doCommit(writeTx.ready());
 
@@ -473,12 +484,12 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         rwTx.write(CarsModel.BASE_PATH, CarsModel.emptyContainer());
         rwTx.write(CarsModel.CAR_LIST_PATH, CarsModel.newCarMapNode());
 
-        final MapEntryNode car1 = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+        final MapEntryNode car1 = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
         rwTx.merge(CarsModel.newCarPath("optima"), car1);
 
         verifyCars(rwTx, car1);
 
-        final MapEntryNode car2 = CarsModel.newCarEntry("sportage", BigInteger.valueOf(25000));
+        final MapEntryNode car2 = CarsModel.newCarEntry("sportage", Uint64.valueOf(25000));
         final YangInstanceIdentifier car2Path = CarsModel.newCarPath("sportage");
         rwTx.merge(car2Path, car2);
 
@@ -562,7 +573,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
 
         rwTx.merge(CarsModel.CAR_LIST_PATH, CarsModel.newCarMapNode());
 
-        final MapEntryNode car1 = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+        final MapEntryNode car1 = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
         final YangInstanceIdentifier car1Path = CarsModel.newCarPath("optima");
         rwTx.write(car1Path, car1);
 
@@ -570,7 +581,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
 
         verifyCars(rwTx, car1);
 
-        final MapEntryNode car2 = CarsModel.newCarEntry("sportage", BigInteger.valueOf(25000));
+        final MapEntryNode car2 = CarsModel.newCarEntry("sportage", Uint64.valueOf(25000));
         rwTx.merge(CarsModel.newCarPath("sportage"), car2);
 
         rwTx.delete(car1Path);
@@ -603,7 +614,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
 
         final DOMStoreReadWriteTransaction readWriteTx = txChain.newReadWriteTransaction();
 
-        final MapEntryNode car = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+        final MapEntryNode car = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
         final YangInstanceIdentifier carPath = CarsModel.newCarPath("optima");
         readWriteTx.write(carPath, car);
 
@@ -756,7 +767,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
 
             writeTx = followerDistributedDataStore.newWriteOnlyTransaction();
 
-            MapEntryNode car1 = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+            MapEntryNode car1 = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
             YangInstanceIdentifier car1Path = CarsModel.newCarPath("optima");
             writeTx.merge(car1Path, car1);
 
@@ -785,7 +796,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         new WriteModification(CarsModel.BASE_PATH, CarsModel.emptyContainer()).apply(modification);
         new MergeModification(CarsModel.CAR_LIST_PATH, CarsModel.newCarMapNode()).apply(modification);
 
-        final MapEntryNode car1 = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+        final MapEntryNode car1 = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
         new WriteModification(CarsModel.newCarPath("optima"), car1).apply(modification);
         modification.ready();
 
@@ -804,7 +815,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         // Send another tx without immediate commit.
 
         modification = dataTree.takeSnapshot().newModification();
-        MapEntryNode car2 = CarsModel.newCarEntry("sportage", BigInteger.valueOf(30000));
+        MapEntryNode car2 = CarsModel.newCarEntry("sportage", Uint64.valueOf(30000));
         new WriteModification(CarsModel.newCarPath("sportage"), car2).apply(modification);
         modification.ready();
 
@@ -852,7 +863,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         new WriteModification(CarsModel.BASE_PATH, CarsModel.emptyContainer()).apply(modification);
         new MergeModification(CarsModel.CAR_LIST_PATH, CarsModel.newCarMapNode()).apply(modification);
 
-        final MapEntryNode car1 = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+        final MapEntryNode car1 = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
         new WriteModification(CarsModel.newCarPath("optima"), car1).apply(modification);
 
         ForwardedReadyTransaction forwardedReady = new ForwardedReadyTransaction(tx1,
@@ -873,7 +884,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         // Send another tx without immediate commit.
 
         modification = dataTree.takeSnapshot().newModification();
-        MapEntryNode car2 = CarsModel.newCarEntry("sportage", BigInteger.valueOf(30000));
+        MapEntryNode car2 = CarsModel.newCarEntry("sportage", Uint64.valueOf(30000));
         new WriteModification(CarsModel.newCarPath("sportage"), car2).apply(modification);
 
         forwardedReady = new ForwardedReadyTransaction(tx2,
@@ -943,7 +954,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         final DOMStoreWriteTransaction writeTx2 = followerDistributedDataStore.newWriteOnlyTransaction();
         final LinkedList<MapEntryNode> cars = new LinkedList<>();
         int carIndex = 1;
-        cars.add(CarsModel.newCarEntry("car" + carIndex, BigInteger.valueOf(carIndex)));
+        cars.add(CarsModel.newCarEntry("car" + carIndex, Uint64.valueOf(carIndex)));
         writeTx2.write(CarsModel.newCarPath("car" + carIndex), cars.getLast());
         carIndex++;
         NormalizedNode<?, ?> people = ImmutableNodes.mapNodeBuilder(PeopleModel.PERSON_QNAME)
@@ -958,7 +969,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
 
         final DOMStoreWriteTransaction writeTx3 = followerDistributedDataStore.newWriteOnlyTransaction();
         for (int i = 1; i <= 5; i++, carIndex++) {
-            cars.add(CarsModel.newCarEntry("car" + carIndex, BigInteger.valueOf(carIndex)));
+            cars.add(CarsModel.newCarEntry("car" + carIndex, Uint64.valueOf(carIndex)));
             writeTx3.write(CarsModel.newCarPath("car" + carIndex), cars.getLast());
         }
 
@@ -966,7 +977,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         // message on ready.
 
         final DOMStoreWriteTransaction writeTx4 = followerDistributedDataStore.newWriteOnlyTransaction();
-        cars.add(CarsModel.newCarEntry("car" + carIndex, BigInteger.valueOf(carIndex)));
+        cars.add(CarsModel.newCarEntry("car" + carIndex, Uint64.valueOf(carIndex)));
         writeTx4.write(CarsModel.newCarPath("car" + carIndex), cars.getLast());
         carIndex++;
 
@@ -974,7 +985,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         // leader shard on ready.
 
         final DOMStoreReadWriteTransaction readWriteTx = followerDistributedDataStore.newReadWriteTransaction();
-        cars.add(CarsModel.newCarEntry("car" + carIndex, BigInteger.valueOf(carIndex)));
+        cars.add(CarsModel.newCarEntry("car" + carIndex, Uint64.valueOf(carIndex)));
         readWriteTx.write(CarsModel.newCarPath("car" + carIndex), cars.getLast());
 
         IntegrationTestKit.verifyShardStats(leaderDistributedDataStore, "cars",
@@ -1046,7 +1057,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
                 stats -> assertEquals("getTxCohortCacheSize", 1, stats.getTxCohortCacheSize()));
 
             writeTx = followerDistributedDataStore.newWriteOnlyTransaction();
-            final MapEntryNode car = CarsModel.newCarEntry("optima", BigInteger.valueOf(20000));
+            final MapEntryNode car = CarsModel.newCarEntry("optima", Uint64.valueOf(20000));
             writeTx.write(CarsModel.newCarPath("optima"), car);
             final DOMStoreThreePhaseCommitCohort cohort2 = writeTx.ready();
 
@@ -1250,6 +1261,60 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         }
     }
 
+    @Test
+    public void testSemiReachableCandidateNotDroppingLeader() throws Exception {
+        final String testName = "testSemiReachableCandidateNotDroppingLeader";
+        initDatastores(testName, MODULE_SHARDS_CARS_1_2_3, CARS);
+
+        final DatastoreContext.Builder follower2DatastoreContextBuilder = DatastoreContext.newBuilder()
+                .shardHeartbeatIntervalInMillis(100).shardElectionTimeoutFactor(10);
+        final IntegrationTestKit follower2TestKit = new IntegrationTestKit(
+                follower2System, follower2DatastoreContextBuilder, commitTimeout);
+
+        final AbstractDataStore ds2 =
+                     follower2TestKit.setupAbstractDataStore(
+                             testParameter, testName, MODULE_SHARDS_CARS_1_2_3, false, CARS);
+
+        followerTestKit.waitForMembersUp("member-1", "member-3");
+        follower2TestKit.waitForMembersUp("member-1", "member-2");
+
+        TestKit.shutdownActorSystem(follower2System);
+
+        ActorRef cars = leaderDistributedDataStore.getActorUtils().findLocalShard("cars").get();
+        OnDemandRaftState initialState = (OnDemandRaftState) leaderDistributedDataStore.getActorUtils()
+                .executeOperation(cars, GetOnDemandRaftState.INSTANCE);
+
+        Cluster leaderCluster = Cluster.get(leaderSystem);
+        Cluster followerCluster = Cluster.get(followerSystem);
+        Cluster follower2Cluster = Cluster.get(follower2System);
+
+        Member follower2Member = follower2Cluster.readView().self();
+
+        await().atMost(10, TimeUnit.SECONDS)
+                .until(() -> leaderCluster.readView().unreachableMembers().contains(follower2Member));
+        await().atMost(10, TimeUnit.SECONDS)
+                .until(() -> followerCluster.readView().unreachableMembers().contains(follower2Member));
+
+        ActorRef followerCars = followerDistributedDataStore.getActorUtils().findLocalShard("cars").get();
+
+        // to simulate a follower not being able to receive messages, but still being able to send messages and becoming
+        // candidate, we can just send a couple of RequestVotes to both leader and follower.
+        cars.tell(new RequestVote(initialState.getCurrentTerm() + 1, "member-3-shard-cars", -1, -1), null);
+        followerCars.tell(new RequestVote(initialState.getCurrentTerm() + 1, "member-3-shard-cars", -1, -1), null);
+        cars.tell(new RequestVote(initialState.getCurrentTerm() + 3, "member-3-shard-cars", -1, -1), null);
+        followerCars.tell(new RequestVote(initialState.getCurrentTerm() + 3, "member-3-shard-cars", -1, -1), null);
+
+        OnDemandRaftState stateAfter = (OnDemandRaftState) leaderDistributedDataStore.getActorUtils()
+                .executeOperation(cars, GetOnDemandRaftState.INSTANCE);
+        OnDemandRaftState followerState = (OnDemandRaftState) followerDistributedDataStore.getActorUtils()
+                .executeOperation(cars, GetOnDemandRaftState.INSTANCE);
+
+        assertEquals(initialState.getCurrentTerm(), stateAfter.getCurrentTerm());
+        assertEquals(initialState.getCurrentTerm(), followerState.getCurrentTerm());
+
+        ds2.close();
+    }
+
     @Test
     public void testInstallSnapshot() throws Exception {
         final String testName = "testInstallSnapshot";
@@ -1263,7 +1328,7 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
             SchemaContextHelper.full());
 
         final ContainerNode carsNode = CarsModel.newCarsNode(
-                CarsModel.newCarsMapNode(CarsModel.newCarEntry("optima", BigInteger.valueOf(20000))));
+                CarsModel.newCarsMapNode(CarsModel.newCarEntry("optima", Uint64.valueOf(20000))));
         AbstractShardTest.writeToStore(tree, CarsModel.BASE_PATH, carsNode);
 
         final NormalizedNode<?, ?> snapshotRoot = AbstractShardTest.readStore(tree, YangInstanceIdentifier.empty());
@@ -1306,6 +1371,70 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest {
         verifyNode(rwTx, CarsModel.BASE_PATH, carsNode);
     }
 
+    @SuppressWarnings("IllegalCatch")
+    @Test
+    public void testRaftCallbackDuringLeadershipDrop() throws Exception {
+        final String testName = "testRaftCallbackDuringLeadershipDrop";
+        initDatastores(testName, MODULE_SHARDS_CARS_1_2_3, CARS);
+
+        final ExecutorService executor = Executors.newSingleThreadExecutor();
+
+        final IntegrationTestKit follower2TestKit = new IntegrationTestKit(follower2System,
+                DatastoreContext.newBuilderFrom(followerDatastoreContextBuilder.build()).operationTimeoutInMillis(500)
+                        .shardLeaderElectionTimeoutInSeconds(3600),
+                commitTimeout);
+
+        final DOMStoreWriteTransaction initialWriteTx = leaderDistributedDataStore.newWriteOnlyTransaction();
+        initialWriteTx.write(CarsModel.BASE_PATH, CarsModel.emptyContainer());
+        leaderTestKit.doCommit(initialWriteTx.ready());
+
+        try (AbstractDataStore follower2DistributedDataStore = follower2TestKit.setupAbstractDataStore(
+                testParameter, testName, MODULE_SHARDS_CARS_1_2_3, false)) {
+
+            final ActorRef member3Cars = ((LocalShardStore) follower2DistributedDataStore).getLocalShards()
+                    .getLocalShards().get("cars").getActor();
+            final ActorRef member2Cars = ((LocalShardStore)followerDistributedDataStore).getLocalShards()
+                    .getLocalShards().get("cars").getActor();
+            member2Cars.tell(new StartDropMessages(AppendEntries.class), null);
+            member3Cars.tell(new StartDropMessages(AppendEntries.class), null);
+
+            final DOMStoreWriteTransaction newTx = leaderDistributedDataStore.newWriteOnlyTransaction();
+            newTx.write(CarsModel.CAR_LIST_PATH, CarsModel.newCarMapNode());
+            final AtomicBoolean submitDone = new AtomicBoolean(false);
+            executor.submit(() -> {
+                try {
+                    leaderTestKit.doCommit(newTx.ready());
+                    submitDone.set(true);
+                } catch (Exception e) {
+                    throw new RuntimeException(e);
+                }
+            });
+            final ActorRef leaderCars = ((LocalShardStore) leaderDistributedDataStore).getLocalShards()
+                    .getLocalShards().get("cars").getActor();
+            await().atMost(10, TimeUnit.SECONDS)
+                    .until(() -> ((OnDemandRaftState) leaderDistributedDataStore.getActorUtils()
+                            .executeOperation(leaderCars, GetOnDemandRaftState.INSTANCE)).getLastIndex() >= 1);
+
+            final OnDemandRaftState raftState = (OnDemandRaftState)leaderDistributedDataStore.getActorUtils()
+                    .executeOperation(leaderCars, GetOnDemandRaftState.INSTANCE);
+
+            // Simulate a follower not receiving heartbeats but still being able to send messages ie RequestVote with
+            // new term(switching to candidate after election timeout)
+            leaderCars.tell(new RequestVote(raftState.getCurrentTerm() + 1,
+                    "member-3-shard-cars-testRaftCallbackDuringLeadershipDrop", -1,
+                            -1), member3Cars);
+
+            member2Cars.tell(new StopDropMessages(AppendEntries.class), null);
+            member3Cars.tell(new StopDropMessages(AppendEntries.class), null);
+
+            await("Is tx stuck in COMMIT_PENDING")
+                    .atMost(10, TimeUnit.SECONDS).untilAtomic(submitDone, equalTo(true));
+
+        }
+
+        executor.shutdownNow();
+    }
+
     private static void verifySnapshot(final Snapshot actual, final Snapshot expected,
                                        final NormalizedNode<?, ?> expRoot) {
         assertEquals("Snapshot getLastAppliedTerm", expected.getLastAppliedTerm(), actual.getLastAppliedTerm());