Allow shard settle timeout to be tuned 41/85741/9
authorTibor Král <tibor.kral@pantheon.tech>
Mon, 11 Nov 2019 17:23:31 +0000 (18:23 +0100)
committerRobert Varga <nite@hq.sk>
Wed, 20 Nov 2019 13:28:04 +0000 (13:28 +0000)
When we are starting the datastore, we wait up to a fixed number of
election timeout intervals (3) for shards to finish electing a leader.

This is not always enough, especially if recovery takes a long time,
hence introduce shardLeaderElectionTimeoutMultiplier to make this
tunable.

JIRA: CONTROLLER-1914
Change-Id: Iba1d116d0248fc6046aeeae3ec30ecac50f373c9
Signed-off-by: Tibor Král <tibor.kral@pantheon.tech>
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/AbstractDataStore.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreFactory.java
opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextIntrospectorTest.java
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextTest.java
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreTest.java

index 250a8149ff1e0b77e6f203c275beb473c03d8c2e..d43ed4b01b9d571bb4bb4fbbbf4a2b7fc6d0d85f 100644 (file)
@@ -99,3 +99,8 @@ operational.persistent=false
 # the stack trace of the creator of the Tx when there is an exception when the transaction is submitted 
 # (e.g. for a failed validation).  Defaults to false due to performance impact.
 #transaction-debug-context-enabled=true
+
+# Multiplicator of shard-leader-election-timeout-in-seconds for the purposes of initial datastore
+# convergence. Each frontend datastore instance will wait specified amount of time before becoming
+# exposed as a service. A value of 0 indicates waiting forever. Defaults to 3.
+initial-settle-timeout-multiplier=3
index 43314cd3ba0c7ab84987ea2a695533f852bf850a..bc4a8e739577d6264038594d67980673fc937fe2 100644 (file)
@@ -13,11 +13,13 @@ import akka.actor.ActorRef;
 import akka.actor.ActorSystem;
 import akka.actor.PoisonPill;
 import akka.actor.Props;
+import com.google.common.annotations.Beta;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Throwables;
 import com.google.common.util.concurrent.Uninterruptibles;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
 import org.opendaylight.controller.cluster.common.actor.Dispatchers;
 import org.opendaylight.controller.cluster.databroker.actors.dds.DataStoreClient;
@@ -45,6 +47,7 @@ import org.opendaylight.yangtools.yang.model.api.SchemaContext;
 import org.opendaylight.yangtools.yang.model.api.SchemaContextListener;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import scala.concurrent.duration.Duration;
 
 /**
  * Base implementation of a distributed DOMStore.
@@ -55,22 +58,15 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface
 
     private static final Logger LOG = LoggerFactory.getLogger(AbstractDataStore.class);
 
-    private static final long READY_WAIT_FACTOR = 3;
-
+    private final CountDownLatch waitTillReadyCountDownLatch = new CountDownLatch(1);
+    private final ClientIdentifier identifier;
+    private final DataStoreClient client;
     private final ActorUtils actorUtils;
-    private final long waitTillReadyTimeInMillis;
 
     private AutoCloseable closeable;
-
     private DatastoreConfigurationMXBeanImpl datastoreConfigMXBean;
-
     private DatastoreInfoMXBeanImpl datastoreInfoMXBean;
 
-    private final CountDownLatch waitTillReadyCountDownLatch = new CountDownLatch(1);
-
-    private final ClientIdentifier identifier;
-    private final DataStoreClient client;
-
     @SuppressWarnings("checkstyle:IllegalCatch")
     protected AbstractDataStore(final ActorSystem actorSystem, final ClusterWrapper cluster,
             final Configuration configuration, final DatastoreContextFactory datastoreContextFactory,
@@ -116,9 +112,6 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface
         identifier = client.getIdentifier();
         LOG.debug("Distributed data store client {} started", identifier);
 
-        this.waitTillReadyTimeInMillis = actorUtils.getDatastoreContext().getShardLeaderElectionTimeout()
-                .duration().toMillis() * READY_WAIT_FACTOR;
-
         datastoreConfigMXBean = new DatastoreConfigurationMXBeanImpl(
                 datastoreContextFactory.getBaseDatastoreContext().getDataStoreMXBeanType());
         datastoreConfigMXBean.setContext(datastoreContextFactory.getBaseDatastoreContext());
@@ -134,8 +127,6 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface
         this.actorUtils = requireNonNull(actorUtils, "actorContext should not be null");
         this.client = null;
         this.identifier = requireNonNull(identifier);
-        this.waitTillReadyTimeInMillis = actorUtils.getDatastoreContext().getShardLeaderElectionTimeout()
-                .duration().toMillis() * READY_WAIT_FACTOR;
     }
 
     @VisibleForTesting
@@ -144,8 +135,6 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface
         this.actorUtils = requireNonNull(actorUtils, "actorContext should not be null");
         this.client = clientActor;
         this.identifier = requireNonNull(identifier);
-        this.waitTillReadyTimeInMillis = actorUtils.getDatastoreContext().getShardLeaderElectionTimeout()
-                .duration().toMillis() * READY_WAIT_FACTOR;
     }
 
     protected AbstractShardManagerCreator<?> getShardManagerCreator() {
@@ -242,18 +231,32 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface
         return actorUtils;
     }
 
+    // TODO: consider removing this in favor of awaitReadiness()
     public void waitTillReady() {
         LOG.info("Beginning to wait for data store to become ready : {}", identifier);
 
+        final Duration toWait = initialSettleTime();
         try {
-            if (waitTillReadyCountDownLatch.await(waitTillReadyTimeInMillis, TimeUnit.MILLISECONDS)) {
-                LOG.debug("Data store {} is now ready", identifier);
+            if (toWait.isFinite()) {
+                if (!waitTillReadyCountDownLatch.await(toWait.toNanos(), TimeUnit.NANOSECONDS)) {
+                    LOG.error("Shard leaders failed to settle in {}, giving up", toWait);
+                    return;
+                }
             } else {
-                LOG.error("Shard leaders failed to settle in {} seconds, giving up",
-                        TimeUnit.MILLISECONDS.toSeconds(waitTillReadyTimeInMillis));
+                waitTillReadyCountDownLatch.await();
             }
         } catch (InterruptedException e) {
             LOG.error("Interrupted while waiting for shards to settle", e);
+            return;
+        }
+
+        LOG.debug("Data store {} is now ready", identifier);
+    }
+
+    @Beta
+    public void awaitReadiness(final long timeout, final TimeUnit unit) throws InterruptedException, TimeoutException {
+        if (!waitTillReadyCountDownLatch.await(timeout, unit)) {
+            throw new TimeoutException("Shard leaders failed to settle");
         }
     }
 
@@ -321,4 +324,9 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface
         return (ListenerRegistration<L>) proxy;
     }
 
+    private Duration initialSettleTime() {
+        final DatastoreContext context = actorUtils.getDatastoreContext();
+        final int multiplier = context.getInitialSettleTimeoutMultiplier();
+        return multiplier == 0 ? Duration.Inf() : context.getShardLeaderElectionTimeout().duration().$times(multiplier);
+    }
 }
index 57443bc729104ef665af5221ba3ae540718a6e91..e6d8ed84573d0094e222b38f0088a8175c953280 100644 (file)
@@ -51,6 +51,7 @@ public class DatastoreContext implements ClientActorConfig {
     public static final int DEFAULT_SHARD_TX_COMMIT_QUEUE_CAPACITY = 50000;
     public static final Timeout DEFAULT_SHARD_INITIALIZATION_TIMEOUT = new Timeout(5, TimeUnit.MINUTES);
     public static final Timeout DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT = new Timeout(30, TimeUnit.SECONDS);
+    public static final int DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER = 3;
     public static final boolean DEFAULT_PERSISTENT = true;
     public static final FileAkkaConfigurationReader DEFAULT_CONFIGURATION_READER = new FileAkkaConfigurationReader();
     public static final int DEFAULT_SHARD_SNAPSHOT_DATA_THRESHOLD_PERCENTAGE = 12;
@@ -80,6 +81,7 @@ public class DatastoreContext implements ClientActorConfig {
     private int shardTransactionCommitQueueCapacity = DEFAULT_SHARD_TX_COMMIT_QUEUE_CAPACITY;
     private Timeout shardInitializationTimeout = DEFAULT_SHARD_INITIALIZATION_TIMEOUT;
     private Timeout shardLeaderElectionTimeout = DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT;
+    private int initialSettleTimeoutMultiplier = DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER;
     private boolean persistent = DEFAULT_PERSISTENT;
     private AkkaConfigurationReader configurationReader = DEFAULT_CONFIGURATION_READER;
     private long transactionCreationInitialRateLimit = DEFAULT_TX_CREATION_INITIAL_RATE_LIMIT;
@@ -123,6 +125,7 @@ public class DatastoreContext implements ClientActorConfig {
         this.shardTransactionCommitQueueCapacity = other.shardTransactionCommitQueueCapacity;
         this.shardInitializationTimeout = other.shardInitializationTimeout;
         this.shardLeaderElectionTimeout = other.shardLeaderElectionTimeout;
+        this.initialSettleTimeoutMultiplier = other.initialSettleTimeoutMultiplier;
         this.persistent = other.persistent;
         this.configurationReader = other.configurationReader;
         this.transactionCreationInitialRateLimit = other.transactionCreationInitialRateLimit;
@@ -200,6 +203,16 @@ public class DatastoreContext implements ClientActorConfig {
         return shardLeaderElectionTimeout;
     }
 
+    /**
+     * Return the multiplier of {@link #getShardLeaderElectionTimeout()} which the frontend will wait for all shards
+     * on the local node to settle.
+     *
+     * @return Non-negative multiplier. Value of {@code 0} indicates to wait indefinitely.
+     */
+    public int getInitialSettleTimeoutMultiplier() {
+        return initialSettleTimeoutMultiplier;
+    }
+
     public boolean isPersistent() {
         return persistent;
     }
@@ -457,6 +470,12 @@ public class DatastoreContext implements ClientActorConfig {
             return this;
         }
 
+        public Builder initialSettleTimeoutMultiplier(final int multiplier) {
+            checkArgument(multiplier >= 0);
+            datastoreContext.initialSettleTimeoutMultiplier = multiplier;
+            return this;
+        }
+
         public Builder shardLeaderElectionTimeoutInSeconds(final long timeout) {
             return shardLeaderElectionTimeout(timeout, TimeUnit.SECONDS);
         }
index 0846590c1e1cd912320b2d5b13bc5399bf43f964..a765fa3efe072b994adff57d1d71ae222aa55a10 100644 (file)
@@ -34,6 +34,7 @@ public final class DistributedDataStoreFactory {
                 introspector, updater, null);
     }
 
+    // TODO: separate out settle wait so it is better controlled
     public static AbstractDataStore createInstance(final DOMSchemaService schemaService,
             final DatastoreContext initialDatastoreContext, final DatastoreSnapshotRestore datastoreSnapshotRestore,
             final ActorSystemProvider actorSystemProvider, final DatastoreContextIntrospector introspector,
index af37589f9cfcda9d0b9c9eafebfc144583c62d78..7a932b8b1107b599d6a9f5b494e516224e59fbe1 100644 (file)
@@ -144,6 +144,13 @@ module distributed-datastore-provider {
                           an operation (eg transaction create).";
         }
 
+        leaf initial-settle-timeout-multiplier {
+            default 3;
+            type uint32;
+            description "Multiplier for the maximum amount of time to wait for a shard to elect a leader.
+                         Zero value means wait indefinitely (as long as it takes).";
+        }
+
         leaf shard-batched-modification-count {
             default 1000;
             type non-zero-uint32-type;
index 6d661e6aa22839d060974bd924bc6ed6ceff869d..cca8eb6198f2453a919ef53e43e04009265ccec2 100644 (file)
@@ -85,6 +85,7 @@ public class DatastoreContextIntrospectorTest {
         properties.put("shard-transaction-commit-queue-capacity", "567");
         properties.put("shard-initialization-timeout-in-seconds", "82");
         properties.put("shard-leader-election-timeout-in-seconds", "66");
+        properties.put("initial-settle-timeout-multiplier", "5");
         properties.put("shard-isolated-leader-check-interval-in-millis", "123");
         properties.put("shard-snapshot-data-threshold-percentage", "100");
         properties.put("shard-election-timeout-factor", "21");
@@ -110,6 +111,7 @@ public class DatastoreContextIntrospectorTest {
         assertEquals(567, context.getShardTransactionCommitQueueCapacity());
         assertEquals(82, context.getShardInitializationTimeout().duration().toSeconds());
         assertEquals(66, context.getShardLeaderElectionTimeout().duration().toSeconds());
+        assertEquals(5, context.getInitialSettleTimeoutMultiplier());
         assertEquals(123, context.getShardRaftConfig().getIsolatedCheckIntervalInMillis());
         assertEquals(100, context.getShardRaftConfig().getSnapshotDataThresholdPercentage());
         assertEquals(21, context.getShardRaftConfig().getElectionTimeoutFactor());
@@ -126,6 +128,7 @@ public class DatastoreContextIntrospectorTest {
         properties.put("operation-timeout-in-seconds", "27");
         properties.put("shard-heartbeat-interval-in-millis", "102");
         properties.put("shard-election-timeout-factor", "22");
+        properties.put("initial-settle-timeout-multiplier", "6");
         properties.put("max-shard-data-change-executor-pool-size", "42");
         properties.put("max-shard-data-store-executor-queue-size", "4444");
         properties.put("persistent", "true");
@@ -143,6 +146,7 @@ public class DatastoreContextIntrospectorTest {
         assertEquals(567, context.getShardTransactionCommitQueueCapacity());
         assertEquals(82, context.getShardInitializationTimeout().duration().toSeconds());
         assertEquals(66, context.getShardLeaderElectionTimeout().duration().toSeconds());
+        assertEquals(6, context.getInitialSettleTimeoutMultiplier());
         assertEquals(123, context.getShardRaftConfig().getIsolatedCheckIntervalInMillis());
         assertEquals(100, context.getShardRaftConfig().getSnapshotDataThresholdPercentage());
         assertEquals(22, context.getShardRaftConfig().getElectionTimeoutFactor());
index 6bd8f367fab185da5ee3b6231992c316eaafe79b..504618cd1fa94a777afe0252a60b2db0f3e317a7 100644 (file)
@@ -11,6 +11,7 @@ import static org.junit.Assert.assertEquals;
 import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_CONFIGURATION_READER;
 import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_HEARTBEAT_INTERVAL_IN_MILLIS;
 import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_INITIAL_PAYLOAD_SERIALIZED_BUFFER_CAPACITY;
+import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER;
 import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS;
 import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_JOURNAL_RECOVERY_BATCH_SIZE;
 import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_MAX_MESSAGE_SLICE_SIZE;
@@ -51,6 +52,8 @@ public class DatastoreContextTest {
                 context.getShardInitializationTimeout().duration().toMillis());
         assertEquals(DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT.duration().toMillis(),
                 context.getShardLeaderElectionTimeout().duration().toMillis());
+        assertEquals(DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER,
+                context.getInitialSettleTimeoutMultiplier());
         assertEquals(DEFAULT_PERSISTENT, context.isPersistent());
         assertEquals(DEFAULT_CONFIGURATION_READER, context.getConfigurationReader());
         assertEquals(DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS,
@@ -90,6 +93,7 @@ public class DatastoreContextTest {
                 TimeUnit.MILLISECONDS);
         builder.shardLeaderElectionTimeout(DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT.duration().toMillis() + 1,
                 TimeUnit.MILLISECONDS);
+        builder.initialSettleTimeoutMultiplier(DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER + 1);
         builder.persistent(!DEFAULT_PERSISTENT);
         builder.shardIsolatedLeaderCheckIntervalInMillis(DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS + 1);
         builder.shardSnapshotDataThresholdPercentage(DEFAULT_SHARD_SNAPSHOT_DATA_THRESHOLD_PERCENTAGE + 1);
@@ -137,6 +141,8 @@ public class DatastoreContextTest {
                 context.getShardInitializationTimeout().duration().toMillis());
         assertEquals(DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT.duration().toMillis() + 1,
                 context.getShardLeaderElectionTimeout().duration().toMillis());
+        assertEquals(DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER + 1,
+                context.getInitialSettleTimeoutMultiplier());
         assertEquals(!DEFAULT_PERSISTENT, context.isPersistent());
         assertEquals(DEFAULT_CONFIGURATION_READER, context.getConfigurationReader());
         assertEquals(DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS + 1,
index f608aa060f0a6d9aa4ad00ba6daf5fc3c35582cb..8ddf854a7ef46094e5974f7c97d991d95beb6b00 100644 (file)
@@ -101,6 +101,7 @@ public class DistributedDataStoreTest extends AbstractActorTest {
     public void testWaitTillReadyBlocking() {
         doReturn(datastoreContext).when(actorUtils).getDatastoreContext();
         doReturn(shardElectionTimeout).when(datastoreContext).getShardLeaderElectionTimeout();
+        doReturn(1).when(datastoreContext).getInitialSettleTimeoutMultiplier();
         doReturn(FiniteDuration.apply(50, TimeUnit.MILLISECONDS)).when(shardElectionTimeout).duration();
         try (DistributedDataStore distributedDataStore = new DistributedDataStore(actorUtils, UNKNOWN_ID)) {