From: Tibor Král Date: Mon, 11 Nov 2019 17:23:31 +0000 (+0100) Subject: Allow shard settle timeout to be tuned X-Git-Tag: release/magnesium~30 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=commitdiff_plain;h=b65f82f3c6e6e4c53ea2efa4dba30ddf82d61616 Allow shard settle timeout to be tuned When we are starting the datastore, we wait up to a fixed number of election timeout intervals (3) for shards to finish electing a leader. This is not always enough, especially if recovery takes a long time, hence introduce shardLeaderElectionTimeoutMultiplier to make this tunable. JIRA: CONTROLLER-1914 Change-Id: Iba1d116d0248fc6046aeeae3ec30ecac50f373c9 Signed-off-by: Tibor Král Signed-off-by: Robert Varga --- diff --git a/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg b/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg index 250a8149ff..d43ed4b01b 100644 --- a/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg +++ b/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg @@ -99,3 +99,8 @@ operational.persistent=false # the stack trace of the creator of the Tx when there is an exception when the transaction is submitted # (e.g. for a failed validation). Defaults to false due to performance impact. #transaction-debug-context-enabled=true + +# Multiplicator of shard-leader-election-timeout-in-seconds for the purposes of initial datastore +# convergence. Each frontend datastore instance will wait specified amount of time before becoming +# exposed as a service. A value of 0 indicates waiting forever. Defaults to 3. +initial-settle-timeout-multiplier=3 diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/AbstractDataStore.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/AbstractDataStore.java index 43314cd3ba..bc4a8e7395 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/AbstractDataStore.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/AbstractDataStore.java @@ -13,11 +13,13 @@ import akka.actor.ActorRef; import akka.actor.ActorSystem; import akka.actor.PoisonPill; import akka.actor.Props; +import com.google.common.annotations.Beta; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Throwables; import com.google.common.util.concurrent.Uninterruptibles; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier; import org.opendaylight.controller.cluster.common.actor.Dispatchers; import org.opendaylight.controller.cluster.databroker.actors.dds.DataStoreClient; @@ -45,6 +47,7 @@ import org.opendaylight.yangtools.yang.model.api.SchemaContext; import org.opendaylight.yangtools.yang.model.api.SchemaContextListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import scala.concurrent.duration.Duration; /** * Base implementation of a distributed DOMStore. @@ -55,22 +58,15 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface private static final Logger LOG = LoggerFactory.getLogger(AbstractDataStore.class); - private static final long READY_WAIT_FACTOR = 3; - + private final CountDownLatch waitTillReadyCountDownLatch = new CountDownLatch(1); + private final ClientIdentifier identifier; + private final DataStoreClient client; private final ActorUtils actorUtils; - private final long waitTillReadyTimeInMillis; private AutoCloseable closeable; - private DatastoreConfigurationMXBeanImpl datastoreConfigMXBean; - private DatastoreInfoMXBeanImpl datastoreInfoMXBean; - private final CountDownLatch waitTillReadyCountDownLatch = new CountDownLatch(1); - - private final ClientIdentifier identifier; - private final DataStoreClient client; - @SuppressWarnings("checkstyle:IllegalCatch") protected AbstractDataStore(final ActorSystem actorSystem, final ClusterWrapper cluster, final Configuration configuration, final DatastoreContextFactory datastoreContextFactory, @@ -116,9 +112,6 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface identifier = client.getIdentifier(); LOG.debug("Distributed data store client {} started", identifier); - this.waitTillReadyTimeInMillis = actorUtils.getDatastoreContext().getShardLeaderElectionTimeout() - .duration().toMillis() * READY_WAIT_FACTOR; - datastoreConfigMXBean = new DatastoreConfigurationMXBeanImpl( datastoreContextFactory.getBaseDatastoreContext().getDataStoreMXBeanType()); datastoreConfigMXBean.setContext(datastoreContextFactory.getBaseDatastoreContext()); @@ -134,8 +127,6 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface this.actorUtils = requireNonNull(actorUtils, "actorContext should not be null"); this.client = null; this.identifier = requireNonNull(identifier); - this.waitTillReadyTimeInMillis = actorUtils.getDatastoreContext().getShardLeaderElectionTimeout() - .duration().toMillis() * READY_WAIT_FACTOR; } @VisibleForTesting @@ -144,8 +135,6 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface this.actorUtils = requireNonNull(actorUtils, "actorContext should not be null"); this.client = clientActor; this.identifier = requireNonNull(identifier); - this.waitTillReadyTimeInMillis = actorUtils.getDatastoreContext().getShardLeaderElectionTimeout() - .duration().toMillis() * READY_WAIT_FACTOR; } protected AbstractShardManagerCreator getShardManagerCreator() { @@ -242,18 +231,32 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface return actorUtils; } + // TODO: consider removing this in favor of awaitReadiness() public void waitTillReady() { LOG.info("Beginning to wait for data store to become ready : {}", identifier); + final Duration toWait = initialSettleTime(); try { - if (waitTillReadyCountDownLatch.await(waitTillReadyTimeInMillis, TimeUnit.MILLISECONDS)) { - LOG.debug("Data store {} is now ready", identifier); + if (toWait.isFinite()) { + if (!waitTillReadyCountDownLatch.await(toWait.toNanos(), TimeUnit.NANOSECONDS)) { + LOG.error("Shard leaders failed to settle in {}, giving up", toWait); + return; + } } else { - LOG.error("Shard leaders failed to settle in {} seconds, giving up", - TimeUnit.MILLISECONDS.toSeconds(waitTillReadyTimeInMillis)); + waitTillReadyCountDownLatch.await(); } } catch (InterruptedException e) { LOG.error("Interrupted while waiting for shards to settle", e); + return; + } + + LOG.debug("Data store {} is now ready", identifier); + } + + @Beta + public void awaitReadiness(final long timeout, final TimeUnit unit) throws InterruptedException, TimeoutException { + if (!waitTillReadyCountDownLatch.await(timeout, unit)) { + throw new TimeoutException("Shard leaders failed to settle"); } } @@ -321,4 +324,9 @@ public abstract class AbstractDataStore implements DistributedDataStoreInterface return (ListenerRegistration) proxy; } + private Duration initialSettleTime() { + final DatastoreContext context = actorUtils.getDatastoreContext(); + final int multiplier = context.getInitialSettleTimeoutMultiplier(); + return multiplier == 0 ? Duration.Inf() : context.getShardLeaderElectionTimeout().duration().$times(multiplier); + } } diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java index 57443bc729..e6d8ed8457 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java @@ -51,6 +51,7 @@ public class DatastoreContext implements ClientActorConfig { public static final int DEFAULT_SHARD_TX_COMMIT_QUEUE_CAPACITY = 50000; public static final Timeout DEFAULT_SHARD_INITIALIZATION_TIMEOUT = new Timeout(5, TimeUnit.MINUTES); public static final Timeout DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT = new Timeout(30, TimeUnit.SECONDS); + public static final int DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER = 3; public static final boolean DEFAULT_PERSISTENT = true; public static final FileAkkaConfigurationReader DEFAULT_CONFIGURATION_READER = new FileAkkaConfigurationReader(); public static final int DEFAULT_SHARD_SNAPSHOT_DATA_THRESHOLD_PERCENTAGE = 12; @@ -80,6 +81,7 @@ public class DatastoreContext implements ClientActorConfig { private int shardTransactionCommitQueueCapacity = DEFAULT_SHARD_TX_COMMIT_QUEUE_CAPACITY; private Timeout shardInitializationTimeout = DEFAULT_SHARD_INITIALIZATION_TIMEOUT; private Timeout shardLeaderElectionTimeout = DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT; + private int initialSettleTimeoutMultiplier = DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER; private boolean persistent = DEFAULT_PERSISTENT; private AkkaConfigurationReader configurationReader = DEFAULT_CONFIGURATION_READER; private long transactionCreationInitialRateLimit = DEFAULT_TX_CREATION_INITIAL_RATE_LIMIT; @@ -123,6 +125,7 @@ public class DatastoreContext implements ClientActorConfig { this.shardTransactionCommitQueueCapacity = other.shardTransactionCommitQueueCapacity; this.shardInitializationTimeout = other.shardInitializationTimeout; this.shardLeaderElectionTimeout = other.shardLeaderElectionTimeout; + this.initialSettleTimeoutMultiplier = other.initialSettleTimeoutMultiplier; this.persistent = other.persistent; this.configurationReader = other.configurationReader; this.transactionCreationInitialRateLimit = other.transactionCreationInitialRateLimit; @@ -200,6 +203,16 @@ public class DatastoreContext implements ClientActorConfig { return shardLeaderElectionTimeout; } + /** + * Return the multiplier of {@link #getShardLeaderElectionTimeout()} which the frontend will wait for all shards + * on the local node to settle. + * + * @return Non-negative multiplier. Value of {@code 0} indicates to wait indefinitely. + */ + public int getInitialSettleTimeoutMultiplier() { + return initialSettleTimeoutMultiplier; + } + public boolean isPersistent() { return persistent; } @@ -457,6 +470,12 @@ public class DatastoreContext implements ClientActorConfig { return this; } + public Builder initialSettleTimeoutMultiplier(final int multiplier) { + checkArgument(multiplier >= 0); + datastoreContext.initialSettleTimeoutMultiplier = multiplier; + return this; + } + public Builder shardLeaderElectionTimeoutInSeconds(final long timeout) { return shardLeaderElectionTimeout(timeout, TimeUnit.SECONDS); } diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreFactory.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreFactory.java index 0846590c1e..a765fa3efe 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreFactory.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreFactory.java @@ -34,6 +34,7 @@ public final class DistributedDataStoreFactory { introspector, updater, null); } + // TODO: separate out settle wait so it is better controlled public static AbstractDataStore createInstance(final DOMSchemaService schemaService, final DatastoreContext initialDatastoreContext, final DatastoreSnapshotRestore datastoreSnapshotRestore, final ActorSystemProvider actorSystemProvider, final DatastoreContextIntrospector introspector, diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang b/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang index af37589f9c..7a932b8b11 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang @@ -144,6 +144,13 @@ module distributed-datastore-provider { an operation (eg transaction create)."; } + leaf initial-settle-timeout-multiplier { + default 3; + type uint32; + description "Multiplier for the maximum amount of time to wait for a shard to elect a leader. + Zero value means wait indefinitely (as long as it takes)."; + } + leaf shard-batched-modification-count { default 1000; type non-zero-uint32-type; diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextIntrospectorTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextIntrospectorTest.java index 6d661e6aa2..cca8eb6198 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextIntrospectorTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextIntrospectorTest.java @@ -85,6 +85,7 @@ public class DatastoreContextIntrospectorTest { properties.put("shard-transaction-commit-queue-capacity", "567"); properties.put("shard-initialization-timeout-in-seconds", "82"); properties.put("shard-leader-election-timeout-in-seconds", "66"); + properties.put("initial-settle-timeout-multiplier", "5"); properties.put("shard-isolated-leader-check-interval-in-millis", "123"); properties.put("shard-snapshot-data-threshold-percentage", "100"); properties.put("shard-election-timeout-factor", "21"); @@ -110,6 +111,7 @@ public class DatastoreContextIntrospectorTest { assertEquals(567, context.getShardTransactionCommitQueueCapacity()); assertEquals(82, context.getShardInitializationTimeout().duration().toSeconds()); assertEquals(66, context.getShardLeaderElectionTimeout().duration().toSeconds()); + assertEquals(5, context.getInitialSettleTimeoutMultiplier()); assertEquals(123, context.getShardRaftConfig().getIsolatedCheckIntervalInMillis()); assertEquals(100, context.getShardRaftConfig().getSnapshotDataThresholdPercentage()); assertEquals(21, context.getShardRaftConfig().getElectionTimeoutFactor()); @@ -126,6 +128,7 @@ public class DatastoreContextIntrospectorTest { properties.put("operation-timeout-in-seconds", "27"); properties.put("shard-heartbeat-interval-in-millis", "102"); properties.put("shard-election-timeout-factor", "22"); + properties.put("initial-settle-timeout-multiplier", "6"); properties.put("max-shard-data-change-executor-pool-size", "42"); properties.put("max-shard-data-store-executor-queue-size", "4444"); properties.put("persistent", "true"); @@ -143,6 +146,7 @@ public class DatastoreContextIntrospectorTest { assertEquals(567, context.getShardTransactionCommitQueueCapacity()); assertEquals(82, context.getShardInitializationTimeout().duration().toSeconds()); assertEquals(66, context.getShardLeaderElectionTimeout().duration().toSeconds()); + assertEquals(6, context.getInitialSettleTimeoutMultiplier()); assertEquals(123, context.getShardRaftConfig().getIsolatedCheckIntervalInMillis()); assertEquals(100, context.getShardRaftConfig().getSnapshotDataThresholdPercentage()); assertEquals(22, context.getShardRaftConfig().getElectionTimeoutFactor()); diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextTest.java index 6bd8f367fa..504618cd1f 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextTest.java @@ -11,6 +11,7 @@ import static org.junit.Assert.assertEquals; import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_CONFIGURATION_READER; import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_HEARTBEAT_INTERVAL_IN_MILLIS; import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_INITIAL_PAYLOAD_SERIALIZED_BUFFER_CAPACITY; +import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER; import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS; import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_JOURNAL_RECOVERY_BATCH_SIZE; import static org.opendaylight.controller.cluster.datastore.DatastoreContext.DEFAULT_MAX_MESSAGE_SLICE_SIZE; @@ -51,6 +52,8 @@ public class DatastoreContextTest { context.getShardInitializationTimeout().duration().toMillis()); assertEquals(DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT.duration().toMillis(), context.getShardLeaderElectionTimeout().duration().toMillis()); + assertEquals(DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER, + context.getInitialSettleTimeoutMultiplier()); assertEquals(DEFAULT_PERSISTENT, context.isPersistent()); assertEquals(DEFAULT_CONFIGURATION_READER, context.getConfigurationReader()); assertEquals(DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS, @@ -90,6 +93,7 @@ public class DatastoreContextTest { TimeUnit.MILLISECONDS); builder.shardLeaderElectionTimeout(DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT.duration().toMillis() + 1, TimeUnit.MILLISECONDS); + builder.initialSettleTimeoutMultiplier(DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER + 1); builder.persistent(!DEFAULT_PERSISTENT); builder.shardIsolatedLeaderCheckIntervalInMillis(DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS + 1); builder.shardSnapshotDataThresholdPercentage(DEFAULT_SHARD_SNAPSHOT_DATA_THRESHOLD_PERCENTAGE + 1); @@ -137,6 +141,8 @@ public class DatastoreContextTest { context.getShardInitializationTimeout().duration().toMillis()); assertEquals(DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT.duration().toMillis() + 1, context.getShardLeaderElectionTimeout().duration().toMillis()); + assertEquals(DEFAULT_INITIAL_SETTLE_TIMEOUT_MULTIPLIER + 1, + context.getInitialSettleTimeoutMultiplier()); assertEquals(!DEFAULT_PERSISTENT, context.isPersistent()); assertEquals(DEFAULT_CONFIGURATION_READER, context.getConfigurationReader()); assertEquals(DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS + 1, diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreTest.java index f608aa060f..8ddf854a7e 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreTest.java @@ -101,6 +101,7 @@ public class DistributedDataStoreTest extends AbstractActorTest { public void testWaitTillReadyBlocking() { doReturn(datastoreContext).when(actorUtils).getDatastoreContext(); doReturn(shardElectionTimeout).when(datastoreContext).getShardLeaderElectionTimeout(); + doReturn(1).when(datastoreContext).getInitialSettleTimeoutMultiplier(); doReturn(FiniteDuration.apply(50, TimeUnit.MILLISECONDS)).when(shardElectionTimeout).duration(); try (DistributedDataStore distributedDataStore = new DistributedDataStore(actorUtils, UNKNOWN_ID)) {