From fa60e0fbe54f1604d3b68dcd5df14ba3aed7183f Mon Sep 17 00:00:00 2001
From: Tom Pantelis
Date: Wed, 26 Jul 2017 12:16:48 -0400
Subject: [PATCH] Bug 8885: Fix DistributedShardedDOMDataTree initialization

DistributedShardedDOMDataTree initialization expects the prefix configuration
shard to be present and ready with a leader. However, that isn't the case when
the static module-shards configuration is bootstrapped without the local
member so that it can later be joined dynamically into an existing cluster. So
I modified the ConfigShardLookupTask to elide the ConfigShardReadinessTask.

Once past that, creation of the prefix-based default shard is attempted since
there isn't a local module-based shard; however, this fails because the local
prefix configuration shard is not connected to a leader. To alleviate this I
just commented out the code that creates the shard. Since the default shard
configuration is present in the out-of-box modules.conf and is expected to be
present, we can assume at this point that the local member was left out of the
replica list with the intention of dynamically joining it to an existing
cluster, at which time the shard will be created.

These changes at least fix the regression in the bootstrapping scenario. We
can revisit this initialization later w.r.t. prefix-based shards.

Change-Id: I1faf531f4c79914d45203ee132dd4e65ad2f18ba
Signed-off-by: Tom Pantelis
---
 .../DistributedShardedDOMDataTree.java | 90 ++++++++-----------
 .../sharding/ShardedDataTreeActor.java | 17 +---
 ...ributedShardedDOMDataTreeRemotingTest.java | 25 ++++--
 .../DistributedShardedDOMDataTreeTest.java | 2 +-
 4 files changed, 63 insertions(+), 71 deletions(-)

diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTree.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTree.java
index cdb249f3cd..53893e2251 100644
--- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTree.java
+++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTree.java
@@ -87,7 +86,6 @@ import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import scala.compat.java8.FutureConverters; -import scala.concurrent.Await; import scala.concurrent.Future; import scala.concurrent.Promise; import scala.concurrent.duration.FiniteDuration;
@@ -123,9 +122,6 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat private final DOMDataTreePrefixTable> shards = DOMDataTreePrefixTable.create(); - private final EnumMap defaultShardRegistrations = new EnumMap<>(LogicalDatastoreType.class); - private final EnumMap> configurationShardMap = new EnumMap<>(LogicalDatastoreType.class);
@@ -196,21 +192,17 @@ public void init() { // create our writers to the configuration try { - LOG.debug("{} - starting config shard lookup.", - distributedConfigDatastore.getActorContext().getCurrentMemberName()); + LOG.debug("{} - starting config shard lookup.", memberName); // We have to wait for prefix config shards to be up and running // so we can create datastore clients for them handleConfigShardLookup().get(SHARD_FUTURE_TIMEOUT_DURATION.length(), SHARD_FUTURE_TIMEOUT_DURATION.unit()); - -
LOG.debug("Prefix configuration shards ready - creating clients"); - } catch (InterruptedException | ExecutionException | TimeoutException e) { throw new IllegalStateException("Prefix config shards not found", e); } try { - LOG.debug("Prefix configuration shards ready - creating clients"); + LOG.debug("{}: Prefix configuration shards ready - creating clients", memberName); configurationShardMap.put(LogicalDatastoreType.CONFIGURATION, createDatastoreClient(ClusterUtils.PREFIX_CONFIG_SHARD_ID, distributedConfigDatastore.getActorContext())); @@ -244,15 +236,13 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat //create shard registration for DEFAULT_SHARD try { - defaultShardRegistrations.put(LogicalDatastoreType.CONFIGURATION, - initDefaultShard(LogicalDatastoreType.CONFIGURATION)); + initDefaultShard(LogicalDatastoreType.CONFIGURATION); } catch (final InterruptedException | ExecutionException e) { throw new IllegalStateException("Unable to create default shard frontend for config shard", e); } try { - defaultShardRegistrations.put(LogicalDatastoreType.OPERATIONAL, - initDefaultShard(LogicalDatastoreType.OPERATIONAL)); + initDefaultShard(LogicalDatastoreType.OPERATIONAL); } catch (final InterruptedException | ExecutionException e) { throw new IllegalStateException("Unable to create default shard frontend for operational shard", e); } @@ -298,15 +288,13 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat @Nonnull @Override public DOMDataTreeProducer createProducer(@Nonnull final Collection subtrees) { - LOG.debug("{} - Creating producer for {}", - distributedConfigDatastore.getActorContext().getClusterWrapper().getCurrentMemberName(), subtrees); + LOG.debug("{} - Creating producer for {}", memberName, subtrees); final DOMDataTreeProducer producer = shardedDOMDataTree.createProducer(subtrees); final Object response = distributedConfigDatastore.getActorContext() .executeOperation(shardedDataTreeActor, new ProducerCreated(subtrees)); if (response == null) { - LOG.debug("{} - Received success from remote nodes, creating producer:{}", - distributedConfigDatastore.getActorContext().getClusterWrapper().getCurrentMemberName(), subtrees); + LOG.debug("{} - Received success from remote nodes, creating producer:{}", memberName, subtrees); return new ProxyProducer(producer, subtrees, shardedDataTreeActor, distributedConfigDatastore.getActorContext(), shards); } @@ -375,7 +363,7 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat } void resolveShardAdditions(final Set additions) { - LOG.debug("Member {}: Resolving additions : {}", memberName, additions); + LOG.debug("{}: Resolving additions : {}", memberName, additions); final ArrayList list = new ArrayList<>(additions); // we need to register the shards from top to bottom, so we need to atleast make sure the ordering reflects that Collections.sort(list, (o1, o2) -> { @@ -392,14 +380,14 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat } void resolveShardRemovals(final Set removals) { - LOG.debug("Member {}: Resolving removals : {}", memberName, removals); + LOG.debug("{}: Resolving removals : {}", memberName, removals); // do we need to go from bottom to top? 
removals.forEach(this::despawnShardFrontend); } private void createShardFrontend(final DOMDataTreeIdentifier prefix) { - LOG.debug("Member {}: Creating CDS shard for prefix: {}", memberName, prefix); + LOG.debug("{}: Creating CDS shard for prefix: {}", memberName, prefix); final String shardName = ClusterUtils.getCleanShardName(prefix.getRootIdentifier()); final AbstractDataStore distributedDataStore = prefix.getDatastoreType().equals(org.opendaylight.mdsal.common.api.LogicalDatastoreType.CONFIGURATION) @@ -430,14 +418,14 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat } private void despawnShardFrontend(final DOMDataTreeIdentifier prefix) { - LOG.debug("Member {}: Removing CDS shard for prefix: {}", memberName, prefix); + LOG.debug("{}: Removing CDS shard for prefix: {}", memberName, prefix); final DOMDataTreePrefixTableEntry> lookup; synchronized (shards) { lookup = shards.lookup(prefix); } if (lookup == null || !lookup.getValue().getPrefix().equals(prefix)) { - LOG.debug("Member {}: Received despawn for non-existing CDS shard frontend, prefix: {}, ignoring..", + LOG.debug("{}: Received despawn for non-existing CDS shard frontend, prefix: {}, ignoring..", memberName, prefix); return; } @@ -497,7 +485,7 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat final String shardName, final ActorContext actorContext) throws DOMDataTreeShardCreationFailedException { - LOG.debug("Creating distributed datastore client for shard {}", shardName); + LOG.debug("{}: Creating distributed datastore client for shard {}", memberName, shardName); final Props distributedDataStoreClientProps = SimpleDataStoreClientActor.props(memberName, "Shard-" + shardName, actorContext, shardName); @@ -506,7 +494,7 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat return new SimpleEntry<>(SimpleDataStoreClientActor .getDistributedDataStoreClient(clientActor, 30, TimeUnit.SECONDS), clientActor); } catch (final Exception e) { - LOG.error("Failed to get actor for {}", distributedDataStoreClientProps, e); + LOG.error("{}: Failed to get actor for {}", distributedDataStoreClientProps, memberName, e); clientActor.tell(PoisonPill.getInstance(), noSender()); throw new DOMDataTreeShardCreationFailedException( "Unable to create datastore client for shard{" + shardName + "}", e); @@ -514,26 +502,21 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat } @SuppressWarnings("checkstyle:IllegalCatch") - private DistributedShardRegistration initDefaultShard(final LogicalDatastoreType logicalDatastoreType) + private void initDefaultShard(final LogicalDatastoreType logicalDatastoreType) throws ExecutionException, InterruptedException { - final Collection names = - distributedConfigDatastore.getActorContext().getConfiguration().getUniqueMemberNamesForAllShards(); final PrefixedShardConfigWriter writer = writerMap.get(logicalDatastoreType); if (writer.checkDefaultIsPresent()) { - LOG.debug("Default shard for {} is already present in the config. Possibly saved in snapshot.", - logicalDatastoreType); - return new DistributedShardRegistrationImpl( - new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY), - shardedDataTreeActor, this); + LOG.debug("{}: Default shard for {} is already present in the config. 
Possibly saved in snapshot.", + memberName, logicalDatastoreType); } else { try { - // There can be situation when there is already started default shard - // because it is present in modules.conf. In that case we have to create - // just frontend for default shard, but not shard itself - // TODO we don't have to do it for config and operational default shard - // separately. Just one of them should be enough + // Currently the default shard configuration is present in the out-of-box modules.conf and is + // expected to be present. So look up the local default shard here and create the frontend. + + // TODO we don't have to do it for config and operational default shard separately. Just one of them + // should be enough final ActorContext actorContext = logicalDatastoreType == LogicalDatastoreType.CONFIGURATION ? distributedConfigDatastore.getActorContext() : distributedOperDatastore.getActorContext(); @@ -541,24 +524,27 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat actorContext.findLocalShard(ClusterUtils.getCleanShardName(YangInstanceIdentifier.EMPTY)); if (defaultLocalShardOptional.isPresent()) { - LOG.debug("{} Default shard is already started, creating just frontend", logicalDatastoreType); + LOG.debug("{}: Default shard for {} is already started, creating just frontend", memberName, + logicalDatastoreType); createShardFrontend(new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY)); - return new DistributedShardRegistrationImpl( - new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY), - shardedDataTreeActor, this); } - // we should probably only have one node create the default shards - return Await.result(FutureConverters.toScala(createDistributedShard( - new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY), names)), - SHARD_FUTURE_TIMEOUT_DURATION); - } catch (DOMDataTreeShardingConflictException e) { - LOG.debug("Default shard already registered, possibly due to other node doing it faster"); - return new DistributedShardRegistrationImpl( - new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY), - shardedDataTreeActor, this); + // The local shard isn't present - we assume that means the local member isn't in the replica list + // and will be dynamically created later via an explicit add-shard-replica request. This is the + // bootstrapping mechanism to add a new node into an existing cluster. The following code to create + // the default shard as a prefix shard is problematic in this scenario so it is commented out. Since + // the default shard is a module-based shard by default, it makes sense to always treat it as such, + // ie bootstrap it in the same manner as the special prefix-configuration and EOS shards. 
+// final Collection names = distributedConfigDatastore.getActorContext().getConfiguration() +// .getUniqueMemberNamesForAllShards(); +// Await.result(FutureConverters.toScala(createDistributedShard( +// new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY), names)), +// SHARD_FUTURE_TIMEOUT_DURATION); +// } catch (DOMDataTreeShardingConflictException e) { +// LOG.debug("{}: Default shard for {} already registered, possibly due to other node doing it faster", +// memberName, logicalDatastoreType); } catch (Exception e) { - LOG.error("{} default shard initialization failed", logicalDatastoreType, e); + LOG.error("{}: Default shard initialization for {} failed", memberName, logicalDatastoreType, e); throw new RuntimeException(e); } } diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/ShardedDataTreeActor.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/ShardedDataTreeActor.java index 224ca0b3d0..0f55831e36 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/ShardedDataTreeActor.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/ShardedDataTreeActor.java @@ -102,7 +102,7 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor { private final Cluster cluster; - private Map currentConfiguration = new HashMap<>(); + private final Map currentConfiguration = new HashMap<>(); ShardedDataTreeActor(final ShardedDataTreeActorCreator builder) { LOG.debug("Creating ShardedDataTreeActor on {}", builder.getClusterWrapper().getCurrentMemberName()); @@ -364,7 +364,7 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor { // schedule a notification task for the reply actorSystem.scheduler().scheduleOnce(SHARD_LOOKUP_TASK_INTERVAL, new ConfigShardLookupTask( - actorSystem, getSender(), context, clusterWrapper, message, lookupTaskMaxRetries), + actorSystem, getSender(), context, message, lookupTaskMaxRetries), actorSystem.dispatcher()); } @@ -664,21 +664,16 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor { private final ActorSystem system; private final ActorRef replyTo; private final ActorContext context; - private final ClusterWrapper clusterWrapper; - private final int lookupTaskMaxRetries; ConfigShardLookupTask(final ActorSystem system, final ActorRef replyTo, final ActorContext context, - final ClusterWrapper clusterWrapper, final StartConfigShardLookup message, final int lookupMaxRetries) { super(replyTo, lookupMaxRetries); this.system = system; this.replyTo = replyTo; this.context = context; - this.clusterWrapper = clusterWrapper; - this.lookupTaskMaxRetries = lookupMaxRetries; } @Override @@ -696,12 +691,8 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor { if (!localShard.isPresent()) { tryReschedule(null); } else { - LOG.debug("Local backend for prefix configuration shard lookup successful, starting leader lookup.."); - system.scheduler().scheduleOnce( - SHARD_LOOKUP_TASK_INTERVAL, - new ConfigShardReadinessTask( - system, replyTo, context, clusterWrapper, localShard.get(), lookupTaskMaxRetries), - system.dispatcher()); + LOG.debug("Local backend for prefix configuration shard lookup successful"); + replyTo.tell(new Status.Success(null), noSender()); } } } diff --git 
a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeRemotingTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeRemotingTest.java index af07d62d9c..b0d01d514b 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeRemotingTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeRemotingTest.java @@ -136,13 +136,17 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest { } private void initEmptyDatastores() throws Exception { + initEmptyDatastores(MODULE_SHARDS_CONFIG); + } + + private void initEmptyDatastores(String moduleShardsConfig) throws Exception { leaderTestKit = new IntegrationTestKit(leaderSystem, leaderDatastoreContextBuilder); leaderConfigDatastore = leaderTestKit.setupDistributedDataStore( - "config", MODULE_SHARDS_CONFIG, true, + "config", moduleShardsConfig, true, SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext()); leaderOperDatastore = leaderTestKit.setupDistributedDataStore( - "operational", MODULE_SHARDS_CONFIG, true, + "operational", moduleShardsConfig, true, SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext()); leaderShardFactory = new DistributedShardedDOMDataTree(leaderSystemProvider, @@ -152,9 +156,9 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest { followerTestKit = new IntegrationTestKit(followerSystem, followerDatastoreContextBuilder); followerConfigDatastore = followerTestKit.setupDistributedDataStore( - "config", MODULE_SHARDS_CONFIG, true, SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext()); + "config", moduleShardsConfig, true, SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext()); followerOperDatastore = followerTestKit.setupDistributedDataStore( - "operational", MODULE_SHARDS_CONFIG, true, + "operational", moduleShardsConfig, true, SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext()); followerShardFactory = new DistributedShardedDOMDataTree(followerSystemProvider, @@ -163,14 +167,17 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest { followerTestKit.waitForMembersUp("member-1"); + LOG.info("Initializing leader DistributedShardedDOMDataTree"); leaderShardFactory.init(); - followerShardFactory.init(); leaderTestKit.waitUntilLeader(leaderConfigDatastore.getActorContext(), ClusterUtils.getCleanShardName(YangInstanceIdentifier.EMPTY)); leaderTestKit.waitUntilLeader(leaderOperDatastore.getActorContext(), ClusterUtils.getCleanShardName(YangInstanceIdentifier.EMPTY)); + + LOG.info("Initializing follower DistributedShardedDOMDataTree"); + followerShardFactory.init(); } @Test @@ -419,4 +426,12 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest { LOG.info("testMultipleRegistrationsAtOnePrefix ending"); } + + @Test + public void testInitialBootstrappingWithNoModuleShards() throws Exception { + LOG.info("testInitialBootstrappingWithNoModuleShards starting"); + initEmptyDatastores("module-shards-default-member-1.conf"); + + // We just verify the DistributedShardedDOMDataTree initialized without error. 
+    }
 }

diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeTest.java
index b7b736e853..23e6fdd70e 100644
--- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeTest.java
+++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeTest.java
@@ -110,7 +110,7 @@ .node(TestModel.INNER_LIST_QNAME)); private static final Set SINGLE_MEMBER = Collections.singleton(AbstractTest.MEMBER_NAME); - private static final String MODULE_SHARDS_CONFIG = "module-shards-cars-member-1.conf"; + private static final String MODULE_SHARDS_CONFIG = "module-shards-default-member-1.conf"; private ActorSystem leaderSystem;
--
2.36.6
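
For reference, and not part of the patch itself: the new test and the updated MODULE_SHARDS_CONFIG constant both rely on a module-shards-default-member-1.conf test resource whose contents do not appear above. A minimal sketch of what such a resource would plausibly contain, assuming the standard module-shards HOCON layout used by the other test configs (the actual file accompanying this commit may differ):

module-shards = [
    {
        name = "default"
        shards = [
            {
                name = "default"
                replicas = ["member-1"]
            }
        ]
    }
]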