Bug 8885: Fix DistributedShardedDOMDataTree initialization 32/61132/2
author Tom Pantelis <tompantelis@gmail.com>
Wed, 26 Jul 2017 16:16:48 +0000 (12:16 -0400)
committer Robert Varga <robert.varga@pantheon.tech>
Thu, 24 Aug 2017 12:38:14 +0000 (14:38 +0200)
DistributedShardedDOMDataTree initialization expects the prefix
configuration shard to be present and ready with a leader. However, that
is not the case when the static module-shards configuration is
bootstrapped without the local member so that the member can later be
joined dynamically into an existing cluster. So I modified the
ConfigShardLookupTask to elide the ConfigShardReadinessTask and report
success as soon as the local prefix configuration shard is found.
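
To illustrate the bootstrapping scenario, the joining node's module-shards.conf
deliberately omits the local member from the default shard's replica list. The
snippet below is only illustrative - the member names and exact file contents
are assumptions, not part of this patch:

    module-shards = [
        {
            name = "default"
            shards = [
                {
                    name = "default"
                    # the local member (e.g. member-2) is intentionally not listed here;
                    # its replica is added later when the node joins an existing cluster
                    replicas = ["member-1"]
                }
            ]
        }
    ]

With such a configuration the local prefix configuration shard never becomes
ready with a leader, which is what the readiness check was waiting for.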

Once past that, creation of the prefix-based default shard is attempted
because there is no local module-based shard; however, this fails because
the local prefix configuration shard is not connected to a leader. To
alleviate this I commented out the code that creates the shard. Since the
default shard configuration is present in the out-of-box modules.conf and
is expected to be present, we can assume at this point that the local
member isn't in the replica list, with the intention of dynamically
joining it to an existing cluster, at which time the shard will be created.
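
A condensed sketch of the resulting initDefaultShard() flow after this change
(names are taken from the patch below; imports, exception handling and the
surrounding class are elided, so this is not a drop-in replacement):

    private void initDefaultShard(final LogicalDatastoreType logicalDatastoreType)
            throws ExecutionException, InterruptedException {
        final PrefixedShardConfigWriter writer = writerMap.get(logicalDatastoreType);

        if (writer.checkDefaultIsPresent()) {
            // Already present in the prefix config, possibly restored from a snapshot - nothing to do.
            return;
        }

        final ActorContext actorContext = logicalDatastoreType == LogicalDatastoreType.CONFIGURATION
                ? distributedConfigDatastore.getActorContext() : distributedOperDatastore.getActorContext();

        if (actorContext.findLocalShard(ClusterUtils.getCleanShardName(YangInstanceIdentifier.EMPTY)).isPresent()) {
            // The module-based default shard is running locally, so only the frontend needs to be created.
            createShardFrontend(new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY));
        }
        // Otherwise the local member is not in the replica list; the shard and its frontend are
        // created later, when the member is dynamically joined to an existing cluster.
    }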

These changes at least fix the regression in the bootstrapping scenario.
We can revisit this initialization later w.r.t. prefix-based shards.

Change-Id: I1faf531f4c79914d45203ee132dd4e65ad2f18ba
Signed-off-by: Tom Pantelis <tompantelis@gmail.com>
(cherry picked from commit fa60e0fbe54f1604d3b68dcd5df14ba3aed7183f)

opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTree.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/sharding/ShardedDataTreeActor.java
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeRemotingTest.java
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/sharding/DistributedShardedDOMDataTreeTest.java

index ca0d649e8b85b7ebfecb09c87af8ffb273cb32c0..8e44281ba8fa7ac1ef43d77fbcf42c09367429c3 100644 (file)
@@ -86,7 +86,6 @@ import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.compat.java8.FutureConverters;
-import scala.concurrent.Await;
 import scala.concurrent.Future;
 import scala.concurrent.Promise;
 import scala.concurrent.duration.FiniteDuration;
@@ -122,9 +121,6 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
     private final DOMDataTreePrefixTable<DOMDataTreeShardRegistration<DOMDataTreeShard>> shards =
             DOMDataTreePrefixTable.create();
 
-    private final EnumMap<LogicalDatastoreType, DistributedShardRegistration> defaultShardRegistrations =
-            new EnumMap<>(LogicalDatastoreType.class);
-
     private final EnumMap<LogicalDatastoreType, Entry<DataStoreClient, ActorRef>> configurationShardMap =
             new EnumMap<>(LogicalDatastoreType.class);
 
@@ -195,21 +191,17 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
     public void init() {
         // create our writers to the configuration
         try {
-            LOG.debug("{} - starting config shard lookup.",
-                    distributedConfigDatastore.getActorContext().getCurrentMemberName());
+            LOG.debug("{} - starting config shard lookup.", memberName);
 
             // We have to wait for prefix config shards to be up and running
             // so we can create datastore clients for them
             handleConfigShardLookup().get(SHARD_FUTURE_TIMEOUT_DURATION.length(), SHARD_FUTURE_TIMEOUT_DURATION.unit());
-
-            LOG.debug("Prefix configuration shards ready - creating clients");
-
         } catch (InterruptedException | ExecutionException | TimeoutException e) {
             throw new IllegalStateException("Prefix config shards not found", e);
         }
 
         try {
-            LOG.debug("Prefix configuration shards ready - creating clients");
+            LOG.debug("{}: Prefix configuration shards ready - creating clients", memberName);
             configurationShardMap.put(LogicalDatastoreType.CONFIGURATION,
                     createDatastoreClient(ClusterUtils.PREFIX_CONFIG_SHARD_ID,
                             distributedConfigDatastore.getActorContext()));
@@ -243,15 +235,13 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
 
         //create shard registration for DEFAULT_SHARD
         try {
-            defaultShardRegistrations.put(LogicalDatastoreType.CONFIGURATION,
-                    initDefaultShard(LogicalDatastoreType.CONFIGURATION));
+            initDefaultShard(LogicalDatastoreType.CONFIGURATION);
         } catch (final InterruptedException | ExecutionException e) {
             throw new IllegalStateException("Unable to create default shard frontend for config shard", e);
         }
 
         try {
-            defaultShardRegistrations.put(LogicalDatastoreType.OPERATIONAL,
-                    initDefaultShard(LogicalDatastoreType.OPERATIONAL));
+            initDefaultShard(LogicalDatastoreType.OPERATIONAL);
         } catch (final InterruptedException | ExecutionException e) {
             throw new IllegalStateException("Unable to create default shard frontend for operational shard", e);
         }
@@ -297,15 +287,13 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
     @Nonnull
     @Override
     public DOMDataTreeProducer createProducer(@Nonnull final Collection<DOMDataTreeIdentifier> subtrees) {
-        LOG.debug("{} - Creating producer for {}",
-                distributedConfigDatastore.getActorContext().getClusterWrapper().getCurrentMemberName(), subtrees);
+        LOG.debug("{} - Creating producer for {}", memberName, subtrees);
         final DOMDataTreeProducer producer = shardedDOMDataTree.createProducer(subtrees);
 
         final Object response = distributedConfigDatastore.getActorContext()
                 .executeOperation(shardedDataTreeActor, new ProducerCreated(subtrees));
         if (response == null) {
-            LOG.debug("{} - Received success from remote nodes, creating producer:{}",
-                    distributedConfigDatastore.getActorContext().getClusterWrapper().getCurrentMemberName(), subtrees);
+            LOG.debug("{} - Received success from remote nodes, creating producer:{}", memberName, subtrees);
             return new ProxyProducer(producer, subtrees, shardedDataTreeActor,
                     distributedConfigDatastore.getActorContext(), shards);
         } else if (response instanceof Exception) {
@@ -372,7 +360,7 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
     }
 
     void resolveShardAdditions(final Set<DOMDataTreeIdentifier> additions) {
-        LOG.debug("Member {}: Resolving additions : {}", memberName, additions);
+        LOG.debug("{}: Resolving additions : {}", memberName, additions);
         final ArrayList<DOMDataTreeIdentifier> list = new ArrayList<>(additions);
         // we need to register the shards from top to bottom, so we need to atleast make sure the ordering reflects that
         Collections.sort(list, (o1, o2) -> {
@@ -389,14 +377,14 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
     }
 
     void resolveShardRemovals(final Set<DOMDataTreeIdentifier> removals) {
-        LOG.debug("Member {}: Resolving removals : {}", memberName, removals);
+        LOG.debug("{}: Resolving removals : {}", memberName, removals);
 
         // do we need to go from bottom to top?
         removals.forEach(this::despawnShardFrontend);
     }
 
     private void createShardFrontend(final DOMDataTreeIdentifier prefix) {
-        LOG.debug("Member {}: Creating CDS shard for prefix: {}", memberName, prefix);
+        LOG.debug("{}: Creating CDS shard for prefix: {}", memberName, prefix);
         final String shardName = ClusterUtils.getCleanShardName(prefix.getRootIdentifier());
         final AbstractDataStore distributedDataStore =
                 prefix.getDatastoreType().equals(org.opendaylight.mdsal.common.api.LogicalDatastoreType.CONFIGURATION)
@@ -428,14 +416,14 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
     }
 
     private void despawnShardFrontend(final DOMDataTreeIdentifier prefix) {
-        LOG.debug("Member {}: Removing CDS shard for prefix: {}", memberName, prefix);
+        LOG.debug("{}: Removing CDS shard for prefix: {}", memberName, prefix);
         final DOMDataTreePrefixTableEntry<DOMDataTreeShardRegistration<DOMDataTreeShard>> lookup;
         synchronized (shards) {
             lookup = shards.lookup(prefix);
         }
 
         if (lookup == null || !lookup.getValue().getPrefix().equals(prefix)) {
-            LOG.debug("Member {}: Received despawn for non-existing CDS shard frontend, prefix: {}, ignoring..",
+            LOG.debug("{}: Received despawn for non-existing CDS shard frontend, prefix: {}, ignoring..",
                     memberName, prefix);
             return;
         }
@@ -495,7 +483,7 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
             final String shardName, final ActorContext actorContext)
             throws DOMDataTreeShardCreationFailedException {
 
-        LOG.debug("Creating distributed datastore client for shard {}", shardName);
+        LOG.debug("{}: Creating distributed datastore client for shard {}", memberName, shardName);
         final Props distributedDataStoreClientProps =
                 SimpleDataStoreClientActor.props(memberName, "Shard-" + shardName, actorContext, shardName);
 
@@ -504,7 +492,7 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
             return new SimpleEntry<>(SimpleDataStoreClientActor
                     .getDistributedDataStoreClient(clientActor, 30, TimeUnit.SECONDS), clientActor);
         } catch (final Exception e) {
-            LOG.error("Failed to get actor for {}", distributedDataStoreClientProps, e);
+            LOG.error("{}: Failed to get actor for {}", memberName, distributedDataStoreClientProps, e);
             clientActor.tell(PoisonPill.getInstance(), noSender());
             throw new DOMDataTreeShardCreationFailedException(
                     "Unable to create datastore client for shard{" + shardName + "}", e);
@@ -512,26 +500,21 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
     }
 
     @SuppressWarnings("checkstyle:IllegalCatch")
-    private DistributedShardRegistration initDefaultShard(final LogicalDatastoreType logicalDatastoreType)
+    private void initDefaultShard(final LogicalDatastoreType logicalDatastoreType)
             throws ExecutionException, InterruptedException {
-        final Collection<MemberName> names =
-                distributedConfigDatastore.getActorContext().getConfiguration().getUniqueMemberNamesForAllShards();
 
         final PrefixedShardConfigWriter writer = writerMap.get(logicalDatastoreType);
 
         if (writer.checkDefaultIsPresent()) {
-            LOG.debug("Default shard for {} is already present in the config. Possibly saved in snapshot.",
-                    logicalDatastoreType);
-            return new DistributedShardRegistrationImpl(
-                    new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY),
-                    shardedDataTreeActor, this);
+            LOG.debug("{}: Default shard for {} is already present in the config. Possibly saved in snapshot.",
+                    memberName, logicalDatastoreType);
         } else {
             try {
-                // There can be situation when there is already started default shard
-                // because it is present in modules.conf. In that case we have to create
-                // just frontend for default shard, but not shard itself
-                // TODO we don't have to do it for config and operational default shard
-                // separately. Just one of them should be enough
+                // Currently the default shard configuration is present in the out-of-box modules.conf and is
+                // expected to be present. So look up the local default shard here and create the frontend.
+
+                // TODO we don't have to do it for config and operational default shard separately. Just one of them
+                // should be enough
                 final ActorContext actorContext = logicalDatastoreType == LogicalDatastoreType.CONFIGURATION
                         ? distributedConfigDatastore.getActorContext() : distributedOperDatastore.getActorContext();
 
@@ -539,24 +522,27 @@ public class DistributedShardedDOMDataTree implements DOMDataTreeService, DOMDat
                         actorContext.findLocalShard(ClusterUtils.getCleanShardName(YangInstanceIdentifier.EMPTY));
 
                 if (defaultLocalShardOptional.isPresent()) {
-                    LOG.debug("{} Default shard is already started, creating just frontend", logicalDatastoreType);
+                    LOG.debug("{}: Default shard for {} is already started, creating just frontend", memberName,
+                            logicalDatastoreType);
                     createShardFrontend(new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY));
-                    return new DistributedShardRegistrationImpl(
-                            new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY),
-                            shardedDataTreeActor, this);
                 }
 
-                // we should probably only have one node create the default shards
-                return Await.result(FutureConverters.toScala(createDistributedShard(
-                        new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY), names)),
-                        SHARD_FUTURE_TIMEOUT_DURATION);
-            } catch (DOMDataTreeShardingConflictException e) {
-                LOG.debug("Default shard already registered, possibly due to other node doing it faster");
-                return new DistributedShardRegistrationImpl(
-                        new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY),
-                        shardedDataTreeActor, this);
+                // The local shard isn't present - we assume that means the local member isn't in the replica list
+                // and will be dynamically created later via an explicit add-shard-replica request. This is the
+                // bootstrapping mechanism to add a new node into an existing cluster. The following code to create
+                // the default shard as a prefix shard is problematic in this scenario so it is commented out. Since
+                // the default shard is a module-based shard by default, it makes sense to always treat it as such,
+                // ie bootstrap it in the same manner as the special prefix-configuration and EOS shards.
+//                final Collection<MemberName> names = distributedConfigDatastore.getActorContext().getConfiguration()
+//                        .getUniqueMemberNamesForAllShards();
+//                Await.result(FutureConverters.toScala(createDistributedShard(
+//                        new DOMDataTreeIdentifier(logicalDatastoreType, YangInstanceIdentifier.EMPTY), names)),
+//                        SHARD_FUTURE_TIMEOUT_DURATION);
+//            } catch (DOMDataTreeShardingConflictException e) {
+//                LOG.debug("{}: Default shard for {} already registered, possibly due to other node doing it faster",
+//                        memberName, logicalDatastoreType);
             } catch (Exception e) {
-                LOG.error("{} default shard initialization failed", logicalDatastoreType, e);
+                LOG.error("{}: Default shard initialization for {} failed", memberName, logicalDatastoreType, e);
                 throw new RuntimeException(e);
             }
         }
index 671fbb89652b42ed1b34a2623346715ef6d559c1..1b33ad7476473c65561a6368ba6b6d64b63ae7d1 100644 (file)
@@ -102,7 +102,7 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor {
 
     private final Cluster cluster;
 
-    private Map<DOMDataTreeIdentifier, PrefixShardConfiguration> currentConfiguration = new HashMap<>();
+    private final Map<DOMDataTreeIdentifier, PrefixShardConfiguration> currentConfiguration = new HashMap<>();
 
     ShardedDataTreeActor(final ShardedDataTreeActorCreator builder) {
         LOG.debug("Creating ShardedDataTreeActor on {}", builder.getClusterWrapper().getCurrentMemberName());
@@ -364,7 +364,7 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor {
         // schedule a notification task for the reply
         actorSystem.scheduler().scheduleOnce(SHARD_LOOKUP_TASK_INTERVAL,
                 new ConfigShardLookupTask(
-                        actorSystem, getSender(), context, clusterWrapper, message, lookupTaskMaxRetries),
+                        actorSystem, getSender(), context, message, lookupTaskMaxRetries),
                 actorSystem.dispatcher());
     }
 
@@ -664,21 +664,16 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor {
         private final ActorSystem system;
         private final ActorRef replyTo;
         private final ActorContext context;
-        private final ClusterWrapper clusterWrapper;
-        private final int lookupTaskMaxRetries;
 
         ConfigShardLookupTask(final ActorSystem system,
                               final ActorRef replyTo,
                               final ActorContext context,
-                              final ClusterWrapper clusterWrapper,
                               final StartConfigShardLookup message,
                               final int lookupMaxRetries) {
             super(replyTo, lookupMaxRetries);
             this.system = system;
             this.replyTo = replyTo;
             this.context = context;
-            this.clusterWrapper = clusterWrapper;
-            this.lookupTaskMaxRetries = lookupMaxRetries;
         }
 
         @Override
@@ -696,12 +691,8 @@ public class ShardedDataTreeActor extends AbstractUntypedPersistentActor {
             if (!localShard.isPresent()) {
                 tryReschedule(null);
             } else {
-                LOG.debug("Local backend for prefix configuration shard lookup successful, starting leader lookup..");
-                system.scheduler().scheduleOnce(
-                        SHARD_LOOKUP_TASK_INTERVAL,
-                        new ConfigShardReadinessTask(
-                                system, replyTo, context, clusterWrapper, localShard.get(), lookupTaskMaxRetries),
-                        system.dispatcher());
+                LOG.debug("Local backend for prefix configuration shard lookup successful");
+                replyTo.tell(new Status.Success(null), noSender());
             }
         }
     }
index af07d62d9cf44ec713ba3bf9bbca6d6cb4dfbc9f..b0d01d514b9ae34fc72ce49051e3ff7113bf4a96 100644 (file)
@@ -136,13 +136,17 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest {
     }
 
     private void initEmptyDatastores() throws Exception {
+        initEmptyDatastores(MODULE_SHARDS_CONFIG);
+    }
+
+    private void initEmptyDatastores(String moduleShardsConfig) throws Exception {
         leaderTestKit = new IntegrationTestKit(leaderSystem, leaderDatastoreContextBuilder);
 
         leaderConfigDatastore = leaderTestKit.setupDistributedDataStore(
-                "config", MODULE_SHARDS_CONFIG, true,
+                "config", moduleShardsConfig, true,
                 SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext());
         leaderOperDatastore = leaderTestKit.setupDistributedDataStore(
-                "operational", MODULE_SHARDS_CONFIG, true,
+                "operational", moduleShardsConfig, true,
                 SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext());
 
         leaderShardFactory = new DistributedShardedDOMDataTree(leaderSystemProvider,
@@ -152,9 +156,9 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest {
         followerTestKit = new IntegrationTestKit(followerSystem, followerDatastoreContextBuilder);
 
         followerConfigDatastore = followerTestKit.setupDistributedDataStore(
-                "config", MODULE_SHARDS_CONFIG, true, SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext());
+                "config", moduleShardsConfig, true, SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext());
         followerOperDatastore = followerTestKit.setupDistributedDataStore(
-                "operational", MODULE_SHARDS_CONFIG, true,
+                "operational", moduleShardsConfig, true,
                 SchemaContextHelper.distributedShardedDOMDataTreeSchemaContext());
 
         followerShardFactory = new DistributedShardedDOMDataTree(followerSystemProvider,
@@ -163,14 +167,17 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest {
 
         followerTestKit.waitForMembersUp("member-1");
 
+        LOG.info("Initializing leader DistributedShardedDOMDataTree");
         leaderShardFactory.init();
-        followerShardFactory.init();
 
         leaderTestKit.waitUntilLeader(leaderConfigDatastore.getActorContext(),
                 ClusterUtils.getCleanShardName(YangInstanceIdentifier.EMPTY));
 
         leaderTestKit.waitUntilLeader(leaderOperDatastore.getActorContext(),
                 ClusterUtils.getCleanShardName(YangInstanceIdentifier.EMPTY));
+
+        LOG.info("Initializing follower DistributedShardedDOMDataTree");
+        followerShardFactory.init();
     }
 
     @Test
@@ -419,4 +426,12 @@ public class DistributedShardedDOMDataTreeRemotingTest extends AbstractTest {
 
         LOG.info("testMultipleRegistrationsAtOnePrefix ending");
     }
+
+    @Test
+    public void testInitialBootstrappingWithNoModuleShards() throws Exception {
+        LOG.info("testInitialBootstrappingWithNoModuleShards starting");
+        initEmptyDatastores("module-shards-default-member-1.conf");
+
+        // We just verify the DistributedShardedDOMDataTree initialized without error.
+    }
 }
index b7b736e853cfee3eb6391646d7ff869375b4b302..23e6fdd70e7793dfc356cb2bc9cdc1513b2ed4a0 100644 (file)
@@ -110,7 +110,7 @@ public class DistributedShardedDOMDataTreeTest extends AbstractTest {
                             .node(TestModel.INNER_LIST_QNAME));
     private static final Set<MemberName> SINGLE_MEMBER = Collections.singleton(AbstractTest.MEMBER_NAME);
 
-    private static final String MODULE_SHARDS_CONFIG = "module-shards-cars-member-1.conf";
+    private static final String MODULE_SHARDS_CONFIG = "module-shards-default-member-1.conf";
 
     private ActorSystem leaderSystem;