BUG 3156 : Recreating CDS should not fail 81/20181/3
authorMoiz Raja <moraja@cisco.com>
Thu, 7 May 2015 01:43:31 +0000 (18:43 -0700)
committerGerrit Code Review <gerrit@opendaylight.org>
Wed, 13 May 2015 04:01:01 +0000 (04:01 +0000)
When CDS is recreated it can fail because we try to
create actors on a datastore that is already terminated.

To address this issue I have enhanced DistributedDataStoreFactory
as follows,

- Simplified the actor system creation code by synchronizing
  all static methods. Since these methods are anyway not used in
  any fast paths - it is a fair enough solution.
- Added another static field in DistributedDataStoreFactory to
  track the createdInstances. This is so that we can properly
  cleanup the actor system as instances of the data store are destroyed.
- The actor system is now shutdown when there are no datastrore instances.
- Removed actor system shutdown from ActorContext to ensure that we
  use the symmetric method destroyInstance

Now that we do not shutdown actor system till both the data stores
are destroyed it may so happen that we may have a situation where we are
trying to create an actor on the old actor system with an already used name
like shardmnanager-config or shardmanager-operational. To avoid this
I have added a loop when creating shardmanager which catches the thrown
exception and retries 100 times after waiting for 100 milliseconds. This
is to give some time for the actor to properly die.

Change-Id: I3c8b2b3b21a1519610bf2ed5e1af8be93f3120ce
Signed-off-by: Moiz Raja <moraja@cisco.com>
(cherry picked from commit 35f8cf2335afa8cf87ce565bbb974431ad5008a6)

opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DistributedDataStore.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreFactory.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/utils/ActorContext.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/config/yang/config/concurrent_data_broker/DomConcurrentDataBrokerModule.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/config/yang/config/distributed_datastore_provider/DistributedConfigDataStoreProviderModule.java
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/config/yang/config/distributed_datastore_provider/DistributedOperationalDataStoreProviderModule.java
opendaylight/md-sal/sal-remoterpc-connector/src/main/java/org/opendaylight/controller/config/yang/config/remote_rpc_connector/RemoteRPCBrokerModule.java

index a136cc6..8051f7d 100644 (file)
@@ -8,9 +8,11 @@
 
 package org.opendaylight.controller.cluster.datastore;
 
+import akka.actor.ActorRef;
 import akka.actor.ActorSystem;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import com.google.common.util.concurrent.Uninterruptibles;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import org.opendaylight.controller.cluster.datastore.identifiers.ShardManagerIdentifier;
@@ -77,10 +79,8 @@ public class DistributedDataStore implements DOMStore, SchemaContextListener,
         String shardDispatcher =
                 new Dispatchers(actorSystem.dispatchers()).getDispatcherPath(Dispatchers.DispatcherType.Shard);
 
-        actorContext = new ActorContext(actorSystem, actorSystem.actorOf(
-                ShardManager.props(cluster, configuration, datastoreContext, waitTillReadyCountDownLatch)
-                        .withDispatcher(shardDispatcher).withMailbox(ActorContext.MAILBOX), shardManagerId ),
-                cluster, configuration, datastoreContext);
+        actorContext = new ActorContext(actorSystem, createShardManager(actorSystem, cluster, configuration,
+                datastoreContext, shardDispatcher, shardManagerId ), cluster, configuration, datastoreContext);
 
         this.waitTillReadyTimeInMillis =
                 actorContext.getDatastoreContext().getShardLeaderElectionTimeout().duration().toMillis() * READY_WAIT_FACTOR;
@@ -186,11 +186,12 @@ public class DistributedDataStore implements DOMStore, SchemaContextListener,
             try {
                 closeable.close();
             } catch (Exception e) {
-                LOG.debug("Error closing insance", e);
+                LOG.debug("Error closing instance", e);
             }
         }
 
         actorContext.shutdown();
+        DistributedDataStoreFactory.destroyInstance(this);
     }
 
     @VisibleForTesting
@@ -212,6 +213,25 @@ public class DistributedDataStore implements DOMStore, SchemaContextListener,
         }
     }
 
+    private ActorRef createShardManager(ActorSystem actorSystem, ClusterWrapper cluster, Configuration configuration,
+                                        DatastoreContext datastoreContext, String shardDispatcher, String shardManagerId){
+        Exception lastException = null;
+
+        for(int i=0;i<100;i++) {
+            try {
+                return actorSystem.actorOf(
+                        ShardManager.props(cluster, configuration, datastoreContext, waitTillReadyCountDownLatch)
+                                .withDispatcher(shardDispatcher).withMailbox(ActorContext.MAILBOX), shardManagerId);
+            } catch (Exception e){
+                lastException = e;
+                Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+                LOG.debug(String.format("Could not create actor %s because of %s - waiting for sometime before retrying (retry count = %d)", shardManagerId, e.getMessage(), i));
+            }
+        }
+
+        throw new IllegalStateException("Failed to create Shard Manager", lastException);
+    }
+
     @VisibleForTesting
     public CountDownLatch getWaitTillReadyCountDownLatch() {
         return waitTillReadyCountDownLatch;
index 8199e33..ee8ac61 100644 (file)
@@ -10,26 +10,36 @@ package org.opendaylight.controller.cluster.datastore;
 import akka.actor.ActorSystem;
 import akka.actor.Props;
 import akka.osgi.BundleDelegatingClassLoader;
+import com.google.common.base.Preconditions;
 import com.typesafe.config.ConfigFactory;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
 import org.opendaylight.controller.cluster.datastore.config.ConfigurationReader;
 import org.opendaylight.controller.cluster.datastore.shardstrategy.ShardStrategyFactory;
 import org.opendaylight.controller.sal.core.api.model.SchemaService;
 import org.osgi.framework.BundleContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.concurrent.duration.Duration;
 
 public class DistributedDataStoreFactory {
     private static final String ACTOR_SYSTEM_NAME = "opendaylight-cluster-data";
     private static final String CONFIGURATION_NAME = "odl-cluster-data";
+    private static ActorSystem actorSystem = null;
+    private static final Set<DistributedDataStore> createdInstances = new HashSet<>(2);
+    private static final Logger LOG = LoggerFactory.getLogger(DistributedDataStoreFactory.class);
 
-    private static volatile ActorSystem persistentActorSystem = null;
-
-    public static DistributedDataStore createInstance(SchemaService schemaService,
+    public static synchronized DistributedDataStore createInstance(SchemaService schemaService,
             DatastoreContext datastoreContext, BundleContext bundleContext) {
 
+        LOG.info("Create data store instance of type : {}", datastoreContext.getDataStoreType());
+
         DatastoreContextIntrospector introspector = new DatastoreContextIntrospector(datastoreContext);
         DatastoreContextConfigAdminOverlay overlay = new DatastoreContextConfigAdminOverlay(
                 introspector, bundleContext);
 
-        ActorSystem actorSystem = getOrCreateInstance(bundleContext, datastoreContext.getConfigurationReader());
+        ActorSystem actorSystem = getActorSystem(bundleContext, datastoreContext.getConfigurationReader());
         Configuration config = new ConfigurationImpl("module-shards.conf", "modules.conf");
         final DistributedDataStore dataStore = new DistributedDataStore(actorSystem,
                 new ClusterWrapperImpl(actorSystem), config, introspector.getContext());
@@ -41,28 +51,44 @@ public class DistributedDataStoreFactory {
 
         dataStore.setCloseable(overlay);
         dataStore.waitTillReady();
+
+        createdInstances.add(dataStore);
         return dataStore;
     }
 
-    private static final ActorSystem getOrCreateInstance(final BundleContext bundleContext, ConfigurationReader configurationReader) {
-        ActorSystem ret = persistentActorSystem;
-        if (ret == null) {
-            synchronized (DistributedDataStoreFactory.class) {
-                ret = persistentActorSystem;
-                if (ret == null) {
-                    // Create an OSGi bundle classloader for actor system
-                    BundleDelegatingClassLoader classLoader = new BundleDelegatingClassLoader(bundleContext.getBundle(),
-                        Thread.currentThread().getContextClassLoader());
-
-                    ret = ActorSystem.create(ACTOR_SYSTEM_NAME,
-                        ConfigFactory.load(configurationReader.read()).getConfig(CONFIGURATION_NAME), classLoader);
-                    ret.actorOf(Props.create(TerminationMonitor.class), "termination-monitor");
-
-                    persistentActorSystem = ret;
+    private static synchronized final ActorSystem getActorSystem(final BundleContext bundleContext,
+                                                                 ConfigurationReader configurationReader) {
+        if (actorSystem == null) {
+            // Create an OSGi bundle classloader for actor system
+            BundleDelegatingClassLoader classLoader = new BundleDelegatingClassLoader(bundleContext.getBundle(),
+                Thread.currentThread().getContextClassLoader());
+
+            actorSystem = ActorSystem.create(ACTOR_SYSTEM_NAME,
+                ConfigFactory.load(configurationReader.read()).getConfig(CONFIGURATION_NAME), classLoader);
+            actorSystem.actorOf(Props.create(TerminationMonitor.class), "termination-monitor");
+        }
+
+        return actorSystem;
+    }
+
+    public static synchronized void destroyInstance(DistributedDataStore dataStore){
+        Preconditions.checkNotNull(dataStore, "dataStore should not be null");
+
+        LOG.info("Destroy data store instance of type : {}", dataStore.getActorContext().getDataStoreType());
+
+        if(createdInstances.remove(dataStore)){
+            if(createdInstances.size() == 0){
+                if(actorSystem != null) {
+                    actorSystem.shutdown();
+                    try {
+                        actorSystem.awaitTermination(Duration.create(10, TimeUnit.SECONDS));
+                    } catch (Exception e) {
+                        LOG.warn("Error awaiting actor termination", e);
+                    }
+                    actorSystem = null;
                 }
             }
         }
-
-        return ret;
     }
+
 }
index 73f1a8f..18a8798 100644 (file)
@@ -378,8 +378,7 @@ public class ActorContext {
     }
 
     public void shutdown() {
-        shardManager.tell(PoisonPill.getInstance(), null);
-        actorSystem.shutdown();
+        shardManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
     }
 
     public ClusterWrapper getClusterWrapper() {
index 62da307..6412231 100644 (file)
@@ -34,6 +34,11 @@ public class DomConcurrentDataBrokerModule extends AbstractDomConcurrentDataBrok
         super(identifier, dependencyResolver, oldModule, oldInstance);
     }
 
+    @Override
+    public boolean canReuseInstance(AbstractDomConcurrentDataBrokerModule oldModule) {
+        return true;
+    }
+
     @Override
     public AutoCloseable createInstance() {
         //Initializing Operational DOM DataStore defaulting to InMemoryDOMDataStore if one is not configured
index f78b134..2527882 100644 (file)
@@ -28,8 +28,12 @@ public class DistributedConfigDataStoreProviderModule extends
     }
 
     @Override
-    public java.lang.AutoCloseable createInstance() {
+    public boolean canReuseInstance(AbstractDistributedConfigDataStoreProviderModule oldModule) {
+        return true;
+    }
 
+    @Override
+    public java.lang.AutoCloseable createInstance() {
         ConfigProperties props = getConfigProperties();
         if(props == null) {
             props = new ConfigProperties();
index c2e8125..5d23141 100644 (file)
@@ -21,6 +21,11 @@ public class RemoteRPCBrokerModule extends org.opendaylight.controller.config.ya
      // add custom validation form module attributes here.
   }
 
+  @Override
+  public boolean canReuseInstance(AbstractRemoteRPCBrokerModule oldModule) {
+      return true;
+  }
+
   @Override
   public java.lang.AutoCloseable createInstance() {
     Broker broker = getDomBrokerDependency();

©2013 OpenDaylight, A Linux Foundation Collaborative Project. All Rights Reserved.
OpenDaylight is a registered trademark of The OpenDaylight Project, Inc.
Linux Foundation and OpenDaylight are registered trademarks of the Linux Foundation.
Linux is a registered trademark of Linus Torvalds.