Implement scatter/gather on module shards
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / databroker / actors / dds / ModuleShardBackendResolver.java
index 9e6485b296e0e87dc20febe7db12ab08991d4db3..ee887b00faca112951952d18715259871904cb84 100644 (file)
  */
 package org.opendaylight.controller.cluster.databroker.actors.dds;
 
-import akka.actor.ActorRef;
+import static akka.pattern.Patterns.ask;
+import static com.google.common.base.Verify.verifyNotNull;
+
+import akka.dispatch.ExecutionContexts;
+import akka.dispatch.OnComplete;
 import akka.util.Timeout;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
-import com.google.common.collect.BiMap;
 import com.google.common.collect.ImmutableBiMap;
-import com.google.common.collect.ImmutableBiMap.Builder;
-import com.google.common.primitives.UnsignedLong;
-import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
-import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CompletionStage;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicLong;
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-import javax.annotation.concurrent.GuardedBy;
-import javax.annotation.concurrent.ThreadSafe;
-import org.opendaylight.controller.cluster.access.ABIVersion;
+import java.util.stream.Stream;
+import org.checkerframework.checker.lock.qual.GuardedBy;
+import org.eclipse.jdt.annotation.NonNull;
 import org.opendaylight.controller.cluster.access.client.BackendInfoResolver;
-import org.opendaylight.controller.cluster.access.commands.ConnectClientRequest;
-import org.opendaylight.controller.cluster.access.commands.ConnectClientSuccess;
 import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
-import org.opendaylight.controller.cluster.access.concepts.RequestFailure;
-import org.opendaylight.controller.cluster.common.actor.ExplicitAsk;
+import org.opendaylight.controller.cluster.datastore.shardmanager.RegisterForShardAvailabilityChanges;
 import org.opendaylight.controller.cluster.datastore.shardstrategy.DefaultShardStrategy;
-import org.opendaylight.controller.cluster.datastore.utils.ActorContext;
+import org.opendaylight.controller.cluster.datastore.utils.ActorUtils;
+import org.opendaylight.yangtools.concepts.Registration;
 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import scala.Function1;
-import scala.compat.java8.FutureConverters;
+import scala.concurrent.Future;
 
 /**
  * {@link BackendInfoResolver} implementation for static shard configuration based on ShardManager. Each string-named
  * shard is assigned a single cookie and this mapping is stored in a bidirectional map. Information about corresponding
- * shard leader is resolved via {@link ActorContext}. The product of resolution is {@link ShardBackendInfo}.
+ * shard leader is resolved via {@link ActorUtils}. The product of resolution is {@link ShardBackendInfo}.
+ *
+ * <p>
+ * This class is thread-safe.
  *
  * @author Robert Varga
  */
-@SuppressFBWarnings(value = "NP_NONNULL_PARAM_VIOLATION",
-                    justification = "Pertains to the NULL_FUTURE field below. Null is allowed and is intended")
-@ThreadSafe
-final class ModuleShardBackendResolver extends BackendInfoResolver<ShardBackendInfo> {
-    private static final class Entry {
-        private final CompletionStage<ShardBackendInfo> stage;
-        @GuardedBy("this")
-        private ShardBackendInfo result;
-
-        Entry(final CompletionStage<ShardBackendInfo> stage) {
-            this.stage = Preconditions.checkNotNull(stage);
-            stage.whenComplete(this::onStageResolved);
-        }
+final class ModuleShardBackendResolver extends AbstractShardBackendResolver {
+    private static final Logger LOG = LoggerFactory.getLogger(ModuleShardBackendResolver.class);
 
-        @Nonnull CompletionStage<ShardBackendInfo> getStage() {
-            return stage;
-        }
+    private final ConcurrentMap<Long, ShardState> backends = new ConcurrentHashMap<>();
 
-        synchronized @Nullable ShardBackendInfo getResult() {
-            return result;
-        }
+    private final Future<Registration> shardAvailabilityChangesRegFuture;
 
-        private synchronized void onStageResolved(final ShardBackendInfo result, final Throwable failure) {
-            if (failure == null) {
-                this.result = Preconditions.checkNotNull(result);
-            } else {
-                LOG.warn("Failed to resolve shard", failure);
+    @GuardedBy("this")
+    private long nextShard = 1;
+
+    private volatile ImmutableBiMap<String, Long> shards = ImmutableBiMap.of(DefaultShardStrategy.DEFAULT_SHARD, 0L);
+
+    /**
+     * Create a resolver for the specified client. Besides handing both arguments to the superclass, this asks the
+     * shard manager obtained from {@code actorUtils} to register {@link #onShardAvailabilityChange(String)} as a
+     * shard availability callback. The reply is retained as a future {@link Registration}.
+     *
+     * @param clientId client identifier, passed to the superclass
+     * @param actorUtils utilities used to reach the shard manager, passed to the superclass
+     */
+    // FIXME: we really need just ActorContext.findPrimaryShardAsync()
+    ModuleShardBackendResolver(final ClientIdentifier clientId, final ActorUtils actorUtils) {
+        super(clientId, actorUtils);
+
+        // The ask is given a 60-minute timeout; the untyped reply is cast to a Registration
+        shardAvailabilityChangesRegFuture = ask(actorUtils.getShardManager(), new RegisterForShardAvailabilityChanges(
+            this::onShardAvailabilityChange), Timeout.apply(60, TimeUnit.MINUTES))
+                .map(reply -> (Registration)reply, ExecutionContexts.global());
+
+        // A failed registration is only logged here; construction itself still succeeds
+        shardAvailabilityChangesRegFuture.onComplete(new OnComplete<Registration>() {
+            @Override
+            public void onComplete(final Throwable failure, final Registration reply) {
+                if (failure != null) {
+                    LOG.error("RegisterForShardAvailabilityChanges failed", failure);
+                }
             }
-        }
+        }, ExecutionContexts.global());
     }
 
-    private static final CompletableFuture<ShardBackendInfo> NULL_FUTURE = CompletableFuture.completedFuture(null);
-    private static final Logger LOG = LoggerFactory.getLogger(ModuleShardBackendResolver.class);
+    /**
+     * Callback passed to {@link RegisterForShardAvailabilityChanges} by the constructor. Translates the shard name
+     * to its cookie and, when one has been assigned, invokes {@code notifyStaleBackendInfoCallbacks} for it. Shards
+     * which have no cookie yet are only logged at debug level.
+     *
+     * @param shardName name of the shard whose availability changed
+     */
+    private void onShardAvailabilityChange(final String shardName) {
+        LOG.debug("onShardAvailabilityChange for {}", shardName);
 
-    /**
-     * Fall-over-dead timeout. If we do not make progress in this long, just fall over and propagate the failure.
-     * All users are expected to fail, possibly attempting to recover by restarting. It is fair to remain
-     * non-operational.
-     */
-    // TODO: maybe make this configurable somehow?
-    private static final Timeout DEAD_TIMEOUT = Timeout.apply(15, TimeUnit.MINUTES);
+        Long cookie = shards.get(shardName);
+        if (cookie == null) {
+            LOG.debug("No shard cookie found for {}", shardName);
+            return;
+        }
 
-    private final ConcurrentMap<Long, Entry> backends = new ConcurrentHashMap<>();
-    private final AtomicLong nextSessionId = new AtomicLong();
-    private final Function1<ActorRef, ?> connectFunction;
-    private final ActorContext actorContext;
+        notifyStaleBackendInfoCallbacks(cookie);
+    }
 
-    @GuardedBy("this")
-    private long nextShard = 1;
+    /**
+     * Resolve the cookie of the shard responsible for a path. The shard name is looked up through the shard strategy
+     * selected for the path and then translated to a cookie, allocating one if needed.
+     *
+     * @param path path to resolve
+     * @return cookie of the shard the strategy selected for the path
+     */
+    Long resolveShardForPath(final YangInstanceIdentifier path) {
+        final String shardName = actorUtils().getShardStrategyFactory().getStrategy(path).findShard(path);
+        return resolveCookie(shardName);
+    }
 
-    private volatile BiMap<String, Long> shards = ImmutableBiMap.of(DefaultShardStrategy.DEFAULT_SHARD, 0L);
+    /**
+     * Resolve cookies for all shard names reported by the configuration. Names are processed in their natural sort
+     * order, so the resulting stream follows that order as well.
+     *
+     * @return stream of cookies, one per configured shard name
+     */
+    Stream<Long> resolveAllShards() {
+        final Stream<String> sortedNames = actorUtils().getConfiguration().getAllShardNames().stream().sorted();
+        return sortedNames.map(this::resolveCookie);
+    }
 
-    // FIXME: we really need just ActorContext.findPrimaryShardAsync()
-    ModuleShardBackendResolver(final ClientIdentifier clientId, final ActorContext actorContext) {
-        this.actorContext = Preconditions.checkNotNull(actorContext);
-        this.connectFunction = ExplicitAsk.toScala(t -> new ConnectClientRequest(clientId, t, ABIVersion.BORON,
-            ABIVersion.current()));
+    /**
+     * Translate a shard name to its cookie. The current snapshot of the name-to-cookie map is consulted first without
+     * locking; only a miss goes through {@link #populateShard(String)}.
+     *
+     * @param shardName shard name
+     * @return cookie assigned to the shard
+     */
+    private @NonNull Long resolveCookie(final String shardName) {
+        final Long known = shards.get(shardName);
+        if (known != null) {
+            return known;
+        }
+        return populateShard(shardName);
     }
 
-    Long resolveShardForPath(final YangInstanceIdentifier path) {
-        final String shardName = actorContext.getShardStrategyFactory().getStrategy(path).findShard(path);
+    /**
+     * Return the cookie for a shard name, assigning the next free one if the name is not mapped yet. The method is
+     * synchronized and re-reads the map first, so a name which has been mapped in the meantime keeps its cookie.
+     *
+     * @param shardName shard name
+     * @return cookie assigned to the shard
+     */
+    private synchronized @NonNull Long populateShard(final String shardName) {
         Long cookie = shards.get(shardName);
         if (cookie == null) {
-            synchronized (this) {
-                cookie = shards.get(shardName);
-                if (cookie == null) {
-                    cookie = nextShard++;
-
-                    Builder<String, Long> builder = ImmutableBiMap.builder();
-                    builder.putAll(shards);
-                    builder.put(shardName, cookie);
-                    shards = builder.build();
-                }
-            }
+            cookie = nextShard++;
+            // Publish a fresh immutable map holding the previous mappings plus the new one
+            shards = ImmutableBiMap.<String, Long>builder().putAll(shards).put(shardName, cookie).build();
         }
-
         return cookie;
     }
 
-    private CompletionStage<ShardBackendInfo> resolveBackendInfo(final Long cookie) {
+    /**
+     * Return the resolution stage for the shard identified by a cookie. A stage already tracked for the cookie is
+     * reused; otherwise a new resolution is started and tracked. If that resolution fails, its state is removed again
+     * and the cache is flushed for the shard name.
+     *
+     * @param cookie shard cookie
+     * @return stage producing the shard's backend information
+     * @throws IllegalArgumentException if the cookie does not have a shard assigned
+     */
+    @Override
+    public CompletionStage<ShardBackendInfo> getBackendInfo(final Long cookie) {
+        /*
+         * We cannot perform a simple computeIfAbsent() here because we need to control sequencing of when the state
+         * is inserted into the map and retired from it (based on the stage result).
+         *
+         * We do not want to hook another stage once processing completes, and hooking a removal on failure from a
+         * compute method runs the inherent risk of the stage completing before the insertion does (i.e. we would have
+         * a removal of a non-existent element).
+         */
+        final ShardState existing = backends.get(cookie);
+        if (existing != null) {
+            return existing.getStage();
+        }
+
         final String shardName = shards.inverse().get(cookie);
         if (shardName == null) {
             LOG.warn("Failing request for non-existent cookie {}", cookie);
-            return NULL_FUTURE;
+            throw new IllegalArgumentException("Cookie " + cookie + " does not have a shard assigned");
         }
 
         LOG.debug("Resolving cookie {} to shard {}", cookie, shardName);
+        final ShardState toInsert = resolveBackendInfo(shardName, cookie);
 
-        return FutureConverters.toJava(actorContext.findPrimaryShardAsync(shardName)).thenCompose(info -> {
-            LOG.debug("Looking up primary info for {} from {}", shardName, info);
-            return FutureConverters.toJava(ExplicitAsk.ask(info.getPrimaryShardActor(), connectFunction, DEAD_TIMEOUT));
-        }).thenApply(response -> {
-            if (response instanceof RequestFailure) {
-                final RequestFailure<?, ?> failure = (RequestFailure<?, ?>) response;
-                LOG.debug("Connect request failed {}", failure, failure.getCause());
-                throw Throwables.propagate(failure.getCause());
-            }
-
-            LOG.debug("Resolved backend information to {}", response);
+        final ShardState raced = backends.putIfAbsent(cookie, toInsert);
+        if (raced != null) {
+            // We have had a concurrent insertion, return that
+            LOG.debug("Race during insertion of state for cookie {} shard {}", cookie, shardName);
+            return raced.getStage();
+        }
 
-            Preconditions.checkArgument(response instanceof ConnectClientSuccess, "Unhandled response {}", response);
-            final ConnectClientSuccess success = (ConnectClientSuccess) response;
+        // We have succeeded in populating the map, now we need to take care of pruning the entry if it fails to
+        // complete
+        final CompletionStage<ShardBackendInfo> stage = toInsert.getStage();
+        stage.whenComplete((info, failure) -> {
+            if (failure != null) {
+                LOG.debug("Resolution of cookie {} shard {} failed, removing state", cookie, shardName, failure);
+                backends.remove(cookie, toInsert);
 
-            return new ShardBackendInfo(success.getBackend(),
-                nextSessionId.getAndIncrement(), success.getVersion(), shardName, UnsignedLong.fromLongBits(cookie),
-                success.getDataTree(), success.getMaxMessages());
+                // Remove cache state in case someone else forgot to invalidate it
+                flushCache(shardName);
+            }
         });
-    }
 
-    @Override
-    public CompletionStage<? extends ShardBackendInfo> getBackendInfo(final Long cookie) {
-        return backends.computeIfAbsent(cookie, key -> new Entry(resolveBackendInfo(key))).getStage();
+        return stage;
     }
 
+    /**
+     * Request fresh backend information for a cookie whose current information is considered stale. If the tracked
+     * state's result does not equal {@code staleInfo}, its stage is returned as is. Otherwise the cache is flushed
+     * for the stale shard name, the tracked state is removed and the request is handed to
+     * {@link #getBackendInfo(Long)}.
+     *
+     * @param cookie shard cookie
+     * @param staleInfo backend information the caller considers stale
+     * @return stage producing the backend information
+     */
     @Override
-    public CompletionStage<? extends ShardBackendInfo> refreshBackendInfo(final Long cookie,
+    public CompletionStage<ShardBackendInfo> refreshBackendInfo(final Long cookie,
             final ShardBackendInfo staleInfo) {
-        final Entry existing = backends.get(cookie);
+        final ShardState existing = backends.get(cookie);
         if (existing != null) {
             if (!staleInfo.equals(existing.getResult())) {
                 return existing.getStage();
             }
 
             LOG.debug("Invalidating backend information {}", staleInfo);
-            actorContext.getPrimaryShardInfoCache().remove(staleInfo.getShardName());
+            flushCache(staleInfo.getName());
 
-            LOG.trace("Invalidated cache %s -> %s", Long.toUnsignedString(cookie), staleInfo);
+            LOG.trace("Invalidated cache {}", staleInfo);
             backends.remove(cookie, existing);
         }
 
         return getBackendInfo(cookie);
     }
+
+    /**
+     * Release the shard availability registration once the registration future completes. The registration is
+     * requested asynchronously by the constructor, so closing it is chained onto that future rather than done
+     * inline.
+     */
+    @Override
+    public void close() {
+        shardAvailabilityChangesRegFuture.onComplete(new OnComplete<Registration>() {
+            @Override
+            public void onComplete(final Throwable failure, final Registration reply) {
+                // A failed registration completes with a null reply: there is nothing to close in that case and
+                // dereferencing it would throw a NullPointerException from the callback.
+                if (failure == null && reply != null) {
+                    reply.close();
+                }
+            }
+        }, ExecutionContexts.global());
+    }
+
+    /**
+     * Look up the shard name a cookie has been assigned to.
+     *
+     * @param cookie shard cookie
+     * @return name of the shard mapped to the cookie; verification fails if there is no such mapping
+     */
+    @Override
+    public String resolveCookieName(final Long cookie) {
+        final String shardName = shards.inverse().get(cookie);
+        return verifyNotNull(shardName, "Unexpected null cookie: %s", cookie);
+    }
 }