Bump upstream versions
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / databroker / actors / dds / AbstractShardBackendResolver.java
index a81cf337acc72f7b383700093be742e6541a7f73..ca784fed7a73783e81e89812f9a06412e2197552 100644 (file)
@@ -7,26 +7,37 @@
  */
 package org.opendaylight.controller.cluster.databroker.actors.dds;
 
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.util.Objects.requireNonNull;
+
 import akka.actor.ActorRef;
 import akka.util.Timeout;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
 import com.google.common.primitives.UnsignedLong;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CompletionStage;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicLong;
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-import javax.annotation.concurrent.GuardedBy;
-import javax.annotation.concurrent.ThreadSafe;
+import java.util.function.Consumer;
+import org.checkerframework.checker.lock.qual.GuardedBy;
+import org.eclipse.jdt.annotation.NonNull;
+import org.eclipse.jdt.annotation.Nullable;
 import org.opendaylight.controller.cluster.access.ABIVersion;
 import org.opendaylight.controller.cluster.access.client.BackendInfoResolver;
 import org.opendaylight.controller.cluster.access.commands.ConnectClientRequest;
 import org.opendaylight.controller.cluster.access.commands.ConnectClientSuccess;
+import org.opendaylight.controller.cluster.access.commands.NotLeaderException;
 import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
 import org.opendaylight.controller.cluster.access.concepts.RequestFailure;
 import org.opendaylight.controller.cluster.common.actor.ExplicitAsk;
-import org.opendaylight.controller.cluster.datastore.utils.ActorContext;
+import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException;
+import org.opendaylight.controller.cluster.datastore.exceptions.NotInitializedException;
+import org.opendaylight.controller.cluster.datastore.exceptions.PrimaryNotFoundException;
+import org.opendaylight.controller.cluster.datastore.messages.PrimaryShardInfo;
+import org.opendaylight.controller.cluster.datastore.utils.ActorUtils;
+import org.opendaylight.yangtools.concepts.Registration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Function1;
@@ -35,11 +46,13 @@ import scala.compat.java8.FutureConverters;
 /**
  * {@link BackendInfoResolver} implementation for static shard configuration based on ShardManager. Each string-named
  * shard is assigned a single cookie and this mapping is stored in a bidirectional map. Information about corresponding
- * shard leader is resolved via {@link ActorContext}. The product of resolution is {@link ShardBackendInfo}.
+ * shard leader is resolved via {@link ActorUtils}. The product of resolution is {@link ShardBackendInfo}.
+ *
+ * <p>
+ * This class is thread-safe.
  *
  * @author Robert Varga
  */
-@ThreadSafe
 abstract class AbstractShardBackendResolver extends BackendInfoResolver<ShardBackendInfo> {
     static final class ShardState {
         private final CompletionStage<ShardBackendInfo> stage;
@@ -47,21 +60,21 @@ abstract class AbstractShardBackendResolver extends BackendInfoResolver<ShardBac
         private ShardBackendInfo result;
 
         ShardState(final CompletionStage<ShardBackendInfo> stage) {
-            this.stage = Preconditions.checkNotNull(stage);
+            this.stage = requireNonNull(stage);
             stage.whenComplete(this::onStageResolved);
         }
 
-        @Nonnull CompletionStage<ShardBackendInfo> getStage() {
+        @NonNull CompletionStage<ShardBackendInfo> getStage() {
             return stage;
         }
 
-        @Nullable synchronized ShardBackendInfo getResult() {
+        synchronized @Nullable ShardBackendInfo getResult() {
             return result;
         }
 
-        private synchronized void onStageResolved(final ShardBackendInfo result, final Throwable failure) {
+        private synchronized void onStageResolved(final ShardBackendInfo info, final Throwable failure) {
             if (failure == null) {
-                this.result = Preconditions.checkNotNull(result);
+                this.result = requireNonNull(info);
             } else {
                 LOG.warn("Failed to resolve shard", failure);
             }
@@ -71,49 +84,104 @@ abstract class AbstractShardBackendResolver extends BackendInfoResolver<ShardBac
     private static final Logger LOG = LoggerFactory.getLogger(AbstractShardBackendResolver.class);
 
     /**
-     * Fall-over-dead timeout. If we do not make progress in this long, just fall over and propagate the failure.
-     * All users are expected to fail, possibly attempting to recover by restarting. It is fair to remain
-     * non-operational.
+     * Connect request timeout. If the shard does not respond within this interval, we retry the lookup and connection.
      */
     // TODO: maybe make this configurable somehow?
-    private static final Timeout DEAD_TIMEOUT = Timeout.apply(15, TimeUnit.MINUTES);
+    private static final Timeout CONNECT_TIMEOUT = Timeout.apply(5, TimeUnit.SECONDS);
 
     private final AtomicLong nextSessionId = new AtomicLong();
     private final Function1<ActorRef, ?> connectFunction;
-    private final ActorContext actorContext;
+    private final ActorUtils actorUtils;
+    private final Set<Consumer<Long>> staleBackendInfoCallbacks = ConcurrentHashMap.newKeySet();
 
     // FIXME: we really need just ActorContext.findPrimaryShardAsync()
-    AbstractShardBackendResolver(final ClientIdentifier clientId, final ActorContext actorContext) {
-        this.actorContext = Preconditions.checkNotNull(actorContext);
+    AbstractShardBackendResolver(final ClientIdentifier clientId, final ActorUtils actorUtils) {
+        this.actorUtils = requireNonNull(actorUtils);
         this.connectFunction = ExplicitAsk.toScala(t -> new ConnectClientRequest(clientId, t, ABIVersion.BORON,
             ABIVersion.current()));
     }
 
+    @Override
+    public Registration notifyWhenBackendInfoIsStale(final Consumer<Long> callback) {
+        staleBackendInfoCallbacks.add(callback);
+        return () -> staleBackendInfoCallbacks.remove(callback);
+    }
+
+    protected void notifyStaleBackendInfoCallbacks(Long cookie) {
+        staleBackendInfoCallbacks.forEach(callback -> callback.accept(cookie));
+    }
+
+    protected ActorUtils actorUtils() {
+        return actorUtils;
+    }
+
     protected final void flushCache(final String shardName) {
-        actorContext.getPrimaryShardInfoCache().remove(shardName);
+        actorUtils.getPrimaryShardInfoCache().remove(shardName);
     }
 
     protected final ShardState resolveBackendInfo(final String shardName, final long cookie) {
         LOG.debug("Resolving cookie {} to shard {}", cookie, shardName);
 
-        return new ShardState(FutureConverters.toJava(actorContext.findPrimaryShardAsync(shardName)).thenCompose(i -> {
-            LOG.debug("Looking up primary info for {} from {}", shardName, i);
-            return FutureConverters.toJava(ExplicitAsk.ask(i.getPrimaryShardActor(), connectFunction, DEAD_TIMEOUT));
-        }).thenApply(response -> {
-            if (response instanceof RequestFailure) {
-                final RequestFailure<?, ?> failure = (RequestFailure<?, ?>) response;
-                LOG.debug("Connect request failed {}", failure, failure.getCause());
-                throw Throwables.propagate(failure.getCause());
+        final CompletableFuture<ShardBackendInfo> future = new CompletableFuture<>();
+        FutureConverters.toJava(actorUtils.findPrimaryShardAsync(shardName)).whenComplete((info, failure) -> {
+            if (failure == null) {
+                connectShard(shardName, cookie, info, future);
+                return;
+            }
+
+            LOG.debug("Shard {} failed to resolve", shardName, failure);
+            if (failure instanceof NoShardLeaderException) {
+                future.completeExceptionally(wrap("Shard has no current leader", failure));
+            } else if (failure instanceof NotInitializedException) {
+                // FIXME: this actually is an exception we can retry on
+                LOG.info("Shard {} has not initialized yet", shardName);
+                future.completeExceptionally(failure);
+            } else if (failure instanceof PrimaryNotFoundException) {
+                LOG.info("Failed to find primary for shard {}", shardName);
+                future.completeExceptionally(failure);
+            } else {
+                future.completeExceptionally(failure);
             }
+        });
+
+        return new ShardState(future);
+    }
+
+    private static TimeoutException wrap(final String message, final Throwable cause) {
+        final TimeoutException ret = new TimeoutException(message);
+        ret.initCause(requireNonNull(cause));
+        return ret;
+    }
+
+    private void connectShard(final String shardName, final long cookie, final PrimaryShardInfo info,
+            final CompletableFuture<ShardBackendInfo> future) {
+        LOG.debug("Shard {} resolved to {}, attempting to connect", shardName, info);
 
-            LOG.debug("Resolved backend information to {}", response);
+        FutureConverters.toJava(ExplicitAsk.ask(info.getPrimaryShardActor(), connectFunction, CONNECT_TIMEOUT))
+            .whenComplete((response, failure) -> onConnectResponse(shardName, cookie, future, response, failure));
+    }
 
-            Preconditions.checkArgument(response instanceof ConnectClientSuccess, "Unhandled response {}", response);
-            final ConnectClientSuccess success = (ConnectClientSuccess) response;
+    private void onConnectResponse(final String shardName, final long cookie,
+            final CompletableFuture<ShardBackendInfo> future, final Object response, final Throwable failure) {
+        if (failure != null) {
+            LOG.debug("Connect attempt to {} failed, will retry", shardName, failure);
+            future.completeExceptionally(wrap("Connection attempt failed", failure));
+            return;
+        }
+        if (response instanceof RequestFailure) {
+            final Throwable cause = ((RequestFailure<?, ?>) response).getCause().unwrap();
+            LOG.debug("Connect attempt to {} failed to process", shardName, cause);
+            final Throwable result = cause instanceof NotLeaderException
+                    ? wrap("Leader moved during establishment", cause) : cause;
+            future.completeExceptionally(result);
+            return;
+        }
 
-            return new ShardBackendInfo(success.getBackend(),
-                nextSessionId.getAndIncrement(), success.getVersion(), shardName, UnsignedLong.fromLongBits(cookie),
-                success.getDataTree(), success.getMaxMessages());
-        }));
+        LOG.debug("Resolved backend information to {}", response);
+        checkArgument(response instanceof ConnectClientSuccess, "Unhandled response %s", response);
+        final ConnectClientSuccess success = (ConnectClientSuccess) response;
+        future.complete(new ShardBackendInfo(success.getBackend(), nextSessionId.getAndIncrement(),
+            success.getVersion(), shardName, UnsignedLong.fromLongBits(cookie), success.getDataTree(),
+            success.getMaxMessages()));
     }
 }