From 561acd5df914671e4f3f0736b4223861170fa3a7 Mon Sep 17 00:00:00 2001 From: Robert Varga Date: Mon, 29 May 2017 10:40:06 +0200 Subject: [PATCH] BUG-8515: make sure we retry connection on NotLeaderException There is a race window when we are establishing a connection to the backend: when we receive the pointer to the shard leader, we send a connect request, but during that time window the leader may move, resulting in a NotLeaderException response to ConnectClientRequest. Since we are in reconnection mode, this will result in a hard abort of the connection. Fix this by wrapping NotLeaderException and akka failures in a TimeoutException -- hence we will retry connecting. Change-Id: Ia5d1915d59e80a70c54302c1790121d0767ff08a Signed-off-by: Robert Varga (cherry picked from commit 51a85b6c8fce1d9808285a6ad81dc7068afbf7c7) --- .../actors/dds/AbstractShardBackendResolver.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java index 6b221da766..a1ddcc3449 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java @@ -24,6 +24,7 @@ import org.opendaylight.controller.cluster.access.ABIVersion; import org.opendaylight.controller.cluster.access.client.BackendInfoResolver; import org.opendaylight.controller.cluster.access.commands.ConnectClientRequest; import org.opendaylight.controller.cluster.access.commands.ConnectClientSuccess; +import 
org.opendaylight.controller.cluster.access.commands.NotLeaderException; import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier; import org.opendaylight.controller.cluster.access.concepts.RequestFailure; import org.opendaylight.controller.cluster.common.actor.ExplicitAsk; @@ -137,14 +138,16 @@ abstract class AbstractShardBackendResolver extends BackendInfoResolver { if (failure != null) { - LOG.debug("Connect attempt to {} failed", shardName, failure); - future.completeExceptionally(failure); + LOG.debug("Connect attempt to {} failed, will retry", shardName, failure); + future.completeExceptionally(wrap("Connection attempt failed", failure)); return; } if (response instanceof RequestFailure) { final Throwable cause = ((RequestFailure) response).getCause().unwrap(); LOG.debug("Connect attempt to {} failed to process", shardName, cause); - future.completeExceptionally(cause); + final Throwable result = cause instanceof NotLeaderException + ? wrap("Leader moved during establishment", cause) : cause; + future.completeExceptionally(result); return; } -- 2.36.6