From 417f3a8c2401e6c8ce3c91ea969da24929c24c1c Mon Sep 17 00:00:00 2001 From: Robert Varga Date: Mon, 26 Jun 2017 16:31:30 +0200 Subject: [PATCH 1/1] BUG-8445: check sessionId before propagating failures When we have leader movement ocurring, based on timing details we can re-establish a connection to the new leader and then start receiving responses from the old leader telling us it no longer is the leader. To stop this from happening we need to check connection session ID against the incoming failure. Change-Id: If9a891016c7f213f2552283e3ec13485e598f5a4 Signed-off-by: Robert Varga (cherry picked from commit 1c495bceb8d9c203f5ce53ea1ab9d907efb4d7b3) --- .../access/client/ClientActorBehavior.java | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java index 554ffe97c7..ccfbba6cd3 100644 --- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java +++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java @@ -13,6 +13,7 @@ import com.google.common.base.Stopwatch; import com.google.common.base.Verify; import java.util.Collection; import java.util.Map; +import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -160,6 +161,25 @@ public abstract class ClientActorBehavior extends } private ClientActorBehavior internalOnRequestFailure(final FailureEnvelope command) { + final AbstractClientConnection conn = getConnection(command); + if (conn != null) { + /* + * We are talking to multiple actors, which may be lagging behind our state significantly. This has + * the effect that we may be receiving responses from a previous connection after we have created a new + * one to a different actor. + * + * Since we are already replaying requests to the new actor, we want to ignore errors reported on the old + * connection -- for example NotLeaderException, which must not cause a new reconnect. Check the envelope's + * sessionId and if it does not match our current connection just ignore it. + */ + final Optional optBackend = conn.getBackendInfo(); + if (optBackend.isPresent() && optBackend.get().getSessionId() != command.getSessionId()) { + LOG.debug("{}: Mismatched current connection {} and envelope {}, ignoring response", persistenceId(), + conn, command); + return this; + } + } + final RequestFailure failure = command.getMessage(); final RequestException cause = failure.getCause(); if (cause instanceof RetiredGenerationException) { @@ -169,7 +189,6 @@ public abstract class ClientActorBehavior extends return null; } if (cause instanceof NotLeaderException) { - final AbstractClientConnection conn = getConnection(command); if (conn instanceof ReconnectingClientConnection) { // Already reconnecting, do not churn the logs return this; @@ -179,7 +198,6 @@ public abstract class ClientActorBehavior extends } } if (cause instanceof OutOfSequenceEnvelopeException) { - final AbstractClientConnection conn = getConnection(command); if (conn instanceof ReconnectingClientConnection) { // Already reconnecting, do not churn the logs return this; -- 2.36.6