From: Robert Varga Date: Mon, 15 May 2017 14:56:14 +0000 (+0200) Subject: BUG-8452: make NoShardLeaderException retriable X-Git-Tag: release/nitrogen~161 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=commitdiff_plain;h=bc2b83e97bc73930badd4a3063c65b849f82c664 BUG-8452: make NoShardLeaderException retriable We can recover from this exception by retrying the connection to the backend. Wrap it in a TimeoutException, which will cause a new connection attempt. Change-Id: I1d5c771fdb89cbdd7723e0425542154a1ed85853 Signed-off-by: Robert Varga (cherry picked from commit c74608b67d88d809ebec51c0e84add37a0b98711) --- diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java index da016bae88..cd81a4e544 100644 --- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java +++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/AbstractClientConnection.java @@ -16,6 +16,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.Collection; import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; @@ -26,6 +27,7 @@ import org.opendaylight.controller.cluster.access.concepts.Request; import org.opendaylight.controller.cluster.access.concepts.RequestException; import org.opendaylight.controller.cluster.access.concepts.Response; import org.opendaylight.controller.cluster.access.concepts.ResponseEnvelope; +import org.opendaylight.controller.cluster.access.concepts.RuntimeRequestException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import scala.concurrent.duration.FiniteDuration; @@ -173,7 +175,8 @@ public abstract class AbstractClientConnection { } @GuardedBy("lock") - abstract ClientActorBehavior lockedReconnect(ClientActorBehavior current); + abstract ClientActorBehavior lockedReconnect(ClientActorBehavior current, + RequestException runtimeRequestException); final long enqueueEntry(final ConnectionEntry entry, final long now) { lock.lock(); @@ -193,10 +196,10 @@ public abstract class AbstractClientConnection { } } - final ClientActorBehavior reconnect(final ClientActorBehavior current) { + final ClientActorBehavior reconnect(final ClientActorBehavior current, final RequestException cause) { lock.lock(); try { - return lockedReconnect(current); + return lockedReconnect(current, cause); } finally { lock.unlock(); } @@ -261,7 +264,8 @@ public abstract class AbstractClientConnection { delay = lockedCheckTimeout(now); if (delay == null) { // We have timed out. There is no point in scheduling a timer - return lockedReconnect(current); + return lockedReconnect(current, new RuntimeRequestException("Backend connection timed out", + new TimeoutException())); } if (delay.isPresent()) { @@ -339,10 +343,14 @@ public abstract class AbstractClientConnection { @GuardedBy("lock") private void lockedPoison(final RequestException cause) { - poisoned = cause; + poisoned = enrichPoison(cause); queue.poison(cause); } + RequestException enrichPoison(final RequestException ex) { + return ex; + } + @VisibleForTesting final RequestException poisoned() { return poisoned; diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/BackendInfoResolver.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/BackendInfoResolver.java index 2e8ab4e103..4ece691d89 100644 --- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/BackendInfoResolver.java +++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/BackendInfoResolver.java @@ -21,7 +21,7 @@ import javax.annotation.Nonnull; * If the completion stage returned by this interface's methods fails with a * {@link org.opendaylight.controller.cluster.access.concepts.RequestException}, it will be forwarded to all * outstanding requests towards the leader. If it fails with a {@link java.util.concurrent.TimeoutException}, - * resolution process will be retries. If it fails with any other cause, it will we wrapped as a + * resolution process will be retried. If it fails with any other cause, it will we wrapped as a * {@link org.opendaylight.controller.cluster.access.concepts.RuntimeRequestException} wrapping that cause. * * @author Robert Varga diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java index ee63eddaa7..9ba118ed6b 100644 --- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java +++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ClientActorBehavior.java @@ -174,7 +174,7 @@ public abstract class ClientActorBehavior extends return this; } else if (conn != null) { LOG.info("{}: connection {} indicated no leadership, reconnecting it", persistenceId(), conn, cause); - return conn.reconnect(this); + return conn.reconnect(this, cause); } } if (cause instanceof OutOfSequenceEnvelopeException) { @@ -185,7 +185,7 @@ public abstract class ClientActorBehavior extends } else if (conn != null) { LOG.info("{}: connection {} indicated no sequencing mismatch on {} sequence {}, reconnecting it", persistenceId(), conn, failure.getTarget(), failure.getSequence(), cause); - return conn.reconnect(this); + return conn.reconnect(this, cause); } } diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnection.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnection.java index cade27f77d..0afc7acf49 100644 --- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnection.java +++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnection.java @@ -9,6 +9,7 @@ package org.opendaylight.controller.cluster.access.client; import com.google.common.annotations.Beta; import javax.annotation.concurrent.NotThreadSafe; +import org.opendaylight.controller.cluster.access.concepts.RequestException; @Beta @NotThreadSafe @@ -18,8 +19,8 @@ public final class ConnectedClientConnection extends Abst } @Override - ClientActorBehavior lockedReconnect(final ClientActorBehavior current) { - final ReconnectingClientConnection next = new ReconnectingClientConnection<>(this); + ClientActorBehavior lockedReconnect(final ClientActorBehavior current, final RequestException cause) { + final ReconnectingClientConnection next = new ReconnectingClientConnection<>(this, cause); setForwarder(new SimpleReconnectForwarder(next)); current.reconnectConnection(this, next); return current; diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectingClientConnection.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectingClientConnection.java index 12c520bb17..a36267c44f 100644 --- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectingClientConnection.java +++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ConnectingClientConnection.java @@ -9,6 +9,7 @@ package org.opendaylight.controller.cluster.access.client; import com.google.common.annotations.Beta; import java.util.Optional; +import org.opendaylight.controller.cluster.access.concepts.RequestException; @Beta public final class ConnectingClientConnection extends AbstractClientConnection { @@ -30,7 +31,7 @@ public final class ConnectingClientConnection extends Abs } @Override - ClientActorBehavior lockedReconnect(final ClientActorBehavior current) { - throw new UnsupportedOperationException("Attempted to reconnect a connecting connection"); + ClientActorBehavior lockedReconnect(final ClientActorBehavior current, final RequestException cause) { + throw new UnsupportedOperationException("Attempted to reconnect a connecting connection", cause); } } diff --git a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnection.java b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnection.java index 0aac7f4663..b59c9e324a 100644 --- a/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnection.java +++ b/opendaylight/md-sal/cds-access-client/src/main/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnection.java @@ -7,6 +7,8 @@ */ package org.opendaylight.controller.cluster.access.client; +import com.google.common.base.Preconditions; +import org.opendaylight.controller.cluster.access.concepts.RequestException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -20,14 +22,29 @@ import org.slf4j.LoggerFactory; public final class ReconnectingClientConnection extends AbstractReceivingClientConnection { private static final Logger LOG = LoggerFactory.getLogger(ReconnectingClientConnection.class); - ReconnectingClientConnection(final ConnectedClientConnection oldConnection) { + private RequestException cause; + + ReconnectingClientConnection(final ConnectedClientConnection oldConnection, final RequestException cause) { super(oldConnection); + this.cause = Preconditions.checkNotNull(cause); } @Override - ClientActorBehavior lockedReconnect(final ClientActorBehavior current) { - // Intentional no-op + ClientActorBehavior lockedReconnect(final ClientActorBehavior current, final RequestException cause) { + this.cause = Preconditions.checkNotNull(cause); LOG.debug("Skipping reconnect of already-reconnecting connection {}", this); return current; } + + @Override + RequestException enrichPoison(final RequestException ex) { + if (ex.getCause() != null) { + ex.addSuppressed(cause); + } else { + ex.initCause(cause); + } + + return ex; + } + } diff --git a/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnectionTest.java b/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnectionTest.java index eb101520e3..e66512d041 100644 --- a/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnectionTest.java +++ b/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ConnectedClientConnectionTest.java @@ -14,6 +14,7 @@ import static org.mockito.Mockito.verify; import org.junit.Test; import org.opendaylight.controller.cluster.access.ABIVersion; +import org.opendaylight.controller.cluster.access.concepts.RequestException; public class ConnectedClientConnectionTest extends AbstractClientConnectionTest, BackendInfo> { @@ -28,7 +29,7 @@ public class ConnectedClientConnectionTest @Test public void testReconnectConnection() throws Exception { final ClientActorBehavior behavior = mock(ClientActorBehavior.class); - connection.lockedReconnect(behavior); + connection.lockedReconnect(behavior, mock(RequestException.class)); verify(behavior).reconnectConnection(same(connection), any(ReconnectingClientConnection.class)); } diff --git a/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnectionTest.java b/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnectionTest.java index 142e948820..b36dd7ccec 100644 --- a/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnectionTest.java +++ b/opendaylight/md-sal/cds-access-client/src/test/java/org/opendaylight/controller/cluster/access/client/ReconnectingClientConnectionTest.java @@ -19,6 +19,7 @@ import org.opendaylight.controller.cluster.access.ABIVersion; import org.opendaylight.controller.cluster.access.commands.TransactionAbortSuccess; import org.opendaylight.controller.cluster.access.concepts.LocalHistoryIdentifier; import org.opendaylight.controller.cluster.access.concepts.Request; +import org.opendaylight.controller.cluster.access.concepts.RequestException; import org.opendaylight.controller.cluster.access.concepts.RequestSuccess; import org.opendaylight.controller.cluster.access.concepts.Response; import org.opendaylight.controller.cluster.access.concepts.ResponseEnvelope; @@ -34,14 +35,14 @@ public class ReconnectingClientConnectionTest final ConnectedClientConnection oldConnection = new ConnectedClientConnection<>(context, 0L, backend); - return new ReconnectingClientConnection<>(oldConnection); + return new ReconnectingClientConnection<>(oldConnection, mock(RequestException.class)); } @Override @Test public void testReconnectConnection() throws Exception { final ClientActorBehavior behavior = mock(ClientActorBehavior.class); - Assert.assertSame(behavior, connection.lockedReconnect(behavior)); + Assert.assertSame(behavior, connection.lockedReconnect(behavior, mock(RequestException.class))); } @Override diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java index 93cf7931e5..6b221da766 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/databroker/actors/dds/AbstractShardBackendResolver.java @@ -14,6 +14,7 @@ import com.google.common.primitives.UnsignedLong; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionStage; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -107,8 +108,7 @@ abstract class AbstractShardBackendResolver extends BackendInfoResolver future) { LOG.debug("Shard {} resolved to {}, attempting to connect", shardName, info); diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreIntegrationTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreIntegrationTest.java index 848a8a6cda..4a7a8134ad 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreIntegrationTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreIntegrationTest.java @@ -40,6 +40,7 @@ import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import org.junit.After; import org.junit.Before; @@ -658,8 +659,11 @@ public class DistributedDataStoreIntegrationTest { assertTrue("Expected LocalShardFound. Actual: " + result, result instanceof LocalShardFound); // Create the write Tx. - try (DOMStoreWriteTransaction writeTx = writeOnly ? dataStore.newWriteOnlyTransaction() - : dataStore.newReadWriteTransaction()) { + DOMStoreWriteTransaction writeTxToClose = null; + try { + writeTxToClose = writeOnly ? dataStore.newWriteOnlyTransaction() + : dataStore.newReadWriteTransaction(); + final DOMStoreWriteTransaction writeTx = writeTxToClose; assertNotNull("newReadWriteTransaction returned null", writeTx); // Do some modifications and ready the Tx on a separate @@ -698,7 +702,20 @@ public class DistributedDataStoreIntegrationTest { txCohort.get().canCommit().get(5, TimeUnit.SECONDS); fail("Expected NoShardLeaderException"); } catch (final ExecutionException e) { - Throwables.propagate(Throwables.getRootCause(e)); + assertTrue(Throwables.getRootCause(e) instanceof NoShardLeaderException); + assertEquals(DistributedDataStore.class, testParameter); + } catch (TimeoutException e) { + // ClientBackedDataStore doesn't set cause to ExecutionException, future just time outs + assertEquals(ClientBackedDataStore.class, testParameter); + } + } finally { + try { + if (writeTxToClose != null) { + writeTxToClose.close(); + } + } catch (Exception e) { + // FIXME TransactionProxy.close throws IllegalStateException: + // Transaction is ready, it cannot be closed } } } @@ -706,13 +723,13 @@ public class DistributedDataStoreIntegrationTest { }; } - @Test(expected = NoShardLeaderException.class) + @Test public void testWriteOnlyTransactionCommitFailureWithNoShardLeader() throws Exception { datastoreContextBuilder.writeOnlyTransactionOptimizationsEnabled(true); testTransactionCommitFailureWithNoShardLeader(true, "testWriteOnlyTransactionCommitFailureWithNoShardLeader"); } - @Test(expected = NoShardLeaderException.class) + @Test public void testReadWriteTransactionCommitFailureWithNoShardLeader() throws Exception { testTransactionCommitFailureWithNoShardLeader(false, "testReadWriteTransactionCommitFailureWithNoShardLeader"); } diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java index f58fd0d668..dad3f1b223 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DistributedDataStoreRemotingIntegrationTest.java @@ -42,6 +42,7 @@ import java.util.LinkedList; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import org.junit.After; import org.junit.Assume; @@ -1024,6 +1025,10 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest { final String msg = "Unexpected exception: " + Throwables.getStackTraceAsString(e.getCause()); assertTrue(msg, Throwables.getRootCause(e) instanceof NoShardLeaderException || e.getCause() instanceof ShardLeaderNotRespondingException); + assertEquals(DistributedDataStore.class, testParameter); + } catch (final TimeoutException e) { + // ClientBackedDataStore doesn't set cause to ExecutionException, future just time outs + assertEquals(ClientBackedDataStore.class, testParameter); } } @@ -1058,6 +1063,10 @@ public class DistributedDataStoreRemotingIntegrationTest extends AbstractTest { final String msg = "Expected instance of NoShardLeaderException, actual: \n" + Throwables.getStackTraceAsString(e.getCause()); assertTrue(msg, Throwables.getRootCause(e) instanceof NoShardLeaderException); + assertEquals(DistributedDataStore.class, testParameter); + } catch (TimeoutException e) { + // ClientBackedDataStore doesn't set cause to ExecutionException, future just time outs + assertEquals(ClientBackedDataStore.class, testParameter); } }