+ @Holding("connectionsLock") // backendConnectFinished() calls this with the connectionsLock write lock held
+ protected abstract @NonNull ConnectionConnectCohort connectionUp(@NonNull ConnectedClientConnection<T> newConn);
+
+ private void backendConnectFinished(final Long shard, final AbstractClientConnection<T> oldConn,
+ final T backend, final Throwable failure) {
+ if (failure != null) {
+ if (failure instanceof TimeoutException) {
+ if (!oldConn.equals(connections.get(shard))) {
+ // AbstractClientConnection will remove itself when it decides there is no point in continuing,
+ // at which point we want to stop retrying
+ LOG.info("{}: stopping resolution of shard {} on stale connection {}", persistenceId(), shard,
+ oldConn, failure);
+ return;
+ }
+
+ LOG.debug("{}: timed out resolving shard {}, scheduling retry in {}", persistenceId(), shard,
+ RESOLVE_RETRY_DURATION, failure);
+ context().executeInActor(b -> {
+ resolveConnection(shard, oldConn);
+ return b;
+ }, RESOLVE_RETRY_DURATION);
+ return;
+ }
+
+ LOG.error("{}: failed to resolve shard {}", persistenceId(), shard, failure);
+ final RequestException cause;
+ if (failure instanceof RequestException) {
+ cause = (RequestException) failure;
+ } else {
+ cause = new RuntimeRequestException("Failed to resolve shard " + shard, failure);
+ }
+
+ oldConn.poison(cause);
+ return;
+ }
+
+ LOG.info("{}: resolved shard {} to {}", persistenceId(), shard, backend);
+ final long stamp = connectionsLock.writeLock();
+ try {
+ final Stopwatch sw = Stopwatch.createStarted();
+
+ // Create a new connected connection
+ final ConnectedClientConnection<T> newConn = new ConnectedClientConnection<>(oldConn, backend);
+ LOG.info("{}: resolving connection {} to {}", persistenceId(), oldConn, newConn);
+
+ // Start reconnecting without the old connection lock held
+ final ConnectionConnectCohort cohort = Verify.verifyNotNull(connectionUp(newConn));
+
+ // Lock the old connection and get a reference to its entries
+ final Collection<ConnectionEntry> replayIterable = oldConn.startReplay();
+
+ // Finish the connection attempt
+ final ReconnectForwarder forwarder = Verify.verifyNotNull(cohort.finishReconnect(replayIterable));
+
+ // Cancel sleep debt after entries were replayed, before new connection starts receiving.
+ newConn.cancelDebt();
+
+ // Install the forwarder, unlocking the old connection
+ oldConn.finishReplay(forwarder);
+
+ // Make sure new lookups pick up the new connection
+ if (!connections.replace(shard, oldConn, newConn)) {
+ final AbstractClientConnection<T> existing = connections.get(oldConn.cookie());
+ LOG.warn("{}: old connection {} does not match existing {}, new connection {} in limbo",
+ persistenceId(), oldConn, existing, newConn);
+ } else {
+ LOG.info("{}: replaced connection {} with {} in {}", persistenceId(), oldConn, newConn, sw);
+ }
+ } finally {
+ connectionsLock.unlockWrite(stamp);
+ }
+ }
+
+ /**
+  * Stops tracking a connection, provided it is still the one registered under its cookie.
+  *
+  * <p>When the connection is removed, message slicing associated with its cookie is cancelled as well. When
+  * a different connection (or none) is registered under the cookie, only a warning is logged. The whole
+  * operation runs with the {@code connectionsLock} write lock held.
+  *
+  * @param conn connection to remove
+  */
+ void removeConnection(final AbstractClientConnection<?> conn) {
+     final long lockStamp = connectionsLock.writeLock();
+     try {
+         final Long cookie = conn.cookie();
+         if (connections.remove(cookie, conn)) {
+             LOG.info("{}: removed connection {}", persistenceId(), conn);
+             cancelSlicing(cookie);
+             return;
+         }
+
+         // Removal failed: tell apart a superseding connection from an untracked cookie
+         final AbstractClientConnection<T> existing = connections.get(cookie);
+         if (existing == null) {
+             LOG.warn("{}: failed to remove connection {}, as it was not tracked", persistenceId(), conn);
+         } else {
+             LOG.warn("{}: failed to remove connection {}, as it was superseded by {}", persistenceId(), conn,
+                 existing);
+         }
+     } finally {
+         connectionsLock.unlockWrite(lockStamp);
+     }
+ }
+
+ /**
+  * Swaps a connected connection for its reconnecting successor and starts refreshing the backend information.
+  *
+  * <p>The swap in {@code connections} happens with the {@code connectionsLock} write lock held; on success,
+  * message slicing associated with the shard cookie is cancelled. If {@code oldConn} is no longer the tracked
+  * connection, a warning naming {@code oldConn} is logged. In either case the backend refresh is requested
+  * afterwards and its outcome is handed to {@code backendConnectFinished} through
+  * {@code context().executeInActor}.
+  *
+  * @param oldConn connection which is being replaced
+  * @param newConn reconnecting connection taking its place
+  */
+ @SuppressWarnings("unchecked")
+ void reconnectConnection(final ConnectedClientConnection<?> oldConn,
+         final ReconnectingClientConnection<?> newConn) {
+     final ReconnectingClientConnection<T> conn = (ReconnectingClientConnection<T>)newConn;
+     LOG.info("{}: connection {} reconnecting as {}", persistenceId(), oldConn, newConn);
+
+     final Long shard = oldConn.cookie();
+     final long stamp = connectionsLock.writeLock();
+     try {
+         if (connections.replace(shard, (AbstractClientConnection<T>)oldConn, conn)) {
+             cancelSlicing(shard);
+         } else {
+             // Report the connection we failed to replace (oldConn), not its would-be successor
+             final AbstractClientConnection<T> existing = connections.get(shard);
+             if (existing != null) {
+                 LOG.warn("{}: failed to replace connection {}, as it was superseded by {}", persistenceId(),
+                     oldConn, existing);
+             } else {
+                 LOG.warn("{}: failed to replace connection {}, as it was not tracked", persistenceId(),
+                     oldConn);
+             }
+         }
+     } finally {
+         connectionsLock.unlockWrite(stamp);
+     }
+
+     LOG.info("{}: refreshing backend for shard {}", persistenceId(), shard);
+     resolver().refreshBackendInfo(shard, conn.getBackendInfo().get()).whenComplete(
+         (backend, failure) -> context().executeInActor(behavior -> {
+             backendConnectFinished(shard, conn, backend, failure);
+             return behavior;
+         }));
+ }
+
+ private void cancelSlicing(final Long cookie) {
+ context().messageSlicer().cancelSlicing(id -> {
+ try {
+ return cookie.equals(extractCookie(id));
+ } catch (IllegalArgumentException e) {
+ LOG.debug("extractCookie failed while cancelling slicing for cookie {}", cookie, e);
+ return false;
+ }
+ });
+ }
+
+ /**
+  * Creates a connecting connection for a shard and immediately starts resolving its backend.
+  *
+  * @param shard shard cookie to connect to
+  * @return the newly created connection, whose resolution has already been requested
+  */
+ private ConnectingClientConnection<T> createConnection(final Long shard) {
+     final ConnectingClientConnection<T> connecting =
+         new ConnectingClientConnection<>(context(), shard, resolver().resolveCookieName(shard));
+     resolveConnection(shard, connecting);
+     return connecting;
+ }
+
+ /**
+  * Requests backend information for a shard from the resolver and, once the request completes, hands the
+  * outcome (backend or failure) to {@code backendConnectFinished} through {@code context().executeInActor}.
+  *
+  * @param shard shard cookie to resolve
+  * @param conn connection on whose behalf the resolution is performed
+  */
+ private void resolveConnection(final Long shard, final AbstractClientConnection<T> conn) {
+     LOG.debug("{}: resolving shard {} connection {}", persistenceId(), shard, conn);
+     resolver().getBackendInfo(shard).whenComplete(
+         (info, cause) -> context().executeInActor(actor -> {
+             backendConnectFinished(shard, conn, info, cause);
+             return actor;
+         }));
+ }
+
+ private static class BackendStaleException extends RequestException {
+ private static final long serialVersionUID = 1L;
+
+ BackendStaleException(final Long shard) {
+ super("Backend for shard " + shard + " is stale");
+ }
+
+ @Override
+ public boolean isRetriable() {
+ return false;
+ }