2 * Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.controller.cluster.access.client;
10 import com.google.common.annotations.Beta;
11 import com.google.common.base.Preconditions;
12 import com.google.common.base.Stopwatch;
13 import com.google.common.base.Verify;
14 import java.util.Collection;
16 import java.util.Optional;
17 import java.util.concurrent.ConcurrentHashMap;
18 import java.util.concurrent.TimeUnit;
19 import java.util.concurrent.TimeoutException;
20 import javax.annotation.Nonnull;
21 import javax.annotation.Nullable;
22 import javax.annotation.concurrent.GuardedBy;
23 import org.opendaylight.controller.cluster.access.commands.NotLeaderException;
24 import org.opendaylight.controller.cluster.access.commands.OutOfSequenceEnvelopeException;
25 import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
26 import org.opendaylight.controller.cluster.access.concepts.FailureEnvelope;
27 import org.opendaylight.controller.cluster.access.concepts.LocalHistoryIdentifier;
28 import org.opendaylight.controller.cluster.access.concepts.RequestException;
29 import org.opendaylight.controller.cluster.access.concepts.RequestFailure;
30 import org.opendaylight.controller.cluster.access.concepts.ResponseEnvelope;
31 import org.opendaylight.controller.cluster.access.concepts.RetiredGenerationException;
32 import org.opendaylight.controller.cluster.access.concepts.RuntimeRequestException;
33 import org.opendaylight.controller.cluster.access.concepts.SuccessEnvelope;
34 import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
35 import org.opendaylight.controller.cluster.common.actor.Dispatchers.DispatcherType;
36 import org.opendaylight.controller.cluster.io.FileBackedOutputStreamFactory;
37 import org.opendaylight.controller.cluster.messaging.MessageAssembler;
38 import org.opendaylight.yangtools.concepts.Identifiable;
39 import org.opendaylight.yangtools.concepts.WritableIdentifier;
40 import org.slf4j.Logger;
41 import org.slf4j.LoggerFactory;
42 import scala.concurrent.duration.FiniteDuration;
45 * A behavior, which handles messages sent to a {@link AbstractClientActor}.
47 * @author Robert Varga
50 public abstract class ClientActorBehavior<T extends BackendInfo> extends
51 RecoveredClientActorBehavior<ClientActorContext> implements Identifiable<ClientIdentifier> {
53 * Connection reconnect cohort, driven by this class.
56 protected interface ConnectionConnectCohort {
58 * Finish the connection by replaying previous messages onto the new connection.
60 * @param enqueuedEntries Previously-enqueued entries
61 * @return A {@link ReconnectForwarder} to handle any straggler messages which arrive after this method returns.
63 @Nonnull ReconnectForwarder finishReconnect(@Nonnull Collection<ConnectionEntry> enqueuedEntries);
66 private static final Logger LOG = LoggerFactory.getLogger(ClientActorBehavior.class);
67 private static final FiniteDuration RESOLVE_RETRY_DURATION = FiniteDuration.apply(5, TimeUnit.SECONDS);
70 * Map of connections to the backend. This map is concurrent to allow lookups, but given complex operations
71 * involved in connection transitions it is protected by a {@link InversibleLock}. Write-side of the lock is taken
72 * during connection transitions. Optimistic read-side of the lock is taken when new connections are introduced
76 * The lock detects potential AB/BA deadlock scenarios and will force the reader side out by throwing
77 * a {@link InversibleLockException} -- which must be propagated up, releasing locks as it propagates. The initial
78 * entry point causing the the conflicting lookup must then call {@link InversibleLockException#awaitResolution()}
79 * before retrying the operation.
81 // TODO: it should be possible to move these two into ClientActorContext
82 private final Map<Long, AbstractClientConnection<T>> connections = new ConcurrentHashMap<>();
83 private final InversibleLock connectionsLock = new InversibleLock();
84 private final BackendInfoResolver<T> resolver;
85 private final MessageAssembler responseMessageAssembler;
87 protected ClientActorBehavior(@Nonnull final ClientActorContext context,
88 @Nonnull final BackendInfoResolver<T> resolver) {
90 this.resolver = Preconditions.checkNotNull(resolver);
92 final ClientActorConfig config = context.config();
93 responseMessageAssembler = MessageAssembler.builder().logContext(persistenceId())
94 .fileBackedStreamFactory(new FileBackedOutputStreamFactory(config.getFileBackedStreamingThreshold(),
95 config.getTempFileDirectory()))
96 .assembledMessageCallback((message, sender) -> context.self().tell(message, sender)).build();
101 public final ClientIdentifier getIdentifier() {
102 return context().getIdentifier();
106 public void close() {
107 responseMessageAssembler.close();
111 * Get a connection to a shard.
113 * @param shard Shard cookie
114 * @return Connection to a shard
115 * @throws InversibleLockException if the shard is being reconnected
117 public final AbstractClientConnection<T> getConnection(final Long shard) {
119 final long stamp = connectionsLock.optimisticRead();
120 final AbstractClientConnection<T> conn = connections.computeIfAbsent(shard, this::createConnection);
121 if (connectionsLock.validate(stamp)) {
122 // No write-lock in-between, return success
128 private AbstractClientConnection<T> getConnection(final ResponseEnvelope<?> response) {
129 // Always called from actor context: no locking required
130 return connections.get(extractCookie(response.getMessage().getTarget()));
133 @SuppressWarnings("unchecked")
135 final ClientActorBehavior<T> onReceiveCommand(final Object command) {
136 if (command instanceof InternalCommand) {
137 return ((InternalCommand<T>) command).execute(this);
140 if (command instanceof SuccessEnvelope) {
141 return onRequestSuccess((SuccessEnvelope) command);
144 if (command instanceof FailureEnvelope) {
145 return internalOnRequestFailure((FailureEnvelope) command);
148 if (MessageAssembler.isHandledMessage(command)) {
149 context().dispatchers().getDispatcher(DispatcherType.Serialization).execute(
150 () -> responseMessageAssembler.handleMessage(command, context().self()));
154 return onCommand(command);
157 private static long extractCookie(final WritableIdentifier id) {
158 if (id instanceof TransactionIdentifier) {
159 return ((TransactionIdentifier) id).getHistoryId().getCookie();
160 } else if (id instanceof LocalHistoryIdentifier) {
161 return ((LocalHistoryIdentifier) id).getCookie();
163 throw new IllegalArgumentException("Unhandled identifier " + id);
167 private void onResponse(final ResponseEnvelope<?> response) {
168 final AbstractClientConnection<T> connection = getConnection(response);
169 if (connection != null) {
170 connection.receiveResponse(response);
172 LOG.info("{}: Ignoring unknown response {}", persistenceId(), response);
176 private ClientActorBehavior<T> onRequestSuccess(final SuccessEnvelope success) {
181 private ClientActorBehavior<T> onRequestFailure(final FailureEnvelope failure) {
186 private ClientActorBehavior<T> internalOnRequestFailure(final FailureEnvelope command) {
187 final AbstractClientConnection<T> conn = getConnection(command);
190 * We are talking to multiple actors, which may be lagging behind our state significantly. This has
191 * the effect that we may be receiving responses from a previous connection after we have created a new
192 * one to a different actor.
194 * Since we are already replaying requests to the new actor, we want to ignore errors reported on the old
195 * connection -- for example NotLeaderException, which must not cause a new reconnect. Check the envelope's
196 * sessionId and if it does not match our current connection just ignore it.
198 final Optional<T> optBackend = conn.getBackendInfo();
199 if (optBackend.isPresent() && optBackend.get().getSessionId() != command.getSessionId()) {
200 LOG.debug("{}: Mismatched current connection {} and envelope {}, ignoring response", persistenceId(),
206 final RequestFailure<?, ?> failure = command.getMessage();
207 final RequestException cause = failure.getCause();
208 if (cause instanceof RetiredGenerationException) {
209 LOG.error("{}: current generation {} has been superseded", persistenceId(), getIdentifier(), cause);
214 if (cause instanceof NotLeaderException) {
215 if (conn instanceof ReconnectingClientConnection) {
216 // Already reconnecting, do not churn the logs
218 } else if (conn != null) {
219 LOG.info("{}: connection {} indicated no leadership, reconnecting it", persistenceId(), conn, cause);
220 return conn.reconnect(this, cause);
223 if (cause instanceof OutOfSequenceEnvelopeException) {
224 if (conn instanceof ReconnectingClientConnection) {
225 // Already reconnecting, do not churn the logs
227 } else if (conn != null) {
228 LOG.info("{}: connection {} indicated no sequencing mismatch on {} sequence {}, reconnecting it",
229 persistenceId(), conn, failure.getTarget(), failure.getSequence(), cause);
230 return conn.reconnect(this, cause);
234 return onRequestFailure(command);
237 private void poison(final RequestException cause) {
238 final long stamp = connectionsLock.writeLock();
240 for (AbstractClientConnection<T> q : connections.values()) {
246 connectionsLock.unlockWrite(stamp);
251 * Halt And Catch Fire. Halt processing on this client. Implementations need to ensure they initiate state flush
252 * procedures. No attempt to use this instance should be made after this method returns. Any such use may result
253 * in undefined behavior.
255 * @param cause Failure cause
257 protected abstract void haltClient(@Nonnull Throwable cause);
260 * Override this method to handle any command which is not handled by the base behavior.
262 * @param command the command to process
263 * @return Next behavior to use, null if this actor should shut down.
266 protected abstract ClientActorBehavior<T> onCommand(@Nonnull Object command);
269 * Override this method to provide a backend resolver instance.
271 * @return a backend resolver instance
273 protected final @Nonnull BackendInfoResolver<T> resolver() {
278 * Callback invoked when a new connection has been established. Implementations are expected perform preparatory
279 * tasks before the previous connection is frozen.
281 * @param newConn New connection
282 * @return ConnectionConnectCohort which will be used to complete the process of bringing the connection up.
284 @GuardedBy("connectionsLock")
285 @Nonnull protected abstract ConnectionConnectCohort connectionUp(@Nonnull ConnectedClientConnection<T> newConn);
287 private void backendConnectFinished(final Long shard, final AbstractClientConnection<T> conn,
288 final T backend, final Throwable failure) {
289 if (failure != null) {
290 if (failure instanceof TimeoutException) {
291 if (!conn.equals(connections.get(shard))) {
292 // AbstractClientConnection will remove itself when it decides there is no point in continuing,
293 // at which point we want to stop retrying
294 LOG.info("{}: stopping resolution of shard {} on stale connection {}", persistenceId(), shard, conn,
299 LOG.debug("{}: timed out resolving shard {}, scheduling retry in {}", persistenceId(), shard,
300 RESOLVE_RETRY_DURATION, failure);
301 context().executeInActor(b -> {
302 resolveConnection(shard, conn);
304 }, RESOLVE_RETRY_DURATION);
308 LOG.error("{}: failed to resolve shard {}", persistenceId(), shard, failure);
309 final RequestException cause;
310 if (failure instanceof RequestException) {
311 cause = (RequestException) failure;
313 cause = new RuntimeRequestException("Failed to resolve shard " + shard, failure);
320 LOG.info("{}: resolved shard {} to {}", persistenceId(), shard, backend);
321 final long stamp = connectionsLock.writeLock();
323 final Stopwatch sw = Stopwatch.createStarted();
325 // Create a new connected connection
326 final ConnectedClientConnection<T> newConn = new ConnectedClientConnection<>(conn.context(),
327 conn.cookie(), backend);
328 LOG.info("{}: resolving connection {} to {}", persistenceId(), conn, newConn);
330 // Start reconnecting without the old connection lock held
331 final ConnectionConnectCohort cohort = Verify.verifyNotNull(connectionUp(newConn));
333 // Lock the old connection and get a reference to its entries
334 final Collection<ConnectionEntry> replayIterable = conn.startReplay();
336 // Finish the connection attempt
337 final ReconnectForwarder forwarder = Verify.verifyNotNull(cohort.finishReconnect(replayIterable));
339 // Install the forwarder, unlocking the old connection
340 conn.finishReplay(forwarder);
342 // Make sure new lookups pick up the new connection
343 if (!connections.replace(shard, conn, newConn)) {
344 final AbstractClientConnection<T> existing = connections.get(conn.cookie());
345 LOG.warn("{}: old connection {} does not match existing {}, new connection {} in limbo",
346 persistenceId(), conn, existing, newConn);
348 LOG.info("{}: replaced connection {} with {} in {}", persistenceId(), conn, newConn, sw);
351 connectionsLock.unlockWrite(stamp);
355 void removeConnection(final AbstractClientConnection<?> conn) {
356 final long stamp = connectionsLock.writeLock();
358 if (!connections.remove(conn.cookie(), conn)) {
359 final AbstractClientConnection<T> existing = connections.get(conn.cookie());
360 if (existing != null) {
361 LOG.warn("{}: failed to remove connection {}, as it was superseded by {}", persistenceId(), conn,
364 LOG.warn("{}: failed to remove connection {}, as it was not tracked", persistenceId(), conn);
367 LOG.info("{}: removed connection {}", persistenceId(), conn);
370 connectionsLock.unlockWrite(stamp);
374 @SuppressWarnings("unchecked")
375 void reconnectConnection(final ConnectedClientConnection<?> oldConn,
376 final ReconnectingClientConnection<?> newConn) {
377 final ReconnectingClientConnection<T> conn = (ReconnectingClientConnection<T>)newConn;
378 LOG.info("{}: connection {} reconnecting as {}", persistenceId(), oldConn, newConn);
380 final long stamp = connectionsLock.writeLock();
382 final boolean replaced = connections.replace(oldConn.cookie(), (AbstractClientConnection<T>)oldConn, conn);
384 final AbstractClientConnection<T> existing = connections.get(oldConn.cookie());
385 if (existing != null) {
386 LOG.warn("{}: failed to replace connection {}, as it was superseded by {}", persistenceId(), conn,
389 LOG.warn("{}: failed to replace connection {}, as it was not tracked", persistenceId(), conn);
393 connectionsLock.unlockWrite(stamp);
396 final Long shard = oldConn.cookie();
397 LOG.info("{}: refreshing backend for shard {}", persistenceId(), shard);
398 resolver().refreshBackendInfo(shard, conn.getBackendInfo().get()).whenComplete(
399 (backend, failure) -> context().executeInActor(behavior -> {
400 backendConnectFinished(shard, conn, backend, failure);
405 private ConnectingClientConnection<T> createConnection(final Long shard) {
406 final ConnectingClientConnection<T> conn = new ConnectingClientConnection<>(context(), shard);
407 resolveConnection(shard, conn);
411 private void resolveConnection(final Long shard, final AbstractClientConnection<T> conn) {
412 LOG.debug("{}: resolving shard {} connection {}", persistenceId(), shard, conn);
413 resolver().getBackendInfo(shard).whenComplete((backend, failure) -> context().executeInActor(behavior -> {
414 backendConnectFinished(shard, conn, backend, failure);