BUG-8309: Add message identity information
[controller.git] / opendaylight / md-sal / cds-access-client / src / main / java / org / opendaylight / controller / cluster / access / client / ClientActorBehavior.java
1 /*
2  * Copyright (c) 2016 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.access.client;
9
10 import com.google.common.annotations.Beta;
11 import com.google.common.base.Preconditions;
12 import com.google.common.base.Verify;
13 import java.util.Map;
14 import java.util.concurrent.ConcurrentHashMap;
15 import java.util.concurrent.TimeUnit;
16 import java.util.concurrent.TimeoutException;
17 import javax.annotation.Nonnull;
18 import javax.annotation.Nullable;
19 import javax.annotation.concurrent.GuardedBy;
20 import org.opendaylight.controller.cluster.access.commands.NotLeaderException;
21 import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
22 import org.opendaylight.controller.cluster.access.concepts.FailureEnvelope;
23 import org.opendaylight.controller.cluster.access.concepts.LocalHistoryIdentifier;
24 import org.opendaylight.controller.cluster.access.concepts.RequestException;
25 import org.opendaylight.controller.cluster.access.concepts.RequestFailure;
26 import org.opendaylight.controller.cluster.access.concepts.ResponseEnvelope;
27 import org.opendaylight.controller.cluster.access.concepts.RetiredGenerationException;
28 import org.opendaylight.controller.cluster.access.concepts.RuntimeRequestException;
29 import org.opendaylight.controller.cluster.access.concepts.SuccessEnvelope;
30 import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
31 import org.opendaylight.yangtools.concepts.Identifiable;
32 import org.opendaylight.yangtools.concepts.WritableIdentifier;
33 import org.slf4j.Logger;
34 import org.slf4j.LoggerFactory;
35 import scala.concurrent.duration.FiniteDuration;
36
37 /**
38  * A behavior, which handles messages sent to a {@link AbstractClientActor}.
39  *
40  * @author Robert Varga
41  */
42 @Beta
43 public abstract class ClientActorBehavior<T extends BackendInfo> extends
44         RecoveredClientActorBehavior<ClientActorContext> implements Identifiable<ClientIdentifier> {
    /**
     * Connection reconnect cohort, driven by this class. An instance is obtained from
     * {@link #connectionUp(ConnectedClientConnection)} and is then asked to finish the transition once the entries
     * of the previous connection are available for replay.
     */
    @FunctionalInterface
    protected interface ConnectionConnectCohort {
        /**
         * Finish the connection by replaying previous messages onto the new connection.
         *
         * @param enqueuedEntries Previously-enqueued entries
         * @return A {@link ReconnectForwarder} to handle any straggler messages which arrive after this method returns.
         */
        @Nonnull ReconnectForwarder finishReconnect(@Nonnull Iterable<ConnectionEntry> enqueuedEntries);
    }
58
    private static final Logger LOG = LoggerFactory.getLogger(ClientActorBehavior.class);
    // Delay applied before retrying a backend resolution which has timed out
    private static final FiniteDuration RESOLVE_RETRY_DURATION = FiniteDuration.apply(5, TimeUnit.SECONDS);

    /**
     * Map of connections to the backend. This map is concurrent to allow lookups, but given complex operations
     * involved in connection transitions it is protected by an {@link InversibleLock}. Write-side of the lock is taken
     * during connection transitions. Optimistic read-side of the lock is taken when new connections are introduced
     * into the map.
     *
     * <p>
     * The lock detects potential AB/BA deadlock scenarios and will force the reader side out by throwing
     * an {@link InversibleLockException} -- which must be propagated up, releasing locks as it propagates. The initial
     * entry point causing the conflicting lookup must then call {@link InversibleLockException#awaitResolution()}
     * before retrying the operation.
     */
    // TODO: it should be possible to move these two into ClientActorContext
    private final Map<Long, AbstractClientConnection<T>> connections = new ConcurrentHashMap<>();
    private final InversibleLock connectionsLock = new InversibleLock();
    // Resolver used to look up and refresh backend information for a shard; never null
    private final BackendInfoResolver<T> resolver;
78
79     protected ClientActorBehavior(@Nonnull final ClientActorContext context,
80             @Nonnull final BackendInfoResolver<T> resolver) {
81         super(context);
82         this.resolver = Preconditions.checkNotNull(resolver);
83     }
84
85     @Override
86     @Nonnull
87     public final ClientIdentifier getIdentifier() {
88         return context().getIdentifier();
89     }
90
91     /**
92      * Get a connection to a shard.
93      *
94      * @param shard Shard cookie
95      * @return Connection to a shard
96      * @throws InversibleLockException if the shard is being reconnected
97      */
98     public final AbstractClientConnection<T> getConnection(final Long shard) {
99         while (true) {
100             final long stamp = connectionsLock.optimisticRead();
101             final AbstractClientConnection<T> conn = connections.computeIfAbsent(shard, this::createConnection);
102             if (connectionsLock.validate(stamp)) {
103                 // No write-lock in-between, return success
104                 return conn;
105             }
106         }
107     }
108
109     private AbstractClientConnection<T> getConnection(final ResponseEnvelope<?> response) {
110         // Always called from actor context: no locking required
111         return connections.get(extractCookie(response.getMessage().getTarget()));
112     }
113
114     @SuppressWarnings("unchecked")
115     @Override
116     final ClientActorBehavior<T> onReceiveCommand(final Object command) {
117         if (command instanceof InternalCommand) {
118             return ((InternalCommand<T>) command).execute(this);
119         }
120         if (command instanceof SuccessEnvelope) {
121             return onRequestSuccess((SuccessEnvelope) command);
122         }
123         if (command instanceof FailureEnvelope) {
124             return internalOnRequestFailure((FailureEnvelope) command);
125         }
126
127         return onCommand(command);
128     }
129
130     private static long extractCookie(final WritableIdentifier id) {
131         if (id instanceof TransactionIdentifier) {
132             return ((TransactionIdentifier) id).getHistoryId().getCookie();
133         } else if (id instanceof LocalHistoryIdentifier) {
134             return ((LocalHistoryIdentifier) id).getCookie();
135         } else {
136             throw new IllegalArgumentException("Unhandled identifier " + id);
137         }
138     }
139
140     private void onResponse(final ResponseEnvelope<?> response) {
141         final AbstractClientConnection<T> connection = getConnection(response);
142         if (connection != null) {
143             connection.receiveResponse(response);
144         } else {
145             LOG.info("{}: Ignoring unknown response {}", persistenceId(), response);
146         }
147     }
148
149     private ClientActorBehavior<T> onRequestSuccess(final SuccessEnvelope success) {
150         onResponse(success);
151         return this;
152     }
153
154     private ClientActorBehavior<T> onRequestFailure(final FailureEnvelope failure) {
155         onResponse(failure);
156         return this;
157     }
158
159     private ClientActorBehavior<T> internalOnRequestFailure(final FailureEnvelope command) {
160         final RequestFailure<?, ?> failure = command.getMessage();
161         final RequestException cause = failure.getCause();
162         if (cause instanceof RetiredGenerationException) {
163             LOG.error("{}: current generation {} has been superseded", persistenceId(), getIdentifier(), cause);
164             haltClient(cause);
165             poison(cause);
166             return null;
167         }
168         if (cause instanceof NotLeaderException) {
169             final AbstractClientConnection<T> conn = getConnection(command);
170             if (conn instanceof ReconnectingClientConnection) {
171                 // Already reconnecting, do not churn the logs
172                 return this;
173             } else if (conn != null) {
174                 LOG.info("{}: connection {} indicated no leadership, reconnecting it", persistenceId(), conn, cause);
175                 return conn.reconnect(this);
176             }
177         }
178
179         return onRequestFailure(command);
180     }
181
182     private void poison(final RequestException cause) {
183         final long stamp = connectionsLock.writeLock();
184         try {
185             for (AbstractClientConnection<T> q : connections.values()) {
186                 q.poison(cause);
187             }
188
189             connections.clear();
190         } finally {
191             connectionsLock.unlockWrite(stamp);
192         }
193     }
194
    /**
     * Halt And Catch Fire. Halt processing on this client. Implementations need to ensure they initiate state flush
     * procedures. No attempt to use this instance should be made after this method returns. Any such use may result
     * in undefined behavior.
     *
     * <p>
     * Within this class the method is invoked when a response reports a {@link RetiredGenerationException}, before
     * the tracked connections are poisoned.
     *
     * @param cause Failure cause
     */
    protected abstract void haltClient(@Nonnull Throwable cause);
203
    /**
     * Override this method to handle any command which is not handled by the base behavior. The base behavior
     * itself consumes {@link InternalCommand}s, {@link SuccessEnvelope}s and {@link FailureEnvelope}s; every other
     * command is passed to this method.
     *
     * @param command the command to process
     * @return Next behavior to use, null if this actor should shut down.
     */
    @Nullable
    protected abstract ClientActorBehavior<T> onCommand(@Nonnull Object command);
212
    /**
     * Return the backend resolver instance which was supplied when this behavior was constructed. This method
     * is final and cannot be overridden.
     *
     * @return a backend resolver instance
     */
    protected final @Nonnull BackendInfoResolver<T> resolver() {
        return resolver;
    }
221
    /**
     * Callback invoked when a new connection has been established. Implementations are expected to perform
     * preparatory tasks before the previous connection is frozen. This class invokes the callback while holding
     * the write side of the connections lock, before replay of the previous connection is started.
     *
     * @param newConn New connection
     * @return ConnectionConnectCohort which will be used to complete the process of bringing the connection up.
     */
    @GuardedBy("connectionsLock")
    @Nonnull protected abstract ConnectionConnectCohort connectionUp(@Nonnull ConnectedClientConnection<T> newConn);
231
232     private void backendConnectFinished(final Long shard, final AbstractClientConnection<T> conn,
233             final T backend, final Throwable failure) {
234         if (failure != null) {
235             if (failure instanceof TimeoutException) {
236                 if (!conn.equals(connections.get(shard))) {
237                     // AbstractClientConnection will remove itself when it decides there is no point in continuing,
238                     // at which point we want to stop retrying
239                     LOG.info("{}: stopping resolution of shard {} on stale connection {}", persistenceId(), shard, conn,
240                         failure);
241                     return;
242                 }
243
244                 LOG.debug("{}: timed out resolving shard {}, scheduling retry in {}", persistenceId(), shard,
245                     RESOLVE_RETRY_DURATION, failure);
246                 context().executeInActor(b -> {
247                     resolveConnection(shard, conn);
248                     return b;
249                 }, RESOLVE_RETRY_DURATION);
250                 return;
251             }
252
253             LOG.error("{}: failed to resolve shard {}", persistenceId(), shard, failure);
254             final RequestException cause;
255             if (failure instanceof RequestException) {
256                 cause = (RequestException) failure;
257             } else {
258                 cause = new RuntimeRequestException("Failed to resolve shard " + shard, failure);
259             }
260
261             conn.poison(cause);
262             return;
263         }
264
265         LOG.info("{}: resolved shard {} to {}", persistenceId(), shard, backend);
266         final long stamp = connectionsLock.writeLock();
267         try {
268             // Create a new connected connection
269             final ConnectedClientConnection<T> newConn = new ConnectedClientConnection<>(conn.context(),
270                     conn.cookie(), backend);
271             LOG.info("{}: resolving connection {} to {}", persistenceId(), conn, newConn);
272
273             // Start reconnecting without the old connection lock held
274             final ConnectionConnectCohort cohort = Verify.verifyNotNull(connectionUp(newConn));
275
276             // Lock the old connection and get a reference to its entries
277             final Iterable<ConnectionEntry> replayIterable = conn.startReplay();
278
279             // Finish the connection attempt
280             final ReconnectForwarder forwarder = Verify.verifyNotNull(cohort.finishReconnect(replayIterable));
281
282             // Install the forwarder, unlocking the old connection
283             conn.finishReplay(forwarder);
284
285             // Make sure new lookups pick up the new connection
286             connections.replace(shard, conn, newConn);
287             LOG.info("{}: replaced connection {} with {}", persistenceId(), conn, newConn);
288         } finally {
289             connectionsLock.unlockWrite(stamp);
290         }
291     }
292
293     void removeConnection(final AbstractClientConnection<?> conn) {
294         connections.remove(conn.cookie(), conn);
295         LOG.debug("{}: removed connection {}", persistenceId(), conn);
296     }
297
298     @SuppressWarnings("unchecked")
299     void reconnectConnection(final ConnectedClientConnection<?> oldConn,
300             final ReconnectingClientConnection<?> newConn) {
301         final ReconnectingClientConnection<T> conn = (ReconnectingClientConnection<T>)newConn;
302         LOG.info("{}: connection {} reconnecting as {}", persistenceId(), oldConn, newConn);
303
304         final boolean replaced = connections.replace(oldConn.cookie(), (AbstractClientConnection<T>)oldConn, conn);
305         if (!replaced) {
306             final AbstractClientConnection<T> existing = connections.get(oldConn.cookie());
307             LOG.warn("{}: old connection {} does not match existing {}, new connection {} in limbo", persistenceId(),
308                 oldConn, existing, newConn);
309         }
310
311         final Long shard = oldConn.cookie();
312         LOG.info("{}: refreshing backend for shard {}", persistenceId(), shard);
313         resolver().refreshBackendInfo(shard, conn.getBackendInfo().get()).whenComplete(
314             (backend, failure) -> context().executeInActor(behavior -> {
315                 backendConnectFinished(shard, conn, backend, failure);
316                 return behavior;
317             }));
318     }
319
320     private ConnectingClientConnection<T> createConnection(final Long shard) {
321         final ConnectingClientConnection<T> conn = new ConnectingClientConnection<>(context(), shard);
322         resolveConnection(shard, conn);
323         return conn;
324     }
325
326     private void resolveConnection(final Long shard, final AbstractClientConnection<T> conn) {
327         LOG.debug("{}: resolving shard {} connection {}", persistenceId(), shard, conn);
328         resolver().getBackendInfo(shard).whenComplete((backend, failure) -> context().executeInActor(behavior -> {
329             backendConnectFinished(shard, conn, backend, failure);
330             return behavior;
331         }));
332     }
333 }