BUG-5280: fix InversibleLock race
[controller.git] / opendaylight / md-sal / cds-access-client / src / main / java / org / opendaylight / controller / cluster / access / client / ClientActorBehavior.java
1 /*
2  * Copyright (c) 2016 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.access.client;
9
10 import com.google.common.annotations.Beta;
11 import com.google.common.base.Preconditions;
12 import com.google.common.base.Verify;
13 import java.util.Map;
14 import java.util.concurrent.ConcurrentHashMap;
15 import javax.annotation.Nonnull;
16 import javax.annotation.Nullable;
17 import javax.annotation.concurrent.GuardedBy;
18 import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
19 import org.opendaylight.controller.cluster.access.concepts.FailureEnvelope;
20 import org.opendaylight.controller.cluster.access.concepts.LocalHistoryIdentifier;
21 import org.opendaylight.controller.cluster.access.concepts.RequestException;
22 import org.opendaylight.controller.cluster.access.concepts.RequestFailure;
23 import org.opendaylight.controller.cluster.access.concepts.ResponseEnvelope;
24 import org.opendaylight.controller.cluster.access.concepts.RetiredGenerationException;
25 import org.opendaylight.controller.cluster.access.concepts.RuntimeRequestException;
26 import org.opendaylight.controller.cluster.access.concepts.SuccessEnvelope;
27 import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
28 import org.opendaylight.yangtools.concepts.Identifiable;
29 import org.opendaylight.yangtools.concepts.WritableIdentifier;
30 import org.slf4j.Logger;
31 import org.slf4j.LoggerFactory;
32
33 /**
34  * A behavior, which handles messages sent to a {@link AbstractClientActor}.
35  *
36  * @author Robert Varga
37  */
38 @Beta
39 public abstract class ClientActorBehavior<T extends BackendInfo> extends
40         RecoveredClientActorBehavior<ClientActorContext> implements Identifiable<ClientIdentifier> {
41     /**
42      * Connection reconnect cohort, driven by this class.
43      */
44     @FunctionalInterface
45     protected interface ConnectionConnectCohort {
46         /**
47          * Finish the connection by replaying previous messages onto the new connection.
48          *
49          * @param enqueuedEntries Previously-enqueued entries
50          * @return A {@link ReconnectForwarder} to handle any straggler messages which arrive after this method returns.
51          */
52         @Nonnull ReconnectForwarder finishReconnect(@Nonnull Iterable<ConnectionEntry> enqueuedEntries);
53     }
54
55     private static final Logger LOG = LoggerFactory.getLogger(ClientActorBehavior.class);
56
57     /**
58      * Map of connections to the backend. This map is concurrent to allow lookups, but given complex operations
59      * involved in connection transitions it is protected by a {@link InversibleLock}. Write-side of the lock is taken
60      * during connection transitions. Optimistic read-side of the lock is taken when new connections are introduced
61      * into the map.
62      *
63      * <p>
64      * The lock detects potential AB/BA deadlock scenarios and will force the reader side out by throwing
65      * a {@link InversibleLockException} -- which must be propagated up, releasing locks as it propagates. The initial
66      * entry point causing the conflicting lookup must then call {@link InversibleLockException#awaitResolution()}
67      * before retrying the operation.
68      */
69     // TODO: it should be possible to move these two into ClientActorContext
70     private final Map<Long, AbstractClientConnection<T>> connections = new ConcurrentHashMap<>();
71     private final InversibleLock connectionsLock = new InversibleLock();
72     private final BackendInfoResolver<T> resolver;
73
74     protected ClientActorBehavior(@Nonnull final ClientActorContext context,
75             @Nonnull final BackendInfoResolver<T> resolver) {
76         super(context);
77         this.resolver = Preconditions.checkNotNull(resolver);
78     }
79
80     @Override
81     @Nonnull
82     public final ClientIdentifier getIdentifier() {
83         return context().getIdentifier();
84     }
85
86     /**
87      * Get a connection to a shard.
88      *
89      * @param shard Shard cookie
90      * @return Connection to a shard
91      * @throws InversibleLockException if the shard is being reconnected
92      */
93     public final AbstractClientConnection<T> getConnection(final Long shard) {
94         while (true) {
95             final long stamp = connectionsLock.optimisticRead();
96             final AbstractClientConnection<T> conn = connections.computeIfAbsent(shard, this::createConnection);
97             if (connectionsLock.validate(stamp)) {
98                 // No write-lock in-between, return success
99                 return conn;
100             }
101         }
102     }
103
104     @SuppressWarnings("unchecked")
105     @Override
106     final ClientActorBehavior<T> onReceiveCommand(final Object command) {
107         if (command instanceof InternalCommand) {
108             return ((InternalCommand<T>) command).execute(this);
109         }
110         if (command instanceof SuccessEnvelope) {
111             return onRequestSuccess((SuccessEnvelope) command);
112         }
113         if (command instanceof FailureEnvelope) {
114             return internalOnRequestFailure((FailureEnvelope) command);
115         }
116
117         return onCommand(command);
118     }
119
120     private static long extractCookie(final WritableIdentifier id) {
121         if (id instanceof TransactionIdentifier) {
122             return ((TransactionIdentifier) id).getHistoryId().getCookie();
123         } else if (id instanceof LocalHistoryIdentifier) {
124             return ((LocalHistoryIdentifier) id).getCookie();
125         } else {
126             throw new IllegalArgumentException("Unhandled identifier " + id);
127         }
128     }
129
130     private void onResponse(final ResponseEnvelope<?> response) {
131         final long cookie = extractCookie(response.getMessage().getTarget());
132         final AbstractClientConnection<T> connection = connections.get(cookie);
133         if (connection != null) {
134             connection.receiveResponse(response);
135         } else {
136             LOG.info("{}: Ignoring unknown response {}", persistenceId(), response);
137         }
138     }
139
140     private ClientActorBehavior<T> onRequestSuccess(final SuccessEnvelope success) {
141         onResponse(success);
142         return this;
143     }
144
145     private ClientActorBehavior<T> onRequestFailure(final FailureEnvelope failure) {
146         onResponse(failure);
147         return this;
148     }
149
150     private ClientActorBehavior<T> internalOnRequestFailure(final FailureEnvelope command) {
151         final RequestFailure<?, ?> failure = command.getMessage();
152         final RequestException cause = failure.getCause();
153         if (cause instanceof RetiredGenerationException) {
154             LOG.error("{}: current generation {} has been superseded", persistenceId(), getIdentifier(), cause);
155             haltClient(cause);
156             poison(cause);
157             return null;
158         }
159
160         return onRequestFailure(command);
161     }
162
163     private void poison(final RequestException cause) {
164         final long stamp = connectionsLock.writeLock();
165         try {
166             for (AbstractClientConnection<T> q : connections.values()) {
167                 q.poison(cause);
168             }
169
170             connections.clear();
171         } finally {
172             connectionsLock.unlockWrite(stamp);
173         }
174     }
175
176     /**
177      * Halt And Catch Fire. Halt processing on this client. Implementations need to ensure they initiate state flush
178      * procedures. No attempt to use this instance should be made after this method returns. Any such use may result
179      * in undefined behavior.
180      *
181      * @param cause Failure cause
182      */
183     protected abstract void haltClient(@Nonnull Throwable cause);
184
185     /**
186      * Override this method to handle any command which is not handled by the base behavior.
187      *
188      * @param command the command to process
189      * @return Next behavior to use, null if this actor should shut down.
190      */
191     @Nullable
192     protected abstract ClientActorBehavior<T> onCommand(@Nonnull Object command);
193
194     /**
195      * Override this method to provide a backend resolver instance.
196      *
197      * @return a backend resolver instance
198      */
199     protected final @Nonnull BackendInfoResolver<T> resolver() {
200         return resolver;
201     }
202
203     /**
204      * Callback invoked when a new connection has been established. Implementations are expected perform preparatory
205      * tasks before the previous connection is frozen.
206      *
207      * @param newConn New connection
208      * @return ConnectionConnectCohort which will be used to complete the process of bringing the connection up.
209      */
210     @GuardedBy("connectionsLock")
211     @Nonnull protected abstract ConnectionConnectCohort connectionUp(@Nonnull ConnectedClientConnection<T> newConn);
212
213     private void backendConnectFinished(final Long shard, final AbstractClientConnection<T> conn,
214             final T backend, final Throwable failure) {
215         if (failure != null) {
216             LOG.error("{}: failed to resolve shard {}", persistenceId(), shard, failure);
217             conn.poison(new RuntimeRequestException("Failed to resolve shard " + shard, failure));
218             return;
219         }
220
221         LOG.debug("{}: resolved shard {} to {}", persistenceId(), shard, backend);
222         final long stamp = connectionsLock.writeLock();
223         try {
224             // Create a new connected connection
225             final ConnectedClientConnection<T> newConn = new ConnectedClientConnection<>(conn.context(),
226                     conn.cookie(), backend);
227             LOG.debug("{}: resolving connection {} to {}", persistenceId(), conn, newConn);
228
229             // Start reconnecting without the old connection lock held
230             final ConnectionConnectCohort cohort = Verify.verifyNotNull(connectionUp(newConn));
231
232             // Lock the old connection and get a reference to its entries
233             final Iterable<ConnectionEntry> replayIterable = conn.startReplay();
234
235             // Finish the connection attempt
236             final ReconnectForwarder forwarder = Verify.verifyNotNull(cohort.finishReconnect(replayIterable));
237
238             // Install the forwarder, unlocking the old connection
239             conn.finishReplay(forwarder);
240
241             // Make sure new lookups pick up the new connection
242             connections.replace(shard, conn, newConn);
243             LOG.debug("{}: replaced connection {} with {}", persistenceId(), conn, newConn);
244         } finally {
245             connectionsLock.unlockWrite(stamp);
246         }
247     }
248
249     void removeConnection(final AbstractClientConnection<?> conn) {
250         connections.remove(conn.cookie(), conn);
251         LOG.debug("{}: removed connection {}", persistenceId(), conn);
252     }
253
254     @SuppressWarnings("unchecked")
255     void reconnectConnection(final ConnectedClientConnection<?> oldConn,
256             final ReconnectingClientConnection<?> newConn) {
257         final ReconnectingClientConnection<T> conn = (ReconnectingClientConnection<T>)newConn;
258         connections.replace(oldConn.cookie(), (AbstractClientConnection<T>)oldConn, conn);
259         LOG.debug("{}: connection {} reconnecting as {}", persistenceId(), oldConn, newConn);
260
261         final Long shard = oldConn.cookie();
262         resolver().refreshBackendInfo(shard, conn.getBackendInfo().get()).whenComplete(
263             (backend, failure) -> context().executeInActor(behavior -> {
264                 backendConnectFinished(shard, conn, backend, failure);
265                 return behavior;
266             }));
267     }
268
269     private ConnectingClientConnection<T> createConnection(final Long shard) {
270         final ConnectingClientConnection<T> conn = new ConnectingClientConnection<>(context(), shard);
271
272         resolver().getBackendInfo(shard).whenComplete((backend, failure) -> context().executeInActor(behavior -> {
273             backendConnectFinished(shard, conn, backend, failure);
274             return behavior;
275         }));
276
277         return conn;
278     }
279 }