Fix shard deadlock in 3 nodes
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / RemoteTransactionContext.java
1 /*
2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
3  * Copyright (c) 2015 Brocade Communications Systems, Inc. and others.  All rights reserved.
4  *
5  * This program and the accompanying materials are made available under the
6  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
7  * and is available at http://www.eclipse.org/legal/epl-v10.html
8  */
9 package org.opendaylight.controller.cluster.datastore;
10
11 import akka.actor.ActorSelection;
12 import akka.dispatch.Futures;
13 import akka.dispatch.OnComplete;
14 import com.google.common.base.Preconditions;
15 import com.google.common.util.concurrent.SettableFuture;
16 import java.util.Optional;
17 import java.util.SortedSet;
18 import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
19 import org.opendaylight.controller.cluster.datastore.messages.AbstractRead;
20 import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
21 import org.opendaylight.controller.cluster.datastore.messages.CloseTransaction;
22 import org.opendaylight.controller.cluster.datastore.modification.AbstractModification;
23 import org.opendaylight.controller.cluster.datastore.modification.Modification;
24 import org.opendaylight.controller.cluster.datastore.utils.ActorContext;
25 import org.opendaylight.controller.md.sal.common.api.data.ReadFailedException;
26 import org.slf4j.Logger;
27 import org.slf4j.LoggerFactory;
28 import scala.concurrent.Future;
29
30 /**
31  * Redirects front-end transaction operations to a shard for processing. Instances of this class are used
32  * when the destination shard is remote to the caller.
33  *
34  * @author Thomas Pantelis
35  */
36 public class RemoteTransactionContext extends AbstractTransactionContext {
37     private static final Logger LOG = LoggerFactory.getLogger(RemoteTransactionContext.class);
38
39     private final ActorContext actorContext;
40     private final ActorSelection actor;
41     private final OperationLimiter limiter;
42
43     private BatchedModifications batchedModifications;
44     private int totalBatchedModificationsSent;
45     private int batchPermits;
46
47     /**
48      * We have observed a failed modification batch. This transaction context is effectively doomed, as the backend
49      * does not have a correct view of the world. If this happens, we do not limit operations but rather short-cut them
50      * to a either a no-op (modifications) or a failure (reads). Once the transaction is ready, though, we send the
51      * message to resynchronize with the backend, sharing a 'lost message' failure path.
52      */
53     private volatile Throwable failedModification;
54
55     protected RemoteTransactionContext(final TransactionIdentifier identifier, final ActorSelection actor,
56             final ActorContext actorContext, final short remoteTransactionVersion, final OperationLimiter limiter) {
57         super(identifier, remoteTransactionVersion);
58         this.limiter = Preconditions.checkNotNull(limiter);
59         this.actor = actor;
60         this.actorContext = actorContext;
61     }
62
63     private ActorSelection getActor() {
64         return actor;
65     }
66
67     protected ActorContext getActorContext() {
68         return actorContext;
69     }
70
71     @Override
72     public void closeTransaction() {
73         LOG.debug("Tx {} closeTransaction called", getIdentifier());
74         TransactionContextCleanup.untrack(this);
75
76         actorContext.sendOperationAsync(getActor(), new CloseTransaction(getTransactionVersion()).toSerializable());
77     }
78
79     @Override
80     public Future<Object> directCommit(final Boolean havePermit) {
81         LOG.debug("Tx {} directCommit called", getIdentifier());
82
83         // Send the remaining batched modifications, if any, with the ready flag set.
84         bumpPermits(havePermit);
85         return sendBatchedModifications(true, true, Optional.empty());
86     }
87
88     @Override
89     public Future<ActorSelection> readyTransaction(final Boolean havePermit,
90             final Optional<SortedSet<String>> participatingShardNames) {
91         logModificationCount();
92
93         LOG.debug("Tx {} readyTransaction called", getIdentifier());
94
95         // Send the remaining batched modifications, if any, with the ready flag set.
96
97         bumpPermits(havePermit);
98         Future<Object> lastModificationsFuture = sendBatchedModifications(true, false, participatingShardNames);
99
100         return transformReadyReply(lastModificationsFuture);
101     }
102
103     private void bumpPermits(final Boolean havePermit) {
104         if (Boolean.TRUE.equals(havePermit)) {
105             ++batchPermits;
106         }
107     }
108
109     protected Future<ActorSelection> transformReadyReply(final Future<Object> readyReplyFuture) {
110         // Transform the last reply Future into a Future that returns the cohort actor path from
111         // the last reply message. That's the end result of the ready operation.
112
113         return TransactionReadyReplyMapper.transform(readyReplyFuture, actorContext, getIdentifier());
114     }
115
116     private BatchedModifications newBatchedModifications() {
117         return new BatchedModifications(getIdentifier(), getTransactionVersion());
118     }
119
120     private void batchModification(final Modification modification, final boolean havePermit) {
121         incrementModificationCount();
122         if (havePermit) {
123             ++batchPermits;
124         }
125
126         if (batchedModifications == null) {
127             batchedModifications = newBatchedModifications();
128         }
129
130         batchedModifications.addModification(modification);
131
132         if (batchedModifications.getModifications().size()
133                 >= actorContext.getDatastoreContext().getShardBatchedModificationCount()) {
134             sendBatchedModifications();
135         }
136     }
137
138     protected Future<Object> sendBatchedModifications() {
139         return sendBatchedModifications(false, false, Optional.empty());
140     }
141
142     protected Future<Object> sendBatchedModifications(final boolean ready, final boolean doCommitOnReady,
143             final Optional<SortedSet<String>> participatingShardNames) {
144         Future<Object> sent = null;
145         if (ready || batchedModifications != null && !batchedModifications.getModifications().isEmpty()) {
146             if (batchedModifications == null) {
147                 batchedModifications = newBatchedModifications();
148             }
149
150             LOG.debug("Tx {} sending {} batched modifications, ready: {}", getIdentifier(),
151                     batchedModifications.getModifications().size(), ready);
152
153             batchedModifications.setDoCommitOnReady(doCommitOnReady);
154             batchedModifications.setTotalMessagesSent(++totalBatchedModificationsSent);
155
156             final BatchedModifications toSend = batchedModifications;
157             final int permitsToRelease = batchPermits;
158             batchPermits = 0;
159
160             if (ready) {
161                 batchedModifications.setReady(participatingShardNames);
162                 batchedModifications.setDoCommitOnReady(doCommitOnReady);
163                 batchedModifications = null;
164             } else {
165                 batchedModifications = newBatchedModifications();
166
167                 final Throwable failure = failedModification;
168                 if (failure != null) {
169                     // We have observed a modification failure, it does not make sense to send this batch. This speeds
170                     // up the time when the application could be blocked due to messages timing out and operation
171                     // limiter kicking in.
172                     LOG.debug("Tx {} modifications previously failed, not sending a non-ready batch", getIdentifier());
173                     limiter.release(permitsToRelease);
174                     return Futures.failed(failure);
175                 }
176             }
177
178             sent = actorContext.executeOperationAsync(getActor(), toSend.toSerializable(),
179                 actorContext.getTransactionCommitOperationTimeout());
180             sent.onComplete(new OnComplete<Object>() {
181                 @Override
182                 public void onComplete(final Throwable failure, final Object success) {
183                     if (failure != null) {
184                         LOG.debug("Tx {} modifications failed", getIdentifier(), failure);
185                         failedModification = failure;
186                     } else {
187                         LOG.debug("Tx {} modifications completed with {}", getIdentifier(), success);
188                     }
189                     limiter.release(permitsToRelease);
190                 }
191             }, actorContext.getClientDispatcher());
192         }
193
194         return sent;
195     }
196
197     @Override
198     public void executeModification(final AbstractModification modification, final Boolean havePermit) {
199         LOG.debug("Tx {} executeModification {} called path = {}", getIdentifier(),
200                 modification.getClass().getSimpleName(), modification.getPath());
201
202         final boolean permitToRelease;
203         if (havePermit == null) {
204             permitToRelease = failedModification == null && acquireOperation();
205         } else {
206             permitToRelease = havePermit.booleanValue();
207         }
208
209         batchModification(modification, permitToRelease);
210     }
211
212     @Override
213     public <T> void executeRead(final AbstractRead<T> readCmd, final SettableFuture<T> returnFuture,
214             final Boolean havePermit) {
215         LOG.debug("Tx {} executeRead {} called path = {}", getIdentifier(), readCmd.getClass().getSimpleName(),
216                 readCmd.getPath());
217
218         final Throwable failure = failedModification;
219         if (failure != null) {
220             // If we know there was a previous modification failure, we must not send a read request, as it risks
221             // returning incorrect data. We check this before acquiring an operation simply because we want the app
222             // to complete this transaction as soon as possible.
223             returnFuture.setException(new ReadFailedException("Previous modification failed, cannot "
224                     + readCmd.getClass().getSimpleName() + " for path " + readCmd.getPath(), failure));
225             return;
226         }
227
228         // Send any batched modifications. This is necessary to honor the read uncommitted semantics of the
229         // public API contract.
230
231         final boolean permitToRelease = havePermit == null ? acquireOperation() : havePermit.booleanValue();
232         sendBatchedModifications();
233
234         OnComplete<Object> onComplete = new OnComplete<Object>() {
235             @Override
236             public void onComplete(final Throwable failure, final Object response) {
237                 // We have previously acquired an operation, now release it, no matter what happened
238                 if (permitToRelease) {
239                     limiter.release();
240                 }
241
242                 if (failure != null) {
243                     LOG.debug("Tx {} {} operation failed: {}", getIdentifier(), readCmd.getClass().getSimpleName(),
244                             failure);
245
246                     returnFuture.setException(new ReadFailedException("Error checking "
247                         + readCmd.getClass().getSimpleName() + " for path " + readCmd.getPath(), failure));
248                 } else {
249                     LOG.debug("Tx {} {} operation succeeded", getIdentifier(), readCmd.getClass().getSimpleName());
250                     readCmd.processResponse(response, returnFuture);
251                 }
252             }
253         };
254
255         final Future<Object> future = actorContext.executeOperationAsync(getActor(),
256             readCmd.asVersion(getTransactionVersion()).toSerializable(), actorContext.getOperationTimeout());
257         future.onComplete(onComplete, actorContext.getClientDispatcher());
258     }
259
260     /**
261      * Acquire operation from the limiter if the hand-off has completed. If the hand-off is still ongoing, this method
262      * does nothing.
263      *
264      * @return True if a permit was successfully acquired, false otherwise
265      */
266     private boolean acquireOperation() {
267         Preconditions.checkState(isOperationHandOffComplete(),
268             "Attempted to acquire execute operation permit for transaction %s on actor %s during handoff",
269             getIdentifier(), actor);
270
271         if (limiter.acquire()) {
272             return true;
273         }
274
275         LOG.warn("Failed to acquire execute operation permit for transaction {} on actor {}", getIdentifier(), actor);
276         return false;
277     }
278
279     @Override
280     public boolean usesOperationLimiting() {
281         return true;
282     }
283 }