8073184508e58e680ea65cc389763705e02321d9
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / ShardCommitCoordinator.java
1 /*
2  * Copyright (c) 2014 Brocade Communications Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.datastore;
9
10 import static java.util.Objects.requireNonNull;
11
12 import akka.actor.ActorRef;
13 import akka.actor.Status.Failure;
14 import akka.serialization.Serialization;
15 import com.google.common.annotations.VisibleForTesting;
16 import com.google.common.primitives.UnsignedLong;
17 import com.google.common.util.concurrent.FutureCallback;
18 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
19 import java.util.ArrayDeque;
20 import java.util.ArrayList;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.LinkedList;
25 import java.util.Map;
26 import org.eclipse.jdt.annotation.NonNull;
27 import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
28 import org.opendaylight.controller.cluster.datastore.messages.AbortTransactionReply;
29 import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
30 import org.opendaylight.controller.cluster.datastore.messages.BatchedModificationsReply;
31 import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransaction;
32 import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransactionReply;
33 import org.opendaylight.controller.cluster.datastore.messages.CommitTransaction;
34 import org.opendaylight.controller.cluster.datastore.messages.CommitTransactionReply;
35 import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction;
36 import org.opendaylight.controller.cluster.datastore.messages.ReadyLocalTransaction;
37 import org.opendaylight.controller.cluster.datastore.messages.ReadyTransactionReply;
38 import org.opendaylight.controller.cluster.datastore.messages.VersionedExternalizableMessage;
39 import org.opendaylight.controller.cluster.datastore.utils.AbstractBatchedModificationsCursor;
40 import org.opendaylight.yangtools.concepts.Identifier;
41 import org.opendaylight.yangtools.yang.common.Empty;
42 import org.opendaylight.yangtools.yang.data.tree.api.DataTreeCandidate;
43 import org.slf4j.Logger;
44
45 /**
46  * Coordinates commits for a shard ensuring only one concurrent 3-phase commit.
47  *
48  * @author Thomas Pantelis
49  */
50 final class ShardCommitCoordinator {
51
52     // Interface hook for unit tests to replace or decorate the ShardDataTreeCohorts.
53     @VisibleForTesting
54     public interface CohortDecorator {
55         ShardDataTreeCohort decorate(Identifier transactionID, ShardDataTreeCohort actual);
56     }
57
58     private final Map<Identifier, CohortEntry> cohortCache = new HashMap<>();
59
60     private final ShardDataTree dataTree;
61
62     private final Logger log;
63
64     private final String name;
65
66     // This is a hook for unit tests to replace or decorate the ShardDataTreeCohorts.
67     @VisibleForTesting
68     private CohortDecorator cohortDecorator;
69
70     private ReadyTransactionReply readyTransactionReply;
71
72     ShardCommitCoordinator(final ShardDataTree dataTree, final Logger log, final String name) {
73         this.log = log;
74         this.name = name;
75         this.dataTree = requireNonNull(dataTree);
76     }
77
78     int getCohortCacheSize() {
79         return cohortCache.size();
80     }
81
82     private String persistenceId() {
83         return dataTree.logContext();
84     }
85
86     private ReadyTransactionReply readyTransactionReply(final ActorRef cohort) {
87         if (readyTransactionReply == null) {
88             readyTransactionReply = new ReadyTransactionReply(Serialization.serializedActorPath(cohort));
89         }
90
91         return readyTransactionReply;
92     }
93
94     /**
95      * This method is called to ready a transaction that was prepared by ShardTransaction actor. It caches
96      * the prepared cohort entry for the given transactions ID in preparation for the subsequent 3-phase commit.
97      *
98      * @param ready the ForwardedReadyTransaction message to process
99      * @param sender the sender of the message
100      * @param shard the transaction's shard actor
101      */
102     void handleForwardedReadyTransaction(final ForwardedReadyTransaction ready, final ActorRef sender,
103             final Shard shard) {
104         log.debug("{}: Readying transaction {}, client version {}", name,
105                 ready.getTransactionId(), ready.getTxnClientVersion());
106
107         final ShardDataTreeCohort cohort = ready.getTransaction().ready(ready.getParticipatingShardNames());
108         final CohortEntry cohortEntry = CohortEntry.createReady(cohort, ready.getTxnClientVersion());
109         cohortCache.put(cohortEntry.getTransactionId(), cohortEntry);
110
111         if (ready.isDoImmediateCommit()) {
112             cohortEntry.setDoImmediateCommit(true);
113             cohortEntry.setReplySender(sender);
114             cohortEntry.setShard(shard);
115             handleCanCommit(cohortEntry);
116         } else {
117             // The caller does not want immediate commit - the 3-phase commit will be coordinated by the
118             // front-end so send back a ReadyTransactionReply with our actor path.
119             sender.tell(readyTransactionReply(shard.self()), shard.self());
120         }
121     }
122
123     /**
124      * This method handles a BatchedModifications message for a transaction being prepared directly on the
125      * Shard actor instead of via a ShardTransaction actor. If there's no currently cached
126      * DOMStoreWriteTransaction, one is created. The batched modifications are applied to the write Tx. If
127      * the BatchedModifications is ready to commit then a DOMStoreThreePhaseCommitCohort is created.
128      *
129      * @param batched the BatchedModifications message to process
130      * @param sender the sender of the message
131      */
132     @SuppressFBWarnings(value = "THROWS_METHOD_THROWS_RUNTIMEEXCEPTION", justification = "Replay of captured failure")
133     void handleBatchedModifications(final BatchedModifications batched, final ActorRef sender, final Shard shard) {
134         CohortEntry cohortEntry = cohortCache.get(batched.getTransactionId());
135         if (cohortEntry == null || cohortEntry.isSealed()) {
136             cohortEntry = CohortEntry.createOpen(dataTree.newReadWriteTransaction(batched.getTransactionId()),
137                 batched.getVersion());
138             cohortCache.put(cohortEntry.getTransactionId(), cohortEntry);
139         }
140
141         if (log.isDebugEnabled()) {
142             log.debug("{}: Applying {} batched modifications for Tx {}", name,
143                     batched.getModifications().size(), batched.getTransactionId());
144         }
145
146         cohortEntry.applyModifications(batched.getModifications());
147
148         if (batched.isReady()) {
149             if (cohortEntry.getLastBatchedModificationsException() != null) {
150                 cohortCache.remove(cohortEntry.getTransactionId());
151                 throw cohortEntry.getLastBatchedModificationsException();
152             }
153
154             if (cohortEntry.getTotalBatchedModificationsReceived() != batched.getTotalMessagesSent()) {
155                 cohortCache.remove(cohortEntry.getTransactionId());
156                 throw new IllegalStateException(String.format(
157                         "The total number of batched messages received %d does not match the number sent %d",
158                         cohortEntry.getTotalBatchedModificationsReceived(), batched.getTotalMessagesSent()));
159             }
160
161             if (log.isDebugEnabled()) {
162                 log.debug("{}: Readying Tx {} of {} operations, client version {}", name,
163                         batched.getTransactionId(), cohortEntry.getTotalOperationsProcessed(), batched.getVersion());
164             }
165
166             cohortEntry.setDoImmediateCommit(batched.isDoCommitOnReady());
167             cohortEntry.ready(batched.getParticipatingShardNames(), cohortDecorator);
168
169             if (batched.isDoCommitOnReady()) {
170                 cohortEntry.setReplySender(sender);
171                 cohortEntry.setShard(shard);
172                 handleCanCommit(cohortEntry);
173             } else {
174                 sender.tell(readyTransactionReply(shard.self()), shard.self());
175             }
176         } else {
177             sender.tell(new BatchedModificationsReply(batched.getModifications().size()), shard.self());
178         }
179     }
180
181     /**
182      * This method handles {@link ReadyLocalTransaction} message. All transaction modifications have
183      * been prepared beforehand by the sender and we just need to drive them through into the
184      * dataTree.
185      *
186      * @param message the ReadyLocalTransaction message to process
187      * @param sender the sender of the message
188      * @param shard the transaction's shard actor
189      */
190     void handleReadyLocalTransaction(final ReadyLocalTransaction message, final ActorRef sender, final Shard shard) {
191         final TransactionIdentifier txId = message.getTransactionId();
192         final ShardDataTreeCohort cohort = dataTree.newReadyCohort(txId, message.getModification(),
193                 message.getParticipatingShardNames());
194         final CohortEntry cohortEntry = CohortEntry.createReady(cohort, DataStoreVersions.CURRENT_VERSION);
195         cohortCache.put(cohortEntry.getTransactionId(), cohortEntry);
196         cohortEntry.setDoImmediateCommit(message.isDoCommitOnReady());
197
198         log.debug("{}: Applying local modifications for Tx {}", name, txId);
199
200         if (message.isDoCommitOnReady()) {
201             cohortEntry.setReplySender(sender);
202             cohortEntry.setShard(shard);
203             handleCanCommit(cohortEntry);
204         } else {
205             sender.tell(readyTransactionReply(shard.self()), shard.self());
206         }
207     }
208
209     Collection<BatchedModifications> createForwardedBatchedModifications(final BatchedModifications from,
210             final int maxModificationsPerBatch) {
211         CohortEntry cohortEntry = cohortCache.remove(from.getTransactionId());
212         if (cohortEntry == null || cohortEntry.getTransaction() == null) {
213             return Collections.singletonList(from);
214         }
215
216         cohortEntry.applyModifications(from.getModifications());
217
218         final LinkedList<BatchedModifications> newModifications = new LinkedList<>();
219         cohortEntry.getTransaction().getSnapshot().applyToCursor(new AbstractBatchedModificationsCursor() {
220             @Override
221             protected BatchedModifications getModifications() {
222                 if (newModifications.isEmpty()
223                         || newModifications.getLast().getModifications().size() >= maxModificationsPerBatch) {
224                     newModifications.add(new BatchedModifications(from.getTransactionId(), from.getVersion()));
225                 }
226
227                 return newModifications.getLast();
228             }
229         });
230
231         BatchedModifications last = newModifications.getLast();
232         last.setDoCommitOnReady(from.isDoCommitOnReady());
233         if (from.isReady()) {
234             last.setReady(from.getParticipatingShardNames());
235         }
236         last.setTotalMessagesSent(newModifications.size());
237         return newModifications;
238     }
239
240     private void handleCanCommit(final CohortEntry cohortEntry) {
241         cohortEntry.canCommit(new FutureCallback<>() {
242             @Override
243             public void onSuccess(final Empty result) {
244                 log.debug("{}: canCommit for {}: success", name, cohortEntry.getTransactionId());
245
246                 if (cohortEntry.isDoImmediateCommit()) {
247                     doCommit(cohortEntry);
248                 } else {
249                     cohortEntry.getReplySender().tell(
250                         CanCommitTransactionReply.yes(cohortEntry.getClientVersion()).toSerializable(),
251                         cohortEntry.getShard().self());
252                 }
253             }
254
255             @Override
256             public void onFailure(final Throwable failure) {
257                 log.debug("{}: An exception occurred during canCommit for {}", name, cohortEntry.getTransactionId(),
258                     failure);
259
260                 cohortCache.remove(cohortEntry.getTransactionId());
261                 cohortEntry.getReplySender().tell(new Failure(failure), cohortEntry.getShard().self());
262             }
263         });
264     }
265
266     /**
267      * This method handles the canCommit phase for a transaction.
268      *
269      * @param transactionID the ID of the transaction to canCommit
270      * @param sender the actor to which to send the response
271      * @param shard the transaction's shard actor
272      */
273     void handleCanCommit(final Identifier transactionID, final ActorRef sender, final Shard shard) {
274         // Lookup the cohort entry that was cached previously (or should have been) by
275         // transactionReady (via the ForwardedReadyTransaction message).
276         final CohortEntry cohortEntry = cohortCache.get(transactionID);
277         if (cohortEntry == null) {
278             // Either canCommit was invoked before ready (shouldn't happen) or a long time passed
279             // between canCommit and ready and the entry was expired from the cache or it was aborted.
280             IllegalStateException ex = new IllegalStateException(
281                     String.format("%s: Cannot canCommit transaction %s - no cohort entry found", name, transactionID));
282             log.error("{}: Inconsistency during transaction {} canCommit", name, transactionID, ex);
283             sender.tell(new Failure(ex), shard.self());
284             return;
285         }
286
287         cohortEntry.setReplySender(sender);
288         cohortEntry.setShard(shard);
289
290         handleCanCommit(cohortEntry);
291     }
292
293     void doCommit(final CohortEntry cohortEntry) {
294         log.debug("{}: Committing transaction {}", name, cohortEntry.getTransactionId());
295
296         // We perform the preCommit phase here atomically with the commit phase. This is an
297         // optimization to eliminate the overhead of an extra preCommit message. We lose front-end
298         // coordination of preCommit across shards in case of failure but preCommit should not
299         // normally fail since we ensure only one concurrent 3-phase commit.
300         cohortEntry.preCommit(new FutureCallback<DataTreeCandidate>() {
301             @Override
302             public void onSuccess(final DataTreeCandidate candidate) {
303                 finishCommit(cohortEntry.getReplySender(), cohortEntry);
304             }
305
306             @Override
307             public void onFailure(final Throwable failure) {
308                 log.error("{} An exception occurred while preCommitting transaction {}", name,
309                         cohortEntry.getTransactionId(), failure);
310
311                 cohortCache.remove(cohortEntry.getTransactionId());
312                 cohortEntry.getReplySender().tell(new Failure(failure), cohortEntry.getShard().self());
313             }
314         });
315     }
316
317     void finishCommit(final @NonNull ActorRef sender, final @NonNull CohortEntry cohortEntry) {
318         log.debug("{}: Finishing commit for transaction {}", persistenceId(), cohortEntry.getTransactionId());
319
320         cohortEntry.commit(new FutureCallback<UnsignedLong>() {
321             @Override
322             public void onSuccess(final UnsignedLong result) {
323                 final TransactionIdentifier txId = cohortEntry.getTransactionId();
324                 log.debug("{}: Transaction {} committed as {}, sending response to {}", persistenceId(), txId, result,
325                     sender);
326
327                 cohortCache.remove(cohortEntry.getTransactionId());
328                 sender.tell(CommitTransactionReply.instance(cohortEntry.getClientVersion()).toSerializable(),
329                     cohortEntry.getShard().self());
330             }
331
332             @Override
333             public void onFailure(final Throwable failure) {
334                 final TransactionIdentifier txId = cohortEntry.getTransactionId();
335                 log.error("{}, An exception occurred while committing transaction {}", persistenceId(), txId, failure);
336
337                 cohortCache.remove(cohortEntry.getTransactionId());
338                 sender.tell(new Failure(failure), cohortEntry.getShard().self());
339             }
340         });
341     }
342
343     /**
344      * This method handles the preCommit and commit phases for a transaction.
345      *
346      * @param transactionID the ID of the transaction to commit
347      * @param sender the actor to which to send the response
348      * @param shard the transaction's shard actor
349      */
350     void handleCommit(final Identifier transactionID, final ActorRef sender, final Shard shard) {
351         final CohortEntry cohortEntry = cohortCache.get(transactionID);
352         if (cohortEntry == null) {
353             // Either a long time passed between canCommit and commit and the entry was expired from the cache
354             // or it was aborted.
355             IllegalStateException ex = new IllegalStateException(
356                     String.format("%s: Cannot commit transaction %s - no cohort entry found", name, transactionID));
357             log.error("{}: Inconsistency during transaction {} commit", name, transactionID, ex);
358             sender.tell(new Failure(ex), shard.self());
359             return;
360         }
361
362         cohortEntry.setReplySender(sender);
363         doCommit(cohortEntry);
364     }
365
366     @SuppressWarnings("checkstyle:IllegalCatch")
367     void handleAbort(final Identifier transactionID, final ActorRef sender, final Shard shard) {
368         CohortEntry cohortEntry = cohortCache.remove(transactionID);
369         if (cohortEntry == null) {
370             return;
371         }
372
373         log.debug("{}: Aborting transaction {}", name, transactionID);
374
375         final ActorRef self = shard.getSelf();
376         cohortEntry.abort(new FutureCallback<>() {
377             @Override
378             public void onSuccess(final Empty result) {
379                 if (sender != null) {
380                     sender.tell(AbortTransactionReply.instance(cohortEntry.getClientVersion()).toSerializable(), self);
381                 }
382             }
383
384             @Override
385             public void onFailure(final Throwable failure) {
386                 log.error("{}: An exception happened during abort", name, failure);
387
388                 if (sender != null) {
389                     sender.tell(new Failure(failure), self);
390                 }
391             }
392         });
393
394         shard.getShardMBean().incrementAbortTransactionsCount();
395     }
396
397     void checkForExpiredTransactions(final long timeout, final Shard shard) {
398         cohortCache.values().removeIf(CohortEntry::isFailed);
399     }
400
401     void abortPendingTransactions(final String reason, final Shard shard) {
402         final var failure = new Failure(new RuntimeException(reason));
403         final var pending = dataTree.getAndClearPendingTransactions();
404
405         log.debug("{}: Aborting {} pending queued transactions", name, pending.size());
406
407         for (var cohort : pending) {
408             final var cohortEntry = cohortCache.remove(cohort.transactionId());
409             if (cohortEntry != null) {
410                 final var replySender = cohortEntry.getReplySender();
411                 if (replySender != null) {
412                     replySender.tell(failure, shard.self());
413                 }
414             }
415         }
416
417         cohortCache.clear();
418     }
419
420     Collection<?> convertPendingTransactionsToMessages(final int maxModificationsPerBatch) {
421         final var messages = new ArrayList<VersionedExternalizableMessage>();
422         for (var cohort : dataTree.getAndClearPendingTransactions()) {
423             final var cohortEntry = cohortCache.remove(cohort.transactionId());
424             if (cohortEntry == null) {
425                 continue;
426             }
427
428             final var newMessages = new ArrayDeque<BatchedModifications>();
429             cohortEntry.getDataTreeModification().applyToCursor(new AbstractBatchedModificationsCursor() {
430                 @Override
431                 protected BatchedModifications getModifications() {
432                     final var lastBatch = newMessages.peekLast();
433                     if (lastBatch != null && lastBatch.getModifications().size() >= maxModificationsPerBatch) {
434                         return lastBatch;
435                     }
436
437                     // Allocate a new message
438                     final var ret = new BatchedModifications(cohortEntry.getTransactionId(),
439                         cohortEntry.getClientVersion());
440                     newMessages.add(ret);
441                     return ret;
442                 }
443             });
444
445             final var last = newMessages.peekLast();
446             if (last != null) {
447                 final boolean immediate = cohortEntry.isDoImmediateCommit();
448                 last.setDoCommitOnReady(immediate);
449                 last.setReady(cohortEntry.getParticipatingShardNames());
450                 last.setTotalMessagesSent(newMessages.size());
451
452                 messages.addAll(newMessages);
453
454                 if (!immediate) {
455                     switch (cohort.getState()) {
456                         case CAN_COMMIT_COMPLETE:
457                         case CAN_COMMIT_PENDING:
458                             messages.add(new CanCommitTransaction(cohortEntry.getTransactionId(),
459                                 cohortEntry.getClientVersion()));
460                             break;
461                         case PRE_COMMIT_COMPLETE:
462                         case PRE_COMMIT_PENDING:
463                             messages.add(new CommitTransaction(cohortEntry.getTransactionId(),
464                                 cohortEntry.getClientVersion()));
465                             break;
466                         default:
467                             break;
468                     }
469                 }
470             }
471         }
472
473         return messages;
474     }
475
476     @VisibleForTesting
477     void setCohortDecorator(final CohortDecorator cohortDecorator) {
478         this.cohortDecorator = cohortDecorator;
479     }
480 }