/*
 * Copyright (c) 2014 Brocade Communications Systems, Inc. and others. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html
 */
8 package org.opendaylight.controller.cluster.datastore;
10 import akka.actor.ActorRef;
11 import akka.actor.Status.Failure;
12 import akka.serialization.Serialization;
13 import com.google.common.annotations.VisibleForTesting;
14 import com.google.common.base.Preconditions;
15 import com.google.common.base.Stopwatch;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.Iterator;
19 import java.util.LinkedList;
20 import java.util.List;
22 import java.util.Queue;
23 import java.util.concurrent.ExecutionException;
24 import java.util.concurrent.TimeUnit;
25 import org.opendaylight.controller.cluster.datastore.compat.BackwardsCompatibleThreePhaseCommitCohort;
26 import org.opendaylight.controller.cluster.datastore.messages.AbortTransactionReply;
27 import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
28 import org.opendaylight.controller.cluster.datastore.messages.BatchedModificationsReply;
29 import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransactionReply;
30 import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction;
31 import org.opendaylight.controller.cluster.datastore.messages.ReadyLocalTransaction;
32 import org.opendaylight.controller.cluster.datastore.messages.ReadyTransactionReply;
33 import org.opendaylight.controller.cluster.datastore.modification.Modification;
34 import org.opendaylight.controller.md.sal.common.api.data.TransactionCommitFailedException;
35 import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTreeCandidate;
36 import org.slf4j.Logger;
/**
 * Coordinates commits for a shard ensuring only one concurrent 3-phase commit.
 *
 * @author Thomas Pantelis
 */
43 class ShardCommitCoordinator {
45 // Interface hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts.
46 public interface CohortDecorator {
47 ShardDataTreeCohort decorate(String transactionID, ShardDataTreeCohort actual);
50 private final Map<String, CohortEntry> cohortCache = new HashMap<>();
52 private CohortEntry currentCohortEntry;
54 private final ShardDataTree dataTree;
56 // We use a LinkedList here to avoid synchronization overhead with concurrent queue impls
57 // since this should only be accessed on the shard's dispatcher.
58 private final Queue<CohortEntry> queuedCohortEntries = new LinkedList<>();
60 private int queueCapacity;
62 private final Logger log;
64 private final String name;
66 private final long cacheExpiryTimeoutInMillis;
68 // This is a hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts.
69 private CohortDecorator cohortDecorator;
71 private ReadyTransactionReply readyTransactionReply;
73 private Runnable runOnPendingTransactionsComplete;
/**
 * Creates a commit coordinator.
 *
 * @param dataTree the data tree transactions are created on and committed to; must not be null
 * @param cacheExpiryTimeoutInMillis the age, in milliseconds, used when checking queued entries for expiry
 * @param queueCapacity the maximum number of cohort entries allowed in the commit queue
 * @param log the logger to write to
 * @param name the name prefixed to log and error messages
 */
ShardCommitCoordinator(ShardDataTree dataTree,
        long cacheExpiryTimeoutInMillis, int queueCapacity, Logger log, String name) {
    this.queueCapacity = queueCapacity;
    // 'log' and 'name' are final fields, so they must be assigned from the constructor arguments.
    this.log = log;
    this.name = name;
    this.dataTree = Preconditions.checkNotNull(dataTree);
    this.cacheExpiryTimeoutInMillis = cacheExpiryTimeoutInMillis;
}
86 return queuedCohortEntries.size();
89 int getCohortCacheSize() {
90 return cohortCache.size();
93 void setQueueCapacity(int queueCapacity) {
94 this.queueCapacity = queueCapacity;
97 private ReadyTransactionReply readyTransactionReply(Shard shard) {
98 if(readyTransactionReply == null) {
99 readyTransactionReply = new ReadyTransactionReply(Serialization.serializedActorPath(shard.self()));
102 return readyTransactionReply;
105 private boolean queueCohortEntry(CohortEntry cohortEntry, ActorRef sender, Shard shard) {
106 if(queuedCohortEntries.size() < queueCapacity) {
107 queuedCohortEntries.offer(cohortEntry);
109 log.debug("{}: Enqueued transaction {}, queue size {}", name, cohortEntry.getTransactionID(),
110 queuedCohortEntries.size());
114 cohortCache.remove(cohortEntry.getTransactionID());
116 RuntimeException ex = new RuntimeException(
117 String.format("%s: Could not enqueue transaction %s - the maximum commit queue"+
118 " capacity %d has been reached.",
119 name, cohortEntry.getTransactionID(), queueCapacity));
120 log.error(ex.getMessage());
121 sender.tell(new Failure(ex), shard.self());
127 * This method is called to ready a transaction that was prepared by ShardTransaction actor. It caches
128 * the prepared cohort entry for the given transactions ID in preparation for the subsequent 3-phase commit.
130 * @param ready the ForwardedReadyTransaction message to process
131 * @param sender the sender of the message
132 * @param shard the transaction's shard actor
134 void handleForwardedReadyTransaction(ForwardedReadyTransaction ready, ActorRef sender, Shard shard) {
135 log.debug("{}: Readying transaction {}, client version {}", name,
136 ready.getTransactionID(), ready.getTxnClientVersion());
138 ShardDataTreeCohort cohort = ready.getTransaction().ready();
139 CohortEntry cohortEntry = new CohortEntry(ready.getTransactionID(), cohort);
140 cohortCache.put(ready.getTransactionID(), cohortEntry);
142 if(!queueCohortEntry(cohortEntry, sender, shard)) {
146 if(ready.getTxnClientVersion() < DataStoreVersions.LITHIUM_VERSION) {
147 // Return our actor path as we'll handle the three phase commit except if the Tx client
148 // version < Helium-1 version which means the Tx was initiated by a base Helium version node.
149 // In that case, the subsequent 3-phase commit messages won't contain the transactionId so to
150 // maintain backwards compatibility, we create a separate cohort actor to provide the compatible behavior.
151 ActorRef replyActorPath = shard.self();
152 if(ready.getTxnClientVersion() < DataStoreVersions.HELIUM_1_VERSION) {
153 log.debug("{}: Creating BackwardsCompatibleThreePhaseCommitCohort", name);
154 replyActorPath = shard.getContext().actorOf(BackwardsCompatibleThreePhaseCommitCohort.props(
155 ready.getTransactionID()));
158 ReadyTransactionReply readyTransactionReply =
159 new ReadyTransactionReply(Serialization.serializedActorPath(replyActorPath),
160 ready.getTxnClientVersion());
161 sender.tell(ready.isReturnSerialized() ? readyTransactionReply.toSerializable() :
162 readyTransactionReply, shard.self());
164 if(ready.isDoImmediateCommit()) {
165 cohortEntry.setDoImmediateCommit(true);
166 cohortEntry.setReplySender(sender);
167 cohortEntry.setShard(shard);
168 handleCanCommit(cohortEntry);
170 // The caller does not want immediate commit - the 3-phase commit will be coordinated by the
171 // front-end so send back a ReadyTransactionReply with our actor path.
172 sender.tell(readyTransactionReply(shard), shard.self());
178 * This method handles a BatchedModifications message for a transaction being prepared directly on the
179 * Shard actor instead of via a ShardTransaction actor. If there's no currently cached
180 * DOMStoreWriteTransaction, one is created. The batched modifications are applied to the write Tx. If
181 * the BatchedModifications is ready to commit then a DOMStoreThreePhaseCommitCohort is created.
183 * @param batched the BatchedModifications message to process
184 * @param sender the sender of the message
185 * @param shard the transaction's shard actor
187 void handleBatchedModifications(BatchedModifications batched, ActorRef sender, Shard shard) {
188 CohortEntry cohortEntry = cohortCache.get(batched.getTransactionID());
189 if(cohortEntry == null) {
190 cohortEntry = new CohortEntry(batched.getTransactionID(),
191 dataTree.newReadWriteTransaction(batched.getTransactionID(),
192 batched.getTransactionChainID()));
193 cohortCache.put(batched.getTransactionID(), cohortEntry);
196 if(log.isDebugEnabled()) {
197 log.debug("{}: Applying {} batched modifications for Tx {}", name,
198 batched.getModifications().size(), batched.getTransactionID());
201 cohortEntry.applyModifications(batched.getModifications());
203 if(batched.isReady()) {
204 if(cohortEntry.getLastBatchedModificationsException() != null) {
205 cohortCache.remove(cohortEntry.getTransactionID());
206 throw cohortEntry.getLastBatchedModificationsException();
209 if(cohortEntry.getTotalBatchedModificationsReceived() != batched.getTotalMessagesSent()) {
210 cohortCache.remove(cohortEntry.getTransactionID());
211 throw new IllegalStateException(String.format(
212 "The total number of batched messages received %d does not match the number sent %d",
213 cohortEntry.getTotalBatchedModificationsReceived(), batched.getTotalMessagesSent()));
216 if(!queueCohortEntry(cohortEntry, sender, shard)) {
220 if(log.isDebugEnabled()) {
221 log.debug("{}: Readying Tx {}, client version {}", name,
222 batched.getTransactionID(), batched.getVersion());
225 cohortEntry.ready(cohortDecorator, batched.isDoCommitOnReady());
227 if(batched.isDoCommitOnReady()) {
228 cohortEntry.setReplySender(sender);
229 cohortEntry.setShard(shard);
230 handleCanCommit(cohortEntry);
232 sender.tell(readyTransactionReply(shard), shard.self());
235 sender.tell(new BatchedModificationsReply(batched.getModifications().size()), shard.self());
240 * This method handles {@link ReadyLocalTransaction} message. All transaction modifications have
241 * been prepared beforehand by the sender and we just need to drive them through into the dataTree.
243 * @param message the ReadyLocalTransaction message to process
244 * @param sender the sender of the message
245 * @param shard the transaction's shard actor
247 void handleReadyLocalTransaction(ReadyLocalTransaction message, ActorRef sender, Shard shard) {
248 final ShardDataTreeCohort cohort = new SimpleShardDataTreeCohort(dataTree, message.getModification(),
249 message.getTransactionID());
250 final CohortEntry cohortEntry = new CohortEntry(message.getTransactionID(), cohort);
251 cohortCache.put(message.getTransactionID(), cohortEntry);
252 cohortEntry.setDoImmediateCommit(message.isDoCommitOnReady());
254 if(!queueCohortEntry(cohortEntry, sender, shard)) {
258 log.debug("{}: Applying local modifications for Tx {}", name, message.getTransactionID());
260 if (message.isDoCommitOnReady()) {
261 cohortEntry.setReplySender(sender);
262 cohortEntry.setShard(shard);
263 handleCanCommit(cohortEntry);
265 sender.tell(readyTransactionReply(shard), shard.self());
269 private void handleCanCommit(CohortEntry cohortEntry) {
270 String transactionID = cohortEntry.getTransactionID();
272 cohortEntry.updateLastAccessTime();
274 if(currentCohortEntry != null) {
275 // There's already a Tx commit in progress so we can't process this entry yet - but it's in the
276 // queue and will get processed after all prior entries complete.
278 if(log.isDebugEnabled()) {
279 log.debug("{}: Commit for Tx {} already in progress - skipping canCommit for {} for now",
280 name, currentCohortEntry.getTransactionID(), transactionID);
286 // No Tx commit currently in progress - check if this entry is the next one in the queue, If so make
287 // it the current entry and proceed with canCommit.
288 // Purposely checking reference equality here.
289 if(queuedCohortEntries.peek() == cohortEntry) {
290 currentCohortEntry = queuedCohortEntries.poll();
291 doCanCommit(currentCohortEntry);
293 if(log.isDebugEnabled()) {
294 log.debug("{}: Tx {} is the next pending canCommit - skipping {} for now",
295 name, queuedCohortEntries.peek().getTransactionID(), transactionID);
301 * This method handles the canCommit phase for a transaction.
303 * @param transactionID the ID of the transaction to canCommit
304 * @param sender the actor to which to send the response
305 * @param shard the transaction's shard actor
307 void handleCanCommit(String transactionID, final ActorRef sender, final Shard shard) {
308 // Lookup the cohort entry that was cached previously (or should have been) by
309 // transactionReady (via the ForwardedReadyTransaction message).
310 final CohortEntry cohortEntry = cohortCache.get(transactionID);
311 if(cohortEntry == null) {
312 // Either canCommit was invoked before ready(shouldn't happen) or a long time passed
313 // between canCommit and ready and the entry was expired from the cache.
314 IllegalStateException ex = new IllegalStateException(
315 String.format("%s: No cohort entry found for transaction %s", name, transactionID));
316 log.error(ex.getMessage());
317 sender.tell(new Failure(ex), shard.self());
321 cohortEntry.setReplySender(sender);
322 cohortEntry.setShard(shard);
324 handleCanCommit(cohortEntry);
327 private void doCanCommit(final CohortEntry cohortEntry) {
328 boolean canCommit = false;
330 canCommit = cohortEntry.canCommit();
332 log.debug("{}: canCommit for {}: {}", name, cohortEntry.getTransactionID(), canCommit);
334 if(cohortEntry.isDoImmediateCommit()) {
336 doCommit(cohortEntry);
338 cohortEntry.getReplySender().tell(new Failure(new TransactionCommitFailedException(
339 "Can Commit failed, no detailed cause available.")), cohortEntry.getShard().self());
342 cohortEntry.getReplySender().tell(
343 canCommit ? CanCommitTransactionReply.YES.toSerializable() :
344 CanCommitTransactionReply.NO.toSerializable(), cohortEntry.getShard().self());
346 } catch (Exception e) {
347 log.debug("{}: An exception occurred during canCommit", name, e);
349 Throwable failure = e;
350 if(e instanceof ExecutionException) {
351 failure = e.getCause();
354 cohortEntry.getReplySender().tell(new Failure(failure), cohortEntry.getShard().self());
357 // Remove the entry from the cache now.
358 currentTransactionComplete(cohortEntry.getTransactionID(), true);
363 private boolean doCommit(CohortEntry cohortEntry) {
364 log.debug("{}: Committing transaction {}", name, cohortEntry.getTransactionID());
366 boolean success = false;
368 // We perform the preCommit phase here atomically with the commit phase. This is an
369 // optimization to eliminate the overhead of an extra preCommit message. We lose front-end
370 // coordination of preCommit across shards in case of failure but preCommit should not
371 // normally fail since we ensure only one concurrent 3-phase commit.
374 cohortEntry.preCommit();
376 cohortEntry.getShard().continueCommit(cohortEntry);
378 cohortEntry.updateLastAccessTime();
381 } catch (Exception e) {
382 log.error("{} An exception occurred while preCommitting transaction {}",
383 name, cohortEntry.getTransactionID(), e);
384 cohortEntry.getReplySender().tell(new Failure(e), cohortEntry.getShard().self());
386 currentTransactionComplete(cohortEntry.getTransactionID(), true);
393 * This method handles the preCommit and commit phases for a transaction.
395 * @param transactionID the ID of the transaction to commit
396 * @param sender the actor to which to send the response
397 * @param shard the transaction's shard actor
398 * @return true if the transaction was successfully prepared, false otherwise.
400 boolean handleCommit(final String transactionID, final ActorRef sender, final Shard shard) {
401 // Get the current in-progress cohort entry in the commitCoordinator if it corresponds to
403 final CohortEntry cohortEntry = getCohortEntryIfCurrent(transactionID);
404 if(cohortEntry == null) {
405 // We're not the current Tx - the Tx was likely expired b/c it took too long in
406 // between the canCommit and commit messages.
407 IllegalStateException ex = new IllegalStateException(
408 String.format("%s: Cannot commit transaction %s - it is not the current transaction",
409 name, transactionID));
410 log.error(ex.getMessage());
411 sender.tell(new Failure(ex), shard.self());
415 cohortEntry.setReplySender(sender);
416 return doCommit(cohortEntry);
419 void handleAbort(final String transactionID, final ActorRef sender, final Shard shard) {
420 CohortEntry cohortEntry = getCohortEntryIfCurrent(transactionID);
421 if(cohortEntry != null) {
422 // We don't remove the cached cohort entry here (ie pass false) in case the Tx was
423 // aborted during replication in which case we may still commit locally if replication
425 currentTransactionComplete(transactionID, false);
427 cohortEntry = getAndRemoveCohortEntry(transactionID);
430 if(cohortEntry == null) {
434 log.debug("{}: Aborting transaction {}", name, transactionID);
436 final ActorRef self = shard.getSelf();
440 shard.getShardMBean().incrementAbortTransactionsCount();
443 sender.tell(new AbortTransactionReply().toSerializable(), self);
445 } catch (Exception e) {
446 log.error("{}: An exception happened during abort", name, e);
449 sender.tell(new Failure(e), self);
454 void checkForExpiredTransactions(final long timeout, final Shard shard) {
455 CohortEntry cohortEntry = getCurrentCohortEntry();
456 if(cohortEntry != null) {
457 if(cohortEntry.isExpired(timeout)) {
458 log.warn("{}: Current transaction {} has timed out after {} ms - aborting",
459 name, cohortEntry.getTransactionID(), timeout);
461 handleAbort(cohortEntry.getTransactionID(), null, shard);
465 cleanupExpiredCohortEntries();
468 void abortPendingTransactions(final String reason, final Shard shard) {
469 if(currentCohortEntry == null && queuedCohortEntries.isEmpty()) {
473 List<CohortEntry> cohortEntries = new ArrayList<>();
475 if(currentCohortEntry != null) {
476 cohortEntries.add(currentCohortEntry);
477 currentCohortEntry = null;
480 cohortEntries.addAll(queuedCohortEntries);
481 queuedCohortEntries.clear();
483 for(CohortEntry cohortEntry: cohortEntries) {
484 if(cohortEntry.getReplySender() != null) {
485 cohortEntry.getReplySender().tell(new Failure(new RuntimeException(reason)), shard.self());
491 * Returns the cohort entry for the Tx commit currently in progress if the given transaction ID
492 * matches the current entry.
494 * @param transactionID the ID of the transaction
495 * @return the current CohortEntry or null if the given transaction ID does not match the
498 CohortEntry getCohortEntryIfCurrent(String transactionID) {
499 if(isCurrentTransaction(transactionID)) {
500 return currentCohortEntry;
506 CohortEntry getCurrentCohortEntry() {
507 return currentCohortEntry;
510 CohortEntry getAndRemoveCohortEntry(String transactionID) {
511 return cohortCache.remove(transactionID);
514 boolean isCurrentTransaction(String transactionID) {
515 return currentCohortEntry != null &&
516 currentCohortEntry.getTransactionID().equals(transactionID);
520 * This method is called when a transaction is complete, successful or not. If the given
521 * given transaction ID matches the current in-progress transaction, the next cohort entry,
522 * if any, is dequeued and processed.
524 * @param transactionID the ID of the completed transaction
525 * @param removeCohortEntry if true the CohortEntry for the transaction is also removed from
528 void currentTransactionComplete(String transactionID, boolean removeCohortEntry) {
529 if(removeCohortEntry) {
530 cohortCache.remove(transactionID);
533 if(isCurrentTransaction(transactionID)) {
534 currentCohortEntry = null;
536 log.debug("{}: currentTransactionComplete: {}", name, transactionID);
538 maybeProcessNextCohortEntry();
542 private void maybeProcessNextCohortEntry() {
543 // Check if there's a next cohort entry waiting in the queue and if it is ready to commit. Also
544 // clean out expired entries.
545 Iterator<CohortEntry> iter = queuedCohortEntries.iterator();
546 while(iter.hasNext()) {
547 CohortEntry next = iter.next();
548 if(next.isReadyToCommit()) {
549 if(currentCohortEntry == null) {
550 if(log.isDebugEnabled()) {
551 log.debug("{}: Next entry to canCommit {}", name, next);
555 currentCohortEntry = next;
556 currentCohortEntry.updateLastAccessTime();
557 doCanCommit(currentCohortEntry);
561 } else if(next.isExpired(cacheExpiryTimeoutInMillis)) {
562 log.warn("{}: canCommit for transaction {} was not received within {} ms - entry removed from cache",
563 name, next.getTransactionID(), cacheExpiryTimeoutInMillis);
564 } else if(!next.isAborted()) {
569 cohortCache.remove(next.getTransactionID());
572 maybeRunOperationOnPendingTransactionsComplete();
575 void cleanupExpiredCohortEntries() {
576 maybeProcessNextCohortEntry();
579 void setRunOnPendingTransactionsComplete(Runnable operation) {
580 runOnPendingTransactionsComplete = operation;
581 maybeRunOperationOnPendingTransactionsComplete();
584 private void maybeRunOperationOnPendingTransactionsComplete() {
585 if(runOnPendingTransactionsComplete != null && currentCohortEntry == null && queuedCohortEntries.isEmpty()) {
586 log.debug("{}: Pending transactions complete - running operation {}", name, runOnPendingTransactionsComplete);
588 runOnPendingTransactionsComplete.run();
589 runOnPendingTransactionsComplete = null;
594 void setCohortDecorator(CohortDecorator cohortDecorator) {
595 this.cohortDecorator = cohortDecorator;
598 static class CohortEntry {
599 private final String transactionID;
600 private ShardDataTreeCohort cohort;
601 private final ReadWriteShardDataTreeTransaction transaction;
602 private RuntimeException lastBatchedModificationsException;
603 private ActorRef replySender;
605 private boolean doImmediateCommit;
606 private final Stopwatch lastAccessTimer = Stopwatch.createStarted();
607 private int totalBatchedModificationsReceived;
608 private boolean aborted;
/**
 * Creates an entry for a transaction that is still being prepared; the cohort is created later by
 * {@code ready}.
 *
 * @param transactionID the ID of the transaction
 * @param transaction the transaction to which modifications are applied; must not be null
 */
CohortEntry(String transactionID, ReadWriteShardDataTreeTransaction transaction) {
    this.transaction = Preconditions.checkNotNull(transaction);
    this.transactionID = transactionID;
}
/**
 * Creates an entry for an already prepared cohort; such an entry has no transaction.
 *
 * @param transactionID the ID of the transaction
 * @param cohort the cohort that drives the commit phases
 */
CohortEntry(String transactionID, ShardDataTreeCohort cohort) {
    this.transactionID = transactionID;
    this.cohort = cohort;
    this.transaction = null;
}
621 void updateLastAccessTime() {
622 lastAccessTimer.reset();
623 lastAccessTimer.start();
626 String getTransactionID() {
627 return transactionID;
630 DataTreeCandidate getCandidate() {
631 return cohort.getCandidate();
634 int getTotalBatchedModificationsReceived() {
635 return totalBatchedModificationsReceived;
638 RuntimeException getLastBatchedModificationsException() {
639 return lastBatchedModificationsException;
642 void applyModifications(Iterable<Modification> modifications) {
643 totalBatchedModificationsReceived++;
644 if(lastBatchedModificationsException == null) {
645 for (Modification modification : modifications) {
647 modification.apply(transaction.getSnapshot());
648 } catch (RuntimeException e) {
649 lastBatchedModificationsException = e;
656 boolean canCommit() throws InterruptedException, ExecutionException {
657 // We block on the future here (and also preCommit(), commit(), abort()) so we don't have to worry
658 // about possibly accessing our state on a different thread outside of our dispatcher.
659 // TODO: the ShardDataTreeCohort returns immediate Futures anyway which begs the question - why
660 // bother even returning Futures from ShardDataTreeCohort if we have to treat them synchronously
661 // anyway?. The Futures are really a remnant from when we were using the InMemoryDataBroker.
662 return cohort.canCommit().get();
665 void preCommit() throws InterruptedException, ExecutionException {
666 cohort.preCommit().get();
669 void commit() throws InterruptedException, ExecutionException {
670 cohort.commit().get();
673 void abort() throws InterruptedException, ExecutionException {
675 cohort.abort().get();
678 void ready(CohortDecorator cohortDecorator, boolean doImmediateCommit) {
679 Preconditions.checkState(cohort == null, "cohort was already set");
681 setDoImmediateCommit(doImmediateCommit);
683 cohort = transaction.ready();
685 if(cohortDecorator != null) {
686 // Call the hook for unit tests.
687 cohort = cohortDecorator.decorate(transactionID, cohort);
691 boolean isReadyToCommit() {
692 return replySender != null;
695 boolean isExpired(long expireTimeInMillis) {
696 return lastAccessTimer.elapsed(TimeUnit.MILLISECONDS) >= expireTimeInMillis;
699 boolean isDoImmediateCommit() {
700 return doImmediateCommit;
703 void setDoImmediateCommit(boolean doImmediateCommit) {
704 this.doImmediateCommit = doImmediateCommit;
707 ActorRef getReplySender() {
711 void setReplySender(ActorRef replySender) {
712 this.replySender = replySender;
719 void setShard(Shard shard) {
724 boolean isAborted() {
729 public String toString() {
730 StringBuilder builder = new StringBuilder();
731 builder.append("CohortEntry [transactionID=").append(transactionID).append(", doImmediateCommit=")
732 .append(doImmediateCommit).append("]");
733 return builder.toString();