/*
 * Copyright (c) 2014 Brocade Communications Systems, Inc. and others. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html
 */
8 package org.opendaylight.controller.cluster.datastore;
import akka.actor.ActorRef;
import akka.actor.Status;
import akka.serialization.Serialization;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.opendaylight.controller.cluster.datastore.compat.BackwardsCompatibleThreePhaseCommitCohort;
import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
import org.opendaylight.controller.cluster.datastore.messages.BatchedModificationsReply;
import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransactionReply;
import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction;
import org.opendaylight.controller.cluster.datastore.messages.ReadyLocalTransaction;
import org.opendaylight.controller.cluster.datastore.messages.ReadyTransactionReply;
import org.opendaylight.controller.cluster.datastore.modification.Modification;
import org.opendaylight.controller.cluster.datastore.modification.MutableCompositeModification;
import org.opendaylight.controller.md.sal.common.api.data.TransactionCommitFailedException;
import org.slf4j.Logger;
/**
 * Coordinates commits for a shard ensuring only one concurrent 3-phase commit.
 *
 * @author Thomas Pantelis
 */
40 class ShardCommitCoordinator {
42 // Interface hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts.
43 public interface CohortDecorator {
44 ShardDataTreeCohort decorate(String transactionID, ShardDataTreeCohort actual);
47 private final Map<String, CohortEntry> cohortCache = new HashMap<>();
49 private CohortEntry currentCohortEntry;
51 private final ShardDataTree dataTree;
53 // We use a LinkedList here to avoid synchronization overhead with concurrent queue impls
54 // since this should only be accessed on the shard's dispatcher.
55 private final Queue<CohortEntry> queuedCohortEntries = new LinkedList<>();
57 private int queueCapacity;
59 private final Logger log;
61 private final String name;
63 private final long cacheExpiryTimeoutInMillis;
65 // This is a hook for unit tests to replace or decorate the DOMStoreThreePhaseCommitCohorts.
66 private CohortDecorator cohortDecorator;
68 private ReadyTransactionReply readyTransactionReply;
/**
 * Creates a commit coordinator.
 *
 * @param dataTree the shard's data tree; must not be null
 * @param cacheExpiryTimeoutInMillis the time, in milliseconds, after which a queued entry that has
 *        not received its canCommit is treated as expired
 * @param queueCapacity the maximum number of cohort entries allowed in the commit queue
 * @param shardActor the shard actor reference (not used by this constructor)
 * @param log the logger to write to
 * @param name the name used as a prefix in log and error messages
 */
ShardCommitCoordinator(ShardDataTree dataTree,
        long cacheExpiryTimeoutInMillis, int queueCapacity, ActorRef shardActor, Logger log, String name) {
    this.queueCapacity = queueCapacity;
    // log and name are final fields and must be assigned here.
    this.log = log;
    this.name = name;
    this.dataTree = Preconditions.checkNotNull(dataTree);
    this.cacheExpiryTimeoutInMillis = cacheExpiryTimeoutInMillis;
}
80 void setQueueCapacity(int queueCapacity) {
81 this.queueCapacity = queueCapacity;
84 private ReadyTransactionReply readyTransactionReply(Shard shard) {
85 if(readyTransactionReply == null) {
86 readyTransactionReply = new ReadyTransactionReply(Serialization.serializedActorPath(shard.self()));
89 return readyTransactionReply;
92 private boolean queueCohortEntry(CohortEntry cohortEntry, ActorRef sender, Shard shard) {
93 if(queuedCohortEntries.size() < queueCapacity) {
94 queuedCohortEntries.offer(cohortEntry);
97 cohortCache.remove(cohortEntry.getTransactionID());
99 RuntimeException ex = new RuntimeException(
100 String.format("%s: Could not enqueue transaction %s - the maximum commit queue"+
101 " capacity %d has been reached.",
102 name, cohortEntry.getTransactionID(), queueCapacity));
103 log.error(ex.getMessage());
104 sender.tell(new Status.Failure(ex), shard.self());
110 * This method is called to ready a transaction that was prepared by ShardTransaction actor. It caches
111 * the prepared cohort entry for the given transactions ID in preparation for the subsequent 3-phase commit.
113 * @param ready the ForwardedReadyTransaction message to process
114 * @param sender the sender of the message
115 * @param shard the transaction's shard actor
117 void handleForwardedReadyTransaction(ForwardedReadyTransaction ready, ActorRef sender, Shard shard) {
118 log.debug("{}: Readying transaction {}, client version {}", name,
119 ready.getTransactionID(), ready.getTxnClientVersion());
121 CohortEntry cohortEntry = new CohortEntry(ready.getTransactionID(), ready.getCohort(),
122 (MutableCompositeModification) ready.getModification());
123 cohortCache.put(ready.getTransactionID(), cohortEntry);
125 if(!queueCohortEntry(cohortEntry, sender, shard)) {
129 if(ready.getTxnClientVersion() < DataStoreVersions.LITHIUM_VERSION) {
130 // Return our actor path as we'll handle the three phase commit except if the Tx client
131 // version < Helium-1 version which means the Tx was initiated by a base Helium version node.
132 // In that case, the subsequent 3-phase commit messages won't contain the transactionId so to
133 // maintain backwards compatibility, we create a separate cohort actor to provide the compatible behavior.
134 ActorRef replyActorPath = shard.self();
135 if(ready.getTxnClientVersion() < DataStoreVersions.HELIUM_1_VERSION) {
136 log.debug("{}: Creating BackwardsCompatibleThreePhaseCommitCohort", name);
137 replyActorPath = shard.getContext().actorOf(BackwardsCompatibleThreePhaseCommitCohort.props(
138 ready.getTransactionID()));
141 ReadyTransactionReply readyTransactionReply =
142 new ReadyTransactionReply(Serialization.serializedActorPath(replyActorPath),
143 ready.getTxnClientVersion());
144 sender.tell(ready.isReturnSerialized() ? readyTransactionReply.toSerializable() :
145 readyTransactionReply, shard.self());
147 if(ready.isDoImmediateCommit()) {
148 cohortEntry.setDoImmediateCommit(true);
149 cohortEntry.setReplySender(sender);
150 cohortEntry.setShard(shard);
151 handleCanCommit(cohortEntry);
153 // The caller does not want immediate commit - the 3-phase commit will be coordinated by the
154 // front-end so send back a ReadyTransactionReply with our actor path.
155 sender.tell(readyTransactionReply(shard), shard.self());
161 * This method handles a BatchedModifications message for a transaction being prepared directly on the
162 * Shard actor instead of via a ShardTransaction actor. If there's no currently cached
163 * DOMStoreWriteTransaction, one is created. The batched modifications are applied to the write Tx. If
164 * the BatchedModifications is ready to commit then a DOMStoreThreePhaseCommitCohort is created.
166 * @param batched the BatchedModifications message to process
167 * @param sender the sender of the message
168 * @param shard the transaction's shard actor
170 void handleBatchedModifications(BatchedModifications batched, ActorRef sender, Shard shard) {
171 CohortEntry cohortEntry = cohortCache.get(batched.getTransactionID());
172 if(cohortEntry == null) {
173 cohortEntry = new CohortEntry(batched.getTransactionID(),
174 dataTree.newReadWriteTransaction(batched.getTransactionID(),
175 batched.getTransactionChainID()));
176 cohortCache.put(batched.getTransactionID(), cohortEntry);
179 if(log.isDebugEnabled()) {
180 log.debug("{}: Applying {} batched modifications for Tx {}", name,
181 batched.getModifications().size(), batched.getTransactionID());
184 cohortEntry.applyModifications(batched.getModifications());
186 if(batched.isReady()) {
187 if(cohortEntry.getLastBatchedModificationsException() != null) {
188 cohortCache.remove(cohortEntry.getTransactionID());
189 throw cohortEntry.getLastBatchedModificationsException();
192 if(cohortEntry.getTotalBatchedModificationsReceived() != batched.getTotalMessagesSent()) {
193 cohortCache.remove(cohortEntry.getTransactionID());
194 throw new IllegalStateException(String.format(
195 "The total number of batched messages received %d does not match the number sent %d",
196 cohortEntry.getTotalBatchedModificationsReceived(), batched.getTotalMessagesSent()));
199 if(!queueCohortEntry(cohortEntry, sender, shard)) {
203 if(log.isDebugEnabled()) {
204 log.debug("{}: Readying Tx {}, client version {}", name,
205 batched.getTransactionID(), batched.getVersion());
208 cohortEntry.ready(cohortDecorator, batched.isDoCommitOnReady());
210 if(batched.isDoCommitOnReady()) {
211 cohortEntry.setReplySender(sender);
212 cohortEntry.setShard(shard);
213 handleCanCommit(cohortEntry);
215 sender.tell(readyTransactionReply(shard), shard.self());
218 sender.tell(new BatchedModificationsReply(batched.getModifications().size()), shard.self());
223 * This method handles {@link ReadyLocalTransaction} message. All transaction modifications have
224 * been prepared beforehand by the sender and we just need to drive them through into the dataTree.
226 * @param message the ReadyLocalTransaction message to process
227 * @param sender the sender of the message
228 * @param shard the transaction's shard actor
230 void handleReadyLocalTransaction(ReadyLocalTransaction message, ActorRef sender, Shard shard) {
231 final ShardDataTreeCohort cohort = new SimpleShardDataTreeCohort(dataTree, message.getModification(),
232 message.getTransactionID());
233 final CohortEntry cohortEntry = new CohortEntry(message.getTransactionID(), cohort);
234 cohortCache.put(message.getTransactionID(), cohortEntry);
235 cohortEntry.setDoImmediateCommit(message.isDoCommitOnReady());
237 if(!queueCohortEntry(cohortEntry, sender, shard)) {
241 log.debug("{}: Applying local modifications for Tx {}", name, message.getTransactionID());
243 if (message.isDoCommitOnReady()) {
244 cohortEntry.setReplySender(sender);
245 cohortEntry.setShard(shard);
246 handleCanCommit(cohortEntry);
248 sender.tell(readyTransactionReply(shard), shard.self());
252 private void handleCanCommit(CohortEntry cohortEntry) {
253 String transactionID = cohortEntry.getTransactionID();
255 cohortEntry.updateLastAccessTime();
257 if(currentCohortEntry != null) {
258 // There's already a Tx commit in progress so we can't process this entry yet - but it's in the
259 // queue and will get processed after all prior entries complete.
261 if(log.isDebugEnabled()) {
262 log.debug("{}: Commit for Tx {} already in progress - skipping canCommit for {} for now",
263 name, currentCohortEntry.getTransactionID(), transactionID);
269 // No Tx commit currently in progress - check if this entry is the next one in the queue, If so make
270 // it the current entry and proceed with canCommit.
271 // Purposely checking reference equality here.
272 if(queuedCohortEntries.peek() == cohortEntry) {
273 currentCohortEntry = queuedCohortEntries.poll();
274 doCanCommit(currentCohortEntry);
276 if(log.isDebugEnabled()) {
277 log.debug("{}: Tx {} is the next pending canCommit - skipping {} for now",
278 name, queuedCohortEntries.peek().getTransactionID(), transactionID);
284 * This method handles the canCommit phase for a transaction.
286 * @param transactionID the ID of the transaction to canCommit
287 * @param sender the actor to which to send the response
288 * @param shard the transaction's shard actor
290 void handleCanCommit(String transactionID, final ActorRef sender, final Shard shard) {
291 // Lookup the cohort entry that was cached previously (or should have been) by
292 // transactionReady (via the ForwardedReadyTransaction message).
293 final CohortEntry cohortEntry = cohortCache.get(transactionID);
294 if(cohortEntry == null) {
295 // Either canCommit was invoked before ready(shouldn't happen) or a long time passed
296 // between canCommit and ready and the entry was expired from the cache.
297 IllegalStateException ex = new IllegalStateException(
298 String.format("%s: No cohort entry found for transaction %s", name, transactionID));
299 log.error(ex.getMessage());
300 sender.tell(new Status.Failure(ex), shard.self());
304 cohortEntry.setReplySender(sender);
305 cohortEntry.setShard(shard);
307 handleCanCommit(cohortEntry);
310 private void doCanCommit(final CohortEntry cohortEntry) {
311 boolean canCommit = false;
313 // We block on the future here so we don't have to worry about possibly accessing our
314 // state on a different thread outside of our dispatcher. Also, the data store
315 // currently uses a same thread executor anyway.
316 canCommit = cohortEntry.getCohort().canCommit().get();
318 log.debug("{}: canCommit for {}: {}", name, cohortEntry.getTransactionID(), canCommit);
320 if(cohortEntry.isDoImmediateCommit()) {
322 doCommit(cohortEntry);
324 cohortEntry.getReplySender().tell(new Status.Failure(new TransactionCommitFailedException(
325 "Can Commit failed, no detailed cause available.")), cohortEntry.getShard().self());
328 cohortEntry.getReplySender().tell(
329 canCommit ? CanCommitTransactionReply.YES.toSerializable() :
330 CanCommitTransactionReply.NO.toSerializable(), cohortEntry.getShard().self());
332 } catch (Exception e) {
333 log.debug("{}: An exception occurred during canCommit", name, e);
335 Throwable failure = e;
336 if(e instanceof ExecutionException) {
337 failure = e.getCause();
340 cohortEntry.getReplySender().tell(new Status.Failure(failure), cohortEntry.getShard().self());
343 // Remove the entry from the cache now.
344 currentTransactionComplete(cohortEntry.getTransactionID(), true);
349 private boolean doCommit(CohortEntry cohortEntry) {
350 log.debug("{}: Committing transaction {}", name, cohortEntry.getTransactionID());
352 boolean success = false;
354 // We perform the preCommit phase here atomically with the commit phase. This is an
355 // optimization to eliminate the overhead of an extra preCommit message. We lose front-end
356 // coordination of preCommit across shards in case of failure but preCommit should not
357 // normally fail since we ensure only one concurrent 3-phase commit.
360 // We block on the future here so we don't have to worry about possibly accessing our
361 // state on a different thread outside of our dispatcher. Also, the data store
362 // currently uses a same thread executor anyway.
363 cohortEntry.getCohort().preCommit().get();
365 cohortEntry.getShard().continueCommit(cohortEntry);
367 cohortEntry.updateLastAccessTime();
370 } catch (Exception e) {
371 log.error("{} An exception occurred while preCommitting transaction {}",
372 name, cohortEntry.getTransactionID(), e);
373 cohortEntry.getReplySender().tell(new akka.actor.Status.Failure(e), cohortEntry.getShard().self());
375 currentTransactionComplete(cohortEntry.getTransactionID(), true);
382 * This method handles the preCommit and commit phases for a transaction.
384 * @param transactionID the ID of the transaction to commit
385 * @param sender the actor to which to send the response
386 * @param shard the transaction's shard actor
387 * @return true if the transaction was successfully prepared, false otherwise.
389 boolean handleCommit(final String transactionID, final ActorRef sender, final Shard shard) {
390 // Get the current in-progress cohort entry in the commitCoordinator if it corresponds to
392 final CohortEntry cohortEntry = getCohortEntryIfCurrent(transactionID);
393 if(cohortEntry == null) {
394 // We're not the current Tx - the Tx was likely expired b/c it took too long in
395 // between the canCommit and commit messages.
396 IllegalStateException ex = new IllegalStateException(
397 String.format("%s: Cannot commit transaction %s - it is not the current transaction",
398 name, transactionID));
399 log.error(ex.getMessage());
400 sender.tell(new akka.actor.Status.Failure(ex), shard.self());
404 cohortEntry.setReplySender(sender);
405 return doCommit(cohortEntry);
409 * Returns the cohort entry for the Tx commit currently in progress if the given transaction ID
410 * matches the current entry.
412 * @param transactionID the ID of the transaction
413 * @return the current CohortEntry or null if the given transaction ID does not match the
416 public CohortEntry getCohortEntryIfCurrent(String transactionID) {
417 if(isCurrentTransaction(transactionID)) {
418 return currentCohortEntry;
424 public CohortEntry getCurrentCohortEntry() {
425 return currentCohortEntry;
428 public CohortEntry getAndRemoveCohortEntry(String transactionID) {
429 return cohortCache.remove(transactionID);
432 public boolean isCurrentTransaction(String transactionID) {
433 return currentCohortEntry != null &&
434 currentCohortEntry.getTransactionID().equals(transactionID);
438 * This method is called when a transaction is complete, successful or not. If the given
439 * given transaction ID matches the current in-progress transaction, the next cohort entry,
440 * if any, is dequeued and processed.
442 * @param transactionID the ID of the completed transaction
443 * @param removeCohortEntry if true the CohortEntry for the transaction is also removed from
446 public void currentTransactionComplete(String transactionID, boolean removeCohortEntry) {
447 if(removeCohortEntry) {
448 cohortCache.remove(transactionID);
451 if(isCurrentTransaction(transactionID)) {
452 currentCohortEntry = null;
454 log.debug("{}: currentTransactionComplete: {}", name, transactionID);
456 maybeProcessNextCohortEntry();
460 private void maybeProcessNextCohortEntry() {
461 // Check if there's a next cohort entry waiting in the queue and if it is ready to commit. Also
462 // clean out expired entries.
463 Iterator<CohortEntry> iter = queuedCohortEntries.iterator();
464 while(iter.hasNext()) {
465 CohortEntry next = iter.next();
466 if(next.isReadyToCommit()) {
467 if(currentCohortEntry == null) {
468 if(log.isDebugEnabled()) {
469 log.debug("{}: Next entry to canCommit {}", name, next);
473 currentCohortEntry = next;
474 currentCohortEntry.updateLastAccessTime();
475 doCanCommit(currentCohortEntry);
479 } else if(next.isExpired(cacheExpiryTimeoutInMillis)) {
480 log.warn("{}: canCommit for transaction {} was not received within {} ms - entry removed from cache",
481 name, next.getTransactionID(), cacheExpiryTimeoutInMillis);
484 cohortCache.remove(next.getTransactionID());
491 void cleanupExpiredCohortEntries() {
492 maybeProcessNextCohortEntry();
496 void setCohortDecorator(CohortDecorator cohortDecorator) {
497 this.cohortDecorator = cohortDecorator;
500 static class CohortEntry {
501 private final String transactionID;
502 private ShardDataTreeCohort cohort;
503 private final ReadWriteShardDataTreeTransaction transaction;
504 private RuntimeException lastBatchedModificationsException;
505 private ActorRef replySender;
507 private boolean doImmediateCommit;
508 private final Stopwatch lastAccessTimer = Stopwatch.createStarted();
509 private int totalBatchedModificationsReceived;
511 CohortEntry(String transactionID, ReadWriteShardDataTreeTransaction transaction) {
512 this.transaction = Preconditions.checkNotNull(transaction);
513 this.transactionID = transactionID;
516 CohortEntry(String transactionID, ShardDataTreeCohort cohort,
517 MutableCompositeModification compositeModification) {
518 this.transactionID = transactionID;
519 this.cohort = cohort;
520 this.transaction = null;
523 CohortEntry(String transactionID, ShardDataTreeCohort cohort) {
524 this.transactionID = transactionID;
525 this.cohort = cohort;
526 this.transaction = null;
529 void updateLastAccessTime() {
530 lastAccessTimer.reset();
531 lastAccessTimer.start();
534 String getTransactionID() {
535 return transactionID;
538 ShardDataTreeCohort getCohort() {
542 int getTotalBatchedModificationsReceived() {
543 return totalBatchedModificationsReceived;
546 RuntimeException getLastBatchedModificationsException() {
547 return lastBatchedModificationsException;
550 void applyModifications(Iterable<Modification> modifications) {
551 totalBatchedModificationsReceived++;
552 if(lastBatchedModificationsException == null) {
553 for (Modification modification : modifications) {
555 modification.apply(transaction.getSnapshot());
556 } catch (RuntimeException e) {
557 lastBatchedModificationsException = e;
564 void ready(CohortDecorator cohortDecorator, boolean doImmediateCommit) {
565 Preconditions.checkState(cohort == null, "cohort was already set");
567 setDoImmediateCommit(doImmediateCommit);
569 cohort = transaction.ready();
571 if(cohortDecorator != null) {
572 // Call the hook for unit tests.
573 cohort = cohortDecorator.decorate(transactionID, cohort);
577 boolean isReadyToCommit() {
578 return replySender != null;
581 boolean isExpired(long expireTimeInMillis) {
582 return lastAccessTimer.elapsed(TimeUnit.MILLISECONDS) >= expireTimeInMillis;
585 boolean isDoImmediateCommit() {
586 return doImmediateCommit;
589 void setDoImmediateCommit(boolean doImmediateCommit) {
590 this.doImmediateCommit = doImmediateCommit;
593 ActorRef getReplySender() {
597 void setReplySender(ActorRef replySender) {
598 this.replySender = replySender;
605 void setShard(Shard shard) {
610 public String toString() {
611 StringBuilder builder = new StringBuilder();
612 builder.append("CohortEntry [transactionID=").append(transactionID).append(", doImmediateCommit=")
613 .append(doImmediateCommit).append("]");
614 return builder.toString();