X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FThreePhaseCommitCohortProxy.java;h=c51ea80726e54d9cf656e193ca8521d242206c76;hp=b56dc9432f0b28067ca2daaba1cd95f936cb816e;hb=6056a678a4409d4d43a0f44b8d0326d387bb81e2;hpb=0eb621d29daaf08979c356e2148e99c48458e169 diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ThreePhaseCommitCohortProxy.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ThreePhaseCommitCohortProxy.java index b56dc9432f..c51ea80726 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ThreePhaseCommitCohortProxy.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ThreePhaseCommitCohortProxy.java @@ -8,146 +8,361 @@ package org.opendaylight.controller.cluster.datastore; -import akka.actor.ActorPath; import akka.actor.ActorSelection; +import akka.dispatch.Futures; +import akka.dispatch.OnComplete; +import com.codahale.metrics.Snapshot; +import com.codahale.metrics.Timer; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListenableFutureTask; -import org.opendaylight.controller.cluster.datastore.exceptions.TimeoutException; +import com.google.common.util.concurrent.SettableFuture; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.TimeUnit; import org.opendaylight.controller.cluster.datastore.messages.AbortTransaction; import org.opendaylight.controller.cluster.datastore.messages.AbortTransactionReply; import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransaction; import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransactionReply; import org.opendaylight.controller.cluster.datastore.messages.CommitTransaction; import org.opendaylight.controller.cluster.datastore.messages.CommitTransactionReply; -import org.opendaylight.controller.cluster.datastore.messages.PreCommitTransaction; -import org.opendaylight.controller.cluster.datastore.messages.PreCommitTransactionReply; import org.opendaylight.controller.cluster.datastore.utils.ActorContext; import org.opendaylight.controller.sal.core.spi.data.DOMStoreThreePhaseCommitCohort; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import java.util.Collections; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; +import scala.concurrent.Future; +import scala.runtime.AbstractFunction1; /** * ThreePhaseCommitCohortProxy represents a set of remote cohort proxies */ -public class ThreePhaseCommitCohortProxy implements - DOMStoreThreePhaseCommitCohort{ +public class ThreePhaseCommitCohortProxy implements DOMStoreThreePhaseCommitCohort{ - private static final Logger - LOG = LoggerFactory.getLogger(DistributedDataStore.class); + private static final Logger LOG = LoggerFactory.getLogger(ThreePhaseCommitCohortProxy.class); + + private static final ListenableFuture IMMEDIATE_SUCCESS = + com.google.common.util.concurrent.Futures.immediateFuture(null); private final ActorContext actorContext; - private final List cohortPaths; - private final ExecutorService executor; + private final List> cohortFutures; + private volatile List cohorts; private final String transactionId; + private static final OperationCallback NO_OP_CALLBACK = new OperationCallback() { + @Override + public void run() { + } + @Override + public void success() { + } - public ThreePhaseCommitCohortProxy(ActorContext actorContext, - List cohortPaths, - String transactionId, - ExecutorService executor) { + @Override + public void failure() { + } + }; + public ThreePhaseCommitCohortProxy(ActorContext actorContext, + List> cohortFutures, String transactionId) { this.actorContext = actorContext; - this.cohortPaths = cohortPaths; + this.cohortFutures = cohortFutures; this.transactionId = transactionId; - this.executor = executor; } - @Override public ListenableFuture canCommit() { - Callable call = new Callable() { + private Future buildCohortList() { + + Future> combinedFutures = Futures.sequence(cohortFutures, + actorContext.getActorSystem().dispatcher()); + + return combinedFutures.transform(new AbstractFunction1, Void>() { + @Override + public Void apply(Iterable actorSelections) { + cohorts = Lists.newArrayList(actorSelections); + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {} successfully built cohort path list: {}", + transactionId, cohorts); + } + return null; + } + }, TransactionProxy.SAME_FAILURE_TRANSFORMER, actorContext.getActorSystem().dispatcher()); + } + + @Override + public ListenableFuture canCommit() { + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {} canCommit", transactionId); + } + final SettableFuture returnFuture = SettableFuture.create(); + + // The first phase of canCommit is to gather the list of cohort actor paths that will + // participate in the commit. buildCohortPathsList combines the cohort path Futures into + // one Future which we wait on asynchronously here. The cohort actor paths are + // extracted from ReadyTransactionReply messages by the Futures that were obtained earlier + // and passed to us from upstream processing. If any one fails then we'll fail canCommit. + + buildCohortList().onComplete(new OnComplete() { + @Override + public void onComplete(Throwable failure, Void notUsed) throws Throwable { + if(failure != null) { + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {}: a cohort Future failed: {}", transactionId, failure); + } + returnFuture.setException(failure); + } else { + finishCanCommit(returnFuture); + } + } + }, actorContext.getActorSystem().dispatcher()); + + return returnFuture; + } + + private void finishCanCommit(final SettableFuture returnFuture) { + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {} finishCanCommit", transactionId); + } + // The last phase of canCommit is to invoke all the cohort actors asynchronously to perform + // their canCommit processing. If any one fails then we'll fail canCommit. - @Override public Boolean call() throws Exception { - for(ActorPath actorPath : cohortPaths){ - ActorSelection cohort = actorContext.actorSelection(actorPath); + Future> combinedFuture = + invokeCohorts(new CanCommitTransaction(transactionId).toSerializable()); - try { - Object response = - actorContext.executeRemoteOperation(cohort, - new CanCommitTransaction().toSerializable(), - ActorContext.ASK_DURATION); + combinedFuture.onComplete(new OnComplete>() { + @Override + public void onComplete(Throwable failure, Iterable responses) throws Throwable { + if(failure != null) { + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {}: a canCommit cohort Future failed: {}", transactionId, failure); + } + returnFuture.setException(failure); + return; + } + boolean result = true; + for(Object response: responses) { if (response.getClass().equals(CanCommitTransactionReply.SERIALIZABLE_CLASS)) { CanCommitTransactionReply reply = - CanCommitTransactionReply.fromSerializable(response); + CanCommitTransactionReply.fromSerializable(response); if (!reply.getCanCommit()) { - return false; + result = false; + break; } + } else { + LOG.error("Unexpected response type {}", response.getClass()); + returnFuture.setException(new IllegalArgumentException( + String.format("Unexpected response type %s", response.getClass()))); + return; } - } catch(RuntimeException e){ - LOG.error("Unexpected Exception", e); - return false; } - - + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {}: canCommit returning result: {}", transactionId, result); + } + returnFuture.set(Boolean.valueOf(result)); } - return true; + }, actorContext.getActorSystem().dispatcher()); + } + + private Future> invokeCohorts(Object message) { + List> futureList = Lists.newArrayListWithCapacity(cohorts.size()); + for(ActorSelection cohort : cohorts) { + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {}: Sending {} to cohort {}", transactionId, message, cohort); } - }; + futureList.add(actorContext.executeOperationAsync(cohort, message, actorContext.getTransactionCommitOperationTimeout())); + } - ListenableFutureTask - future = ListenableFutureTask.create(call); + return Futures.sequence(futureList, actorContext.getActorSystem().dispatcher()); + } - executor.submit(future); + @Override + public ListenableFuture preCommit() { + // We don't need to do anything here - preCommit is done atomically with the commit phase + // by the shard. + return IMMEDIATE_SUCCESS; + } - return future; + @Override + public ListenableFuture abort() { + // Note - we pass false for propagateException. In the front-end data broker, this method + // is called when one of the 3 phases fails with an exception. We'd rather have that + // original exception propagated to the client. If our abort fails and we propagate the + // exception then that exception will supersede and suppress the original exception. But + // it's the original exception that is the root cause and of more interest to the client. + + return voidOperation("abort", new AbortTransaction(transactionId).toSerializable(), + AbortTransactionReply.SERIALIZABLE_CLASS, false); } - @Override public ListenableFuture preCommit() { - return voidOperation(new PreCommitTransaction().toSerializable(), PreCommitTransactionReply.SERIALIZABLE_CLASS); + @Override + public ListenableFuture commit() { + OperationCallback operationCallback = (cohortFutures.size() == 0) ? NO_OP_CALLBACK : + new CommitCallback(actorContext); + + return voidOperation("commit", new CommitTransaction(transactionId).toSerializable(), + CommitTransactionReply.SERIALIZABLE_CLASS, true, operationCallback); } - @Override public ListenableFuture abort() { - return voidOperation(new AbortTransaction().toSerializable(), AbortTransactionReply.SERIALIZABLE_CLASS); + private ListenableFuture voidOperation(final String operationName, final Object message, + final Class expectedResponseClass, final boolean propagateException) { + return voidOperation(operationName, message, expectedResponseClass, propagateException, NO_OP_CALLBACK); } - @Override public ListenableFuture commit() { - return voidOperation(new CommitTransaction().toSerializable(), CommitTransactionReply.SERIALIZABLE_CLASS); + private ListenableFuture voidOperation(final String operationName, final Object message, + final Class expectedResponseClass, final boolean propagateException, final OperationCallback callback) { + + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {} {}", transactionId, operationName); + } + final SettableFuture returnFuture = SettableFuture.create(); + + // The cohort actor list should already be built at this point by the canCommit phase but, + // if not for some reason, we'll try to build it here. + + if(cohorts != null) { + finishVoidOperation(operationName, message, expectedResponseClass, propagateException, + returnFuture, callback); + } else { + buildCohortList().onComplete(new OnComplete() { + @Override + public void onComplete(Throwable failure, Void notUsed) throws Throwable { + if(failure != null) { + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {}: a {} cohort path Future failed: {}", transactionId, + operationName, failure); + } + if(propagateException) { + returnFuture.setException(failure); + } else { + returnFuture.set(null); + } + } else { + finishVoidOperation(operationName, message, expectedResponseClass, + propagateException, returnFuture, callback); + } + } + }, actorContext.getActorSystem().dispatcher()); + } + + return returnFuture; } - private ListenableFuture voidOperation(final Object message, final Class expectedResponseClass){ - Callable call = new Callable() { - - @Override public Void call() throws Exception { - for(ActorPath actorPath : cohortPaths){ - ActorSelection cohort = actorContext.actorSelection(actorPath); - - try { - Object response = - actorContext.executeRemoteOperation(cohort, - message, - ActorContext.ASK_DURATION); - - if (response != null && !response.getClass() - .equals(expectedResponseClass)) { - throw new RuntimeException( - String.format( - "did not get the expected response \n\t\t expected : %s \n\t\t actual : %s", - expectedResponseClass.toString(), - response.getClass().toString()) - ); + private void finishVoidOperation(final String operationName, final Object message, + final Class expectedResponseClass, final boolean propagateException, + final SettableFuture returnFuture, final OperationCallback callback) { + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {} finish {}", transactionId, operationName); + } + + callback.run(); + + Future> combinedFuture = invokeCohorts(message); + + combinedFuture.onComplete(new OnComplete>() { + @Override + public void onComplete(Throwable failure, Iterable responses) throws Throwable { + + Throwable exceptionToPropagate = failure; + if(exceptionToPropagate == null) { + for(Object response: responses) { + if(!response.getClass().equals(expectedResponseClass)) { + exceptionToPropagate = new IllegalArgumentException( + String.format("Unexpected response type %s", + response.getClass())); + break; } - } catch(TimeoutException e){ - LOG.error(String.format("A timeout occurred when processing operation : %s", message)); } } - return null; - } - }; - ListenableFutureTask - future = ListenableFutureTask.create(call); + if(exceptionToPropagate != null) { - executor.submit(future); + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {}: a {} cohort Future failed: {}", transactionId, + operationName, exceptionToPropagate); + } + if(propagateException) { + // We don't log the exception here to avoid redundant logging since we're + // propagating to the caller in MD-SAL core who will log it. + returnFuture.setException(exceptionToPropagate); + } else { + // Since the caller doesn't want us to propagate the exception we'll also + // not log it normally. But it's usually not good to totally silence + // exceptions so we'll log it to debug level. + if(LOG.isDebugEnabled()) { + LOG.debug(String.format("%s failed", message.getClass().getSimpleName()), + exceptionToPropagate); + } + returnFuture.set(null); + } + + callback.failure(); + } else { - return future; + if(LOG.isDebugEnabled()) { + LOG.debug("Tx {}: {} succeeded", transactionId, operationName); + } + returnFuture.set(null); + callback.success(); + } + } + }, actorContext.getActorSystem().dispatcher()); + } + + @VisibleForTesting + List> getCohortFutures() { + return Collections.unmodifiableList(cohortFutures); } - public List getCohortPaths() { - return Collections.unmodifiableList(this.cohortPaths); + private static interface OperationCallback { + void run(); + void success(); + void failure(); } + + private static class CommitCallback implements OperationCallback{ + + private static final Logger LOG = LoggerFactory.getLogger(CommitCallback.class); + private static final String COMMIT = "commit"; + + private final Timer commitTimer; + private final ActorContext actorContext; + private Timer.Context timerContext; + + CommitCallback(ActorContext actorContext){ + this.actorContext = actorContext; + commitTimer = actorContext.getOperationTimer(COMMIT); + } + + @Override + public void run() { + timerContext = commitTimer.time(); + } + + @Override + public void success() { + timerContext.stop(); + + Snapshot timerSnapshot = commitTimer.getSnapshot(); + double allowedLatencyInNanos = timerSnapshot.get95thPercentile(); + + long commitTimeoutInSeconds = actorContext.getDatastoreContext() + .getShardTransactionCommitTimeoutInSeconds(); + long commitTimeoutInNanos = TimeUnit.SECONDS.toNanos(commitTimeoutInSeconds); + + // Here we are trying to find out how many transactions per second are allowed + double newRateLimit = ((double) commitTimeoutInNanos / allowedLatencyInNanos) / commitTimeoutInSeconds; + + LOG.debug("Data Store {} commit rateLimit adjusted to {} allowedLatencyInNanos = {}", + actorContext.getDataStoreType(), newRateLimit, allowedLatencyInNanos); + + actorContext.setTxCreationLimit(newRateLimit); + } + + @Override + public void failure() { + // This would mean we couldn't get a transaction completed in 30 seconds which is + // the default transaction commit timeout. Using the timeout information to figure out the rate limit is + // not going to be useful - so we leave it as it is + } + } + }