opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/Shard.java

   1 /*
   2  * Copyright (c) 2014 Cisco Systems, Inc. and others.  All rights reserved.
   3  *
   4  * This program and the accompanying materials are made available under the
   5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
   6  * and is available at http://www.eclipse.org/legal/epl-v10.html
   7  */
   8
   9 package org.opendaylight.controller.cluster.datastore;
  10
  11 import akka.actor.ActorRef;
  12 import akka.actor.ActorSelection;
  13 import akka.actor.Cancellable;
  14 import akka.actor.Props;
  15 import akka.japi.Creator;
  16 import akka.persistence.RecoveryFailure;
  17 import akka.serialization.Serialization;
  18 import com.google.common.annotations.VisibleForTesting;
  19 import com.google.common.base.Optional;
  20 import com.google.common.base.Preconditions;
  21 import com.google.common.util.concurrent.FutureCallback;
  22 import com.google.common.util.concurrent.Futures;
  23 import com.google.common.util.concurrent.ListenableFuture;
  24 import java.io.IOException;
  25 import java.util.HashMap;
  26 import java.util.Map;
  27 import java.util.concurrent.TimeUnit;
  28 import javax.annotation.Nonnull;
  29 import org.opendaylight.controller.cluster.common.actor.CommonConfig;
  30 import org.opendaylight.controller.cluster.common.actor.MeteringBehavior;
  31 import org.opendaylight.controller.cluster.datastore.ShardCommitCoordinator.CohortEntry;
  32 import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException;
  33 import org.opendaylight.controller.cluster.datastore.identifiers.ShardIdentifier;
  34 import org.opendaylight.controller.cluster.datastore.identifiers.ShardTransactionIdentifier;
  35 import org.opendaylight.controller.cluster.datastore.jmx.mbeans.shard.ShardMBeanFactory;
  36 import org.opendaylight.controller.cluster.datastore.jmx.mbeans.shard.ShardStats;
  37 import org.opendaylight.controller.cluster.datastore.messages.AbortTransaction;
  38 import org.opendaylight.controller.cluster.datastore.messages.AbortTransactionReply;
  39 import org.opendaylight.controller.cluster.datastore.messages.ActorInitialized;
  40 import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
  41 import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransaction;
  42 import org.opendaylight.controller.cluster.datastore.messages.CloseTransactionChain;
  43 import org.opendaylight.controller.cluster.datastore.messages.CommitTransaction;
  44 import org.opendaylight.controller.cluster.datastore.messages.CommitTransactionReply;
  45 import org.opendaylight.controller.cluster.datastore.messages.CreateTransaction;
  46 import org.opendaylight.controller.cluster.datastore.messages.CreateTransactionReply;
  47 import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction;
  48 import org.opendaylight.controller.cluster.datastore.messages.PeerAddressResolved;
  49 import org.opendaylight.controller.cluster.datastore.messages.RegisterChangeListener;
  50 import org.opendaylight.controller.cluster.datastore.messages.RegisterDataTreeChangeListener;
  51 import org.opendaylight.controller.cluster.datastore.messages.UpdateSchemaContext;
  52 import org.opendaylight.controller.cluster.datastore.modification.Modification;
  53 import org.opendaylight.controller.cluster.datastore.modification.ModificationPayload;
  54 import org.opendaylight.controller.cluster.datastore.modification.MutableCompositeModification;
  55 import org.opendaylight.controller.cluster.datastore.utils.Dispatchers;
  56 import org.opendaylight.controller.cluster.datastore.utils.MessageTracker;
  57 import org.opendaylight.controller.cluster.notifications.RegisterRoleChangeListener;
  58 import org.opendaylight.controller.cluster.notifications.RoleChangeNotifier;
  59 import org.opendaylight.controller.cluster.raft.RaftActor;
  60 import org.opendaylight.controller.cluster.raft.RaftActorRecoveryCohort;
  61 import org.opendaylight.controller.cluster.raft.RaftActorSnapshotCohort;
  62 import org.opendaylight.controller.cluster.raft.base.messages.FollowerInitialSyncUpStatus;
  63 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
  64 import org.opendaylight.controller.cluster.raft.protobuff.client.messages.CompositeModificationByteStringPayload;
  65 import org.opendaylight.controller.cluster.raft.protobuff.client.messages.CompositeModificationPayload;
  66 import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTreeCandidate;
  67 import org.opendaylight.yangtools.yang.data.api.schema.tree.DataValidationFailedException;
  68 import org.opendaylight.yangtools.yang.data.api.schema.tree.ModificationType;
  69 import org.opendaylight.yangtools.yang.model.api.SchemaContext;
  70 import scala.concurrent.duration.Duration;
  71 import scala.concurrent.duration.FiniteDuration;
  72
/**
 * A Shard represents a portion of the logical data tree. <br/>
 * <p>
 * Our Shard uses InMemoryDataTree as its internal representation and delegates all requests to it.
 * </p>
 */
  79 public class Shard extends RaftActor {
  80
  81     private static final Object TX_COMMIT_TIMEOUT_CHECK_MESSAGE = "txCommitTimeoutCheck";
  82
  83     @VisibleForTesting
  84     static final String DEFAULT_NAME = "default";
  85
  86     // The state of this Shard
  87     private final ShardDataTree store;
  88
  89     /// The name of this shard
  90     private final String name;
  91
  92     private final ShardStats shardMBean;
  93
  94     private DatastoreContext datastoreContext;
  95
  96     private final ShardCommitCoordinator commitCoordinator;
  97
  98     private long transactionCommitTimeout;
  99
 100     private Cancellable txCommitTimeoutCheckSchedule;
 101
 102     private final Optional<ActorRef> roleChangeNotifier;
 103
 104     private final MessageTracker appendEntriesReplyTracker;
 105
 106     private final ShardTransactionActorFactory transactionActorFactory;
 107
 108     private final ShardSnapshotCohort snapshotCohort;
 109
 110     private final DataTreeChangeListenerSupport treeChangeSupport = new DataTreeChangeListenerSupport(this);
 111     private final DataChangeListenerSupport changeSupport = new DataChangeListenerSupport(this);
 112
 113     protected Shard(final ShardIdentifier name, final Map<String, String> peerAddresses,
 114             final DatastoreContext datastoreContext, final SchemaContext schemaContext) {
 115         super(name.toString(), new HashMap<>(peerAddresses), Optional.of(datastoreContext.getShardRaftConfig()));
 116
 117         this.name = name.toString();
 118         this.datastoreContext = datastoreContext;
 119
 120         setPersistence(datastoreContext.isPersistent());
 121
 122         LOG.info("Shard created : {}, persistent : {}", name, datastoreContext.isPersistent());
 123
 124         store = new ShardDataTree(schemaContext);
 125
 126         shardMBean = ShardMBeanFactory.getShardStatsMBean(name.toString(),
 127                 datastoreContext.getDataStoreMXBeanType());
 128         shardMBean.setShardActor(getSelf());
 129
 130         if (isMetricsCaptureEnabled()) {
 131             getContext().become(new MeteringBehavior(this));
 132         }
 133
 134         commitCoordinator = new ShardCommitCoordinator(store,
 135                 TimeUnit.SECONDS.convert(5, TimeUnit.MINUTES),
 136                 datastoreContext.getShardTransactionCommitQueueCapacity(), self(), LOG, this.name);
 137
 138         setTransactionCommitTimeout();
 139
 140         // create a notifier actor for each cluster member
 141         roleChangeNotifier = createRoleChangeNotifier(name.toString());
 142
 143         appendEntriesReplyTracker = new MessageTracker(AppendEntriesReply.class,
 144                 getRaftActorContext().getConfigParams().getIsolatedCheckIntervalInMillis());
 145
 146         transactionActorFactory = new ShardTransactionActorFactory(store, datastoreContext,
 147                 new Dispatchers(context().system().dispatchers()).getDispatcherPath(
 148                         Dispatchers.DispatcherType.Transaction), self(), getContext(), shardMBean);
 149
 150         snapshotCohort = new ShardSnapshotCohort(transactionActorFactory, store, LOG, this.name);
 151     }
 152
 153     private void setTransactionCommitTimeout() {
 154         transactionCommitTimeout = TimeUnit.MILLISECONDS.convert(
 155                 datastoreContext.getShardTransactionCommitTimeoutInSeconds(), TimeUnit.SECONDS);
 156     }
 157
 158     public static Props props(final ShardIdentifier name,
 159         final Map<String, String> peerAddresses,
 160         final DatastoreContext datastoreContext, final SchemaContext schemaContext) {
 161         Preconditions.checkNotNull(name, "name should not be null");
 162         Preconditions.checkNotNull(peerAddresses, "peerAddresses should not be null");
 163         Preconditions.checkNotNull(datastoreContext, "dataStoreContext should not be null");
 164         Preconditions.checkNotNull(schemaContext, "schemaContext should not be null");
 165
 166         return Props.create(new ShardCreator(name, peerAddresses, datastoreContext, schemaContext));
 167     }
 168
 169     private Optional<ActorRef> createRoleChangeNotifier(String shardId) {
 170         ActorRef shardRoleChangeNotifier = this.getContext().actorOf(
 171             RoleChangeNotifier.getProps(shardId), shardId + "-notifier");
 172         return Optional.of(shardRoleChangeNotifier);
 173     }
 174
 175     @Override
 176     public void postStop() {
 177         LOG.info("Stopping Shard {}", persistenceId());
 178
 179         super.postStop();
 180
 181         if(txCommitTimeoutCheckSchedule != null) {
 182             txCommitTimeoutCheckSchedule.cancel();
 183         }
 184
 185         shardMBean.unregisterMBean();
 186     }
 187
 188     @Override
 189     public void onReceiveRecover(final Object message) throws Exception {
 190         if(LOG.isDebugEnabled()) {
 191             LOG.debug("{}: onReceiveRecover: Received message {} from {}", persistenceId(),
 192                 message.getClass().toString(), getSender());
 193         }
 194
 195         if (message instanceof RecoveryFailure){
 196             LOG.error("{}: Recovery failed because of this cause",
 197                     persistenceId(), ((RecoveryFailure) message).cause());
 198
 199             // Even though recovery failed, we still need to finish our recovery, eg send the
 200             // ActorInitialized message and start the txCommitTimeoutCheckSchedule.
 201             onRecoveryComplete();
 202         } else {
 203             super.onReceiveRecover(message);
 204             if(LOG.isTraceEnabled()) {
 205                 appendEntriesReplyTracker.begin();
 206             }
 207         }
 208     }
 209
 210     @Override
 211     public void onReceiveCommand(final Object message) throws Exception {
 212
 213         MessageTracker.Context context = appendEntriesReplyTracker.received(message);
 214
 215         if(context.error().isPresent()){
 216             LOG.trace("{} : AppendEntriesReply failed to arrive at the expected interval {}", persistenceId(),
 217                     context.error());
 218         }
 219
 220         try {
 221             if (CreateTransaction.SERIALIZABLE_CLASS.isInstance(message)) {
 222                 handleCreateTransaction(message);
 223             } else if (BatchedModifications.class.isInstance(message)) {
 224                 handleBatchedModifications((BatchedModifications)message);
 225             } else if (message instanceof ForwardedReadyTransaction) {
 226                 commitCoordinator.handleForwardedReadyTransaction((ForwardedReadyTransaction) message,
 227                         getSender(), this);
 228             } else if (CanCommitTransaction.SERIALIZABLE_CLASS.isInstance(message)) {
 229                 handleCanCommitTransaction(CanCommitTransaction.fromSerializable(message));
 230             } else if (CommitTransaction.SERIALIZABLE_CLASS.isInstance(message)) {
 231                 handleCommitTransaction(CommitTransaction.fromSerializable(message));
 232             } else if (AbortTransaction.SERIALIZABLE_CLASS.isInstance(message)) {
 233                 handleAbortTransaction(AbortTransaction.fromSerializable(message));
 234             } else if (CloseTransactionChain.SERIALIZABLE_CLASS.isInstance(message)) {
 235                 closeTransactionChain(CloseTransactionChain.fromSerializable(message));
 236             } else if (message instanceof RegisterChangeListener) {
 237                 changeSupport.onMessage((RegisterChangeListener) message, isLeader());
 238             } else if (message instanceof RegisterDataTreeChangeListener) {
 239                 treeChangeSupport.onMessage((RegisterDataTreeChangeListener) message, isLeader());
 240             } else if (message instanceof UpdateSchemaContext) {
 241                 updateSchemaContext((UpdateSchemaContext) message);
 242             } else if (message instanceof PeerAddressResolved) {
 243                 PeerAddressResolved resolved = (PeerAddressResolved) message;
 244                 setPeerAddress(resolved.getPeerId().toString(),
 245                         resolved.getPeerAddress());
 246             } else if (message.equals(TX_COMMIT_TIMEOUT_CHECK_MESSAGE)) {
 247                 handleTransactionCommitTimeoutCheck();
 248             } else if(message instanceof DatastoreContext) {
 249                 onDatastoreContext((DatastoreContext)message);
 250             } else if(message instanceof RegisterRoleChangeListener){
 251                 roleChangeNotifier.get().forward(message, context());
 252             } else if (message instanceof FollowerInitialSyncUpStatus){
 253                 shardMBean.setFollowerInitialSyncStatus(((FollowerInitialSyncUpStatus) message).isInitialSyncDone());
 254                 context().parent().tell(message, self());
 255             } else {
 256                 super.onReceiveCommand(message);
 257             }
 258         } finally {
 259             context.done();
 260         }
 261     }
 262
 263     @Override
 264     protected Optional<ActorRef> getRoleChangeNotifier() {
 265         return roleChangeNotifier;
 266     }
 267
 268     private void onDatastoreContext(DatastoreContext context) {
 269         datastoreContext = context;
 270
 271         commitCoordinator.setQueueCapacity(datastoreContext.getShardTransactionCommitQueueCapacity());
 272
 273         setTransactionCommitTimeout();
 274
 275         if(datastoreContext.isPersistent() && !persistence().isRecoveryApplicable()) {
 276             setPersistence(true);
 277         } else if(!datastoreContext.isPersistent() && persistence().isRecoveryApplicable()) {
 278             setPersistence(false);
 279         }
 280
 281         updateConfigParams(datastoreContext.getShardRaftConfig());
 282     }
 283
 284     private void handleTransactionCommitTimeoutCheck() {
 285         CohortEntry cohortEntry = commitCoordinator.getCurrentCohortEntry();
 286         if(cohortEntry != null) {
 287             long elapsed = System.currentTimeMillis() - cohortEntry.getLastAccessTime();
 288             if(elapsed > transactionCommitTimeout) {
 289                 LOG.warn("{}: Current transaction {} has timed out after {} ms - aborting",
 290                         persistenceId(), cohortEntry.getTransactionID(), transactionCommitTimeout);
 291
 292                 doAbortTransaction(cohortEntry.getTransactionID(), null);
 293             }
 294         }
 295     }
 296
 297     private static boolean isEmptyCommit(final DataTreeCandidate candidate) {
 298         return ModificationType.UNMODIFIED.equals(candidate.getRootNode().getModificationType());
 299     }
 300
 301     void continueCommit(final CohortEntry cohortEntry) throws Exception {
 302         final DataTreeCandidate candidate = cohortEntry.getCohort().getCandidate();
 303
 304         // If we do not have any followers and we are not using persistence
 305         // or if cohortEntry has no modifications
 306         // we can apply modification to the state immediately
 307         if ((!hasFollowers() && !persistence().isRecoveryApplicable()) || isEmptyCommit(candidate)) {
 308             applyModificationToState(getSender(), cohortEntry.getTransactionID(), candidate);
 309         } else {
 310             Shard.this.persistData(getSender(), cohortEntry.getTransactionID(),
 311                 DataTreeCandidatePayload.create(candidate));
 312         }
 313     }
 314
 315     private void handleCommitTransaction(final CommitTransaction commit) {
 316         if(!commitCoordinator.handleCommit(commit.getTransactionID(), getSender(), this)) {
 317             shardMBean.incrementFailedTransactionsCount();
 318         }
 319     }
 320
 321     private void finishCommit(@Nonnull final ActorRef sender, @Nonnull final String transactionID, @Nonnull final CohortEntry cohortEntry) {
 322         LOG.debug("{}: Finishing commit for transaction {}", persistenceId(), cohortEntry.getTransactionID());
 323
 324         try {
 325             // We block on the future here so we don't have to worry about possibly accessing our
 326             // state on a different thread outside of our dispatcher. Also, the data store
 327             // currently uses a same thread executor anyway.
 328             cohortEntry.getCohort().commit().get();
 329
 330             sender.tell(CommitTransactionReply.INSTANCE.toSerializable(), getSelf());
 331
 332             shardMBean.incrementCommittedTransactionCount();
 333             shardMBean.setLastCommittedTransactionTime(System.currentTimeMillis());
 334
 335         } catch (Exception e) {
 336             sender.tell(new akka.actor.Status.Failure(e), getSelf());
 337
 338             LOG.error("{}, An exception occurred while committing transaction {}", persistenceId(),
 339                     transactionID, e);
 340             shardMBean.incrementFailedTransactionsCount();
 341         } finally {
 342             commitCoordinator.currentTransactionComplete(transactionID, true);
 343         }
 344     }
 345
 346     private void finishCommit(@Nonnull final ActorRef sender, final @Nonnull String transactionID) {
 347         // With persistence enabled, this method is called via applyState by the leader strategy
 348         // after the commit has been replicated to a majority of the followers.
 349
 350         CohortEntry cohortEntry = commitCoordinator.getCohortEntryIfCurrent(transactionID);
 351         if (cohortEntry == null) {
 352             // The transaction is no longer the current commit. This can happen if the transaction
 353             // was aborted prior, most likely due to timeout in the front-end. We need to finish
 354             // committing the transaction though since it was successfully persisted and replicated
 355             // however we can't use the original cohort b/c it was already preCommitted and may
 356             // conflict with the current commit or may have been aborted so we commit with a new
 357             // transaction.
 358             cohortEntry = commitCoordinator.getAndRemoveCohortEntry(transactionID);
 359             if(cohortEntry != null) {
 360                 try {
 361                     store.applyForeignCandidate(transactionID, cohortEntry.getCohort().getCandidate());
 362                 } catch (DataValidationFailedException e) {
 363                     shardMBean.incrementFailedTransactionsCount();
 364                     LOG.error("{}: Failed to re-apply transaction {}", persistenceId(), transactionID, e);
 365                 }
 366
 367                 sender.tell(CommitTransactionReply.INSTANCE.toSerializable(), getSelf());
 368             } else {
 369                 // This really shouldn't happen - it likely means that persistence or replication
 370                 // took so long to complete such that the cohort entry was expired from the cache.
 371                 IllegalStateException ex = new IllegalStateException(
 372                         String.format("%s: Could not finish committing transaction %s - no CohortEntry found",
 373                                 persistenceId(), transactionID));
 374                 LOG.error(ex.getMessage());
 375                 sender.tell(new akka.actor.Status.Failure(ex), getSelf());
 376             }
 377         } else {
 378             finishCommit(sender, transactionID, cohortEntry);
 379         }
 380     }
 381
 382     private void handleCanCommitTransaction(final CanCommitTransaction canCommit) {
 383         LOG.debug("{}: Can committing transaction {}", persistenceId(), canCommit.getTransactionID());
 384         commitCoordinator.handleCanCommit(canCommit.getTransactionID(), getSender(), this);
 385     }
 386
 387     private void handleBatchedModifications(BatchedModifications batched) {
 388         // This message is sent to prepare the modificationsa transaction directly on the Shard as an
 389         // optimization to avoid the extra overhead of a separate ShardTransaction actor. On the last
 390         // BatchedModifications message, the caller sets the ready flag in the message indicating
 391         // modifications are complete. The reply contains the cohort actor path (this actor) for the caller
 392         // to initiate the 3-phase commit. This also avoids the overhead of sending an additional
 393         // ReadyTransaction message.
 394
 395         // If we're not the leader then forward to the leader. This is a safety measure - we shouldn't
 396         // normally get here if we're not the leader as the front-end (TransactionProxy) should determine
 397         // the primary/leader shard. However with timing and caching on the front-end, there's a small
 398         // window where it could have a stale leader during leadership transitions.
 399         //
 400         if(isLeader()) {
 401             try {
 402                 commitCoordinator.handleBatchedModifications(batched, getSender(), this);
 403             } catch (Exception e) {
 404                 LOG.error("{}: Error handling BatchedModifications for Tx {}", persistenceId(),
 405                         batched.getTransactionID(), e);
 406                 getSender().tell(new akka.actor.Status.Failure(e), getSelf());
 407             }
 408         } else {
 409             ActorSelection leader = getLeader();
 410             if(leader != null) {
 411                 // TODO: what if this is not the first batch and leadership changed in between batched messages?
 412                 // We could check if the commitCoordinator already has a cached entry and forward all the previous
 413                 // batched modifications.
 414                 LOG.debug("{}: Forwarding BatchedModifications to leader {}", persistenceId(), leader);
 415                 leader.forward(batched, getContext());
 416             } else {
 417                 // TODO: rather than throwing an immediate exception, we could schedule a timer to try again to make
 418                 // it more resilient in case we're in the process of electing a new leader.
 419                 getSender().tell(new akka.actor.Status.Failure(new NoShardLeaderException(String.format(
 420                     "Could not find the leader for shard %s. This typically happens" +
 421                     " when the system is coming up or recovering and a leader is being elected. Try again" +
 422                     " later.", persistenceId()))), getSelf());
 423             }
 424         }
 425     }
 426
 427     private void handleAbortTransaction(final AbortTransaction abort) {
 428         doAbortTransaction(abort.getTransactionID(), getSender());
 429     }
 430
 431     void doAbortTransaction(final String transactionID, final ActorRef sender) {
 432         final CohortEntry cohortEntry = commitCoordinator.getCohortEntryIfCurrent(transactionID);
 433         if(cohortEntry != null) {
 434             LOG.debug("{}: Aborting transaction {}", persistenceId(), transactionID);
 435
 436             // We don't remove the cached cohort entry here (ie pass false) in case the Tx was
 437             // aborted during replication in which case we may still commit locally if replication
 438             // succeeds.
 439             commitCoordinator.currentTransactionComplete(transactionID, false);
 440
 441             final ListenableFuture<Void> future = cohortEntry.getCohort().abort();
 442             final ActorRef self = getSelf();
 443
 444             Futures.addCallback(future, new FutureCallback<Void>() {
 445                 @Override
 446                 public void onSuccess(final Void v) {
 447                     shardMBean.incrementAbortTransactionsCount();
 448
 449                     if(sender != null) {
 450                         sender.tell(AbortTransactionReply.INSTANCE.toSerializable(), self);
 451                     }
 452                 }
 453
 454                 @Override
 455                 public void onFailure(final Throwable t) {
 456                     LOG.error("{}: An exception happened during abort", persistenceId(), t);
 457
 458                     if(sender != null) {
 459                         sender.tell(new akka.actor.Status.Failure(t), self);
 460                     }
 461                 }
 462             });
 463         }
 464     }
 465
 466     private void handleCreateTransaction(final Object message) {
 467         if (isLeader()) {
 468             createTransaction(CreateTransaction.fromSerializable(message));
 469         } else if (getLeader() != null) {
 470             getLeader().forward(message, getContext());
 471         } else {
 472             getSender().tell(new akka.actor.Status.Failure(new NoShardLeaderException(String.format(
 473                 "Could not find leader for shard %s so transaction cannot be created. This typically happens" +
 474                 " when the system is coming up or recovering and a leader is being elected. Try again" +
 475                 " later.", persistenceId()))), getSelf());
 476         }
 477     }
 478
 479     private void closeTransactionChain(final CloseTransactionChain closeTransactionChain) {
 480         store.closeTransactionChain(closeTransactionChain.getTransactionChainId());
 481     }
 482
 483     private ActorRef createTypedTransactionActor(int transactionType,
 484             ShardTransactionIdentifier transactionId, String transactionChainId,
 485             short clientVersion ) {
 486
 487         return transactionActorFactory.newShardTransaction(TransactionProxy.TransactionType.fromInt(transactionType),
 488                 transactionId, transactionChainId, clientVersion);
 489     }
 490
 491     private void createTransaction(CreateTransaction createTransaction) {
 492         try {
 493             ActorRef transactionActor = createTransaction(createTransaction.getTransactionType(),
 494                 createTransaction.getTransactionId(), createTransaction.getTransactionChainId(),
 495                 createTransaction.getVersion());
 496
 497             getSender().tell(new CreateTransactionReply(Serialization.serializedActorPath(transactionActor),
 498                     createTransaction.getTransactionId()).toSerializable(), getSelf());
 499         } catch (Exception e) {
 500             getSender().tell(new akka.actor.Status.Failure(e), getSelf());
 501         }
 502     }
 503
 504     private ActorRef createTransaction(int transactionType, String remoteTransactionId,
 505             String transactionChainId, short clientVersion) {
 506
 507
 508         ShardTransactionIdentifier transactionId = new ShardTransactionIdentifier(remoteTransactionId);
 509
 510         if(LOG.isDebugEnabled()) {
 511             LOG.debug("{}: Creating transaction : {} ", persistenceId(), transactionId);
 512         }
 513
 514         ActorRef transactionActor = createTypedTransactionActor(transactionType, transactionId,
 515                 transactionChainId, clientVersion);
 516
 517         return transactionActor;
 518     }
 519
 520     private void commitWithNewTransaction(final Modification modification) {
 521         ReadWriteShardDataTreeTransaction tx = store.newReadWriteTransaction(modification.toString(), null);
 522         modification.apply(tx.getSnapshot());
 523         try {
 524             snapshotCohort.syncCommitTransaction(tx);
 525             shardMBean.incrementCommittedTransactionCount();
 526             shardMBean.setLastCommittedTransactionTime(System.currentTimeMillis());
 527         } catch (Exception e) {
 528             shardMBean.incrementFailedTransactionsCount();
 529             LOG.error("{}: Failed to commit", persistenceId(), e);
 530         }
 531     }
 532
 533     private void updateSchemaContext(final UpdateSchemaContext message) {
 534         updateSchemaContext(message.getSchemaContext());
 535     }
 536
 537     @VisibleForTesting
 538     void updateSchemaContext(final SchemaContext schemaContext) {
 539         store.updateSchemaContext(schemaContext);
 540     }
 541
 542     private boolean isMetricsCaptureEnabled() {
 543         CommonConfig config = new CommonConfig(getContext().system().settings().config());
 544         return config.isMetricCaptureEnabled();
 545     }
 546
 547     @Override
 548     protected RaftActorSnapshotCohort getRaftActorSnapshotCohort() {
 549         return snapshotCohort;
 550     }
 551
 552     @Override
 553     @Nonnull
 554     protected RaftActorRecoveryCohort getRaftActorRecoveryCohort() {
 555         return new ShardRecoveryCoordinator(store, persistenceId(), LOG);
 556     }
 557
 558     @Override
 559     protected void onRecoveryComplete() {
 560         //notify shard manager
 561         getContext().parent().tell(new ActorInitialized(), getSelf());
 562
 563         // Being paranoid here - this method should only be called once but just in case...
 564         if(txCommitTimeoutCheckSchedule == null) {
 565             // Schedule a message to be periodically sent to check if the current in-progress
 566             // transaction should be expired and aborted.
 567             FiniteDuration period = Duration.create(transactionCommitTimeout / 3, TimeUnit.MILLISECONDS);
 568             txCommitTimeoutCheckSchedule = getContext().system().scheduler().schedule(
 569                     period, period, getSelf(),
 570                     TX_COMMIT_TIMEOUT_CHECK_MESSAGE, getContext().dispatcher(), ActorRef.noSender());
 571         }
 572     }
 573
 574     @Override
 575     protected void applyState(final ActorRef clientActor, final String identifier, final Object data) {
 576         if (data instanceof DataTreeCandidatePayload) {
 577             if (clientActor == null) {
 578                 // No clientActor indicates a replica coming from the leader
 579                 try {
 580                     store.applyForeignCandidate(identifier, ((DataTreeCandidatePayload)data).getCandidate());
 581                 } catch (DataValidationFailedException | IOException e) {
 582                     LOG.error("{}: Error applying replica {}", persistenceId(), identifier, e);
 583                 }
 584             } else {
 585                 // Replication consensus reached, proceed to commit
 586                 finishCommit(clientActor, identifier);
 587             }
 588         } else if (data instanceof ModificationPayload) {
 589             try {
 590                 applyModificationToState(clientActor, identifier, ((ModificationPayload) data).getModification());
 591             } catch (ClassNotFoundException | IOException e) {
 592                 LOG.error("{}: Error extracting ModificationPayload", persistenceId(), e);
 593             }
 594         } else if (data instanceof CompositeModificationPayload) {
 595             Object modification = ((CompositeModificationPayload) data).getModification();
 596
 597             applyModificationToState(clientActor, identifier, modification);
 598         } else if(data instanceof CompositeModificationByteStringPayload ){
 599             Object modification = ((CompositeModificationByteStringPayload) data).getModification();
 600
 601             applyModificationToState(clientActor, identifier, modification);
 602         } else {
 603             LOG.error("{}: Unknown state received {} Class loader = {} CompositeNodeMod.ClassLoader = {}",
 604                     persistenceId(), data, data.getClass().getClassLoader(),
 605                     CompositeModificationPayload.class.getClassLoader());
 606         }
 607     }
 608
 609     private void applyModificationToState(ActorRef clientActor, String identifier, Object modification) {
 610         if(modification == null) {
 611             LOG.error(
 612                     "{}: modification is null - this is very unexpected, clientActor = {}, identifier = {}",
 613                     persistenceId(), identifier, clientActor != null ? clientActor.path().toString() : null);
 614         } else if(clientActor == null) {
 615             // There's no clientActor to which to send a commit reply so we must be applying
 616             // replicated state from the leader.
 617             commitWithNewTransaction(MutableCompositeModification.fromSerializable(modification));
 618         } else {
 619             // This must be the OK to commit after replication consensus.
 620             finishCommit(clientActor, identifier);
 621         }
 622     }
 623
 624     @Override
 625     protected void onStateChanged() {
 626         boolean isLeader = isLeader();
 627         changeSupport.onLeadershipChange(isLeader);
 628         treeChangeSupport.onLeadershipChange(isLeader);
 629
 630         // If this actor is no longer the leader close all the transaction chains
 631         if (!isLeader) {
 632             if(LOG.isDebugEnabled()) {
 633                 LOG.debug(
 634                     "{}: onStateChanged: Closing all transaction chains because shard {} is no longer the leader",
 635                     persistenceId(), getId());
 636             }
 637
 638             store.closeAllTransactionChains();
 639         }
 640     }
 641
 642     @Override
 643     protected void onLeaderChanged(String oldLeader, String newLeader) {
 644         shardMBean.incrementLeadershipChangeCount();
 645     }
 646
 647     @Override
 648     public String persistenceId() {
 649         return this.name;
 650     }
 651
 652     @VisibleForTesting
 653     ShardCommitCoordinator getCommitCoordinator() {
 654         return commitCoordinator;
 655     }
 656
 657
 658     private static class ShardCreator implements Creator<Shard> {
 659
 660         private static final long serialVersionUID = 1L;
 661
 662         final ShardIdentifier name;
 663         final Map<String, String> peerAddresses;
 664         final DatastoreContext datastoreContext;
 665         final SchemaContext schemaContext;
 666
 667         ShardCreator(final ShardIdentifier name, final Map<String, String> peerAddresses,
 668                 final DatastoreContext datastoreContext, final SchemaContext schemaContext) {
 669             this.name = name;
 670             this.peerAddresses = peerAddresses;
 671             this.datastoreContext = datastoreContext;
 672             this.schemaContext = schemaContext;
 673         }
 674
 675         @Override
 676         public Shard create() throws Exception {
 677             return new Shard(name, peerAddresses, datastoreContext, schemaContext);
 678         }
 679     }
 680
 681     @VisibleForTesting
 682     public ShardDataTree getDataStore() {
 683         return store;
 684     }
 685
 686     @VisibleForTesting
 687     ShardStats getShardMBean() {
 688         return shardMBean;
 689     }
 690 }