X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FShard.java;h=ef4bab44f8ebb51c65312d60822dd36fdc6d9c49;hb=204f45f8b3233dbea87e2c8065914f0d2a0ded07;hp=6aee29dd40399c018e879c8cd468023da014d871;hpb=a507f19b518f36065c74f4f88c9327ede28ff640;p=controller.git diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/Shard.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/Shard.java index 6aee29dd40..ef4bab44f8 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/Shard.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/Shard.java @@ -12,14 +12,13 @@ import akka.actor.ActorRef; import akka.actor.ActorSelection; import akka.actor.Cancellable; import akka.actor.Props; -import akka.japi.Creator; import akka.persistence.RecoveryFailure; import akka.serialization.Serialization; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import java.io.IOException; -import java.util.HashMap; +import java.util.Collections; import java.util.Map; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; @@ -40,7 +39,10 @@ import org.opendaylight.controller.cluster.datastore.messages.CommitTransaction; import org.opendaylight.controller.cluster.datastore.messages.CommitTransactionReply; import org.opendaylight.controller.cluster.datastore.messages.CreateTransaction; import org.opendaylight.controller.cluster.datastore.messages.CreateTransactionReply; +import org.opendaylight.controller.cluster.datastore.messages.DatastoreSnapshot; +import org.opendaylight.controller.cluster.datastore.messages.DatastoreSnapshot.ShardSnapshot; import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction; +import org.opendaylight.controller.cluster.datastore.messages.GetShardDataTree; import org.opendaylight.controller.cluster.datastore.messages.PeerAddressResolved; import org.opendaylight.controller.cluster.datastore.messages.ReadyLocalTransaction; import org.opendaylight.controller.cluster.datastore.messages.RegisterChangeListener; @@ -61,12 +63,14 @@ import org.opendaylight.controller.cluster.raft.RaftActorSnapshotCohort; import org.opendaylight.controller.cluster.raft.RaftState; import org.opendaylight.controller.cluster.raft.base.messages.FollowerInitialSyncUpStatus; import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply; +import org.opendaylight.controller.cluster.raft.messages.ServerRemoved; import org.opendaylight.controller.cluster.raft.protobuff.client.messages.CompositeModificationByteStringPayload; import org.opendaylight.controller.cluster.raft.protobuff.client.messages.CompositeModificationPayload; import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTree; import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTreeCandidate; import org.opendaylight.yangtools.yang.data.api.schema.tree.DataValidationFailedException; import org.opendaylight.yangtools.yang.data.api.schema.tree.ModificationType; +import org.opendaylight.yangtools.yang.data.api.schema.tree.TreeType; import org.opendaylight.yangtools.yang.model.api.SchemaContext; import scala.concurrent.duration.Duration; import scala.concurrent.duration.FiniteDuration; @@ -114,19 +118,24 @@ public class Shard extends RaftActor { private final DataTreeChangeListenerSupport treeChangeSupport = new DataTreeChangeListenerSupport(this); private final DataChangeListenerSupport changeSupport = new DataChangeListenerSupport(this); - protected Shard(final ShardIdentifier name, final Map peerAddresses, - final DatastoreContext datastoreContext, final SchemaContext schemaContext) { - super(name.toString(), new HashMap<>(peerAddresses), Optional.of(datastoreContext.getShardRaftConfig()), - DataStoreVersions.CURRENT_VERSION); - this.name = name.toString(); - this.datastoreContext = datastoreContext; + private ShardSnapshot restoreFromSnapshot; + + private final ShardTransactionMessageRetrySupport messageRetrySupport; + + protected Shard(AbstractBuilder builder) { + super(builder.getId().toString(), builder.getPeerAddresses(), + Optional.of(builder.getDatastoreContext().getShardRaftConfig()), DataStoreVersions.CURRENT_VERSION); + + this.name = builder.getId().toString(); + this.datastoreContext = builder.getDatastoreContext(); + this.restoreFromSnapshot = builder.getRestoreFromSnapshot(); setPersistence(datastoreContext.isPersistent()); LOG.info("Shard created : {}, persistent : {}", name, datastoreContext.isPersistent()); - store = new ShardDataTree(schemaContext); + store = new ShardDataTree(builder.getSchemaContext(), builder.getTreeType()); shardMBean = ShardMBeanFactory.getShardStatsMBean(name.toString(), datastoreContext.getDataStoreMXBeanType()); @@ -138,7 +147,7 @@ public class Shard extends RaftActor { commitCoordinator = new ShardCommitCoordinator(store, datastoreContext.getShardCommitQueueExpiryTimeoutInMillis(), - datastoreContext.getShardTransactionCommitQueueCapacity(), self(), LOG, this.name); + datastoreContext.getShardTransactionCommitQueueCapacity(), LOG, this.name); setTransactionCommitTimeout(); @@ -154,7 +163,7 @@ public class Shard extends RaftActor { snapshotCohort = new ShardSnapshotCohort(transactionActorFactory, store, LOG, this.name); - + messageRetrySupport = new ShardTransactionMessageRetrySupport(this); } private void setTransactionCommitTimeout() { @@ -162,11 +171,6 @@ public class Shard extends RaftActor { datastoreContext.getShardTransactionCommitTimeoutInSeconds(), TimeUnit.SECONDS) / 2; } - public static Props props(final ShardIdentifier name, final Map peerAddresses, - final DatastoreContext datastoreContext, final SchemaContext schemaContext) { - return Props.create(new ShardCreator(name, peerAddresses, datastoreContext, schemaContext)); - } - private Optional createRoleChangeNotifier(String shardId) { ActorRef shardRoleChangeNotifier = this.getContext().actorOf( RoleChangeNotifier.getProps(shardId), shardId + "-notifier"); @@ -179,6 +183,8 @@ public class Shard extends RaftActor { super.postStop(); + messageRetrySupport.close(); + if(txCommitTimeoutCheckSchedule != null) { txCommitTimeoutCheckSchedule.cancel(); } @@ -224,8 +230,7 @@ public class Shard extends RaftActor { } else if (BatchedModifications.class.isInstance(message)) { handleBatchedModifications((BatchedModifications)message); } else if (message instanceof ForwardedReadyTransaction) { - commitCoordinator.handleForwardedReadyTransaction((ForwardedReadyTransaction) message, - getSender(), this); + handleForwardedReadyTransaction((ForwardedReadyTransaction) message); } else if (message instanceof ReadyLocalTransaction) { handleReadyLocalTransaction((ReadyLocalTransaction)message); } else if (CanCommitTransaction.SERIALIZABLE_CLASS.isInstance(message)) { @@ -257,6 +262,12 @@ public class Shard extends RaftActor { context().parent().tell(message, self()); } else if(GET_SHARD_MBEAN_MESSAGE.equals(message)){ sender().tell(getShardMBean(), self()); + } else if(message instanceof GetShardDataTree) { + sender().tell(store.getDataTree(), self()); + } else if(message instanceof ServerRemoved){ + context().parent().forward(message, context()); + } else if(ShardTransactionMessageRetrySupport.TIMER_MESSAGE_CLASS.isInstance(message)) { + messageRetrySupport.onTimerMessage(message); } else { super.onReceiveCommand(message); } @@ -273,6 +284,10 @@ public class Shard extends RaftActor { return commitCoordinator.getQueueSize(); } + public int getCohortCacheSize() { + return commitCoordinator.getCohortCacheSize(); + } + @Override protected Optional getRoleChangeNotifier() { return roleChangeNotifier; @@ -319,7 +334,7 @@ public class Shard extends RaftActor { return ModificationType.UNMODIFIED.equals(candidate.getRootNode().getModificationType()); } - void continueCommit(final CohortEntry cohortEntry) throws Exception { + void continueCommit(final CohortEntry cohortEntry) { final DataTreeCandidate candidate = cohortEntry.getCandidate(); // If we do not have any followers and we are not using persistence @@ -329,7 +344,7 @@ public class Shard extends RaftActor { applyModificationToState(cohortEntry.getReplySender(), cohortEntry.getTransactionID(), candidate); } else { Shard.this.persistData(cohortEntry.getReplySender(), cohortEntry.getTransactionID(), - DataTreeCandidatePayload.create(candidate)); + DataTreeCandidatePayload.create(candidate)); } } @@ -402,12 +417,6 @@ public class Shard extends RaftActor { commitCoordinator.handleCanCommit(canCommit.getTransactionID(), getSender(), this); } - private void noLeaderError(String errMessage, Object message) { - // TODO: rather than throwing an immediate exception, we could schedule a timer to try again to make - // it more resilient in case we're in the process of electing a new leader. - getSender().tell(new akka.actor.Status.Failure(new NoShardLeaderException(errMessage, persistenceId())), getSelf()); - } - protected void handleBatchedModificationsLocal(BatchedModifications batched, ActorRef sender) { try { commitCoordinator.handleBatchedModifications(batched, sender, this); @@ -431,20 +440,20 @@ public class Shard extends RaftActor { // the primary/leader shard. However with timing and caching on the front-end, there's a small // window where it could have a stale leader during leadership transitions. // - if(isLeader()) { - failIfIsolatedLeader(getSender()); - + boolean isLeaderActive = isLeaderActive(); + if (isLeader() && isLeaderActive) { handleBatchedModificationsLocal(batched, getSender()); } else { ActorSelection leader = getLeader(); - if(leader != null) { + if (!isLeaderActive || leader == null) { + messageRetrySupport.addMessageToRetry(batched, getSender(), + "Could not commit transaction " + batched.getTransactionID()); + } else { // TODO: what if this is not the first batch and leadership changed in between batched messages? // We could check if the commitCoordinator already has a cached entry and forward all the previous // batched modifications. LOG.debug("{}: Forwarding BatchedModifications to leader {}", persistenceId(), leader); leader.forward(batched, getContext()); - } else { - noLeaderError("Could not commit transaction " + batched.getTransactionID(), batched); } } } @@ -466,9 +475,10 @@ public class Shard extends RaftActor { } private void handleReadyLocalTransaction(final ReadyLocalTransaction message) { - if (isLeader()) { - failIfIsolatedLeader(getSender()); + LOG.debug("{}: handleReadyLocalTransaction for {}", persistenceId(), message.getTransactionID()); + boolean isLeaderActive = isLeaderActive(); + if (isLeader() && isLeaderActive) { try { commitCoordinator.handleReadyLocalTransaction(message, getSender(), this); } catch (Exception e) { @@ -478,12 +488,35 @@ public class Shard extends RaftActor { } } else { ActorSelection leader = getLeader(); - if (leader != null) { + if (!isLeaderActive || leader == null) { + messageRetrySupport.addMessageToRetry(message, getSender(), + "Could not commit transaction " + message.getTransactionID()); + } else { LOG.debug("{}: Forwarding ReadyLocalTransaction to leader {}", persistenceId(), leader); message.setRemoteVersion(getCurrentBehavior().getLeaderPayloadVersion()); leader.forward(message, getContext()); + } + } + } + + private void handleForwardedReadyTransaction(ForwardedReadyTransaction forwardedReady) { + LOG.debug("{}: handleForwardedReadyTransaction for {}", persistenceId(), forwardedReady.getTransactionID()); + + boolean isLeaderActive = isLeaderActive(); + if (isLeader() && isLeaderActive) { + commitCoordinator.handleForwardedReadyTransaction(forwardedReady, getSender(), this); + } else { + ActorSelection leader = getLeader(); + if (!isLeaderActive || leader == null) { + messageRetrySupport.addMessageToRetry(forwardedReady, getSender(), + "Could not commit transaction " + forwardedReady.getTransactionID()); } else { - noLeaderError("Could not commit transaction " + message.getTransactionID(), message); + LOG.debug("{}: Forwarding ForwardedReadyTransaction to leader {}", persistenceId(), leader); + + ReadyLocalTransaction readyLocal = new ReadyLocalTransaction(forwardedReady.getTransactionID(), + forwardedReady.getTransaction().getSnapshot(), forwardedReady.isDoImmediateCommit()); + readyLocal.setRemoteVersion(getCurrentBehavior().getLeaderPayloadVersion()); + leader.forward(readyLocal, getContext()); } } } @@ -589,11 +622,14 @@ public class Shard extends RaftActor { @Override @Nonnull protected RaftActorRecoveryCohort getRaftActorRecoveryCohort() { - return new ShardRecoveryCoordinator(store, store.getSchemaContext(), persistenceId(), LOG); + return new ShardRecoveryCoordinator(store, store.getSchemaContext(), + restoreFromSnapshot != null ? restoreFromSnapshot.getSnapshot() : null, persistenceId(), LOG); } @Override protected void onRecoveryComplete() { + restoreFromSnapshot = null; + //notify shard manager getContext().parent().tell(new ActorInitialized(), getSelf()); @@ -675,11 +711,25 @@ public class Shard extends RaftActor { store.closeAllTransactionChains(); } + + if(hasLeader && !isIsolatedLeader()) { + messageRetrySupport.retryMessages(); + } } @Override protected void onLeaderChanged(String oldLeader, String newLeader) { shardMBean.incrementLeadershipChangeCount(); + + if(hasLeader() && !isIsolatedLeader()) { + messageRetrySupport.retryMessages(); + } + } + + @Override + protected void pauseLeader(Runnable operation) { + LOG.debug("{}: In pauseLeader, operation: {}", persistenceId(), operation); + commitCoordinator.setRunOnPendingTransactionsComplete(operation); } @Override @@ -692,48 +742,124 @@ public class Shard extends RaftActor { return commitCoordinator; } - protected DatastoreContext getDatastoreContext() { + public DatastoreContext getDatastoreContext() { return datastoreContext; } - protected abstract static class AbstractShardCreator implements Creator { - private static final long serialVersionUID = 1L; + @VisibleForTesting + public ShardDataTree getDataStore() { + return store; + } + + @VisibleForTesting + ShardStats getShardMBean() { + return shardMBean; + } + + public static Builder builder() { + return new Builder(); + } - protected final ShardIdentifier name; - protected final Map peerAddresses; - protected final DatastoreContext datastoreContext; - protected final SchemaContext schemaContext; + public static abstract class AbstractBuilder, S extends Shard> { + private final Class shardClass; + private ShardIdentifier id; + private Map peerAddresses = Collections.emptyMap(); + private DatastoreContext datastoreContext; + private SchemaContext schemaContext; + private DatastoreSnapshot.ShardSnapshot restoreFromSnapshot; + private volatile boolean sealed; - protected AbstractShardCreator(final ShardIdentifier name, final Map peerAddresses, - final DatastoreContext datastoreContext, final SchemaContext schemaContext) { - this.name = Preconditions.checkNotNull(name, "name should not be null"); - this.peerAddresses = Preconditions.checkNotNull(peerAddresses, "peerAddresses should not be null"); - this.datastoreContext = Preconditions.checkNotNull(datastoreContext, "dataStoreContext should not be null"); - this.schemaContext = Preconditions.checkNotNull(schemaContext, "schemaContext should not be null"); + protected AbstractBuilder(Class shardClass) { + this.shardClass = shardClass; } - } - private static class ShardCreator extends AbstractShardCreator { - private static final long serialVersionUID = 1L; + protected void checkSealed() { + Preconditions.checkState(!sealed, "Builder isalready sealed - further modifications are not allowed"); + } - ShardCreator(final ShardIdentifier name, final Map peerAddresses, - final DatastoreContext datastoreContext, final SchemaContext schemaContext) { - super(name, peerAddresses, datastoreContext, schemaContext); + @SuppressWarnings("unchecked") + private T self() { + return (T) this; } - @Override - public Shard create() throws Exception { - return new Shard(name, peerAddresses, datastoreContext, schemaContext); + public T id(ShardIdentifier id) { + checkSealed(); + this.id = id; + return self(); } - } - @VisibleForTesting - public ShardDataTree getDataStore() { - return store; + public T peerAddresses(Map peerAddresses) { + checkSealed(); + this.peerAddresses = peerAddresses; + return self(); + } + + public T datastoreContext(DatastoreContext datastoreContext) { + checkSealed(); + this.datastoreContext = datastoreContext; + return self(); + } + + public T schemaContext(SchemaContext schemaContext) { + checkSealed(); + this.schemaContext = schemaContext; + return self(); + } + + public T restoreFromSnapshot(DatastoreSnapshot.ShardSnapshot restoreFromSnapshot) { + checkSealed(); + this.restoreFromSnapshot = restoreFromSnapshot; + return self(); + } + + public ShardIdentifier getId() { + return id; + } + + public Map getPeerAddresses() { + return peerAddresses; + } + + public DatastoreContext getDatastoreContext() { + return datastoreContext; + } + + public SchemaContext getSchemaContext() { + return schemaContext; + } + + public DatastoreSnapshot.ShardSnapshot getRestoreFromSnapshot() { + return restoreFromSnapshot; + } + + public TreeType getTreeType() { + switch (datastoreContext.getLogicalStoreType()) { + case CONFIGURATION: + return TreeType.CONFIGURATION; + case OPERATIONAL: + return TreeType.OPERATIONAL; + } + + throw new IllegalStateException("Unhandled logical store type " + datastoreContext.getLogicalStoreType()); + } + + protected void verify() { + Preconditions.checkNotNull(id, "id should not be null"); + Preconditions.checkNotNull(peerAddresses, "peerAddresses should not be null"); + Preconditions.checkNotNull(datastoreContext, "dataStoreContext should not be null"); + Preconditions.checkNotNull(schemaContext, "schemaContext should not be null"); + } + + public Props props() { + sealed = true; + verify(); + return Props.create(shardClass, this); + } } - @VisibleForTesting - ShardStats getShardMBean() { - return shardMBean; + public static class Builder extends AbstractBuilder { + private Builder() { + super(Shard.class); + } } }