X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2Fentityownership%2FEntityOwnershipShard.java;h=6e1d1a855a5ff0c99ec2df73ab2847fa398b0412;hp=1641b668c325804e1680cd3089f95a83e91c2d7b;hb=69584f4fa7b55eb89d28b3b1d8003b7c4918b5b6;hpb=16263ee5c532e75ca6f14ed5fa9053a38694af14 diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/entityownership/EntityOwnershipShard.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/entityownership/EntityOwnershipShard.java index 1641b668c3..6e1d1a855a 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/entityownership/EntityOwnershipShard.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/entityownership/EntityOwnershipShard.java @@ -18,32 +18,44 @@ import static org.opendaylight.controller.cluster.datastore.entityownership.Enti import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_TYPES_PATH; import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_TYPE_NODE_ID; import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_TYPE_QNAME; -import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.candidateMapEntry; import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.candidateNodeKey; import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.candidatePath; -import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.createEntity; import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.entityOwnersWithCandidate; + import akka.actor.ActorRef; import akka.actor.ActorSelection; -import akka.actor.Props; +import akka.actor.Cancellable; +import akka.cluster.Cluster; +import akka.cluster.ClusterEvent.CurrentClusterState; +import akka.cluster.Member; +import akka.cluster.MemberStatus; import akka.pattern.Patterns; -import com.google.common.base.Optional; +import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; +import org.opendaylight.controller.cluster.access.concepts.MemberName; import org.opendaylight.controller.cluster.datastore.DatastoreContext; import org.opendaylight.controller.cluster.datastore.Shard; import org.opendaylight.controller.cluster.datastore.entityownership.messages.CandidateAdded; import org.opendaylight.controller.cluster.datastore.entityownership.messages.CandidateRemoved; import org.opendaylight.controller.cluster.datastore.entityownership.messages.RegisterCandidateLocal; import org.opendaylight.controller.cluster.datastore.entityownership.messages.RegisterListenerLocal; +import org.opendaylight.controller.cluster.datastore.entityownership.messages.RemoveAllCandidates; +import org.opendaylight.controller.cluster.datastore.entityownership.messages.SelectOwner; import org.opendaylight.controller.cluster.datastore.entityownership.messages.UnregisterCandidateLocal; import org.opendaylight.controller.cluster.datastore.entityownership.messages.UnregisterListenerLocal; +import org.opendaylight.controller.cluster.datastore.entityownership.selectionstrategy.EntityOwnerSelectionStrategy; +import org.opendaylight.controller.cluster.datastore.entityownership.selectionstrategy.EntityOwnerSelectionStrategyConfig; import org.opendaylight.controller.cluster.datastore.identifiers.ShardIdentifier; import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications; import org.opendaylight.controller.cluster.datastore.messages.PeerDown; @@ -51,18 +63,20 @@ import org.opendaylight.controller.cluster.datastore.messages.PeerUp; import org.opendaylight.controller.cluster.datastore.messages.SuccessReply; import org.opendaylight.controller.cluster.datastore.modification.DeleteModification; import org.opendaylight.controller.cluster.datastore.modification.MergeModification; +import org.opendaylight.controller.cluster.datastore.modification.Modification; import org.opendaylight.controller.cluster.datastore.modification.WriteModification; -import org.opendaylight.controller.md.sal.common.api.clustering.Entity; +import org.opendaylight.controller.cluster.raft.RaftState; +import org.opendaylight.controller.cluster.raft.VotingState; +import org.opendaylight.mdsal.eos.dom.api.DOMEntity; import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier; import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier.PathArgument; import org.opendaylight.yangtools.yang.data.api.schema.DataContainerChild; import org.opendaylight.yangtools.yang.data.api.schema.MapEntryNode; import org.opendaylight.yangtools.yang.data.api.schema.MapNode; import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode; -import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTreeSnapshot; import org.opendaylight.yangtools.yang.data.impl.schema.ImmutableNodes; -import org.opendaylight.yangtools.yang.model.api.SchemaContext; import scala.concurrent.Future; +import scala.concurrent.duration.FiniteDuration; /** * Special Shard for EntityOwnership. @@ -70,31 +84,31 @@ import scala.concurrent.Future; * @author Thomas Pantelis */ class EntityOwnershipShard extends Shard { - private final String localMemberName; + private final MemberName localMemberName; private final EntityOwnershipShardCommitCoordinator commitCoordinator; private final EntityOwnershipListenerSupport listenerSupport; - private final Set downPeerMemberNames = new HashSet<>(); - private final Map peerIdToMemberNames = new HashMap<>(); - - private static DatastoreContext noPersistenceDatastoreContext(DatastoreContext datastoreContext) { - return DatastoreContext.newBuilderFrom(datastoreContext).persistent(false).build(); - } - - protected EntityOwnershipShard(ShardIdentifier name, Map peerAddresses, - DatastoreContext datastoreContext, SchemaContext schemaContext, String localMemberName) { - super(name, peerAddresses, noPersistenceDatastoreContext(datastoreContext), schemaContext); - this.localMemberName = localMemberName; - this.commitCoordinator = new EntityOwnershipShardCommitCoordinator(localMemberName, LOG); + private final Set downPeerMemberNames = new HashSet<>(); + private final EntityOwnerSelectionStrategyConfig strategyConfig; + private final Map entityToScheduledOwnershipTask = new HashMap<>(); + private final EntityOwnershipStatistics entityOwnershipStatistics; + private boolean removeAllInitialCandidates = true; + + protected EntityOwnershipShard(final Builder builder) { + super(builder); + this.localMemberName = builder.localMemberName; + this.commitCoordinator = new EntityOwnershipShardCommitCoordinator(builder.localMemberName, LOG); this.listenerSupport = new EntityOwnershipListenerSupport(getContext(), persistenceId()); + this.strategyConfig = builder.ownerSelectionStrategyConfig; + this.entityOwnershipStatistics = new EntityOwnershipStatistics(); + this.entityOwnershipStatistics.init(getDataStore()); + } - for(String peerId: peerAddresses.keySet()) { - ShardIdentifier shardId = ShardIdentifier.builder().fromShardIdString(peerId).build(); - peerIdToMemberNames.put(peerId, shardId.getMemberName()); - } + private static DatastoreContext noPersistenceDatastoreContext(final DatastoreContext datastoreContext) { + return DatastoreContext.newBuilderFrom(datastoreContext).persistent(false).build(); } @Override - protected void onDatastoreContext(DatastoreContext context) { + protected void onDatastoreContext(final DatastoreContext context) { super.onDatastoreContext(noPersistenceDatastoreContext(context)); } @@ -107,47 +121,72 @@ class EntityOwnershipShard extends Shard { } @Override - public void onReceiveCommand(final Object message) throws Exception { - if(message instanceof RegisterCandidateLocal) { - onRegisterCandidateLocal((RegisterCandidateLocal)message); - } else if(message instanceof UnregisterCandidateLocal) { - onUnregisterCandidateLocal((UnregisterCandidateLocal)message); - } else if(message instanceof CandidateAdded){ + public void handleNonRaftCommand(final Object message) { + if (message instanceof RegisterCandidateLocal) { + onRegisterCandidateLocal((RegisterCandidateLocal) message); + } else if (message instanceof UnregisterCandidateLocal) { + onUnregisterCandidateLocal((UnregisterCandidateLocal) message); + } else if (message instanceof CandidateAdded) { onCandidateAdded((CandidateAdded) message); - } else if(message instanceof CandidateRemoved){ + } else if (message instanceof CandidateRemoved) { onCandidateRemoved((CandidateRemoved) message); - } else if(message instanceof PeerDown) { + } else if (message instanceof PeerDown) { onPeerDown((PeerDown) message); - } else if(message instanceof PeerUp) { + } else if (message instanceof PeerUp) { onPeerUp((PeerUp) message); - } if(message instanceof RegisterListenerLocal) { - onRegisterListenerLocal((RegisterListenerLocal)message); - } if(message instanceof UnregisterListenerLocal) { - onUnregisterListenerLocal((UnregisterListenerLocal)message); - } else if(!commitCoordinator.handleMessage(message, this)) { - super.onReceiveCommand(message); + } else if (message instanceof RegisterListenerLocal) { + onRegisterListenerLocal((RegisterListenerLocal) message); + } else if (message instanceof UnregisterListenerLocal) { + onUnregisterListenerLocal((UnregisterListenerLocal) message); + } else if (message instanceof SelectOwner) { + onSelectOwner((SelectOwner) message); + } else if (message instanceof RemoveAllCandidates) { + onRemoveAllCandidates((RemoveAllCandidates) message); + } else if (!commitCoordinator.handleMessage(message, this)) { + super.handleNonRaftCommand(message); } } - private void onRegisterCandidateLocal(RegisterCandidateLocal registerCandidate) { - LOG.debug("{}: onRegisterCandidateLocal: {}", persistenceId(), registerCandidate); + private void onRemoveAllCandidates(final RemoveAllCandidates message) { + LOG.debug("{}: onRemoveAllCandidates: {}", persistenceId(), message); - listenerSupport.setHasCandidateForEntity(registerCandidate.getEntity()); + removeCandidateFromEntities(message.getMemberName()); + } + + private void onSelectOwner(final SelectOwner selectOwner) { + LOG.debug("{}: onSelectOwner: {}", persistenceId(), selectOwner); + + String currentOwner = getCurrentOwner(selectOwner.getEntityPath()); + if (Strings.isNullOrEmpty(currentOwner)) { + writeNewOwner(selectOwner.getEntityPath(), newOwner(currentOwner, selectOwner.getAllCandidates(), + selectOwner.getOwnerSelectionStrategy())); + + Cancellable cancellable = entityToScheduledOwnershipTask.get(selectOwner.getEntityPath()); + if (cancellable != null) { + if (!cancellable.isCancelled()) { + cancellable.cancel(); + } + entityToScheduledOwnershipTask.remove(selectOwner.getEntityPath()); + } + } + } + + private void onRegisterCandidateLocal(final RegisterCandidateLocal registerCandidate) { + LOG.debug("{}: onRegisterCandidateLocal: {}", persistenceId(), registerCandidate); NormalizedNode entityOwners = entityOwnersWithCandidate(registerCandidate.getEntity().getType(), - registerCandidate.getEntity().getId(), localMemberName); + registerCandidate.getEntity().getIdentifier(), localMemberName.getName()); commitCoordinator.commitModification(new MergeModification(ENTITY_OWNERS_PATH, entityOwners), this); getSender().tell(SuccessReply.INSTANCE, getSelf()); } - private void onUnregisterCandidateLocal(UnregisterCandidateLocal unregisterCandidate) { + private void onUnregisterCandidateLocal(final UnregisterCandidateLocal unregisterCandidate) { LOG.debug("{}: onUnregisterCandidateLocal: {}", persistenceId(), unregisterCandidate); - Entity entity = unregisterCandidate.getEntity(); - listenerSupport.unsetHasCandidateForEntity(entity); - - YangInstanceIdentifier candidatePath = candidatePath(entity.getType(), entity.getId(), localMemberName); + DOMEntity entity = unregisterCandidate.getEntity(); + YangInstanceIdentifier candidatePath = candidatePath(entity.getType(), entity.getIdentifier(), + localMemberName.getName()); commitCoordinator.commitModification(new DeleteModification(candidatePath), this); getSender().tell(SuccessReply.INSTANCE, getSelf()); @@ -160,32 +199,44 @@ class EntityOwnershipShard extends Shard { getSender().tell(SuccessReply.INSTANCE, getSelf()); - searchForEntitiesOwnedBy(localMemberName, new EntityWalker() { - @Override - public void onEntity(MapEntryNode entityTypeNode, MapEntryNode entityNode) { - Optional> possibleType = - entityTypeNode.getChild(ENTITY_TYPE_NODE_ID); - String entityType = possibleType.isPresent() ? possibleType.get().getValue().toString() : null; - if(registerListener.getEntityType().equals(entityType)) { - Entity entity = new Entity(entityType, - (YangInstanceIdentifier) entityNode.getChild(ENTITY_ID_NODE_ID).get().getValue()); - listenerSupport.notifyEntityOwnershipListener(entity, false, true, true, registerListener.getListener()); + searchForEntities((entityTypeNode, entityNode) -> { + Optional> possibleType = entityTypeNode.getChild(ENTITY_TYPE_NODE_ID); + String entityType = possibleType.isPresent() ? possibleType.get().getValue().toString() : null; + if (registerListener.getEntityType().equals(entityType)) { + final boolean hasOwner; + final boolean isOwner; + + Optional> possibleOwner = entityNode.getChild(ENTITY_OWNER_NODE_ID); + if (possibleOwner.isPresent()) { + isOwner = localMemberName.getName().equals(possibleOwner.get().getValue().toString()); + hasOwner = true; + } else { + isOwner = false; + hasOwner = false; } + + DOMEntity entity = new DOMEntity(entityType, + (YangInstanceIdentifier) entityNode.getChild(ENTITY_ID_NODE_ID).get().getValue()); + + listenerSupport.notifyEntityOwnershipListener(entity, false, isOwner, hasOwner, + registerListener.getListener()); } }); } - private void onUnregisterListenerLocal(UnregisterListenerLocal unregisterListener) { + private void onUnregisterListenerLocal(final UnregisterListenerLocal unregisterListener) { LOG.debug("{}: onUnregisterListenerLocal: {}", persistenceId(), unregisterListener); - listenerSupport.removeEntityOwnershipListener(unregisterListener.getEntityType(), unregisterListener.getListener()); + listenerSupport.removeEntityOwnershipListener(unregisterListener.getEntityType(), + unregisterListener.getListener()); getSender().tell(SuccessReply.INSTANCE, getSelf()); } void tryCommitModifications(final BatchedModifications modifications) { - if(isLeader()) { - LOG.debug("{}: Committing BatchedModifications {} locally", persistenceId(), modifications.getTransactionID()); + if (isLeader()) { + LOG.debug("{}: Committing BatchedModifications {} locally", persistenceId(), + modifications.getTransactionId()); // Note that it's possible the commit won't get consensus and will timeout and not be applied // to the state. However we don't need to retry it in that case b/c it will be committed to @@ -195,10 +246,10 @@ class EntityOwnershipShard extends Shard { } else { final ActorSelection leader = getLeader(); if (leader != null) { - if(LOG.isDebugEnabled()) { - LOG.debug("{}: Sending BatchedModifications {} to leader {}", persistenceId(), - modifications.getTransactionID(), leader); - } + possiblyRemoveAllInitialCandidates(leader); + + LOG.debug("{}: Sending BatchedModifications {} to leader {}", persistenceId(), + modifications.getTransactionId(), leader); Future future = Patterns.ask(leader, modifications, TimeUnit.SECONDS.toMillis( getDatastoreContext().getShardTransactionCommitTimeoutInSeconds())); @@ -208,66 +259,196 @@ class EntityOwnershipShard extends Shard { } } + void possiblyRemoveAllInitialCandidates(final ActorSelection leader) { + // The following handles removing all candidates on startup when re-joining with a remote leader. When a + // follower is detected as down, the leader will re-assign new owners to entities that were owned by the + // down member but doesn't remove the down member as a candidate, as the down node may actually be isolated + // and still running. Therefore on startup we send an initial message to the remote leader to remove any + // potential stale candidates we had previously registered, as it's possible a candidate may not be + // registered by a client in the new incarnation. We have to send the RemoveAllCandidates message prior to any + // pending registrations. + if (removeAllInitialCandidates && leader != null) { + removeAllInitialCandidates = false; + if (!isLeader()) { + LOG.debug("{} - got new leader {} on startup - sending RemoveAllCandidates", persistenceId(), leader); + + leader.tell(new RemoveAllCandidates(localMemberName), ActorRef.noSender()); + } + } + } + boolean hasLeader() { - return getLeader() != null && !isIsolatedLeader(); + return getLeader() != null && (!isLeader() || isLeaderActive()); + } + + /** + * Determine if we are in jeopardy based on observed RAFT state. + */ + private static boolean inJeopardy(final RaftState state) { + switch (state) { + case Candidate: + case Follower: + case Leader: + case PreLeader: + return false; + case IsolatedLeader: + return true; + default: + throw new IllegalStateException("Unsupported RAFT state " + state); + } + } + + private void notifyAllListeners() { + searchForEntities((entityTypeNode, entityNode) -> { + Optional> possibleType = entityTypeNode.getChild(ENTITY_TYPE_NODE_ID); + if (possibleType.isPresent()) { + final boolean hasOwner; + final boolean isOwner; + + Optional> possibleOwner = entityNode.getChild(ENTITY_OWNER_NODE_ID); + if (possibleOwner.isPresent()) { + isOwner = localMemberName.getName().equals(possibleOwner.get().getValue().toString()); + hasOwner = true; + } else { + isOwner = false; + hasOwner = false; + } + + DOMEntity entity = new DOMEntity(possibleType.get().getValue().toString(), + (YangInstanceIdentifier) entityNode.getChild(ENTITY_ID_NODE_ID).get().getValue()); + + listenerSupport.notifyEntityOwnershipListeners(entity, isOwner, isOwner, hasOwner); + } + }); } @Override protected void onStateChanged() { - super.onStateChanged(); + boolean isLeader = isLeader(); + LOG.debug("{}: onStateChanged: isLeader: {}, hasLeader: {}", persistenceId(), isLeader, hasLeader()); + + // Examine current RAFT state to see if we are in jeopardy, potentially notifying all listeners + final boolean inJeopardy = inJeopardy(getRaftState()); + final boolean wasInJeopardy = listenerSupport.setInJeopardy(inJeopardy); + if (inJeopardy != wasInJeopardy) { + LOG.debug("{}: {} jeopardy state, notifying all listeners", persistenceId(), + inJeopardy ? "entered" : "left"); + notifyAllListeners(); + } - commitCoordinator.onStateChanged(this, isLeader()); + commitCoordinator.onStateChanged(this, isLeader); + + super.onStateChanged(); } @Override - protected void onLeaderChanged(String oldLeader, String newLeader) { + protected void onLeaderChanged(final String oldLeader, final String newLeader) { + boolean isLeader = isLeader(); + LOG.debug("{}: onLeaderChanged: oldLeader: {}, newLeader: {}, isLeader: {}", persistenceId(), oldLeader, + newLeader, isLeader); + + if (isLeader) { + + // Re-initialize the downPeerMemberNames from the current akka Cluster state. The previous leader, if any, + // is most likely down however it's possible we haven't received the PeerDown message yet. + initializeDownPeerMemberNamesFromClusterState(); + + // Clear all existing strategies so that they get re-created when we call createStrategy again + // This allows the strategies to be re-initialized with existing statistics maintained by + // EntityOwnershipStatistics + strategyConfig.clearStrategies(); + + // Re-assign owners for all members that are known to be down. In a cluster which has greater than + // 3 nodes it is possible for some node beside the leader being down when the leadership transitions + // it makes sense to use this event to re-assign owners for those downed nodes. + Set ownedBy = new HashSet<>(downPeerMemberNames.size() + 1); + for (MemberName downPeerName : downPeerMemberNames) { + ownedBy.add(downPeerName.getName()); + } + + // Also try to assign owners for entities that have no current owner. See explanation in onPeerUp. + ownedBy.add(""); + selectNewOwnerForEntitiesOwnedBy(ownedBy); + } else { + // The leader changed - notify the coordinator to check if pending modifications need to be sent. + // While onStateChanged also does this, this method handles the case where the shard hears from a + // leader and stays in the follower state. In that case no behavior state change occurs. + commitCoordinator.onStateChanged(this, isLeader); + } + super.onLeaderChanged(oldLeader, newLeader); + } - LOG.debug("{}: onLeaderChanged: oldLeader: {}, newLeader: {}, isLeader: {}", persistenceId(), oldLeader, - newLeader, isLeader()); + @Override + protected void onVotingStateChangeComplete() { + // Re-evaluate ownership for all entities - if a member changed from voting to non-voting it should lose + // ownership and vice versa it now is a candidate to become owner. + final List modifications = new ArrayList<>(); + searchForEntities((entityTypeNode, entityNode) -> { + YangInstanceIdentifier entityPath = YangInstanceIdentifier.builder(ENTITY_TYPES_PATH) + .node(entityTypeNode.getIdentifier()).node(ENTITY_NODE_ID).node(entityNode.getIdentifier()) + .node(ENTITY_OWNER_NODE_ID).build(); + + Optional possibleOwner = + entityNode.getChild(ENTITY_OWNER_NODE_ID).map(node -> node.getValue().toString()); + String newOwner = newOwner(possibleOwner.orElse(null), getCandidateNames(entityNode), + getEntityOwnerElectionStrategy(entityPath)); + + if (!newOwner.equals(possibleOwner.orElse(""))) { + modifications.add(new WriteModification(entityPath, + ImmutableNodes.leafNode(ENTITY_OWNER_NODE_ID, newOwner))); + } + }); + + commitCoordinator.commitModifications(modifications, this); + } - if(isLeader()) { - // We were just elected leader. If the old leader is down, select new owners for the entities - // owned by the down leader. + private void initializeDownPeerMemberNamesFromClusterState() { + Optional cluster = getRaftActorContext().getCluster(); + if (!cluster.isPresent()) { + return; + } - String oldLeaderMemberName = peerIdToMemberNames.get(oldLeader); + CurrentClusterState state = cluster.get().state(); + Set unreachable = state.getUnreachable(); - LOG.debug("{}: oldLeaderMemberName: {}", persistenceId(), oldLeaderMemberName); + LOG.debug( + "{}: initializeDownPeerMemberNamesFromClusterState - current downPeerMemberNames: {}, unreachable: {}", + persistenceId(), downPeerMemberNames, unreachable); - if(downPeerMemberNames.contains(oldLeaderMemberName)) { - selectNewOwnerForEntitiesOwnedBy(oldLeaderMemberName); + downPeerMemberNames.clear(); + for (Member m: unreachable) { + downPeerMemberNames.add(MemberName.forName(m.getRoles().iterator().next())); + } + + for (Member m: state.getMembers()) { + if (m.status() != MemberStatus.up() && m.status() != MemberStatus.weaklyUp()) { + LOG.debug("{}: Adding down member with status {}", persistenceId(), m.status()); + downPeerMemberNames.add(MemberName.forName(m.getRoles().iterator().next())); } } + + LOG.debug("{}: new downPeerMemberNames: {}", persistenceId(), downPeerMemberNames); } - private void onCandidateRemoved(CandidateRemoved message) { + private void onCandidateRemoved(final CandidateRemoved message) { LOG.debug("{}: onCandidateRemoved: {}", persistenceId(), message); - if(isLeader()) { + if (isLeader()) { String currentOwner = getCurrentOwner(message.getEntityPath()); - if(message.getRemovedCandidate().equals(currentOwner)){ - writeNewOwner(message.getEntityPath(), newOwner(message.getRemainingCandidates())); - } - } else { - // We're not the leader. If the removed candidate is our local member then check if we actually - // have a local candidate registered. If we do then we must have been partitioned from the leader - // and the leader removed our candidate since the leader can't tell the difference between a - // temporary network partition and a node's process actually restarted. So, in that case, re-add - // our candidate. - if(localMemberName.equals(message.getRemovedCandidate()) && - listenerSupport.hasCandidateForEntity(createEntity(message.getEntityPath()))) { - LOG.debug("Local candidate member was removed but a local candidate is registered for {}" + - " - adding back local candidate", message.getEntityPath()); - - commitCoordinator.commitModification(new MergeModification( - candidatePath(message.getEntityPath(), localMemberName), - candidateMapEntry(localMemberName)), this); - } + writeNewOwner(message.getEntityPath(), + newOwner(currentOwner, message.getRemainingCandidates(), + getEntityOwnerElectionStrategy(message.getEntityPath()))); } } - private void onCandidateAdded(CandidateAdded message) { - if(!isLeader()){ + private EntityOwnerSelectionStrategy getEntityOwnerElectionStrategy(final YangInstanceIdentifier entityPath) { + final String entityType = EntityOwnersModel.entityTypeFromEntityPath(entityPath); + return strategyConfig.createStrategy(entityType, entityOwnershipStatistics.byEntityType(entityType)); + } + + private void onCandidateAdded(final CandidateAdded message) { + if (!isLeader()) { return; } @@ -275,179 +456,257 @@ class EntityOwnershipShard extends Shard { // Since a node's candidate member is only added by the node itself, we can assume the node is up so // remove it from the downPeerMemberNames. - downPeerMemberNames.remove(message.getNewCandidate()); - - String currentOwner = getCurrentOwner(message.getEntityPath()); - if(Strings.isNullOrEmpty(currentOwner)){ - writeNewOwner(message.getEntityPath(), newOwner(message.getAllCandidates())); + downPeerMemberNames.remove(MemberName.forName(message.getNewCandidate())); + + final String currentOwner = getCurrentOwner(message.getEntityPath()); + final EntityOwnerSelectionStrategy strategy = getEntityOwnerElectionStrategy(message.getEntityPath()); + + // Available members is all the known peers - the number of peers that are down + self + // So if there are 2 peers and 1 is down then availableMembers will be 2 + final int availableMembers = getRaftActorContext().getPeerIds().size() - downPeerMemberNames.size() + 1; + + LOG.debug("{}: Using strategy {} to select owner, currentOwner = {}", persistenceId(), strategy, currentOwner); + + if (strategy.getSelectionDelayInMillis() == 0L) { + writeNewOwner(message.getEntityPath(), newOwner(currentOwner, message.getAllCandidates(), + strategy)); + } else if (message.getAllCandidates().size() == availableMembers) { + LOG.debug("{}: Received the maximum candidates requests : {} writing new owner", + persistenceId(), availableMembers); + cancelOwnerSelectionTask(message.getEntityPath()); + writeNewOwner(message.getEntityPath(), newOwner(currentOwner, message.getAllCandidates(), + strategy)); + } else { + scheduleOwnerSelection(message.getEntityPath(), message.getAllCandidates(), strategy); } } - private void onPeerDown(PeerDown peerDown) { + private void onPeerDown(final PeerDown peerDown) { LOG.info("{}: onPeerDown: {}", persistenceId(), peerDown); - String downMemberName = peerDown.getMemberName(); - if(downPeerMemberNames.add(downMemberName) && isLeader()) { - // Remove the down peer as a candidate from all entities. - removeCandidateFromEntities(downMemberName); - } - } - - private void onPeerUp(PeerUp peerUp) { - LOG.debug("{}: onPeerUp: {}", persistenceId(), peerUp); + MemberName downMemberName = peerDown.getMemberName(); + if (downPeerMemberNames.add(downMemberName) && isLeader()) { + // Select new owners for entities owned by the down peer and which have other candidates. For an entity for + // which the down peer is the only candidate, we leave it as the owner and don't clear it. This is done to + // handle the case where the peer member process is actually still running but the node is partitioned. + // When the partition is healed, the peer just remains as the owner. If the peer process actually restarted, + // it will first remove all its candidates on startup. If another candidate is registered during the time + // the peer is down, the new candidate will be selected as the new owner. - peerIdToMemberNames.put(peerUp.getPeerId(), peerUp.getMemberName()); - downPeerMemberNames.remove(peerUp.getMemberName()); + selectNewOwnerForEntitiesOwnedBy(ImmutableSet.of(downMemberName.getName())); + } } - private void selectNewOwnerForEntitiesOwnedBy(String owner) { - final BatchedModifications modifications = commitCoordinator.newBatchedModifications(); - searchForEntitiesOwnedBy(owner, new EntityWalker() { - @Override - public void onEntity(MapEntryNode entityTypeNode, MapEntryNode entityNode) { - Object newOwner = newOwner(getCandidateNames(entityNode)); - YangInstanceIdentifier entityPath = YangInstanceIdentifier.builder(ENTITY_TYPES_PATH). - node(entityTypeNode.getIdentifier()).node(ENTITY_NODE_ID).node(entityNode.getIdentifier()). - node(ENTITY_OWNER_NODE_ID).build(); + private void selectNewOwnerForEntitiesOwnedBy(final Set ownedBy) { + final List modifications = new ArrayList<>(); + searchForEntitiesOwnedBy(ownedBy, (entityTypeNode, entityNode) -> { + YangInstanceIdentifier entityPath = YangInstanceIdentifier.builder(ENTITY_TYPES_PATH) + .node(entityTypeNode.getIdentifier()).node(ENTITY_NODE_ID).node(entityNode.getIdentifier()) + .node(ENTITY_OWNER_NODE_ID).build(); + String newOwner = newOwner(getCurrentOwner(entityPath), getCandidateNames(entityNode), + getEntityOwnerElectionStrategy(entityPath)); + if (!newOwner.isEmpty()) { LOG.debug("{}: Found entity {}, writing new owner {}", persistenceId(), entityPath, newOwner); - modifications.addModification(new WriteModification(entityPath, - ImmutableNodes.leafNode(ENTITY_OWNER_NODE_ID, newOwner))); + modifications.add(new WriteModification(entityPath, + ImmutableNodes.leafNode(ENTITY_OWNER_NODE_ID, newOwner))); + + } else { + LOG.debug("{}: Found entity {} but no other candidates - not clearing owner", persistenceId(), + entityPath); } }); commitCoordinator.commitModifications(modifications, this); } - private void removeCandidateFromEntities(final String owner) { - final BatchedModifications modifications = commitCoordinator.newBatchedModifications(); - searchForEntities(new EntityWalker() { - @Override - public void onEntity(MapEntryNode entityTypeNode, MapEntryNode entityNode) { - if(hasCandidate(entityNode, owner)) { - YangInstanceIdentifier entityId = - (YangInstanceIdentifier)entityNode.getIdentifier().getKeyValues().get(ENTITY_ID_QNAME); - YangInstanceIdentifier candidatePath = candidatePath( - entityTypeNode.getIdentifier().getKeyValues().get(ENTITY_TYPE_QNAME).toString(), - entityId, owner); + private void onPeerUp(final PeerUp peerUp) { + LOG.debug("{}: onPeerUp: {}", persistenceId(), peerUp); - LOG.info("{}: Found entity {}, removing candidate {}, path {}", persistenceId(), entityId, - owner, candidatePath); + downPeerMemberNames.remove(peerUp.getMemberName()); - modifications.addModification(new DeleteModification(candidatePath)); - } - } - }); + // Notify the coordinator to check if pending modifications need to be sent. We do this here + // to handle the case where the leader's peer address isn't known yet when a prior state or + // leader change occurred. + commitCoordinator.onStateChanged(this, isLeader()); - commitCoordinator.commitModifications(modifications, this); + if (isLeader()) { + // Try to assign owners for entities that have no current owner. It's possible the peer that is now up + // had previously registered as a candidate and was the only candidate but the owner write tx couldn't be + // committed due to a leader change. Eg, the leader is able to successfully commit the candidate add tx but + // becomes isolated before it can commit the owner change and switches to follower. The majority partition + // with a new leader has the candidate but the entity has no owner. When the partition is healed and the + // previously isolated leader reconnects, we'll receive onPeerUp and, if there's still no owner, the + // previous leader will gain ownership. + selectNewOwnerForEntitiesOwnedBy(ImmutableSet.of("")); + } } - private boolean hasCandidate(MapEntryNode entity, String candidateName) { - return ((MapNode)entity.getChild(CANDIDATE_NODE_ID).get()).getChild(candidateNodeKey(candidateName)).isPresent(); + private static Collection getCandidateNames(final MapEntryNode entity) { + return entity.getChild(CANDIDATE_NODE_ID).map(child -> { + Collection candidates = ((MapNode) child).getValue(); + Collection candidateNames = new ArrayList<>(candidates.size()); + for (MapEntryNode candidate: candidates) { + candidateNames.add(candidate.getChild(CANDIDATE_NAME_NODE_ID).get().getValue().toString()); + } + return candidateNames; + }).orElse(ImmutableList.of()); } - private void searchForEntitiesOwnedBy(final String owner, final EntityWalker walker) { - DataTreeSnapshot snapshot = getDataStore().getDataTree().takeSnapshot(); - Optional> possibleEntityTypes = snapshot.readNode(ENTITY_TYPES_PATH); - if(!possibleEntityTypes.isPresent()) { - return; - } + private void searchForEntitiesOwnedBy(final Set ownedBy, final EntityWalker walker) { + LOG.debug("{}: Searching for entities owned by {}", persistenceId(), ownedBy); - LOG.debug("{}: Searching for entities owned by {}", persistenceId(), owner); + searchForEntities((entityTypeNode, entityNode) -> { + Optional> possibleOwner = + entityNode.getChild(ENTITY_OWNER_NODE_ID); + String currentOwner = possibleOwner.isPresent() ? possibleOwner.get().getValue().toString() : ""; + if (ownedBy.contains(currentOwner)) { + walker.onEntity(entityTypeNode, entityNode); + } + }); + } - searchForEntities(new EntityWalker() { - @Override - public void onEntity(MapEntryNode entityTypeNode, MapEntryNode entityNode) { - Optional> possibleOwner = - entityNode.getChild(ENTITY_OWNER_NODE_ID); - if(possibleOwner.isPresent() && owner.equals(possibleOwner.get().getValue().toString())) { - walker.onEntity(entityTypeNode, entityNode); - } + private void removeCandidateFromEntities(final MemberName member) { + final List modifications = new ArrayList<>(); + searchForEntities((entityTypeNode, entityNode) -> { + if (hasCandidate(entityNode, member)) { + YangInstanceIdentifier entityId = (YangInstanceIdentifier) entityNode.getIdentifier() + .getValue(ENTITY_ID_QNAME); + YangInstanceIdentifier candidatePath = candidatePath(entityTypeNode.getIdentifier() + .getValue(ENTITY_TYPE_QNAME).toString(), entityId, member.getName()); + + LOG.info("{}: Found entity {}, removing candidate {}, path {}", persistenceId(), entityId, + member, candidatePath); + + modifications.add(new DeleteModification(candidatePath)); } }); + + commitCoordinator.commitModifications(modifications, this); } - private void searchForEntities(EntityWalker walker) { - DataTreeSnapshot snapshot = getDataStore().getDataTree().takeSnapshot(); - Optional> possibleEntityTypes = snapshot.readNode(ENTITY_TYPES_PATH); - if(!possibleEntityTypes.isPresent()) { + private static boolean hasCandidate(final MapEntryNode entity, final MemberName candidateName) { + return entity.getChild(CANDIDATE_NODE_ID) + .flatMap(child -> ((MapNode)child).getChild(candidateNodeKey(candidateName.getName()))) + .isPresent(); + } + + private void searchForEntities(final EntityWalker walker) { + Optional> possibleEntityTypes = getDataStore().readNode(ENTITY_TYPES_PATH); + if (!possibleEntityTypes.isPresent()) { return; } - for(MapEntryNode entityType: ((MapNode) possibleEntityTypes.get()).getValue()) { - Optional> possibleEntities = - entityType.getChild(ENTITY_NODE_ID); - if(!possibleEntities.isPresent()) { - continue; // shouldn't happen but handle anyway + for (MapEntryNode entityType : ((MapNode) possibleEntityTypes.get()).getValue()) { + Optional> possibleEntities = entityType.getChild(ENTITY_NODE_ID); + if (!possibleEntities.isPresent()) { + // shouldn't happen but handle anyway + continue; } - for(MapEntryNode entity: ((MapNode) possibleEntities.get()).getValue()) { + for (MapEntryNode entity: ((MapNode) possibleEntities.get()).getValue()) { walker.onEntity(entityType, entity); } } } - private Collection getCandidateNames(MapEntryNode entity) { - Collection candidates = ((MapNode)entity.getChild(CANDIDATE_NODE_ID).get()).getValue(); - Collection candidateNames = new ArrayList<>(candidates.size()); - for(MapEntryNode candidate: candidates) { - candidateNames.add(candidate.getChild(CANDIDATE_NAME_NODE_ID).get().getValue().toString()); - } - - return candidateNames; - } - - private void writeNewOwner(YangInstanceIdentifier entityPath, String newOwner) { + private void writeNewOwner(final YangInstanceIdentifier entityPath, final String newOwner) { LOG.debug("{}: Writing new owner {} for entity {}", persistenceId(), newOwner, entityPath); commitCoordinator.commitModification(new WriteModification(entityPath.node(ENTITY_OWNER_QNAME), ImmutableNodes.leafNode(ENTITY_OWNER_NODE_ID, newOwner)), this); } - private String newOwner(Collection candidates) { - for(String candidate: candidates) { - if(!downPeerMemberNames.contains(candidate)) { - return candidate; - } - } + /** + * Schedule a new owner selection job. Cancelling any outstanding job if it has not been cancelled. + */ + private void scheduleOwnerSelection(final YangInstanceIdentifier entityPath, final Collection allCandidates, + final EntityOwnerSelectionStrategy strategy) { + cancelOwnerSelectionTask(entityPath); - return ""; + LOG.debug("{}: Scheduling owner selection after {} ms", persistenceId(), strategy.getSelectionDelayInMillis()); + + final Cancellable lastScheduledTask = context().system().scheduler().scheduleOnce( + FiniteDuration.apply(strategy.getSelectionDelayInMillis(), TimeUnit.MILLISECONDS), self(), + new SelectOwner(entityPath, allCandidates, strategy), context().system().dispatcher(), self()); + + entityToScheduledOwnershipTask.put(entityPath, lastScheduledTask); } - private String getCurrentOwner(YangInstanceIdentifier entityId) { - DataTreeSnapshot snapshot = getDataStore().getDataTree().takeSnapshot(); - Optional> optionalEntityOwner = snapshot.readNode(entityId.node(ENTITY_OWNER_QNAME)); - if(optionalEntityOwner.isPresent()){ - return optionalEntityOwner.get().getValue().toString(); + private void cancelOwnerSelectionTask(final YangInstanceIdentifier entityPath) { + final Cancellable lastScheduledTask = entityToScheduledOwnershipTask.get(entityPath); + if (lastScheduledTask != null && !lastScheduledTask.isCancelled()) { + lastScheduledTask.cancel(); } - return null; } - public static Props props(final ShardIdentifier name, final Map peerAddresses, - final DatastoreContext datastoreContext, final SchemaContext schemaContext, final String localMemberName) { - return Props.create(new Creator(name, peerAddresses, datastoreContext, schemaContext, localMemberName)); + private String newOwner(final String currentOwner, final Collection candidates, + final EntityOwnerSelectionStrategy ownerSelectionStrategy) { + Collection viableCandidates = getViableCandidates(candidates); + if (viableCandidates.isEmpty()) { + return ""; + } + return ownerSelectionStrategy.newOwner(currentOwner, viableCandidates); } - private static class Creator extends AbstractShardCreator { - private static final long serialVersionUID = 1L; + private Collection getViableCandidates(final Collection candidates) { + Map memberToVotingState = new HashMap<>(); + getRaftActorContext().getPeers().forEach(peerInfo -> memberToVotingState.put( + ShardIdentifier.fromShardIdString(peerInfo.getId()).getMemberName(), peerInfo.getVotingState())); - private final String localMemberName; + Collection viableCandidates = new ArrayList<>(); - Creator(final ShardIdentifier name, final Map peerAddresses, - final DatastoreContext datastoreContext, final SchemaContext schemaContext, - final String localMemberName) { - super(name, peerAddresses, datastoreContext, schemaContext); - this.localMemberName = localMemberName; + for (String candidate : candidates) { + MemberName memberName = MemberName.forName(candidate); + if (memberToVotingState.get(memberName) != VotingState.NON_VOTING + && !downPeerMemberNames.contains(memberName)) { + viableCandidates.add(candidate); + } } + return viableCandidates; + } - @Override - public Shard create() throws Exception { - return new EntityOwnershipShard(name, peerAddresses, datastoreContext, schemaContext, localMemberName); - } + private String getCurrentOwner(final YangInstanceIdentifier entityId) { + return getDataStore().readNode(entityId.node(ENTITY_OWNER_QNAME)) + .map(owner -> owner.getValue().toString()) + .orElse(null); } - private static interface EntityWalker { + @FunctionalInterface + private interface EntityWalker { void onEntity(MapEntryNode entityTypeNode, MapEntryNode entityNode); } + + public static Builder newBuilder() { + return new Builder(); + } + + static class Builder extends Shard.AbstractBuilder { + private MemberName localMemberName; + private EntityOwnerSelectionStrategyConfig ownerSelectionStrategyConfig; + + protected Builder() { + super(EntityOwnershipShard.class); + } + + Builder localMemberName(final MemberName newLocalMemberName) { + checkSealed(); + this.localMemberName = newLocalMemberName; + return this; + } + + Builder ownerSelectionStrategyConfig(final EntityOwnerSelectionStrategyConfig newOwnerSelectionStrategyConfig) { + checkSealed(); + this.ownerSelectionStrategyConfig = newOwnerSelectionStrategyConfig; + return this; + } + + @Override + protected void verify() { + super.verify(); + Preconditions.checkNotNull(localMemberName, "localMemberName should not be null"); + Preconditions.checkNotNull(ownerSelectionStrategyConfig, "ownerSelectionStrategyConfig should not be null"); + } + } }