Bug 6540: EOS - handle edge case with pruning pending owner change commits
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / entityownership / EntityOwnershipShard.java
1 /*
2  * Copyright (c) 2015 Brocade Communications Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.datastore.entityownership;
9
10 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.CANDIDATE_NAME_NODE_ID;
11 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.CANDIDATE_NODE_ID;
12 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_ID_NODE_ID;
13 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_ID_QNAME;
14 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_NODE_ID;
15 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_OWNERS_PATH;
16 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_OWNER_NODE_ID;
17 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_OWNER_QNAME;
18 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_TYPES_PATH;
19 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_TYPE_NODE_ID;
20 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.ENTITY_TYPE_QNAME;
21 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.candidateNodeKey;
22 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.candidatePath;
23 import static org.opendaylight.controller.cluster.datastore.entityownership.EntityOwnersModel.entityOwnersWithCandidate;
24 import akka.actor.ActorRef;
25 import akka.actor.ActorSelection;
26 import akka.actor.Cancellable;
27 import akka.cluster.Cluster;
28 import akka.cluster.Member;
29 import akka.cluster.MemberStatus;
30 import akka.cluster.ClusterEvent.CurrentClusterState;
31 import akka.pattern.Patterns;
32 import com.google.common.base.Optional;
33 import com.google.common.base.Preconditions;
34 import com.google.common.base.Strings;
35 import com.google.common.collect.ImmutableSet;
36 import java.util.ArrayList;
37 import java.util.Collection;
38 import java.util.HashMap;
39 import java.util.HashSet;
40 import java.util.Map;
41 import java.util.Set;
42 import java.util.concurrent.TimeUnit;
43 import org.opendaylight.controller.cluster.access.concepts.MemberName;
44 import org.opendaylight.controller.cluster.datastore.DatastoreContext;
45 import org.opendaylight.controller.cluster.datastore.Shard;
46 import org.opendaylight.controller.cluster.datastore.entityownership.messages.CandidateAdded;
47 import org.opendaylight.controller.cluster.datastore.entityownership.messages.CandidateRemoved;
48 import org.opendaylight.controller.cluster.datastore.entityownership.messages.RegisterCandidateLocal;
49 import org.opendaylight.controller.cluster.datastore.entityownership.messages.RegisterListenerLocal;
50 import org.opendaylight.controller.cluster.datastore.entityownership.messages.RemoveAllCandidates;
51 import org.opendaylight.controller.cluster.datastore.entityownership.messages.SelectOwner;
52 import org.opendaylight.controller.cluster.datastore.entityownership.messages.UnregisterCandidateLocal;
53 import org.opendaylight.controller.cluster.datastore.entityownership.messages.UnregisterListenerLocal;
54 import org.opendaylight.controller.cluster.datastore.entityownership.selectionstrategy.EntityOwnerSelectionStrategy;
55 import org.opendaylight.controller.cluster.datastore.entityownership.selectionstrategy.EntityOwnerSelectionStrategyConfig;
56 import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
57 import org.opendaylight.controller.cluster.datastore.messages.PeerDown;
58 import org.opendaylight.controller.cluster.datastore.messages.PeerUp;
59 import org.opendaylight.controller.cluster.datastore.messages.SuccessReply;
60 import org.opendaylight.controller.cluster.datastore.modification.DeleteModification;
61 import org.opendaylight.controller.cluster.datastore.modification.MergeModification;
62 import org.opendaylight.controller.cluster.datastore.modification.WriteModification;
63 import org.opendaylight.controller.cluster.raft.RaftState;
64 import org.opendaylight.mdsal.eos.dom.api.DOMEntity;
65 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier;
66 import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier.PathArgument;
67 import org.opendaylight.yangtools.yang.data.api.schema.DataContainerChild;
68 import org.opendaylight.yangtools.yang.data.api.schema.MapEntryNode;
69 import org.opendaylight.yangtools.yang.data.api.schema.MapNode;
70 import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode;
71 import org.opendaylight.yangtools.yang.data.impl.schema.ImmutableNodes;
72 import scala.concurrent.Future;
73 import scala.concurrent.duration.FiniteDuration;
74
75 /**
76  * Special Shard for EntityOwnership.
77  *
78  * @author Thomas Pantelis
79  */
80 class EntityOwnershipShard extends Shard {
81     private final MemberName localMemberName;
82     private final EntityOwnershipShardCommitCoordinator commitCoordinator;
83     private final EntityOwnershipListenerSupport listenerSupport;
84     private final Set<MemberName> downPeerMemberNames = new HashSet<>();
85     private final EntityOwnerSelectionStrategyConfig strategyConfig;
86     private final Map<YangInstanceIdentifier, Cancellable> entityToScheduledOwnershipTask = new HashMap<>();
87     private final EntityOwnershipStatistics entityOwnershipStatistics;
88     private boolean removeAllInitialCandidates = true;
89
90     private static DatastoreContext noPersistenceDatastoreContext(DatastoreContext datastoreContext) {
91         return DatastoreContext.newBuilderFrom(datastoreContext).persistent(false).build();
92     }
93
94     protected EntityOwnershipShard(Builder builder) {
95         super(builder);
96         this.localMemberName = builder.localMemberName;
97         this.commitCoordinator = new EntityOwnershipShardCommitCoordinator(builder.localMemberName, LOG);
98         this.listenerSupport = new EntityOwnershipListenerSupport(getContext(), persistenceId());
99         this.strategyConfig = builder.ownerSelectionStrategyConfig;
100         this.entityOwnershipStatistics = new EntityOwnershipStatistics();
101         this.entityOwnershipStatistics.init(getDataStore());
102     }
103
104     @Override
105     protected void onDatastoreContext(DatastoreContext context) {
106         super.onDatastoreContext(noPersistenceDatastoreContext(context));
107     }
108
109     @Override
110     protected void onRecoveryComplete() {
111         super.onRecoveryComplete();
112
113         new CandidateListChangeListener(getSelf(), persistenceId()).init(getDataStore());
114         new EntityOwnerChangeListener(localMemberName, listenerSupport).init(getDataStore());
115     }
116
117     @Override
118     public void handleNonRaftCommand(final Object message) {
119         if(message instanceof RegisterCandidateLocal) {
120             onRegisterCandidateLocal((RegisterCandidateLocal) message);
121         } else if(message instanceof UnregisterCandidateLocal) {
122             onUnregisterCandidateLocal((UnregisterCandidateLocal)message);
123         } else if(message instanceof CandidateAdded){
124             onCandidateAdded((CandidateAdded) message);
125         } else if(message instanceof CandidateRemoved){
126             onCandidateRemoved((CandidateRemoved) message);
127         } else if(message instanceof PeerDown) {
128             onPeerDown((PeerDown) message);
129         } else if(message instanceof PeerUp) {
130             onPeerUp((PeerUp) message);
131         } else if(message instanceof RegisterListenerLocal) {
132             onRegisterListenerLocal((RegisterListenerLocal)message);
133         } else if(message instanceof UnregisterListenerLocal) {
134             onUnregisterListenerLocal((UnregisterListenerLocal) message);
135         } else if(message instanceof SelectOwner) {
136             onSelectOwner((SelectOwner) message);
137         } else if(message instanceof RemoveAllCandidates) {
138             onRemoveAllCandidates((RemoveAllCandidates) message);
139         } else if(!commitCoordinator.handleMessage(message, this)) {
140             super.handleNonRaftCommand(message);
141         }
142     }
143
144     private void onRemoveAllCandidates(RemoveAllCandidates message) {
145         LOG.debug("{}: onRemoveAllCandidates: {}", persistenceId(), message);
146
147         removeCandidateFromEntities(message.getMemberName());
148     }
149
150     private void onSelectOwner(SelectOwner selectOwner) {
151         LOG.debug("{}: onSelectOwner: {}", persistenceId(), selectOwner);
152
153         String currentOwner = getCurrentOwner(selectOwner.getEntityPath());
154         if(Strings.isNullOrEmpty(currentOwner)) {
155             writeNewOwner(selectOwner.getEntityPath(), newOwner(currentOwner, selectOwner.getAllCandidates(),
156                     selectOwner.getOwnerSelectionStrategy()));
157
158             Cancellable cancellable = entityToScheduledOwnershipTask.get(selectOwner.getEntityPath());
159             if(cancellable != null){
160                 if(!cancellable.isCancelled()){
161                     cancellable.cancel();
162                 }
163                 entityToScheduledOwnershipTask.remove(selectOwner.getEntityPath());
164             }
165         }
166     }
167
168     private void onRegisterCandidateLocal(RegisterCandidateLocal registerCandidate) {
169         LOG.debug("{}: onRegisterCandidateLocal: {}", persistenceId(), registerCandidate);
170
171         listenerSupport.setHasCandidateForEntity(registerCandidate.getEntity());
172
173         NormalizedNode<?, ?> entityOwners = entityOwnersWithCandidate(registerCandidate.getEntity().getType(),
174                 registerCandidate.getEntity().getIdentifier(), localMemberName.getName());
175         commitCoordinator.commitModification(new MergeModification(ENTITY_OWNERS_PATH, entityOwners), this);
176
177         getSender().tell(SuccessReply.INSTANCE, getSelf());
178     }
179
180     private void onUnregisterCandidateLocal(UnregisterCandidateLocal unregisterCandidate) {
181         LOG.debug("{}: onUnregisterCandidateLocal: {}", persistenceId(), unregisterCandidate);
182
183         DOMEntity entity = unregisterCandidate.getEntity();
184         listenerSupport.unsetHasCandidateForEntity(entity);
185
186         YangInstanceIdentifier candidatePath = candidatePath(entity.getType(), entity.getIdentifier(), localMemberName.getName());
187         commitCoordinator.commitModification(new DeleteModification(candidatePath), this);
188
189         getSender().tell(SuccessReply.INSTANCE, getSelf());
190     }
191
192     private void onRegisterListenerLocal(final RegisterListenerLocal registerListener) {
193         LOG.debug("{}: onRegisterListenerLocal: {}", persistenceId(), registerListener);
194
195         listenerSupport.addEntityOwnershipListener(registerListener.getEntityType(), registerListener.getListener());
196
197         getSender().tell(SuccessReply.INSTANCE, getSelf());
198
199         searchForEntities((entityTypeNode, entityNode) -> {
200             Optional<DataContainerChild<?, ?>> possibleType = entityTypeNode.getChild(ENTITY_TYPE_NODE_ID);
201             String entityType = possibleType.isPresent() ? possibleType.get().getValue().toString() : null;
202             if (registerListener.getEntityType().equals(entityType)) {
203                 final boolean hasOwner;
204                 final boolean isOwner;
205
206                 Optional<DataContainerChild<?, ?>> possibleOwner = entityNode.getChild(ENTITY_OWNER_NODE_ID);
207                 if (possibleOwner.isPresent()) {
208                     isOwner = localMemberName.getName().equals(possibleOwner.get().getValue().toString());
209                     hasOwner = true;
210                 } else {
211                     isOwner = false;
212                     hasOwner = false;
213                 }
214
215                 DOMEntity entity = new DOMEntity(entityType,
216                     (YangInstanceIdentifier) entityNode.getChild(ENTITY_ID_NODE_ID).get().getValue());
217
218                 listenerSupport.notifyEntityOwnershipListener(entity, false, isOwner, hasOwner,
219                     registerListener.getListener());
220             }
221         });
222     }
223
224     private void onUnregisterListenerLocal(UnregisterListenerLocal unregisterListener) {
225         LOG.debug("{}: onUnregisterListenerLocal: {}", persistenceId(), unregisterListener);
226
227         listenerSupport.removeEntityOwnershipListener(unregisterListener.getEntityType(), unregisterListener.getListener());
228
229         getSender().tell(SuccessReply.INSTANCE, getSelf());
230     }
231
232     void tryCommitModifications(final BatchedModifications modifications) {
233         if(isLeader()) {
234             LOG.debug("{}: Committing BatchedModifications {} locally", persistenceId(), modifications.getTransactionID());
235
236             // Note that it's possible the commit won't get consensus and will timeout and not be applied
237             // to the state. However we don't need to retry it in that case b/c it will be committed to
238             // the journal first and, once a majority of followers come back on line and it is replicated,
239             // it will be applied at that point.
240             handleBatchedModificationsLocal(modifications, self());
241         } else {
242             final ActorSelection leader = getLeader();
243             if (leader != null) {
244                 possiblyRemoveAllInitialCandidates(leader);
245
246                 if(LOG.isDebugEnabled()) {
247                     LOG.debug("{}: Sending BatchedModifications {} to leader {}", persistenceId(),
248                             modifications.getTransactionID(), leader);
249                 }
250
251                 Future<Object> future = Patterns.ask(leader, modifications, TimeUnit.SECONDS.toMillis(
252                         getDatastoreContext().getShardTransactionCommitTimeoutInSeconds()));
253
254                 Patterns.pipe(future, getContext().dispatcher()).pipeTo(getSelf(), ActorRef.noSender());
255             }
256         }
257     }
258
259     void possiblyRemoveAllInitialCandidates(ActorSelection leader) {
260         // The following handles removing all candidates on startup when re-joining with a remote leader. When a
261         // follower is detected as down, the leader will re-assign new owners to entities that were owned by the
262         // down member but doesn't remove the down member as a candidate, as the down node may actually be isolated
263         // and still running. Therefore on startup we send an initial message to the remote leader to remove any
264         // potential stale candidates we had previously registered, as it's possible a candidate may not be
265         // registered by a client in the new incarnation. We have to send the RemoveAllCandidates message prior to any
266         // pending registrations.
267         if(removeAllInitialCandidates && leader != null) {
268             removeAllInitialCandidates = false;
269             if(!isLeader()) {
270                 LOG.debug("{} - got new leader {} on startup - sending RemoveAllCandidates", persistenceId(), leader);
271
272                 leader.tell(new RemoveAllCandidates(localMemberName), ActorRef.noSender());
273             }
274         }
275     }
276
277     boolean hasLeader() {
278         return getLeader() != null && (!isLeader() || isLeaderActive());
279     }
280
281     /**
282      * Determine if we are in jeopardy based on observed RAFT state.
283      */
284     private static boolean inJeopardy(final RaftState state) {
285         switch (state) {
286             case Candidate:
287             case Follower:
288             case Leader:
289             case PreLeader:
290                 return false;
291             case IsolatedLeader:
292                 return true;
293         }
294         throw new IllegalStateException("Unsupported RAFT state " + state);
295     }
296
297     private void notifyAllListeners() {
298         searchForEntities((entityTypeNode, entityNode) -> {
299             Optional<DataContainerChild<?, ?>> possibleType = entityTypeNode.getChild(ENTITY_TYPE_NODE_ID);
300             if (possibleType.isPresent()) {
301                 final boolean hasOwner;
302                 final boolean isOwner;
303
304                 Optional<DataContainerChild<?, ?>> possibleOwner = entityNode.getChild(ENTITY_OWNER_NODE_ID);
305                 if (possibleOwner.isPresent()) {
306                     isOwner = localMemberName.getName().equals(possibleOwner.get().getValue().toString());
307                     hasOwner = true;
308                 } else {
309                     isOwner = false;
310                     hasOwner = false;
311                 }
312
313                 DOMEntity entity = new DOMEntity(possibleType.get().getValue().toString(),
314                     (YangInstanceIdentifier) entityNode.getChild(ENTITY_ID_NODE_ID).get().getValue());
315
316                 listenerSupport.notifyEntityOwnershipListeners(entity, isOwner, isOwner, hasOwner);
317             }
318         });
319     }
320
321     @Override
322     protected void onStateChanged() {
323         boolean isLeader = isLeader();
324         LOG.debug("{}: onStateChanged: isLeader: {}, hasLeader: {}", persistenceId(), isLeader, hasLeader());
325
326         // Examine current RAFT state to see if we are in jeopardy, potentially notifying all listeners
327         final boolean inJeopardy = inJeopardy(getRaftState());
328         final boolean wasInJeopardy = listenerSupport.setInJeopardy(inJeopardy);
329         if (inJeopardy != wasInJeopardy) {
330             LOG.debug("{}: {} jeopardy state, notifying all listeners", persistenceId(),
331                 inJeopardy ? "entered" : "left");
332             notifyAllListeners();
333         }
334
335         commitCoordinator.onStateChanged(this, isLeader);
336
337         super.onStateChanged();
338     }
339
340     @Override
341     protected void onLeaderChanged(String oldLeader, String newLeader) {
342         boolean isLeader = isLeader();
343         LOG.debug("{}: onLeaderChanged: oldLeader: {}, newLeader: {}, isLeader: {}", persistenceId(), oldLeader,
344                 newLeader, isLeader);
345
346         if (isLeader) {
347
348             // Re-initialize the downPeerMemberNames from the current akka Cluster state. The previous leader, if any,
349             // is most likely down however it's possible we haven't received the PeerDown message yet.
350             initializeDownPeerMemberNamesFromClusterState();
351
352             // Clear all existing strategies so that they get re-created when we call createStrategy again
353             // This allows the strategies to be re-initialized with existing statistics maintained by
354             // EntityOwnershipStatistics
355             strategyConfig.clearStrategies();
356
357             // Re-assign owners for all members that are known to be down. In a cluster which has greater than
358             // 3 nodes it is possible for some node beside the leader being down when the leadership transitions
359             // it makes sense to use this event to re-assign owners for those downed nodes.
360             Set<String> ownedBy = new HashSet<>(downPeerMemberNames.size() + 1);
361             for (MemberName downPeerName : downPeerMemberNames) {
362                 ownedBy.add(downPeerName.getName());
363             }
364
365             // Also try to assign owners for entities that have no current owner. See explanation in onPeerUp.
366             ownedBy.add("");
367             selectNewOwnerForEntitiesOwnedBy(ownedBy);
368         } else {
369             // The leader changed - notify the coordinator to check if pending modifications need to be sent.
370             // While onStateChanged also does this, this method handles the case where the shard hears from a
371             // leader and stays in the follower state. In that case no behavior state change occurs.
372             commitCoordinator.onStateChanged(this, isLeader);
373         }
374
375         super.onLeaderChanged(oldLeader, newLeader);
376     }
377
378     private void initializeDownPeerMemberNamesFromClusterState() {
379         java.util.Optional<Cluster> cluster = getRaftActorContext().getCluster();
380         if(!cluster.isPresent()) {
381             return;
382         }
383
384         CurrentClusterState state = cluster.get().state();
385         Set<Member> unreachable = state.getUnreachable();
386
387         LOG.debug("{}: initializeDownPeerMemberNamesFromClusterState - current downPeerMemberNames: {}, unreachable: {}",
388                 persistenceId(), downPeerMemberNames, unreachable);
389
390         downPeerMemberNames.clear();
391         for(Member m: unreachable) {
392             downPeerMemberNames.add(MemberName.forName(m.getRoles().iterator().next()));
393         }
394
395         for(Member m: state.getMembers()) {
396             if(m.status() != MemberStatus.up() && m.status() != MemberStatus.weaklyUp()) {
397                 LOG.debug("{}: Adding down member with status {}", persistenceId(), m.status());
398                 downPeerMemberNames.add(MemberName.forName(m.getRoles().iterator().next()));
399             }
400         }
401
402         LOG.debug("{}: new downPeerMemberNames: {}", persistenceId(), downPeerMemberNames);
403     }
404
405     private void onCandidateRemoved(CandidateRemoved message) {
406         LOG.debug("{}: onCandidateRemoved: {}", persistenceId(), message);
407
408         if(isLeader()) {
409             String currentOwner = getCurrentOwner(message.getEntityPath());
410             writeNewOwner(message.getEntityPath(),
411                     newOwner(currentOwner, message.getRemainingCandidates(), getEntityOwnerElectionStrategy(message.getEntityPath())));
412         }
413     }
414
415     private EntityOwnerSelectionStrategy getEntityOwnerElectionStrategy(YangInstanceIdentifier entityPath) {
416         final String entityType = EntityOwnersModel.entityTypeFromEntityPath(entityPath);
417         return strategyConfig.createStrategy(entityType, entityOwnershipStatistics.byEntityType(entityType));
418     }
419
420     private void onCandidateAdded(CandidateAdded message) {
421         if(!isLeader()){
422             return;
423         }
424
425         LOG.debug("{}: onCandidateAdded: {}", persistenceId(), message);
426
427         // Since a node's candidate member is only added by the node itself, we can assume the node is up so
428         // remove it from the downPeerMemberNames.
429         downPeerMemberNames.remove(MemberName.forName(message.getNewCandidate()));
430
431         final String currentOwner = getCurrentOwner(message.getEntityPath());
432         final EntityOwnerSelectionStrategy strategy = getEntityOwnerElectionStrategy(message.getEntityPath());
433
434         // Available members is all the known peers - the number of peers that are down + self
435         // So if there are 2 peers and 1 is down then availableMembers will be 2
436         final int availableMembers = getRaftActorContext().getPeerIds().size() - downPeerMemberNames.size() + 1;
437
438         LOG.debug("{}: Using strategy {} to select owner, currentOwner = {}", persistenceId(), strategy, currentOwner);
439
440         if(strategy.getSelectionDelayInMillis() == 0L) {
441             writeNewOwner(message.getEntityPath(), newOwner(currentOwner, message.getAllCandidates(),
442                     strategy));
443         } else if(message.getAllCandidates().size() == availableMembers) {
444             LOG.debug("{}: Received the maximum candidates requests : {} writing new owner",
445                     persistenceId(), availableMembers);
446             cancelOwnerSelectionTask(message.getEntityPath());
447             writeNewOwner(message.getEntityPath(), newOwner(currentOwner, message.getAllCandidates(),
448                     strategy));
449         } else {
450             scheduleOwnerSelection(message.getEntityPath(), message.getAllCandidates(), strategy);
451         }
452     }
453
454     private void onPeerDown(PeerDown peerDown) {
455         LOG.info("{}: onPeerDown: {}", persistenceId(), peerDown);
456
457         MemberName downMemberName = peerDown.getMemberName();
458         if(downPeerMemberNames.add(downMemberName) && isLeader()) {
459             // Select new owners for entities owned by the down peer and which have other candidates. For an entity for
460             // which the down peer is the only candidate, we leave it as the owner and don't clear it. This is done to
461             // handle the case where the peer member process is actually still running but the node is partitioned.
462             // When the partition is healed, the peer just remains as the owner. If the peer process actually restarted,
463             // it will first remove all its candidates on startup. If another candidate is registered during the time
464             // the peer is down, the new candidate will be selected as the new owner.
465
466             selectNewOwnerForEntitiesOwnedBy(ImmutableSet.of(downMemberName.getName()));
467         }
468     }
469
470     private void selectNewOwnerForEntitiesOwnedBy(Set<String> ownedBy) {
471         final BatchedModifications modifications = commitCoordinator.newBatchedModifications();
472         searchForEntitiesOwnedBy(ownedBy, (entityTypeNode, entityNode) -> {
473             YangInstanceIdentifier entityPath = YangInstanceIdentifier.builder(ENTITY_TYPES_PATH).
474                     node(entityTypeNode.getIdentifier()).node(ENTITY_NODE_ID).node(entityNode.getIdentifier()).
475                     node(ENTITY_OWNER_NODE_ID).build();
476             String newOwner = newOwner(getCurrentOwner(entityPath), getCandidateNames(entityNode),
477                     getEntityOwnerElectionStrategy(entityPath));
478
479             if(!newOwner.isEmpty()) {
480                 LOG.debug("{}: Found entity {}, writing new owner {}", persistenceId(), entityPath, newOwner);
481
482                 modifications.addModification(new WriteModification(entityPath,
483                         ImmutableNodes.leafNode(ENTITY_OWNER_NODE_ID, newOwner)));
484
485             } else {
486                 LOG.debug("{}: Found entity {} but no other candidates - not clearing owner", persistenceId(),
487                         entityPath, newOwner);
488             }
489         });
490
491         commitCoordinator.commitModifications(modifications, this);
492     }
493
494     private void onPeerUp(PeerUp peerUp) {
495         LOG.debug("{}: onPeerUp: {}", persistenceId(), peerUp);
496
497         downPeerMemberNames.remove(peerUp.getMemberName());
498
499         // Notify the coordinator to check if pending modifications need to be sent. We do this here
500         // to handle the case where the leader's peer address isn't known yet when a prior state or
501         // leader change occurred.
502         commitCoordinator.onStateChanged(this, isLeader());
503
504         if(isLeader()) {
505             // Try to assign owners for entities that have no current owner. It's possible the peer that is now up
506             // had previously registered as a candidate and was the only candidate but the owner write tx couldn't be
507             // committed due to a leader change. Eg, the leader is able to successfully commit the candidate add tx but
508             // becomes isolated before it can commit the owner change and switches to follower. The majority partition
509             // with a new leader has the candidate but the entity has no owner. When the partition is healed and the
510             // previously isolated leader reconnects, we'll receive onPeerUp and, if there's still no owner, the
511             // previous leader will gain ownership.
512             selectNewOwnerForEntitiesOwnedBy(ImmutableSet.of(""));
513         }
514     }
515
516     private Collection<String> getCandidateNames(MapEntryNode entity) {
517         Collection<MapEntryNode> candidates = ((MapNode)entity.getChild(CANDIDATE_NODE_ID).get()).getValue();
518         Collection<String> candidateNames = new ArrayList<>(candidates.size());
519         for(MapEntryNode candidate: candidates) {
520             candidateNames.add(candidate.getChild(CANDIDATE_NAME_NODE_ID).get().getValue().toString());
521         }
522
523         return candidateNames;
524     }
525
526     private void searchForEntitiesOwnedBy(Set<String> ownedBy, EntityWalker walker) {
527         LOG.debug("{}: Searching for entities owned by {}", persistenceId(), ownedBy);
528
529         searchForEntities((entityTypeNode, entityNode) -> {
530             Optional<DataContainerChild<? extends PathArgument, ?>> possibleOwner =
531                     entityNode.getChild(ENTITY_OWNER_NODE_ID);
532             String currentOwner = possibleOwner.isPresent() ? possibleOwner.get().getValue().toString() : "";
533             if(ownedBy.contains(currentOwner)) {
534                 walker.onEntity(entityTypeNode, entityNode);
535             }
536         });
537     }
538
539     private void removeCandidateFromEntities(final MemberName member) {
540         final BatchedModifications modifications = commitCoordinator.newBatchedModifications();
541         searchForEntities((entityTypeNode, entityNode) -> {
542             if (hasCandidate(entityNode, member)) {
543                 YangInstanceIdentifier entityId =
544                         (YangInstanceIdentifier) entityNode.getIdentifier().getKeyValues().get(ENTITY_ID_QNAME);
545                 YangInstanceIdentifier candidatePath = candidatePath(
546                         entityTypeNode.getIdentifier().getKeyValues().get(ENTITY_TYPE_QNAME).toString(),
547                         entityId, member.getName());
548
549                 LOG.info("{}: Found entity {}, removing candidate {}, path {}", persistenceId(), entityId,
550                         member, candidatePath);
551
552                 modifications.addModification(new DeleteModification(candidatePath));
553             }
554         });
555
556         commitCoordinator.commitModifications(modifications, this);
557     }
558
559     private static boolean hasCandidate(MapEntryNode entity, MemberName candidateName) {
560         return ((MapNode)entity.getChild(CANDIDATE_NODE_ID).get()).getChild(candidateNodeKey(candidateName.getName()))
561                 .isPresent();
562     }
563
564     private void searchForEntities(EntityWalker walker) {
565         Optional<NormalizedNode<?, ?>> possibleEntityTypes = getDataStore().readNode(ENTITY_TYPES_PATH);
566         if(!possibleEntityTypes.isPresent()) {
567             return;
568         }
569
570         for(MapEntryNode entityType:  ((MapNode) possibleEntityTypes.get()).getValue()) {
571             Optional<DataContainerChild<?, ?>> possibleEntities = entityType.getChild(ENTITY_NODE_ID);
572             if(!possibleEntities.isPresent()) {
573                 // shouldn't happen but handle anyway
574                 continue;
575             }
576
577             for(MapEntryNode entity:  ((MapNode) possibleEntities.get()).getValue()) {
578                 walker.onEntity(entityType, entity);
579             }
580         }
581     }
582
583     private void writeNewOwner(YangInstanceIdentifier entityPath, String newOwner) {
584         LOG.debug("{}: Writing new owner {} for entity {}", persistenceId(), newOwner, entityPath);
585
586         commitCoordinator.commitModification(new WriteModification(entityPath.node(ENTITY_OWNER_QNAME),
587                 ImmutableNodes.leafNode(ENTITY_OWNER_NODE_ID, newOwner)), this);
588     }
589
590     /**
591      * Schedule a new owner selection job. Cancelling any outstanding job if it has not been cancelled.
592      *
593      * @param entityPath
594      * @param allCandidates
595      */
596     public void scheduleOwnerSelection(YangInstanceIdentifier entityPath, Collection<String> allCandidates,
597                                        EntityOwnerSelectionStrategy strategy){
598         cancelOwnerSelectionTask(entityPath);
599
600         LOG.debug("{}: Scheduling owner selection after {} ms", persistenceId(), strategy.getSelectionDelayInMillis());
601
602         final Cancellable lastScheduledTask = context().system().scheduler().scheduleOnce(
603                 FiniteDuration.apply(strategy.getSelectionDelayInMillis(), TimeUnit.MILLISECONDS)
604                 , self(), new SelectOwner(entityPath, allCandidates, strategy)
605                 , context().system().dispatcher(), self());
606
607         entityToScheduledOwnershipTask.put(entityPath, lastScheduledTask);
608     }
609
610     private void cancelOwnerSelectionTask(YangInstanceIdentifier entityPath){
611         final Cancellable lastScheduledTask = entityToScheduledOwnershipTask.get(entityPath);
612         if(lastScheduledTask != null && !lastScheduledTask.isCancelled()){
613             lastScheduledTask.cancel();
614         }
615     }
616
617     private String newOwner(String currentOwner, Collection<String> candidates, EntityOwnerSelectionStrategy ownerSelectionStrategy) {
618         Collection<String> viableCandidates = getViableCandidates(candidates);
619         if(viableCandidates.isEmpty()){
620             return "";
621         }
622         return ownerSelectionStrategy.newOwner(currentOwner, viableCandidates);
623     }
624
625     private Collection<String> getViableCandidates(Collection<String> candidates) {
626         Collection<String> viableCandidates = new ArrayList<>();
627
628         for (String candidate : candidates) {
629             if (!downPeerMemberNames.contains(MemberName.forName(candidate))) {
630                 viableCandidates.add(candidate);
631             }
632         }
633         return viableCandidates;
634     }
635
636     private String getCurrentOwner(YangInstanceIdentifier entityId) {
637         Optional<NormalizedNode<?, ?>> optionalEntityOwner = getDataStore().readNode(entityId.node(ENTITY_OWNER_QNAME));
638         if(optionalEntityOwner.isPresent()){
639             return optionalEntityOwner.get().getValue().toString();
640         }
641         return null;
642     }
643
644     @FunctionalInterface
645     private interface EntityWalker {
646         void onEntity(MapEntryNode entityTypeNode, MapEntryNode entityNode);
647     }
648
649     public static Builder newBuilder() {
650         return new Builder();
651     }
652
653     static class Builder extends Shard.AbstractBuilder<Builder, EntityOwnershipShard> {
654         private MemberName localMemberName;
655         private EntityOwnerSelectionStrategyConfig ownerSelectionStrategyConfig;
656
657         protected Builder() {
658             super(EntityOwnershipShard.class);
659         }
660
661         Builder localMemberName(MemberName localMemberName) {
662             checkSealed();
663             this.localMemberName = localMemberName;
664             return this;
665         }
666
667         Builder ownerSelectionStrategyConfig(EntityOwnerSelectionStrategyConfig ownerSelectionStrategyConfig){
668             checkSealed();
669             this.ownerSelectionStrategyConfig = ownerSelectionStrategyConfig;
670             return this;
671         }
672
673         @Override
674         protected void verify() {
675             super.verify();
676             Preconditions.checkNotNull(localMemberName, "localMemberName should not be null");
677             Preconditions.checkNotNull(ownerSelectionStrategyConfig, "ownerSelectionStrategyConfig should not be null");
678         }
679     }
680 }