c24156f506211c1ae7a61786da8809e71cd966b6
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / ShardManager.java
1 /*
2  * Copyright (c) 2014 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8
9 package org.opendaylight.controller.cluster.datastore;
10
11 import akka.actor.ActorPath;
12 import akka.actor.ActorRef;
13 import akka.actor.Address;
14 import akka.actor.Cancellable;
15 import akka.actor.OneForOneStrategy;
16 import akka.actor.Props;
17 import akka.actor.SupervisorStrategy;
18 import akka.cluster.ClusterEvent;
19 import akka.japi.Creator;
20 import akka.japi.Function;
21 import akka.persistence.RecoveryCompleted;
22 import akka.serialization.Serialization;
23 import com.google.common.annotations.VisibleForTesting;
24 import com.google.common.base.Objects;
25 import com.google.common.base.Optional;
26 import com.google.common.base.Preconditions;
27 import com.google.common.base.Strings;
28 import com.google.common.base.Supplier;
29 import com.google.common.collect.Sets;
30 import java.io.Serializable;
31 import java.util.ArrayList;
32 import java.util.Collection;
33 import java.util.HashMap;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.Map;
37 import java.util.Set;
38 import java.util.concurrent.CountDownLatch;
39 import java.util.concurrent.TimeUnit;
40 import org.opendaylight.controller.cluster.common.actor.AbstractUntypedPersistentActorWithMetering;
41 import org.opendaylight.controller.cluster.datastore.config.Configuration;
42 import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException;
43 import org.opendaylight.controller.cluster.datastore.exceptions.NotInitializedException;
44 import org.opendaylight.controller.cluster.datastore.exceptions.PrimaryNotFoundException;
45 import org.opendaylight.controller.cluster.datastore.identifiers.ShardIdentifier;
46 import org.opendaylight.controller.cluster.datastore.identifiers.ShardManagerIdentifier;
47 import org.opendaylight.controller.cluster.datastore.jmx.mbeans.shardmanager.ShardManagerInfo;
48 import org.opendaylight.controller.cluster.datastore.jmx.mbeans.shardmanager.ShardManagerInfoMBean;
49 import org.opendaylight.controller.cluster.datastore.messages.ActorInitialized;
50 import org.opendaylight.controller.cluster.datastore.messages.CreateShard;
51 import org.opendaylight.controller.cluster.datastore.messages.CreateShardReply;
52 import org.opendaylight.controller.cluster.datastore.messages.FindLocalShard;
53 import org.opendaylight.controller.cluster.datastore.messages.FindPrimary;
54 import org.opendaylight.controller.cluster.datastore.messages.LocalPrimaryShardFound;
55 import org.opendaylight.controller.cluster.datastore.messages.LocalShardFound;
56 import org.opendaylight.controller.cluster.datastore.messages.LocalShardNotFound;
57 import org.opendaylight.controller.cluster.datastore.messages.PeerAddressResolved;
58 import org.opendaylight.controller.cluster.datastore.messages.RemoteFindPrimary;
59 import org.opendaylight.controller.cluster.datastore.messages.RemotePrimaryShardFound;
60 import org.opendaylight.controller.cluster.datastore.messages.ShardLeaderStateChanged;
61 import org.opendaylight.controller.cluster.datastore.messages.SwitchShardBehavior;
62 import org.opendaylight.controller.cluster.datastore.messages.UpdateSchemaContext;
63 import org.opendaylight.controller.cluster.datastore.utils.Dispatchers;
64 import org.opendaylight.controller.cluster.datastore.utils.PrimaryShardInfoFutureCache;
65 import org.opendaylight.controller.cluster.notifications.RegisterRoleChangeListener;
66 import org.opendaylight.controller.cluster.notifications.RoleChangeNotification;
67 import org.opendaylight.controller.cluster.raft.RaftState;
68 import org.opendaylight.controller.cluster.raft.base.messages.FollowerInitialSyncUpStatus;
69 import org.opendaylight.controller.cluster.raft.base.messages.SwitchBehavior;
70 import org.opendaylight.yangtools.yang.data.api.schema.tree.DataTree;
71 import org.opendaylight.yangtools.yang.model.api.SchemaContext;
72 import org.slf4j.Logger;
73 import org.slf4j.LoggerFactory;
74 import scala.concurrent.duration.Duration;
75 import scala.concurrent.duration.FiniteDuration;
76
77 /**
78  * The ShardManager has the following jobs:
79  * <ul>
80  * <li> Create all the local shard replicas that belong on this cluster member
81  * <li> Find the address of the local shard
82  * <li> Find the primary replica for any given shard
83  * <li> Monitor the cluster members and store their addresses
84  * </ul>
85  */
86 public class ShardManager extends AbstractUntypedPersistentActorWithMetering {
87
88     private static final Logger LOG = LoggerFactory.getLogger(ShardManager.class);
89
90     // Maps a cluster member name to that member's address, kept up to date from cluster
91     // member events. Member names look like "member-1", "member-2" etc. and are as
92     // specified in configuration.
93     private final Map<String, Address> memberNameToAddress = new HashMap<>();
94
95     // Maps a shard name to its corresponding local shard information.
96     // Shard names look like inventory, topology etc. and are as specified in
97     // configuration.
98     private final Map<String, ShardInformation> localShards = new HashMap<>();
99
100     // The type of a ShardManager reflects the type of the datastore itself.
101     // A data store could be of type config/operational.
102     private final String type;
103
104     private final String shardManagerIdentifierString; // actor name appended after "/user/" in ShardManager paths
105
106     private final ClusterWrapper cluster;
107
108     private final Configuration configuration;
109
110     private final String shardDispatcherPath; // dispatcher applied to every shard actor created here
111
112     private ShardManagerInfo mBean; // created in createLocalShards(), unregistered in postStop()
113
114     private DatastoreContext datastoreContext; // replaced whenever a DatastoreContext message arrives
115
116     private final CountDownLatch waitTillReadyCountdownLatch; // counted down by checkReady()
117
118     private final PrimaryShardInfoFutureCache primaryShardInfoCache;
119
120     private SchemaContext schemaContext; // null until the first UpdateSchemaContext is handled
121
122     /**
123      * Creates the manager for one datastore type, subscribes it to cluster member events and
124      * registers the shards configured for the local member.
125      *
126      * @param cluster used to look up the current member name and to subscribe to member events
127      * @param configuration shard configuration; must not be null
128      * @param datastoreContext initial datastore settings, also the source of the datastore type
129      * @param waitTillReadyCountdownLatch latch counted down once all local shards are ready
130      * @param primaryShardInfoCache cache whose entries are dropped when a shard's leader changes
131      */
132     protected ShardManager(ClusterWrapper cluster, Configuration configuration,
133             DatastoreContext datastoreContext, CountDownLatch waitTillReadyCountdownLatch,
134             PrimaryShardInfoFutureCache primaryShardInfoCache) {
135
136         this.cluster = Preconditions.checkNotNull(cluster, "cluster should not be null");
137         this.configuration = Preconditions.checkNotNull(configuration, "configuration should not be null");
138         this.waitTillReadyCountdownLatch = waitTillReadyCountdownLatch;
139         this.primaryShardInfoCache = primaryShardInfoCache;
140         this.datastoreContext = datastoreContext;
141
142         // Everything below is derived from the datastore type.
143         type = datastoreContext.getDataStoreType();
144         shardManagerIdentifierString = ShardManagerIdentifier.builder().type(type).build().toString();
145
146         Dispatchers dispatchers = new Dispatchers(context().system().dispatchers());
147         shardDispatcherPath = dispatchers.getDispatcherPath(Dispatchers.DispatcherType.Shard);
148
149         // Subscribe this actor to cluster member events
150         cluster.subscribeToMemberEvents(getSelf());
151
152         createLocalShards();
153     }
143
144     public static Props props(
145         final ClusterWrapper cluster,
146         final Configuration configuration,
147         final DatastoreContext datastoreContext,
148         final CountDownLatch waitTillReadyCountdownLatch,
149         final PrimaryShardInfoFutureCache primaryShardInfoCache) {
150
151         Preconditions.checkNotNull(cluster, "cluster should not be null");
152         Preconditions.checkNotNull(configuration, "configuration should not be null");
153         Preconditions.checkNotNull(waitTillReadyCountdownLatch, "waitTillReadyCountdownLatch should not be null");
154         Preconditions.checkNotNull(primaryShardInfoCache, "primaryShardInfoCache should not be null");
155
156         return Props.create(new ShardManagerCreator(cluster, configuration, datastoreContext,
157                 waitTillReadyCountdownLatch, primaryShardInfoCache));
158     }
159
160     @Override
161     public void postStop() {
162         LOG.info("Stopping ShardManager");
163
164         mBean.unregisterMBean();
165     }
166
167     @Override
168     public void handleCommand(Object message) throws Exception {
169         if (message  instanceof FindPrimary) {
170             findPrimary((FindPrimary)message);
171         } else if(message instanceof FindLocalShard){
172             findLocalShard((FindLocalShard) message);
173         } else if (message instanceof UpdateSchemaContext) {
174             updateSchemaContext(message);
175         } else if(message instanceof ActorInitialized) {
176             onActorInitialized(message);
177         } else if (message instanceof ClusterEvent.MemberUp){
178             memberUp((ClusterEvent.MemberUp) message);
179         } else if(message instanceof ClusterEvent.MemberRemoved) {
180             memberRemoved((ClusterEvent.MemberRemoved) message);
181         } else if(message instanceof ClusterEvent.UnreachableMember) {
182             memberUnreachable((ClusterEvent.UnreachableMember)message);
183         } else if(message instanceof ClusterEvent.ReachableMember) {
184             memberReachable((ClusterEvent.ReachableMember) message);
185         } else if(message instanceof DatastoreContext) {
186             onDatastoreContext((DatastoreContext)message);
187         } else if(message instanceof RoleChangeNotification) {
188             onRoleChangeNotification((RoleChangeNotification) message);
189         } else if(message instanceof FollowerInitialSyncUpStatus){
190             onFollowerInitialSyncStatus((FollowerInitialSyncUpStatus) message);
191         } else if(message instanceof ShardNotInitializedTimeout) {
192             onShardNotInitializedTimeout((ShardNotInitializedTimeout)message);
193         } else if(message instanceof ShardLeaderStateChanged) {
194             onLeaderStateChanged((ShardLeaderStateChanged) message);
195         } else if(message instanceof SwitchShardBehavior){
196             onSwitchShardBehavior((SwitchShardBehavior) message);
197         } else if(message instanceof CreateShard) {
198             onCreateShard((CreateShard)message);
199         } else {
200             unknownMessage(message);
201         }
202
203     }
204
205     private void onCreateShard(CreateShard createShard) {
206         Object reply;
207         try {
208             if(localShards.containsKey(createShard.getShardName())) {
209                 throw new IllegalStateException(String.format("Shard with name %s already exists",
210                         createShard.getShardName()));
211             }
212
213             ShardIdentifier shardId = getShardIdentifier(cluster.getCurrentMemberName(), createShard.getShardName());
214             Map<String, String> peerAddresses = getPeerAddresses(createShard.getShardName(), createShard.getMemberNames());
215
216             LOG.debug("onCreateShard: shardId: {}, peerAddresses: {}", shardId, peerAddresses);
217
218             DatastoreContext shardDatastoreContext = createShard.getDatastoreContext();
219             if(shardDatastoreContext == null) {
220                 shardDatastoreContext = datastoreContext;
221             }
222
223             ShardInformation info = new ShardInformation(createShard.getShardName(), shardId, peerAddresses,
224                     shardDatastoreContext, createShard.getShardPropsCreator());
225             localShards.put(createShard.getShardName(), info);
226
227             mBean.addLocalShard(shardId.toString());
228
229             if(schemaContext != null) {
230                 info.setActor(newShardActor(schemaContext, info));
231             }
232
233             reply = new CreateShardReply();
234         } catch (Exception e) {
235             LOG.error("onCreateShard failed", e);
236             reply = new akka.actor.Status.Failure(e);
237         }
238
239         if(getSender() != null && !getContext().system().deadLetters().equals(getSender())) {
240             getSender().tell(reply, getSelf());
241         }
242     }
243
244     /**
245      * Counts the ready latch down when every local shard is ready and knows its leader.
246      */
247     private void checkReady() {
248         if (!isReadyWithLeaderId()) {
249             return;
250         }
251
252         LOG.info("{}: All Shards are ready - data store {} is ready, available count is {}",
253                 persistenceId(), type, waitTillReadyCountdownLatch.getCount());
254
255         waitTillReadyCountdownLatch.countDown();
256     }
252
253     private void onLeaderStateChanged(ShardLeaderStateChanged leaderStateChanged) {
254         LOG.info("{}: Received LeaderStateChanged message: {}", persistenceId(), leaderStateChanged);
255
256         ShardInformation shardInformation = findShardInformation(leaderStateChanged.getMemberId());
257         if(shardInformation != null) {
258             shardInformation.setLocalDataTree(leaderStateChanged.getLocalShardDataTree());
259             shardInformation.setLeaderVersion(leaderStateChanged.getLeaderPayloadVersion());
260             if(shardInformation.setLeaderId(leaderStateChanged.getLeaderId())) {
261                 primaryShardInfoCache.remove(shardInformation.getShardName());
262             }
263
264             checkReady();
265         } else {
266             LOG.debug("No shard found with member Id {}", leaderStateChanged.getMemberId());
267         }
268     }
269
270     private void onShardNotInitializedTimeout(ShardNotInitializedTimeout message) {
271         ShardInformation shardInfo = message.getShardInfo();
272
273         LOG.debug("{}: Received ShardNotInitializedTimeout message for shard {}", persistenceId(),
274                 shardInfo.getShardName());
275
276         shardInfo.removeOnShardInitialized(message.getOnShardInitialized());
277
278         if(!shardInfo.isShardInitialized()) {
279             LOG.debug("{}: Returning NotInitializedException for shard {}", persistenceId(), shardInfo.getShardName());
280             message.getSender().tell(createNotInitializedException(shardInfo.shardId), getSelf());
281         } else {
282             LOG.debug("{}: Returning NoShardLeaderException for shard {}", persistenceId(), shardInfo.getShardName());
283             message.getSender().tell(createNoShardLeaderException(shardInfo.shardId), getSelf());
284         }
285     }
286
287     private void onFollowerInitialSyncStatus(FollowerInitialSyncUpStatus status) {
288         LOG.info("{} Received follower initial sync status for {} status sync done {}", persistenceId(),
289                 status.getName(), status.isInitialSyncDone());
290
291         ShardInformation shardInformation = findShardInformation(status.getName());
292
293         if(shardInformation != null) {
294             shardInformation.setFollowerSyncStatus(status.isInitialSyncDone());
295
296             mBean.setSyncStatus(isInSync());
297         }
298
299     }
300
301     private void onRoleChangeNotification(RoleChangeNotification roleChanged) {
302         LOG.info("{}: Received role changed for {} from {} to {}", persistenceId(), roleChanged.getMemberId(),
303                 roleChanged.getOldRole(), roleChanged.getNewRole());
304
305         ShardInformation shardInformation = findShardInformation(roleChanged.getMemberId());
306         if(shardInformation != null) {
307             shardInformation.setRole(roleChanged.getNewRole());
308             checkReady();
309             mBean.setSyncStatus(isInSync());
310         }
311     }
312
313
314     /**
315      * Looks up a local shard by the string form of its shard identifier.
316      *
317      * @param memberId the stringified shard identifier to look for
318      * @return the matching shard information, or null if no local shard has that identifier
319      */
320     private ShardInformation findShardInformation(String memberId) {
321         ShardInformation match = null;
322         for (ShardInformation candidate : localShards.values()) {
323             String candidateId = candidate.getShardId().toString();
324             if (candidateId.equals(memberId)) {
325                 match = candidate;
326                 break;
327             }
328         }
329
330         return match;
331     }
323
324     /**
325      * @return true when every local shard reports being ready with a known leader
326      *         (trivially true when there are no local shards)
327      */
328     private boolean isReadyWithLeaderId() {
329         for (ShardInformation info : localShards.values()) {
330             if (!info.isShardReadyWithLeaderId()) {
331                 return false;
332             }
333         }
334         return true;
335     }
334
335     /**
336      * @return true when every local shard reports being in sync
337      *         (trivially true when there are no local shards)
338      */
339     private boolean isInSync() {
340         boolean allInSync = true;
341         for (ShardInformation info : localShards.values()) {
342             if (!info.isInSync()) {
343                 allInSync = false;
344                 break;
345             }
346         }
347         return allInSync;
348     }
343
344     private void onActorInitialized(Object message) {
345         final ActorRef sender = getSender();
346
347         if (sender == null) {
348             return; //why is a non-actor sending this message? Just ignore.
349         }
350
351         String actorName = sender.path().name();
352         //find shard name from actor name; actor name is stringified shardId
353         ShardIdentifier shardId = ShardIdentifier.builder().fromShardIdString(actorName).build();
354
355         if (shardId.getShardName() == null) {
356             return;
357         }
358
359         markShardAsInitialized(shardId.getShardName());
360     }
361
362     private void markShardAsInitialized(String shardName) {
363         LOG.debug("{}: Initializing shard [{}]", persistenceId(), shardName);
364
365         ShardInformation shardInformation = localShards.get(shardName);
366         if (shardInformation != null) {
367             shardInformation.setActorInitialized();
368
369             shardInformation.getActor().tell(new RegisterRoleChangeListener(), self());
370         }
371     }
372
373     /**
374      * Handles recovery messages. Only the completion marker is acted upon: at that point all
375      * journal entries up to the last sequence number are deleted.
376      *
377      * @param message the recovery message
378      */
379     @Override
380     protected void handleRecover(Object message) throws Exception {
381         if (!(message instanceof RecoveryCompleted)) {
382             return;
383         }
384
385         LOG.info("Recovery complete : {}", persistenceId());
386
387         // We no longer persist SchemaContext modules so delete all the prior messages from the akka
388         // journal on upgrade from Helium.
389         deleteMessages(lastSequenceNr());
390     }
383
384     private void findLocalShard(FindLocalShard message) {
385         final ShardInformation shardInformation = localShards.get(message.getShardName());
386
387         if(shardInformation == null){
388             getSender().tell(new LocalShardNotFound(message.getShardName()), getSelf());
389             return;
390         }
391
392         sendResponse(shardInformation, message.isWaitUntilInitialized(), false, new Supplier<Object>() {
393             @Override
394             public Object get() {
395                 return new LocalShardFound(shardInformation.getActor());
396             }
397         });
398     }
399
400     private void sendResponse(ShardInformation shardInformation, boolean doWait,
401             boolean wantShardReady, final Supplier<Object> messageSupplier) {
402         if (!shardInformation.isShardInitialized() || (wantShardReady && !shardInformation.isShardReadyWithLeaderId())) {
403             if(doWait) {
404                 final ActorRef sender = getSender();
405                 final ActorRef self = self();
406
407                 Runnable replyRunnable = new Runnable() {
408                     @Override
409                     public void run() {
410                         sender.tell(messageSupplier.get(), self);
411                     }
412                 };
413
414                 OnShardInitialized onShardInitialized = wantShardReady ? new OnShardReady(replyRunnable) :
415                     new OnShardInitialized(replyRunnable);
416
417                 shardInformation.addOnShardInitialized(onShardInitialized);
418
419                 LOG.debug("{}: Scheduling timer to wait for shard {}", persistenceId(), shardInformation.getShardName());
420
421                 FiniteDuration timeout = datastoreContext.getShardInitializationTimeout().duration();
422                 if(shardInformation.isShardInitialized()) {
423                     // If the shard is already initialized then we'll wait enough time for the shard to
424                     // elect a leader, ie 2 times the election timeout.
425                     timeout = FiniteDuration.create(datastoreContext.getShardRaftConfig()
426                             .getElectionTimeOutInterval().toMillis() * 2, TimeUnit.MILLISECONDS);
427                 }
428
429                 Cancellable timeoutSchedule = getContext().system().scheduler().scheduleOnce(
430                         timeout, getSelf(),
431                         new ShardNotInitializedTimeout(shardInformation, onShardInitialized, sender),
432                         getContext().dispatcher(), getSelf());
433
434                 onShardInitialized.setTimeoutSchedule(timeoutSchedule);
435
436             } else if (!shardInformation.isShardInitialized()) {
437                 LOG.debug("{}: Returning NotInitializedException for shard {}", persistenceId(),
438                         shardInformation.getShardName());
439                 getSender().tell(createNotInitializedException(shardInformation.shardId), getSelf());
440             } else {
441                 LOG.debug("{}: Returning NoShardLeaderException for shard {}", persistenceId(),
442                         shardInformation.getShardName());
443                 getSender().tell(createNoShardLeaderException(shardInformation.shardId), getSelf());
444             }
445
446             return;
447         }
448
449         getSender().tell(messageSupplier.get(), getSelf());
450     }
451
452     private NoShardLeaderException createNoShardLeaderException(ShardIdentifier shardId) {
453         return new NoShardLeaderException(null, shardId.toString());
454     }
455
456     private NotInitializedException createNotInitializedException(ShardIdentifier shardId) {
457         return new NotInitializedException(String.format(
458                 "Found primary shard %s but it's not initialized yet. Please try again later", shardId));
459     }
460
461     private void memberRemoved(ClusterEvent.MemberRemoved message) {
462         String memberName = message.member().roles().head();
463
464         LOG.debug("{}: Received MemberRemoved: memberName: {}, address: {}", persistenceId(), memberName,
465                 message.member().address());
466
467         memberNameToAddress.remove(message.member().roles().head());
468     }
469
470     private void memberUp(ClusterEvent.MemberUp message) {
471         String memberName = message.member().roles().head();
472
473         LOG.debug("{}: Received MemberUp: memberName: {}, address: {}", persistenceId(), memberName,
474                 message.member().address());
475
476         memberNameToAddress.put(memberName, message.member().address());
477
478         for(ShardInformation info : localShards.values()){
479             String shardName = info.getShardName();
480             info.updatePeerAddress(getShardIdentifier(memberName, shardName).toString(),
481                 getShardActorPath(shardName, memberName), getSelf());
482         }
483
484         checkReady();
485     }
486
487     private void memberReachable(ClusterEvent.ReachableMember message) {
488         String memberName = message.member().roles().head();
489         LOG.debug("Received ReachableMember: memberName {}, address: {}", memberName, message.member().address());
490
491         markMemberAvailable(memberName);
492     }
493
494     private void memberUnreachable(ClusterEvent.UnreachableMember message) {
495         String memberName = message.member().roles().head();
496         LOG.debug("Received UnreachableMember: memberName {}, address: {}", memberName, message.member().address());
497
498         markMemberUnavailable(memberName);
499     }
500
501     /**
502      * Marks the leader of every local shard as unavailable when that leader lives on the given
503      * member, and drops the shard's cached primary info.
504      *
505      * <p>The leader is matched by comparing its id with the shard identifier built for
506      * {@code memberName}, the same way peer ids are built in {@code memberUp}. A substring test
507      * on the member name would also match other members sharing the prefix (for example
508      * "member-1" inside "member-10").
509      *
510      * @param memberName name of the member that became unreachable
511      */
512     private void markMemberUnavailable(final String memberName) {
513         for (ShardInformation info : localShards.values()) {
514             String leaderId = info.getLeaderId();
515             String shardIdOnMember = getShardIdentifier(memberName, info.getShardName()).toString();
516             // equals() is false for a null leaderId, so shards without a leader are skipped.
517             if (shardIdOnMember.equals(leaderId)) {
518                 LOG.debug("Marking Leader {} as unavailable.", leaderId);
519                 info.setLeaderAvailable(false);
520
521                 primaryShardInfoCache.remove(info.getShardName());
522             }
523         }
524     }
512
513     /**
514      * Marks the leader of every local shard as available when that leader lives on the given
515      * member.
516      *
517      * <p>The leader is matched by comparing its id with the shard identifier built for
518      * {@code memberName}, the same way peer ids are built in {@code memberUp}. A substring test
519      * on the member name would also match other members sharing the prefix (for example
520      * "member-1" inside "member-10").
521      *
522      * @param memberName name of the member that became reachable
523      */
524     private void markMemberAvailable(final String memberName) {
525         for (ShardInformation info : localShards.values()) {
526             String leaderId = info.getLeaderId();
527             String shardIdOnMember = getShardIdentifier(memberName, info.getShardName()).toString();
528             // equals() is false for a null leaderId, so shards without a leader are skipped.
529             if (shardIdOnMember.equals(leaderId)) {
530                 LOG.debug("Marking Leader {} as available.", leaderId);
531                 info.setLeaderAvailable(true);
532             }
533         }
534     }
522
523     private void onDatastoreContext(DatastoreContext context) {
524         datastoreContext = context;
525         for (ShardInformation info : localShards.values()) {
526             if (info.getActor() != null) {
527                 info.getActor().tell(datastoreContext, getSelf());
528             }
529         }
530     }
531
532     private void onSwitchShardBehavior(SwitchShardBehavior message) {
533         ShardIdentifier identifier = ShardIdentifier.builder().fromShardIdString(message.getShardName()).build();
534
535         ShardInformation shardInformation = localShards.get(identifier.getShardName());
536
537         if(shardInformation != null && shardInformation.getActor() != null) {
538             shardInformation.getActor().tell(
539                     new SwitchBehavior(RaftState.valueOf(message.getNewState()), message.getTerm()), getSelf());
540         } else {
541             LOG.warn("Could not switch the behavior of shard {} to {} - shard is not yet available",
542                     message.getShardName(), message.getNewState());
543         }
544     }
545
546     /**
547      * Stores the new schema context and propagates it to the local shards: shards that have
548      * no actor yet get one created with this schema context, all others receive the message.
549      *
550      * @param message an {@link UpdateSchemaContext} message
551      */
552     private void updateSchemaContext(final Object message) {
553         schemaContext = ((UpdateSchemaContext) message).getSchemaContext();
554
555         LOG.debug("Got updated SchemaContext: # of modules {}", schemaContext.getAllModuleIdentifiers().size());
556
557         for (ShardInformation info : localShards.values()) {
558             if (info.getActor() != null) {
559                 info.getActor().tell(message, getSelf());
560                 continue;
561             }
562
563             LOG.debug("Creating Shard {}", info.getShardId());
564             info.setActor(newShardActor(schemaContext, info));
565         }
566     }
565
566     /**
567      * @return the cluster wrapper this manager was created with
568      */
569     @VisibleForTesting
570     protected ClusterWrapper getCluster() {
571         return this.cluster;
572     }
570
571     @VisibleForTesting
572     protected ActorRef newShardActor(final SchemaContext schemaContext, ShardInformation info) {
573         return getContext().actorOf(info.newProps(schemaContext)
574                         .withDispatcher(shardDispatcherPath), info.getShardId().toString());
575     }
576
577     private void findPrimary(FindPrimary message) {
578         LOG.debug("{}: In findPrimary: {}", persistenceId(), message);
579
580         final String shardName = message.getShardName();
581         final boolean canReturnLocalShardState = !(message instanceof RemoteFindPrimary);
582
583         // First see if the there is a local replica for the shard
584         final ShardInformation info = localShards.get(shardName);
585         if (info != null) {
586             sendResponse(info, message.isWaitUntilReady(), true, new Supplier<Object>() {
587                 @Override
588                 public Object get() {
589                     String primaryPath = info.getSerializedLeaderActor();
590                     Object found = canReturnLocalShardState && info.isLeader() ?
591                             new LocalPrimaryShardFound(primaryPath, info.getLocalShardDataTree().get()) :
592                                 new RemotePrimaryShardFound(primaryPath, info.getLeaderVersion());
593
594                     if(LOG.isDebugEnabled()) {
595                         LOG.debug("{}: Found primary for {}: {}", persistenceId(), shardName, found);
596                     }
597
598                     return found;
599                 }
600             });
601
602             return;
603         }
604
605         for(Map.Entry<String, Address> entry: memberNameToAddress.entrySet()) {
606             if(!cluster.getCurrentMemberName().equals(entry.getKey())) {
607                 String path = getShardManagerActorPathBuilder(entry.getValue()).toString();
608
609                 LOG.debug("{}: findPrimary for {} forwarding to remote ShardManager {}", persistenceId(),
610                         shardName, path);
611
612                 getContext().actorSelection(path).forward(new RemoteFindPrimary(shardName,
613                         message.isWaitUntilReady()), getContext());
614                 return;
615             }
616         }
617
618         LOG.debug("{}: No shard found for {}", persistenceId(), shardName);
619
620         getSender().tell(new PrimaryNotFoundException(
621                 String.format("No primary shard found for %s.", shardName)), getSelf());
622     }
623
624     /**
625      * Starts the actor path of the ShardManager on the given address:
626      * the address, then "/user/", then this manager's identifier string.
627      *
628      * @param address the address of the member hosting the ShardManager
629      * @return a builder holding the path, to which callers may append further segments
630      */
631     private StringBuilder getShardManagerActorPathBuilder(Address address) {
632         return new StringBuilder()
633                 .append(address.toString())
634                 .append("/user/")
635                 .append(shardManagerIdentifierString);
636     }
629
630     private String getShardActorPath(String shardName, String memberName) {
631         Address address = memberNameToAddress.get(memberName);
632         if(address != null) {
633             StringBuilder builder = getShardManagerActorPathBuilder(address);
634             builder.append("/")
635                 .append(getShardIdentifier(memberName, shardName));
636             return builder.toString();
637         }
638         return null;
639     }
640
641     /**
642      * Builds the identifier of a shard from the member it resides on, the shard's name and
643      * this manager's datastore type. Its string form is used as the shard actor's name.
644      *
645      * @param memberName name of the member on which the shard resides
646      * @param shardName name of the shard
647      * @return the shard identifier
648      */
649     private ShardIdentifier getShardIdentifier(String memberName, String shardName) {
650         return ShardIdentifier.builder()
651                 .memberName(memberName)
652                 .shardName(shardName)
653                 .type(type)
654                 .build();
655     }
652
653     /**
654      * Create shards that are local to the member on which the ShardManager
655      * runs
656      *
657      */
658     private void createLocalShards() {
659         String memberName = this.cluster.getCurrentMemberName();
660         Collection<String> memberShardNames = this.configuration.getMemberShardNames(memberName);
661
662         ShardPropsCreator shardPropsCreator = new DefaultShardPropsCreator();
663         List<String> localShardActorNames = new ArrayList<>();
664         for(String shardName : memberShardNames){
665             ShardIdentifier shardId = getShardIdentifier(memberName, shardName);
666             Map<String, String> peerAddresses = getPeerAddresses(shardName);
667             localShardActorNames.add(shardId.toString());
668             localShards.put(shardName, new ShardInformation(shardName, shardId, peerAddresses, datastoreContext,
669                     shardPropsCreator));
670         }
671
672         mBean = ShardManagerInfo.createShardManagerMBean(memberName, "shard-manager-" + this.type,
673                     datastoreContext.getDataStoreMXBeanType(), localShardActorNames);
674
675         mBean.setShardManager(this);
676     }
677
678     /**
679      * Given the name of the shard find the addresses of all it's peers
680      *
681      * @param shardName
682      * @return
683      */
684     private Map<String, String> getPeerAddresses(String shardName) {
685         return getPeerAddresses(shardName, configuration.getMembersFromShardName(shardName));
686     }
687
688     private Map<String, String> getPeerAddresses(String shardName, Collection<String> members) {
689
690         Map<String, String> peerAddresses = new HashMap<>();
691
692         String currentMemberName = this.cluster.getCurrentMemberName();
693
694         for(String memberName : members) {
695             if(!currentMemberName.equals(memberName)) {
696                 ShardIdentifier shardId = getShardIdentifier(memberName, shardName);
697                 String path = getShardActorPath(shardName, currentMemberName);
698                 peerAddresses.put(shardId.toString(), path);
699             }
700         }
701         return peerAddresses;
702     }
703
704     @Override
705     public SupervisorStrategy supervisorStrategy() {
706
707         return new OneForOneStrategy(10, Duration.create("1 minute"),
708             new Function<Throwable, SupervisorStrategy.Directive>() {
709                 @Override
710                 public SupervisorStrategy.Directive apply(Throwable t) {
711                     LOG.warn("Supervisor Strategy caught unexpected exception - resuming", t);
712                     return SupervisorStrategy.resume();
713                 }
714             }
715         );
716
717     }
718
719     @Override
720     public String persistenceId() {
721         return "shard-manager-" + type;
722     }
723
724     @VisibleForTesting
725     ShardManagerInfoMBean getMBean(){
726         return mBean;
727     }
728
729     @VisibleForTesting
730     protected static class ShardInformation {
731         private final ShardIdentifier shardId;
732         private final String shardName;
733         private ActorRef actor;
734         private ActorPath actorPath;
735         private final Map<String, String> peerAddresses;
736         private Optional<DataTree> localShardDataTree;
737         private boolean leaderAvailable = false;
738
739         // flag that determines if the actor is ready for business
740         private boolean actorInitialized = false;
741
742         private boolean followerSyncStatus = false;
743
744         private final Set<OnShardInitialized> onShardInitializedSet = Sets.newHashSet();
745         private String role ;
746         private String leaderId;
747         private short leaderVersion;
748
749         private final DatastoreContext datastoreContext;
750         private final ShardPropsCreator shardPropsCreator;
751
752         private ShardInformation(String shardName, ShardIdentifier shardId,
753                 Map<String, String> peerAddresses, DatastoreContext datastoreContext,
754                 ShardPropsCreator shardPropsCreator) {
755             this.shardName = shardName;
756             this.shardId = shardId;
757             this.peerAddresses = peerAddresses;
758             this.datastoreContext = datastoreContext;
759             this.shardPropsCreator = shardPropsCreator;
760         }
761
762         Props newProps(SchemaContext schemaContext) {
763             return shardPropsCreator.newProps(shardId, peerAddresses, datastoreContext, schemaContext);
764         }
765
766         String getShardName() {
767             return shardName;
768         }
769
770         ActorRef getActor(){
771             return actor;
772         }
773
774         ActorPath getActorPath() {
775             return actorPath;
776         }
777
778         void setActor(ActorRef actor) {
779             this.actor = actor;
780             this.actorPath = actor.path();
781         }
782
783         ShardIdentifier getShardId() {
784             return shardId;
785         }
786
787         void setLocalDataTree(Optional<DataTree> localShardDataTree) {
788             this.localShardDataTree = localShardDataTree;
789         }
790
791         Optional<DataTree> getLocalShardDataTree() {
792             return localShardDataTree;
793         }
794
795         Map<String, String> getPeerAddresses() {
796             return peerAddresses;
797         }
798
799         void updatePeerAddress(String peerId, String peerAddress, ActorRef sender){
800             LOG.info("updatePeerAddress for peer {} with address {}", peerId,
801                 peerAddress);
802             if(peerAddresses.containsKey(peerId)){
803                 peerAddresses.put(peerId, peerAddress);
804
805                 if(actor != null) {
806                     if(LOG.isDebugEnabled()) {
807                         LOG.debug("Sending PeerAddressResolved for peer {} with address {} to {}",
808                                 peerId, peerAddress, actor.path());
809                     }
810
811                     actor.tell(new PeerAddressResolved(peerId.toString(), peerAddress), sender);
812                 }
813
814                 notifyOnShardInitializedCallbacks();
815             }
816         }
817
818         boolean isShardReady() {
819             return !RaftState.Candidate.name().equals(role) && !Strings.isNullOrEmpty(role);
820         }
821
822         boolean isShardReadyWithLeaderId() {
823             return leaderAvailable && isShardReady() && !RaftState.IsolatedLeader.name().equals(role) &&
824                     (isLeader() || peerAddresses.get(leaderId) != null);
825         }
826
827         boolean isShardInitialized() {
828             return getActor() != null && actorInitialized;
829         }
830
831         boolean isLeader() {
832             return Objects.equal(leaderId, shardId.toString());
833         }
834
835         String getSerializedLeaderActor() {
836             if(isLeader()) {
837                 return Serialization.serializedActorPath(getActor());
838             } else {
839                 return peerAddresses.get(leaderId);
840             }
841         }
842
843         void setActorInitialized() {
844             LOG.debug("Shard {} is initialized", shardId);
845
846             this.actorInitialized = true;
847
848             notifyOnShardInitializedCallbacks();
849         }
850
851         private void notifyOnShardInitializedCallbacks() {
852             if(onShardInitializedSet.isEmpty()) {
853                 return;
854             }
855
856             boolean ready = isShardReadyWithLeaderId();
857
858             if(LOG.isDebugEnabled()) {
859                 LOG.debug("Shard {} is {} - notifying {} OnShardInitialized callbacks", shardId,
860                         ready ? "ready" : "initialized", onShardInitializedSet.size());
861             }
862
863             Iterator<OnShardInitialized> iter = onShardInitializedSet.iterator();
864             while(iter.hasNext()) {
865                 OnShardInitialized onShardInitialized = iter.next();
866                 if(!(onShardInitialized instanceof OnShardReady) || ready) {
867                     iter.remove();
868                     onShardInitialized.getTimeoutSchedule().cancel();
869                     onShardInitialized.getReplyRunnable().run();
870                 }
871             }
872         }
873
874         void addOnShardInitialized(OnShardInitialized onShardInitialized) {
875             onShardInitializedSet.add(onShardInitialized);
876         }
877
878         void removeOnShardInitialized(OnShardInitialized onShardInitialized) {
879             onShardInitializedSet.remove(onShardInitialized);
880         }
881
882         void setRole(String newRole) {
883             this.role = newRole;
884
885             notifyOnShardInitializedCallbacks();
886         }
887
888         void setFollowerSyncStatus(boolean syncStatus){
889             this.followerSyncStatus = syncStatus;
890         }
891
892         boolean isInSync(){
893             if(RaftState.Follower.name().equals(this.role)){
894                 return followerSyncStatus;
895             } else if(RaftState.Leader.name().equals(this.role)){
896                 return true;
897             }
898
899             return false;
900         }
901
902         boolean setLeaderId(String leaderId) {
903             boolean changed = !Objects.equal(this.leaderId, leaderId);
904             this.leaderId = leaderId;
905             if(leaderId != null) {
906                 this.leaderAvailable = true;
907             }
908             notifyOnShardInitializedCallbacks();
909
910             return changed;
911         }
912
913         String getLeaderId() {
914             return leaderId;
915         }
916
917         void setLeaderAvailable(boolean leaderAvailable) {
918             this.leaderAvailable = leaderAvailable;
919         }
920
921         short getLeaderVersion() {
922             return leaderVersion;
923         }
924
925         void setLeaderVersion(short leaderVersion) {
926             this.leaderVersion = leaderVersion;
927         }
928     }
929
930     private static class ShardManagerCreator implements Creator<ShardManager> {
931         private static final long serialVersionUID = 1L;
932
933         final ClusterWrapper cluster;
934         final Configuration configuration;
935         final DatastoreContext datastoreContext;
936         private final CountDownLatch waitTillReadyCountdownLatch;
937         private final PrimaryShardInfoFutureCache primaryShardInfoCache;
938
939         ShardManagerCreator(ClusterWrapper cluster, Configuration configuration, DatastoreContext datastoreContext,
940                 CountDownLatch waitTillReadyCountdownLatch, PrimaryShardInfoFutureCache primaryShardInfoCache) {
941             this.cluster = cluster;
942             this.configuration = configuration;
943             this.datastoreContext = datastoreContext;
944             this.waitTillReadyCountdownLatch = waitTillReadyCountdownLatch;
945             this.primaryShardInfoCache = primaryShardInfoCache;
946         }
947
948         @Override
949         public ShardManager create() throws Exception {
950             return new ShardManager(cluster, configuration, datastoreContext, waitTillReadyCountdownLatch,
951                     primaryShardInfoCache);
952         }
953     }
954
955     private static class OnShardInitialized {
956         private final Runnable replyRunnable;
957         private Cancellable timeoutSchedule;
958
959         OnShardInitialized(Runnable replyRunnable) {
960             this.replyRunnable = replyRunnable;
961         }
962
963         Runnable getReplyRunnable() {
964             return replyRunnable;
965         }
966
967         Cancellable getTimeoutSchedule() {
968             return timeoutSchedule;
969         }
970
971         void setTimeoutSchedule(Cancellable timeoutSchedule) {
972             this.timeoutSchedule = timeoutSchedule;
973         }
974     }
975
976     private static class OnShardReady extends OnShardInitialized {
977         OnShardReady(Runnable replyRunnable) {
978             super(replyRunnable);
979         }
980     }
981
982     private static class ShardNotInitializedTimeout {
983         private final ActorRef sender;
984         private final ShardInformation shardInfo;
985         private final OnShardInitialized onShardInitialized;
986
987         ShardNotInitializedTimeout(ShardInformation shardInfo, OnShardInitialized onShardInitialized, ActorRef sender) {
988             this.sender = sender;
989             this.shardInfo = shardInfo;
990             this.onShardInitialized = onShardInitialized;
991         }
992
993         ActorRef getSender() {
994             return sender;
995         }
996
997         ShardInformation getShardInfo() {
998             return shardInfo;
999         }
1000
1001         OnShardInitialized getOnShardInitialized() {
1002             return onShardInitialized;
1003         }
1004     }
1005
1006     /**
1007      * We no longer persist SchemaContextModules but keep this class around for now for backwards
1008      * compatibility so we don't get de-serialization failures on upgrade from Helium.
1009      */
1010     @Deprecated
1011     static class SchemaContextModules implements Serializable {
1012         private static final long serialVersionUID = -8884620101025936590L;
1013
1014         private final Set<String> modules;
1015
1016         SchemaContextModules(Set<String> modules){
1017             this.modules = modules;
1018         }
1019
1020         public Set<String> getModules() {
1021             return modules;
1022         }
1023     }
1024 }
1025
1026
1027