2 * Copyright (c) 2014 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
9 package org.opendaylight.controller.cluster.datastore;
11 import akka.actor.ActorPath;
12 import akka.actor.ActorRef;
13 import akka.actor.Address;
14 import akka.actor.OneForOneStrategy;
15 import akka.actor.Props;
16 import akka.actor.SupervisorStrategy;
17 import akka.cluster.ClusterEvent;
18 import akka.japi.Creator;
19 import akka.japi.Function;
20 import akka.japi.Procedure;
21 import akka.persistence.RecoveryCompleted;
22 import akka.persistence.RecoveryFailure;
23 import com.google.common.annotations.VisibleForTesting;
24 import com.google.common.base.Preconditions;
25 import com.google.common.base.Strings;
26 import com.google.common.base.Supplier;
27 import com.google.common.collect.ImmutableSet;
28 import com.google.common.collect.Lists;
29 import java.io.Serializable;
30 import java.util.ArrayList;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.HashMap;
34 import java.util.HashSet;
35 import java.util.List;
38 import java.util.concurrent.CountDownLatch;
39 import org.opendaylight.controller.cluster.DataPersistenceProvider;
40 import org.opendaylight.controller.cluster.common.actor.AbstractUntypedPersistentActorWithMetering;
41 import org.opendaylight.controller.cluster.datastore.identifiers.ShardIdentifier;
42 import org.opendaylight.controller.cluster.datastore.identifiers.ShardManagerIdentifier;
43 import org.opendaylight.controller.cluster.datastore.jmx.mbeans.shardmanager.ShardManagerInfo;
44 import org.opendaylight.controller.cluster.datastore.jmx.mbeans.shardmanager.ShardManagerInfoMBean;
45 import org.opendaylight.controller.cluster.datastore.messages.ActorInitialized;
46 import org.opendaylight.controller.cluster.datastore.messages.ActorNotInitialized;
47 import org.opendaylight.controller.cluster.datastore.messages.FindLocalShard;
48 import org.opendaylight.controller.cluster.datastore.messages.FindPrimary;
49 import org.opendaylight.controller.cluster.datastore.messages.LocalShardFound;
50 import org.opendaylight.controller.cluster.datastore.messages.LocalShardNotFound;
51 import org.opendaylight.controller.cluster.datastore.messages.PeerAddressResolved;
52 import org.opendaylight.controller.cluster.datastore.messages.PrimaryFound;
53 import org.opendaylight.controller.cluster.datastore.messages.PrimaryNotFound;
54 import org.opendaylight.controller.cluster.datastore.messages.UpdateSchemaContext;
55 import org.opendaylight.controller.cluster.datastore.utils.Dispatchers;
56 import org.opendaylight.controller.cluster.notifications.RegisterRoleChangeListener;
57 import org.opendaylight.controller.cluster.notifications.RoleChangeNotification;
58 import org.opendaylight.controller.cluster.raft.RaftState;
59 import org.opendaylight.controller.cluster.raft.base.messages.FollowerInitialSyncUpStatus;
60 import org.opendaylight.yangtools.yang.model.api.ModuleIdentifier;
61 import org.opendaylight.yangtools.yang.model.api.SchemaContext;
62 import org.slf4j.Logger;
63 import org.slf4j.LoggerFactory;
64 import scala.concurrent.duration.Duration;
67 * The ShardManager has the following jobs,
69 * <li> Create all the local shard replicas that belong on this cluster member
70 * <li> Find the address of the local shard
71 * <li> Find the primary replica for any given shard
72 * <li> Monitor the cluster members and store their addresses
75 public class ShardManager extends AbstractUntypedPersistentActorWithMetering {
77 private final Logger LOG = LoggerFactory.getLogger(getClass());
79 // Stores a mapping between a member name and the address of the member
80 // Member names look like "member-1", "member-2" etc and are as specified
82 private final Map<String, Address> memberNameToAddress = new HashMap<>();
84 // Stores a mapping between a shard name and it's corresponding information
85 // Shard names look like inventory, topology etc and are as specified in
87 private final Map<String, ShardInformation> localShards = new HashMap<>();
89 // The type of a ShardManager reflects the type of the datastore itself
90 // A data store could be of type config/operational
91 private final String type;
93 private final ClusterWrapper cluster;
95 private final Configuration configuration;
97 private final String shardDispatcherPath;
99 private ShardManagerInfo mBean;
101 private DatastoreContext datastoreContext;
103 private Collection<String> knownModules = Collections.emptySet();
105 private final DataPersistenceProvider dataPersistenceProvider;
107 private final CountDownLatch waitTillReadyCountdownLatch;
/**
 * Initializes the manager, subscribes it to cluster member events and
 * registers the shards that belong on this member.
 *
 * @param cluster access to cluster membership; must not be null
 * @param configuration shard/member configuration; must not be null
 * @param datastoreContext settings of the data store; must not be null
 * @param waitTillReadyCountdownLatch latch counted down once the local shards are ready
 * @throws NullPointerException if cluster, configuration or datastoreContext is null
 */
protected ShardManager(ClusterWrapper cluster, Configuration configuration,
        DatastoreContext datastoreContext, CountDownLatch waitTillReadyCountdownLatch) {

    this.cluster = Preconditions.checkNotNull(cluster, "cluster should not be null");
    this.configuration = Preconditions.checkNotNull(configuration, "configuration should not be null");
    // datastoreContext is dereferenced right below; validate it like the other
    // collaborators so a null fails with a descriptive message.
    this.datastoreContext = Preconditions.checkNotNull(datastoreContext, "datastoreContext should not be null");
    this.dataPersistenceProvider = createDataPersistenceProvider(datastoreContext.isPersistent());
    this.type = datastoreContext.getDataStoreType();
    this.shardDispatcherPath =
            new Dispatchers(context().system().dispatchers()).getDispatcherPath(Dispatchers.DispatcherType.Shard);
    this.waitTillReadyCountdownLatch = waitTillReadyCountdownLatch;

    // Subscribe this actor to cluster member events
    cluster.subscribeToMemberEvents(getSelf());

    // NOTE(review): this call sits on a line missing from the listing; it is the
    // only place localShards and mBean can get populated - confirm against the repo.
    createLocalShards();
}
129 protected DataPersistenceProvider createDataPersistenceProvider(boolean persistent) {
130 return (persistent) ? new PersistentDataProvider() : new NonPersistentDataProvider();
133 public static Props props(
134 final ClusterWrapper cluster,
135 final Configuration configuration,
136 final DatastoreContext datastoreContext,
137 final CountDownLatch waitTillReadyCountdownLatch) {
139 Preconditions.checkNotNull(cluster, "cluster should not be null");
140 Preconditions.checkNotNull(configuration, "configuration should not be null");
141 Preconditions.checkNotNull(waitTillReadyCountdownLatch, "waitTillReadyCountdownLatch should not be null");
143 return Props.create(new ShardManagerCreator(cluster, configuration, datastoreContext, waitTillReadyCountdownLatch));
147 public void postStop() {
148 LOG.info("Stopping ShardManager");
150 mBean.unregisterMBean();
154 public void handleCommand(Object message) throws Exception {
155 if (FindPrimary.SERIALIZABLE_CLASS.isInstance(message)) {
156 findPrimary(FindPrimary.fromSerializable(message));
157 } else if(message instanceof FindLocalShard){
158 findLocalShard((FindLocalShard) message);
159 } else if (message instanceof UpdateSchemaContext) {
160 updateSchemaContext(message);
161 } else if(message instanceof ActorInitialized) {
162 onActorInitialized(message);
163 } else if (message instanceof ClusterEvent.MemberUp){
164 memberUp((ClusterEvent.MemberUp) message);
165 } else if(message instanceof ClusterEvent.MemberRemoved) {
166 memberRemoved((ClusterEvent.MemberRemoved) message);
167 } else if(message instanceof ClusterEvent.UnreachableMember) {
168 ignoreMessage(message);
169 } else if(message instanceof DatastoreContext) {
170 onDatastoreContext((DatastoreContext)message);
171 } else if(message instanceof RoleChangeNotification) {
172 onRoleChangeNotification((RoleChangeNotification) message);
173 } else if(message instanceof FollowerInitialSyncUpStatus){
174 onFollowerInitialSyncStatus((FollowerInitialSyncUpStatus) message);
176 unknownMessage(message);
181 private void onFollowerInitialSyncStatus(FollowerInitialSyncUpStatus status) {
182 LOG.info("Received follower initial sync status for {} status sync done {}", status.getName(),
183 status.isInitialSyncDone());
185 ShardInformation shardInformation = findShardInformation(status.getName());
187 if(shardInformation != null) {
188 shardInformation.setFollowerSyncStatus(status.isInitialSyncDone());
190 mBean.setSyncStatus(isInSync());
195 private void onRoleChangeNotification(RoleChangeNotification roleChanged) {
196 LOG.info("Received role changed for {} from {} to {}", roleChanged.getMemberId(),
197 roleChanged.getOldRole(), roleChanged.getNewRole());
199 ShardInformation shardInformation = findShardInformation(roleChanged.getMemberId());
200 if(shardInformation != null) {
201 shardInformation.setRole(roleChanged.getNewRole());
204 LOG.info("All Shards are ready - data store {} is ready, available count is {}", type,
205 waitTillReadyCountdownLatch.getCount());
207 waitTillReadyCountdownLatch.countDown();
210 mBean.setSyncStatus(isInSync());
215 private ShardInformation findShardInformation(String memberId) {
216 for(ShardInformation info : localShards.values()){
217 if(info.getShardId().toString().equals(memberId)){
225 private boolean isReady() {
226 boolean isReady = true;
227 for (ShardInformation info : localShards.values()) {
228 if(RaftState.Candidate.name().equals(info.getRole()) || Strings.isNullOrEmpty(info.getRole())){
236 private boolean isInSync(){
237 for (ShardInformation info : localShards.values()) {
238 if(!info.isInSync()){
245 private void onActorInitialized(Object message) {
246 final ActorRef sender = getSender();
248 if (sender == null) {
249 return; //why is a non-actor sending this message? Just ignore.
252 String actorName = sender.path().name();
253 //find shard name from actor name; actor name is stringified shardId
254 ShardIdentifier shardId = ShardIdentifier.builder().fromShardIdString(actorName).build();
256 if (shardId.getShardName() == null) {
259 markShardAsInitialized(shardId.getShardName());
262 private void markShardAsInitialized(String shardName) {
263 LOG.debug("Initializing shard [{}]", shardName);
264 ShardInformation shardInformation = localShards.get(shardName);
265 if (shardInformation != null) {
266 shardInformation.setActorInitialized();
271 protected void handleRecover(Object message) throws Exception {
272 if(dataPersistenceProvider.isRecoveryApplicable()) {
273 if (message instanceof SchemaContextModules) {
274 SchemaContextModules msg = (SchemaContextModules) message;
275 knownModules = ImmutableSet.copyOf(msg.getModules());
276 } else if (message instanceof RecoveryFailure) {
277 RecoveryFailure failure = (RecoveryFailure) message;
278 LOG.error("Recovery failed", failure.cause());
279 } else if (message instanceof RecoveryCompleted) {
280 LOG.info("Recovery complete : {}", persistenceId());
282 // Delete all the messages from the akka journal except the last one
283 deleteMessages(lastSequenceNr() - 1);
286 if (message instanceof RecoveryCompleted) {
287 LOG.info("Recovery complete : {}", persistenceId());
289 // Delete all the messages from the akka journal
290 deleteMessages(lastSequenceNr());
295 private void findLocalShard(FindLocalShard message) {
296 final ShardInformation shardInformation = localShards.get(message.getShardName());
298 if(shardInformation == null){
299 getSender().tell(new LocalShardNotFound(message.getShardName()), getSelf());
303 sendResponse(shardInformation, message.isWaitUntilInitialized(), new Supplier<Object>() {
305 public Object get() {
306 return new LocalShardFound(shardInformation.getActor());
311 private void sendResponse(ShardInformation shardInformation, boolean waitUntilInitialized,
312 final Supplier<Object> messageSupplier) {
313 if (!shardInformation.isShardInitialized()) {
314 if(waitUntilInitialized) {
315 final ActorRef sender = getSender();
316 final ActorRef self = self();
317 shardInformation.addRunnableOnInitialized(new Runnable() {
320 sender.tell(messageSupplier.get(), self);
324 getSender().tell(new ActorNotInitialized(), getSelf());
330 getSender().tell(messageSupplier.get(), getSelf());
333 private void memberRemoved(ClusterEvent.MemberRemoved message) {
334 memberNameToAddress.remove(message.member().roles().head());
337 private void memberUp(ClusterEvent.MemberUp message) {
338 String memberName = message.member().roles().head();
340 memberNameToAddress.put(memberName, message.member().address());
342 for(ShardInformation info : localShards.values()){
343 String shardName = info.getShardName();
344 info.updatePeerAddress(getShardIdentifier(memberName, shardName),
345 getShardActorPath(shardName, memberName));
349 private void onDatastoreContext(DatastoreContext context) {
350 datastoreContext = context;
351 for (ShardInformation info : localShards.values()) {
352 if (info.getActor() != null) {
353 info.getActor().tell(datastoreContext, getSelf());
359 * Notifies all the local shards of a change in the schema context
363 private void updateSchemaContext(final Object message) {
364 final SchemaContext schemaContext = ((UpdateSchemaContext) message).getSchemaContext();
366 Set<ModuleIdentifier> allModuleIdentifiers = schemaContext.getAllModuleIdentifiers();
367 Set<String> newModules = new HashSet<>(128);
369 for(ModuleIdentifier moduleIdentifier : allModuleIdentifiers){
370 String s = moduleIdentifier.getNamespace().toString();
374 if(newModules.containsAll(knownModules)) {
376 LOG.debug("New SchemaContext has a super set of current knownModules - persisting info");
378 knownModules = ImmutableSet.copyOf(newModules);
380 dataPersistenceProvider.persist(new SchemaContextModules(newModules), new Procedure<SchemaContextModules>() {
383 public void apply(SchemaContextModules param) throws Exception {
384 LOG.debug("Sending new SchemaContext to Shards");
385 for (ShardInformation info : localShards.values()) {
386 if (info.getActor() == null) {
387 info.setActor(getContext().actorOf(Shard.props(info.getShardId(),
388 info.getPeerAddresses(), datastoreContext, schemaContext)
389 .withDispatcher(shardDispatcherPath), info.getShardId().toString()));
391 info.getActor().tell(message, getSelf());
393 info.getActor().tell(new RegisterRoleChangeListener(), self());
399 LOG.debug("Rejecting schema context update - not a super set of previously known modules:\nUPDATE: {}\nKNOWN: {}",
400 newModules, knownModules);
405 private void findPrimary(FindPrimary message) {
406 String shardName = message.getShardName();
408 // First see if the there is a local replica for the shard
409 final ShardInformation info = localShards.get(shardName);
411 sendResponse(info, message.isWaitUntilInitialized(), new Supplier<Object>() {
413 public Object get() {
414 return new PrimaryFound(info.getActorPath().toString()).toSerializable();
421 List<String> members = configuration.getMembersFromShardName(shardName);
423 if(cluster.getCurrentMemberName() != null) {
424 members.remove(cluster.getCurrentMemberName());
428 * FIXME: Instead of sending remote shard actor path back to sender,
429 * forward FindPrimary message to remote shard manager
431 // There is no way for us to figure out the primary (for now) so assume
432 // that one of the remote nodes is a primary
433 for(String memberName : members) {
434 Address address = memberNameToAddress.get(memberName);
437 getShardActorPath(shardName, memberName);
438 getSender().tell(new PrimaryFound(path).toSerializable(), getSelf());
442 getSender().tell(new PrimaryNotFound(shardName).toSerializable(), getSelf());
445 private String getShardActorPath(String shardName, String memberName) {
446 Address address = memberNameToAddress.get(memberName);
447 if(address != null) {
448 StringBuilder builder = new StringBuilder();
449 builder.append(address.toString())
451 .append(ShardManagerIdentifier.builder().type(type).build().toString())
453 .append(getShardIdentifier(memberName, shardName));
454 return builder.toString();
460 * Construct the name of the shard actor given the name of the member on
461 * which the shard resides and the name of the shard
467 private ShardIdentifier getShardIdentifier(String memberName, String shardName){
468 return ShardIdentifier.builder().memberName(memberName).shardName(shardName).type(type).build();
472 * Create shards that are local to the member on which the ShardManager
476 private void createLocalShards() {
477 String memberName = this.cluster.getCurrentMemberName();
478 List<String> memberShardNames =
479 this.configuration.getMemberShardNames(memberName);
481 List<String> localShardActorNames = new ArrayList<>();
482 for(String shardName : memberShardNames){
483 ShardIdentifier shardId = getShardIdentifier(memberName, shardName);
484 Map<ShardIdentifier, String> peerAddresses = getPeerAddresses(shardName);
485 localShardActorNames.add(shardId.toString());
486 localShards.put(shardName, new ShardInformation(shardName, shardId, peerAddresses));
489 mBean = ShardManagerInfo.createShardManagerMBean("shard-manager-" + this.type,
490 datastoreContext.getDataStoreMXBeanType(), localShardActorNames);
494 * Given the name of the shard find the addresses of all it's peers
499 private Map<ShardIdentifier, String> getPeerAddresses(String shardName){
501 Map<ShardIdentifier, String> peerAddresses = new HashMap<>();
503 List<String> members =
504 this.configuration.getMembersFromShardName(shardName);
506 String currentMemberName = this.cluster.getCurrentMemberName();
508 for(String memberName : members){
509 if(!currentMemberName.equals(memberName)){
510 ShardIdentifier shardId = getShardIdentifier(memberName,
513 getShardActorPath(shardName, currentMemberName);
514 peerAddresses.put(shardId, path);
517 return peerAddresses;
521 public SupervisorStrategy supervisorStrategy() {
523 return new OneForOneStrategy(10, Duration.create("1 minute"),
524 new Function<Throwable, SupervisorStrategy.Directive>() {
526 public SupervisorStrategy.Directive apply(Throwable t) {
527 LOG.warn("Supervisor Strategy caught unexpected exception - resuming", t);
528 return SupervisorStrategy.resume();
536 public String persistenceId() {
537 return "shard-manager-" + type;
541 Collection<String> getKnownModules() {
546 DataPersistenceProvider getDataPersistenceProvider() {
547 return dataPersistenceProvider;
551 ShardManagerInfoMBean getMBean(){
555 private class ShardInformation {
556 private final ShardIdentifier shardId;
557 private final String shardName;
558 private ActorRef actor;
559 private ActorPath actorPath;
560 private final Map<ShardIdentifier, String> peerAddresses;
562 // flag that determines if the actor is ready for business
563 private boolean actorInitialized = false;
565 private boolean followerSyncStatus = false;
567 private final List<Runnable> runnablesOnInitialized = Lists.newArrayList();
568 private String role ;
570 private ShardInformation(String shardName, ShardIdentifier shardId,
571 Map<ShardIdentifier, String> peerAddresses) {
572 this.shardName = shardName;
573 this.shardId = shardId;
574 this.peerAddresses = peerAddresses;
577 String getShardName() {
585 ActorPath getActorPath() {
589 void setActor(ActorRef actor) {
591 this.actorPath = actor.path();
594 ShardIdentifier getShardId() {
598 Map<ShardIdentifier, String> getPeerAddresses() {
599 return peerAddresses;
602 void updatePeerAddress(ShardIdentifier peerId, String peerAddress){
603 LOG.info("updatePeerAddress for peer {} with address {}", peerId,
605 if(peerAddresses.containsKey(peerId)){
606 peerAddresses.put(peerId, peerAddress);
609 if(LOG.isDebugEnabled()) {
610 LOG.debug("Sending PeerAddressResolved for peer {} with address {} to {}",
611 peerId, peerAddress, actor.path());
614 actor.tell(new PeerAddressResolved(peerId, peerAddress), getSelf());
619 boolean isShardInitialized() {
620 return getActor() != null && actorInitialized;
623 void setActorInitialized() {
624 this.actorInitialized = true;
626 for(Runnable runnable: runnablesOnInitialized) {
630 runnablesOnInitialized.clear();
633 void addRunnableOnInitialized(Runnable runnable) {
634 runnablesOnInitialized.add(runnable);
637 public void setRole(String newRole) {
641 public String getRole(){
645 public void setFollowerSyncStatus(boolean syncStatus){
646 this.followerSyncStatus = syncStatus;
649 public boolean isInSync(){
650 if(RaftState.Follower.name().equals(this.role)){
651 return followerSyncStatus;
652 } else if(RaftState.Leader.name().equals(this.role)){
661 private static class ShardManagerCreator implements Creator<ShardManager> {
662 private static final long serialVersionUID = 1L;
664 final ClusterWrapper cluster;
665 final Configuration configuration;
666 final DatastoreContext datastoreContext;
667 private final CountDownLatch waitTillReadyCountdownLatch;
669 ShardManagerCreator(ClusterWrapper cluster,
670 Configuration configuration, DatastoreContext datastoreContext, CountDownLatch waitTillReadyCountdownLatch) {
671 this.cluster = cluster;
672 this.configuration = configuration;
673 this.datastoreContext = datastoreContext;
674 this.waitTillReadyCountdownLatch = waitTillReadyCountdownLatch;
678 public ShardManager create() throws Exception {
679 return new ShardManager(cluster, configuration, datastoreContext, waitTillReadyCountdownLatch);
683 static class SchemaContextModules implements Serializable {
684 private static final long serialVersionUID = -8884620101025936590L;
686 private final Set<String> modules;
688 SchemaContextModules(Set<String> modules){
689 this.modules = modules;
692 public Set<String> getModules() {