Deprecate ask-based protocol messages
[controller.git] / opendaylight / md-sal / sal-distributed-datastore / src / main / java / org / opendaylight / controller / cluster / datastore / Shard.java
1 /*
2  * Copyright (c) 2014 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.datastore;
9
10 import static com.google.common.base.Preconditions.checkState;
11 import static com.google.common.base.Verify.verify;
12 import static com.google.common.base.Verify.verifyNotNull;
13 import static java.util.Objects.requireNonNull;
14
15 import akka.actor.ActorRef;
16 import akka.actor.ActorSelection;
17 import akka.actor.Cancellable;
18 import akka.actor.ExtendedActorSystem;
19 import akka.actor.PoisonPill;
20 import akka.actor.Props;
21 import akka.actor.Status;
22 import akka.actor.Status.Failure;
23 import akka.persistence.RecoveryCompleted;
24 import akka.persistence.SnapshotOffer;
25 import akka.serialization.JavaSerializer;
26 import akka.serialization.Serialization;
27 import com.google.common.annotations.VisibleForTesting;
28 import com.google.common.base.Ticker;
29 import com.google.common.collect.ImmutableList;
30 import com.google.common.collect.ImmutableMap;
31 import com.google.common.collect.ImmutableSet;
32 import com.google.common.collect.Range;
33 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
34 import java.io.IOException;
35 import java.util.Arrays;
36 import java.util.Collection;
37 import java.util.Collections;
38 import java.util.HashMap;
39 import java.util.Map;
40 import java.util.Optional;
41 import java.util.OptionalLong;
42 import java.util.concurrent.TimeUnit;
43 import org.eclipse.jdt.annotation.NonNull;
44 import org.eclipse.jdt.annotation.Nullable;
45 import org.opendaylight.controller.cluster.access.ABIVersion;
46 import org.opendaylight.controller.cluster.access.commands.ConnectClientRequest;
47 import org.opendaylight.controller.cluster.access.commands.ConnectClientSuccess;
48 import org.opendaylight.controller.cluster.access.commands.LocalHistoryRequest;
49 import org.opendaylight.controller.cluster.access.commands.NotLeaderException;
50 import org.opendaylight.controller.cluster.access.commands.OutOfSequenceEnvelopeException;
51 import org.opendaylight.controller.cluster.access.commands.TransactionRequest;
52 import org.opendaylight.controller.cluster.access.concepts.ClientIdentifier;
53 import org.opendaylight.controller.cluster.access.concepts.FrontendIdentifier;
54 import org.opendaylight.controller.cluster.access.concepts.RequestEnvelope;
55 import org.opendaylight.controller.cluster.access.concepts.RequestException;
56 import org.opendaylight.controller.cluster.access.concepts.RequestSuccess;
57 import org.opendaylight.controller.cluster.access.concepts.RetiredGenerationException;
58 import org.opendaylight.controller.cluster.access.concepts.RuntimeRequestException;
59 import org.opendaylight.controller.cluster.access.concepts.SliceableMessage;
60 import org.opendaylight.controller.cluster.access.concepts.TransactionIdentifier;
61 import org.opendaylight.controller.cluster.access.concepts.UnsupportedRequestException;
62 import org.opendaylight.controller.cluster.common.actor.CommonConfig;
63 import org.opendaylight.controller.cluster.common.actor.Dispatchers;
64 import org.opendaylight.controller.cluster.common.actor.Dispatchers.DispatcherType;
65 import org.opendaylight.controller.cluster.common.actor.MessageTracker;
66 import org.opendaylight.controller.cluster.common.actor.MeteringBehavior;
67 import org.opendaylight.controller.cluster.datastore.actors.JsonExportActor;
68 import org.opendaylight.controller.cluster.datastore.exceptions.NoShardLeaderException;
69 import org.opendaylight.controller.cluster.datastore.identifiers.ShardIdentifier;
70 import org.opendaylight.controller.cluster.datastore.messages.AbortTransaction;
71 import org.opendaylight.controller.cluster.datastore.messages.ActorInitialized;
72 import org.opendaylight.controller.cluster.datastore.messages.BatchedModifications;
73 import org.opendaylight.controller.cluster.datastore.messages.CanCommitTransaction;
74 import org.opendaylight.controller.cluster.datastore.messages.CloseTransactionChain;
75 import org.opendaylight.controller.cluster.datastore.messages.CommitTransaction;
76 import org.opendaylight.controller.cluster.datastore.messages.CreateTransaction;
77 import org.opendaylight.controller.cluster.datastore.messages.CreateTransactionReply;
78 import org.opendaylight.controller.cluster.datastore.messages.DataTreeChangedReply;
79 import org.opendaylight.controller.cluster.datastore.messages.ForwardedReadyTransaction;
80 import org.opendaylight.controller.cluster.datastore.messages.GetKnownClients;
81 import org.opendaylight.controller.cluster.datastore.messages.GetKnownClientsReply;
82 import org.opendaylight.controller.cluster.datastore.messages.GetShardDataTree;
83 import org.opendaylight.controller.cluster.datastore.messages.MakeLeaderLocal;
84 import org.opendaylight.controller.cluster.datastore.messages.OnDemandShardState;
85 import org.opendaylight.controller.cluster.datastore.messages.PeerAddressResolved;
86 import org.opendaylight.controller.cluster.datastore.messages.ReadyLocalTransaction;
87 import org.opendaylight.controller.cluster.datastore.messages.RegisterDataTreeChangeListener;
88 import org.opendaylight.controller.cluster.datastore.messages.ShardLeaderStateChanged;
89 import org.opendaylight.controller.cluster.datastore.messages.UpdateSchemaContext;
90 import org.opendaylight.controller.cluster.datastore.persisted.DatastoreSnapshot;
91 import org.opendaylight.controller.cluster.datastore.persisted.DatastoreSnapshot.ShardSnapshot;
92 import org.opendaylight.controller.cluster.datastore.persisted.DisableTrackingPayload;
93 import org.opendaylight.controller.cluster.messaging.MessageAssembler;
94 import org.opendaylight.controller.cluster.messaging.MessageSlicer;
95 import org.opendaylight.controller.cluster.messaging.SliceOptions;
96 import org.opendaylight.controller.cluster.notifications.LeaderStateChanged;
97 import org.opendaylight.controller.cluster.notifications.RegisterRoleChangeListener;
98 import org.opendaylight.controller.cluster.notifications.RoleChangeNotifier;
99 import org.opendaylight.controller.cluster.raft.LeadershipTransferFailedException;
100 import org.opendaylight.controller.cluster.raft.RaftActor;
101 import org.opendaylight.controller.cluster.raft.RaftActorRecoveryCohort;
102 import org.opendaylight.controller.cluster.raft.RaftActorSnapshotCohort;
103 import org.opendaylight.controller.cluster.raft.RaftState;
104 import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
105 import org.opendaylight.controller.cluster.raft.base.messages.FollowerInitialSyncUpStatus;
106 import org.opendaylight.controller.cluster.raft.client.messages.OnDemandRaftState;
107 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
108 import org.opendaylight.controller.cluster.raft.messages.Payload;
109 import org.opendaylight.controller.cluster.raft.messages.RequestLeadership;
110 import org.opendaylight.controller.cluster.raft.messages.ServerRemoved;
111 import org.opendaylight.yang.gen.v1.urn.opendaylight.params.xml.ns.yang.controller.config.distributed.datastore.provider.rev231229.DataStoreProperties.ExportOnRecovery;
112 import org.opendaylight.yangtools.concepts.Identifier;
113 import org.opendaylight.yangtools.yang.data.tree.api.DataTree;
114 import org.opendaylight.yangtools.yang.data.tree.api.DataValidationFailedException;
115 import org.opendaylight.yangtools.yang.data.tree.api.TreeType;
116 import org.opendaylight.yangtools.yang.model.api.EffectiveModelContext;
117 import org.opendaylight.yangtools.yang.model.api.EffectiveModelContextProvider;
118 import scala.concurrent.duration.FiniteDuration;
119
120 /**
121  * A Shard represents a portion of the logical data tree.
122  *
123  * <p>
124  * Our Shard uses InMemoryDataTree as its internal representation and delegates all requests to it.
125  */
126 // FIXME: non-final for testing?
127 public class Shard extends RaftActor {
128
    // Sentinel message: when the shard receives it, handleNonRaftCommand() runs commitTimeoutCheck().
    @VisibleForTesting
    static final Object TX_COMMIT_TIMEOUT_CHECK_MESSAGE = new Object() {
        @Override
        public String toString() {
            return "txCommitTimeoutCheck";
        }
    };

    // Sentinel message: when the shard receives it, handleNonRaftCommand() replies to the sender with
    // getShardMBean().
    @VisibleForTesting
    static final Object GET_SHARD_MBEAN_MESSAGE = new Object() {
        @Override
        public String toString() {
            return "getShardMBeanMessage";
        }
    };

    // Sentinel message: when the shard receives it, handleNonRaftCommand() calls
    // store.resumeNextPendingTransaction().
    static final Object RESUME_NEXT_PENDING_TRANSACTION = new Object() {
        @Override
        public String toString() {
            return "resumeNextPendingTransaction";
        }
    };

    // FIXME: shard names should be encapsulated in their own class and this should be exposed as a constant.
    public static final String DEFAULT_NAME = "default";

    // ABI versions this backend offers to connecting clients, in the order selectVersion() tries them.
    // Populated by the static initializer below.
    private static final Collection<ABIVersion> SUPPORTED_ABIVERSIONS;

    // Make sure to keep this in sync with the journal configuration in factory-akka.conf
    public static final String NON_PERSISTENT_JOURNAL_ID = "akka.persistence.non-persistent.journal";

    static {
        // Take every ABIVersion constant except the first and the last declared one, then reverse the
        // declaration order.
        // NOTE(review): presumably the two excluded constants are test-only boundary versions and the reversal
        // makes the newest version the preferred one -- confirm against ABIVersion.
        final ABIVersion[] values = ABIVersion.values();
        final ABIVersion[] real = Arrays.copyOfRange(values, 1, values.length - 1);
        SUPPORTED_ABIVERSIONS = ImmutableList.copyOf(real).reverse();
    }

    // Passed to every ConnectClientSuccess reply built in handleConnectClient().
    // FIXME: make this a dynamic property based on mailbox size and maximum number of clients
    private static final int CLIENT_MAX_MESSAGES = 1000;
168
    // The state of this Shard
    private final ShardDataTree store;

    // The name of this shard: the string form of the builder's ShardIdentifier, also passed to RaftActor as its id
    private final String name;

    // The bare shard name component of the ShardIdentifier (builder.getId().getShardName())
    private final String shardName;

    // Statistics bean created in the constructor; unregistered in postStop()
    private final ShardStats shardMBean;

    // Listener-info bean registered at the end of the constructor; unregistered in postStop()
    private final ShardDataTreeListenerInfoMXBeanImpl listenerInfoMXBean;

    private DatastoreContext datastoreContext;

    @Deprecated(since = "9.0.0", forRemoval = true)
    private final ShardCommitCoordinator commitCoordinator;

    // Half of the configured shard transaction commit timeout, in milliseconds; see setTransactionCommitTimeout()
    private long transactionCommitTimeout;

    // Cancelled in postStop() when non-null.
    // NOTE(review): presumably the periodic TX_COMMIT_TIMEOUT_CHECK_MESSAGE timer, scheduled outside this
    // section of the file -- confirm.
    private Cancellable txCommitTimeoutCheckSchedule;

    // Always present: createRoleChangeNotifier() wraps a freshly created actor in Optional.of()
    private final Optional<ActorRef> roleChangeNotifier;

    private final MessageTracker appendEntriesReplyTracker;

    @Deprecated(since = "9.0.0", forRemoval = true)
    private final ShardTransactionActorFactory transactionActorFactory;

    private final ShardSnapshotCohort snapshotCohort;

    private final DataTreeChangeListenerSupport treeChangeSupport = new DataTreeChangeListenerSupport(this);

    private ShardSnapshot restoreFromSnapshot;

    @Deprecated(since = "9.0.0", forRemoval = true)
    private final ShardTransactionMessageRetrySupport messageRetrySupport;

    @VisibleForTesting
    final FrontendMetadata frontendMetadata;

    // Starts out as an immutable empty map, yet methods of this class call put()/remove()/replace() on it.
    // NOTE(review): presumably it is swapped for a mutable map when this shard becomes leader, in code outside
    // this section -- confirm before mutating it on any new code path.
    private Map<FrontendIdentifier, LeaderFrontendState> knownFrontends = ImmutableMap.of();
    // While true, handleRequest() rejects incoming requests with NotLeaderException
    private boolean paused;

    private final MessageSlicer responseMessageSlicer;
    private final Dispatchers dispatchers;

    private final MessageAssembler requestMessageAssembler;

    // Recovery export mode taken from the DatastoreContext at construction time
    private final ExportOnRecovery exportOnRecovery;

    // Export actor used during recovery; null when exportOnRecovery is Off
    private final ActorRef exportActor;
220
    /**
     * Constructs a Shard from the supplied builder. The builder's identifier (as a string) becomes both the
     * RaftActor id and this shard's {@code name}; the builder's peer addresses, datastore context, optional
     * pre-built data tree, schema context and restore snapshot are consumed here.
     *
     * <p>
     * Besides assigning fields, construction creates child actors (the optional JSON export actor and the role
     * change notifier), may switch the actor behavior to a {@link MeteringBehavior}, and registers the listener-info
     * MXBean.
     *
     * @param builder source of all construction-time configuration
     */
    @SuppressFBWarnings(value = "MC_OVERRIDABLE_METHOD_CALL_IN_CONSTRUCTOR", justification = "Akka class design")
    Shard(final AbstractBuilder<?, ?> builder) {
        super(builder.getId().toString(), builder.getPeerAddresses(),
                Optional.of(builder.getDatastoreContext().getShardRaftConfig()), DataStoreVersions.CURRENT_VERSION);

        name = builder.getId().toString();
        shardName = builder.getId().getShardName();
        datastoreContext = builder.getDatastoreContext();
        restoreFromSnapshot = builder.getRestoreFromSnapshot();
        frontendMetadata = new FrontendMetadata(name);
        exportOnRecovery = datastoreContext.getExportOnRecovery();

        // Only spin up an export actor when recovery export is enabled; handleRecover() relies on exportActor being
        // non-null exactly in the Json case.
        exportActor = switch (exportOnRecovery) {
            case Json -> getContext().actorOf(JsonExportActor.props(builder.getSchemaContext(),
                datastoreContext.getRecoveryExportBaseDir()));
            case Off -> null;
        };

        setPersistence(datastoreContext.isPersistent());

        LOG.info("Shard created : {}, persistent : {}", name, datastoreContext.isPersistent());

        ShardDataTreeChangeListenerPublisherActorProxy treeChangeListenerPublisher =
                new ShardDataTreeChangeListenerPublisherActorProxy(getContext(), name + "-DTCL-publisher", name);
        // Wrap a caller-supplied DataTree when there is one, otherwise let ShardDataTree build its own from the
        // builder's tree type and the configured store root.
        if (builder.getDataTree() != null) {
            store = new ShardDataTree(this, builder.getSchemaContext(), builder.getDataTree(),
                    treeChangeListenerPublisher, name,
                    frontendMetadata);
        } else {
            store = new ShardDataTree(this, builder.getSchemaContext(), builder.getTreeType(),
                    builder.getDatastoreContext().getStoreRoot(), treeChangeListenerPublisher, name,
                    frontendMetadata);
        }

        shardMBean = ShardStats.create(name, datastoreContext.getDataStoreMXBeanType(), this);

        if (isMetricsCaptureEnabled()) {
            getContext().become(new MeteringBehavior(this));
        }

        commitCoordinator = new ShardCommitCoordinator(store, LOG, name);

        setTransactionCommitTimeout();

        // create a notifier actor for each cluster member
        roleChangeNotifier = createRoleChangeNotifier(name);

        appendEntriesReplyTracker = new MessageTracker(AppendEntriesReply.class,
                getRaftActorContext().getConfigParams().getIsolatedCheckIntervalInMillis());

        dispatchers = new Dispatchers(context().system().dispatchers());
        transactionActorFactory = new ShardTransactionActorFactory(store, datastoreContext,
            dispatchers.getDispatcherPath(Dispatchers.DispatcherType.Transaction),
                self(), getContext(), shardMBean, builder.getId().getShardName());

        snapshotCohort = ShardSnapshotCohort.create(getContext(), builder.getId().getMemberName(), store, LOG,
            name, datastoreContext);

        messageRetrySupport = new ShardTransactionMessageRetrySupport(this);

        // Outgoing responses are sliced to the configured maximum slice size; slicing state expires after two
        // minutes of inactivity.
        responseMessageSlicer = MessageSlicer.builder().logContext(name)
                .messageSliceSize(datastoreContext.getMaximumMessageSliceSize())
                .fileBackedStreamFactory(getRaftActorContext().getFileBackedOutputStreamFactory())
                .expireStateAfterInactivity(2, TimeUnit.MINUTES).build();

        // Reassembled requests are re-delivered to this actor with the original sender preserved.
        requestMessageAssembler = MessageAssembler.builder().logContext(name)
                .fileBackedStreamFactory(getRaftActorContext().getFileBackedOutputStreamFactory())
                .assembledMessageCallback((message, sender) -> self().tell(message, sender))
                .expireStateAfterInactivity(datastoreContext.getRequestTimeout(), TimeUnit.NANOSECONDS).build();

        listenerInfoMXBean = new ShardDataTreeListenerInfoMXBeanImpl(name, datastoreContext.getDataStoreMXBeanType(),
                self());
        listenerInfoMXBean.register();
    }
295
296     private void setTransactionCommitTimeout() {
297         transactionCommitTimeout = TimeUnit.MILLISECONDS.convert(
298                 datastoreContext.getShardTransactionCommitTimeoutInSeconds(), TimeUnit.SECONDS) / 2;
299     }
300
301     private Optional<ActorRef> createRoleChangeNotifier(final String shardId) {
302         ActorRef shardRoleChangeNotifier = getContext().actorOf(
303             RoleChangeNotifier.getProps(shardId), shardId + "-notifier");
304         return Optional.of(shardRoleChangeNotifier);
305     }
306
307     @Override
308     public final void postStop() throws Exception {
309         LOG.info("Stopping Shard {}", persistenceId());
310
311         super.postStop();
312
313         messageRetrySupport.close();
314
315         if (txCommitTimeoutCheckSchedule != null) {
316             txCommitTimeoutCheckSchedule.cancel();
317         }
318
319         commitCoordinator.abortPendingTransactions("Transaction aborted due to shutdown.", this);
320
321         shardMBean.unregisterMBean();
322         listenerInfoMXBean.unregister();
323     }
324
325     @Override
326     protected final void handleRecover(final Object message) {
327         LOG.debug("{}: onReceiveRecover: Received message {} from {}", persistenceId(), message.getClass(),
328             getSender());
329
330         super.handleRecover(message);
331
332         switch (exportOnRecovery) {
333             case Json:
334                 if (message instanceof SnapshotOffer) {
335                     exportActor.tell(new JsonExportActor.ExportSnapshot(store.readCurrentData().orElseThrow(), name),
336                             ActorRef.noSender());
337                 } else if (message instanceof ReplicatedLogEntry replicatedLogEntry) {
338                     exportActor.tell(new JsonExportActor.ExportJournal(replicatedLogEntry), ActorRef.noSender());
339                 } else if (message instanceof RecoveryCompleted) {
340                     exportActor.tell(new JsonExportActor.FinishExport(name), ActorRef.noSender());
341                     exportActor.tell(PoisonPill.getInstance(), ActorRef.noSender());
342                 }
343                 break;
344             case Off:
345             default:
346                 break;
347         }
348
349         if (LOG.isTraceEnabled()) {
350             appendEntriesReplyTracker.begin();
351         }
352     }
353
    /**
     * Dispatches every non-Raft message delivered to this actor.
     *
     * <p>
     * Each message is first reported to {@code appendEntriesReplyTracker}; if the resulting tracking context
     * carries an error it is logged at trace level. The store's transaction batch is then reset and the message is
     * routed by the first matching branch of the chain below. Messages matching none of the branches are offered to
     * {@code responseMessageSlicer} and, if it does not handle them, passed on to the superclass.
     *
     * <p>
     * Branch order is significant: the first match wins.
     *
     * @param message the message to handle
     */
    @Override
    // non-final for TestShard
    protected void handleNonRaftCommand(final Object message) {
        // The tracking context is closed automatically once handling of this message finishes
        try (var context = appendEntriesReplyTracker.received(message)) {
            final var maybeError = context.error();
            if (maybeError.isPresent()) {
                LOG.trace("{} : AppendEntriesReply failed to arrive at the expected interval {}", persistenceId(),
                    maybeError.orElseThrow());
            }

            store.resetTransactionBatch();

            if (message instanceof RequestEnvelope request) {
                handleRequestEnvelope(request);
            } else if (MessageAssembler.isHandledMessage(message)) {
                handleRequestAssemblerMessage(message);
            } else if (message instanceof ConnectClientRequest request) {
                handleConnectClient(request);
            } else if (CreateTransaction.isSerializedType(message)) {
                handleCreateTransaction(message);
            } else if (message instanceof BatchedModifications request) {
                handleBatchedModifications(request);
            } else if (message instanceof ForwardedReadyTransaction request) {
                handleForwardedReadyTransaction(request);
            } else if (message instanceof ReadyLocalTransaction request) {
                handleReadyLocalTransaction(request);
            } else if (CanCommitTransaction.isSerializedType(message)) {
                handleCanCommitTransaction(CanCommitTransaction.fromSerializable(message));
            } else if (CommitTransaction.isSerializedType(message)) {
                handleCommitTransaction(CommitTransaction.fromSerializable(message));
            } else if (AbortTransaction.isSerializedType(message)) {
                handleAbortTransaction(AbortTransaction.fromSerializable(message));
            } else if (CloseTransactionChain.isSerializedType(message)) {
                closeTransactionChain(CloseTransactionChain.fromSerializable(message));
            } else if (message instanceof DataTreeChangedReply) {
                // Ignore reply
            } else if (message instanceof RegisterDataTreeChangeListener request) {
                treeChangeSupport.onMessage(request, isLeader(), hasLeader());
            } else if (message instanceof UpdateSchemaContext request) {
                updateSchemaContext(request);
            } else if (message instanceof PeerAddressResolved resolved) {
                setPeerAddress(resolved.getPeerId(), resolved.getPeerAddress());
            } else if (TX_COMMIT_TIMEOUT_CHECK_MESSAGE.equals(message)) {
                commitTimeoutCheck();
            } else if (message instanceof DatastoreContext request) {
                onDatastoreContext(request);
            } else if (message instanceof RegisterRoleChangeListener) {
                roleChangeNotifier.orElseThrow().forward(message, context());
            } else if (message instanceof FollowerInitialSyncUpStatus request) {
                // Record the status locally, then relay the same message to the parent actor
                shardMBean.setFollowerInitialSyncStatus(request.isInitialSyncDone());
                context().parent().tell(message, self());
            } else if (GET_SHARD_MBEAN_MESSAGE.equals(message)) {
                sender().tell(getShardMBean(), self());
            } else if (message instanceof GetShardDataTree) {
                sender().tell(store.getDataTree(), self());
            } else if (message instanceof ServerRemoved) {
                context().parent().forward(message, context());
            } else if (ShardTransactionMessageRetrySupport.TIMER_MESSAGE_CLASS.isInstance(message)) {
                messageRetrySupport.onTimerMessage(message);
            } else if (message instanceof DataTreeCohortActorRegistry.CohortRegistryCommand request) {
                store.processCohortRegistryCommand(getSender(), request);
            } else if (message instanceof MakeLeaderLocal) {
                onMakeLeaderLocal();
            } else if (RESUME_NEXT_PENDING_TRANSACTION.equals(message)) {
                store.resumeNextPendingTransaction();
            } else if (GetKnownClients.INSTANCE.equals(message)) {
                handleGetKnownClients();
            } else if (!responseMessageSlicer.handleMessage(message)) {
                // Not a slicer reply either: let RaftActor deal with it
                super.handleNonRaftCommand(message);
            }
        }
    }
426
427     private void handleRequestAssemblerMessage(final Object message) {
428         dispatchers.getDispatcher(DispatcherType.Serialization).execute(() -> {
429             JavaSerializer.currentSystem().value_$eq((ExtendedActorSystem) context().system());
430             requestMessageAssembler.handleMessage(message, self());
431         });
432     }
433
434     @SuppressWarnings("checkstyle:IllegalCatch")
435     private void handleRequestEnvelope(final RequestEnvelope envelope) {
436         final long now = ticker().read();
437         try {
438             final RequestSuccess<?, ?> success = handleRequest(envelope, now);
439             if (success != null) {
440                 final long executionTimeNanos = ticker().read() - now;
441                 if (success instanceof SliceableMessage) {
442                     dispatchers.getDispatcher(DispatcherType.Serialization).execute(() ->
443                         responseMessageSlicer.slice(SliceOptions.builder().identifier(success.getTarget())
444                             .message(envelope.newSuccessEnvelope(success, executionTimeNanos))
445                             .sendTo(envelope.getMessage().getReplyTo()).replyTo(self())
446                             .onFailureCallback(t -> LOG.warn("Error slicing response {}", success, t)).build()));
447                 } else {
448                     envelope.sendSuccess(success, executionTimeNanos);
449                 }
450             }
451         } catch (RequestException e) {
452             LOG.debug("{}: request {} failed", persistenceId(), envelope, e);
453             envelope.sendFailure(e, ticker().read() - now);
454         } catch (Exception e) {
455             LOG.debug("{}: request {} caused failure", persistenceId(), envelope, e);
456             envelope.sendFailure(new RuntimeRequestException("Request failed to process", e),
457                 ticker().read() - now);
458         }
459     }
460
461     private void commitTimeoutCheck() {
462         store.checkForExpiredTransactions(transactionCommitTimeout, this::updateAccess);
463         commitCoordinator.checkForExpiredTransactions(transactionCommitTimeout, this);
464         requestMessageAssembler.checkExpiredAssembledMessageState();
465     }
466
467     private OptionalLong updateAccess(final SimpleShardDataTreeCohort cohort) {
468         final FrontendIdentifier frontend = cohort.transactionId().getHistoryId().getClientId().getFrontendId();
469         final LeaderFrontendState state = knownFrontends.get(frontend);
470         if (state == null) {
471             // Not tell-based protocol, do nothing
472             return OptionalLong.empty();
473         }
474
475         if (isIsolatedLeader()) {
476             // We are isolated and no new request can come through until we emerge from it. We are still updating
477             // liveness of frontend when we see it attempting to communicate. Use the last access timer.
478             return OptionalLong.of(state.getLastSeenTicks());
479         }
480
481         // If this frontend has freshly connected, give it some time to catch up before killing its transactions.
482         return OptionalLong.of(state.getLastConnectTicks());
483     }
484
485     private void disableTracking(final DisableTrackingPayload payload) {
486         final ClientIdentifier clientId = payload.getIdentifier();
487         LOG.debug("{}: disabling tracking of {}", persistenceId(), clientId);
488         frontendMetadata.disableTracking(clientId);
489
490         if (isLeader()) {
491             final FrontendIdentifier frontendId = clientId.getFrontendId();
492             final LeaderFrontendState frontend = knownFrontends.get(frontendId);
493             if (frontend != null) {
494                 if (clientId.equals(frontend.getIdentifier())) {
495                     if (!(frontend instanceof LeaderFrontendState.Disabled)) {
496                         verify(knownFrontends.replace(frontendId, frontend,
497                             new LeaderFrontendState.Disabled(persistenceId(), clientId, store)));
498                         LOG.debug("{}: leader state for {} disabled", persistenceId(), clientId);
499                     } else {
500                         LOG.debug("{}: leader state {} is already disabled", persistenceId(), frontend);
501                     }
502                 } else {
503                     LOG.debug("{}: leader state {} does not match {}", persistenceId(), frontend, clientId);
504                 }
505             } else {
506                 LOG.debug("{}: leader state for {} not found", persistenceId(), clientId);
507                 knownFrontends.put(frontendId, new LeaderFrontendState.Disabled(persistenceId(), clientId,
508                     getDataStore()));
509             }
510         }
511     }
512
513     private void onMakeLeaderLocal() {
514         LOG.debug("{}: onMakeLeaderLocal received", persistenceId());
515         if (isLeader()) {
516             getSender().tell(new Status.Success(null), getSelf());
517             return;
518         }
519
520         final ActorSelection leader = getLeader();
521
522         if (leader == null) {
523             // Leader is not present. The cluster is most likely trying to
524             // elect a leader and we should let that run its normal course
525
526             // TODO we can wait for the election to complete and retry the
527             // request. We can also let the caller retry by sending a flag
528             // in the response indicating the request is "reTryable".
529             getSender().tell(new Failure(
530                     new LeadershipTransferFailedException("We cannot initiate leadership transfer to local node. "
531                             + "Currently there is no leader for " + persistenceId())),
532                     getSelf());
533             return;
534         }
535
536         leader.tell(new RequestLeadership(getId(), getSender()), getSelf());
537     }
538
539     // Acquire our frontend tracking handle and verify generation matches
540     private @Nullable LeaderFrontendState findFrontend(final ClientIdentifier clientId) throws RequestException {
541         final LeaderFrontendState existing = knownFrontends.get(clientId.getFrontendId());
542         if (existing != null) {
543             final int cmp = Long.compareUnsigned(existing.getIdentifier().getGeneration(), clientId.getGeneration());
544             if (cmp == 0) {
545                 existing.touch();
546                 return existing;
547             }
548             if (cmp > 0) {
549                 LOG.debug("{}: rejecting request from outdated client {}", persistenceId(), clientId);
550                 throw new RetiredGenerationException(clientId.getGeneration(),
551                     existing.getIdentifier().getGeneration());
552             }
553
554             LOG.info("{}: retiring state {}, outdated by request from client {}", persistenceId(), existing, clientId);
555             existing.retire();
556             knownFrontends.remove(clientId.getFrontendId());
557         } else {
558             LOG.debug("{}: client {} is not yet known", persistenceId(), clientId);
559         }
560
561         return null;
562     }
563
564     private LeaderFrontendState getFrontend(final ClientIdentifier clientId) throws RequestException {
565         final LeaderFrontendState ret = findFrontend(clientId);
566         if (ret != null) {
567             return ret;
568         }
569
570         // TODO: a dedicated exception would be better, but this is technically true, too
571         throw new OutOfSequenceEnvelopeException(0);
572     }
573
574     private static @NonNull ABIVersion selectVersion(final ConnectClientRequest message) {
575         final Range<ABIVersion> clientRange = Range.closed(message.getMinVersion(), message.getMaxVersion());
576         for (ABIVersion v : SUPPORTED_ABIVERSIONS) {
577             if (clientRange.contains(v)) {
578                 return v;
579             }
580         }
581
582         throw new IllegalArgumentException(String.format(
583             "No common version between backend versions %s and client versions %s", SUPPORTED_ABIVERSIONS,
584             clientRange));
585     }
586
587     @SuppressWarnings("checkstyle:IllegalCatch")
588     private void handleConnectClient(final ConnectClientRequest message) {
589         try {
590             final ClientIdentifier clientId = message.getTarget();
591             final LeaderFrontendState existing = findFrontend(clientId);
592             if (existing != null) {
593                 existing.touch();
594             }
595
596             if (!isLeader() || !isLeaderActive()) {
597                 LOG.info("{}: not currently leader, rejecting request {}. isLeader: {}, isLeaderActive: {},"
598                                 + "isLeadershipTransferInProgress: {}.",
599                         persistenceId(), message, isLeader(), isLeaderActive(), isLeadershipTransferInProgress());
600                 throw new NotLeaderException(getSelf());
601             }
602
603             final ABIVersion selectedVersion = selectVersion(message);
604             final LeaderFrontendState frontend;
605             if (existing == null) {
606                 frontend = new LeaderFrontendState.Enabled(persistenceId(), clientId, store);
607                 knownFrontends.put(clientId.getFrontendId(), frontend);
608                 LOG.debug("{}: created state {} for client {}", persistenceId(), frontend, clientId);
609             } else {
610                 frontend = existing;
611             }
612
613             frontend.reconnect();
614             message.getReplyTo().tell(new ConnectClientSuccess(message.getTarget(), message.getSequence(), getSelf(),
615                 ImmutableList.of(), store.getDataTree(), CLIENT_MAX_MESSAGES).toVersion(selectedVersion),
616                 ActorRef.noSender());
617         } catch (RequestException | RuntimeException e) {
618             message.getReplyTo().tell(new Failure(e), ActorRef.noSender());
619         }
620     }
621
622     private @Nullable RequestSuccess<?, ?> handleRequest(final RequestEnvelope envelope, final long now)
623             throws RequestException {
624         // We are not the leader, hence we want to fail-fast.
625         if (!isLeader() || paused || !isLeaderActive()) {
626             LOG.debug("{}: not currently active leader, rejecting request {}. isLeader: {}, isLeaderActive: {},"
627                             + "isLeadershipTransferInProgress: {}, paused: {}",
628                     persistenceId(), envelope, isLeader(), isLeaderActive(), isLeadershipTransferInProgress(), paused);
629             throw new NotLeaderException(getSelf());
630         }
631
632         final var request = envelope.getMessage();
633         if (request instanceof TransactionRequest<?> txReq) {
634             final var clientId = txReq.getTarget().getHistoryId().getClientId();
635             return getFrontend(clientId).handleTransactionRequest(txReq, envelope, now);
636         } else if (request instanceof LocalHistoryRequest<?> lhReq) {
637             final var clientId = lhReq.getTarget().getClientId();
638             return getFrontend(clientId).handleLocalHistoryRequest(lhReq, envelope, now);
639         } else {
640             LOG.warn("{}: rejecting unsupported request {}", persistenceId(), request);
641             throw new UnsupportedRequestException(request);
642         }
643     }
644
645     private void handleGetKnownClients() {
646         final ImmutableSet<ClientIdentifier> clients;
647         if (isLeader()) {
648             clients = knownFrontends.values().stream()
649                     .map(LeaderFrontendState::getIdentifier)
650                     .collect(ImmutableSet.toImmutableSet());
651         } else {
652             clients = frontendMetadata.getClients();
653         }
654         sender().tell(new GetKnownClientsReply(clients), self());
655     }
656
657     private boolean hasLeader() {
658         return getLeaderId() != null;
659     }
660
661     final int getPendingTxCommitQueueSize() {
662         return store.getQueueSize();
663     }
664
665     final int getCohortCacheSize() {
666         return commitCoordinator.getCohortCacheSize();
667     }
668
669     @Override
670     protected final Optional<ActorRef> getRoleChangeNotifier() {
671         return roleChangeNotifier;
672     }
673
674     final String getShardName() {
675         return shardName;
676     }
677
678     @Override
679     protected final LeaderStateChanged newLeaderStateChanged(final String memberId, final String leaderId,
680             final short leaderPayloadVersion) {
681         return isLeader() ? new ShardLeaderStateChanged(memberId, leaderId, store.getDataTree(), leaderPayloadVersion)
682                 : new ShardLeaderStateChanged(memberId, leaderId, leaderPayloadVersion);
683     }
684
685     private void onDatastoreContext(final DatastoreContext context) {
686         datastoreContext = verifyNotNull(context);
687
688         setTransactionCommitTimeout();
689
690         setPersistence(datastoreContext.isPersistent());
691
692         updateConfigParams(datastoreContext.getShardRaftConfig());
693     }
694
695     // applyState() will be invoked once consensus is reached on the payload
696     // non-final for mocking
697     void persistPayload(final Identifier id, final Payload payload, final boolean batchHint) {
698         final boolean canSkipPayload = !hasFollowers() && !persistence().isRecoveryApplicable();
699         if (canSkipPayload) {
700             applyState(self(), id, payload);
701         } else {
702             // We are faking the sender
703             persistData(self(), id, payload, batchHint);
704         }
705     }
706
707     @Deprecated(since = "9.0.0", forRemoval = true)
708     private void handleCommitTransaction(final CommitTransaction commit) {
709         final var txId = commit.getTransactionId();
710         if (isLeader()) {
711             askProtocolEncountered(txId);
712             commitCoordinator.handleCommit(txId, getSender(), this);
713         } else {
714             final var leader = getLeader();
715             if (leader == null) {
716                 messageRetrySupport.addMessageToRetry(commit, getSender(), "Could not commit transaction " + txId);
717             } else {
718                 LOG.debug("{}: Forwarding CommitTransaction to leader {}", persistenceId(), leader);
719                 leader.forward(commit, getContext());
720             }
721         }
722     }
723
724     @Deprecated(since = "9.0.0", forRemoval = true)
725     private void handleCanCommitTransaction(final CanCommitTransaction canCommit) {
726         final var txId = canCommit.getTransactionId();
727         LOG.debug("{}: Can committing transaction {}", persistenceId(), txId);
728
729         if (isLeader()) {
730             askProtocolEncountered(txId);
731             commitCoordinator.handleCanCommit(txId, getSender(), this);
732         } else {
733             final var leader = getLeader();
734             if (leader == null) {
735                 messageRetrySupport.addMessageToRetry(canCommit, getSender(),
736                         "Could not canCommit transaction " + txId);
737             } else {
738                 LOG.debug("{}: Forwarding CanCommitTransaction to leader {}", persistenceId(), leader);
739                 leader.forward(canCommit, getContext());
740             }
741         }
742     }
743
744     @SuppressWarnings("checkstyle:IllegalCatch")
745     @Deprecated(since = "9.0.0", forRemoval = true)
746     private void handleBatchedModificationsLocal(final BatchedModifications batched, final ActorRef sender) {
747         askProtocolEncountered(batched.getTransactionId());
748
749         try {
750             commitCoordinator.handleBatchedModifications(batched, sender, this);
751         } catch (Exception e) {
752             LOG.error("{}: Error handling BatchedModifications for Tx {}", persistenceId(),
753                     batched.getTransactionId(), e);
754             sender.tell(new Failure(e), getSelf());
755         }
756     }
757
758     @Deprecated(since = "9.0.0", forRemoval = true)
759     private void handleBatchedModifications(final BatchedModifications batched) {
760         // This message is sent to prepare the modifications transaction directly on the Shard as an
761         // optimization to avoid the extra overhead of a separate ShardTransaction actor. On the last
762         // BatchedModifications message, the caller sets the ready flag in the message indicating
763         // modifications are complete. The reply contains the cohort actor path (this actor) for the caller
764         // to initiate the 3-phase commit. This also avoids the overhead of sending an additional
765         // ReadyTransaction message.
766
767         // If we're not the leader then forward to the leader. This is a safety measure - we shouldn't
768         // normally get here if we're not the leader as the front-end (TransactionProxy) should determine
769         // the primary/leader shard. However with timing and caching on the front-end, there's a small
770         // window where it could have a stale leader during leadership transitions.
771         //
772         boolean isLeaderActive = isLeaderActive();
773         if (isLeader() && isLeaderActive) {
774             handleBatchedModificationsLocal(batched, getSender());
775         } else {
776             final var leader = getLeader();
777             if (!isLeaderActive || leader == null) {
778                 messageRetrySupport.addMessageToRetry(batched, getSender(),
779                         "Could not process BatchedModifications " + batched.getTransactionId());
780             } else {
781                 // If this is not the first batch and leadership changed in between batched messages,
782                 // we need to reconstruct previous BatchedModifications from the transaction
783                 // DataTreeModification, honoring the max batched modification count, and forward all the
784                 // previous BatchedModifications to the new leader.
785                 final var newModifications = commitCoordinator.createForwardedBatchedModifications(batched,
786                     datastoreContext.getShardBatchedModificationCount());
787
788                 LOG.debug("{}: Forwarding {} BatchedModifications to leader {}", persistenceId(),
789                         newModifications.size(), leader);
790
791                 for (BatchedModifications bm : newModifications) {
792                     leader.forward(bm, getContext());
793                 }
794             }
795         }
796     }
797
798     private boolean failIfIsolatedLeader(final ActorRef sender) {
799         if (isIsolatedLeader()) {
800             sender.tell(new Failure(new NoShardLeaderException(String.format(
801                     "Shard %s was the leader but has lost contact with all of its followers. Either all"
802                     + " other follower nodes are down or this node is isolated by a network partition.",
803                     persistenceId()))), getSelf());
804             return true;
805         }
806
807         return false;
808     }
809
810     protected boolean isIsolatedLeader() {
811         return getRaftState() == RaftState.IsolatedLeader;
812     }
813
814     @SuppressWarnings("checkstyle:IllegalCatch")
815     @Deprecated(since = "9.0.0", forRemoval = true)
816    private void handleReadyLocalTransaction(final ReadyLocalTransaction message) {
817         final var txId = message.getTransactionId();
818         LOG.debug("{}: handleReadyLocalTransaction for {}", persistenceId(), txId);
819
820         final var isLeaderActive = isLeaderActive();
821         if (isLeader() && isLeaderActive) {
822             askProtocolEncountered(txId);
823             try {
824                 commitCoordinator.handleReadyLocalTransaction(message, getSender(), this);
825             } catch (Exception e) {
826                 LOG.error("{}: Error handling ReadyLocalTransaction for Tx {}", persistenceId(), txId, e);
827                 getSender().tell(new Failure(e), getSelf());
828             }
829         } else {
830             final var leader = getLeader();
831             if (!isLeaderActive || leader == null) {
832                 messageRetrySupport.addMessageToRetry(message, getSender(),
833                         "Could not process ready local transaction " + txId);
834             } else {
835                 LOG.debug("{}: Forwarding ReadyLocalTransaction to leader {}", persistenceId(), leader);
836                 message.setRemoteVersion(getCurrentBehavior().getLeaderPayloadVersion());
837                 leader.forward(message, getContext());
838             }
839         }
840     }
841
842     @Deprecated(since = "9.0.0", forRemoval = true)
843     private void handleForwardedReadyTransaction(final ForwardedReadyTransaction forwardedReady) {
844         LOG.debug("{}: handleForwardedReadyTransaction for {}", persistenceId(), forwardedReady.getTransactionId());
845
846         final var isLeaderActive = isLeaderActive();
847         if (isLeader() && isLeaderActive) {
848             askProtocolEncountered(forwardedReady.getTransactionId());
849             commitCoordinator.handleForwardedReadyTransaction(forwardedReady, getSender(), this);
850         } else {
851             final var leader = getLeader();
852             if (!isLeaderActive || leader == null) {
853                 messageRetrySupport.addMessageToRetry(forwardedReady, getSender(),
854                         "Could not process forwarded ready transaction " + forwardedReady.getTransactionId());
855             } else {
856                 LOG.debug("{}: Forwarding ForwardedReadyTransaction to leader {}", persistenceId(), leader);
857
858                 final var readyLocal = new ReadyLocalTransaction(forwardedReady.getTransactionId(),
859                         forwardedReady.getTransaction().getSnapshot(), forwardedReady.isDoImmediateCommit(),
860                         forwardedReady.getParticipatingShardNames());
861                 readyLocal.setRemoteVersion(getCurrentBehavior().getLeaderPayloadVersion());
862                 leader.forward(readyLocal, getContext());
863             }
864         }
865     }
866
867     @Deprecated(since = "9.0.0", forRemoval = true)
868     private void handleAbortTransaction(final AbortTransaction abort) {
869         final var transactionId = abort.getTransactionId();
870         askProtocolEncountered(transactionId);
871         doAbortTransaction(transactionId, getSender());
872     }
873
874     final void doAbortTransaction(final Identifier transactionID, final ActorRef sender) {
875         commitCoordinator.handleAbort(transactionID, sender, this);
876     }
877
878     @Deprecated(since = "9.0.0", forRemoval = true)
879     private void handleCreateTransaction(final Object message) {
880         if (isLeader()) {
881             createTransaction(CreateTransaction.fromSerializable(message));
882         } else if (getLeader() != null) {
883             getLeader().forward(message, getContext());
884         } else {
885             getSender().tell(new Failure(new NoShardLeaderException(
886                     "Could not create a shard transaction", persistenceId())), getSelf());
887         }
888     }
889
890     @Deprecated(since = "9.0.0", forRemoval = true)
891     private void closeTransactionChain(final CloseTransactionChain closeTransactionChain) {
892         if (isLeader()) {
893             final var id = closeTransactionChain.getIdentifier();
894             askProtocolEncountered(id.getClientId());
895             store.closeTransactionChain(id);
896         } else if (getLeader() != null) {
897             getLeader().forward(closeTransactionChain, getContext());
898         } else {
899             LOG.warn("{}: Could not close transaction {}", persistenceId(), closeTransactionChain.getIdentifier());
900         }
901     }
902
903     @Deprecated(since = "9.0.0", forRemoval = true)
904     @SuppressWarnings("checkstyle:IllegalCatch")
905     private void createTransaction(final CreateTransaction createTransaction) {
906         askProtocolEncountered(createTransaction.getTransactionId());
907
908         try {
909             if (TransactionType.fromInt(createTransaction.getTransactionType()) != TransactionType.READ_ONLY
910                     && failIfIsolatedLeader(getSender())) {
911                 return;
912             }
913
914             final var transactionActor = createTransaction(createTransaction.getTransactionType(),
915                 createTransaction.getTransactionId());
916
917             getSender().tell(new CreateTransactionReply(Serialization.serializedActorPath(transactionActor),
918                     createTransaction.getTransactionId(), createTransaction.getVersion()).toSerializable(), getSelf());
919         } catch (Exception e) {
920             getSender().tell(new Failure(e), getSelf());
921         }
922     }
923
924     @Deprecated(since = "9.0.0", forRemoval = true)
925     private ActorRef createTransaction(final int transactionType, final TransactionIdentifier transactionId) {
926         LOG.debug("{}: Creating transaction : {} ", persistenceId(), transactionId);
927         return transactionActorFactory.newShardTransaction(TransactionType.fromInt(transactionType),
928             transactionId);
929     }
930
931     // Called on leader only
932     @Deprecated(since = "9.0.0", forRemoval = true)
933     private void askProtocolEncountered(final TransactionIdentifier transactionId) {
934         askProtocolEncountered(transactionId.getHistoryId().getClientId());
935     }
936
937     // Called on leader only
938     @Deprecated(since = "9.0.0", forRemoval = true)
939     private void askProtocolEncountered(final ClientIdentifier clientId) {
940         final var frontend = clientId.getFrontendId();
941         final var state = knownFrontends.get(frontend);
942         if (!(state instanceof LeaderFrontendState.Disabled)) {
943             LOG.debug("{}: encountered ask-based client {}, disabling transaction tracking", persistenceId(), clientId);
944             if (knownFrontends.isEmpty()) {
945                 knownFrontends = new HashMap<>();
946             }
947             knownFrontends.put(frontend, new LeaderFrontendState.Disabled(persistenceId(), clientId, getDataStore()));
948
949             persistPayload(clientId, DisableTrackingPayload.create(clientId,
950                 datastoreContext.getInitialPayloadSerializedBufferCapacity()), false);
951         }
952     }
953
954     private void updateSchemaContext(final UpdateSchemaContext message) {
955         updateSchemaContext(message.getEffectiveModelContext());
956     }
957
958     @VisibleForTesting
959     void updateSchemaContext(final @NonNull EffectiveModelContext schemaContext) {
960         store.updateSchemaContext(schemaContext);
961     }
962
963     private boolean isMetricsCaptureEnabled() {
964         CommonConfig config = new CommonConfig(getContext().system().settings().config());
965         return config.isMetricCaptureEnabled();
966     }
967
968     @Override
969     protected final RaftActorSnapshotCohort getRaftActorSnapshotCohort() {
970         return snapshotCohort;
971     }
972
973     @Override
974     protected final RaftActorRecoveryCohort getRaftActorRecoveryCohort() {
975         if (restoreFromSnapshot == null) {
976             return ShardRecoveryCoordinator.create(store, persistenceId(), LOG);
977         }
978
979         return ShardRecoveryCoordinator.forSnapshot(store, persistenceId(), LOG, restoreFromSnapshot.getSnapshot());
980     }
981
982     @Override
983     // non-final for testing
984     protected void onRecoveryComplete() {
985         restoreFromSnapshot = null;
986
987         //notify shard manager
988         getContext().parent().tell(new ActorInitialized(getSelf()), ActorRef.noSender());
989
990         // Being paranoid here - this method should only be called once but just in case...
991         if (txCommitTimeoutCheckSchedule == null) {
992             // Schedule a message to be periodically sent to check if the current in-progress
993             // transaction should be expired and aborted.
994             final var period = FiniteDuration.create(transactionCommitTimeout / 3, TimeUnit.MILLISECONDS);
995             txCommitTimeoutCheckSchedule = getContext().system().scheduler().schedule(
996                     period, period, getSelf(),
997                     TX_COMMIT_TIMEOUT_CHECK_MESSAGE, getContext().dispatcher(), ActorRef.noSender());
998         }
999     }
1000
1001     @Override
1002     protected final void applyState(final ActorRef clientActor, final Identifier identifier, final Object data) {
1003         if (data instanceof Payload payload) {
1004             if (payload instanceof DisableTrackingPayload disableTracking) {
1005                 disableTracking(disableTracking);
1006                 return;
1007             }
1008
1009             try {
1010                 store.applyReplicatedPayload(identifier, payload);
1011             } catch (DataValidationFailedException | IOException e) {
1012                 LOG.error("{}: Error applying replica {}", persistenceId(), identifier, e);
1013             }
1014         } else {
1015             LOG.error("{}: Unknown state for {} received {}", persistenceId(), identifier, data);
1016         }
1017     }
1018
1019     @Override
1020     protected final void onStateChanged() {
1021         boolean isLeader = isLeader();
1022         boolean hasLeader = hasLeader();
1023         treeChangeSupport.onLeadershipChange(isLeader, hasLeader);
1024
1025         // If this actor is no longer the leader close all the transaction chains
1026         if (!isLeader) {
1027             if (LOG.isDebugEnabled()) {
1028                 LOG.debug(
1029                     "{}: onStateChanged: Closing all transaction chains because shard {} is no longer the leader",
1030                     persistenceId(), getId());
1031             }
1032
1033             paused = false;
1034             store.purgeLeaderState();
1035         }
1036
1037         if (hasLeader && !isIsolatedLeader()) {
1038             messageRetrySupport.retryMessages();
1039         }
1040     }
1041
1042     @Override
1043     protected final void onLeaderChanged(final String oldLeader, final String newLeader) {
1044         shardMBean.incrementLeadershipChangeCount();
1045         paused = false;
1046
1047         if (!isLeader()) {
1048             if (!knownFrontends.isEmpty()) {
1049                 LOG.debug("{}: removing frontend state for {}", persistenceId(), knownFrontends.keySet());
1050                 knownFrontends = ImmutableMap.of();
1051             }
1052
1053             requestMessageAssembler.close();
1054
1055             if (!hasLeader()) {
1056                 // No leader anywhere, nothing else to do
1057                 return;
1058             }
1059
1060             // Another leader was elected. If we were the previous leader and had pending transactions, convert
1061             // them to transaction messages and send to the new leader.
1062             ActorSelection leader = getLeader();
1063             if (leader != null) {
1064                 // Clears all pending transactions and converts them to messages to be forwarded to a new leader.
1065                 Collection<?> messagesToForward = commitCoordinator.convertPendingTransactionsToMessages(
1066                     datastoreContext.getShardBatchedModificationCount());
1067
1068                 if (!messagesToForward.isEmpty()) {
1069                     LOG.debug("{}: Forwarding {} pending transaction messages to leader {}", persistenceId(),
1070                             messagesToForward.size(), leader);
1071
1072                     for (Object message : messagesToForward) {
1073                         LOG.debug("{}: Forwarding pending transaction message {}", persistenceId(), message);
1074
1075                         leader.tell(message, self());
1076                     }
1077                 }
1078             } else {
1079                 commitCoordinator.abortPendingTransactions("The transacton was aborted due to inflight leadership "
1080                         + "change and the leader address isn't available.", this);
1081             }
1082         } else {
1083             // We have become the leader, we need to reconstruct frontend state
1084             knownFrontends = verifyNotNull(frontendMetadata.toLeaderState(this));
1085             LOG.debug("{}: became leader with frontend state for {}", persistenceId(), knownFrontends.keySet());
1086         }
1087
1088         if (!isIsolatedLeader()) {
1089             messageRetrySupport.retryMessages();
1090         }
1091     }
1092
1093     @Override
1094     protected final void pauseLeader(final Runnable operation) {
1095         LOG.debug("{}: In pauseLeader, operation: {}", persistenceId(), operation);
1096         paused = true;
1097
1098         // Tell-based protocol can replay transaction state, so it is safe to blow it up when we are paused.
1099         knownFrontends.values().forEach(LeaderFrontendState::retire);
1100         knownFrontends = ImmutableMap.of();
1101
1102         store.setRunOnPendingTransactionsComplete(operation);
1103     }
1104
1105     @Override
1106     protected final void unpauseLeader() {
1107         LOG.debug("{}: In unpauseLeader", persistenceId());
1108         paused = false;
1109
1110         store.setRunOnPendingTransactionsComplete(null);
1111
1112         // Restore tell-based protocol state as if we were becoming the leader
1113         knownFrontends = verifyNotNull(frontendMetadata.toLeaderState(this));
1114     }
1115
1116     @Override
1117     protected final OnDemandRaftState.AbstractBuilder<?, ?> newOnDemandRaftStateBuilder() {
1118         return OnDemandShardState.newBuilder()
1119             .treeChangeListenerActors(treeChangeSupport.getListenerActors())
1120             .commitCohortActors(store.getCohortActors());
1121     }
1122
1123     @Override
1124     public final String persistenceId() {
1125         return name;
1126     }
1127
1128     @Override
1129     public final String journalPluginId() {
1130         // This method may be invoked from super constructor (wonderful), hence we also need to handle the case of
1131         // the field being uninitialized because our constructor is not finished.
1132         if (datastoreContext != null && !datastoreContext.isPersistent()) {
1133             return NON_PERSISTENT_JOURNAL_ID;
1134         }
1135         return super.journalPluginId();
1136     }
1137
1138     @VisibleForTesting
1139     final ShardCommitCoordinator getCommitCoordinator() {
1140         return commitCoordinator;
1141     }
1142
1143     // non-final for mocking
1144     DatastoreContext getDatastoreContext() {
1145         return datastoreContext;
1146     }
1147
1148     @VisibleForTesting
1149     final ShardDataTree getDataStore() {
1150         return store;
1151     }
1152
1153     @VisibleForTesting
1154     // non-final for mocking
1155     ShardStats getShardMBean() {
1156         return shardMBean;
1157     }
1158
1159     public static Builder builder() {
1160         return new Builder();
1161     }
1162
1163     public abstract static class AbstractBuilder<T extends AbstractBuilder<T, S>, S extends Shard> {
1164         private final Class<? extends S> shardClass;
1165         private ShardIdentifier id;
1166         private Map<String, String> peerAddresses = Collections.emptyMap();
1167         private DatastoreContext datastoreContext;
1168         private EffectiveModelContextProvider schemaContextProvider;
1169         private DatastoreSnapshot.ShardSnapshot restoreFromSnapshot;
1170         private DataTree dataTree;
1171
1172         private volatile boolean sealed;
1173
1174         AbstractBuilder(final Class<? extends S> shardClass) {
1175             this.shardClass = shardClass;
1176         }
1177
1178         final void checkSealed() {
1179             checkState(!sealed, "Builder is already sealed - further modifications are not allowed");
1180         }
1181
1182         @SuppressWarnings("unchecked")
1183         private T self() {
1184             return (T) this;
1185         }
1186
1187         public T id(final ShardIdentifier newId) {
1188             checkSealed();
1189             id = newId;
1190             return self();
1191         }
1192
1193         public T peerAddresses(final Map<String, String> newPeerAddresses) {
1194             checkSealed();
1195             peerAddresses = newPeerAddresses;
1196             return self();
1197         }
1198
1199         public T datastoreContext(final DatastoreContext newDatastoreContext) {
1200             checkSealed();
1201             datastoreContext = newDatastoreContext;
1202             return self();
1203         }
1204
1205         public T schemaContextProvider(final EffectiveModelContextProvider newSchemaContextProvider) {
1206             checkSealed();
1207             schemaContextProvider = requireNonNull(newSchemaContextProvider);
1208             return self();
1209         }
1210
1211         public T restoreFromSnapshot(final DatastoreSnapshot.ShardSnapshot newRestoreFromSnapshot) {
1212             checkSealed();
1213             restoreFromSnapshot = newRestoreFromSnapshot;
1214             return self();
1215         }
1216
1217         public T dataTree(final DataTree newDataTree) {
1218             checkSealed();
1219             dataTree = newDataTree;
1220             return self();
1221         }
1222
1223         public ShardIdentifier getId() {
1224             return id;
1225         }
1226
1227         public Map<String, String> getPeerAddresses() {
1228             return peerAddresses;
1229         }
1230
1231         public DatastoreContext getDatastoreContext() {
1232             return datastoreContext;
1233         }
1234
1235         public EffectiveModelContext getSchemaContext() {
1236             return verifyNotNull(schemaContextProvider.getEffectiveModelContext());
1237         }
1238
1239         public DatastoreSnapshot.ShardSnapshot getRestoreFromSnapshot() {
1240             return restoreFromSnapshot;
1241         }
1242
1243         public DataTree getDataTree() {
1244             return dataTree;
1245         }
1246
1247         public TreeType getTreeType() {
1248             return switch (datastoreContext.getLogicalStoreType()) {
1249                 case CONFIGURATION -> TreeType.CONFIGURATION;
1250                 case OPERATIONAL -> TreeType.OPERATIONAL;
1251             };
1252         }
1253
1254         protected void verify() {
1255             requireNonNull(id, "id should not be null");
1256             requireNonNull(peerAddresses, "peerAddresses should not be null");
1257             requireNonNull(datastoreContext, "dataStoreContext should not be null");
1258             requireNonNull(schemaContextProvider, "schemaContextProvider should not be null");
1259         }
1260
1261         public Props props() {
1262             sealed = true;
1263             verify();
1264             return Props.create(shardClass, this);
1265         }
1266     }
1267
1268     public static class Builder extends AbstractBuilder<Builder, Shard> {
1269         Builder() {
1270             this(Shard.class);
1271         }
1272
1273         Builder(final Class<? extends Shard> shardClass) {
1274             super(shardClass);
1275         }
1276     }
1277
1278     Ticker ticker() {
1279         return Ticker.systemTicker();
1280     }
1281
1282     void scheduleNextPendingTransaction() {
1283         self().tell(RESUME_NEXT_PENDING_TRANSACTION, ActorRef.noSender());
1284     }
1285 }