Do not allow persistence callbacks to throw Exception
[controller.git] opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Follower.java
/*
 * Copyright (c) 2014 Cisco Systems, Inc. and others.  All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html
 */
package org.opendaylight.controller.cluster.raft.behaviors;

import akka.actor.ActorRef;
import akka.actor.ActorSelection;
import akka.actor.Address;
import akka.cluster.Cluster;
import akka.cluster.ClusterEvent.CurrentClusterState;
import akka.cluster.Member;
import akka.cluster.MemberStatus;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
import org.eclipse.jdt.annotation.Nullable;
import org.opendaylight.controller.cluster.messaging.MessageAssembler;
import org.opendaylight.controller.cluster.raft.RaftActorContext;
import org.opendaylight.controller.cluster.raft.RaftState;
import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
import org.opendaylight.controller.cluster.raft.base.messages.ElectionTimeout;
import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow;
import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
import org.opendaylight.controller.cluster.raft.messages.RequestVote;
import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
import org.opendaylight.controller.cluster.raft.persisted.Snapshot;

/**
 * The behavior of a RaftActor in the Follower raft state.
 * <ul>
 * <li> Respond to RPCs from candidates and leaders
 * <li> If election timeout elapses without receiving AppendEntries
 * RPC from current leader or granting vote to candidate:
 * convert to candidate
 * </ul>
 */
public class Follower extends AbstractRaftActorBehavior {
    private static final long MAX_ELECTION_TIMEOUT_FACTOR = 18;

    private final SyncStatusTracker initialSyncStatusTracker;

    private final MessageAssembler appendEntriesMessageAssembler;

    private final Stopwatch lastLeaderMessageTimer = Stopwatch.createStarted();
    private SnapshotTracker snapshotTracker = null;
    private String leaderId;
    private short leaderPayloadVersion;

    public Follower(final RaftActorContext context) {
        this(context, null, (short)-1);
    }

    public Follower(final RaftActorContext context, final String initialLeaderId,
            final short initialLeaderPayloadVersion) {
        super(context, RaftState.Follower);
        this.leaderId = initialLeaderId;
        this.leaderPayloadVersion = initialLeaderPayloadVersion;

        initialSyncStatusTracker = new SyncStatusTracker(context.getActor(), getId(), context.getConfigParams()
            .getSyncIndexThreshold());

        appendEntriesMessageAssembler = MessageAssembler.builder().logContext(logName())
                .fileBackedStreamFactory(context.getFileBackedOutputStreamFactory())
                .assembledMessageCallback((message, sender) -> handleMessage(sender, message)).build();

        if (context.getPeerIds().isEmpty() && getLeaderId() == null) {
            actor().tell(TimeoutNow.INSTANCE, actor());
        } else {
            scheduleElection(electionDuration());
        }
    }

    @Override
    public final String getLeaderId() {
        return leaderId;
    }

    @VisibleForTesting
    protected final void setLeaderId(final @Nullable String leaderId) {
        this.leaderId = leaderId;
    }

    @Override
    public short getLeaderPayloadVersion() {
        return leaderPayloadVersion;
    }

    @VisibleForTesting
    protected final void setLeaderPayloadVersion(final short leaderPayloadVersion) {
        this.leaderPayloadVersion = leaderPayloadVersion;
    }

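    // Restarts the stopwatch tracking how long it has been since the last message from the leader was received.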
    private void restartLastLeaderMessageTimer() {
        if (lastLeaderMessageTimer.isRunning()) {
            lastLeaderMessageTimer.reset();
        }

        lastLeaderMessageTimer.start();
    }

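    // Returns true if an entry at the given index exists either in the snapshot or in the in-memory log.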
    private boolean isLogEntryPresent(final long index) {
        if (context.getReplicatedLog().isInSnapshot(index)) {
            return true;
        }

        ReplicatedLogEntry entry = context.getReplicatedLog().get(index);
        return entry != null;
    }

    private void updateInitialSyncStatus(final long currentLeaderCommit, final String newLeaderId) {
        initialSyncStatusTracker.update(newLeaderId, currentLeaderCommit, context.getCommitIndex());
    }

    @Override
    protected RaftActorBehavior handleAppendEntries(final ActorRef sender, final AppendEntries appendEntries) {
        int numLogEntries = appendEntries.getEntries().size();
        if (log.isTraceEnabled()) {
            log.trace("{}: handleAppendEntries: {}", logName(), appendEntries);
        } else if (log.isDebugEnabled() && numLogEntries > 0) {
            log.debug("{}: handleAppendEntries: {}", logName(), appendEntries);
        }

        if (snapshotTracker != null && !snapshotTracker.getLeaderId().equals(appendEntries.getLeaderId())) {
            log.debug("{}: snapshot install is in progress but the prior snapshot leaderId {} does not match the "
                + "AppendEntries leaderId {}", logName(), snapshotTracker.getLeaderId(), appendEntries.getLeaderId());
            closeSnapshotTracker();
        }

        if (snapshotTracker != null || context.getSnapshotManager().isApplying()) {
            // if snapshot install is in progress, follower should just acknowledge append entries with a reply.
            AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), true,
                    lastIndex(), lastTerm(), context.getPayloadVersion(), false, needsLeaderAddress(),
                    appendEntries.getLeaderRaftVersion());

            log.debug("{}: snapshot install is in progress, replying immediately with {}", logName(), reply);
            sender.tell(reply, actor());

            return this;
        }

        // If we got here then we do appear to be talking to the leader
        leaderId = appendEntries.getLeaderId();
        leaderPayloadVersion = appendEntries.getPayloadVersion();

        if (appendEntries.getLeaderAddress().isPresent()) {
            final String address = appendEntries.getLeaderAddress().get();
            log.debug("New leader address: {}", address);

            context.setPeerAddress(leaderId, address);
            context.getConfigParams().getPeerAddressResolver().setResolved(leaderId, address);
        }

        // First check if the logs are in sync or not
        if (isOutOfSync(appendEntries, sender)) {
            updateInitialSyncStatus(appendEntries.getLeaderCommit(), appendEntries.getLeaderId());
            return this;
        }

        if (!processNewEntries(appendEntries, sender)) {
            updateInitialSyncStatus(appendEntries.getLeaderCommit(), appendEntries.getLeaderId());
            return this;
        }

        long lastIndex = lastIndex();
        long prevCommitIndex = context.getCommitIndex();

        // If leaderCommit > commitIndex, set commitIndex = min(leaderCommit, index of last new entry)
        if (appendEntries.getLeaderCommit() > prevCommitIndex) {
            context.setCommitIndex(Math.min(appendEntries.getLeaderCommit(), lastIndex));
        }

        if (prevCommitIndex != context.getCommitIndex()) {
            log.debug("{}: Commit index set to {}", logName(), context.getCommitIndex());
        }

        AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), true,
                lastIndex, lastTerm(), context.getPayloadVersion(), false, needsLeaderAddress(),
                appendEntries.getLeaderRaftVersion());

        if (log.isTraceEnabled()) {
            log.trace("{}: handleAppendEntries returning : {}", logName(), reply);
        } else if (log.isDebugEnabled() && numLogEntries > 0) {
            log.debug("{}: handleAppendEntries returning : {}", logName(), reply);
        }

        // Reply to the leader before applying any previous state so as not to hold up leader consensus.
        sender.tell(reply, actor());

        updateInitialSyncStatus(appendEntries.getLeaderCommit(), appendEntries.getLeaderId());

        // If leaderCommit > lastApplied, increment lastApplied and apply log[lastApplied] to state machine (§5.3).
        // lastApplied can be equal to lastIndex.
        if (appendEntries.getLeaderCommit() > context.getLastApplied() && context.getLastApplied() < lastIndex) {
            if (log.isDebugEnabled()) {
                log.debug("{}: applyLogToStateMachine, appendEntries.getLeaderCommit(): {}, "
                        + "context.getLastApplied(): {}, lastIndex(): {}", logName(),
                    appendEntries.getLeaderCommit(), context.getLastApplied(), lastIndex);
            }

            applyLogToStateMachine(appendEntries.getLeaderCommit());
        }

        if (!context.getSnapshotManager().isCapturing()) {
            super.performSnapshotWithoutCapture(appendEntries.getReplicatedToAllIndex());
        }

        appendEntriesMessageAssembler.checkExpiredAssembledMessageState();

        return this;
    }

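    // Resolves conflicts between the leader's entries and the local log, removing conflicting local entries where
    // permitted, then appends and persists the remaining new entries. Returns false if a negative reply was sent
    // to the leader and processing of this AppendEntries should stop.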
    private boolean processNewEntries(final AppendEntries appendEntries, final ActorRef sender) {
        int numLogEntries = appendEntries.getEntries().size();
        if (numLogEntries == 0) {
            return true;
        }

        log.debug("{}: Number of entries to be appended = {}", logName(), numLogEntries);

        long lastIndex = lastIndex();
        int addEntriesFrom = 0;

        // First check for conflicting entries. If an existing entry conflicts with a new one (same index but different
        // term), delete the existing entry and all that follow it (§5.3)
        if (context.getReplicatedLog().size() > 0) {
            // Find the entry up until the one that is not in the follower's log
            for (int i = 0; i < numLogEntries; i++, addEntriesFrom++) {
                ReplicatedLogEntry matchEntry = appendEntries.getEntries().get(i);

                if (!isLogEntryPresent(matchEntry.getIndex())) {
                    // newEntry not found in the log
                    break;
                }

                long existingEntryTerm = getLogEntryTerm(matchEntry.getIndex());

                log.debug("{}: matchEntry {} is present: existingEntryTerm: {}", logName(), matchEntry,
                        existingEntryTerm);

                // existingEntryTerm == -1 means it's in the snapshot and not in the log. We don't know
                // what the term was so we'll assume it matches.
                if (existingEntryTerm == -1 || existingEntryTerm == matchEntry.getTerm()) {
                    continue;
                }

                if (!context.getRaftPolicy().applyModificationToStateBeforeConsensus()) {
                    log.info("{}: Removing entries from log starting at {}, commitIndex: {}, lastApplied: {}",
                            logName(), matchEntry.getIndex(), context.getCommitIndex(), context.getLastApplied());

                    // Entries do not match so remove all subsequent entries but only if the existing entries haven't
                    // been applied to the state yet.
                    if (matchEntry.getIndex() <= context.getLastApplied()
                            || !context.getReplicatedLog().removeFromAndPersist(matchEntry.getIndex())) {
                        // Could not remove the entries - this means the matchEntry index must be in the
                        // snapshot and not the log. In this case the prior entries are part of the state
                        // so we must send back a reply to force a snapshot to completely re-sync the
                        // follower's log and state.

                        log.info("{}: Could not remove entries - sending reply to force snapshot", logName());
                        sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
                                lastTerm(), context.getPayloadVersion(), true, needsLeaderAddress(),
                                appendEntries.getLeaderRaftVersion()), actor());
                        return false;
                    }

                    break;
                } else {
                    sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
                            lastTerm(), context.getPayloadVersion(), true, needsLeaderAddress(),
                            appendEntries.getLeaderRaftVersion()), actor());
                    return false;
                }
            }
        }

        lastIndex = lastIndex();
        log.debug("{}: After cleanup, lastIndex: {}, entries to be added from: {}", logName(), lastIndex,
                addEntriesFrom);

        // When persistence successfully completes for each new log entry appended, we need to determine if we
        // should capture a snapshot to compact the persisted log. shouldCaptureSnapshot tracks whether or not
        // one of the log entries has exceeded the log size threshold whereby a snapshot should be taken. However
        // we don't initiate the snapshot at that log entry but rather after the last log entry has been persisted.
        // This is done because subsequent log entries after the one that tripped the threshold may have been
        // applied to the state already, as the persistence callback occurs async, and we want those entries
        // purged from the persisted log as well.
        final AtomicBoolean shouldCaptureSnapshot = new AtomicBoolean(false);
        final Consumer<ReplicatedLogEntry> appendAndPersistCallback = logEntry -> {
            final List<ReplicatedLogEntry> entries = appendEntries.getEntries();
            final ReplicatedLogEntry lastEntryToAppend = entries.get(entries.size() - 1);
            if (shouldCaptureSnapshot.get() && logEntry == lastEntryToAppend) {
                context.getSnapshotManager().capture(context.getReplicatedLog().last(), getReplicatedToAllIndex());
            }
        };

        // Append any new entries not already in the log
        for (int i = addEntriesFrom; i < numLogEntries; i++) {
            ReplicatedLogEntry entry = appendEntries.getEntries().get(i);

            log.debug("{}: Append entry to log {}", logName(), entry.getData());

            context.getReplicatedLog().appendAndPersist(entry, appendAndPersistCallback, false);

            shouldCaptureSnapshot.compareAndSet(false,
                    context.getReplicatedLog().shouldCaptureSnapshot(entry.getIndex()));

            if (entry.getData() instanceof ServerConfigurationPayload) {
                context.updatePeerIds((ServerConfigurationPayload)entry.getData());
            }
        }

        log.debug("{}: Log size is now {}", logName(), context.getReplicatedLog().size());

        return true;
    }

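    // Checks whether this follower's log is consistent with the leader's prevLogIndex/prevLogTerm (or, when the
    // leader's log has been trimmed, with replicatedToAllIndex). If not, a negative reply is sent to the leader
    // and true is returned.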
    private boolean isOutOfSync(final AppendEntries appendEntries, final ActorRef sender) {

        final long lastIndex = lastIndex();
        if (lastIndex == -1 && appendEntries.getPrevLogIndex() != -1) {

            // The follower's log is out of sync because the leader does have an entry at prevLogIndex and this
            // follower has no entries in its log.

            log.info("{}: The follower's log is empty and the sender's prevLogIndex is {}", logName(),
                appendEntries.getPrevLogIndex());

            sendOutOfSyncAppendEntriesReply(sender, false, appendEntries.getLeaderRaftVersion());
            return true;
        }

        if (lastIndex > -1) {
            if (isLogEntryPresent(appendEntries.getPrevLogIndex())) {
                final long leadersPrevLogTermInFollowersLogOrSnapshot =
                        getLogEntryOrSnapshotTerm(appendEntries.getPrevLogIndex());
                if (leadersPrevLogTermInFollowersLogOrSnapshot != appendEntries.getPrevLogTerm()) {

                    // The follower's log is out of sync because the Leader's prevLogIndex entry does exist
                    // in the follower's log or snapshot but it has a different term.

                    log.info("{}: The prevLogIndex {} was found in the log but the term {} is not equal to the append "
                        + "entries prevLogTerm {} - lastIndex: {}, snapshotIndex: {}, snapshotTerm: {}", logName(),
                        appendEntries.getPrevLogIndex(), leadersPrevLogTermInFollowersLogOrSnapshot,
                        appendEntries.getPrevLogTerm(), lastIndex, context.getReplicatedLog().getSnapshotIndex(),
                        context.getReplicatedLog().getSnapshotTerm());

                    sendOutOfSyncAppendEntriesReply(sender, false, appendEntries.getLeaderRaftVersion());
                    return true;
                }
            } else if (appendEntries.getPrevLogIndex() != -1) {

                // The follower's log is out of sync because the Leader's prevLogIndex entry was not found in its log

                log.info("{}: The log is not empty but the prevLogIndex {} was not found in it - lastIndex: {}, "
                        + "snapshotIndex: {}, snapshotTerm: {}", logName(), appendEntries.getPrevLogIndex(), lastIndex,
                        context.getReplicatedLog().getSnapshotIndex(), context.getReplicatedLog().getSnapshotTerm());

                sendOutOfSyncAppendEntriesReply(sender, false, appendEntries.getLeaderRaftVersion());
                return true;
            }
        }

        if (appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1
                && appendEntries.getReplicatedToAllIndex() != -1) {
            if (!isLogEntryPresent(appendEntries.getReplicatedToAllIndex())) {
                // This append entry comes from a leader who has its log aggressively trimmed and so does not have
                // the previous entry in its in-memory journal

                log.info("{}: Cannot append entries because the replicatedToAllIndex {} does not appear to be in the "
                        + "in-memory journal - lastIndex: {}, snapshotIndex: {}, snapshotTerm: {}", logName(),
                        appendEntries.getReplicatedToAllIndex(), lastIndex,
                        context.getReplicatedLog().getSnapshotIndex(), context.getReplicatedLog().getSnapshotTerm());

                sendOutOfSyncAppendEntriesReply(sender, false, appendEntries.getLeaderRaftVersion());
                return true;
            }

            final List<ReplicatedLogEntry> entries = appendEntries.getEntries();
            if (entries.size() > 0 && !isLogEntryPresent(entries.get(0).getIndex() - 1)) {
                log.info("{}: Cannot append entries because the calculated previousIndex {} was not found in the "
                        + "in-memory journal - lastIndex: {}, snapshotIndex: {}, snapshotTerm: {}", logName(),
                        entries.get(0).getIndex() - 1, lastIndex, context.getReplicatedLog().getSnapshotIndex(),
                        context.getReplicatedLog().getSnapshotTerm());

                sendOutOfSyncAppendEntriesReply(sender, false, appendEntries.getLeaderRaftVersion());
                return true;
            }
        }

        return false;
    }

    private void sendOutOfSyncAppendEntriesReply(final ActorRef sender, boolean forceInstallSnapshot,
            short leaderRaftVersion) {
        // We found that the log was out of sync so just send a negative reply.
        final AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex(),
                lastTerm(), context.getPayloadVersion(), forceInstallSnapshot, needsLeaderAddress(),
                leaderRaftVersion);

        log.info("{}: Follower is out-of-sync so sending negative reply: {}", logName(), reply);
        sender.tell(reply, actor());
    }

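    // Returns true if the leader's address is not yet known, in which case the reply asks the leader to include it.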
    private boolean needsLeaderAddress() {
        return context.getPeerAddress(leaderId) == null;
    }

    @Override
    protected RaftActorBehavior handleAppendEntriesReply(final ActorRef sender,
        final AppendEntriesReply appendEntriesReply) {
        return this;
    }

    @Override
    protected RaftActorBehavior handleRequestVoteReply(final ActorRef sender,
        final RequestVoteReply requestVoteReply) {
        return this;
    }

    @Override
    public RaftActorBehavior handleMessage(final ActorRef sender, final Object message) {
        if (message instanceof ElectionTimeout || message instanceof TimeoutNow) {
            return handleElectionTimeout(message);
        }

        if (appendEntriesMessageAssembler.handleMessage(message, actor())) {
            return this;
        }

        if (!(message instanceof RaftRPC)) {
            // The rest of the processing requires the message to be a RaftRPC
            return null;
        }

        final RaftRPC rpc = (RaftRPC) message;
        // If RPC request or response contains term T > currentTerm:
        // set currentTerm = T, convert to follower (§5.1)
        // This applies to all RPC messages and responses
        if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
            log.info("{}: Term {} in \"{}\" message is greater than follower's term {} - updating term",
                logName(), rpc.getTerm(), rpc, context.getTermInformation().getCurrentTerm());

            context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
        }

        if (rpc instanceof InstallSnapshot) {
            handleInstallSnapshot(sender, (InstallSnapshot) rpc);
            restartLastLeaderMessageTimer();
            scheduleElection(electionDuration());
            return this;
        }

        if (!(rpc instanceof RequestVote) || canGrantVote((RequestVote) rpc)) {
            restartLastLeaderMessageTimer();
            scheduleElection(electionDuration());
        }

        return super.handleMessage(sender, rpc);
    }

    private RaftActorBehavior handleElectionTimeout(final Object message) {
        // If the message is ElectionTimeout, verify we haven't actually seen a message from the leader
        // during the election timeout interval. It may be that the election timer expired because this actor
        // was busy and messages got delayed, in which case leader messages would be backed up in the
        // queue but would be processed before the ElectionTimeout message and thus would restart the
        // lastLeaderMessageTimer.
        long lastLeaderMessageInterval = lastLeaderMessageTimer.elapsed(TimeUnit.MILLISECONDS);
        long electionTimeoutInMillis = context.getConfigParams().getElectionTimeOutInterval().toMillis();
        boolean noLeaderMessageReceived = !lastLeaderMessageTimer.isRunning()
                || lastLeaderMessageInterval >= electionTimeoutInMillis;

        if (canStartElection()) {
            if (message instanceof TimeoutNow) {
                log.debug("{}: Received TimeoutNow - switching to Candidate", logName());
                return internalSwitchBehavior(RaftState.Candidate);
            } else if (noLeaderMessageReceived) {
                // Check the cluster state to see if the leader is known to be up before we go to Candidate.
                // However, if we haven't heard from the leader in a long time even though the cluster state
                // indicates it's up then something is wrong - leader might be stuck indefinitely - so switch
                // to Candidate.
                long maxElectionTimeout = electionTimeoutInMillis * MAX_ELECTION_TIMEOUT_FACTOR;
                if (isLeaderAvailabilityKnown() && lastLeaderMessageInterval < maxElectionTimeout) {
                    log.debug("{}: Received ElectionTimeout but leader appears to be available", logName());
                    scheduleElection(electionDuration());
                } else {
                    log.debug("{}: Received ElectionTimeout - switching to Candidate", logName());
                    return internalSwitchBehavior(RaftState.Candidate);
                }
            } else {
                log.debug("{}: Received ElectionTimeout but lastLeaderMessageInterval {} < election timeout {}",
                        logName(), lastLeaderMessageInterval, context.getConfigParams().getElectionTimeOutInterval());
                scheduleElection(electionDuration());
            }
        } else if (message instanceof ElectionTimeout) {
            if (noLeaderMessageReceived) {
                setLeaderId(null);
            }

            scheduleElection(electionDuration());
        }

        return this;
    }

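    // Consults the Akka Cluster membership to determine whether the current leader's node is reachable and in the
    // Up or WeaklyUp state, i.e. whether the leader still appears to be available.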
    private boolean isLeaderAvailabilityKnown() {
        if (leaderId == null) {
            return false;
        }

        Optional<Cluster> cluster = context.getCluster();
        if (!cluster.isPresent()) {
            return false;
        }

        ActorSelection leaderActor = context.getPeerActorSelection(leaderId);
        if (leaderActor == null) {
            return false;
        }

        Address leaderAddress = leaderActor.anchorPath().address();

        CurrentClusterState state = cluster.get().state();
        Set<Member> unreachable = state.getUnreachable();

        log.debug("{}: Checking for leader {} in the cluster unreachable set {}", logName(), leaderAddress,
                unreachable);

        for (Member m: unreachable) {
            if (leaderAddress.equals(m.address())) {
                log.info("{}: Leader {} is unreachable", logName(), leaderAddress);
                return false;
            }
        }

        for (Member m: state.getMembers()) {
            if (leaderAddress.equals(m.address())) {
                if (m.status() == MemberStatus.up() || m.status() == MemberStatus.weaklyUp()) {
                    log.debug("{}: Leader {} cluster status is {} - leader is available", logName(),
                            leaderAddress, m.status());
                    return true;
                } else {
                    log.debug("{}: Leader {} cluster status is {} - leader is unavailable", logName(),
                            leaderAddress, m.status());
                    return false;
                }
            }
        }

        log.debug("{}: Leader {} not found in the cluster member set", logName(), leaderAddress);

        return false;
    }

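    // Accumulates InstallSnapshot chunks via the SnapshotTracker and, once the last chunk has been added, hands the
    // assembled snapshot to the actor as an ApplySnapshot message, replying to the leader from its callback.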
    private void handleInstallSnapshot(final ActorRef sender, final InstallSnapshot installSnapshot) {

        log.debug("{}: handleInstallSnapshot: {}", logName(), installSnapshot);

        leaderId = installSnapshot.getLeaderId();

        if (snapshotTracker == null) {
            snapshotTracker = new SnapshotTracker(log, installSnapshot.getTotalChunks(), installSnapshot.getLeaderId(),
                    context);
        }

        updateInitialSyncStatus(installSnapshot.getLastIncludedIndex(), installSnapshot.getLeaderId());

        try {
            final InstallSnapshotReply reply = new InstallSnapshotReply(
                    currentTerm(), context.getId(), installSnapshot.getChunkIndex(), true);

            if (snapshotTracker.addChunk(installSnapshot.getChunkIndex(), installSnapshot.getData(),
                    installSnapshot.getLastChunkHashCode())) {

                log.info("{}: Snapshot installed from leader: {}", logName(), installSnapshot.getLeaderId());

                Snapshot snapshot = Snapshot.create(
                        context.getSnapshotManager().convertSnapshot(snapshotTracker.getSnapshotBytes()),
                        new ArrayList<>(),
                        installSnapshot.getLastIncludedIndex(),
                        installSnapshot.getLastIncludedTerm(),
                        installSnapshot.getLastIncludedIndex(),
                        installSnapshot.getLastIncludedTerm(),
                        context.getTermInformation().getCurrentTerm(),
                        context.getTermInformation().getVotedFor(),
                        installSnapshot.getServerConfig().orNull());

                ApplySnapshot.Callback applySnapshotCallback = new ApplySnapshot.Callback() {
                    @Override
                    public void onSuccess() {
                        log.debug("{}: handleInstallSnapshot returning: {}", logName(), reply);

                        sender.tell(reply, actor());
                    }

                    @Override
                    public void onFailure() {
                        sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(), -1, false), actor());
                    }
                };

                actor().tell(new ApplySnapshot(snapshot, applySnapshotCallback), actor());

                closeSnapshotTracker();
            } else {
                log.debug("{}: handleInstallSnapshot returning: {}", logName(), reply);

                sender.tell(reply, actor());
            }
        } catch (IOException e) {
            log.debug("{}: Exception in InstallSnapshot of follower", logName(), e);

            sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(),
                    -1, false), actor());

            closeSnapshotTracker();
        }
    }

    private void closeSnapshotTracker() {
        if (snapshotTracker != null) {
            snapshotTracker.close();
            snapshotTracker = null;
        }
    }

    @Override
    public void close() {
        closeSnapshotTracker();
        stopElection();
        appendEntriesMessageAssembler.close();
    }

    @VisibleForTesting
    SnapshotTracker getSnapshotTracker() {
        return snapshotTracker;
    }
}