5f37af6e9d828482d16aac95178fc47418f27d6d
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / behaviors / Follower.java
1 /*
2  * Copyright (c) 2014 Cisco Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8
9 package org.opendaylight.controller.cluster.raft.behaviors;
10
11 import akka.actor.ActorRef;
12 import akka.japi.Procedure;
13 import com.google.common.annotations.VisibleForTesting;
14 import com.google.common.base.Stopwatch;
15 import java.util.ArrayList;
16 import java.util.concurrent.TimeUnit;
17 import javax.annotation.Nullable;
18 import org.opendaylight.controller.cluster.raft.RaftActorContext;
19 import org.opendaylight.controller.cluster.raft.RaftState;
20 import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
21 import org.opendaylight.controller.cluster.raft.Snapshot;
22 import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
23 import org.opendaylight.controller.cluster.raft.base.messages.ElectionTimeout;
24 import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow;
25 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
26 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
27 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
28 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
29 import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
30 import org.opendaylight.controller.cluster.raft.messages.RequestVote;
31 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
32 import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
33
34 /**
35  * The behavior of a RaftActor in the Follower state
36  * <p/>
37  * <ul>
38  * <li> Respond to RPCs from candidates and leaders
39  * <li> If election timeout elapses without receiving AppendEntries
40  * RPC from current leader or granting vote to candidate:
41  * convert to candidate
42  * </ul>
43  */
44 public class Follower extends AbstractRaftActorBehavior {
45     private static final int SYNC_THRESHOLD = 10;
46
47     private final SyncStatusTracker initialSyncStatusTracker;
48
49     private final Procedure<ReplicatedLogEntry> appendAndPersistCallback = new Procedure<ReplicatedLogEntry>() {
50         @Override
51         public void apply(ReplicatedLogEntry logEntry) {
52             context.getReplicatedLog().captureSnapshotIfReady(logEntry);
53         }
54     };
55
56     private final Stopwatch lastLeaderMessageTimer = Stopwatch.createUnstarted();
57     private SnapshotTracker snapshotTracker = null;
58     private String leaderId;
59     private short leaderPayloadVersion;
60
61     public Follower(RaftActorContext context) {
62         this(context, null, (short)-1);
63     }
64
65     public Follower(RaftActorContext context, String initialLeaderId, short initialLeaderPayloadVersion) {
66         super(context, RaftState.Follower);
67         this.leaderId = initialLeaderId;
68         this.leaderPayloadVersion = initialLeaderPayloadVersion;
69
70         initialSyncStatusTracker = new SyncStatusTracker(context.getActor(), getId(), SYNC_THRESHOLD);
71
72         if (context.getPeerIds().isEmpty() && getLeaderId() == null) {
73             actor().tell(TimeoutNow.INSTANCE, actor());
74         } else {
75             scheduleElection(electionDuration());
76         }
77     }
78
79     @Override
80     public final String getLeaderId() {
81         return leaderId;
82     }
83
84     @VisibleForTesting
85     protected final void setLeaderId(@Nullable final String leaderId) {
86         this.leaderId = leaderId;
87     }
88
89     @Override
90     public short getLeaderPayloadVersion() {
91         return leaderPayloadVersion;
92     }
93
94     @VisibleForTesting
95     protected final void setLeaderPayloadVersion(short leaderPayloadVersion) {
96         this.leaderPayloadVersion = leaderPayloadVersion;
97     }
98
99     private void restartLastLeaderMessageTimer() {
100         if (lastLeaderMessageTimer.isRunning()) {
101             lastLeaderMessageTimer.reset();
102         }
103
104         lastLeaderMessageTimer.start();
105     }
106
107     private boolean isLogEntryPresent(long index){
108         if(context.getReplicatedLog().isInSnapshot(index)) {
109             return true;
110         }
111
112         ReplicatedLogEntry entry = context.getReplicatedLog().get(index);
113         return entry != null;
114
115     }
116
117     private void updateInitialSyncStatus(long currentLeaderCommit, String leaderId){
118         initialSyncStatusTracker.update(leaderId, currentLeaderCommit, context.getCommitIndex());
119     }
120
121     @Override
122     protected RaftActorBehavior handleAppendEntries(ActorRef sender, AppendEntries appendEntries) {
123
124         int numLogEntries = appendEntries.getEntries() != null ? appendEntries.getEntries().size() : 0;
125         if(LOG.isTraceEnabled()) {
126             LOG.trace("{}: handleAppendEntries: {}", logName(), appendEntries);
127         } else if(LOG.isDebugEnabled() && numLogEntries > 0) {
128             LOG.debug("{}: handleAppendEntries: {}", logName(), appendEntries);
129         }
130
131         // TODO : Refactor this method into a bunch of smaller methods
132         // to make it easier to read. Before refactoring ensure tests
133         // cover the code properly
134
135         if (snapshotTracker != null || context.getSnapshotManager().isApplying()) {
136             // if snapshot install is in progress, follower should just acknowledge append entries with a reply.
137             AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), true,
138                     lastIndex(), lastTerm(), context.getPayloadVersion());
139
140             if(LOG.isDebugEnabled()) {
141                 LOG.debug("{}: snapshot install is in progress, replying immediately with {}", logName(), reply);
142             }
143             sender.tell(reply, actor());
144
145             return this;
146         }
147
148         // If we got here then we do appear to be talking to the leader
149         leaderId = appendEntries.getLeaderId();
150         leaderPayloadVersion = appendEntries.getPayloadVersion();
151
152         updateInitialSyncStatus(appendEntries.getLeaderCommit(), appendEntries.getLeaderId());
153         // First check if the logs are in sync or not
154         long lastIndex = lastIndex();
155
156         if (isOutOfSync(appendEntries)) {
157             // We found that the log was out of sync so just send a negative
158             // reply and return
159
160             LOG.debug("{}: Follower is out-of-sync, so sending negative reply, lastIndex: {}, lastTerm: {}",
161                         logName(), lastIndex, lastTerm());
162
163             sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
164                     lastTerm(), context.getPayloadVersion()), actor());
165             return this;
166         }
167
168         if (appendEntries.getEntries() != null && appendEntries.getEntries().size() > 0) {
169
170             LOG.debug("{}: Number of entries to be appended = {}", logName(),
171                         appendEntries.getEntries().size());
172
173             // 3. If an existing entry conflicts with a new one (same index
174             // but different terms), delete the existing entry and all that
175             // follow it (ยง5.3)
176             int addEntriesFrom = 0;
177             if (context.getReplicatedLog().size() > 0) {
178
179                 // Find the entry up until the one that is not in the follower's log
180                 for (int i = 0;i < appendEntries.getEntries().size(); i++, addEntriesFrom++) {
181                     ReplicatedLogEntry matchEntry = appendEntries.getEntries().get(i);
182
183                     if(!isLogEntryPresent(matchEntry.getIndex())) {
184                         // newEntry not found in the log
185                         break;
186                     }
187
188                     long existingEntryTerm = getLogEntryTerm(matchEntry.getIndex());
189
190                     LOG.debug("{}: matchEntry {} is present: existingEntryTerm: {}", logName(), matchEntry,
191                             existingEntryTerm);
192
193                     // existingEntryTerm == -1 means it's in the snapshot and not in the log. We don't know
194                     // what the term was so we'll assume it matches.
195                     if(existingEntryTerm == -1 || existingEntryTerm == matchEntry.getTerm()) {
196                         continue;
197                     }
198
199                     if(!context.getRaftPolicy().applyModificationToStateBeforeConsensus()) {
200
201                         LOG.debug("{}: Removing entries from log starting at {}", logName(),
202                                 matchEntry.getIndex());
203
204                         // Entries do not match so remove all subsequent entries
205                         if(!context.getReplicatedLog().removeFromAndPersist(matchEntry.getIndex())) {
206                             // Could not remove the entries - this means the matchEntry index must be in the
207                             // snapshot and not the log. In this case the prior entries are part of the state
208                             // so we must send back a reply to force a snapshot to completely re-sync the
209                             // follower's log and state.
210
211                             LOG.debug("{}: Could not remove entries - sending reply to force snapshot", logName());
212                             sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
213                                     lastTerm(), context.getPayloadVersion(), true), actor());
214                             return this;
215                         }
216
217                         break;
218                     } else {
219                         sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
220                                 lastTerm(), context.getPayloadVersion(), true), actor());
221                         return this;
222                     }
223                 }
224             }
225
226             lastIndex = lastIndex();
227             LOG.debug("{}: After cleanup, lastIndex: {}, entries to be added from: {}", logName(),
228                     lastIndex, addEntriesFrom);
229
230             // 4. Append any new entries not already in the log
231             for (int i = addEntriesFrom; i < appendEntries.getEntries().size(); i++) {
232                 ReplicatedLogEntry entry = appendEntries.getEntries().get(i);
233
234                 LOG.debug("{}: Append entry to log {}", logName(), entry.getData());
235
236                 context.getReplicatedLog().appendAndPersist(entry, appendAndPersistCallback);
237
238                 if(entry.getData() instanceof ServerConfigurationPayload) {
239                     context.updatePeerIds((ServerConfigurationPayload)entry.getData());
240                 }
241             }
242
243             LOG.debug("{}: Log size is now {}", logName(), context.getReplicatedLog().size());
244         }
245
246         // 5. If leaderCommit > commitIndex, set commitIndex =
247         // min(leaderCommit, index of last new entry)
248
249         lastIndex = lastIndex();
250         long prevCommitIndex = context.getCommitIndex();
251
252         context.setCommitIndex(Math.min(appendEntries.getLeaderCommit(), lastIndex));
253
254         if (prevCommitIndex != context.getCommitIndex()) {
255             LOG.debug("{}: Commit index set to {}", logName(), context.getCommitIndex());
256         }
257
258         // If commitIndex > lastApplied: increment lastApplied, apply
259         // log[lastApplied] to state machine (ยง5.3)
260         // check if there are any entries to be applied. last-applied can be equal to last-index
261         if (appendEntries.getLeaderCommit() > context.getLastApplied() &&
262             context.getLastApplied() < lastIndex) {
263             if(LOG.isDebugEnabled()) {
264                 LOG.debug("{}: applyLogToStateMachine, " +
265                         "appendEntries.getLeaderCommit(): {}," +
266                         "context.getLastApplied(): {}, lastIndex(): {}", logName(),
267                     appendEntries.getLeaderCommit(), context.getLastApplied(), lastIndex);
268             }
269
270             applyLogToStateMachine(appendEntries.getLeaderCommit());
271         }
272
273         AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), true,
274             lastIndex, lastTerm(), context.getPayloadVersion());
275
276         if(LOG.isTraceEnabled()) {
277             LOG.trace("{}: handleAppendEntries returning : {}", logName(), reply);
278         } else if(LOG.isDebugEnabled() && numLogEntries > 0) {
279             LOG.debug("{}: handleAppendEntries returning : {}", logName(), reply);
280         }
281
282         sender.tell(reply, actor());
283
284         if (!context.getSnapshotManager().isCapturing()) {
285             super.performSnapshotWithoutCapture(appendEntries.getReplicatedToAllIndex());
286         }
287
288         return this;
289     }
290
291     private boolean isOutOfSync(AppendEntries appendEntries) {
292
293         long prevLogTerm = getLogEntryTerm(appendEntries.getPrevLogIndex());
294         boolean prevEntryPresent = isLogEntryPresent(appendEntries.getPrevLogIndex());
295         long lastIndex = lastIndex();
296         int numLogEntries = appendEntries.getEntries() != null ? appendEntries.getEntries().size() : 0;
297         boolean outOfSync = true;
298
299         if (lastIndex == -1 && appendEntries.getPrevLogIndex() != -1) {
300
301             // The follower's log is out of sync because the leader does have
302             // an entry at prevLogIndex and this follower has no entries in
303             // it's log.
304
305             LOG.debug("{}: The followers log is empty and the senders prevLogIndex is {}",
306                         logName(), appendEntries.getPrevLogIndex());
307         } else if (lastIndex > -1 && appendEntries.getPrevLogIndex() != -1 && !prevEntryPresent) {
308
309             // The follower's log is out of sync because the Leader's
310             // prevLogIndex entry was not found in it's log
311
312             LOG.debug("{}: The log is not empty but the prevLogIndex {} was not found in it - lastIndex: {}, snapshotIndex: {}",
313                         logName(), appendEntries.getPrevLogIndex(), lastIndex, context.getReplicatedLog().getSnapshotIndex());
314         } else if (lastIndex > -1 && prevEntryPresent && prevLogTerm != appendEntries.getPrevLogTerm()) {
315
316             // The follower's log is out of sync because the Leader's
317             // prevLogIndex entry does exist in the follower's log but it has
318             // a different term in it
319
320             LOG.debug(
321                     "{}: Cannot append entries because previous entry term {}  is not equal to append entries prevLogTerm {}",
322                     logName(), prevLogTerm, appendEntries.getPrevLogTerm());
323         } else if(appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1
324                 && appendEntries.getReplicatedToAllIndex() != -1
325                 && !isLogEntryPresent(appendEntries.getReplicatedToAllIndex())) {
326             // This append entry comes from a leader who has it's log aggressively trimmed and so does not have
327             // the previous entry in it's in-memory journal
328
329             LOG.debug(
330                     "{}: Cannot append entries because the replicatedToAllIndex {} does not appear to be in the in-memory journal",
331                     logName(), appendEntries.getReplicatedToAllIndex());
332         } else if(appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1
333                 && appendEntries.getReplicatedToAllIndex() != -1 && numLogEntries > 0
334                 && !isLogEntryPresent(appendEntries.getEntries().get(0).getIndex() - 1)) {
335             LOG.debug(
336                     "{}: Cannot append entries because the calculated previousIndex {} was not found in the in-memory journal",
337                     logName(), appendEntries.getEntries().get(0).getIndex() - 1);
338         } else {
339             outOfSync = false;
340         }
341         return outOfSync;
342     }
343
344     @Override
345     protected RaftActorBehavior handleAppendEntriesReply(ActorRef sender,
346         AppendEntriesReply appendEntriesReply) {
347         return this;
348     }
349
350     @Override
351     protected RaftActorBehavior handleRequestVoteReply(ActorRef sender,
352         RequestVoteReply requestVoteReply) {
353         return this;
354     }
355
356     @Override
357     public RaftActorBehavior handleMessage(ActorRef sender, Object message) {
358         if (message instanceof ElectionTimeout || message instanceof TimeoutNow) {
359             return handleElectionTimeout(message);
360         }
361
362         if (!(message instanceof RaftRPC)) {
363             // The rest of the processing requires the message to be a RaftRPC
364             return null;
365         }
366
367         final RaftRPC rpc = (RaftRPC) message;
368         // If RPC request or response contains term T > currentTerm:
369         // set currentTerm = T, convert to follower (ยง5.1)
370         // This applies to all RPC messages and responses
371         if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
372             LOG.debug("{}: Term {} in \"{}\" message is greater than follower's term {} - updating term",
373                 logName(), rpc.getTerm(), rpc, context.getTermInformation().getCurrentTerm());
374
375             context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
376         }
377
378         if (rpc instanceof InstallSnapshot) {
379             handleInstallSnapshot(sender, (InstallSnapshot) rpc);
380             restartLastLeaderMessageTimer();
381             scheduleElection(electionDuration());
382             return this;
383         }
384
385         if (!(rpc instanceof RequestVote) || canGrantVote((RequestVote) rpc)) {
386             restartLastLeaderMessageTimer();
387             scheduleElection(electionDuration());
388         }
389
390         return super.handleMessage(sender, rpc);
391     }
392
393     private RaftActorBehavior handleElectionTimeout(Object message) {
394         // If the message is ElectionTimeout, verify we haven't actually seen a message from the leader
395         // during the election timeout interval. It may that the election timer expired b/c this actor
396         // was busy and messages got delayed, in which case leader messages would be backed up in the
397         // queue but would be processed before the ElectionTimeout message and thus would restart the
398         // lastLeaderMessageTimer.
399         long lastLeaderMessageInterval = lastLeaderMessageTimer.elapsed(TimeUnit.MILLISECONDS);
400         boolean noLeaderMessageReceived = !lastLeaderMessageTimer.isRunning() || lastLeaderMessageInterval >=
401                 context.getConfigParams().getElectionTimeOutInterval().toMillis();
402
403         if(canStartElection()) {
404             if(message instanceof TimeoutNow || noLeaderMessageReceived) {
405                 LOG.debug("{}: Received {} - switching to Candidate", logName(), message.getClass().getSimpleName());
406                 return internalSwitchBehavior(RaftState.Candidate);
407             } else {
408                 LOG.debug("{}: Received ElectionTimeout but lastLeaderMessageInterval {} < election timeout",
409                         logName(), lastLeaderMessageInterval);
410                 scheduleElection(electionDuration());
411             }
412         } else if(message instanceof ElectionTimeout) {
413             if(noLeaderMessageReceived) {
414                 setLeaderId(null);
415             }
416
417             scheduleElection(electionDuration());
418         }
419
420         return this;
421     }
422
423     private void handleInstallSnapshot(final ActorRef sender, InstallSnapshot installSnapshot) {
424
425         LOG.debug("{}: handleInstallSnapshot: {}", logName(), installSnapshot);
426
427         leaderId = installSnapshot.getLeaderId();
428
429         if(snapshotTracker == null){
430             snapshotTracker = new SnapshotTracker(LOG, installSnapshot.getTotalChunks());
431         }
432
433         updateInitialSyncStatus(installSnapshot.getLastIncludedIndex(), installSnapshot.getLeaderId());
434
435         try {
436             final InstallSnapshotReply reply = new InstallSnapshotReply(
437                     currentTerm(), context.getId(), installSnapshot.getChunkIndex(), true);
438
439             if(snapshotTracker.addChunk(installSnapshot.getChunkIndex(), installSnapshot.getData(),
440                     installSnapshot.getLastChunkHashCode())){
441                 Snapshot snapshot = Snapshot.create(snapshotTracker.getSnapshot(),
442                         new ArrayList<ReplicatedLogEntry>(),
443                         installSnapshot.getLastIncludedIndex(),
444                         installSnapshot.getLastIncludedTerm(),
445                         installSnapshot.getLastIncludedIndex(),
446                         installSnapshot.getLastIncludedTerm(),
447                         context.getTermInformation().getCurrentTerm(),
448                         context.getTermInformation().getVotedFor(),
449                         installSnapshot.getServerConfig().orNull());
450
451                 ApplySnapshot.Callback applySnapshotCallback = new ApplySnapshot.Callback() {
452                     @Override
453                     public void onSuccess() {
454                         LOG.debug("{}: handleInstallSnapshot returning: {}", logName(), reply);
455
456                         sender.tell(reply, actor());
457                     }
458
459                     @Override
460                     public void onFailure() {
461                         sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(), -1, false), actor());
462                     }
463                 };
464
465                 actor().tell(new ApplySnapshot(snapshot, applySnapshotCallback), actor());
466
467                 snapshotTracker = null;
468             } else {
469                 LOG.debug("{}: handleInstallSnapshot returning: {}", logName(), reply);
470
471                 sender.tell(reply, actor());
472             }
473         } catch (SnapshotTracker.InvalidChunkException e) {
474             LOG.debug("{}: Exception in InstallSnapshot of follower", logName(), e);
475
476             sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(),
477                     -1, false), actor());
478             snapshotTracker = null;
479
480         } catch (Exception e){
481             LOG.error("{}: Exception in InstallSnapshot of follower", logName(), e);
482
483             //send reply with success as false. The chunk will be sent again on failure
484             sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(),
485                     installSnapshot.getChunkIndex(), false), actor());
486
487         }
488     }
489
490     @Override
491     public void close() {
492         stopElection();
493     }
494
495     @VisibleForTesting
496     SnapshotTracker getSnapshotTracker(){
497         return snapshotTracker;
498     }
499 }