2 * Copyright (c) 2014 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
9 package org.opendaylight.controller.cluster.raft.behaviors;
11 import akka.actor.ActorRef;
12 import akka.actor.ActorSelection;
13 import akka.actor.Cancellable;
14 import com.google.common.base.Preconditions;
15 import org.opendaylight.controller.cluster.raft.ClientRequestTracker;
16 import org.opendaylight.controller.cluster.raft.ClientRequestTrackerImpl;
17 import org.opendaylight.controller.cluster.raft.FollowerLogInformation;
18 import org.opendaylight.controller.cluster.raft.FollowerLogInformationImpl;
19 import org.opendaylight.controller.cluster.raft.RaftActorContext;
20 import org.opendaylight.controller.cluster.raft.RaftState;
21 import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
22 import org.opendaylight.controller.cluster.raft.base.messages.Replicate;
23 import org.opendaylight.controller.cluster.raft.base.messages.SendHeartBeat;
24 import org.opendaylight.controller.cluster.raft.base.messages.SendInstallSnapshot;
25 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
26 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
27 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
28 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
29 import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
30 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
31 import scala.concurrent.duration.FiniteDuration;
33 import java.util.ArrayList;
34 import java.util.Collections;
35 import java.util.HashMap;
36 import java.util.List;
39 import java.util.concurrent.TimeUnit;
40 import java.util.concurrent.atomic.AtomicLong;
43 * The behavior of a RaftActor when it is in the Leader state
47 * <li> Upon election: send initial empty AppendEntries RPCs
48 * (heartbeat) to each server; repeat during idle periods to
49 * prevent election timeouts (§5.2)
50 * <li> If command received from client: append entry to local log,
51 * respond after entry applied to state machine (§5.3)
52 * <li> If last log index ≥ nextIndex for a follower: send
53 * AppendEntries RPC with log entries starting at nextIndex
55 * <li> If successful: update nextIndex and matchIndex for
57 * <li> If AppendEntries fails because of log inconsistency:
58 * decrement nextIndex and retry (§5.3)
60 * <li> If there exists an N such that N > commitIndex, a majority
61 * of matchIndex[i] ≥ N, and log[N].term == currentTerm:
62 * set commitIndex = N (§5.3, §5.4).
64 public class Leader extends AbstractRaftActorBehavior {
// Replication progress of each follower, keyed by follower id; the
// constructor inserts one entry per peer.
// NOTE(review): the initializer on the line following the '=' is not visible in this listing.
67 private final Map<String, FollowerLogInformation> followerToLog =
70 private final Set<String> followers; // peer ids: context.getPeerAddresses().keySet()
// Cancelled by stopHeartBeat() when non-null and not already cancelled.
72 private Cancellable heartbeatSchedule = null;
// NOTE(review): no use of this handle is visible in this listing - confirm it is still needed.
73 private Cancellable appendEntriesSchedule = null;
// Holds the timer created by scheduleInstallSnapshotCheck(); cancelled by
// stopInstallSnapshotSchedule().
74 private Cancellable installSnapshotSchedule = null;
// Scanned by findClientRequestTracker(long) to match a tracker to a log index.
76 private List<ClientRequestTracker> trackerList = new ArrayList<>();
// Threshold the replicated count is compared against in
// handleAppendEntriesReply; the constructor sets it to
// (followers.size() + 1) / 2 + 1, or 0 when there are no followers.
78 private final int minReplicationCount;
/**
 * Sets up leader state for the given context: sets the commit index to
 * lastIndex() when that is non-negative, creates a FollowerLogInformation
 * for every peer, computes minReplicationCount, and schedules the first
 * heartbeat and the install-snapshot check.
 *
 * NOTE(review): several lines of this constructor are not visible in this
 * listing (for example the remaining FollowerLogInformationImpl arguments).
 *
 * @param context the RaftActorContext this behavior operates on
 */
80 public Leader(RaftActorContext context) {
// A non-negative lastIndex() moves the commit index up to that index.
83 if (lastIndex() >= 0) {
84 context.setCommitIndex(lastIndex());
87 followers = context.getPeerAddresses().keySet();
// One FollowerLogInformation per peer; its first AtomicLong argument is
// seeded with lastIndex().
89 for (String followerId : followers) {
90 FollowerLogInformation followerLogInformation =
91 new FollowerLogInformationImpl(followerId,
92 new AtomicLong(lastIndex()),
95 followerToLog.put(followerId, followerLogInformation);
98 context.getLogger().debug("Election:Leader has following peers:"+ followers);
// followers.size() + 1 presumably counts this leader as well, making the
// threshold "more than half of the cluster"; with no followers it is 0.
100 if (followers.size() > 0) {
101 minReplicationCount = (followers.size() + 1) / 2 + 1;
103 minReplicationCount = 0;
107 // Immediately schedule a heartbeat
108 // Upon election: send initial empty AppendEntries RPCs
109 // (heartbeat) to each server; repeat during idle periods to
110 // prevent election timeouts (§5.2)
111 scheduleHeartBeat(new FiniteDuration(0, TimeUnit.SECONDS));
// The install-snapshot check is scheduled at 1000 times the heartbeat
// interval's length, expressed in the heartbeat interval's own unit.
113 scheduleInstallSnapshotCheck(
114 new FiniteDuration(context.getConfigParams().getHeartBeatInterval().length() * 1000,
115 context.getConfigParams().getHeartBeatInterval().unit())
/**
 * Handles an AppendEntries message received while in the Leader state;
 * the message is logged at info level.
 *
 * NOTE(review): the return statement is not visible in this listing.
 *
 * @param sender the actor that sent the message
 * @param appendEntries the received AppendEntries message
 */
120 @Override protected RaftState handleAppendEntries(ActorRef sender,
121 AppendEntries appendEntries) {
123 context.getLogger().info("Leader: Received {}", appendEntries.toString());
/**
 * Processes a follower's reply to an AppendEntries message: updates that
 * follower's match/next index on success (the TODO below describes a
 * decrement of the next index for an out-of-sync follower), then looks for
 * a higher index that can be committed and applies newly committed entries
 * to the state machine.
 *
 * NOTE(review): the else branch, the commit loop's exit and the return
 * statement are not visible in this listing.
 *
 * @param sender the actor that sent the reply
 * @param appendEntriesReply the follower's reply
 */
128 @Override protected RaftState handleAppendEntriesReply(ActorRef sender,
129 AppendEntriesReply appendEntriesReply) {
// Only unsuccessful replies are logged here.
131 if(! appendEntriesReply.isSuccess()) {
133 .info("Leader: Received {}", appendEntriesReply.toString());
136 // Update the FollowerLogInformation
137 String followerId = appendEntriesReply.getFollowerId();
138 FollowerLogInformation followerLogInformation =
139 followerToLog.get(followerId);
// A reply from a follower that is not in followerToLog is reported as an error.
141 if(followerLogInformation == null){
142 context.getLogger().error("Unknown follower {}", followerId);
// On success the follower's match index becomes its reported last log
// index and its next index the entry right after that.
146 if (appendEntriesReply.isSuccess()) {
147 followerLogInformation
148 .setMatchIndex(appendEntriesReply.getLogLastIndex());
149 followerLogInformation
150 .setNextIndex(appendEntriesReply.getLogLastIndex() + 1);
153 // TODO: When we find that the follower is out of sync with the
154 // Leader we simply decrement that followers next index by 1.
155 // Would it be possible to do better than this? The RAFT spec
156 // does not explicitly deal with it but may be something for us to
159 followerLogInformation.decrNextIndex();
162 // Now figure out if this reply warrants a change in the commitIndex
163 // If there exists an N such that N > commitIndex, a majority
164 // of matchIndex[i] ≥ N, and log[N].term == currentTerm:
165 // set commitIndex = N (§5.3, §5.4).
// The loop header has no termination condition; the exit is presumably a
// break on one of the lines not visible here - confirm.
166 for (long N = context.getCommitIndex() + 1; ; N++) {
// Starts at 1 - presumably counting the leader's own copy of the entry.
167 int replicatedCount = 1;
169 for (FollowerLogInformation info : followerToLog.values()) {
170 if (info.getMatchIndex().get() >= N) {
175 if (replicatedCount >= minReplicationCount) {
176 ReplicatedLogEntry replicatedLogEntry =
177 context.getReplicatedLog().get(N);
178 if (replicatedLogEntry != null
179 && replicatedLogEntry.getTerm()
181 context.setCommitIndex(N);
188 // Apply the change to the state machine
189 if (context.getCommitIndex() > context.getLastApplied()) {
190 applyLogToStateMachine(context.getCommitIndex());
/**
 * Scans trackerList for the tracker whose index equals logIndex.
 *
 * NOTE(review): the return statements are not visible in this listing -
 * presumably the matching tracker, or null when none matches; confirm.
 *
 * @param logIndex the log index to look up
 */
196 protected ClientRequestTracker findClientRequestTracker(long logIndex) {
197 for (ClientRequestTracker tracker : trackerList) {
198 if (tracker.getIndex() == logIndex) {
/**
 * Handles a RequestVoteReply received while in the Leader state.
 *
 * NOTE(review): the body of this method is not visible in this listing.
 *
 * @param sender the actor that sent the reply
 * @param requestVoteReply the received reply
 */
206 @Override protected RaftState handleRequestVoteReply(ActorRef sender,
207 RequestVoteReply requestVoteReply) {
211 @Override public RaftState state() {
212 return RaftState.Leader;
/**
 * Entry point for messages delivered while in the Leader state. Rejects a
 * null sender, converts the message with fromSerializableMessage, returns
 * Follower when a RaftRPC carries a term greater than the current one,
 * dispatches the leader-specific messages (SendHeartBeat,
 * SendInstallSnapshot, Replicate, InstallSnapshotReply) and hands the
 * message to the superclass handler.
 *
 * NOTE(review): the lines surrounding the scheduleHeartBeat call are not
 * visible in this listing, so the construct that decides when it runs
 * cannot be confirmed from here.
 *
 * @param sender the actor that sent the message; must not be null
 * @param originalMessage the message as received, possibly in serializable form
 */
215 @Override public RaftState handleMessage(ActorRef sender, Object originalMessage) {
216 Preconditions.checkNotNull(sender, "sender should not be null");
218 Object message = fromSerializableMessage(originalMessage);
220 if (message instanceof RaftRPC) {
221 RaftRPC rpc = (RaftRPC) message;
222 // If RPC request or response contains term T > currentTerm:
223 // set currentTerm = T, convert to follower (§5.1)
224 // This applies to all RPC messages and responses
225 if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
// The second argument is null - presumably "no vote cast in the new term"; confirm.
226 context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
227 return RaftState.Follower;
232 if (message instanceof SendHeartBeat) {
233 return sendHeartBeat();
234 } else if(message instanceof SendInstallSnapshot) {
235 installSnapshotIfNeeded();
236 } else if (message instanceof Replicate) {
237 replicate((Replicate) message);
238 } else if (message instanceof InstallSnapshotReply){
239 handleInstallSnapshotReply(
240 (InstallSnapshotReply) message);
// Re-arm the heartbeat timer with the configured heartbeat interval.
243 scheduleHeartBeat(context.getConfigParams().getHeartBeatInterval());
246 return super.handleMessage(sender, message);
249 private void handleInstallSnapshotReply(InstallSnapshotReply message) {
250 InstallSnapshotReply reply = message;
251 String followerId = reply.getFollowerId();
252 FollowerLogInformation followerLogInformation =
253 followerToLog.get(followerId);
255 followerLogInformation
256 .setMatchIndex(context.getReplicatedLog().getSnapshotIndex());
257 followerLogInformation
258 .setNextIndex(context.getReplicatedLog().getSnapshotIndex() + 1);
/**
 * Handles a Replicate request for a log entry: creates a
 * ClientRequestTrackerImpl for the entry and, when there are no followers,
 * sets the commit index to the entry's index and applies it to the state
 * machine right away.
 *
 * NOTE(review): the statement that stores the tracker and the lines after
 * the no-followers branch are not visible in this listing.
 *
 * @param replicate the request carrying the log entry, the client actor and the identifier
 */
261 private void replicate(Replicate replicate) {
262 long logIndex = replicate.getReplicatedLogEntry().getIndex();
264 context.getLogger().debug("Replicate message " + logIndex);
266 // Create a tracker entry we will use this later to notify the
269 new ClientRequestTrackerImpl(replicate.getClientActor(),
270 replicate.getIdentifier(),
// With no followers there is nobody to wait for: commit and apply immediately.
274 if (followers.size() == 0) {
275 context.setCommitIndex(logIndex);
276 applyLogToStateMachine(logIndex);
/**
 * Builds an AppendEntries message for every follower whose actor selection
 * can be resolved. The entry list defaults to empty; when the replicated
 * log contains the follower's next index, getFrom(nextIndex, 1) is called
 * on the log. The message carries the current term, this node's id, the
 * previous log index and term for nextIndex, the entries and the commit
 * index, and is converted with toSerializable().
 *
 * NOTE(review): the assignment of the getFrom result and the call that
 * sends the message are not visible in this listing.
 * NOTE(review): the TODO below speaks of "sending all entries from
 * nextIndex", but the visible call passes 1 as the second argument to
 * getFrom - the TODO may be out of date; confirm.
 */
282 private void sendAppendEntries() {
283 // Send an AppendEntries to all followers
284 for (String followerId : followers) {
285 ActorSelection followerActor =
286 context.getPeerActorSelection(followerId);
// Followers without a resolvable actor selection are skipped.
288 if (followerActor != null) {
289 FollowerLogInformation followerLogInformation =
290 followerToLog.get(followerId);
292 long nextIndex = followerLogInformation.getNextIndex().get();
294 List<ReplicatedLogEntry> entries = Collections.emptyList();
296 if (context.getReplicatedLog().isPresent(nextIndex)) {
297 // TODO: Instead of sending all entries from nextIndex
298 // only send a fixed number of entries to each follower
299 // This is to avoid the situation where there are a lot of
300 // entries to install for a fresh follower or to a follower
301 // that has fallen too far behind with the log but yet is not
302 // eligible to receive a snapshot
304 context.getReplicatedLog().getFrom(nextIndex, 1);
308 new AppendEntries(currentTerm(), context.getId(),
309 prevLogIndex(nextIndex),
310 prevLogTerm(nextIndex), entries,
311 context.getCommitIndex()).toSerializable(),
319 * An installSnapshot is scheduled at an interval that is a multiple of
320 * a HEARTBEAT_INTERVAL. This is to avoid the need to check for installing
321 * snapshots at every heartbeat.
// Walks all followers and builds an InstallSnapshot for each one whose next
// index is no longer in the replicated log but is covered by the snapshot.
// NOTE(review): the call that sends the InstallSnapshot is not visible in
// this listing.
323 private void installSnapshotIfNeeded(){
324 for (String followerId : followers) {
325 ActorSelection followerActor =
326 context.getPeerActorSelection(followerId);
// Followers without a resolvable actor selection are skipped.
328 if(followerActor != null) {
329 FollowerLogInformation followerLogInformation =
330 followerToLog.get(followerId);
332 long nextIndex = followerLogInformation.getNextIndex().get();
// Only when the entry at nextIndex is absent from the log AND lies within
// the snapshot is a snapshot needed.
334 if (!context.getReplicatedLog().isPresent(nextIndex) && context
335 .getReplicatedLog().isInSnapshot(nextIndex)) {
// The message carries the current term, this node's id and the snapshot's
// index, term and contents.
337 new InstallSnapshot(currentTerm(), context.getId(),
338 context.getReplicatedLog().getSnapshotIndex(),
339 context.getReplicatedLog().getSnapshotTerm(),
340 context.getReplicatedLog().getSnapshot()
/**
 * Handles the SendHeartBeat message (see handleMessage); its result is
 * returned to the caller of handleMessage as the next RaftState.
 *
 * NOTE(review): only the followers.size() > 0 guard is visible in this
 * listing; the guarded action and the return statement are not.
 */
349 private RaftState sendHeartBeat() {
350 if (followers.size() > 0) {
356 private void stopHeartBeat() {
357 if (heartbeatSchedule != null && !heartbeatSchedule.isCancelled()) {
358 heartbeatSchedule.cancel();
362 private void stopInstallSnapshotSchedule() {
363 if (installSnapshotSchedule != null && !installSnapshotSchedule.isCancelled()) {
364 installSnapshotSchedule.cancel();
/**
 * Schedules a one-off SendHeartBeat message to this actor using the actor
 * system's scheduler and dispatcher.
 *
 * NOTE(review): the body of the no-followers guard, the assignment of the
 * returned Cancellable and the delay argument of scheduleOnce are not
 * visible in this listing; presumably the delay is the interval parameter.
 *
 * @param interval the delay before the heartbeat message is delivered
 */
368 private void scheduleHeartBeat(FiniteDuration interval) {
369 if(followers.size() == 0){
370 // Optimization - do not bother scheduling a heartbeat as there are
377 // Schedule a heartbeat. When the scheduler triggers a SendHeartbeat
378 // message is sent to itself.
379 // Scheduling the heartbeat only once here because heartbeats do not
380 // need to be sent if there are other messages being sent to the remote
383 context.getActorSystem().scheduler().scheduleOnce(
// context.getActor() is passed twice: once before the message and once
// after the dispatcher (receiver and sender in Akka's scheduleOnce).
385 context.getActor(), new SendHeartBeat(),
386 context.getActorSystem().dispatcher(), context.getActor());
/**
 * Cancels any pending install-snapshot timer and schedules a one-off
 * SendInstallSnapshot message to this actor, keeping the returned
 * Cancellable in installSnapshotSchedule.
 *
 * NOTE(review): the two inline comments below talk about a heartbeat and
 * about append entries, but the message scheduled here is
 * SendInstallSnapshot - they look copied from elsewhere; confirm and reword.
 * NOTE(review): the body of the no-followers guard and the delay argument
 * of scheduleOnce are not visible in this listing; presumably the delay is
 * the interval parameter.
 *
 * @param interval the delay before the install-snapshot check is triggered
 */
390 private void scheduleInstallSnapshotCheck(FiniteDuration interval) {
391 if(followers.size() == 0){
392 // Optimization - do not bother scheduling a heartbeat as there are
397 stopInstallSnapshotSchedule();
399 // Schedule a message to send append entries to followers that can
400 // accept an append entries with some data in it
401 installSnapshotSchedule =
402 context.getActorSystem().scheduler().scheduleOnce(
// context.getActor() is passed twice: once before the message and once
// after the dispatcher (receiver and sender in Akka's scheduleOnce).
404 context.getActor(), new SendInstallSnapshot(),
405 context.getActorSystem().dispatcher(), context.getActor());
/**
 * Closes this behavior.
 *
 * NOTE(review): the body is not visible in this listing; given the timers
 * this class owns it presumably cancels them - confirm.
 *
 * @throws Exception declared by the overridden method
 */
410 @Override public void close() throws Exception {
414 @Override public String getLeaderId() {
415 return context.getId();