Implement pauseLeader timeout for leadership transfer
[controller.git] / opendaylight / md-sal / sal-akka-raft / src / main / java / org / opendaylight / controller / cluster / raft / RaftActorLeadershipTransferCohort.java
1 /*
2  * Copyright (c) 2015 Brocade Communications Systems, Inc. and others.  All rights reserved.
3  *
4  * This program and the accompanying materials are made available under the
5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6  * and is available at http://www.eclipse.org/legal/epl-v10.html
7  */
8 package org.opendaylight.controller.cluster.raft;
9
10 import akka.actor.ActorRef;
11 import akka.actor.ActorSelection;
12 import akka.actor.Cancellable;
13 import com.google.common.annotations.VisibleForTesting;
14 import com.google.common.base.Optional;
15 import com.google.common.base.Stopwatch;
16 import java.util.ArrayList;
17 import java.util.List;
18 import java.util.concurrent.TimeUnit;
19 import org.opendaylight.controller.cluster.raft.base.messages.LeaderTransitioning;
20 import org.opendaylight.controller.cluster.raft.behaviors.Leader;
21 import org.opendaylight.controller.cluster.raft.behaviors.RaftActorBehavior;
22 import org.slf4j.Logger;
23 import org.slf4j.LoggerFactory;
24 import scala.concurrent.duration.FiniteDuration;
25
26 /**
27  * A raft actor support class that participates in leadership transfer. An instance is created upon
28  * initialization of leadership transfer.
29  * <p>
30  * The transfer process is as follows:
31  * <ol>
32  * <li>Send a LeaderStateChanged message with a null leader Id to the local RoleChangeNotifier to notify
33  *     clients that we no longer have a working leader.</li>
34  * <li>Send a LeaderTransitioning message to each follower so each can send LeaderStateChanged messages to
35  *     their local RoleChangeNotifiers.</li>
36  * <li>Call {@link RaftActor#pauseLeader} passing a timed callback bound to this cohort. This
37  *     allows derived classes to perform work prior to transferring leadership.</li>
38  * <li>When the pause is complete, {@link #doTransfer} is called which in turn calls
39  *     {@link Leader#transferLeadership}; if the pause times out, the transfer is aborted.</li>
40  * <li>The Leader calls {@link #transferComplete} on successful completion.</li>
41  * <li>Wait a short period of time for the new leader to be elected to give the derived class a chance to
42  *     possibly complete work that was suspended while we were transferring.</li>
43  * <li>On notification of the new leader from the RaftActor or on time out, notify {@link OnComplete} callbacks.</li>
44  * </ol>
45  * <p>
46  * NOTE: All methods on this class must be called on the actor's thread dispatcher as they may access/modify
47  * internal state.
48  *
49  * @author Thomas Pantelis
50  */
51 public class RaftActorLeadershipTransferCohort {
52     private static final Logger LOG = LoggerFactory.getLogger(RaftActorLeadershipTransferCohort.class);
53
54     private final RaftActor raftActor;
55     private final ActorRef replyTo;
56     private Cancellable newLeaderTimer;
57     private final List<OnComplete> onCompleteCallbacks = new ArrayList<>();
58     private long newLeaderTimeoutInMillis = 2000;
59     private final Stopwatch transferTimer = Stopwatch.createUnstarted();
60     private boolean isTransferring;
61
62     RaftActorLeadershipTransferCohort(RaftActor raftActor, ActorRef replyTo) {
63         this.raftActor = raftActor;
64         this.replyTo = replyTo;
65     }
66
67     void init() {
68         RaftActorContext context = raftActor.getRaftActorContext();
69         RaftActorBehavior currentBehavior = raftActor.getCurrentBehavior();
70
71         transferTimer.start();
72
73         Optional<ActorRef> roleChangeNotifier = raftActor.getRoleChangeNotifier();
74         if(roleChangeNotifier.isPresent()) {
75             roleChangeNotifier.get().tell(raftActor.newLeaderStateChanged(context.getId(), null,
76                     currentBehavior.getLeaderPayloadVersion()), raftActor.self());
77         }
78
79         LeaderTransitioning leaderTransitioning = new LeaderTransitioning();
80         for(String peerId: context.getPeerIds()) {
81             ActorSelection followerActor = context.getPeerActorSelection(peerId);
82             if(followerActor != null) {
83                 followerActor.tell(leaderTransitioning, context.getActor());
84             }
85         }
86
87         raftActor.pauseLeader(new TimedRunnable(context.getConfigParams().getElectionTimeOutInterval(), raftActor) {
88             @Override
89             protected void doRun() {
90                 doTransfer();
91             }
92
93             @Override
94             protected void doCancel() {
95                 LOG.debug("{}: pauseLeader timed out - aborting transfer", raftActor.persistenceId());
96                 abortTransfer();
97             }
98         });
99     }
100
101     /**
102      * This method is invoked to perform the leadership transfer.
103      */
104     @VisibleForTesting
105     void doTransfer() {
106         RaftActorBehavior behavior = raftActor.getCurrentBehavior();
107         // Sanity check...
108         if(behavior instanceof Leader) {
109             isTransferring = true;
110             ((Leader)behavior).transferLeadership(this);
111         } else {
112             LOG.debug("{}: No longer the leader - skipping transfer", raftActor.persistenceId());
113             finish(true);
114         }
115     }
116
117     /**
118      * This method is invoked to abort leadership transfer on failure.
119      */
120     public void abortTransfer() {
121         LOG.debug("{}: leader transfer aborted", raftActor.persistenceId());
122         finish(false);
123     }
124
125     /**
126      * This method is invoked when leadership transfer was carried out and complete.
127      */
128     public void transferComplete() {
129         LOG.debug("{}: leader transfer complete - waiting for new leader", raftActor.persistenceId());
130
131         // We'll give it a little time for the new leader to be elected to give the derived class a
132         // chance to possibly complete work that was suspended while we were transferring. The
133         // RequestVote message from the new leader candidate should cause us to step down as leader
134         // and convert to follower due to higher term. We should then get an AppendEntries heart
135         // beat with the new leader id.
136
137         // Add a timer in case we don't get a leader change - 2 sec should be plenty of time if a new
138         // leader is elected. Note: the Runnable is sent as a message to the raftActor which executes it
139         // safely run on the actor's thread dispatcher.
140         FiniteDuration timeout = FiniteDuration.create(newLeaderTimeoutInMillis, TimeUnit.MILLISECONDS);
141         newLeaderTimer = raftActor.getContext().system().scheduler().scheduleOnce(timeout, raftActor.self(),
142                 new Runnable() {
143                     @Override
144                     public void run() {
145                         LOG.debug("{}: leader not elected in time", raftActor.persistenceId());
146                         finish(true);
147                     }
148                 }, raftActor.getContext().system().dispatcher(), raftActor.self());
149     }
150
151     void onNewLeader(String newLeader) {
152         if(newLeader != null && newLeaderTimer != null) {
153             LOG.debug("{}: leader changed to {}", raftActor.persistenceId(), newLeader);
154             newLeaderTimer.cancel();
155             finish(true);
156         }
157     }
158
159     private void finish(boolean success) {
160         isTransferring = false;
161         if(transferTimer.isRunning()) {
162             transferTimer.stop();
163             if(success) {
164                 LOG.info("{}: Successfully transferred leadership to {} in {}", raftActor.persistenceId(),
165                         raftActor.getLeaderId(), transferTimer.toString());
166             } else {
167                 LOG.warn("{}: Failed to transfer leadership in {}", raftActor.persistenceId(),
168                         transferTimer.toString());
169             }
170         }
171
172         for(OnComplete onComplete: onCompleteCallbacks) {
173             if(success) {
174                 onComplete.onSuccess(raftActor.self(), replyTo);
175             } else {
176                 onComplete.onFailure(raftActor.self(), replyTo);
177             }
178         }
179     }
180
181     void addOnComplete(OnComplete onComplete) {
182         onCompleteCallbacks.add(onComplete);
183     }
184
185     boolean isTransferring() {
186         return isTransferring;
187     }
188
189     @VisibleForTesting
190     void setNewLeaderTimeoutInMillis(long newLeaderTimeoutInMillis) {
191         this.newLeaderTimeoutInMillis = newLeaderTimeoutInMillis;
192     }
193
194     interface OnComplete {
195         void onSuccess(ActorRef raftActorRef, ActorRef replyTo);
196         void onFailure(ActorRef raftActorRef, ActorRef replyTo);
197     }
198 }