import java.util.LinkedList;
import java.util.Queue;
import java.util.UUID;
-import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
import org.opendaylight.controller.cluster.raft.base.messages.SnapshotComplete;
import org.opendaylight.controller.cluster.raft.behaviors.AbstractLeader;
import org.opendaylight.controller.cluster.raft.messages.AddServer;
import org.opendaylight.controller.cluster.raft.messages.AddServerReply;
-import org.opendaylight.controller.cluster.raft.messages.FollowerCatchUpTimeout;
import org.opendaylight.controller.cluster.raft.messages.RemoveServer;
import org.opendaylight.controller.cluster.raft.messages.RemoveServerReply;
import org.opendaylight.controller.cluster.raft.messages.ServerChangeStatus;
import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import scala.concurrent.duration.FiniteDuration;
/**
* Handles server configuration related messages for a RaftActor.
} else if(message instanceof RemoveServer) {
onRemoveServer((RemoveServer) message, raftActor, sender);
return true;
- } else if (message instanceof FollowerCatchUpTimeout) {
- currentOperationState.onFollowerCatchupTimeout(raftActor, (FollowerCatchUpTimeout) message);
+ } else if (message instanceof ServerOperationTimeout) {
+ currentOperationState.onServerOperationTimeout(raftActor, (ServerOperationTimeout) message);
return true;
} else if (message instanceof UnInitializedFollowerSnapshotReply) {
currentOperationState.onUnInitializedFollowerSnapshotReply(raftActor,
}
private void onRemoveServer(RemoveServer removeServer, RaftActor raftActor, ActorRef sender) {
+ LOG.debug("{}: onRemoveServer: {}, state: {}", raftContext.getId(), removeServer, currentOperationState);
if(removeServer.getServerId().equals(raftActor.getLeaderId())){
// Removing current leader is not supported yet
// TODO: To properly support current leader removal we need to first implement transfer of leadership
* </ul>
*/
private void onAddServer(AddServer addServer, RaftActor raftActor, ActorRef sender) {
- LOG.debug("{}: onAddServer: {}", raftContext.getId(), addServer);
+ LOG.debug("{}: onAddServer: {}, state: {}", raftContext.getId(), addServer, currentOperationState); // also records which FSM state receives the request
onNewOperation(raftActor, new AddServerContext(addServer, sender));
}
private interface OperationState {
void onNewOperation(RaftActor raftActor, ServerOperationContext<?> operationContext);
- void onFollowerCatchupTimeout(RaftActor raftActor, FollowerCatchUpTimeout followerTimeout);
+ void onServerOperationTimeout(RaftActor raftActor, ServerOperationTimeout timeout); // invoked when a ServerOperationTimeout message is dispatched to the current state
void onUnInitializedFollowerSnapshotReply(RaftActor raftActor, UnInitializedFollowerSnapshotReply reply);
}
/**
- * Abstract base class for server operation FSM state. Handles common behavior for all states.
+ * Abstract base class for a server operation FSM state. Handles common behavior for all states.
*/
private abstract class AbstractOperationState implements OperationState {
@Override
}
@Override
- public void onFollowerCatchupTimeout(RaftActor raftActor, FollowerCatchUpTimeout followerTimeout) {
- LOG.debug("onFollowerCatchupTimeout should not be called in state {}", this);
+ public void onServerOperationTimeout(RaftActor raftActor, ServerOperationTimeout timeout) { // default: the timeout is ignored apart from a debug log
+ LOG.debug("onServerOperationTimeout should not be called in state {}", this);
}
@Override
raftActor.persistData(operationContext.getClientRequestor(), operationContext.getContextId(), payload);
- currentOperationState = new Persisting(operationContext);
+ currentOperationState = new Persisting(operationContext, newTimer(
+ new ServerOperationTimeout(operationContext.getServerId())));
sendReply(raftActor, operationContext, ServerChangeStatus.OK);
}
}
}
- private void sendReply(RaftActor raftActor, ServerOperationContext<?> operationContext,
+ protected void sendReply(RaftActor raftActor, ServerOperationContext<?> operationContext,
ServerChangeStatus status) {
LOG.debug("{}: Returning {} for operation {}", raftContext.getId(), status, operationContext.getOperation());
raftActor.self());
}
+ Cancellable newTimer(Object message) {
+ return raftContext.getActorSystem().scheduler().scheduleOnce(
+ raftContext.getConfigParams().getElectionTimeOutInterval().$times(2), raftContext.getActor(),
+ message, raftContext.getActorSystem().dispatcher(), raftContext.getActor());
+ }
+
@Override
public String toString() {
return getClass().getSimpleName();
*/
private class Persisting extends AbstractOperationState {
private final ServerOperationContext<?> operationContext;
+ private final Cancellable timer;
+ private boolean timedOut = false;
- Persisting(ServerOperationContext<?> operationContext) {
+ Persisting(ServerOperationContext<?> operationContext, Cancellable timer) { // timer: handle for the pending timeout associated with this state
this.operationContext = operationContext;
+ this.timer = timer; // kept so the pending timeout can be cancelled later
}
@Override
LOG.info("{}: {} has been successfully replicated to a majority of followers", raftActor.getId(),
applyState.getReplicatedLogEntry().getData());
+ timer.cancel();
operationComplete(raftActor, operationContext, null);
}
}
+
+ /**
+  * Handles a timeout received while this state is active. Marks the state as timed out, so
+  * that operations submitted afterwards are rejected by {@code onNewOperation}, and replies
+  * to every operation already waiting in the pending queue with
+  * {@code PRIOR_REQUEST_CONSENSUS_TIMEOUT}. The operation held by this state is not touched.
+  *
+  * @param raftActor passed through to {@code sendReply} for each failed pending operation
+  * @param timeout the timeout message; its server id is only used for logging
+  */
+ @Override
+ public void onServerOperationTimeout(RaftActor raftActor, ServerOperationTimeout timeout) {
+     LOG.warn("{}: Timeout occurred while replicating the new server configuration for {}", raftContext.getId(),
+             timeout.getServerId());
+
+     timedOut = true;
+
+     // Fail any pending operations: drain the queue until poll() reports it is empty.
+     ServerOperationContext<?> queued;
+     while((queued = pendingOperationsQueue.poll()) != null) {
+         sendReply(raftActor, queued, ServerChangeStatus.PRIOR_REQUEST_CONSENSUS_TIMEOUT);
+     }
+ }
+
+ /**
+  * Accepts a new operation while this state is active. Before a timeout has been recorded the
+  * request is delegated to the superclass implementation; once {@code timedOut} is set the
+  * request is answered immediately with {@code PRIOR_REQUEST_CONSENSUS_TIMEOUT}.
+  *
+  * @param raftActor the actor passed on to the superclass or to {@code sendReply}
+  * @param operationContext the newly requested operation
+  */
+ @Override
+ public void onNewOperation(RaftActor raftActor, ServerOperationContext<?> operationContext) {
+     if(!timedOut) {
+         super.onNewOperation(raftActor, operationContext);
+         return;
+     }
+
+     sendReply(raftActor, operationContext, ServerChangeStatus.PRIOR_REQUEST_CONSENSUS_TIMEOUT);
+ }
}
/**
}
Cancellable newInstallSnapshotTimer(RaftActor raftActor) {
- return raftContext.getActorSystem().scheduler().scheduleOnce(
- new FiniteDuration(((raftContext.getConfigParams().getElectionTimeOutInterval().toMillis()) * 2),
- TimeUnit.MILLISECONDS), raftContext.getActor(),
- new FollowerCatchUpTimeout(addServerContext.getOperation().getNewServerId()),
- raftContext.getActorSystem().dispatcher(), raftContext.getActor());
+ return newTimer(new ServerOperationTimeout(addServerContext.getOperation().getNewServerId())); // scheduling is delegated to newTimer; the message carries the new server's id. NOTE(review): raftActor is not used here
}
- void handleOnFollowerCatchupTimeout(RaftActor raftActor, FollowerCatchUpTimeout followerTimeout) {
- String serverId = followerTimeout.getNewServerId();
+ void handleInstallSnapshotTimeout(RaftActor raftActor, ServerOperationTimeout timeout) {
+ String serverId = timeout.getServerId();
- LOG.debug("{}: onFollowerCatchupTimeout for new server {}", raftContext.getId(), serverId);
+ LOG.debug("{}: handleInstallSnapshotTimeout for new server {}", raftContext.getId(), serverId);
// cleanup
raftContext.removePeer(serverId);
}
@Override
- public void onFollowerCatchupTimeout(RaftActor raftActor, FollowerCatchUpTimeout followerTimeout) {
- handleOnFollowerCatchupTimeout(raftActor, followerTimeout);
+ public void onServerOperationTimeout(RaftActor raftActor, ServerOperationTimeout timeout) {
+ handleInstallSnapshotTimeout(raftActor, timeout); // shared timeout handling runs first; the state-specific warning is logged afterwards
LOG.warn("{}: Timeout occured for new server {} while installing snapshot", raftContext.getId(),
- followerTimeout.getNewServerId());
+ timeout.getServerId());
}
@Override
}
@Override
- public void onFollowerCatchupTimeout(RaftActor raftActor, FollowerCatchUpTimeout followerTimeout) {
- handleOnFollowerCatchupTimeout(raftActor, followerTimeout);
+ public void onServerOperationTimeout(RaftActor raftActor, ServerOperationTimeout timeout) {
+ handleInstallSnapshotTimeout(raftActor, timeout); // same shared handling as the other install-snapshot state; only the warning text differs
LOG.warn("{}: Timeout occured for new server {} while waiting for prior snapshot to complete",
- raftContext.getId(), followerTimeout.getNewServerId());
+ raftContext.getId(), timeout.getServerId());
}
}
abstract InitialOperationState newInitialOperationState(RaftActorServerConfigurationSupport support);
abstract void operationComplete(RaftActor raftActor, ServerChangeStatus serverChangeStatus);
+
+ abstract String getServerId(); // id of the server the operation concerns; each context reads it from its own operation message
}
/**
void operationComplete(RaftActor raftActor, ServerChangeStatus serverChangeStatus) {
}
+
+ @Override
+ String getServerId() {
+ return getOperation().getNewServerId();
+ }
}
private abstract class RemoveServerState extends AbstractOperationState {
public RemoveServerContext getRemoveServerContext() {
return removeServerContext;
}
-
}
private class InitialRemoveServerState extends RemoveServerState implements InitialOperationState{
}
}
+ @Override
+ String getServerId() {
+ return getOperation().getServerId();
+ }
+ }
+
+ /**
+  * Immutable timeout message for a server operation. It carries the id of the server the
+  * timed operation concerns; the id must not be null.
+  */
+ static class ServerOperationTimeout {
+     private final String serverId;
+
+     ServerOperationTimeout(String serverId){
+         // Validate first, then store; checkNotNull throws if the id is missing.
+         Preconditions.checkNotNull(serverId, "serverId should not be null");
+         this.serverId = serverId;
+     }
+
+     /**
+      * Returns the server id supplied at construction.
+      */
+     String getServerId() {
+         return this.serverId;
+     }
}
}
import org.opendaylight.controller.cluster.raft.messages.AddServer;
import org.opendaylight.controller.cluster.raft.messages.AddServerReply;
import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
-import org.opendaylight.controller.cluster.raft.messages.FollowerCatchUpTimeout;
import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
import org.opendaylight.controller.cluster.raft.messages.RemoveServer;
import org.opendaylight.controller.cluster.raft.messages.RemoveServerReply;
// Complete the prior snapshot - this should be a no-op b/c it's no longer the leader
leaderActor.tell(commitMsg, leaderActor);
- leaderActor.tell(new FollowerCatchUpTimeout(NEW_SERVER_ID), leaderActor);
+ leaderActor.tell(new RaftActorServerConfigurationSupport.ServerOperationTimeout(NEW_SERVER_ID), leaderActor);
AddServerReply addServerReply = testKit.expectMsgClass(JavaTestKit.duration("5 seconds"), AddServerReply.class);
assertEquals("getStatus", ServerChangeStatus.NO_LEADER, addServerReply.getStatus());
MockLeaderRaftActor leaderRaftActor = leaderActor.underlyingActor();
RaftActorContext leaderActorContext = leaderRaftActor.getRaftActorContext();
- newFollowerRaftActor.underlyingActor().setDropMessageOfType(AppendEntries.class);
+ TestActorRef<MessageCollectorActor> leaderCollectorActor = newLeaderCollectorActor(leaderRaftActor);
+
+ // Drop UnInitializedFollowerSnapshotReply initially
+ leaderRaftActor.setDropMessageOfType(UnInitializedFollowerSnapshotReply.class);
+
+ MockNewFollowerRaftActor newFollowerRaftActorInstance = newFollowerRaftActor.underlyingActor();
+ TestActorRef<MessageCollectorActor> newFollowerCollectorActor =
+ newCollectorActor(newFollowerRaftActorInstance, NEW_SERVER_ID);
+
+ // Drop AppendEntries to the new follower so consensus isn't reached
+ newFollowerRaftActorInstance.setDropMessageOfType(AppendEntries.class);
leaderActor.tell(new AddServer(NEW_SERVER_ID, newFollowerRaftActor.path().toString(), true), testKit.getRef());
+ // Capture the UnInitializedFollowerSnapshotReply
+ Object snapshotReply = expectFirstMatching(leaderCollectorActor, UnInitializedFollowerSnapshotReply.class);
+
+ // Send the UnInitializedFollowerSnapshotReply to resume the first request
+ leaderRaftActor.setDropMessageOfType(null);
+ leaderActor.tell(snapshotReply, leaderActor);
+
+ expectFirstMatching(newFollowerCollectorActor, AppendEntries.class);
+
+ // Send a second AddServer
+ leaderActor.tell(new AddServer(NEW_SERVER_ID2, "", false), testKit.getRef());
+
+ // The first AddServer should succeed with OK even though consensus wasn't reached
AddServerReply addServerReply = testKit.expectMsgClass(JavaTestKit.duration("5 seconds"), AddServerReply.class);
assertEquals("getStatus", ServerChangeStatus.OK, addServerReply.getStatus());
assertEquals("getLeaderHint", LEADER_ID, addServerReply.getLeaderHint());
// Verify ServerConfigurationPayload entry in leader's log
-
verifyServerConfigurationPayloadEntry(leaderActorContext.getReplicatedLog(), votingServer(LEADER_ID),
votingServer(NEW_SERVER_ID));
+
+ // The second AddServer should fail since consensus wasn't reached for the first
+ addServerReply = testKit.expectMsgClass(JavaTestKit.duration("5 seconds"), AddServerReply.class);
+ assertEquals("getStatus", ServerChangeStatus.PRIOR_REQUEST_CONSENSUS_TIMEOUT, addServerReply.getStatus());
+
+ // Re-send the second AddServer - should also fail
+ leaderActor.tell(new AddServer(NEW_SERVER_ID2, "", false), testKit.getRef());
+ addServerReply = testKit.expectMsgClass(JavaTestKit.duration("5 seconds"), AddServerReply.class);
+ assertEquals("getStatus", ServerChangeStatus.PRIOR_REQUEST_CONSENSUS_TIMEOUT, addServerReply.getStatus());
}
@Test
}
private TestActorRef<MessageCollectorActor> newLeaderCollectorActor(MockLeaderRaftActor leaderRaftActor) {
- TestActorRef<MessageCollectorActor> leaderCollectorActor = actorFactory.createTestActor(
+ return newCollectorActor(leaderRaftActor, LEADER_ID); // leader-specific convenience wrapper around newCollectorActor
+ }
+
+ private TestActorRef<MessageCollectorActor> newCollectorActor(AbstractMockRaftActor raftActor, String id) { // creates a collector actor named after id and registers it on raftActor
+ TestActorRef<MessageCollectorActor> collectorActor = actorFactory.createTestActor(
MessageCollectorActor.props().withDispatcher(Dispatchers.DefaultDispatcherId()),
- actorFactory.generateActorId(LEADER_ID + "Collector"));
- leaderRaftActor.setCollectorActor(leaderCollectorActor);
- return leaderCollectorActor;
+ actorFactory.generateActorId(id + "Collector"));
+ raftActor.setCollectorActor(collectorActor); // registers the collector on the mock raft actor
+ return collectorActor;
}
private static void verifyServerConfigurationPayloadEntry(ReplicatedLog log, ServerInfo... expected) {