From 8cf40f4741c70a760dadb4300946c1dc88f95611 Mon Sep 17 00:00:00 2001 From: Kamal Rameshan Date: Wed, 12 Nov 2014 16:00:34 -0800 Subject: [PATCH] Bug-2277 : Isolated Leader Implementation A new RaftState has been added and on a scheduler, we check if the leader is isolated, looking at the majority peer statuses. If Yes, then it switches the behavior to IsolatedLeader. On the receipt of each AppendEntriesReply, the IsolatedLeader checks for isolation. And if no, then switches back to either Leader or Follower. These changes have been tested with TestDriver and 3 node cluster and the switching of Leader to IsolatedLeader and back, on the stopping and reinstating of followers, was successful. The Isolated Leader check interval can be configured via the config subsystem Change-Id: I42b9165cc477d812c7e0e02339537c0f1fe74934 Signed-off-by: Kamal Rameshan --- .../cluster/example/ExampleActor.java | 2 +- .../cluster/example/TestDriver.java | 5 + .../controller/cluster/raft/ConfigParams.java | 6 + .../cluster/raft/DefaultConfigParamsImpl.java | 11 + .../controller/cluster/raft/RaftState.java | 3 +- .../base/messages/IsolatedLeaderCheck.java | 15 + .../raft/behaviors/AbstractLeader.java | 738 ++++++++++++++++++ .../behaviors/AbstractRaftActorBehavior.java | 25 +- .../cluster/raft/behaviors/Candidate.java | 20 +- .../raft/behaviors/IsolatedLeader.java | 52 ++ .../cluster/raft/behaviors/Leader.java | 687 +--------------- .../raft/behaviors/IsolatedLeaderTest.java | 141 ++++ .../cluster/raft/behaviors/LeaderTest.java | 94 ++- .../cluster/datastore/DatastoreContext.java | 9 +- ...tributedConfigDataStoreProviderModule.java | 2 + ...tedOperationalDataStoreProviderModule.java | 2 + .../yang/distributed-datastore-provider.yang | 7 + 17 files changed, 1126 insertions(+), 693 deletions(-) create mode 100644 opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/IsolatedLeaderCheck.java create mode 100644 
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java create mode 100644 opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/IsolatedLeader.java create mode 100644 opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/IsolatedLeaderTest.java diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/ExampleActor.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/ExampleActor.java index 8e4a44cf20..6dfa4afd6b 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/ExampleActor.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/ExampleActor.java @@ -80,7 +80,7 @@ public class ExampleActor extends RaftActor { } else if (message instanceof PrintRole) { if(LOG.isDebugEnabled()) { String followers = ""; - if (getRaftState() == RaftState.Leader) { + if (getRaftState() == RaftState.Leader || getRaftState() == RaftState.IsolatedLeader) { followers = ((Leader)this.getCurrentBehavior()).printFollowerStates(); LOG.debug("{} = {}, Peers={}, followers={}", getId(), getRaftState(), getPeers(), followers); } else { diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/TestDriver.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/TestDriver.java index f202a8bb1d..de6169791e 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/TestDriver.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/example/TestDriver.java @@ -44,6 +44,11 @@ public class TestDriver { * stopLoggingForClient:{nodeName} * printNodes * printState + * + * Note: when run on IDE 
and on debug log level, the debug logs in + * AbstractUntypedActor and AbstractUntypedPersistentActor would need to be commented out. + * Also RaftActor handleCommand(), debug log which prints for every command other than AE/AER + * + * @param args + * @throws Exception + */ diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/ConfigParams.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/ConfigParams.java index bff2a27797..433c3f7e4b 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/ConfigParams.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/ConfigParams.java @@ -62,4 +62,10 @@ public interface ConfigParams { * The number of journal log entries to batch on recovery before applying. */ int getJournalRecoveryLogBatchSize(); + + /** + * The interval at which the leader checks whether it is isolated + * @return FiniteDuration + */ + FiniteDuration getIsolatedCheckInterval(); } diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/DefaultConfigParamsImpl.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/DefaultConfigParamsImpl.java index dc4145358a..a2092234d5 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/DefaultConfigParamsImpl.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/DefaultConfigParamsImpl.java @@ -44,6 +44,8 @@ public class DefaultConfigParamsImpl implements ConfigParams { private FiniteDuration heartBeatInterval = HEART_BEAT_INTERVAL; private long snapshotBatchCount = SNAPSHOT_BATCH_COUNT; private int journalRecoveryLogBatchSize = JOURNAL_RECOVERY_LOG_BATCH_SIZE; + private FiniteDuration isolatedLeaderCheckInterval = + new 
FiniteDuration(HEART_BEAT_INTERVAL.length() * 1000, HEART_BEAT_INTERVAL.unit()); public void setHeartBeatInterval(FiniteDuration heartBeatInterval) { this.heartBeatInterval = heartBeatInterval; @@ -57,6 +59,10 @@ public class DefaultConfigParamsImpl implements ConfigParams { this.journalRecoveryLogBatchSize = journalRecoveryLogBatchSize; } + public void setIsolatedLeaderCheckInterval(FiniteDuration isolatedLeaderCheckInterval) { + this.isolatedLeaderCheckInterval = isolatedLeaderCheckInterval; + } + @Override public long getSnapshotBatchCount() { return snapshotBatchCount; @@ -87,4 +93,9 @@ public class DefaultConfigParamsImpl implements ConfigParams { public int getJournalRecoveryLogBatchSize() { return journalRecoveryLogBatchSize; } + + @Override + public FiniteDuration getIsolatedCheckInterval() { + return isolatedLeaderCheckInterval; + } } diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftState.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftState.java index 65114eb659..216ad4103d 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftState.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftState.java @@ -3,5 +3,6 @@ package org.opendaylight.controller.cluster.raft; public enum RaftState { Candidate, Follower, - Leader + Leader, + IsolatedLeader; } diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/IsolatedLeaderCheck.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/IsolatedLeaderCheck.java new file mode 100644 index 0000000000..36fd813664 --- /dev/null +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/IsolatedLeaderCheck.java @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2014 
Cisco Systems, Inc. and others. All rights reserved. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v1.0 which accompanies this distribution, + * and is available at http://www.eclipse.org/legal/epl-v10.html + */ +package org.opendaylight.controller.cluster.raft.base.messages; + +/** + * Message sent by the IsolatedLeaderCheck scheduler in the Leader to itself + * in order to check if it is isolated. + */ +public class IsolatedLeaderCheck { +} diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java new file mode 100644 index 0000000000..d85ac8ef67 --- /dev/null +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java @@ -0,0 +1,738 @@ +/* + * Copyright (c) 2014 Cisco Systems, Inc. and others. All rights reserved. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v1.0 which accompanies this distribution, + * and is available at http://www.eclipse.org/legal/epl-v10.html + */ + +package org.opendaylight.controller.cluster.raft.behaviors; + +import akka.actor.ActorRef; +import akka.actor.ActorSelection; +import akka.actor.Cancellable; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import org.opendaylight.controller.cluster.raft.ClientRequestTracker; +import org.opendaylight.controller.cluster.raft.ClientRequestTrackerImpl; +import org.opendaylight.controller.cluster.raft.FollowerLogInformation; +import org.opendaylight.controller.cluster.raft.FollowerLogInformationImpl; +import org.opendaylight.controller.cluster.raft.RaftActorContext; +import org.opendaylight.controller.cluster.raft.RaftState; +import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry; +import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot; +import org.opendaylight.controller.cluster.raft.base.messages.InitiateInstallSnapshot; +import org.opendaylight.controller.cluster.raft.base.messages.Replicate; +import org.opendaylight.controller.cluster.raft.base.messages.SendHeartBeat; +import org.opendaylight.controller.cluster.raft.base.messages.SendInstallSnapshot; +import org.opendaylight.controller.cluster.raft.messages.AppendEntries; +import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply; +import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot; +import 
org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply; +import org.opendaylight.controller.cluster.raft.messages.RaftRPC; +import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply; +import scala.concurrent.duration.FiniteDuration; + +/** + * The behavior of a RaftActor when it is in the Leader state + *

+ * Leaders: + *