Keeping the Heartbeat interval small and increasing the timeout
factor allows us to skip a few heartbeats if neccessary without
causing Followers to become Candidates.
Simply increasing the heartbeat interval has the negative effect
of causing replication messages for out-of date followers to go
slower so making the election factor configurable will be helpful
in keeping the cluster state stable.
Change-Id: Iae8105d65bba4a37987bfddb9f22d9d4d862a1fd
Signed-off-by: Moiz Raja <moraja@cisco.com>
* @return FiniteDuration
*/
FiniteDuration getIsolatedCheckInterval();
* @return FiniteDuration
*/
FiniteDuration getIsolatedCheckInterval();
+
+
+ /**
+ * The multiplication factor to be used to determine shard election timeout. The election timeout
+ * is determined by multiplying the election timeout factor with the heartbeat duration.
+ */
+ long getElectionTimeoutFactor();
+
public static final FiniteDuration HEART_BEAT_INTERVAL =
new FiniteDuration(100, TimeUnit.MILLISECONDS);
public static final FiniteDuration HEART_BEAT_INTERVAL =
new FiniteDuration(100, TimeUnit.MILLISECONDS);
private FiniteDuration heartBeatInterval = HEART_BEAT_INTERVAL;
private long snapshotBatchCount = SNAPSHOT_BATCH_COUNT;
private int journalRecoveryLogBatchSize = JOURNAL_RECOVERY_LOG_BATCH_SIZE;
private FiniteDuration heartBeatInterval = HEART_BEAT_INTERVAL;
private long snapshotBatchCount = SNAPSHOT_BATCH_COUNT;
private int journalRecoveryLogBatchSize = JOURNAL_RECOVERY_LOG_BATCH_SIZE;
// in-memory journal can use before it needs to snapshot
private int snapshotDataThresholdPercentage = 12;
// in-memory journal can use before it needs to snapshot
private int snapshotDataThresholdPercentage = 12;
+ private long electionTimeoutFactor = 2;
+
public void setHeartBeatInterval(FiniteDuration heartBeatInterval) {
this.heartBeatInterval = heartBeatInterval;
}
public void setHeartBeatInterval(FiniteDuration heartBeatInterval) {
this.heartBeatInterval = heartBeatInterval;
}
this.isolatedLeaderCheckInterval = isolatedLeaderCheckInterval;
}
this.isolatedLeaderCheckInterval = isolatedLeaderCheckInterval;
}
+ public void setElectionTimeoutFactor(long electionTimeoutFactor){
+ this.electionTimeoutFactor = electionTimeoutFactor;
+ }
+
@Override
public long getSnapshotBatchCount() {
return snapshotBatchCount;
@Override
public long getSnapshotBatchCount() {
return snapshotBatchCount;
@Override
public FiniteDuration getElectionTimeOutInterval() {
@Override
public FiniteDuration getElectionTimeOutInterval() {
- // returns 2 times the heart beat interval
- return getHeartBeatInterval().$times(2);
+ return getHeartBeatInterval().$times(electionTimeoutFactor);
public FiniteDuration getIsolatedCheckInterval() {
return isolatedLeaderCheckInterval;
}
public FiniteDuration getIsolatedCheckInterval() {
return isolatedLeaderCheckInterval;
}
+
+ @Override
+ public long getElectionTimeoutFactor() {
+ return electionTimeoutFactor;
+ }
- RaftActorBehavior oldBehavior = currentBehavior;
- currentBehavior = new Follower(context);
- handleBehaviorChange(oldBehavior, currentBehavior);
replicatedLog.lastIndex(), replicatedLog.snapshotIndex,
replicatedLog.snapshotTerm, replicatedLog.size());
replicatedLog.lastIndex(), replicatedLog.snapshotIndex,
replicatedLog.snapshotTerm, replicatedLog.size());
+ initializeBehavior();
+ }
+
+ protected void initializeBehavior(){
+ changeCurrentBehavior(new Follower(context));
+ }
+
+ protected void changeCurrentBehavior(RaftActorBehavior newBehavior){
RaftActorBehavior oldBehavior = currentBehavior;
RaftActorBehavior oldBehavior = currentBehavior;
- currentBehavior = new Follower(context);
+ currentBehavior = newBehavior;
handleBehaviorChange(oldBehavior, currentBehavior);
}
handleBehaviorChange(oldBehavior, currentBehavior);
}
import akka.actor.ActorRef;
import akka.actor.Cancellable;
import akka.event.LoggingAdapter;
import akka.actor.ActorRef;
import akka.actor.Cancellable;
import akka.event.LoggingAdapter;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
import org.opendaylight.controller.cluster.raft.ClientRequestTracker;
import org.opendaylight.controller.cluster.raft.RaftActorContext;
import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
import org.opendaylight.controller.cluster.raft.ClientRequestTracker;
import org.opendaylight.controller.cluster.raft.RaftActorContext;
import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
import scala.concurrent.duration.FiniteDuration;
import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
import scala.concurrent.duration.FiniteDuration;
-import java.util.Random;
-import java.util.concurrent.TimeUnit;
-
/**
* Abstract class that represents the behavior of a RaftActor
* <p/>
/**
* Abstract class that represents the behavior of a RaftActor
* <p/>
protected FiniteDuration electionDuration() {
long variance = new Random().nextInt(context.getConfigParams().getElectionTimeVariance());
return context.getConfigParams().getElectionTimeOutInterval().$plus(
protected FiniteDuration electionDuration() {
long variance = new Random().nextInt(context.getConfigParams().getElectionTimeVariance());
return context.getConfigParams().getElectionTimeOutInterval().$plus(
- new FiniteDuration(variance, TimeUnit.MILLISECONDS));
+ new FiniteDuration(variance, TimeUnit.MILLISECONDS));
private final Timeout shardLeaderElectionTimeout;
private final boolean persistent;
private final ConfigurationReader configurationReader;
private final Timeout shardLeaderElectionTimeout;
private final boolean persistent;
private final ConfigurationReader configurationReader;
+ private final long shardElectionTimeoutFactor;
private DatastoreContext(InMemoryDOMDataStoreConfigProperties dataStoreProperties,
ConfigParams shardRaftConfig, String dataStoreMXBeanType, int operationTimeoutInSeconds,
Duration shardTransactionIdleTimeout, int shardTransactionCommitTimeoutInSeconds,
int shardTransactionCommitQueueCapacity, Timeout shardInitializationTimeout,
Timeout shardLeaderElectionTimeout,
private DatastoreContext(InMemoryDOMDataStoreConfigProperties dataStoreProperties,
ConfigParams shardRaftConfig, String dataStoreMXBeanType, int operationTimeoutInSeconds,
Duration shardTransactionIdleTimeout, int shardTransactionCommitTimeoutInSeconds,
int shardTransactionCommitQueueCapacity, Timeout shardInitializationTimeout,
Timeout shardLeaderElectionTimeout,
- boolean persistent, ConfigurationReader configurationReader) {
+ boolean persistent, ConfigurationReader configurationReader, long shardElectionTimeoutFactor) {
this.dataStoreProperties = dataStoreProperties;
this.shardRaftConfig = shardRaftConfig;
this.dataStoreMXBeanType = dataStoreMXBeanType;
this.dataStoreProperties = dataStoreProperties;
this.shardRaftConfig = shardRaftConfig;
this.dataStoreMXBeanType = dataStoreMXBeanType;
this.shardLeaderElectionTimeout = shardLeaderElectionTimeout;
this.persistent = persistent;
this.configurationReader = configurationReader;
this.shardLeaderElectionTimeout = shardLeaderElectionTimeout;
this.persistent = persistent;
this.configurationReader = configurationReader;
+ this.shardElectionTimeoutFactor = shardElectionTimeoutFactor;
}
public static Builder newBuilder() {
}
public static Builder newBuilder() {
return configurationReader;
}
return configurationReader;
}
+ public long getShardElectionTimeoutFactor(){
+ return this.shardElectionTimeoutFactor;
+ }
+
public static class Builder {
private InMemoryDOMDataStoreConfigProperties dataStoreProperties;
private Duration shardTransactionIdleTimeout = Duration.create(10, TimeUnit.MINUTES);
public static class Builder {
private InMemoryDOMDataStoreConfigProperties dataStoreProperties;
private Duration shardTransactionIdleTimeout = Duration.create(10, TimeUnit.MINUTES);
private ConfigurationReader configurationReader = new FileConfigurationReader();
private int shardIsolatedLeaderCheckIntervalInMillis = shardHeartbeatIntervalInMillis * 10;
private int shardSnapshotDataThresholdPercentage = 12;
private ConfigurationReader configurationReader = new FileConfigurationReader();
private int shardIsolatedLeaderCheckIntervalInMillis = shardHeartbeatIntervalInMillis * 10;
private int shardSnapshotDataThresholdPercentage = 12;
+ private long shardElectionTimeoutFactor = 2;
public Builder shardTransactionIdleTimeout(Duration shardTransactionIdleTimeout) {
this.shardTransactionIdleTimeout = shardTransactionIdleTimeout;
public Builder shardTransactionIdleTimeout(Duration shardTransactionIdleTimeout) {
this.shardTransactionIdleTimeout = shardTransactionIdleTimeout;
+ public Builder shardElectionTimeoutFactor(long shardElectionTimeoutFactor){
+ this.shardElectionTimeoutFactor = shardElectionTimeoutFactor;
+ return this;
+ }
+
public DatastoreContext build() {
DefaultConfigParamsImpl raftConfig = new DefaultConfigParamsImpl();
public DatastoreContext build() {
DefaultConfigParamsImpl raftConfig = new DefaultConfigParamsImpl();
raftConfig.setJournalRecoveryLogBatchSize(shardJournalRecoveryLogBatchSize);
raftConfig.setSnapshotBatchCount(shardSnapshotBatchCount);
raftConfig.setSnapshotDataThresholdPercentage(shardSnapshotDataThresholdPercentage);
raftConfig.setJournalRecoveryLogBatchSize(shardJournalRecoveryLogBatchSize);
raftConfig.setSnapshotBatchCount(shardSnapshotBatchCount);
raftConfig.setSnapshotDataThresholdPercentage(shardSnapshotDataThresholdPercentage);
+ raftConfig.setElectionTimeoutFactor(shardElectionTimeoutFactor);
raftConfig.setIsolatedLeaderCheckInterval(
new FiniteDuration(shardIsolatedLeaderCheckIntervalInMillis, TimeUnit.MILLISECONDS));
raftConfig.setIsolatedLeaderCheckInterval(
new FiniteDuration(shardIsolatedLeaderCheckIntervalInMillis, TimeUnit.MILLISECONDS));
operationTimeoutInSeconds, shardTransactionIdleTimeout,
shardTransactionCommitTimeoutInSeconds, shardTransactionCommitQueueCapacity,
shardInitializationTimeout, shardLeaderElectionTimeout,
operationTimeoutInSeconds, shardTransactionIdleTimeout,
shardTransactionCommitTimeoutInSeconds, shardTransactionCommitQueueCapacity,
shardInitializationTimeout, shardLeaderElectionTimeout,
- persistent, configurationReader);
+ persistent, configurationReader, shardElectionTimeoutFactor);
.persistent(props.getPersistent().booleanValue())
.shardIsolatedLeaderCheckIntervalInMillis(
props.getShardIsolatedLeaderCheckIntervalInMillis().getValue())
.persistent(props.getPersistent().booleanValue())
.shardIsolatedLeaderCheckIntervalInMillis(
props.getShardIsolatedLeaderCheckIntervalInMillis().getValue())
+ .shardElectionTimeoutFactor(props.getShardElectionTimeoutFactor().getValue())
.build();
return DistributedDataStoreFactory.createInstance("config", getConfigSchemaServiceDependency(),
.build();
return DistributedDataStoreFactory.createInstance("config", getConfigSchemaServiceDependency(),
.persistent(props.getPersistent().booleanValue())
.shardIsolatedLeaderCheckIntervalInMillis(
props.getShardIsolatedLeaderCheckIntervalInMillis().getValue())
.persistent(props.getPersistent().booleanValue())
.shardIsolatedLeaderCheckIntervalInMillis(
props.getShardIsolatedLeaderCheckIntervalInMillis().getValue())
+ .shardElectionTimeoutFactor(props.getShardElectionTimeoutFactor().getValue())
.build();
return DistributedDataStoreFactory.createInstance("operational",
.build();
return DistributedDataStoreFactory.createInstance("operational",
description "The interval at which a shard will send a heart beat message to its remote shard.";
}
description "The interval at which a shard will send a heart beat message to its remote shard.";
}
+ leaf shard-election-timeout-factor {
+ default 2;
+ type non-zero-uint32-type;
+ description "The multiplication factor to be used to determine shard election timeout. The shard election timeout
+ is determined by multiplying shard-heartbeat-interval-in-millis with the shard-election-timeout-factor";
+ }
+
leaf operation-timeout-in-seconds {
default 5;
type operation-timeout-type;
leaf operation-timeout-in-seconds {
default 5;
type operation-timeout-type;