Merge "Prevent multiple config pushers"
authorTony Tkacik <ttkacik@cisco.com>
Mon, 23 Feb 2015 08:49:48 +0000 (08:49 +0000)
committerGerrit Code Review <gerrit@opendaylight.org>
Mon, 23 Feb 2015 08:49:48 +0000 (08:49 +0000)
19 files changed:
opendaylight/commons/opendaylight/pom.xml
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/ConfigParams.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/DefaultConfigParamsImpl.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActor.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/base/messages/ApplyState.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeader.java
opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/behaviors/Leader.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/RaftActorTest.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderTest.java [new file with mode: 0644]
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/IsolatedLeaderTest.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/LeaderTest.java
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/utils/ForwardMessageToBehaviorActor.java [new file with mode: 0644]
opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/utils/MessageCollectorActor.java
opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/akka.conf
opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ThreePhaseCommitCohortProxy.java
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/DatastoreContextTest.java
opendaylight/md-sal/sal-distributed-datastore/src/test/java/org/opendaylight/controller/cluster/datastore/ThreePhaseCommitCohortProxyTest.java
opendaylight/md-sal/sal-dummy-distributed-datastore/src/main/java/org/opendaylight/controller/dummy/datastore/DummyShard.java
opendaylight/md-sal/sal-dummy-distributed-datastore/src/main/resources/simplelogger.properties

index 9a88e610072036b74e986947e73e19eb8ff0a10b..338db8b03a04e0db32be91b42abea3f752cad48c 100644 (file)
@@ -15,7 +15,7 @@
 
   <properties>
 
-    <akka.version>2.3.4</akka.version>
+    <akka.version>2.3.9</akka.version>
     <appauth.version>0.5.0-SNAPSHOT</appauth.version>
     <archetype-app-northbound>0.1.0-SNAPSHOT</archetype-app-northbound>
     <arphandler.version>0.6.0-SNAPSHOT</arphandler.version>
index 78a1335d58a2ed7cacf78572425be7beb20274ca..fd49737cac45285e1fc1c89d82a157dad5179953 100644 (file)
@@ -75,7 +75,7 @@ public interface ConfigParams {
      * The interval in which the leader needs to check itself if its isolated
      * @return FiniteDuration
      */
-    FiniteDuration getIsolatedCheckInterval();
+    long getIsolatedCheckIntervalInMillis();
 
 
     /**
index 86867e1d040ee84396450ee72f6097093aecd70e..3e6742c17d37c178d30c7d570490ff993c068806 100644 (file)
@@ -42,8 +42,7 @@ public class DefaultConfigParamsImpl implements ConfigParams {
     private FiniteDuration heartBeatInterval = HEART_BEAT_INTERVAL;
     private long snapshotBatchCount = SNAPSHOT_BATCH_COUNT;
     private int journalRecoveryLogBatchSize = JOURNAL_RECOVERY_LOG_BATCH_SIZE;
-    private FiniteDuration isolatedLeaderCheckInterval =
-        new FiniteDuration(HEART_BEAT_INTERVAL.length() * 1000, HEART_BEAT_INTERVAL.unit());
+    private long isolatedLeaderCheckInterval = HEART_BEAT_INTERVAL.$times(1000).toMillis();
 
     // 12 is just an arbitrary percentage. This is the amount of the total memory that a raft actor's
     // in-memory journal can use before it needs to snapshot
@@ -68,7 +67,7 @@ public class DefaultConfigParamsImpl implements ConfigParams {
     }
 
     public void setIsolatedLeaderCheckInterval(FiniteDuration isolatedLeaderCheckInterval) {
-        this.isolatedLeaderCheckInterval = isolatedLeaderCheckInterval;
+        this.isolatedLeaderCheckInterval = isolatedLeaderCheckInterval.toMillis();
     }
 
     public void setElectionTimeoutFactor(long electionTimeoutFactor){
@@ -112,7 +111,7 @@ public class DefaultConfigParamsImpl implements ConfigParams {
     }
 
     @Override
-    public FiniteDuration getIsolatedCheckInterval() {
+    public long getIsolatedCheckIntervalInMillis() {
         return isolatedLeaderCheckInterval;
     }
 
index 854ceb23d047fabea219f3861c5f44c0f2afc907..3ec8cc5c5817d92825ba882101d21c5b437863cd 100644 (file)
@@ -22,6 +22,7 @@ import com.google.common.base.Stopwatch;
 import com.google.protobuf.ByteString;
 import java.io.Serializable;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 import org.opendaylight.controller.cluster.DataPersistenceProvider;
 import org.opendaylight.controller.cluster.common.actor.AbstractUntypedPersistentActor;
 import org.opendaylight.controller.cluster.notifications.RoleChanged;
@@ -82,6 +83,9 @@ import org.slf4j.LoggerFactory;
  * </ul>
  */
 public abstract class RaftActor extends AbstractUntypedPersistentActor {
+
+    private static final long APPLY_STATE_DELAY_THRESHOLD_IN_NANOS = TimeUnit.MILLISECONDS.toNanos(50L); // 50 millis
+
     protected final Logger LOG = LoggerFactory.getLogger(getClass());
 
     /**
@@ -278,6 +282,12 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor {
         if (message instanceof ApplyState){
             ApplyState applyState = (ApplyState) message;
 
+            long elapsedTime = (System.nanoTime() - applyState.getStartTime());
+            if(elapsedTime >= APPLY_STATE_DELAY_THRESHOLD_IN_NANOS){
+                LOG.warn("ApplyState took more time than expected. Elapsed Time = {} ms ApplyState = {}",
+                        TimeUnit.NANOSECONDS.toMillis(elapsedTime), applyState);
+            }
+
             if(LOG.isDebugEnabled()) {
                 LOG.debug("{}: Applying state for log index {} data {}",
                     persistenceId(), applyState.getReplicatedLogEntry().getIndex(),
@@ -788,7 +798,9 @@ public abstract class RaftActor extends AbstractUntypedPersistentActor {
 
                             dataSizeSinceLastSnapshot = 0;
 
-                            LOG.info("{}: Initiating Snapshot Capture..", persistenceId());
+                            LOG.info("{}: Initiating Snapshot Capture, journalSize = {}, dataSizeForCheck = {}," +
+                                " dataThreshold = {}", persistenceId(), journalSize, dataSizeForCheck, dataThreshold);
+
                             long lastAppliedIndex = -1;
                             long lastAppliedTerm = -1;
 
index 0a7a6328805a82f0d26d6cb9c1124cbcf75838c6..9299e752d16ace734cf4fddfec3b22b8aea72f01 100644 (file)
@@ -9,21 +9,22 @@
 package org.opendaylight.controller.cluster.raft.base.messages;
 
 import akka.actor.ActorRef;
-import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
-
 import java.io.Serializable;
+import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
 
 public class ApplyState implements Serializable {
     private static final long serialVersionUID = 1L;
     private final ActorRef clientActor;
     private final String identifier;
     private final ReplicatedLogEntry replicatedLogEntry;
+    private final long startTime;
 
     public ApplyState(ActorRef clientActor, String identifier,
         ReplicatedLogEntry replicatedLogEntry) {
         this.clientActor = clientActor;
         this.identifier = identifier;
         this.replicatedLogEntry = replicatedLogEntry;
+        this.startTime = System.nanoTime();
     }
 
     public ActorRef getClientActor() {
@@ -37,4 +38,17 @@ public class ApplyState implements Serializable {
     public ReplicatedLogEntry getReplicatedLogEntry() {
         return replicatedLogEntry;
     }
+
+    public long getStartTime() {
+        return startTime;
+    }
+
+    @Override
+    public String toString() {
+        return "ApplyState{" +
+                "identifier='" + identifier + '\'' +
+                ", replicatedLogEntry.index =" + replicatedLogEntry.getIndex() +
+                ", startTime=" + startTime +
+                '}';
+    }
 }
index b2bb127eab525e35b5f7db5995e9bd57fcbe0746..be51ba069cc5056636646566d1db00b30154073a 100644 (file)
@@ -126,6 +126,9 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
         // (heartbeat) to each server; repeat during idle periods to
         // prevent election timeouts (ยง5.2)
         sendAppendEntries(0, false);
+
+        // It is important to schedule this heartbeat here
+        scheduleHeartBeat(context.getConfigParams().getHeartBeatInterval());
     }
 
     /**
@@ -171,6 +174,14 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
             return this;
         }
 
+        if(followerLogInformation.timeSinceLastActivity() >
+                context.getConfigParams().getElectionTimeOutInterval().toMillis()) {
+            LOG.error("{} : handleAppendEntriesReply delayed beyond election timeout, " +
+                            "appendEntriesReply : {}, timeSinceLastActivity : {}, lastApplied : {}, commitIndex : {}",
+                    logName(), appendEntriesReply, followerLogInformation.timeSinceLastActivity(),
+                    context.getLastApplied(), context.getCommitIndex());
+        }
+
         followerLogInformation.markFollowerActive();
 
         if (appendEntriesReply.isSuccess()) {
@@ -273,6 +284,8 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
         return this;
     }
 
+    protected void beforeSendHeartbeat(){}
+
     @Override
     public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) {
         Preconditions.checkNotNull(sender, "sender should not be null");
@@ -294,27 +307,26 @@ public abstract class AbstractLeader extends AbstractRaftActorBehavior {
             }
         }
 
-        try {
-            if (message instanceof SendHeartBeat) {
-                sendHeartBeat();
-                return this;
+        if (message instanceof SendHeartBeat) {
+            beforeSendHeartbeat();
+            sendHeartBeat();
+            scheduleHeartBeat(context.getConfigParams().getHeartBeatInterval());
+            return this;
 
-            } else if(message instanceof SendInstallSnapshot) {
-                // received from RaftActor
-                setSnapshot(Optional.of(((SendInstallSnapshot) message).getSnapshot()));
-                sendInstallSnapshot();
+        } else if(message instanceof SendInstallSnapshot) {
+            // received from RaftActor
+            setSnapshot(Optional.of(((SendInstallSnapshot) message).getSnapshot()));
+            sendInstallSnapshot();
 
-            } else if (message instanceof Replicate) {
-                replicate((Replicate) message);
+        } else if (message instanceof Replicate) {
+            replicate((Replicate) message);
 
-            } else if (message instanceof InstallSnapshotReply){
-                handleInstallSnapshotReply((InstallSnapshotReply) message);
+        } else if (message instanceof InstallSnapshotReply){
+            handleInstallSnapshotReply((InstallSnapshotReply) message);
 
-            }
-        } finally {
-            scheduleHeartBeat(context.getConfigParams().getHeartBeatInterval());
         }
 
+
         return super.handleMessage(sender, message);
     }
 
index 7a94c0c15849038f35105789856f98cb74580d51..ebcdcd40fb078ebcc16439ec2feaa87b6f62eca4 100644 (file)
@@ -8,12 +8,12 @@
 package org.opendaylight.controller.cluster.raft.behaviors;
 
 import akka.actor.ActorRef;
-import akka.actor.Cancellable;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Stopwatch;
+import java.util.concurrent.TimeUnit;
 import org.opendaylight.controller.cluster.raft.RaftActorContext;
 import org.opendaylight.controller.cluster.raft.base.messages.IsolatedLeaderCheck;
-import scala.concurrent.duration.FiniteDuration;
 
 /**
  * The behavior of a RaftActor when it is in the Leader state
@@ -38,15 +38,12 @@ import scala.concurrent.duration.FiniteDuration;
  * set commitIndex = N (ยง5.3, ยง5.4).
  */
 public class Leader extends AbstractLeader {
-    private Cancellable installSnapshotSchedule = null;
-    private Cancellable isolatedLeaderCheckSchedule = null;
+    private static final IsolatedLeaderCheck ISOLATED_LEADER_CHECK = new IsolatedLeaderCheck();
+    private final Stopwatch isolatedLeaderCheck;
 
     public Leader(RaftActorContext context) {
         super(context);
-
-        scheduleIsolatedLeaderCheck(
-            new FiniteDuration(context.getConfigParams().getHeartBeatInterval().length() * 10,
-                context.getConfigParams().getHeartBeatInterval().unit()));
+        isolatedLeaderCheck = Stopwatch.createStarted();
     }
 
     @Override public RaftActorBehavior handleMessage(ActorRef sender, Object originalMessage) {
@@ -54,8 +51,9 @@ public class Leader extends AbstractLeader {
 
         if (originalMessage instanceof IsolatedLeaderCheck) {
             if (isLeaderIsolated()) {
-                LOG.info("{}: At least {} followers need to be active, Switching {} from Leader to IsolatedLeader",
+                LOG.warn("{}: At least {} followers need to be active, Switching {} from Leader to IsolatedLeader",
                         context.getId(), minIsolatedLeaderPeerCount, leaderId);
+
                 return switchBehavior(new IsolatedLeader(context));
             }
         }
@@ -63,21 +61,17 @@ public class Leader extends AbstractLeader {
         return super.handleMessage(sender, originalMessage);
     }
 
-    protected void stopIsolatedLeaderCheckSchedule() {
-        if (isolatedLeaderCheckSchedule != null && !isolatedLeaderCheckSchedule.isCancelled()) {
-            isolatedLeaderCheckSchedule.cancel();
+    @Override
+    protected void beforeSendHeartbeat(){
+        if(isolatedLeaderCheck.elapsed(TimeUnit.MILLISECONDS) > context.getConfigParams().getIsolatedCheckIntervalInMillis()){
+            context.getActor().tell(ISOLATED_LEADER_CHECK, context.getActor());
+            isolatedLeaderCheck.reset().start();
         }
-    }
 
-    protected void scheduleIsolatedLeaderCheck(FiniteDuration isolatedCheckInterval) {
-        isolatedLeaderCheckSchedule = context.getActorSystem().scheduler().schedule(isolatedCheckInterval, isolatedCheckInterval,
-            context.getActor(), new IsolatedLeaderCheck(),
-            context.getActorSystem().dispatcher(), context.getActor());
     }
 
     @Override
     public void close() throws Exception {
-        stopIsolatedLeaderCheckSchedule();
         super.close();
     }
 
index ba8f49d8f6249b5ae3b4340128eeea2a9dc1bc3e..83868b6a2ad187a257bc3323479b8d84974276a5 100644 (file)
@@ -552,7 +552,6 @@ public class RaftActorTest extends AbstractActorTest {
                 assertNotEquals("voted for", "foobar", mockRaftActor.getRaftActorContext().getTermInformation().getVotedFor());
 
                 mockRaftActor.onReceiveRecover(mock(RecoveryCompleted.class));
-
             }};
     }
 
@@ -576,12 +575,12 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 mockRaftActor.getRaftActorContext().getTermInformation().updateAndPersist(10, "foobar");
 
                 assertEquals("Persist called", true, persistLatch.await(5, TimeUnit.SECONDS));
-
             }
-
         };
     }
 
@@ -602,14 +601,14 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 MockRaftActorContext.MockReplicatedLogEntry logEntry = new MockRaftActorContext.MockReplicatedLogEntry(10, 10, mock(Payload.class));
 
                 mockRaftActor.getRaftActorContext().getReplicatedLog().appendAndPersist(logEntry);
 
                 verify(dataPersistenceProvider).persist(eq(logEntry), any(Procedure.class));
-
             }
-
         };
     }
 
@@ -630,14 +629,14 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 mockRaftActor.getReplicatedLog().appendAndPersist(new MockRaftActorContext.MockReplicatedLogEntry(1, 0, mock(Payload.class)));
 
                 mockRaftActor.getRaftActorContext().getReplicatedLog().removeFromAndPersist(0);
 
                 verify(dataPersistenceProvider, times(2)).persist(anyObject(), any(Procedure.class));
-
             }
-
         };
     }
 
@@ -658,6 +657,8 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 mockRaftActor.onReceiveCommand(new ApplyLogEntries(10));
 
                 verify(dataPersistenceProvider, times(1)).persist(anyObject(), any(Procedure.class));
@@ -685,6 +686,8 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 ByteString snapshotBytes = fromObject(Arrays.asList(
                         new MockRaftActorContext.MockPayload("A"),
                         new MockRaftActorContext.MockPayload("B"),
@@ -722,6 +725,8 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 mockRaftActor.getReplicatedLog().append(new MockRaftActorContext.MockReplicatedLogEntry(1, 0, mock(Payload.class)));
                 mockRaftActor.getReplicatedLog().append(new MockRaftActorContext.MockReplicatedLogEntry(1, 1, mock(Payload.class)));
                 mockRaftActor.getReplicatedLog().append(new MockRaftActorContext.MockReplicatedLogEntry(1, 2, mock(Payload.class)));
@@ -783,6 +788,8 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 ReplicatedLogEntry entry = new MockRaftActorContext.MockReplicatedLogEntry(1, 5,
                         new MockRaftActorContext.MockPayload("F"));
 
@@ -811,6 +818,8 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 ReplicatedLog oldReplicatedLog = mockRaftActor.getReplicatedLog();
 
                 oldReplicatedLog.append(new MockRaftActorContext.MockReplicatedLogEntry(1, 0, mock(Payload.class)));
@@ -864,6 +873,8 @@ public class RaftActorTest extends AbstractActorTest {
 
                 MockRaftActor mockRaftActor = mockActorRef.underlyingActor();
 
+                mockRaftActor.waitForInitializeBehaviorComplete();
+
                 ByteString snapshotBytes = fromObject(Arrays.asList(
                         new MockRaftActorContext.MockPayload("A"),
                         new MockRaftActorContext.MockPayload("B"),
@@ -892,17 +903,28 @@ public class RaftActorTest extends AbstractActorTest {
     public void testRaftRoleChangeNotifier() throws Exception {
         new JavaTestKit(getSystem()) {{
             ActorRef notifierActor = factory.createActor(Props.create(MessageCollectorActor.class));
+            MessageCollectorActor.waitUntilReady(notifierActor);
+
             DefaultConfigParamsImpl config = new DefaultConfigParamsImpl();
+            long heartBeatInterval = 100;
+            config.setHeartBeatInterval(FiniteDuration.create(heartBeatInterval, TimeUnit.MILLISECONDS));
+            config.setElectionTimeoutFactor(1);
+
             String persistenceId = factory.generateActorId("notifier-");
 
             factory.createTestActor(MockRaftActor.props(persistenceId,
                     Collections.<String, String>emptyMap(), Optional.<ConfigParams>of(config), notifierActor), persistenceId);
 
-            // sleeping for a minimum of 2 seconds, if it spans more its fine.
-            Uninterruptibles.sleepUninterruptibly(2, TimeUnit.SECONDS);
+            List<RoleChanged> matches =  null;
+            for(int i = 0; i < 5000 / heartBeatInterval; i++) {
+                matches = MessageCollectorActor.getAllMatching(notifierActor, RoleChanged.class);
+                assertNotNull(matches);
+                if(matches.size() == 3) {
+                    break;
+                }
+                Uninterruptibles.sleepUninterruptibly(heartBeatInterval, TimeUnit.MILLISECONDS);
+            }
 
-            List<RoleChanged> matches = MessageCollectorActor.getAllMatching(notifierActor, RoleChanged.class);
-            assertNotNull(matches);
             assertEquals(3, matches.size());
 
             // check if the notifier got a role change from null to Follower
@@ -944,11 +966,12 @@ public class RaftActorTest extends AbstractActorTest {
                 Map<String, String> peerAddresses = new HashMap<>();
                 peerAddresses.put(follower1Id, followerActor1.path().toString());
 
-                TestActorRef<MockRaftActor> mockActorRef = TestActorRef.create(getSystem(),
+                TestActorRef<MockRaftActor> mockActorRef = factory.createTestActor(
                         MockRaftActor.props(persistenceId, peerAddresses,
                                 Optional.<ConfigParams>of(config), dataPersistenceProvider), persistenceId);
 
                 MockRaftActor leaderActor = mockActorRef.underlyingActor();
+
                 leaderActor.getRaftActorContext().setCommitIndex(4);
                 leaderActor.getRaftActorContext().setLastApplied(4);
                 leaderActor.getRaftActorContext().getTermInformation().update(1, persistenceId);
@@ -1034,7 +1057,7 @@ public class RaftActorTest extends AbstractActorTest {
                 Map<String, String> peerAddresses = new HashMap<>();
                 peerAddresses.put(leaderId, leaderActor1.path().toString());
 
-                TestActorRef<MockRaftActor> mockActorRef = TestActorRef.create(getSystem(),
+                TestActorRef<MockRaftActor> mockActorRef = factory.createTestActor(
                         MockRaftActor.props(persistenceId, peerAddresses,
                                 Optional.<ConfigParams>of(config), dataPersistenceProvider), persistenceId);
 
diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderTest.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/behaviors/AbstractLeaderTest.java
new file mode 100644 (file)
index 0000000..dd3ed23
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License v1.0 which accompanies this distribution,
+ * and is available at http://www.eclipse.org/legal/epl-v10.html
+ */
+
+package org.opendaylight.controller.cluster.raft.behaviors;
+
+import static org.junit.Assert.assertTrue;
+import akka.actor.ActorRef;
+import akka.testkit.JavaTestKit;
+import akka.testkit.TestActorRef;
+import com.google.common.util.concurrent.Uninterruptibles;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import org.junit.Test;
+import org.opendaylight.controller.cluster.raft.DefaultConfigParamsImpl;
+import org.opendaylight.controller.cluster.raft.MockRaftActorContext;
+import org.opendaylight.controller.cluster.raft.base.messages.SendHeartBeat;
+import org.opendaylight.controller.cluster.raft.utils.ForwardMessageToBehaviorActor;
+import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
+import scala.concurrent.duration.FiniteDuration;
+
+public abstract class AbstractLeaderTest extends AbstractRaftActorBehaviorTest{
+
+    /**
+     * When we removed scheduling of heartbeat in the AbstractLeader constructor we ended up with a situation where
+     * if no follower responded to an initial AppendEntries heartbeats would not be sent to it. This test verifies
+     * that regardless of whether followers respond or not we schedule heartbeats.
+     *
+     * @throws Exception
+     */
+    @Test
+    public void testLeaderSchedulesHeartbeatsEvenWhenNoFollowersRespondToInitialAppendEntries() throws Exception {
+        logStart("testLeaderSchedulesHeartbeatsEvenWhenNoFollowersRespondToInitialAppendEntries");
+        new JavaTestKit(getSystem()) {{
+            String leaderActorId = actorFactory.generateActorId("leader");
+            String follower1ActorId = actorFactory.generateActorId("follower");
+            String follower2ActorId = actorFactory.generateActorId("follower");
+
+            TestActorRef<ForwardMessageToBehaviorActor> leaderActor =
+                    actorFactory.createTestActor(ForwardMessageToBehaviorActor.props(), leaderActorId);
+            ActorRef follower1Actor = actorFactory.createActor(MessageCollectorActor.props(), follower1ActorId);
+            ActorRef follower2Actor = actorFactory.createActor(MessageCollectorActor.props(), follower2ActorId);
+
+            MockRaftActorContext leaderActorContext =
+                    new MockRaftActorContext(leaderActorId, getSystem(), leaderActor);
+
+            DefaultConfigParamsImpl configParams = new DefaultConfigParamsImpl();
+            configParams.setHeartBeatInterval(new FiniteDuration(200, TimeUnit.MILLISECONDS));
+            configParams.setIsolatedLeaderCheckInterval(new FiniteDuration(10, TimeUnit.SECONDS));
+
+            leaderActorContext.setConfigParams(configParams);
+
+            leaderActorContext.setReplicatedLog(
+                    new MockRaftActorContext.MockReplicatedLogBuilder().createEntries(1,5,1).build());
+
+            Map<String, String> peerAddresses = new HashMap<>();
+            peerAddresses.put(follower1ActorId,
+                    follower1Actor.path().toString());
+            peerAddresses.put(follower2ActorId,
+                    follower2Actor.path().toString());
+
+
+            leaderActorContext.setPeerAddresses(peerAddresses);
+
+            RaftActorBehavior leader = createBehavior(leaderActorContext);
+
+            leaderActor.underlyingActor().setBehavior(leader);
+
+            Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
+
+            List<SendHeartBeat> allMessages = MessageCollectorActor.getAllMatching(leaderActor, SendHeartBeat.class);
+
+            // Need more than 1 heartbeat to be delivered because we waited for 1 second with heartbeat interval 200ms
+            assertTrue(String.format("%s messages is less than expected", allMessages.size()),
+                    allMessages.size() > 1);
+
+        }};
+    }
+
+}
index b37ace9560b4bfa46984cb030f310541717a2d63..e16d765cdea29a76b0a18440a3128c1a4f2529e6 100644 (file)
@@ -22,7 +22,7 @@ import org.opendaylight.controller.cluster.raft.RaftState;
 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
 
-public class IsolatedLeaderTest  extends AbstractRaftActorBehaviorTest {
+public class IsolatedLeaderTest  extends AbstractLeaderTest {
 
     private final TestActorRef<MessageCollectorActor> leaderActor = actorFactory.createTestActor(
             Props.create(MessageCollectorActor.class), actorFactory.generateActorId("leader"));
@@ -44,7 +44,7 @@ public class IsolatedLeaderTest  extends AbstractRaftActorBehaviorTest {
 
     @Override
     protected RaftActorBehavior createBehavior(RaftActorContext actorContext) {
-        return new Leader(actorContext);
+        return new IsolatedLeader(actorContext);
     }
 
     @Override
index 853ed5867d4395d460be0692966b1abee92b2fdf..c57fce1cd553d8c41dd786d9c4bb6f33e97791b6 100644 (file)
@@ -41,11 +41,12 @@ import org.opendaylight.controller.cluster.raft.messages.InstallSnapshot;
 import org.opendaylight.controller.cluster.raft.messages.InstallSnapshotReply;
 import org.opendaylight.controller.cluster.raft.messages.RaftRPC;
 import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
+import org.opendaylight.controller.cluster.raft.utils.ForwardMessageToBehaviorActor;
 import org.opendaylight.controller.cluster.raft.utils.MessageCollectorActor;
 import org.opendaylight.controller.protobuff.messages.cluster.raft.InstallSnapshotMessages;
 import scala.concurrent.duration.FiniteDuration;
 
-public class LeaderTest extends AbstractRaftActorBehaviorTest {
+public class LeaderTest extends AbstractLeaderTest {
 
     static final String FOLLOWER_ID = "follower";
 
@@ -742,7 +743,7 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
 
     private MockRaftActorContext createActorContextWithFollower() {
         MockRaftActorContext actorContext = createActorContext();
-        actorContext.setPeerAddresses(ImmutableMap.<String,String>builder().put(FOLLOWER_ID,
+        actorContext.setPeerAddresses(ImmutableMap.<String, String>builder().put(FOLLOWER_ID,
                 followerActor.path().toString()).build());
         return actorContext;
     }
@@ -756,22 +757,6 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
         return context;
     }
 
-    public static class ForwardMessageToBehaviorActor extends MessageCollectorActor {
-        AbstractRaftActorBehavior behavior;
-
-        @Override public void onReceive(Object message) throws Exception {
-            if(behavior != null) {
-                behavior.handleMessage(sender(), message);
-            }
-
-            super.onReceive(message);
-        }
-
-        public static Props props() {
-            return Props.create(ForwardMessageToBehaviorActor.class);
-        }
-    }
-
     @Test
     public void testLeaderCreatedWithCommitIndexLessThanLastIndex() throws Exception {
         logStart("testLeaderCreatedWithCommitIndexLessThanLastIndex");
@@ -781,7 +766,7 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
         MockRaftActorContext followerActorContext = createActorContext(FOLLOWER_ID, followerActor);
 
         Follower follower = new Follower(followerActorContext);
-        followerActor.underlyingActor().behavior = follower;
+        followerActor.underlyingActor().setBehavior(follower);
 
         Map<String, String> peerAddresses = new HashMap<>();
         peerAddresses.put(FOLLOWER_ID, followerActor.path().toString());
@@ -834,7 +819,7 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
         MockRaftActorContext followerActorContext = createActorContext(FOLLOWER_ID, followerActor);
 
         Follower follower = new Follower(followerActorContext);
-        followerActor.underlyingActor().behavior = follower;
+        followerActor.underlyingActor().setBehavior(follower);
 
         Map<String, String> peerAddresses = new HashMap<>();
         peerAddresses.put(FOLLOWER_ID, followerActor.path().toString());
@@ -871,7 +856,7 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
         assertEquals(2, appendEntriesReply.getLogLastIndex());
         assertEquals(1, appendEntriesReply.getLogLastTerm());
 
-        leaderActor.underlyingActor().behavior = leader;
+        leaderActor.underlyingActor().setBehavior(follower);
         leader.handleMessage(followerActor, appendEntriesReply);
 
         leaderActor.underlyingActor().clear();
@@ -1024,7 +1009,6 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
             leaderActorContext.setPeerAddresses(peerAddresses);
 
             leader = new Leader(leaderActorContext);
-            leader.stopIsolatedLeaderCheckSchedule();
 
             leader.markFollowerActive("follower-1");
             leader.markFollowerActive("follower-2");
@@ -1077,7 +1061,7 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
         followerActorContext.setConfigParams(configParams);
 
         Follower follower = new Follower(followerActorContext);
-        followerActor.underlyingActor().behavior = follower;
+        followerActor.underlyingActor().setBehavior(follower);
 
         leaderActorContext.getReplicatedLog().removeFrom(0);
         leaderActorContext.setCommitIndex(-1);
@@ -1135,6 +1119,66 @@ public class LeaderTest extends AbstractRaftActorBehaviorTest {
         follower.close();
     }
 
+    @Test
+    public void testLaggingFollowerStarvation() throws Exception {
+        logStart("testLaggingFollowerStarvation");
+        new JavaTestKit(getSystem()) {{
+            String leaderActorId = actorFactory.generateActorId("leader");
+            String follower1ActorId = actorFactory.generateActorId("follower");
+            String follower2ActorId = actorFactory.generateActorId("follower");
+
+            TestActorRef<ForwardMessageToBehaviorActor> leaderActor =
+                    actorFactory.createTestActor(ForwardMessageToBehaviorActor.props(), leaderActorId);
+            ActorRef follower1Actor = actorFactory.createActor(MessageCollectorActor.props(), follower1ActorId);
+            ActorRef follower2Actor = actorFactory.createActor(MessageCollectorActor.props(), follower2ActorId);
+
+            MockRaftActorContext leaderActorContext =
+                    new MockRaftActorContext(leaderActorId, getSystem(), leaderActor);
+
+            DefaultConfigParamsImpl configParams = new DefaultConfigParamsImpl();
+            configParams.setHeartBeatInterval(new FiniteDuration(200, TimeUnit.MILLISECONDS));
+            configParams.setIsolatedLeaderCheckInterval(new FiniteDuration(10, TimeUnit.SECONDS));
+
+            leaderActorContext.setConfigParams(configParams);
+
+            leaderActorContext.setReplicatedLog(
+                    new MockRaftActorContext.MockReplicatedLogBuilder().createEntries(1,5,1).build());
+
+            Map<String, String> peerAddresses = new HashMap<>();
+            peerAddresses.put(follower1ActorId,
+                    follower1Actor.path().toString());
+            peerAddresses.put(follower2ActorId,
+                    follower2Actor.path().toString());
+
+            leaderActorContext.setPeerAddresses(peerAddresses);
+            leaderActorContext.getTermInformation().update(1, leaderActorId);
+
+            RaftActorBehavior leader = createBehavior(leaderActorContext);
+
+            leaderActor.underlyingActor().setBehavior(leader);
+
+            for(int i=1;i<6;i++) {
+                // Each AppendEntriesReply could end up rescheduling the heartbeat (without the fix for bug 2733)
+                RaftActorBehavior newBehavior = leader.handleMessage(follower1Actor, new AppendEntriesReply(follower1ActorId, 1, true, i, 1));
+                assertTrue(newBehavior == leader);
+                Uninterruptibles.sleepUninterruptibly(200, TimeUnit.MILLISECONDS);
+            }
+
+            // Check if the leader has been receiving SendHeartbeat messages despite getting AppendEntriesReply
+            List<SendHeartBeat> heartbeats = MessageCollectorActor.getAllMatching(leaderActor, SendHeartBeat.class);
+
+            assertTrue(String.format("%s heartbeat(s) is less than expected", heartbeats.size()),
+                    heartbeats.size() > 1);
+
+            // Check if follower-2 got AppendEntries during this time and was not starved
+            List<AppendEntries> appendEntries = MessageCollectorActor.getAllMatching(follower2Actor, AppendEntries.class);
+
+            assertTrue(String.format("%s append entries is less than expected", appendEntries.size()),
+                    appendEntries.size() > 1);
+
+        }};
+    }
+
     @Override
     protected void assertStateChangesToFollowerWhenRaftRPCHasNewerTerm(RaftActorContext actorContext,
             ActorRef actorRef, RaftRPC rpc) throws Exception {
diff --git a/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/utils/ForwardMessageToBehaviorActor.java b/opendaylight/md-sal/sal-akka-raft/src/test/java/org/opendaylight/controller/cluster/raft/utils/ForwardMessageToBehaviorActor.java
new file mode 100644 (file)
index 0000000..9bcfcd9
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License v1.0 which accompanies this distribution,
+ * and is available at http://www.eclipse.org/legal/epl-v10.html
+ */
+
+package org.opendaylight.controller.cluster.raft.utils;
+
+import akka.actor.Props;
+import org.opendaylight.controller.cluster.raft.behaviors.RaftActorBehavior;
+
+public class ForwardMessageToBehaviorActor extends MessageCollectorActor {
+    private RaftActorBehavior behavior;
+
+    @Override
+    public void onReceive(Object message) throws Exception {
+        if(behavior != null) {
+            behavior.handleMessage(sender(), message);
+        }
+
+        super.onReceive(message);
+    }
+
+    public static Props props() {
+        return Props.create(ForwardMessageToBehaviorActor.class);
+    }
+
+    public void setBehavior(RaftActorBehavior behavior){
+        this.behavior = behavior;
+    }
+}
+
index 662c12180d29acc65851f6a28e64590ffe2b5463..62f163fb7d0270a2415e934d193136d610e01ca3 100644 (file)
@@ -9,6 +9,7 @@
 package org.opendaylight.controller.cluster.raft.utils;
 
 import akka.actor.ActorRef;
+import akka.actor.Props;
 import akka.actor.UntypedActor;
 import akka.pattern.Patterns;
 import akka.util.Timeout;
@@ -123,4 +124,8 @@ public class MessageCollectorActor extends UntypedActor {
 
         throw new TimeoutException("Actor not ready in time.");
     }
+
+    public static Props props() {
+        return Props.create(MessageCollectorActor.class);
+    }
 }
index 9da6a3b5a41a025fd1a3ffcf30d42789701a6bc1..e72f4b2675eb886226ad660204fe706513937094 100644 (file)
@@ -13,17 +13,27 @@ odl-cluster-data {
     loggers = ["akka.event.slf4j.Slf4jLogger"]
 
     actor {
-
       provider = "akka.cluster.ClusterActorRefProvider"
       serializers {
-                java = "akka.serialization.JavaSerializer"
-                proto = "akka.remote.serialization.ProtobufSerializer"
-              }
+        java = "akka.serialization.JavaSerializer"
+        proto = "akka.remote.serialization.ProtobufSerializer"
+      }
+
+      serialization-bindings {
+        "com.google.protobuf.Message" = proto
+      }
 
-              serialization-bindings {
-                  "com.google.protobuf.Message" = proto
+      default-dispatcher {
+        # Setting throughput to 1 makes the dispatcher fair. It processes 1 message from
+        # the mailbox before moving on to the next mailbox
+        throughput = 1
+      }
 
-              }
+      default-mailbox {
+        # When not using a BalancingDispatcher it is recommended that we use the SingleConsumerOnlyUnboundedMailbox
+        # as it is the most efficient for multiple producer/single consumer use cases
+        mailbox-type="akka.dispatch.SingleConsumerOnlyUnboundedMailbox"
+      }
     }
     remote {
       log-remote-lifecycle-events = off
index 4f472266c1f56acbd8fc531ae7189f1ae91951b4..c51ea80726e54d9cf656e193ca8521d242206c76 100644 (file)
@@ -342,7 +342,7 @@ public class ThreePhaseCommitCohortProxy implements DOMStoreThreePhaseCommitCoho
             timerContext.stop();
 
             Snapshot timerSnapshot = commitTimer.getSnapshot();
-            double allowedLatencyInNanos = timerSnapshot.get98thPercentile();
+            double allowedLatencyInNanos = timerSnapshot.get95thPercentile();
 
             long commitTimeoutInSeconds = actorContext.getDatastoreContext()
                     .getShardTransactionCommitTimeoutInSeconds();
index 3e8982371808d3eb2228494ababcc64dd04c844e..d3a3a8fc2df812b88ffc500c71e17d224189b541 100644 (file)
@@ -28,7 +28,7 @@ public class DatastoreContextTest {
         assertEquals(DatastoreContext.DEFAULT_SHARD_LEADER_ELECTION_TIMEOUT, build.getShardLeaderElectionTimeout());
         assertEquals(DatastoreContext.DEFAULT_PERSISTENT, build.isPersistent());
         assertEquals(DatastoreContext.DEFAULT_CONFIGURATION_READER, build.getConfigurationReader());
-        assertEquals(DatastoreContext.DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS, build.getShardRaftConfig().getIsolatedCheckInterval().length());
+        assertEquals(DatastoreContext.DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS, build.getShardRaftConfig().getIsolatedCheckIntervalInMillis());
         assertEquals(DatastoreContext.DEFAULT_SHARD_SNAPSHOT_DATA_THRESHOLD_PERCENTAGE, build.getShardRaftConfig().getSnapshotDataThresholdPercentage());
         assertEquals(DatastoreContext.DEFAULT_SHARD_ELECTION_TIMEOUT_FACTOR, build.getShardRaftConfig().getElectionTimeoutFactor());
         assertEquals(DatastoreContext.DEFAULT_TX_CREATION_INITIAL_RATE_LIMIT, build.getTransactionCreationInitialRateLimit());
index d2396e0524f340844ea5f7c0e35dfd6cad0b4806..b013515f2595950cba669ac08dfdb1a8eefe37fa 100644 (file)
@@ -71,7 +71,7 @@ public class ThreePhaseCommitCohortProxyTest extends AbstractActorTest {
         doReturn(commitTimer).when(actorContext).getOperationTimer("commit");
         doReturn(commitTimerContext).when(commitTimer).time();
         doReturn(commitSnapshot).when(commitTimer).getSnapshot();
-        doReturn(TimeUnit.MILLISECONDS.toNanos(2000) * 1.0).when(commitSnapshot).get98thPercentile();
+        doReturn(TimeUnit.MILLISECONDS.toNanos(2000) * 1.0).when(commitSnapshot).get95thPercentile();
         doReturn(10.0).when(actorContext).getTxCreationLimit();
     }
 
index 3dffdfce575d82a0118c11d137fe501a450defe7..0b72a32f1033884ba983c71b2651a4a88ad6c7b1 100644 (file)
@@ -11,6 +11,8 @@ package org.opendaylight.controller.dummy.datastore;
 import akka.actor.Props;
 import akka.actor.UntypedActor;
 import akka.japi.Creator;
+import com.google.common.base.Stopwatch;
+import java.util.concurrent.TimeUnit;
 import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
 import org.opendaylight.controller.cluster.raft.messages.AppendEntries;
 import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
@@ -27,6 +29,7 @@ public class DummyShard extends UntypedActor{
     private final Logger LOG = LoggerFactory.getLogger(DummyShard.class);
     private long lastMessageIndex  = -1;
     private long lastMessageSize = 0;
+    private Stopwatch appendEntriesWatch;
 
     public DummyShard(Configuration configuration, String followerId) {
         this.configuration = configuration;
@@ -57,10 +60,21 @@ public class DummyShard extends UntypedActor{
     }
 
     protected void handleAppendEntries(AppendEntries req) throws InterruptedException {
-
         LOG.info("{} - Received AppendEntries message : leader term = {}, index = {}, prevLogIndex = {}, size = {}",
                 followerId, req.getTerm(),req.getLeaderCommit(), req.getPrevLogIndex(), req.getEntries().size());
 
+        if(appendEntriesWatch != null){
+            long elapsed = appendEntriesWatch.elapsed(TimeUnit.SECONDS);
+            if(elapsed >= 5){
+                LOG.error("More than 5 seconds since last append entry, elapsed Time = {} seconds" +
+                                ", leaderCommit = {}, prevLogIndex = {}, size = {}",
+                        elapsed, req.getLeaderCommit(), req.getPrevLogIndex(), req.getEntries().size());
+            }
+            appendEntriesWatch.reset().start();
+        } else {
+            appendEntriesWatch = Stopwatch.createStarted();
+        }
+
         if(lastMessageIndex == req.getLeaderCommit() && req.getEntries().size() > 0 && lastMessageSize > 0){
             LOG.error("{} - Duplicate message with leaderCommit = {} prevLogIndex = {} received", followerId, req.getLeaderCommit(), req.getPrevLogIndex());
         }
index 067c048231d1da73c7c550fc4934b9523132c396..cd2d082079981b4f5323f823471fb3a7299c163c 100644 (file)
@@ -3,4 +3,4 @@ org.slf4j.simpleLogger.dateTimeFormat=hh:mm:ss,S a
 org.slf4j.simpleLogger.logFile=System.out
 org.slf4j.simpleLogger.showShortLogName=true
 org.slf4j.simpleLogger.levelInBrackets=true
-org.slf4j.simpleLogger.defaultLogLevel=info
\ No newline at end of file
+org.slf4j.simpleLogger.defaultLogLevel=error
\ No newline at end of file