import akka.actor.ActorRef;
import akka.actor.ActorSelection;
-import akka.event.Logging;
-import akka.event.LoggingAdapter;
import akka.japi.Procedure;
import akka.persistence.RecoveryCompleted;
import akka.persistence.SaveSnapshotFailure;
import akka.persistence.SnapshotOffer;
import akka.persistence.SnapshotSelectionCriteria;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Objects;
import com.google.common.base.Optional;
import com.google.common.base.Stopwatch;
-import com.google.protobuf.ByteString;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
import java.io.Serializable;
+import java.util.Collection;
+import java.util.List;
import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import org.apache.commons.lang3.time.DurationFormatUtils;
import org.opendaylight.controller.cluster.DataPersistenceProvider;
import org.opendaylight.controller.cluster.common.actor.AbstractUntypedPersistentActor;
+import org.opendaylight.controller.cluster.notifications.LeaderStateChanged;
import org.opendaylight.controller.cluster.notifications.RoleChanged;
+import org.opendaylight.controller.cluster.raft.base.messages.ApplyJournalEntries;
import org.opendaylight.controller.cluster.raft.base.messages.ApplyLogEntries;
import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
import org.opendaylight.controller.cluster.raft.base.messages.ApplyState;
import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot;
import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshotReply;
import org.opendaylight.controller.cluster.raft.base.messages.Replicate;
-import org.opendaylight.controller.cluster.raft.base.messages.SendHeartBeat;
-import org.opendaylight.controller.cluster.raft.base.messages.SendInstallSnapshot;
+import org.opendaylight.controller.cluster.raft.behaviors.AbstractLeader;
import org.opendaylight.controller.cluster.raft.behaviors.AbstractRaftActorBehavior;
import org.opendaylight.controller.cluster.raft.behaviors.Follower;
import org.opendaylight.controller.cluster.raft.behaviors.RaftActorBehavior;
import org.opendaylight.controller.cluster.raft.client.messages.FindLeader;
import org.opendaylight.controller.cluster.raft.client.messages.FindLeaderReply;
-import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply;
+import org.opendaylight.controller.cluster.raft.client.messages.FollowerInfo;
+import org.opendaylight.controller.cluster.raft.client.messages.GetOnDemandRaftState;
+import org.opendaylight.controller.cluster.raft.client.messages.OnDemandRaftState;
import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload;
-import org.opendaylight.controller.protobuff.messages.cluster.raft.AppendEntriesMessages;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* RaftActor encapsulates a state machine that needs to be kept synchronized
* </ul>
*/
public abstract class RaftActor extends AbstractUntypedPersistentActor {
- protected final LoggingAdapter LOG =
- Logging.getLogger(getContext().system(), this);
+
+ private static final long APPLY_STATE_DELAY_THRESHOLD_IN_NANOS = TimeUnit.MILLISECONDS.toNanos(50L); // 50 millis
+
+ private static final Procedure<ApplyJournalEntries> APPLY_JOURNAL_ENTRIES_PERSIST_CALLBACK =
+ new Procedure<ApplyJournalEntries>() {
+ @Override
+ public void apply(ApplyJournalEntries param) throws Exception {
+ }
+ };
+ private static final String COMMIT_SNAPSHOT = "commit_snapshot";
+
+ protected final Logger LOG = LoggerFactory.getLogger(getClass());
/**
* The current state determines the current behavior of a RaftActor
* This context should NOT be passed directly to any other actor it is
* only to be consumed by the RaftActorBehaviors
*/
- private final RaftActorContext context;
+ private final RaftActorContextImpl context;
+
+ private final Procedure<Void> createSnapshotProcedure = new CreateSnapshotProcedure();
/**
* The in-memory journal
*/
private ReplicatedLogImpl replicatedLog = new ReplicatedLogImpl();
- private CaptureSnapshot captureSnapshot = null;
-
- private volatile boolean hasSnapshotCaptureInitiated = false;
-
private Stopwatch recoveryTimer;
private int currentRecoveryBatchCount;
+ private final BehaviorStateHolder reusableBehaviorStateHolder = new BehaviorStateHolder();
+
public RaftActor(String id, Map<String, String> peerAddresses) {
this(id, peerAddresses, Optional.<ConfigParams>absent());
}
private void initRecoveryTimer() {
if(recoveryTimer == null) {
- recoveryTimer = new Stopwatch();
- recoveryTimer.start();
+ recoveryTimer = Stopwatch.createStarted();
}
}
super.preStart();
}
+ @Override
+ public void postStop() {
+ if(currentBehavior != null) {
+ try {
+ currentBehavior.close();
+ } catch (Exception e) {
+ LOG.debug("{}: Error closing behavior {}", persistenceId(), currentBehavior.state());
+ }
+ }
+
+ super.postStop();
+ }
+
@Override
public void handleRecover(Object message) {
if(persistence().isRecoveryApplicable()) {
} else if (message instanceof ReplicatedLogEntry) {
onRecoveredJournalLogEntry((ReplicatedLogEntry) message);
} else if (message instanceof ApplyLogEntries) {
- onRecoveredApplyLogEntries((ApplyLogEntries) message);
+ // Handle this message for backwards compatibility with pre-Lithium versions.
+ onRecoveredApplyLogEntries(((ApplyLogEntries) message).getToIndex());
+ } else if (message instanceof ApplyJournalEntries) {
+ onRecoveredApplyLogEntries(((ApplyJournalEntries) message).getToIndex());
} else if (message instanceof DeleteEntries) {
replicatedLog.removeFrom(((DeleteEntries) message).getFromIndex());
} else if (message instanceof UpdateElectionTerm) {
onRecoveryComplete();
- RaftActorBehavior oldBehavior = currentBehavior;
- currentBehavior = new Follower(context);
- handleBehaviorChange(oldBehavior, currentBehavior);
+ initializeBehavior();
}
}
}
private void onRecoveredSnapshot(SnapshotOffer offer) {
if(LOG.isDebugEnabled()) {
- LOG.debug("SnapshotOffer called..");
+ LOG.debug("{}: SnapshotOffer called..", persistenceId());
}
initRecoveryTimer();
context.setLastApplied(snapshot.getLastAppliedIndex());
context.setCommitIndex(snapshot.getLastAppliedIndex());
- Stopwatch timer = new Stopwatch();
- timer.start();
+ Stopwatch timer = Stopwatch.createStarted();
// Apply the snapshot to the actors state
- applyRecoverySnapshot(ByteString.copyFrom(snapshot.getState()));
+ applyRecoverySnapshot(snapshot.getState());
timer.stop();
LOG.info("Recovery snapshot applied for {} in {}: snapshotIndex={}, snapshotTerm={}, journal-size=" +
replicatedLog.size(), persistenceId(), timer.toString(),
- replicatedLog.snapshotIndex, replicatedLog.snapshotTerm);
+ replicatedLog.getSnapshotIndex(), replicatedLog.getSnapshotTerm());
}
private void onRecoveredJournalLogEntry(ReplicatedLogEntry logEntry) {
if(LOG.isDebugEnabled()) {
- LOG.debug("Received ReplicatedLogEntry for recovery: {}", logEntry.getIndex());
+ LOG.debug("{}: Received ReplicatedLogEntry for recovery: {}", persistenceId(), logEntry.getIndex());
}
replicatedLog.append(logEntry);
}
- private void onRecoveredApplyLogEntries(ApplyLogEntries ale) {
+ private void onRecoveredApplyLogEntries(long toIndex) {
if(LOG.isDebugEnabled()) {
- LOG.debug("Received ApplyLogEntries for recovery, applying to state: {} to {}",
- context.getLastApplied() + 1, ale.getToIndex());
+ LOG.debug("{}: Received ApplyLogEntries for recovery, applying to state: {} to {}",
+ persistenceId(), context.getLastApplied() + 1, toIndex);
}
- for (long i = context.getLastApplied() + 1; i <= ale.getToIndex(); i++) {
+ for (long i = context.getLastApplied() + 1; i <= toIndex; i++) {
batchRecoveredLogEntry(replicatedLog.get(i));
}
- context.setLastApplied(ale.getToIndex());
- context.setCommitIndex(ale.getToIndex());
+ context.setLastApplied(toIndex);
+ context.setCommitIndex(toIndex);
}
private void batchRecoveredLogEntry(ReplicatedLogEntry logEntry) {
"Persistence Id = " + persistenceId() +
" Last index in log={}, snapshotIndex={}, snapshotTerm={}, " +
"journal-size={}",
- replicatedLog.lastIndex(), replicatedLog.snapshotIndex,
- replicatedLog.snapshotTerm, replicatedLog.size());
+ replicatedLog.lastIndex(), replicatedLog.getSnapshotIndex(),
+ replicatedLog.getSnapshotTerm(), replicatedLog.size());
+
+ initializeBehavior();
+ }
+
+ protected void initializeBehavior(){
+ changeCurrentBehavior(new Follower(context));
+ }
- RaftActorBehavior oldBehavior = currentBehavior;
- currentBehavior = new Follower(context);
- handleBehaviorChange(oldBehavior, currentBehavior);
+ protected void changeCurrentBehavior(RaftActorBehavior newBehavior){
+ reusableBehaviorStateHolder.init(currentBehavior);
+ currentBehavior = newBehavior;
+ handleBehaviorChange(reusableBehaviorStateHolder, currentBehavior);
}
@Override public void handleCommand(Object message) {
if (message instanceof ApplyState){
ApplyState applyState = (ApplyState) message;
+ long elapsedTime = (System.nanoTime() - applyState.getStartTime());
+ if(elapsedTime >= APPLY_STATE_DELAY_THRESHOLD_IN_NANOS){
+ LOG.warn("ApplyState took more time than expected. Elapsed Time = {} ms ApplyState = {}",
+ TimeUnit.NANOSECONDS.toMillis(elapsedTime), applyState);
+ }
+
if(LOG.isDebugEnabled()) {
- LOG.debug("Applying state for log index {} data {}",
- applyState.getReplicatedLogEntry().getIndex(),
+ LOG.debug("{}: Applying state for log index {} data {}",
+ persistenceId(), applyState.getReplicatedLogEntry().getIndex(),
applyState.getReplicatedLogEntry().getData());
}
applyState(applyState.getClientActor(), applyState.getIdentifier(),
applyState.getReplicatedLogEntry().getData());
- } else if (message instanceof ApplyLogEntries){
- ApplyLogEntries ale = (ApplyLogEntries) message;
+ } else if (message instanceof ApplyJournalEntries){
+ ApplyJournalEntries applyEntries = (ApplyJournalEntries) message;
if(LOG.isDebugEnabled()) {
- LOG.debug("Persisting ApplyLogEntries with index={}", ale.getToIndex());
+ LOG.debug("{}: Persisting ApplyLogEntries with index={}", persistenceId(), applyEntries.getToIndex());
}
- persistence().persist(new ApplyLogEntries(ale.getToIndex()), new Procedure<ApplyLogEntries>() {
- @Override
- public void apply(ApplyLogEntries param) throws Exception {
- }
- });
+
+ persistence().persist(applyEntries, APPLY_JOURNAL_ENTRIES_PERSIST_CALLBACK);
} else if(message instanceof ApplySnapshot ) {
Snapshot snapshot = ((ApplySnapshot) message).getSnapshot();
if(LOG.isDebugEnabled()) {
- LOG.debug("ApplySnapshot called on Follower Actor " +
- "snapshotIndex:{}, snapshotTerm:{}", snapshot.getLastAppliedIndex(),
+ LOG.debug("{}: ApplySnapshot called on Follower Actor " +
+ "snapshotIndex:{}, snapshotTerm:{}", persistenceId(), snapshot.getLastAppliedIndex(),
snapshot.getLastAppliedTerm()
);
}
- applySnapshot(ByteString.copyFrom(snapshot.getState()));
+
+ applySnapshot(snapshot.getState());
//clears the followers log, sets the snapshot index to ensure adjusted-index works
replicatedLog = new ReplicatedLogImpl(snapshot);
} else if (message instanceof SaveSnapshotSuccess) {
SaveSnapshotSuccess success = (SaveSnapshotSuccess) message;
- LOG.info("SaveSnapshotSuccess received for snapshot");
+ LOG.info("{}: SaveSnapshotSuccess received for snapshot", persistenceId());
long sequenceNumber = success.metadata().sequenceNr();
} else if (message instanceof SaveSnapshotFailure) {
SaveSnapshotFailure saveSnapshotFailure = (SaveSnapshotFailure) message;
- LOG.info("saveSnapshotFailure.metadata():{}", saveSnapshotFailure.metadata().toString());
- LOG.error(saveSnapshotFailure.cause(), "SaveSnapshotFailure received for snapshot Cause:");
-
- context.getReplicatedLog().snapshotRollback();
+ LOG.error("{}: SaveSnapshotFailure received for snapshot Cause:",
+ persistenceId(), saveSnapshotFailure.cause());
- LOG.info("Replicated Log rollbacked. Snapshot will be attempted in the next cycle." +
- "snapshotIndex:{}, snapshotTerm:{}, log-size:{}",
- context.getReplicatedLog().getSnapshotIndex(),
- context.getReplicatedLog().getSnapshotTerm(),
- context.getReplicatedLog().size());
+ context.getSnapshotManager().rollback();
} else if (message instanceof CaptureSnapshot) {
- LOG.info("CaptureSnapshot received by actor");
- CaptureSnapshot cs = (CaptureSnapshot)message;
- captureSnapshot = cs;
- createSnapshot();
-
- } else if (message instanceof CaptureSnapshotReply){
- LOG.info("CaptureSnapshotReply received by actor");
- CaptureSnapshotReply csr = (CaptureSnapshotReply) message;
+ LOG.debug("{}: CaptureSnapshot received by actor: {}", persistenceId(), message);
- ByteString stateInBytes = csr.getSnapshot();
- LOG.info("CaptureSnapshotReply stateInBytes size:{}", stateInBytes.size());
- handleCaptureSnapshotReply(stateInBytes);
+ context.getSnapshotManager().create(createSnapshotProcedure);
+ } else if (message instanceof CaptureSnapshotReply) {
+ handleCaptureSnapshotReply(((CaptureSnapshotReply) message).getSnapshot());
+ } else if(message instanceof GetOnDemandRaftState) {
+ onGetOnDemandRaftStats();
+ } else if (message.equals(COMMIT_SNAPSHOT)) {
+ commitSnapshot(-1);
} else {
- if (!(message instanceof AppendEntriesMessages.AppendEntries)
- && !(message instanceof AppendEntriesReply) && !(message instanceof SendHeartBeat)) {
- if(LOG.isDebugEnabled()) {
- LOG.debug("onReceiveCommand: message: {}", message.getClass());
- }
- }
+ reusableBehaviorStateHolder.init(currentBehavior);
- RaftActorBehavior oldBehavior = currentBehavior;
currentBehavior = currentBehavior.handleMessage(getSender(), message);
- handleBehaviorChange(oldBehavior, currentBehavior);
+ handleBehaviorChange(reusableBehaviorStateHolder, currentBehavior);
+ }
+ }
+
+ private void onGetOnDemandRaftStats() {
+ // Debugging message to retrieve raft stats.
+
+ OnDemandRaftState.Builder builder = OnDemandRaftState.builder()
+ .commitIndex(context.getCommitIndex())
+ .currentTerm(context.getTermInformation().getCurrentTerm())
+ .inMemoryJournalDataSize(replicatedLog.dataSize())
+ .inMemoryJournalLogSize(replicatedLog.size())
+ .isSnapshotCaptureInitiated(context.getSnapshotManager().isCapturing())
+ .lastApplied(context.getLastApplied())
+ .lastIndex(replicatedLog.lastIndex())
+ .lastTerm(replicatedLog.lastTerm())
+ .leader(getLeaderId())
+ .raftState(currentBehavior.state().toString())
+ .replicatedToAllIndex(currentBehavior.getReplicatedToAllIndex())
+ .snapshotIndex(replicatedLog.getSnapshotIndex())
+ .snapshotTerm(replicatedLog.getSnapshotTerm())
+ .votedFor(context.getTermInformation().getVotedFor())
+ .peerAddresses(ImmutableMap.copyOf(context.getPeerAddresses()));
+
+ ReplicatedLogEntry lastLogEntry = getLastLogEntry();
+ if (lastLogEntry != null) {
+ builder.lastLogIndex(lastLogEntry.getIndex());
+ builder.lastLogTerm(lastLogEntry.getTerm());
+ }
+
+ if(currentBehavior instanceof AbstractLeader) {
+ AbstractLeader leader = (AbstractLeader)currentBehavior;
+ Collection<String> followerIds = leader.getFollowerIds();
+ List<FollowerInfo> followerInfoList = Lists.newArrayListWithCapacity(followerIds.size());
+ for(String id: followerIds) {
+ final FollowerLogInformation info = leader.getFollower(id);
+ followerInfoList.add(new FollowerInfo(id, info.getNextIndex(), info.getMatchIndex(),
+ info.isFollowerActive(), DurationFormatUtils.formatDurationHMS(info.timeSinceLastActivity())));
+ }
+
+ builder.followerInfoList(followerInfoList);
}
+
+ sender().tell(builder.build(), self());
+
}
- private void handleBehaviorChange(RaftActorBehavior oldBehavior, RaftActorBehavior currentBehavior) {
+ private void handleBehaviorChange(BehaviorStateHolder oldBehaviorState, RaftActorBehavior currentBehavior) {
+ RaftActorBehavior oldBehavior = oldBehaviorState.getBehavior();
+
if (oldBehavior != currentBehavior){
onStateChanged();
}
- if (oldBehavior != null) {
- // it can happen that the state has not changed but the leader has changed.
- onLeaderChanged(oldBehavior.getLeaderId(), currentBehavior.getLeaderId());
- if (getRoleChangeNotifier().isPresent() && oldBehavior.state() != currentBehavior.state()) {
- // we do not want to notify when the behavior/role is set for the first time (i.e follower)
- getRoleChangeNotifier().get().tell(new RoleChanged(getId(), oldBehavior.state().name(),
- currentBehavior.state().name()), getSelf());
+ String oldBehaviorLeaderId = oldBehavior == null ? null : oldBehaviorState.getLeaderId();
+ String oldBehaviorStateName = oldBehavior == null ? null : oldBehavior.state().name();
+
+ // it can happen that the state has not changed but the leader has changed.
+ Optional<ActorRef> roleChangeNotifier = getRoleChangeNotifier();
+ if(!Objects.equal(oldBehaviorLeaderId, currentBehavior.getLeaderId())) {
+ if(roleChangeNotifier.isPresent()) {
+ roleChangeNotifier.get().tell(new LeaderStateChanged(getId(), currentBehavior.getLeaderId()), getSelf());
}
+
+ onLeaderChanged(oldBehaviorLeaderId, currentBehavior.getLeaderId());
+ }
+
+ if (roleChangeNotifier.isPresent() &&
+ (oldBehavior == null || (oldBehavior.state() != currentBehavior.state()))) {
+ roleChangeNotifier.get().tell(new RoleChanged(getId(), oldBehaviorStateName ,
+ currentBehavior.state().name()), getSelf());
}
}
* @param identifier
* @param data
*/
- protected void persistData(ActorRef clientActor, String identifier,
- Payload data) {
+ protected void persistData(final ActorRef clientActor, final String identifier,
+ final Payload data) {
ReplicatedLogEntry replicatedLogEntry = new ReplicatedLogImplEntry(
context.getReplicatedLog().lastIndex() + 1,
context.getTermInformation().getCurrentTerm(), data);
if(LOG.isDebugEnabled()) {
- LOG.debug("Persist data {}", replicatedLogEntry);
+ LOG.debug("{}: Persist data {}", persistenceId(), replicatedLogEntry);
}
+ final RaftActorContext raftContext = getRaftActorContext();
+
replicatedLog
- .appendAndPersist(clientActor, identifier, replicatedLogEntry);
- }
+ .appendAndPersist(replicatedLogEntry, new Procedure<ReplicatedLogEntry>() {
+ @Override
+ public void apply(ReplicatedLogEntry replicatedLogEntry) throws Exception {
+ if(!hasFollowers()){
+ // Increment the Commit Index and the Last Applied values
+ raftContext.setCommitIndex(replicatedLogEntry.getIndex());
+ raftContext.setLastApplied(replicatedLogEntry.getIndex());
+
+ // Apply the state immediately
+ applyState(clientActor, identifier, data);
+
+ // Send a ApplyJournalEntries message so that we write the fact that we applied
+ // the state to durable storage
+ self().tell(new ApplyJournalEntries(replicatedLogEntry.getIndex()), self());
+
+ context.getSnapshotManager().trimLog(context.getLastApplied(), currentBehavior);
+
+ } else if (clientActor != null) {
+ // Send message for replication
+ currentBehavior.handleMessage(getSelf(),
+ new Replicate(clientActor, identifier,
+ replicatedLogEntry)
+ );
+ }
+
+ }
+ }); }
protected String getId() {
return context.getId();
return context;
}
+ protected void updateConfigParams(ConfigParams configParams) {
+ context.setConfigParams(configParams);
+ }
+
/**
* setPeerAddress sets the address of a known peer at a later time.
* <p>
}
protected void commitSnapshot(long sequenceNumber) {
- context.getReplicatedLog().snapshotCommit();
-
- // TODO: Not sure if we want to be this aggressive with trimming stuff
- trimPersistentData(sequenceNumber);
+ context.getSnapshotManager().commit(persistence(), sequenceNumber);
}
/**
/**
* This method is called during recovery to reconstruct the state of the actor.
*
- * @param snapshot A snapshot of the state of the actor
+ * @param snapshotBytes A snapshot of the state of the actor
*/
- protected abstract void applyRecoverySnapshot(ByteString snapshot);
+ protected abstract void applyRecoverySnapshot(byte[] snapshotBytes);
/**
* This method is called during recovery at the end of a batch to apply the current batched
* operations when the derived actor is out of sync with it's peers
* and the only way to bring it in sync is by applying a snapshot
*
- * @param snapshot A snapshot of the state of the actor
+ * @param snapshotBytes A snapshot of the state of the actor
*/
- protected abstract void applySnapshot(ByteString snapshot);
+ protected abstract void applySnapshot(byte[] snapshotBytes);
/**
* This method will be called by the RaftActor when the state of the
protected void onLeaderChanged(String oldLeader, String newLeader){};
- private void trimPersistentData(long sequenceNumber) {
- // Trim akka snapshots
- // FIXME : Not sure how exactly the SnapshotSelectionCriteria is applied
- // For now guessing that it is ANDed.
- persistence().deleteSnapshots(new SnapshotSelectionCriteria(
- sequenceNumber - context.getConfigParams().getSnapshotBatchCount(), 43200000));
-
- // Trim akka journal
- persistence().deleteMessages(sequenceNumber);
- }
-
private String getLeaderAddress(){
if(isLeader()){
return getSelf().path().toString();
}
String peerAddress = context.getPeerAddress(leaderId);
if(LOG.isDebugEnabled()) {
- LOG.debug("getLeaderAddress leaderId = {} peerAddress = {}",
- leaderId, peerAddress);
+ LOG.debug("{}: getLeaderAddress leaderId = {} peerAddress = {}",
+ persistenceId(), leaderId, peerAddress);
}
return peerAddress;
}
- private void handleCaptureSnapshotReply(ByteString stateInBytes) {
- // create a snapshot object from the state provided and save it
- // when snapshot is saved async, SaveSnapshotSuccess is raised.
-
- Snapshot sn = Snapshot.create(stateInBytes.toByteArray(),
- context.getReplicatedLog().getFrom(captureSnapshot.getLastAppliedIndex() + 1),
- captureSnapshot.getLastIndex(), captureSnapshot.getLastTerm(),
- captureSnapshot.getLastAppliedIndex(), captureSnapshot.getLastAppliedTerm());
-
- persistence().saveSnapshot(sn);
-
- LOG.info("Persisting of snapshot done:{}", sn.getLogMessage());
-
- //be greedy and remove entries from in-mem journal which are in the snapshot
- // and update snapshotIndex and snapshotTerm without waiting for the success,
-
- context.getReplicatedLog().snapshotPreCommit(
- captureSnapshot.getLastAppliedIndex(),
- captureSnapshot.getLastAppliedTerm());
+ private void handleCaptureSnapshotReply(byte[] snapshotBytes) {
+ LOG.debug("{}: CaptureSnapshotReply received by actor: snapshot size {}", persistenceId(), snapshotBytes.length);
- LOG.info("Removed in-memory snapshotted entries, adjusted snaphsotIndex:{} " +
- "and term:{}", captureSnapshot.getLastAppliedIndex(),
- captureSnapshot.getLastAppliedTerm());
+ context.getSnapshotManager().persist(persistence(), snapshotBytes, currentBehavior, getTotalMemory());
+ }
- if (isLeader() && captureSnapshot.isInstallSnapshotInitiated()) {
- // this would be call straight to the leader and won't initiate in serialization
- currentBehavior.handleMessage(getSelf(), new SendInstallSnapshot(stateInBytes));
- }
+ protected long getTotalMemory() {
+ return Runtime.getRuntime().totalMemory();
+ }
- captureSnapshot = null;
- hasSnapshotCaptureInitiated = false;
+ protected boolean hasFollowers(){
+ return getRaftActorContext().getPeerAddresses().keySet().size() > 0;
}
private class ReplicatedLogImpl extends AbstractReplicatedLogImpl {
+ private static final int DATA_SIZE_DIVIDER = 5;
+ private long dataSizeSinceLastSnapshot = 0L;
+
public ReplicatedLogImpl(Snapshot snapshot) {
super(snapshot.getLastAppliedIndex(), snapshot.getLastAppliedTerm(),
// FIXME: Maybe this should be done after the command is saved
journal.subList(adjustedIndex , journal.size()).clear();
- persistence().persist(new DeleteEntries(adjustedIndex), new Procedure<DeleteEntries>(){
+ persistence().persist(new DeleteEntries(adjustedIndex), new Procedure<DeleteEntries>() {
- @Override public void apply(DeleteEntries param)
- throws Exception {
+ @Override
+ public void apply(DeleteEntries param)
+ throws Exception {
//FIXME : Doing nothing for now
+ dataSize = 0;
+ for (ReplicatedLogEntry entry : journal) {
+ dataSize += entry.size();
+ }
}
});
}
@Override public void appendAndPersist(
final ReplicatedLogEntry replicatedLogEntry) {
- appendAndPersist(null, null, replicatedLogEntry);
+ appendAndPersist(replicatedLogEntry, null);
}
- public void appendAndPersist(final ActorRef clientActor,
- final String identifier,
- final ReplicatedLogEntry replicatedLogEntry) {
+ public void appendAndPersist(
+ final ReplicatedLogEntry replicatedLogEntry,
+ final Procedure<ReplicatedLogEntry> callback) {
if(LOG.isDebugEnabled()) {
- LOG.debug("Append log entry and persist {} ", replicatedLogEntry);
+ LOG.debug("{}: Append log entry and persist {} ", persistenceId(), replicatedLogEntry);
}
// FIXME : By adding the replicated log entry to the in-memory journal we are not truly ensuring durability of the logs
new Procedure<ReplicatedLogEntry>() {
@Override
public void apply(ReplicatedLogEntry evt) throws Exception {
- // when a snaphsot is being taken, captureSnapshot != null
- if (hasSnapshotCaptureInitiated == false &&
- journal.size() % context.getConfigParams().getSnapshotBatchCount() == 0) {
-
- LOG.info("Initiating Snapshot Capture..");
- long lastAppliedIndex = -1;
- long lastAppliedTerm = -1;
-
- ReplicatedLogEntry lastAppliedEntry = get(context.getLastApplied());
- if (lastAppliedEntry != null) {
- lastAppliedIndex = lastAppliedEntry.getIndex();
- lastAppliedTerm = lastAppliedEntry.getTerm();
- }
+ int logEntrySize = replicatedLogEntry.size();
+
+ dataSize += logEntrySize;
+ long dataSizeForCheck = dataSize;
+
+ dataSizeSinceLastSnapshot += logEntrySize;
+
+ if (!hasFollowers()) {
+ // When we do not have followers we do not maintain an in-memory log
+ // due to this the journalSize will never become anything close to the
+ // snapshot batch count. In fact will mostly be 1.
+ // Similarly since the journal's dataSize depends on the entries in the
+ // journal the journal's dataSize will never reach a value close to the
+ // memory threshold.
+ // By maintaining the dataSize outside the journal we are tracking essentially
+ // what we have written to the disk however since we no longer are in
+ // need of doing a snapshot just for the sake of freeing up memory we adjust
+ // the real size of data by the DATA_SIZE_DIVIDER so that we do not snapshot as often
+ // as if we were maintaining a real snapshot
+ dataSizeForCheck = dataSizeSinceLastSnapshot / DATA_SIZE_DIVIDER;
+ }
+ long journalSize = replicatedLogEntry.getIndex() + 1;
+ long dataThreshold = getTotalMemory() *
+ context.getConfigParams().getSnapshotDataThresholdPercentage() / 100;
+
+ if ((journalSize % context.getConfigParams().getSnapshotBatchCount() == 0
+ || dataSizeForCheck > dataThreshold)) {
+
+ boolean started = context.getSnapshotManager().capture(replicatedLogEntry,
+ currentBehavior.getReplicatedToAllIndex());
- if(LOG.isDebugEnabled()) {
- LOG.debug("Snapshot Capture logSize: {}", journal.size());
- LOG.debug("Snapshot Capture lastApplied:{} ",
- context.getLastApplied());
- LOG.debug("Snapshot Capture lastAppliedIndex:{}", lastAppliedIndex);
- LOG.debug("Snapshot Capture lastAppliedTerm:{}", lastAppliedTerm);
+ if(started){
+ dataSizeSinceLastSnapshot = 0;
}
- // send a CaptureSnapshot to self to make the expensive operation async.
- getSelf().tell(new CaptureSnapshot(
- lastIndex(), lastTerm(), lastAppliedIndex, lastAppliedTerm),
- null);
- hasSnapshotCaptureInitiated = true;
}
- // Send message for replication
- if (clientActor != null) {
- currentBehavior.handleMessage(getSelf(),
- new Replicate(clientActor, identifier,
- replicatedLogEntry)
- );
+
+ if (callback != null){
+ callback.apply(replicatedLogEntry);
}
}
}
@Override public void update(long currentTerm, String votedFor) {
if(LOG.isDebugEnabled()) {
- LOG.debug("Set currentTerm={}, votedFor={}", currentTerm, votedFor);
+ LOG.debug("{}: Set currentTerm={}, votedFor={}", persistenceId(), currentTerm, votedFor);
}
this.currentTerm = currentTerm;
this.votedFor = votedFor;
@Override
public void saveSnapshot(Object o) {
// Make saving Snapshot successful
- commitSnapshot(-1L);
+ // Committing the snapshot here would end up calling commit in the creating state which would
+ // be a state violation. That's why now we send a message to commit the snapshot.
+ self().tell(COMMIT_SNAPSHOT, self());
+ }
+ }
+
+
+ private class CreateSnapshotProcedure implements Procedure<Void> {
+
+ @Override
+ public void apply(Void aVoid) throws Exception {
+ createSnapshot();
}
}
return currentBehavior;
}
+ private static class BehaviorStateHolder {
+ private RaftActorBehavior behavior;
+ private String leaderId;
+
+ void init(RaftActorBehavior behavior) {
+ this.behavior = behavior;
+ this.leaderId = behavior != null ? behavior.getLeaderId() : null;
+ }
+
+ RaftActorBehavior getBehavior() {
+ return behavior;
+ }
+
+ String getLeaderId() {
+ return leaderId;
+ }
+ }
}