import akka.japi.Procedure;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nullable;
import org.opendaylight.controller.cluster.raft.RaftActorContext;
import org.opendaylight.controller.cluster.raft.RaftState;
import org.opendaylight.controller.cluster.raft.ReplicatedLogEntry;
-import org.opendaylight.controller.cluster.raft.Snapshot;
import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot;
import org.opendaylight.controller.cluster.raft.base.messages.ElectionTimeout;
import org.opendaylight.controller.cluster.raft.base.messages.TimeoutNow;
import org.opendaylight.controller.cluster.raft.messages.RequestVote;
import org.opendaylight.controller.cluster.raft.messages.RequestVoteReply;
import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload;
+import org.opendaylight.controller.cluster.raft.persisted.Snapshot;
/**
* The behavior of a RaftActor in the Follower raft state.
private final SyncStatusTracker initialSyncStatusTracker;
- private final Procedure<ReplicatedLogEntry> appendAndPersistCallback =
- logEntry -> context.getReplicatedLog().captureSnapshotIfReady(logEntry);
-
private final Stopwatch lastLeaderMessageTimer = Stopwatch.createStarted();
private SnapshotTracker snapshotTracker = null;
private String leaderId;
if (snapshotTracker != null && !snapshotTracker.getLeaderId().equals(appendEntries.getLeaderId())) {
log.debug("{}: snapshot install is in progress but the prior snapshot leaderId {} does not match the "
+ "AppendEntries leaderId {}", logName(), snapshotTracker.getLeaderId(), appendEntries.getLeaderId());
- snapshotTracker = null;
+ closeSnapshotTracker();
}
if (snapshotTracker != null || context.getSnapshotManager().isApplying()) {
// We found that the log was out of sync so just send a negative
// reply and return
- log.debug("{}: Follower is out-of-sync, so sending negative reply, lastIndex: {}, lastTerm: {}",
- logName(), lastIndex, lastTerm());
+ final AppendEntriesReply reply = new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
+ lastTerm(), context.getPayloadVersion());
- sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
- lastTerm(), context.getPayloadVersion()), actor());
+ log.info("{}: Follower is out-of-sync so sending negative reply: {}", logName(), reply);
+
+ sender.tell(reply, actor());
return this;
}
if (!context.getRaftPolicy().applyModificationToStateBeforeConsensus()) {
- log.debug("{}: Removing entries from log starting at {}", logName(),
- matchEntry.getIndex());
+ log.info("{}: Removing entries from log starting at {}", logName(), matchEntry.getIndex());
// Entries do not match so remove all subsequent entries
if (!context.getReplicatedLog().removeFromAndPersist(matchEntry.getIndex())) {
// so we must send back a reply to force a snapshot to completely re-sync the
// follower's log and state.
- log.debug("{}: Could not remove entries - sending reply to force snapshot", logName());
+ log.info("{}: Could not remove entries - sending reply to force snapshot", logName());
sender.tell(new AppendEntriesReply(context.getId(), currentTerm(), false, lastIndex,
lastTerm(), context.getPayloadVersion(), true), actor());
return this;
log.debug("{}: After cleanup, lastIndex: {}, entries to be added from: {}", logName(),
lastIndex, addEntriesFrom);
+ // When persistence successfully completes for each new log entry appended, we need to determine if we
+ // should capture a snapshot to compact the persisted log. shouldCaptureSnapshot tracks whether or not
+ // one of the log entries has exceeded the log size threshold whereby a snapshot should be taken. However
+ // we don't initiate the snapshot at that log entry but rather after the last log entry has been persisted.
+ // This is done because subsequent log entries after the one that tripped the threshold may have been
+ // applied to the state already, as the persistence callback occurs async, and we want those entries
+ // purged from the persisted log as well.
+ final AtomicBoolean shouldCaptureSnapshot = new AtomicBoolean(false);
+ final Procedure<ReplicatedLogEntry> appendAndPersistCallback = logEntry -> {
+ final ReplicatedLogEntry lastEntryToAppend = appendEntries.getEntries().get(
+ appendEntries.getEntries().size() - 1);
+ if (shouldCaptureSnapshot.get() && logEntry == lastEntryToAppend) {
+ context.getSnapshotManager().capture(context.getReplicatedLog().last(), getReplicatedToAllIndex());
+ }
+ };
+
// 4. Append any new entries not already in the log
for (int i = addEntriesFrom; i < appendEntries.getEntries().size(); i++) {
ReplicatedLogEntry entry = appendEntries.getEntries().get(i);
context.getReplicatedLog().appendAndPersist(entry, appendAndPersistCallback, false);
+ shouldCaptureSnapshot.compareAndSet(false,
+ context.getReplicatedLog().shouldCaptureSnapshot(entry.getIndex()));
+
if (entry.getData() instanceof ServerConfigurationPayload) {
context.updatePeerIds((ServerConfigurationPayload)entry.getData());
}
// an entry at prevLogIndex and this follower has no entries in
// its log.
- log.debug("{}: The followers log is empty and the senders prevLogIndex is {}",
+ log.info("{}: The followers log is empty and the senders prevLogIndex is {}",
logName(), appendEntries.getPrevLogIndex());
} else if (lastIndex > -1 && appendEntries.getPrevLogIndex() != -1 && !prevEntryPresent) {
// The follower's log is out of sync because the Leader's
// prevLogIndex entry was not found in its log
- log.debug("{}: The log is not empty but the prevLogIndex {} was not found in it - "
+ log.info("{}: The log is not empty but the prevLogIndex {} was not found in it - "
+ "lastIndex: {}, snapshotIndex: {}", logName(), appendEntries.getPrevLogIndex(), lastIndex,
context.getReplicatedLog().getSnapshotIndex());
} else if (lastIndex > -1 && prevEntryPresent && prevLogTerm != appendEntries.getPrevLogTerm()) {
// prevLogIndex entry does exist in the follower's log but it has
// a different term in it
- log.debug("{}: The prevLogIndex {} was found in the log but the term {} is not equal to the append entries"
+ log.info("{}: The prevLogIndex {} was found in the log but the term {} is not equal to the append entries"
+ "prevLogTerm {} - lastIndex: {}, snapshotIndex: {}", logName(), appendEntries.getPrevLogIndex(),
prevLogTerm, appendEntries.getPrevLogTerm(), lastIndex,
context.getReplicatedLog().getSnapshotIndex());
// This append entry comes from a leader who has its log aggressively trimmed and so does not have
// the previous entry in its in-memory journal
- log.debug("{}: Cannot append entries because the replicatedToAllIndex {} does not appear to be in the"
+ log.info("{}: Cannot append entries because the replicatedToAllIndex {} does not appear to be in the"
+ " in-memory journal", logName(), appendEntries.getReplicatedToAllIndex());
} else if (appendEntries.getPrevLogIndex() == -1 && appendEntries.getPrevLogTerm() == -1
&& appendEntries.getReplicatedToAllIndex() != -1 && numLogEntries > 0
&& !isLogEntryPresent(appendEntries.getEntries().get(0).getIndex() - 1)) {
- log.debug("{}: Cannot append entries because the calculated previousIndex {} was not found in the "
+ log.info("{}: Cannot append entries because the calculated previousIndex {} was not found in the "
+ " in-memory journal", logName(), appendEntries.getEntries().get(0).getIndex() - 1);
} else {
outOfSync = false;
// set currentTerm = T, convert to follower (§5.1)
// This applies to all RPC messages and responses
if (rpc.getTerm() > context.getTermInformation().getCurrentTerm()) {
- log.debug("{}: Term {} in \"{}\" message is greater than follower's term {} - updating term",
+ log.info("{}: Term {} in \"{}\" message is greater than follower's term {} - updating term",
logName(), rpc.getTerm(), rpc, context.getTermInformation().getCurrentTerm());
context.getTermInformation().updateAndPersist(rpc.getTerm(), null);
leaderId = installSnapshot.getLeaderId();
if (snapshotTracker == null) {
- snapshotTracker = new SnapshotTracker(log, installSnapshot.getTotalChunks(), installSnapshot.getLeaderId());
+ snapshotTracker = new SnapshotTracker(log, installSnapshot.getTotalChunks(), installSnapshot.getLeaderId(),
+ context);
}
updateInitialSyncStatus(installSnapshot.getLastIncludedIndex(), installSnapshot.getLeaderId());
if (snapshotTracker.addChunk(installSnapshot.getChunkIndex(), installSnapshot.getData(),
installSnapshot.getLastChunkHashCode())) {
- Snapshot snapshot = Snapshot.create(snapshotTracker.getSnapshot(),
+
+ log.info("{}: Snapshot installed from leader: {}", logName(), installSnapshot.getLeaderId());
+
+ Snapshot snapshot = Snapshot.create(
+ context.getSnapshotManager().convertSnapshot(snapshotTracker.getSnapshotBytes()),
new ArrayList<>(),
installSnapshot.getLastIncludedIndex(),
installSnapshot.getLastIncludedTerm(),
actor().tell(new ApplySnapshot(snapshot, applySnapshotCallback), actor());
- snapshotTracker = null;
+ closeSnapshotTracker();
} else {
log.debug("{}: handleInstallSnapshot returning: {}", logName(), reply);
sender.tell(reply, actor());
}
- } catch (SnapshotTracker.InvalidChunkException e) {
+ } catch (IOException e) {
log.debug("{}: Exception in InstallSnapshot of follower", logName(), e);
sender.tell(new InstallSnapshotReply(currentTerm(), context.getId(),
-1, false), actor());
- snapshotTracker = null;
+ closeSnapshotTracker();
+ }
+ }
+
+ private void closeSnapshotTracker() { // Closes and discards the in-progress snapshot tracker; does nothing when there is none.
+ if (snapshotTracker != null) {
+ snapshotTracker.close(); // NOTE(review): presumably releases whatever the tracker holds for received chunks - confirm in SnapshotTracker
+ snapshotTracker = null; // with the field null, the next InstallSnapshot handling creates a fresh tracker
}
}
@Override
public void close() { // Discards any in-progress snapshot tracker, then calls stopElection().
+ closeSnapshotTracker(); // safe when no snapshot install is in progress: the helper checks for null itself
stopElection();
}