X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-akka-raft%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fraft%2FRaftActorRecoverySupport.java;h=10375f9406666234bb3f6f69d14cb2ea003ef7f8;hb=d0f46920468c8e4b67c68bd9058572b2d10d75f1;hp=f071d945662a73d22803fc9ea277a88d88ef29c1;hpb=634dfac8eead60f443bf75e749c70d1f2bb29198;p=controller.git diff --git a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorRecoverySupport.java b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorRecoverySupport.java index f071d94566..10375f9406 100644 --- a/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorRecoverySupport.java +++ b/opendaylight/md-sal/sal-akka-raft/src/main/java/org/opendaylight/controller/cluster/raft/RaftActorRecoverySupport.java @@ -11,6 +11,7 @@ import akka.persistence.RecoveryCompleted; import akka.persistence.SnapshotOffer; import com.google.common.base.Stopwatch; import java.util.Collections; +import java.util.concurrent.TimeUnit; import org.opendaylight.controller.cluster.PersistentDataProvider; import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot; import org.opendaylight.controller.cluster.raft.persisted.ApplyJournalEntries; @@ -19,6 +20,7 @@ import org.opendaylight.controller.cluster.raft.persisted.EmptyState; import org.opendaylight.controller.cluster.raft.persisted.MigratedSerializable; import org.opendaylight.controller.cluster.raft.persisted.ServerConfigurationPayload; import org.opendaylight.controller.cluster.raft.persisted.Snapshot; +import org.opendaylight.controller.cluster.raft.persisted.Snapshot.State; import org.opendaylight.controller.cluster.raft.persisted.UpdateElectionTerm; import org.opendaylight.controller.cluster.raft.protobuff.client.messages.PersistentPayload; import org.slf4j.Logger; @@ -38,6 +40,7 @@ class RaftActorRecoverySupport { private boolean hasMigratedDataRecovered; private Stopwatch recoveryTimer; + private Stopwatch recoverySnapshotTimer; private final Logger log; RaftActorRecoverySupport(final RaftActorContext context, final RaftActorRecoveryCohort cohort) { @@ -99,16 +102,19 @@ class RaftActorRecoverySupport { return context.getReplicatedLog(); } - private void initRecoveryTimer() { + private void initRecoveryTimers() { if (recoveryTimer == null) { recoveryTimer = Stopwatch.createStarted(); } + if (recoverySnapshotTimer == null && context.getConfigParams().getRecoverySnapshotIntervalSeconds() > 0) { + recoverySnapshotTimer = Stopwatch.createStarted(); + } } private void onRecoveredSnapshot(final SnapshotOffer offer) { log.debug("{}: SnapshotOffer called.", context.getId()); - initRecoveryTimer(); + initRecoveryTimers(); Snapshot snapshot = (Snapshot) offer.snapshot(); @@ -136,11 +142,15 @@ class RaftActorRecoverySupport { context.setCommitIndex(snapshot.getLastAppliedIndex()); context.getTermInformation().update(snapshot.getElectionTerm(), snapshot.getElectionVotedFor()); - Stopwatch timer = Stopwatch.createStarted(); + final Stopwatch timer = Stopwatch.createStarted(); // Apply the snapshot to the actors state - if (!(snapshot.getState() instanceof EmptyState)) { - cohort.applyRecoverySnapshot(snapshot.getState()); + final State snapshotState = snapshot.getState(); + if (snapshotState.needsMigration()) { + hasMigratedDataRecovered = true; + } + if (!(snapshotState instanceof EmptyState)) { + cohort.applyRecoverySnapshot(snapshotState); } if (snapshot.getServerConfiguration() != null) { @@ -149,8 +159,8 @@ class RaftActorRecoverySupport { timer.stop(); log.info("Recovery snapshot applied for {} in {}: snapshotIndex={}, snapshotTerm={}, journal-size={}", - context.getId(), timer.toString(), replicatedLog().getSnapshotIndex(), - replicatedLog().getSnapshotTerm(), replicatedLog().size()); + context.getId(), timer, replicatedLog().getSnapshotIndex(), replicatedLog().getSnapshotTerm(), + replicatedLog().size()); } private void onRecoveredJournalLogEntry(final ReplicatedLogEntry logEntry) { @@ -195,6 +205,14 @@ class RaftActorRecoverySupport { if (logEntry != null) { lastApplied++; batchRecoveredLogEntry(logEntry); + if (shouldTakeRecoverySnapshot() && !context.getSnapshotManager().isCapturing()) { + if (currentRecoveryBatchCount > 0) { + endCurrentLogRecoveryBatch(); + } + context.setLastApplied(lastApplied); + context.setCommitIndex(lastApplied); + takeRecoverySnapshot(logEntry); + } } else { // Shouldn't happen but cover it anyway. log.error("{}: Log entry not found for index {}", context.getId(), i); @@ -215,7 +233,7 @@ class RaftActorRecoverySupport { } private void batchRecoveredLogEntry(final ReplicatedLogEntry logEntry) { - initRecoveryTimer(); + initRecoveryTimers(); int batchSize = context.getConfigParams().getJournalRecoveryLogBatchSize(); if (!isServerConfigurationPayload(logEntry)) { @@ -231,6 +249,23 @@ class RaftActorRecoverySupport { } } + private void takeRecoverySnapshot(final ReplicatedLogEntry logEntry) { + log.info("Time for recovery snapshot on entry with index {}", logEntry.getIndex()); + final SnapshotManager snapshotManager = context.getSnapshotManager(); + if (snapshotManager.capture(logEntry, -1)) { + log.info("Capturing snapshot, resetting timer for the next recovery snapshot interval."); + this.recoverySnapshotTimer.reset().start(); + } else { + log.info("SnapshotManager is not able to capture snapshot at this time. It will be retried " + + "again with the next recovered entry."); + } + } + + private boolean shouldTakeRecoverySnapshot() { + return this.recoverySnapshotTimer != null && this.recoverySnapshotTimer.elapsed(TimeUnit.SECONDS) + >= context.getConfigParams().getRecoverySnapshotIntervalSeconds(); + } + private void endCurrentLogRecoveryBatch() { cohort.applyCurrentLogRecoveryBatch(); currentRecoveryBatchCount = 0; @@ -241,17 +276,23 @@ class RaftActorRecoverySupport { endCurrentLogRecoveryBatch(); } - String recoveryTime = ""; + final String recoveryTime; if (recoveryTimer != null) { - recoveryTimer.stop(); - recoveryTime = " in " + recoveryTimer.toString(); + recoveryTime = " in " + recoveryTimer.stop(); recoveryTimer = null; + } else { + recoveryTime = ""; } - log.info("Recovery completed {} - Switching actor to Follower - Persistence Id = {}" - + " Last index in log = {}, snapshotIndex = {}, snapshotTerm = {}, journal-size = {}", - recoveryTime, context.getId(), replicatedLog().lastIndex(), replicatedLog().getSnapshotIndex(), - replicatedLog().getSnapshotTerm(), replicatedLog().size()); + if (recoverySnapshotTimer != null) { + recoverySnapshotTimer.stop(); + recoverySnapshotTimer = null; + } + + log.info("{}: Recovery completed {} - Switching actor to Follower - last log index = {}, last log term = {}, " + + "snapshot index = {}, snapshot term = {}, journal size = {}", context.getId(), recoveryTime, + replicatedLog().lastIndex(), replicatedLog().lastTerm(), replicatedLog().getSnapshotIndex(), + replicatedLog().getSnapshotTerm(), replicatedLog().size()); if (dataRecoveredWithPersistenceDisabled || hasMigratedDataRecovered && !context.getPersistenceProvider().isRecoveryApplicable()) {