/* * Copyright (c) 2014 Cisco Systems, Inc. and others. All rights reserved. * * This program and the accompanying materials are made available under the * terms of the Eclipse Public License v1.0 which accompanies this distribution, * and is available at http://www.eclipse.org/legal/epl-v10.html */ package org.opendaylight.controller.cluster.raft; import akka.actor.ActorRef; import akka.actor.ActorSelection; import akka.event.Logging; import akka.event.LoggingAdapter; import akka.japi.Procedure; import akka.persistence.RecoveryCompleted; import akka.persistence.SaveSnapshotFailure; import akka.persistence.SaveSnapshotSuccess; import akka.persistence.SnapshotOffer; import akka.persistence.SnapshotSelectionCriteria; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Optional; import com.google.common.base.Stopwatch; import com.google.protobuf.ByteString; import org.opendaylight.controller.cluster.DataPersistenceProvider; import org.opendaylight.controller.cluster.common.actor.AbstractUntypedPersistentActor; import org.opendaylight.controller.cluster.raft.base.messages.ApplyLogEntries; import org.opendaylight.controller.cluster.raft.base.messages.ApplySnapshot; import org.opendaylight.controller.cluster.raft.base.messages.ApplyState; import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshot; import org.opendaylight.controller.cluster.raft.base.messages.CaptureSnapshotReply; import org.opendaylight.controller.cluster.raft.base.messages.Replicate; import org.opendaylight.controller.cluster.raft.base.messages.SendHeartBeat; import org.opendaylight.controller.cluster.raft.base.messages.SendInstallSnapshot; import org.opendaylight.controller.cluster.raft.behaviors.AbstractRaftActorBehavior; import org.opendaylight.controller.cluster.raft.behaviors.Follower; import org.opendaylight.controller.cluster.raft.behaviors.RaftActorBehavior; import org.opendaylight.controller.cluster.raft.client.messages.FindLeader; import 
org.opendaylight.controller.cluster.raft.client.messages.FindLeaderReply; import org.opendaylight.controller.cluster.raft.messages.AppendEntriesReply; import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload; import org.opendaylight.controller.protobuff.messages.cluster.raft.AppendEntriesMessages; import java.io.Serializable; import java.util.Map; /** * RaftActor encapsulates a state machine that needs to be kept synchronized * in a cluster. It implements the RAFT algorithm as described in the paper * * In Search of an Understandable Consensus Algorithm *
* RaftActor has 3 states and each state has a certain behavior associated * with it. A Raft actor can behave as, ** This is to account for situations where we know that a peer * exists but we do not know an address up-front. This may also be used in * situations where a known peer starts off in a different location and we * need to change its address *
* Note that if the peerId does not match the list of peers passed to * this actor during construction an IllegalStateException will be thrown. * * @param peerId * @param peerAddress */ protected void setPeerAddress(String peerId, String peerAddress){ context.setPeerAddress(peerId, peerAddress); } protected void commitSnapshot(long sequenceNumber) { context.getReplicatedLog().snapshotCommit(); // TODO: Not sure if we want to be this aggressive with trimming stuff trimPersistentData(sequenceNumber); } /** * The applyState method will be called by the RaftActor when some data * needs to be applied to the actor's state * * @param clientActor A reference to the client who sent this message. This * is the same reference that was passed to persistData * by the derived actor. clientActor may be null when * the RaftActor is behaving as a follower or during * recovery. * @param identifier The identifier of the persisted data. This is also * the same identifier that was passed to persistData by * the derived actor. identifier may be null when * the RaftActor is behaving as a follower or during * recovery * @param data A piece of data that was persisted by the persistData call. * This should NEVER be null. */ protected abstract void applyState(ActorRef clientActor, String identifier, Object data); /** * This method is called during recovery at the start of a batch of state entries. Derived * classes should perform any initialization needed to start a batch. */ protected abstract void startLogRecoveryBatch(int maxBatchSize); /** * This method is called during recovery to append state data to the current batch. This method * is called 1 or more times after {@link #startLogRecoveryBatch}. * * @param data the state data */ protected abstract void appendRecoveredLogEntry(Payload data); /** * This method is called during recovery to reconstruct the state of the actor. 
*
     * @param snapshot A snapshot of the state of the actor
     */
    protected abstract void applyRecoverySnapshot(ByteString snapshot);

    /**
     * Invoked during recovery when a batch ends, so that the log entries batched
     * so far can be applied. This method is called after
     * {@link #appendRecoveredLogEntry}.
     */
    protected abstract void applyCurrentLogRecoveryBatch();

    /**
     * Invoked once recovery is complete.
     */
    protected abstract void onRecoveryComplete();

    /**
     * This method will be called by the RaftActor when a snapshot needs to be
     * created. The derived actor should respond with its current state.
     *
* During recovery the state that is returned by the derived actor will
     * be passed back to it by calling the applySnapshot method.
     * NOTE(review): applyRecoverySnapshot is documented as the recovery-time
     * hook, so the sentence above may be stale - confirm.
     *
     * This method itself returns nothing; presumably the state reaches the
     * RaftActor through handleCaptureSnapshotReply - TODO confirm.
     */
    protected abstract void createSnapshot();

    /**
     * This method can be called at any other point during normal
     * operations when the derived actor is out of sync with its peers
     * and the only way to bring it in sync is by applying a snapshot
     *
     * @param snapshot A snapshot of the state of the actor
     */
    protected abstract void applySnapshot(ByteString snapshot);

    /**
     * This method will be called by the RaftActor when the state of the
     * RaftActor changes. The derived actor can then use methods like
     * isLeader or getLeader to do something useful
     */
    protected abstract void onStateChanged();

    /**
     * Supplies the DataPersistenceProvider this actor uses to save and delete
     * snapshots and to delete journal messages.
     *
     * @return the persistence provider
     */
    protected abstract DataPersistenceProvider persistence();

    /**
     * Hook with an empty default implementation. Presumably invoked when the
     * known leader changes; the call site is not visible here - TODO confirm.
     *
     * @param oldLeader the previous leader
     * @param newLeader the new leader
     */
    protected void onLeaderChanged(String oldLeader, String newLeader) {
    }

    /**
     * Deletes old akka snapshots and journal messages.
     *
     * @param sequenceNumber journal messages are deleted with this sequence
     *                       number; snapshots are selected relative to it
     */
    private void trimPersistentData(long sequenceNumber) {
        // Trim akka snapshots
        // SnapshotSelectionCriteria bounds are ANDed: a snapshot is selected only
        // when its sequence number AND its timestamp are both within the bounds.
        // maxTimestamp is an absolute epoch-millis value, so the 12 hour window
        // has to be subtracted from the current time. Passing the bare duration
        // (43200000) selects only snapshots taken before 1970-01-01T12:00Z,
        // i.e. none at all.
        // NOTE(review): "older than 12 hours" is the assumed intent - confirm.
        final long snapshotRetentionMillis = 12L * 60 * 60 * 1000;
        final long maxTimestamp = System.currentTimeMillis() - snapshotRetentionMillis;

        persistence().deleteSnapshots(new SnapshotSelectionCriteria(
            sequenceNumber - context.getConfigParams().getSnapshotBatchCount(),
            maxTimestamp));

        // Trim akka journal
        persistence().deleteMessages(sequenceNumber);
    }

    /**
     * Resolves the address of the current leader.
     *
     * @return this actor's own path when it is the leader; otherwise the peer
     *         address the context holds for the current behavior's leader id,
     *         or null when no leader id is known
     */
    private String getLeaderAddress() {
        if (isLeader()) {
            return getSelf().path().toString();
        }

        String leaderId = currentBehavior.getLeaderId();
        if (leaderId == null) {
            return null;
        }

        String peerAddress = context.getPeerAddress(leaderId);
        if (LOG.isDebugEnabled()) {
            LOG.debug("getLeaderAddress leaderId = {} peerAddress = {}",
                leaderId, peerAddress);
        }

        return peerAddress;
    }

    /**
     * Builds a Snapshot from the supplied state and the pending captureSnapshot
     * details, hands it to the persistence provider, and pre-commits the
     * snapshot on the replicated log. When this actor is the leader and the
     * capture was initiated for an install-snapshot, the state is also passed
     * to the current behavior as a SendInstallSnapshot message. Finally the
     * pending capture is cleared.
     *
     * @param stateInBytes the state to store in the snapshot
     */
    private void handleCaptureSnapshotReply(ByteString stateInBytes) {
        // create a snapshot object from the state provided and save it
        // when snapshot is saved async, SaveSnapshotSuccess is raised.
        Snapshot sn = Snapshot.create(stateInBytes.toByteArray(),
            context.getReplicatedLog().getFrom(captureSnapshot.getLastAppliedIndex() + 1),
            captureSnapshot.getLastIndex(), captureSnapshot.getLastTerm(),
            captureSnapshot.getLastAppliedIndex(), captureSnapshot.getLastAppliedTerm());

        persistence().saveSnapshot(sn);

        LOG.info("Persisting of snapshot done:{}", sn.getLogMessage());

        //be greedy and remove entries from in-mem journal which are in the snapshot
        // and update snapshotIndex and snapshotTerm without waiting for the success,
        context.getReplicatedLog().snapshotPreCommit(
            captureSnapshot.getLastAppliedIndex(),
            captureSnapshot.getLastAppliedTerm());

        LOG.info("Removed in-memory snapshotted entries, adjusted snaphsotIndex:{} " +
            "and term:{}", captureSnapshot.getLastAppliedIndex(),
            captureSnapshot.getLastAppliedTerm());

        if (isLeader() && captureSnapshot.isInstallSnapshotInitiated()) {
            // this would be call straight to the leader and won't initiate in serialization
            currentBehavior.handleMessage(getSelf(), new SendInstallSnapshot(stateInBytes));
        }

        captureSnapshot = null;
        hasSnapshotCaptureInitiated = false;
    }

    private class ReplicatedLogImpl extends AbstractReplicatedLogImpl { public ReplicatedLogImpl(Snapshot snapshot) { super(snapshot.getLastAppliedIndex(), snapshot.getLastAppliedTerm(), snapshot.getUnAppliedEntries()); } public ReplicatedLogImpl() { super(); } @Override public void removeFromAndPersist(long logEntryIndex) { int adjustedIndex = adjustedIndex(logEntryIndex); if (adjustedIndex < 0) { return; } // FIXME: Maybe this should be done after the command is saved journal.subList(adjustedIndex , journal.size()).clear(); persistence().persist(new DeleteEntries(adjustedIndex), new Procedure