2 * Copyright (c) 2019 Pantheon Technologies, s.r.o. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.controller.akka.segjournal;
10 import static com.google.common.base.Verify.verifyNotNull;
11 import static java.util.Objects.requireNonNull;
13 import akka.actor.AbstractActor;
14 import akka.actor.Props;
15 import akka.persistence.AtomicWrite;
16 import akka.persistence.PersistentRepr;
17 import com.codahale.metrics.Histogram;
18 import com.codahale.metrics.Meter;
19 import com.codahale.metrics.MetricRegistry;
20 import com.codahale.metrics.Timer;
21 import com.esotericsoftware.kryo.serializers.DefaultSerializers.LongSerializer;
22 import com.google.common.base.MoreObjects;
23 import com.google.common.base.Stopwatch;
24 import io.atomix.storage.journal.Indexed;
25 import io.atomix.storage.journal.SegmentedJournal;
26 import io.atomix.storage.journal.SegmentedJournalWriter;
27 import io.atomix.storage.journal.StorageLevel;
28 import io.atomix.utils.serializer.Namespace;
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.Optional;
33 import java.util.concurrent.TimeUnit;
34 import java.util.function.Consumer;
35 import org.opendaylight.controller.cluster.common.actor.MeteringBehavior;
36 import org.opendaylight.controller.cluster.reporting.MetricsReporter;
37 import org.slf4j.Logger;
38 import org.slf4j.LoggerFactory;
39 import scala.concurrent.Future;
40 import scala.concurrent.Promise;
43 * This actor handles a single PersistentActor's journal. The journal is split into two {@link SegmentedJournal}s:
45 * <li>A memory-mapped data journal, containing actual data entries</li>
46 * <li>A simple file journal, containing sequence numbers of last deleted entry</li>
50 * This is a conscious design decision to minimize the amount of data that is being stored in the data journal while
51 * speeding up normal operations. Since the SegmentedJournal is an append-only linear log and Akka requires the ability
52 * to delete persistence entries, we need a way to mark a subset of a SegmentedJournal as deleted. While we could
53 * treat such delete requests as normal events, this leads to a mismatch between SegmentedJournal indices (as exposed by
54 * {@link Indexed}) and Akka sequence numbers -- requiring us to potentially perform costly deserialization to find the
55 * index corresponding to a particular sequence number, or maintain moderately-complex logic and data structures to
56 * perform that mapping in sub-linear time complexity.
59 * Split-file approach allows us to treat sequence numbers and indices as equivalent, without maintaining any explicit
60 * mapping information. The only additional information we need to maintain is the last deleted sequence number.
62 * @author Robert Varga
64 final class SegmentedJournalActor extends AbstractActor {
65 abstract static class AsyncMessage<T> {
66 final Promise<T> promise = Promise.apply();
69 private static final class ReadHighestSequenceNr extends AsyncMessage<Long> {
70 private final long fromSequenceNr;
72 ReadHighestSequenceNr(final long fromSequenceNr) {
73 this.fromSequenceNr = fromSequenceNr;
77 public String toString() {
78 return MoreObjects.toStringHelper(this).add("fromSequenceNr", fromSequenceNr).toString();
/**
 * Request to replay stored messages. Carries the sequence-number bounds and the callback supplied by the caller;
 * the callback is required to be non-null.
 */
82 static final class ReplayMessages extends AsyncMessage<Void> {
// Private: read only inside the enclosing class (see handleReplayMessages)
83 private final long fromSequenceNr;
84 final long toSequenceNr;
// NOTE(review): the embedded numbering skips line 85 and no declaration of 'max' is visible here, although 'max'
// is a constructor parameter and is printed by toString() -- presumably the field declaration was lost in
// extraction; confirm against the full file.
86 final Consumer<PersistentRepr> replayCallback;
88 ReplayMessages(final long fromSequenceNr,
89 final long toSequenceNr, final long max, final Consumer<PersistentRepr> replayCallback) {
90 this.fromSequenceNr = fromSequenceNr;
91 this.toSequenceNr = toSequenceNr;
// NOTE(review): line 92 is skipped as well -- presumably the assignment of 'max'; confirm.
// Rejects a null callback with NullPointerException
93 this.replayCallback = requireNonNull(replayCallback);
// Reports the numeric bounds only; the callback is not included
97 public String toString() {
98 return MoreObjects.toStringHelper(this).add("fromSequenceNr", fromSequenceNr)
99 .add("toSequenceNr", toSequenceNr).add("max", max).toString();
/**
 * A batch of write requests. Keeps two lists: the {@link AtomicWrite} requests and one result promise per
 * request, addressed by the same index.
 */
103 static final class WriteMessages {
104 private final List<AtomicWrite> requests = new ArrayList<>();
105 private final List<Promise<Optional<Exception>>> results = new ArrayList<>();
// Registers a write: creates its result promise, stores it and hands the matching future back to the caller
107 Future<Optional<Exception>> add(final AtomicWrite write) {
108 final Promise<Optional<Exception>> promise = Promise.apply();
// NOTE(review): the embedded numbering skips line 109 and nothing visible ever appends 'write' to 'requests' --
// presumably that statement was lost in extraction; confirm against the full file.
110 results.add(promise);
111 return promise.future();
// NOTE(review): the header of the method owning the next statement (line 114) is not visible; it returns the
// number of queued requests.
115 return requests.size();
// Returns the request stored at the given position
118 AtomicWrite getRequest(final int index) {
119 return requests.get(index);
// Reports a failed write. The promise itself is completed successfully; the failure travels as the Optional's value.
122 void setFailure(final int index, final Exception cause) {
123 results.get(index).success(Optional.of(cause));
// Reports a successful write by completing the promise with an empty Optional
127 void setSuccess(final int index) {
128 results.get(index).success(Optional.empty());
// Reports the queued requests only; result promises are not included
132 public String toString() {
133 return MoreObjects.toStringHelper(this).add("requests", requests).toString();
137 private static final class DeleteMessagesTo extends AsyncMessage<Void> {
138 final long toSequenceNr;
140 DeleteMessagesTo(final long toSequenceNr) {
141 this.toSequenceNr = toSequenceNr;
145 public String toString() {
146 return MoreObjects.toStringHelper(this).add("toSequenceNr", toSequenceNr).toString();
150 private static final Logger LOG = LoggerFactory.getLogger(SegmentedJournalActor.class);
// Serialization namespace of the delete journal: registers a serializer for Long entries only.
// NOTE(review): the embedded numbering skips line 153 and the builder chain has no visible terminator --
// presumably lost in extraction; confirm against the full file.
151 private static final Namespace DELETE_NAMESPACE = Namespace.builder()
152 .register(new LongSerializer(), Long.class)
// Maximum segment size handed to the delete journal builder (64 * 1024)
154 private static final int DELETE_SEGMENT_SIZE = 64 * 1024;
// Construction-time configuration; the object references are null-checked in the constructor
156 private final String persistenceId;
157 private final StorageLevel storage;
158 private final int maxSegmentSize;
159 private final int maxEntrySize;
160 private final File directory;
162 // Tracks the time it took us to write a batch of messages
163 private Timer batchWriteTime;
164 // Tracks the number of individual messages written
165 private Meter messageWriteCount;
166 // Tracks the size distribution of messages
167 private Histogram messageSize;
// Journal state: both journals stay null until ensureOpen() creates them
169 private DataJournal dataJournal;
170 private SegmentedJournal<Long> deleteJournal;
// Last value recorded in the delete journal, 0 when the journal has no entry (see ensureOpen())
171 private long lastDelete;
/**
 * Records the actor's configuration. Nothing is opened here, the constructor only stores its arguments.
 *
 * @param persistenceId identifier used as the prefix of this actor's log messages
 * @param directory directory in which the journals are kept
 * @param storage storage level handed to the data journal
 * @param maxEntrySize maximum entry size handed to the data journal
 * @param maxSegmentSize maximum segment size handed to the data journal
 * @throws NullPointerException if {@code persistenceId}, {@code directory} or {@code storage} is null
 */
SegmentedJournalActor(final String persistenceId, final File directory, final StorageLevel storage,
        final int maxEntrySize, final int maxSegmentSize) {
    // Plain values need no validation
    this.maxSegmentSize = maxSegmentSize;
    this.maxEntrySize = maxEntrySize;

    // References are checked in declaration order of the parameters
    this.persistenceId = requireNonNull(persistenceId);
    this.directory = requireNonNull(directory);
    this.storage = requireNonNull(storage);
}
182 static Props props(final String persistenceId, final File directory, final StorageLevel storage,
183 final int maxEntrySize, final int maxSegmentSize) {
184 return Props.create(SegmentedJournalActor.class, requireNonNull(persistenceId), directory, storage,
185 maxEntrySize, maxSegmentSize);
/**
 * Defines message dispatch: each of the four request types is routed to its dedicated handler and any other
 * message goes to handleUnknown().
 */
189 public Receive createReceive() {
190 return receiveBuilder()
191 .match(DeleteMessagesTo.class, this::handleDeleteMessagesTo)
192 .match(ReadHighestSequenceNr.class, this::handleReadHighestSequenceNr)
193 .match(ReplayMessages.class, this::handleReplayMessages)
194 .match(WriteMessages.class, this::handleWriteMessages)
// Catch-all, has to stay last
195 .matchAny(this::handleUnknown)
// NOTE(review): the embedded numbering skips line 196 and the chain has no visible terminator -- presumably the
// call that turns the builder into a Receive was lost in extraction; confirm against the full file.
/**
 * Start-up hook: logs the start and registers the three write metrics of this actor.
 */
200 public void preStart() throws Exception {
201 LOG.debug("{}: actor starting", persistenceId);
// NOTE(review): the embedded numbering skips lines 202-203 here; whatever they contained is not visible in this
// view -- confirm against the full file.
204 final MetricRegistry registry = MetricsReporter.getInstance(MeteringBehavior.DOMAIN).getMetricsRegistry();
// Metric prefix: the parent actor's path without address, followed by the journal directory's name
205 final String actorName = self().path().parent().toStringWithoutAddress() + '/' + directory.getName();
207 batchWriteTime = registry.timer(MetricRegistry.name(actorName, "batchWriteTime"));
208 messageWriteCount = registry.meter(MetricRegistry.name(actorName, "messageWriteCount"));
209 messageSize = registry.histogram(MetricRegistry.name(actorName, "messageSize"));
/**
 * Shutdown hook: releases whichever journals are currently open and logs each step.
 */
213 public void postStop() throws Exception {
214 LOG.debug("{}: actor stopping", persistenceId);
215 if (dataJournal != null) {
// NOTE(review): the embedded numbering skips lines 216 and 218-219; the statements that actually close and reset
// the data journal are not visible -- presumably they mirror the delete journal handling below; confirm.
217 LOG.debug("{}: data journal closed", persistenceId);
220 if (deleteJournal != null) {
221 deleteJournal.close();
222 LOG.debug("{}: delete journal closed", persistenceId);
// Reset the reference so the journal is not treated as open afterwards
223 deleteJournal = null;
225 LOG.debug("{}: actor stopped", persistenceId);
// NOTE(review): lines 226-228 are skipped; more than a closing brace may be missing here -- confirm.
229 static AsyncMessage<Void> deleteMessagesTo(final long toSequenceNr) {
230 return new DeleteMessagesTo(toSequenceNr);
233 static AsyncMessage<Long> readHighestSequenceNr(final long fromSequenceNr) {
234 return new ReadHighestSequenceNr(fromSequenceNr);
237 static AsyncMessage<Void> replayMessages(final long fromSequenceNr, final long toSequenceNr, final long max,
238 final Consumer<PersistentRepr> replayCallback) {
239 return new ReplayMessages(fromSequenceNr, toSequenceNr, max, replayCallback);
/**
 * Handles a delete request: records the new deletion point in the delete journal, applies it to the data journal,
 * compacts both and completes the request's promise.
 */
242 private void handleDeleteMessagesTo(final DeleteMessagesTo message) {
// NOTE(review): the embedded numbering skips lines 243-244 here; not visible in this view -- confirm.
245 LOG.debug("{}: delete messages {}", persistenceId, message);
// Never delete beyond what has actually been written
246 final long to = Long.min(dataJournal.lastWrittenSequenceNr(), message.toSequenceNr);
247 LOG.debug("{}: adjusted delete to {}", persistenceId, to);
249 if (lastDelete < to) {
250 LOG.debug("{}: deleting entries up to {}", persistenceId, to);
// NOTE(review): lines 251-252 are skipped. As visible, 'lastDelete' is appended below without ever being set
// from 'to' -- presumably that assignment was lost in extraction; confirm against the full file.
253 final SegmentedJournalWriter<Long> deleteWriter = deleteJournal.writer();
// Persist the deletion point first, then apply it to the data journal
254 final Indexed<Long> entry = deleteWriter.append(lastDelete);
255 deleteWriter.commit(entry.index());
256 dataJournal.deleteTo(lastDelete);
258 LOG.debug("{}: compaction started", persistenceId);
259 dataJournal.compactTo(lastDelete);
// Compact the delete journal up to the entry that was just written
260 deleteJournal.compact(entry.index());
261 LOG.debug("{}: compaction finished", persistenceId);
// NOTE(review): line 262 is skipped; the statement below presumably belongs to the alternative branch taken
// when nothing new has to be deleted -- confirm.
263 LOG.debug("{}: entries up to {} already deleted", persistenceId, lastDelete);
// The promise carries no value; completing it with null signals that handling has finished
266 message.promise.success(null);
/**
 * Handles a highest-sequence-number request and completes its promise with the result.
 */
269 private void handleReadHighestSequenceNr(final ReadHighestSequenceNr message) {
270 LOG.debug("{}: looking for highest sequence on {}", persistenceId, message);
// NOTE(review): the embedded numbering skips lines 271 and 273; the declaration of 'sequence' is not visible.
// The data journal is consulted only when the journal directory exists
272 if (directory.isDirectory()) {
274 sequence = dataJournal.lastWrittenSequenceNr();
// NOTE(review): lines 275-278 are skipped; the value reported when the directory does not exist is not visible
// in this view -- confirm against the full file.
// NOTE(review): the first argument is 'message', whereas every other log statement in this class passes
// 'persistenceId' for the leading placeholder -- looks unintended; confirm.
279 LOG.debug("{}: highest sequence is {}", message, sequence);
280 message.promise.success(sequence);
/**
 * Handles a replay request: moves the starting point past deleted entries and delegates to the data journal.
 */
283 private void handleReplayMessages(final ReplayMessages message) {
284 LOG.debug("{}: replaying messages {}", persistenceId, message);
// NOTE(review): the embedded numbering skips lines 285-286 here; not visible in this view -- confirm.
// Start no earlier than the entry following the last deleted one
287 final long from = Long.max(lastDelete + 1, message.fromSequenceNr);
288 LOG.debug("{}: adjusted fromSequenceNr to {}", persistenceId, from);
290 dataJournal.handleReplayMessages(message, from);
/**
 * Handles a batch of writes by delegating to the data journal, then updates the write metrics and logs the
 * outcome.
 */
293 private void handleWriteMessages(final WriteMessages message) {
// NOTE(review): the embedded numbering skips lines 294-295 here; not visible in this view -- confirm.
296 final Stopwatch sw = Stopwatch.createStarted();
// Remember where the journal stood so the number of messages written can be derived afterwards
297 final long start = dataJournal.lastWrittenSequenceNr();
298 final long bytes = dataJournal.handleWriteMessages(message);
// NOTE(review): lines 299-300 are skipped; not visible in this view -- confirm.
301 batchWriteTime.update(sw.elapsed(TimeUnit.NANOSECONDS), TimeUnit.NANOSECONDS);
// Number of messages written is the advance of the last written sequence number
302 messageWriteCount.mark(dataJournal.lastWrittenSequenceNr() - start);
304 // log message after statistics are updated
305 LOG.debug("{}: write of {} bytes completed in {}", persistenceId, bytes, sw);
308 private void handleUnknown(final Object message) {
309 LOG.error("{}: Received unknown message {}", persistenceId, message);
/**
 * Opens the delete journal and the data journal, restoring the last recorded deletion point and applying it to
 * the data journal.
 */
312 private void ensureOpen() {
// An open data journal implies the delete journal must be open too
313 if (dataJournal != null) {
314 verifyNotNull(deleteJournal);
// NOTE(review): the embedded numbering skips lines 315-317; as visible, nothing leaves the method when the
// journals are already open -- presumably an early exit was lost in extraction; confirm against the full file.
318 final Stopwatch sw = Stopwatch.createStarted();
// The delete journal lives in the same directory under the name "delete"
319 deleteJournal = SegmentedJournal.<Long>builder().withDirectory(directory).withName("delete")
320 .withNamespace(DELETE_NAMESPACE).withMaxSegmentSize(DELETE_SEGMENT_SIZE).build();
// The most recent entry holds the last deletion point; no entry means nothing has been deleted (0)
321 final Indexed<Long> lastEntry = deleteJournal.writer().getLastEntry();
322 lastDelete = lastEntry == null ? 0 : lastEntry.entry();
324 dataJournal = new DataJournalV0(persistenceId, messageSize, context().system(), storage, directory,
325 maxEntrySize, maxSegmentSize);
// Re-apply the recorded deletion point to the freshly opened data journal
326 dataJournal.deleteTo(lastDelete);
327 LOG.debug("{}: journal open in {} with last index {}, deleted to {}", persistenceId, sw,
328 dataJournal.lastWrittenSequenceNr(), lastDelete);