X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=controller.git;a=blobdiff_plain;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FShardRecoveryCoordinator.java;h=5b7f289eed4ea2fa37296a07b5e47e54af2201ac;hp=94fb5841021ea994e019d946e3e106f7d95256c3;hb=4bbbd57c8f96e864d4353c1bdbce0dc068a6a57b;hpb=c2f8c5b00708ab1d9e003ede0c960133989d5e0c diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java index 94fb584102..5b7f289eed 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java @@ -7,27 +7,15 @@ */ package org.opendaylight.controller.cluster.datastore; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -import org.opendaylight.controller.cluster.datastore.modification.MutableCompositeModification; -import org.opendaylight.controller.cluster.datastore.node.NormalizedNodeToNodeCodec; -import org.opendaylight.controller.protobuff.messages.common.NormalizedNodeMessages; -import org.opendaylight.controller.sal.core.spi.data.DOMStoreWriteTransaction; -import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier; +import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; +import java.io.File; +import org.opendaylight.controller.cluster.datastore.persisted.ShardDataTreeSnapshot; +import org.opendaylight.controller.cluster.datastore.utils.NormalizedNodeXMLOutput; +import org.opendaylight.controller.cluster.raft.RaftActorRecoveryCohort; +import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload; import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode; -import org.opendaylight.yangtools.yang.model.api.SchemaContext; import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import com.google.protobuf.ByteString; -import com.google.protobuf.InvalidProtocolBufferException; /** * Coordinates persistence recovery of journal log entries and snapshots for a shard. Each snapshot @@ -36,122 +24,88 @@ import com.google.protobuf.InvalidProtocolBufferException; * committed to the data store in the order the corresponding snapshot or log batch are received * to preserve data store integrity. * - * @author Thomas Panetelis + * @author Thomas Pantelis */ -class ShardRecoveryCoordinator { - - private static final int TIME_OUT = 10; - - private static final Logger LOG = LoggerFactory.getLogger(ShardRecoveryCoordinator.class); - - private final List resultingTxList = Lists.newArrayList(); - private final SchemaContext schemaContext; +class ShardRecoveryCoordinator implements RaftActorRecoveryCohort { + private final ShardDataTree store; private final String shardName; - private final ExecutorService executor; + private final Logger log; + private final byte[] restoreFromSnapshot; - ShardRecoveryCoordinator(String shardName, SchemaContext schemaContext) { - this.schemaContext = schemaContext; - this.shardName = shardName; + private boolean open; - executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors(), - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("ShardRecovery-" + shardName + "-%d").build()); - } + ShardRecoveryCoordinator(final ShardDataTree store, final byte[] restoreFromSnapshot, final String shardName, final Logger log) { + this.store = Preconditions.checkNotNull(store); + this.shardName = Preconditions.checkNotNull(shardName); + this.log = Preconditions.checkNotNull(log); - /** - * Submits a batch of journal log entries. - * - * @param logEntries the serialized journal log entries - * @param resultingTx the write Tx to which to apply the entries - */ - void submit(List logEntries, DOMStoreWriteTransaction resultingTx) { - LogRecoveryTask task = new LogRecoveryTask(logEntries, resultingTx); - resultingTxList.add(resultingTx); - executor.execute(task); + this.restoreFromSnapshot = restoreFromSnapshot; } - /** - * Submits a snapshot. - * - * @param snapshot the serialized snapshot - * @param resultingTx the write Tx to which to apply the entries - */ - void submit(ByteString snapshot, DOMStoreWriteTransaction resultingTx) { - SnapshotRecoveryTask task = new SnapshotRecoveryTask(snapshot, resultingTx); - resultingTxList.add(resultingTx); - executor.execute(task); + @Override + public void startLogRecoveryBatch(final int maxBatchSize) { + log.debug("{}: starting log recovery batch with max size {}", shardName, maxBatchSize); + open = true; } - Collection getTransactions() { - // Shutdown the executor and wait for task completion. - executor.shutdown(); + @Override + public void appendRecoveredLogEntry(final Payload payload) { + Preconditions.checkState(open, "call startLogRecovery before calling appendRecoveredLogEntry"); try { - if(executor.awaitTermination(TIME_OUT, TimeUnit.MINUTES)) { - return resultingTxList; - } else { - LOG.error("Recovery for shard {} timed out after {} minutes", shardName, TIME_OUT); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); + store.applyRecoveryPayload(payload); + } catch (Exception e) { + log.error("{}: failed to apply payload {}", shardName, payload, e); + throw new IllegalStateException(String.format("%s: Failed to apply recovery payload %s", + shardName, payload), e); } - - return Collections.emptyList(); } - private static abstract class ShardRecoveryTask implements Runnable { - - final DOMStoreWriteTransaction resultingTx; - - ShardRecoveryTask(DOMStoreWriteTransaction resultingTx) { - this.resultingTx = resultingTx; - } + /** + * Applies the current batched log entries to the data store. + */ + @Override + public void applyCurrentLogRecoveryBatch() { + Preconditions.checkState(open, "call startLogRecovery before calling applyCurrentLogRecoveryBatch"); + open = false; } - private class LogRecoveryTask extends ShardRecoveryTask { + private File writeRoot(final String kind, final NormalizedNode node) { + final File file = new File(System.getProperty("karaf.data", "."), + "failed-recovery-" + kind + "-" + shardName + ".xml"); + NormalizedNodeXMLOutput.toFile(file, node); + return file; + } - private final List logEntries; + /** + * Applies a recovered snapshot to the data store. + * + * @param snapshotBytes the serialized snapshot + */ + @Override + public void applyRecoverySnapshot(final byte[] snapshotBytes) { + log.debug("{}: Applying recovered snapshot", shardName); - LogRecoveryTask(List logEntries, DOMStoreWriteTransaction resultingTx) { - super(resultingTx); - this.logEntries = logEntries; + final ShardDataTreeSnapshot snapshot; + try { + snapshot = ShardDataTreeSnapshot.deserialize(snapshotBytes); + } catch (Exception e) { + log.error("{}: failed to deserialize snapshot", shardName, e); + throw Throwables.propagate(e); } - @Override - public void run() { - for(int i = 0; i < logEntries.size(); i++) { - MutableCompositeModification.fromSerializable( - logEntries.get(i), schemaContext).apply(resultingTx); - // Null out to GC quicker. - logEntries.set(i, null); - } + try { + store.applyRecoverySnapshot(snapshot); + } catch (Exception e) { + final File f = writeRoot("snapshot", snapshot.getRootNode().orElse(null)); + throw new IllegalStateException(String.format( + "%s: Failed to apply recovery snapshot %s. Node data was written to file %s", + shardName, snapshot, f), e); } } - private class SnapshotRecoveryTask extends ShardRecoveryTask { - - private final ByteString snapshot; - - SnapshotRecoveryTask(ByteString snapshot, DOMStoreWriteTransaction resultingTx) { - super(resultingTx); - this.snapshot = snapshot; - } - - @Override - public void run() { - try { - NormalizedNodeMessages.Node serializedNode = NormalizedNodeMessages.Node.parseFrom(snapshot); - NormalizedNode node = new NormalizedNodeToNodeCodec(schemaContext).decode( - serializedNode); - - // delete everything first - resultingTx.delete(YangInstanceIdentifier.builder().build()); - - // Add everything from the remote node back - resultingTx.write(YangInstanceIdentifier.builder().build(), node); - } catch (InvalidProtocolBufferException e) { - LOG.error("Error deserializing snapshot", e); - } - } + @Override + public byte[] getRestoreFromSnapshot() { + return restoreFromSnapshot; } }