X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=blobdiff_plain;ds=sidebyside;f=opendaylight%2Fmd-sal%2Fsal-distributed-datastore%2Fsrc%2Fmain%2Fjava%2Forg%2Fopendaylight%2Fcontroller%2Fcluster%2Fdatastore%2FShardRecoveryCoordinator.java;h=70a701075ec6b5867e7a765629fcc65d6b72d8cb;hb=6276a65120a674b545ea787a5e1d9311bcdbf2af;hp=238b4e46dce041add47117503fcb68feb54e8e27;hpb=014ab8770cfe40cf0ce1f2c88e0b0666429c760c;p=controller.git diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java index 238b4e46dc..70a701075e 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/ShardRecoveryCoordinator.java @@ -7,22 +7,15 @@ */ package org.opendaylight.controller.cluster.datastore; -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import org.opendaylight.controller.cluster.datastore.modification.MutableCompositeModification; -import org.opendaylight.controller.cluster.datastore.utils.SerializationUtils; -import org.opendaylight.controller.sal.core.spi.data.DOMStoreWriteTransaction; -import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier; +import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; +import java.io.File; +import org.opendaylight.controller.cluster.datastore.persisted.ShardDataTreeSnapshot; +import org.opendaylight.controller.cluster.datastore.utils.NormalizedNodeXMLOutput; +import org.opendaylight.controller.cluster.raft.RaftActorRecoveryCohort; +import org.opendaylight.controller.cluster.raft.protobuff.client.messages.Payload; import org.opendaylight.yangtools.yang.data.api.schema.NormalizedNode; -import org.opendaylight.yangtools.yang.model.api.SchemaContext; import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Coordinates persistence recovery of journal log entries and snapshots for a shard. Each snapshot @@ -31,116 +24,89 @@ import org.slf4j.LoggerFactory; * committed to the data store in the order the corresponding snapshot or log batch are received * to preserve data store integrity. * - * @author Thomas Panetelis + * @author Thomas Pantelis */ -class ShardRecoveryCoordinator { +class ShardRecoveryCoordinator implements RaftActorRecoveryCohort { + private final ShardDataTree store; + private final String shardName; + private final Logger log; + private final byte[] restoreFromSnapshot; - private static final int TIME_OUT = 10; + private boolean open; - private static final Logger LOG = LoggerFactory.getLogger(ShardRecoveryCoordinator.class); + ShardRecoveryCoordinator(final ShardDataTree store, final byte[] restoreFromSnapshot, final String shardName, final Logger log) { + this.store = Preconditions.checkNotNull(store); + this.shardName = Preconditions.checkNotNull(shardName); + this.log = Preconditions.checkNotNull(log); - private final List resultingTxList = Lists.newArrayList(); - private final SchemaContext schemaContext; - private final String shardName; - private final ExecutorService executor; + this.restoreFromSnapshot = restoreFromSnapshot; + } + + @Override + public void startLogRecoveryBatch(final int maxBatchSize) { + log.debug("{}: starting log recovery batch with max size {}", shardName, maxBatchSize); + open = true; + } - ShardRecoveryCoordinator(String shardName, SchemaContext schemaContext) { - this.schemaContext = schemaContext; - this.shardName = shardName; + @Override + public void appendRecoveredLogEntry(final Payload payload) { + Preconditions.checkState(open, "call startLogRecovery before calling appendRecoveredLogEntry"); - executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors(), - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("ShardRecovery-" + shardName + "-%d").build()); + try { + store.applyRecoveryPayload(payload); + } catch (Exception e) { + log.error("{}: failed to apply payload {}", shardName, payload, e); + throw new IllegalStateException(String.format("%s: Failed to apply recovery payload %s", + shardName, payload), e); + } } /** - * Submits a batch of journal log entries. - * - * @param logEntries the serialized journal log entries - * @param resultingTx the write Tx to which to apply the entries + * Applies the current batched log entries to the data store. */ - void submit(List logEntries, DOMStoreWriteTransaction resultingTx) { - LogRecoveryTask task = new LogRecoveryTask(logEntries, resultingTx); - resultingTxList.add(resultingTx); - executor.execute(task); + @Override + public void applyCurrentLogRecoveryBatch() { + Preconditions.checkState(open, "call startLogRecovery before calling applyCurrentLogRecoveryBatch"); + open = false; + } + + private File writeRoot(final String kind, final NormalizedNode node) { + final File file = new File(System.getProperty("karaf.data", "."), + "failed-" + kind + "-snapshot-" + shardName + ".xml"); + NormalizedNodeXMLOutput.toFile(file, node); + return file; } /** - * Submits a snapshot. + * Applies a recovered snapshot to the data store. * * @param snapshotBytes the serialized snapshot - * @param resultingTx the write Tx to which to apply the entries */ - void submit(byte[] snapshotBytes, DOMStoreWriteTransaction resultingTx) { - SnapshotRecoveryTask task = new SnapshotRecoveryTask(snapshotBytes, resultingTx); - resultingTxList.add(resultingTx); - executor.execute(task); - } - - Collection getTransactions() { - // Shutdown the executor and wait for task completion. - executor.shutdown(); + @Override + public void applyRecoverySnapshot(final byte[] snapshotBytes) { + log.debug("{}: Applying recovered snapshot", shardName); + final ShardDataTreeSnapshot snapshot; try { - if(executor.awaitTermination(TIME_OUT, TimeUnit.MINUTES)) { - return resultingTxList; - } else { - LOG.error("Recovery for shard {} timed out after {} minutes", shardName, TIME_OUT); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - - return Collections.emptyList(); - } - - private static abstract class ShardRecoveryTask implements Runnable { - - final DOMStoreWriteTransaction resultingTx; - - ShardRecoveryTask(DOMStoreWriteTransaction resultingTx) { - this.resultingTx = resultingTx; + snapshot = ShardDataTreeSnapshot.deserialize(snapshotBytes); + } catch (Exception e) { + log.error("{}: failed to deserialize snapshot", shardName, e); + throw Throwables.propagate(e); } - } - private class LogRecoveryTask extends ShardRecoveryTask { - - private final List logEntries; - - LogRecoveryTask(List logEntries, DOMStoreWriteTransaction resultingTx) { - super(resultingTx); - this.logEntries = logEntries; - } + try { + store.applyRecoverySnapshot(snapshot); + } catch (Exception e) { + log.error("{}: failed to apply snapshot {}", shardName, snapshot, e); - @Override - public void run() { - for(int i = 0; i < logEntries.size(); i++) { - MutableCompositeModification.fromSerializable( - logEntries.get(i)).apply(resultingTx); - // Null out to GC quicker. - logEntries.set(i, null); - } + final File f = writeRoot("recovery", snapshot.getRootNode().orElse(null)); + throw new IllegalStateException(String.format( + "%s: Failed to apply recovery snapshot. Node data was written to file %s", shardName, f), e); } } - private class SnapshotRecoveryTask extends ShardRecoveryTask { - - private final byte[] snapshotBytes; - - SnapshotRecoveryTask(byte[] snapshotBytes, DOMStoreWriteTransaction resultingTx) { - super(resultingTx); - this.snapshotBytes = snapshotBytes; - } - - @Override - public void run() { - NormalizedNode node = SerializationUtils.deserializeNormalizedNode(snapshotBytes); - - // delete everything first - resultingTx.delete(YangInstanceIdentifier.builder().build()); - - // Add everything from the remote node back - resultingTx.write(YangInstanceIdentifier.builder().build(), node); - } + @Override + public byte[] getRestoreFromSnapshot() { + return restoreFromSnapshot; } }