From 28313ad901a88b4a5e5e9f54da0368c7171ca080 Mon Sep 17 00:00:00 2001 From: Tom Pantelis Date: Mon, 4 Apr 2016 10:58:08 -0400 Subject: [PATCH] Default shard-journal-recovery-log-batch-size to 1 In Helium there was an issue with batching journal log entries in a single transaction on recovery which could cause validation exceptions and/or missing data. Setting the batch size to 1 alleviated the issue and thus it was defaulted to 1. It was thought this issue wasn't present in Lithium, but it is, as I have a Helium journal which exhibits the problem. I have tried this journal with the current code base and didn't see an issue (it looked like all data was recovered from what I could tell), but I'm not confident an issue isn't still lurking with the right combination of modifications across many journal transactions. It is safest to recover the transactions in the same manner as they were originally committed, i.e. one by one. Therefore I have defaulted the batch size to 1. In my testing, the prior setting of 1000 doesn't add any value anyway, as the recovery time is virtually the same with batch size 1000 and 1. Setting it to 1 eliminates the potential risk of data loss. 
Change-Id: Icd7fd3c60bdd6cf1b677ccae38be810e779d2bd3 Signed-off-by: Tom Pantelis --- .../src/main/resources/initial/datastore.cfg | 3 --- .../controller/cluster/datastore/DatastoreContext.java | 2 +- .../src/main/yang/distributed-datastore-provider.yang | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg b/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg index cafe375f8b..42c46bf901 100644 --- a/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg +++ b/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg @@ -35,9 +35,6 @@ operational.persistent=false # failing an operation (eg transaction create and change listener registration). #shard-initialization-timeout-in-seconds=300 -# The maximum number of journal log entries to batch on recovery for a shard before committing to the data store. -#shard-journal-recovery-log-batch-size=1000 - # The minimum number of entries to be present in the in-memory journal log before a snapshot is to be taken. 
#shard-snapshot-batch-count=20000 diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java index 9f2103f5e1..6af9622199 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java @@ -36,7 +36,7 @@ public class DatastoreContext { public static final Duration DEFAULT_SHARD_TRANSACTION_IDLE_TIMEOUT = Duration.create(10, TimeUnit.MINUTES); public static final int DEFAULT_OPERATION_TIMEOUT_IN_MS = 5000; public static final int DEFAULT_SHARD_TX_COMMIT_TIMEOUT_IN_SECONDS = 30; - public static final int DEFAULT_JOURNAL_RECOVERY_BATCH_SIZE = 1000; + public static final int DEFAULT_JOURNAL_RECOVERY_BATCH_SIZE = 1; public static final int DEFAULT_SNAPSHOT_BATCH_COUNT = 20000; public static final int DEFAULT_HEARTBEAT_INTERVAL_IN_MILLIS = 500; public static final int DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS = DEFAULT_HEARTBEAT_INTERVAL_IN_MILLIS * 10; diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang b/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang index b6980a659f..8ae1ec9d00 100644 --- a/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang +++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang @@ -125,7 +125,7 @@ module distributed-datastore-provider { } leaf shard-journal-recovery-log-batch-size { - default 1000; + default 1; type non-zero-uint32-type; description "The maximum number of journal log entries to batch on recovery for a shard before 
committing to the data store."; } -- 2.36.6