From 28313ad901a88b4a5e5e9f54da0368c7171ca080 Mon Sep 17 00:00:00 2001
From: Tom Pantelis <tpanteli@brocade.com>
Date: Mon, 4 Apr 2016 10:58:08 -0400
Subject: [PATCH] Default shard-journal-recovery-log-batch-size to 1

In Helium there was an issue with batching journal log entries in a
single transaction on recovery which could cause validation exceptions
and/or missing data. Setting the batch size to 1 alleviated the issue and
thus it was defaulted to 1.

It was thought this issue wasn't present in Lithium, but it is, as I have
a Helium journal which exhibits the problem. I have tried this journal
with the current code base and didn't see an issue (as far as I could
tell, all data was recovered), but I'm not confident that an issue isn't
still lurking, given the right combination of modifications across many
journal transactions. It is safest to recover the transactions in the
same manner as they were originally committed, i.e. one by one.

Therefore, I have defaulted the batch size to 1. In my testing, the prior
setting of 1000 didn't add any value anyway, as the recovery time was
virtually the same with batch sizes of 1000 and 1. Setting it to 1
eliminates the potential risk of data loss.

Change-Id: Icd7fd3c60bdd6cf1b677ccae38be810e779d2bd3
Signed-off-by: Tom Pantelis <tpanteli@brocade.com>
---
 .../src/main/resources/initial/datastore.cfg                   | 3 ---
 .../controller/cluster/datastore/DatastoreContext.java         | 2 +-
 .../src/main/yang/distributed-datastore-provider.yang          | 2 +-
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg b/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg
index cafe375f8b..42c46bf901 100644
--- a/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg
+++ b/opendaylight/md-sal/sal-clustering-config/src/main/resources/initial/datastore.cfg
@@ -35,9 +35,6 @@ operational.persistent=false
 # failing an operation (eg transaction create and change listener registration).
 #shard-initialization-timeout-in-seconds=300
 
-# The maximum number of journal log entries to batch on recovery for a shard before committing to the data store.
-#shard-journal-recovery-log-batch-size=1000
-
 # The minimum number of entries to be present in the in-memory journal log before a snapshot is to be taken.
 #shard-snapshot-batch-count=20000
 
diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java
index 9f2103f5e1..6af9622199 100644
--- a/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java
+++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/java/org/opendaylight/controller/cluster/datastore/DatastoreContext.java
@@ -36,7 +36,7 @@ public class DatastoreContext {
     public static final Duration DEFAULT_SHARD_TRANSACTION_IDLE_TIMEOUT = Duration.create(10, TimeUnit.MINUTES);
     public static final int DEFAULT_OPERATION_TIMEOUT_IN_MS = 5000;
     public static final int DEFAULT_SHARD_TX_COMMIT_TIMEOUT_IN_SECONDS = 30;
-    public static final int DEFAULT_JOURNAL_RECOVERY_BATCH_SIZE = 1000;
+    public static final int DEFAULT_JOURNAL_RECOVERY_BATCH_SIZE = 1;
     public static final int DEFAULT_SNAPSHOT_BATCH_COUNT = 20000;
     public static final int DEFAULT_HEARTBEAT_INTERVAL_IN_MILLIS = 500;
     public static final int DEFAULT_ISOLATED_LEADER_CHECK_INTERVAL_IN_MILLIS = DEFAULT_HEARTBEAT_INTERVAL_IN_MILLIS * 10;
diff --git a/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang b/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang
index b6980a659f..8ae1ec9d00 100644
--- a/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang
+++ b/opendaylight/md-sal/sal-distributed-datastore/src/main/yang/distributed-datastore-provider.yang
@@ -125,7 +125,7 @@ module distributed-datastore-provider {
          }
 
          leaf shard-journal-recovery-log-batch-size {
-            default 1000;
+            default 1;
             type non-zero-uint32-type;
             description "The maximum number of journal log entries to batch on recovery for a shard before committing to the data store.";
          }
-- 
2.36.6