Helm charts for supporting clustering in ODL 61/99261/1 master
authorrahuliitr <Rahul.Sharma@fujitsu.com>
Thu, 6 Jan 2022 18:31:24 +0000 (13:31 -0500)
committerrahuliitr <Rahul.Sharma@fujitsu.com>
Thu, 6 Jan 2022 18:33:35 +0000 (13:33 -0500)
Signed-off-by: Rahul Sharma <Rahul.Sharma@fujitsu.com>
Change-Id: Ie128bef4349ece5109e21cd5869b6dd03a245f37

helm/README.md
helm/opendaylight/resources/bin/startodl.sh [new file with mode: 0644]
helm/opendaylight/resources/conf/akka.conf [new file with mode: 0644]
helm/opendaylight/templates/configmap.yaml
helm/opendaylight/templates/service.yaml
helm/opendaylight/templates/statefulset.yaml
helm/opendaylight/values.yaml

index 0a28a87e6dc1f6b7d42ca52584d052ca42dc92fb..ceba60e3b5ab3c82badc30f32a68309cc765e6be 100644 (file)
@@ -192,4 +192,35 @@ base on the notes output from heml install, set up port forwarding to first inst
   kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT
 ```
 
-from browser, go to `http://127.0.0.1:8080/apidoc/explorer/index.html` then login with `admin/admin` 
\ No newline at end of file
+from browser, go to `http://127.0.0.1:8080/apidoc/explorer/index.html` then login with `admin/admin`
+
+## Clustering
+
+Opendaylight can be started as an Akka-based cluster.
+To start Opendaylight as a cluster, the following parameters in values.yaml need to be updated:
+```
+1. replicaCount: The value should be 3 or higher and must be an odd number.
+2. config.isClusterDeployment: The value should be set to true
+3. autoscaling.enabled: This should be disabled (i.e., enabled: false)
+```
+Once set, you can run helm install as above:
+
+```
+helm install sdnc opendaylight
+
+Output:
+/home/rahul/packaging/helm$ kubectl  get pods
+NAME                  READY   STATUS    RESTARTS   AGE
+sdnc-opendaylight-1   1/1     Running     0                25m
+sdnc-opendaylight-2   1/1     Running     0                25m
+sdnc-opendaylight-0   1/1     Running     0                25m
+```
+
+NOTE: If the variable `config.isClusterDeployment` is set to `false` with replicaCount set to 3, you will have 3 independent ODL instances deployed (but not clustered). 
+
+### Cluster specific APIs
+Once the cluster is up, you can use the Jolokia APIs to query the status of the cluster:
+- `http://{{vm-ip}}:{{odl-restconf-port}}/jolokia/read/org.opendaylight.controller:Category=Shards,name=member-<leader-id>-shard-default-operational,type=DistributedOperationalDatastore`
+  - NOTE: To find the leader-id used in the above query, use: `http://{{vm-ip}}:{{odl-restconf-port}}/jolokia/read/org.opendaylight.controller:type=DistributedOperationalDatastore,Category=ShardManager,name=shard-manager-operational`
+
+For more details, refer here: https://docs.opendaylight.org/en/stable-phosphorus/getting-started-guide/clustering.html#cluster-monitoring
diff --git a/helm/opendaylight/resources/bin/startodl.sh b/helm/opendaylight/resources/bin/startodl.sh
new file mode 100644 (file)
index 0000000..8629aa6
--- /dev/null
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: EPL-1.0
+##############################################################################
+# Copyright (c) 2021 The Linux Foundation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+##############################################################################
+#!/bin/bash -e
+
+# Configure this pod as a member of the ODL cluster.
+#
+# Environment / globals read:
+#   ODL_REPLICAS - number of replicas in the StatefulSet (must be non-empty)
+#   BASEDIR      - ODL install directory that holds bin/configure_cluster.sh
+# The pod hostname is expected to end in "-<ordinal>", e.g. odl-opendaylight-1.
+# Exits the script with status 1 when ODL_REPLICAS is not set.
+function setup_cluster(){
+  # Quote the expansion so the test is well-formed for any value, and leave
+  # with a non-zero status so a misconfiguration is not reported as success.
+  if [ -z "${ODL_REPLICAS}" ]; then
+     echo "ODL_REPLICAS is not configured in Env field" >&2
+     exit 1
+  fi
+
+  hm=$(hostname)
+  echo "Enable cluster for host: ${hm}"
+
+  ## For a hostname such as odl-opendaylight-1,
+  ## node_name will be 'odl-opendaylight' and node_index '1'
+  node_name=${hm%-*}
+  node_index=${hm##*-}
+
+  # One "<pod>.<service>.<namespace>" entry per replica, ordinal 0 first.
+  # The service and namespace parts are filled in when Helm renders the chart.
+  node_list=("${node_name}-0.{{ include "opendaylight.fullname" . }}.{{ .Release.Namespace }}")
+
+  for ((i=1;i<${ODL_REPLICAS};i++));
+  do
+    node_list+=("${node_name}-$i.{{ include "opendaylight.fullname" . }}.{{ .Release.Namespace }}")
+  done
+
+  # The pod ordinal is 0-based; configure_cluster.sh is given ordinal+1
+  # followed by every node name as its own argument.
+  "${BASEDIR}/bin/configure_cluster.sh" $((node_index+1)) "${node_list[@]}"
+}
+
+set -x
+
+mountpath="{{ .Values.persistence.mountPath }}"
+BASEDIR="{{ .Values.config.odl_basedir }}"
+odl_prefix="/opt/opendaylight"
+
+if [[ ! -d "$mountpath/snapshots" ]];then
+  mkdir -p $mountpath/snapshots
+fi
+
+if [[ ! -d "$mountpath/data" ]];then
+  mkdir -p $mountpath/data
+fi
+
+if [[ ! -d "$mountpath/segmented-journal" ]];then
+  mkdir -p $mountpath/segmented-journal
+fi
+
+if [[ ! -d "$mountpath/daexim" ]];then
+  mkdir -p $mountpath/daexim
+fi
+
+if [[ ! -L "$odl_prefix/snapshots" ]];then
+  rm -rf $odl_prefix/snapshots && ln -s $mountpath/snapshots $odl_prefix/snapshots
+fi
+
+if [[ ! -L "$odl_prefix/data" ]];then
+  rm -rf $odl_prefix/data && ln -s $mountpath/data $odl_prefix/data
+fi
+
+if [[ ! -L "$odl_prefix/segmented-journal" ]];then
+  rm -rf $odl_prefix/segmented-journal && ln -s $mountpath/segmented-journal $odl_prefix/segmented-journal
+fi
+
+if [[ ! -L "$odl_prefix/daexim" ]];then
+  rm -rf $odl_prefix/daexim && ln -s $mountpath/daexim $odl_prefix/daexim
+fi
+
+sed -i "s/\(featuresBoot= \|featuresBoot = \)/featuresBoot = ${FEATURES},/g" ${BASEDIR}/etc/org.apache.karaf.features.cfg
+cat ${BASEDIR}/etc/org.apache.karaf.features.cfg
+
+ODL_REPLICAS=${ODL_REPLICAS:-1}
+IS_CLUSTER_ENABLED=${IS_CLUSTER_ENABLED:-false}
+SLEEP_TIME=${SLEEP_TIME:-30}
+
+if $IS_CLUSTER_ENABLED; then
+  ${BASEDIR}/bin/start;
+  echo "Waiting ${SLEEP_TIME} seconds for OpenDaylight to initialize";
+  sleep ${SLEEP_TIME};
+  setup_cluster;
+  echo "Restart ODL after cluster configuration";
+  ${BASEDIR}/bin/stop;
+  sleep 20;
+fi
+
+echo "Starting OpenDaylight"
+${BASEDIR}/bin/karaf run
diff --git a/helm/opendaylight/resources/conf/akka.conf b/helm/opendaylight/resources/conf/akka.conf
new file mode 100644 (file)
index 0000000..5cfe9ce
--- /dev/null
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: EPL-1.0
+##############################################################################
+# Copyright (c) 2021 The Linux Foundation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+##############################################################################
+# Akka settings under the odl-cluster-data root. The chart renders this file
+# with Helm's tpl function (templates/configmap.yaml), so the template
+# expressions below are resolved from the cluster.akka section of values.yaml.
+odl-cluster-data {
+  akka {
+    remote {
+      artery {
+        enabled = on
+        # NOTE(review): loopback placeholder; presumably rewritten per pod by
+        # configure_cluster.sh, which startodl.sh invokes - confirm.
+        canonical.hostname = "127.0.0.1"
+        # NOTE(review): hard-coded, while the chart exposes service.clusterPort
+        # (default 2550) for the container and service port - keep them in sync.
+        canonical.port = 2550
+      }
+
+      use-passive-connections = off
+      # when under load we might trip a false positive on the failure detector
+      # transport-failure-detector {
+      # heartbeat-interval = 4 s
+      # acceptable-heartbeat-pause = 16s
+      # }
+    }
+
+    actor {
+      # Every actor debug switch is turned on.
+      debug {
+        autoreceive = on
+        lifecycle = on
+        unhandled = on
+        fsm = on
+        event-stream = on
+      }
+    }
+
+    cluster {
+      # Using artery.
+      # NOTE(review): single loopback seed node; presumably replaced with the
+      # real node list by configure_cluster.sh - confirm.
+      seed-nodes = ["akka://opendaylight-cluster-data@127.0.0.1:2550"]
+
+      seed-node-timeout = {{ .Values.cluster.akka.seedNodeTimeout }}
+
+      # Akka's Split Brain Resolver is used as the downing provider.
+      downing-provider-class = "akka.cluster.sbr.SplitBrainResolverProvider"
+
+      # NOTE(review): "member-1" looks like a placeholder; startodl.sh passes
+      # pod ordinal + 1 to configure_cluster.sh, presumably to set it - confirm.
+      roles = ["member-1"]
+
+    }
+
+    persistence {
+      # By default the snapshots/journal directories live in KARAF_HOME. You can choose to put it somewhere else by
+      # modifying the following two properties. The directory location specified may be a relative or absolute path.
+      # The relative path is always relative to KARAF_HOME.
+
+      # snapshot-store.local.dir = "target/snapshots"
+      # journal.leveldb.dir = "target/journal"
+
+      journal {
+        leveldb {
+            # Set native = off to use a Java-only implementation of leveldb.
+            # Note that the Java-only version is not currently considered by Akka to be production quality.
+
+            # native = off
+        }
+
+        # The journal and snapshot-store fallbacks below share the same
+        # circuit-breaker and recovery-timeout values from values.yaml.
+        journal-plugin-fallback {
+          circuit-breaker {
+              max-failures = {{ .Values.cluster.akka.circuitBreaker.maxFailures }}
+              call-timeout = {{ .Values.cluster.akka.circuitBreaker.callTimeout }}
+              reset-timeout = {{ .Values.cluster.akka.circuitBreaker.resetTimeout }}
+          }
+          recovery-event-timeout = {{ .Values.cluster.akka.recoveryEventTimeout }}
+        }
+
+        snapshot-store-plugin-fallback {
+          circuit-breaker {
+            max-failures = {{ .Values.cluster.akka.circuitBreaker.maxFailures }}
+            call-timeout = {{ .Values.cluster.akka.circuitBreaker.callTimeout }}
+            reset-timeout = {{ .Values.cluster.akka.circuitBreaker.resetTimeout }}
+          }
+          recovery-event-timeout = {{ .Values.cluster.akka.recoveryEventTimeout }}
+        }
+      }
+      
+      # Use lz4 compression for LocalSnapshotStore snapshots
+      snapshot-store.local.use-lz4-compression = false
+      # Size of blocks for lz4 compression: 64KB, 256KB, 1MB or 4MB
+      snapshot-store.local.lz4-blocksize = 256KB
+    }
+    disable-default-actor-system-quarantined-event-handling = "false"
+  }
+}
+
index 3d26566f542a826791186426a0d1c83f7959803e..5925460760aa36b9273b0fb1752f597234a06160 100644 (file)
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: {{ include "opendaylight.fullname" . }}
+  name: {{ include "opendaylight.fullname" . }}-scripts
 data:
-  startodl.sh: |
-    #!/bin/bash
-    mountpath="{{ .Values.persistence.mountPath }}"
-    BASEDIR="{{ .Values.config.odl_basedir }}"
-    odl_prefix="/opt/opendaylight"
-
-    if [[ ! -d "$mountpath/snapshots" ]];then
-      mkdir -p $mountpath/snapshots
-    fi
-
-    if [[ ! -d "$mountpath/data" ]];then
-      mkdir -p $mountpath/data
-    fi
-
-    if [[ ! -d "$mountpath/segmented-journal" ]];then
-      mkdir -p $mountpath/segmented-journal
-    fi
-
-    if [[ ! -d "$mountpath/daexim" ]];then
-      mkdir -p $mountpath/daexim
-    fi
-
-    if [[ ! -L "$odl_prefix/snapshots" ]];then
-      rm -rf $odl_prefix/snapshots && ln -s $mountpath/snapshots $odl_prefix/snapshots
-    fi
-
-    if [[ ! -L "$odl_prefix/data" ]];then
-      rm -rf $odl_prefix/data && ln -s $mountpath/data $odl_prefix/data
-    fi
-
-    if [[ ! -L "$odl_prefix/segmented-journal" ]];then
-      rm -rf $odl_prefix/segmented-journal && ln -s $mountpath/segmented-journal $odl_prefix/segmented-journal
-    fi
-
-    if [[ ! -L "$odl_prefix/daexim" ]];then
-      rm -rf $odl_prefix/daexim && ln -s $mountpath/daexim $odl_prefix/daexim
-    fi
-
-    sed -i "s/\(featuresBoot= \|featuresBoot = \)/featuresBoot = ${FEATURES},/g" ${BASEDIR}/etc/org.apache.karaf.features.cfg
-    cat ${BASEDIR}/etc/org.apache.karaf.features.cfg
-    ${BASEDIR}/bin/karaf run
+{{ tpl (.Files.Glob "resources/bin/*").AsConfig . | indent 2 }}
+---
+{{- if .Values.config.isClusterDeployment }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "opendaylight.fullname" . }}-conf
+data:
+{{ tpl (.Files.Glob "resources/conf/*").AsConfig . | indent 2 }}
+{{- end }}
\ No newline at end of file
index 11486e813241ecc47779aa32c68137dc24f39133..54b227e129bac12136435a841cc677144b350f7b 100644 (file)
@@ -16,6 +16,8 @@ metadata:
   name: {{ include "opendaylight.fullname" . }}
   labels:
     {{- include "opendaylight.labels" . | nindent 4 }}
+  annotations:
+    service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
 spec:
   type: {{ .Values.service.type }}
   ports:
@@ -23,6 +25,21 @@ spec:
       targetPort: http
       protocol: TCP
       name: http
+      {{- if eq .Values.service.type "NodePort" }}
+      nodePort: {{ .Values.service.nodePortHttp }}
+      {{- end }}
+    {{- if .Values.config.isClusterDeployment }}
+    - port: {{ .Values.service.clusterPort }}
+      targetPort: cluster
+      protocol: TCP
+      name: cluster
+      {{- if eq .Values.service.type "NodePort" }}
+      nodePort: {{ .Values.service.nodePortCluster }}
+      {{- end }}
+    {{- end }}
+  {{- if eq .Values.service.type "ClusterIP" }}
   clusterIP: None
+  {{- end }}
   selector:
     {{- include "opendaylight.selectorLabels" . | nindent 4 }}
+  publishNotReadyAddresses: true
index 1dc3fd7f08c8e656ec1aa9d337d10b74f9f30516..e1a2e09cccd88d63a37a3404a0a59f6f8e2c572c 100644 (file)
@@ -18,6 +18,7 @@ spec:
   {{- if not .Values.autoscaling.enabled }}
   replicas: {{ .Values.replicaCount }}
   {{- end }}
+  podManagementPolicy: Parallel
   serviceName: {{ include "opendaylight.fullname" . }}
   selector:
     matchLabels:
@@ -45,6 +46,18 @@ spec:
           volumeMounts:
           - name: {{ .Values.persistence.volName }}
             mountPath: {{ .Values.persistence.mountPath }}
+        {{- if .Values.config.isClusterDeployment }}
+        - name: inject-cluster-related-conf
+          image: busybox
+          command: ["/bin/sh"]
+          args: ["-c", "cp /config-input/akka.conf /config/akka.conf"]
+          volumeMounts:
+            - mountPath: /config-input/akka.conf
+              name: config-input
+              subPath: akka.conf
+            - mountPath: /config
+              name: update-conf
+        {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           securityContext:
@@ -61,10 +74,19 @@ spec:
             value: "-Xms{{.Values.config.javaOptions.minMemory}} -Xmx{{.Values.config.javaOptions.maxMemory}}"
           - name: EXTRA_JAVA_OPTS
             value: "-XX:+UseG1GC -XX:MaxGCPauseMillis={{.Values.config.javaOptions.maxGCPauseMillis}} -XX:ParallelGCThreads={{.Values.config.javaOptions.parallelGCThreads}} -XX:+ParallelRefProcEnabled -XX:+UseStringDeduplication {{.Values.config.javaOptions.gcLogOptions}}"
+          - name: ODL_REPLICAS
+            value: "{{ .Values.replicaCount }}"
+          - name: IS_CLUSTER_ENABLED
+            value: "{{ .Values.config.isClusterDeployment }}"
           ports:
             - name: http
               containerPort: {{ .Values.service.port }}
               protocol: TCP
+            {{- if .Values.config.isClusterDeployment }}
+            - name: cluster
+              containerPort: {{ .Values.service.clusterPort }}
+              protocol: TCP
+            {{- end }}
           readinessProbe:
             tcpSocket:
               port: {{ .Values.service.port }}
@@ -77,6 +99,11 @@ spec:
             mountPath: {{ .Values.persistence.mountPath }}
           - name: scripts
             mountPath: /scripts
+          {{- if .Values.config.isClusterDeployment }}
+          - mountPath: {{ .Values.cluster.salConfigDir }}/{{ .Values.cluster.salConfigVersion}}/sal-clustering-config-{{ .Values.cluster.salConfigVersion}}-akkaconf.xml
+            name: update-conf
+            subPath: akka.conf
+          {{- end }}
       {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
@@ -92,7 +119,16 @@ spec:
       volumes:
         - name: scripts
           configMap:
-            name: {{ include "opendaylight.fullname" . }}
+            name: {{ include "opendaylight.fullname" . }}-scripts
+        {{- if .Values.config.isClusterDeployment }}
+        - name: config-input
+          configMap:
+            name: {{ include "opendaylight.fullname" . }}-conf
+            defaultMode: 0755
+        - name: update-conf
+          emptyDir:
+            medium: Memory
+        {{- end }}
   {{ if not .Values.persistence.enabled }}
         - name: {{ .Values.persistence.volName }} 
           emptyDir: {}
index a12c000f8cfecba0277a305c9979d7246c7c9027..0280e3c1989f650f90da804816d2be3e150bce39 100644 (file)
@@ -28,6 +28,7 @@ config:
   odl_basedir: /opt/opendaylight
   #features: odl-restconf,odl-restconf-all,odl-bgpcep-pcep,odl-bgpcep-bgp,odl-bgpcep-bgp-config-example,odl-bgpcep-bmp,odl-bgpcep-bmp-config-example,odl-jolokia,odl-daexim-all
   features: odl-restconf,odl-restconf-all
+  isClusterDeployment: false
   javaHome: /opt/openjdk-11/
   javaOptions:
     maxGCPauseMillis: 100
@@ -37,6 +38,17 @@ config:
     maxMemory: 2048m
     gcLogOptions: ""
 
+cluster:
+  salConfigDir: /opt/opendaylight/system/org/opendaylight/controller/sal-clustering-config
+  salConfigVersion: 3.0.10
+  akka:
+    seedNodeTimeout: 15s
+    circuitBreaker:
+      maxFailures: 10
+      callTimeout: 90s
+      resetTimeout: 30s
+    recoveryEventTimeout: 90s
+
 serviceAccount:
   # Specifies whether a service account should be created
   create: true
@@ -60,12 +72,15 @@ securityContext: {}
   # runAsUser: 1000
 
 readiness:
-  initialDelaySeconds: 30
+  initialDelaySeconds: 60
   periodSeconds: 10
 
 service:
   type: ClusterIP
   port: 8181
+  nodePortHttp: 30281
+  clusterPort: 2550
+  nodePortCluster: 30250
 
 ingress:
   enabled: false