Fix csit SUITES variable
[releng/builder.git] / jjb / integration / integration-start-cluster-run-test.sh
#!/bin/bash
#@IgnoreInspection BashAddShebang
#
# Verify that every ODL cluster member is up, run the Robot test suites
# against the cluster, then take karaf thread dumps, kill karaf and copy
# the controller logs into the current directory.
#
# Environment read by this script (none of it is set here):
#   ROBOT_VENV, BUNDLEFOLDER, WORKSPACE, JAVA_HOME, USER, HOME,
#   NUM_ODL_SYSTEM, ODL_SYSTEM_IP, ODL_SYSTEM_<n>_IP,
#   NUM_TOOLS_SYSTEM, TOOLS_SYSTEM_IP, TOOLS_SYSTEM_<n>_IP,
#   NUM_OPENSTACK_SYSTEM, CONTROLLERSCOPE, TESTPLAN, TESTOPTIONS,
#   DISTROSTREAM, ACTUAL_BUNDLE_URL, JDKVERSION, NEXUSURL_PREFIX.
#
# Activate robotframework virtualenv
# ${ROBOT_VENV} comes from the integration-install-robotframework.sh
# script.
# shellcheck source=${ROBOT_VENV}/bin/activate disable=SC1091
source "${ROBOT_VENV}/bin/activate"
# get_test_suites (used further down) is not defined in this file;
# presumably it is provided by common-functions.sh - confirm there.
source /tmp/common-functions.sh "${BUNDLEFOLDER}"
# Ensure we fail the job if any steps fail.
set -ex -o pipefail

#######################################
# Save the remote process list and a jstack thread dump of karaf.
# Globals:   WORKSPACE (read), JAVA_HOME (read)
# Arguments: $1 - cluster member index (used in the dump file name)
#            $2 - host to ssh into
#            $3 - phase tag, "before" or "after" (used in file names)
# Outputs:   ${WORKSPACE}/ps_<phase>.log and
#            ${WORKSPACE}/karaf_<index>_<pid>_threads_<phase>.log
# Returns:   status of the last command; a jstack failure is tolerated.
#######################################
dump_karaf_threads() {
    local index=$1 host=$2 phase=$3 pid
    ssh "${host}" "sudo ps aux" > "${WORKSPACE}/ps_${phase}.log"
    # Second column of "ps aux" is the PID. Assignment is kept separate from
    # "local" so a failing pipeline is still visible to "set -e".
    pid=$(grep org.apache.karaf.main.Main "${WORKSPACE}/ps_${phase}.log" | grep -v grep | tr -s ' ' | cut -f2 -d' ')
    echo "karaf main: org.apache.karaf.main.Main, pid:${pid}"
    # shellcheck disable=SC2029
    ssh "${host}" "${JAVA_HOME}/bin/jstack -l ${pid}" > "${WORKSPACE}/karaf_${index}_${pid}_threads_${phase}.log" || true
}

echo "#################################################"
echo "##         Verify Cluster is UP                ##"
echo "#################################################"

# Generate the script that is copied to, and executed on, every controller.
# The heredoc delimiter is unquoted on purpose: ${BUNDLEFOLDER} is expanded
# now (locally), while everything written as \$ is left for the remote run.
cat > "${WORKSPACE}/verify-cluster-is-up.sh" <<EOF

CONTROLLERID="member-\$1"
ODL_SYSTEM_IP_PATH=\$2

echo "Waiting for controller to come up..."
COUNT="0"
while true; do
    RESP="\$( curl --user admin:admin -sL -w "%{http_code} %{url_effective}\\n" http://localhost:8181/restconf/modules -o /dev/null )"
    echo \$RESP
    SHARD="\$( curl --user admin:admin -sL -w "%{http_code} %{url_effective}\\n" http://localhost:8181/jolokia/read/org.opendaylight.controller:Category=Shards,name=\$CONTROLLERID-shard-inventory-config,type=DistributedConfigDatastore)"
    echo \$SHARD
    if ([[ \$RESP == *"200"* ]] && [[ \$SHARD  == *'"status":200'* ]]); then
        echo Controller is UP
        break
    elif (( "\$COUNT" > "600" )); then
        echo Timeout Controller DOWN
        echo "Dumping first 500K bytes of karaf log..."
        head --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
        echo "Dumping last 500K bytes of karaf log..."
        tail --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
        echo "Listing all open ports on controller system"
        netstat -pnatu
        exit 1
    else
        COUNT=\$(( \${COUNT} + 1 ))
        sleep 1
        if [[ \$((\$COUNT % 5)) == 0 ]]; then
            echo already waited \${COUNT} seconds...
        fi
    fi
done

echo "Listing all open ports on controller system.."
netstat -pnatu

function exit_on_log_file_message {
    echo "looking for \"\$1\" in log file"
    if grep --quiet "\$1" "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"; then
        echo ABORTING: found "\$1"
        echo "Dumping first 500K bytes of karaf log..."
        head --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
        echo "Dumping last 500K bytes of karaf log..."
        tail --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
        exit 1
    fi
}

exit_on_log_file_message 'BindException: Address already in use'
exit_on_log_file_message 'server is unhealthy'

EOF

# Run the generated check on every member; "set -e" aborts the job as soon
# as one member fails it.
for ((i = 1; i <= NUM_ODL_SYSTEM; i++)); do
    # Name of the variable holding this member's IP; read via ${!CONTROLLERIP}.
    CONTROLLERIP="ODL_SYSTEM_${i}_IP"
    echo "Verifying member-${i} with IP address ${!CONTROLLERIP} is UP"
    scp "${WORKSPACE}/verify-cluster-is-up.sh" "${!CONTROLLERIP}:/tmp"
    # shellcheck disable=SC2029
    ssh "${!CONTROLLERIP}" "bash /tmp/verify-cluster-is-up.sh ${i} ${!CONTROLLERIP}"
done

if [[ "${NUM_OPENSTACK_SYSTEM}" -gt 0 ]]; then
   echo "Exiting without running tests to deploy openstack for testing"
   exit 0
fi

if [[ "${CONTROLLERSCOPE}" == 'all' ]]; then
    COOLDOWN_PERIOD="180"
else
    COOLDOWN_PERIOD="60"
fi
echo "Cool down for ${COOLDOWN_PERIOD} seconds :)..."
sleep "${COOLDOWN_PERIOD}"

echo "Generating controller variables..."
# Robot "-v NAME:VALUE" pairs are collected in arrays so that each option and
# each value reaches robot as a separate argument.
odl_variables=()
for ((i = 1; i <= NUM_ODL_SYSTEM; i++)); do
    CONTROLLERIP="ODL_SYSTEM_${i}_IP"
    odl_variables+=(-v "${CONTROLLERIP}:${!CONTROLLERIP}")
    echo "Lets's take the karaf thread dump"
    dump_karaf_threads "${i}" "${!CONTROLLERIP}" before
done

echo "Generating mininet variables..."
tools_variables=()
for ((i = 1; i <= NUM_TOOLS_SYSTEM; i++)); do
    MININETIP="TOOLS_SYSTEM_${i}_IP"
    tools_variables+=(-v "${MININETIP}:${!MININETIP}")
done

get_test_suites SUITES

echo "Starting Robot test suites ${SUITES} ..."
# TESTOPTIONS and SUITES are plain strings that may hold several words (or
# nothing at all), so they are expanded unquoted on purpose: quoting them
# would hand robot one merged argument, or an empty-string argument.
# A failing robot run must not abort the job before logs are collected,
# hence the "|| true".
# shellcheck disable=SC2086
robot -N "${TESTPLAN}" \
      --removekeywords wuks -c critical -e exclude -e "skip_if_${DISTROSTREAM}" \
      -v BUNDLEFOLDER:"${BUNDLEFOLDER}" \
      -v BUNDLE_URL:"${ACTUAL_BUNDLE_URL}" \
      -v CONTROLLER:"${ODL_SYSTEM_IP}" \
      -v CONTROLLER1:"${ODL_SYSTEM_2_IP}" \
      -v CONTROLLER2:"${ODL_SYSTEM_3_IP}" \
      -v CONTROLLER_USER:"${USER}" \
      -v JAVA_HOME:"${JAVA_HOME}" \
      -v JDKVERSION:"${JDKVERSION}" \
      -v JENKINS_WORKSPACE:"${WORKSPACE}" \
      -v MININET:"${TOOLS_SYSTEM_IP}" \
      -v MININET1:"${TOOLS_SYSTEM_2_IP}" \
      -v MININET2:"${TOOLS_SYSTEM_3_IP}" \
      -v MININET_USER:"${USER}" \
      -v NEXUSURL_PREFIX:"${NEXUSURL_PREFIX}" \
      -v NUM_ODL_SYSTEM:"${NUM_ODL_SYSTEM}" \
      -v NUM_TOOLS_SYSTEM:"${NUM_TOOLS_SYSTEM}" \
      -v ODL_STREAM:"${DISTROSTREAM}" \
      -v ODL_SYSTEM_IP:"${ODL_SYSTEM_IP}" "${odl_variables[@]}" \
      -v ODL_SYSTEM_USER:"${USER}" \
      -v TOOLS_SYSTEM_IP:"${TOOLS_SYSTEM_IP}" "${tools_variables[@]}" \
      -v TOOLS_SYSTEM_USER:"${USER}" \
      -v USER_HOME:"${HOME}" \
      -v WORKSPACE:/tmp \
      ${TESTOPTIONS} ${SUITES} || true

echo "Examining the files in data/log and checking filesize"
# Iterate over the members that actually exist instead of assuming three;
# this part still runs under "set -e", so an empty host would fail the job.
for ((i = 1; i <= NUM_ODL_SYSTEM; i++)); do
    CONTROLLERIP="ODL_SYSTEM_${i}_IP"
    # shellcheck disable=SC2029
    ssh "${!CONTROLLERIP}" "ls -altr /tmp/${BUNDLEFOLDER}/data/log/"
    # shellcheck disable=SC2029
    ssh "${!CONTROLLERIP}" "du -hs /tmp/${BUNDLEFOLDER}/data/log/*"
done

set +e  # We do not want to create red dot just because something went wrong while fetching logs.
for ((i = 1; i <= NUM_ODL_SYSTEM; i++)); do
    CONTROLLERIP="ODL_SYSTEM_${i}_IP"
    echo "Lets's take the karaf thread dump again"
    dump_karaf_threads "${i}" "${!CONTROLLERIP}" after
    echo "killing karaf process..."
    # ssh joins its trailing arguments with spaces before handing them to the
    # remote shell, so "bash -c '<pipeline>'" would run a bare "ps" remotely.
    # Send the whole pipeline as one command string instead; \$1 is escaped
    # so that awk, not the local shell, sees it.
    ssh "${!CONTROLLERIP}" "ps axf | grep karaf | grep -v grep | awk '{print \"kill -9 \" \$1}' | sh"
done
sleep 5
for ((i = 1; i <= NUM_ODL_SYSTEM; i++)); do
    CONTROLLERIP="ODL_SYSTEM_${i}_IP"
    echo "Compressing karaf.log ${i}"
    ssh "${!CONTROLLERIP}" gzip --best "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
    echo "Fetching compressed karaf.log ${i}"
    # Remote copies are removed only after a successful download.
    scp "${!CONTROLLERIP}:/tmp/${BUNDLEFOLDER}/data/log/karaf.log.gz" "odl${i}_karaf.log.gz" && ssh "${!CONTROLLERIP}" rm -f "/tmp/${BUNDLEFOLDER}/data/log/karaf.log.gz"
    # TODO: Should we compress the output log file as well?
    scp "${!CONTROLLERIP}:/tmp/${BUNDLEFOLDER}/data/log/karaf_console.log" "odl${i}_karaf_console.log" && ssh "${!CONTROLLERIP}" rm -f "/tmp/${BUNDLEFOLDER}/data/log/karaf_console.log"
    echo "Fetch GC logs"
    # FIXME: Put member index in filename, instead of directory name.
    mkdir -p "gclogs-${i}"
    # The *.log glob is quoted locally so it is expanded on the remote side.
    scp "${!CONTROLLERIP}:/tmp/${BUNDLEFOLDER}/data/log/*.log" "gclogs-${i}/" && ssh "${!CONTROLLERIP}" rm -f "/tmp/${BUNDLEFOLDER}/data/log/*.log"
done

echo "Examine copied files"
ls -lt

true  # perhaps Jenkins is testing last exit code

# vim: ts=4 sw=4 sts=4 et ft=sh :