Merge "Fix SC2059 for ShellCheck 0.4.4, printf formatting"
[releng/builder.git] / jjb / integration / integration-start-cluster-run-test.sh
1 #@IgnoreInspection BashAddShebang
2 # Activate robotframework virtualenv
3 # ${ROBOT_VENV} comes from the integration-install-robotframework.sh
4 # script.
5 # shellcheck source=${ROBOT_VENV}/bin/activate disable=SC1091
6 source ${ROBOT_VENV}/bin/activate
7 source /tmp/common-functions.sh ${BUNDLEFOLDER}
8 # Ensure we fail the job if any steps fail.
9 set -ex -o pipefail
10
11 echo "#################################################"
12 echo "##         Verify Cluster is UP                ##"
13 echo "#################################################"
14
15 cat > ${WORKSPACE}/verify-cluster-is-up.sh <<EOF
16
17 CONTROLLERID="member-\$1"
18 ODL_SYSTEM_IP_PATH=\$2
19
20 echo "Waiting for controller to come up..."
21 COUNT="0"
22 while true; do
23     RESP="\$( curl --user admin:admin -sL -w "%{http_code} %{url_effective}\\n" http://localhost:8181/restconf/modules -o /dev/null )"
24     echo \$RESP
25     SHARD="\$( curl --user admin:admin -sL -w "%{http_code} %{url_effective}\\n" http://localhost:8181/jolokia/read/org.opendaylight.controller:Category=Shards,name=\$CONTROLLERID-shard-inventory-config,type=DistributedConfigDatastore)"
26     echo \$SHARD
27     if ([[ \$RESP == *"200"* ]] && [[ \$SHARD  == *'"status":200'* ]]); then
28         echo Controller is UP
29         break
30     elif (( "\$COUNT" > "600" )); then
31         echo Timeout Controller DOWN
32         echo "Dumping first 500K bytes of karaf log..."
33         head --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
34         echo "Dumping last 500K bytes of karaf log..."
35         tail --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
36         echo "Listing all open ports on controller system"
37         netstat -pnatu
38         exit 1
39     else
40         COUNT=\$(( \${COUNT} + 1 ))
41         sleep 1
42         if [[ \$((\$COUNT % 5)) == 0 ]]; then
43             echo already waited \${COUNT} seconds...
44         fi
45     fi
46 done
47
48 echo "Listing all open ports on controller system.."
49 netstat -pnatu
50
51 function exit_on_log_file_message {
52     echo "looking for \"\$1\" in log file"
53     if grep --quiet "\$1" "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"; then
54         echo ABORTING: found "\$1"
55         echo "Dumping first 500K bytes of karaf log..."
56         head --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
57         echo "Dumping last 500K bytes of karaf log..."
58         tail --bytes=500K "/tmp/${BUNDLEFOLDER}/data/log/karaf.log"
59         exit 1
60     fi
61 }
62
63 exit_on_log_file_message 'BindException: Address already in use'
64 exit_on_log_file_message 'server is unhealthy'
65
66 EOF
67
68 for i in `seq 1 ${NUM_ODL_SYSTEM}`
69 do
70     CONTROLLERIP=ODL_SYSTEM_${i}_IP
71     echo "Verifying member-${i} with IP address ${!CONTROLLERIP} is UP"
72     scp ${WORKSPACE}/verify-cluster-is-up.sh ${!CONTROLLERIP}:/tmp
73     ssh ${!CONTROLLERIP} "bash /tmp/verify-cluster-is-up.sh ${i} ${!CONTROLLERIP}"
74 done
75
76 if [ ${NUM_OPENSTACK_SYSTEM} -gt 0 ]; then
77    echo "Exiting without running tests to deploy openstack for testing"
78    exit
79 fi
80
81 if [ ${CONTROLLERSCOPE} == 'all' ]; then
82     COOLDOWN_PERIOD="180"
83 else
84     COOLDOWN_PERIOD="60"
85 fi
86 echo "Cool down for ${COOLDOWN_PERIOD} seconds :)..."
87 sleep ${COOLDOWN_PERIOD}
88
89 echo "Generating controller variables..."
90 for i in `seq 1 ${NUM_ODL_SYSTEM}`
91 do
92     CONTROLLERIP=ODL_SYSTEM_${i}_IP
93     odl_variables=${odl_variables}" -v ${CONTROLLERIP}:${!CONTROLLERIP}"
94     echo "Lets's take the karaf thread dump"
95     ssh ${!CONTROLLERIP} "sudo ps aux" > ${WORKSPACE}/ps_before.log
96     pid=$(grep org.apache.karaf.main.Main ${WORKSPACE}/ps_before.log | grep -v grep | tr -s ' ' | cut -f2 -d' ')
97     echo "karaf main: org.apache.karaf.main.Main, pid:${pid}"
98     ssh ${!CONTROLLERIP} "${JAVA_HOME}/bin/jstack -l ${pid}" > ${WORKSPACE}/karaf_${i}_${pid}_threads_before.log || true
99 done
100
101 echo "Generating mininet variables..."
102 for i in `seq 1 ${NUM_TOOLS_SYSTEM}`
103 do
104     MININETIP=TOOLS_SYSTEM_${i}_IP
105     tools_variables=${tools_variables}" -v ${MININETIP}:${!MININETIP}"
106 done
107
108 get_test_suites SUITES
109
110 echo "Starting Robot test suites ${SUITES} ..."
111 pybot -N ${TESTPLAN} \
112       --removekeywords wuks -c critical -e exclude -e skip_if_${DISTROSTREAM} \
113       -v BUNDLEFOLDER:${BUNDLEFOLDER} \
114       -v BUNDLE_URL:${ACTUAL_BUNDLE_URL} \
115       -v CONTROLLER:${ODL_SYSTEM_IP} \
116       -v CONTROLLER1:${ODL_SYSTEM_2_IP} \
117       -v CONTROLLER2:${ODL_SYSTEM_3_IP} \
118       -v CONTROLLER_USER:${USER} \
119       -v JAVA_HOME:${JAVA_HOME} \
120       -v JDKVERSION:${JDKVERSION} \
121       -v JENKINS_WORKSPACE:${WORKSPACE} \
122       -v MININET:${TOOLS_SYSTEM_IP} \
123       -v MININET1:${TOOLS_SYSTEM_2_IP} \
124       -v MININET2:${TOOLS_SYSTEM_3_IP} \
125       -v MININET_USER:${USER} \
126       -v NEXUSURL_PREFIX:${NEXUSURL_PREFIX} \
127       -v NUM_ODL_SYSTEM:${NUM_ODL_SYSTEM} \
128       -v NUM_TOOLS_SYSTEM:${NUM_TOOLS_SYSTEM} \
129       -v ODL_STREAM:${DISTROSTREAM} \
130       -v ODL_SYSTEM_IP:${ODL_SYSTEM_IP} ${odl_variables} \
131       -v ODL_SYSTEM_USER:${USER} \
132       -v TOOLS_SYSTEM_IP:${TOOLS_SYSTEM_IP} ${tools_variables} \
133       -v TOOLS_SYSTEM_USER:${USER} \
134       -v USER_HOME:${HOME} \
135       -v WORKSPACE:/tmp \
136       ${TESTOPTIONS} ${SUITES} || true
137
138
139
140 echo "Examining the files in data/log and checking filesize"
141 ssh ${ODL_SYSTEM_1_IP} "ls -altr /tmp/${BUNDLEFOLDER}/data/log/"
142 ssh ${ODL_SYSTEM_1_IP} "du -hs /tmp/${BUNDLEFOLDER}/data/log/*"
143 ssh ${ODL_SYSTEM_2_IP} "ls -altr /tmp/${BUNDLEFOLDER}/data/log/"
144 ssh ${ODL_SYSTEM_2_IP} "du -hs /tmp/${BUNDLEFOLDER}/data/log/*"
145 ssh ${ODL_SYSTEM_3_IP} "ls -altr /tmp/${BUNDLEFOLDER}/data/log/"
146 ssh ${ODL_SYSTEM_3_IP} "du -hs /tmp/${BUNDLEFOLDER}/data/log/*"
147
148 set +e  # We do not want to create red dot just because something went wrong while fetching logs.
149 for i in `seq 1 ${NUM_ODL_SYSTEM}`
150 do
151     CONTROLLERIP=ODL_SYSTEM_${i}_IP
152     echo "Lets's take the karaf thread dump again"
153     ssh ${!CONTROLLERIP} "sudo ps aux" > ${WORKSPACE}/ps_after.log
154     pid=$(grep org.apache.karaf.main.Main ${WORKSPACE}/ps_after.log | grep -v grep | tr -s ' ' | cut -f2 -d' ')
155     echo "karaf main: org.apache.karaf.main.Main, pid:${pid}"
156     ssh ${!CONTROLLERIP} "${JAVA_HOME}/bin/jstack -l ${pid}" > ${WORKSPACE}/karaf_${i}_${pid}_threads_after.log || true
157     echo "killing karaf process..."
158     ssh "${!CONTROLLERIP}" bash -c 'ps axf | grep karaf | grep -v grep | awk '"'"'{print "kill -9 " $1}'"'"' | sh'
159 done
160 sleep 5
161 for i in `seq 1 ${NUM_ODL_SYSTEM}`
162 do
163     CONTROLLERIP=ODL_SYSTEM_${i}_IP
164     echo "Compressing karaf.log ${i}"
165     ssh ${!CONTROLLERIP} gzip --best /tmp/${BUNDLEFOLDER}/data/log/karaf.log
166     echo "Fetching compressed karaf.log ${i}"
167     scp "${!CONTROLLERIP}:/tmp/${BUNDLEFOLDER}/data/log/karaf.log.gz" "odl${i}_karaf.log.gz" && ssh ${!CONTROLLERIP} rm -f "/tmp/${BUNDLEFOLDER}/data/log/karaf.log.gz"
168     # TODO: Should we compress the output log file as well?
169     scp "${!CONTROLLERIP}:/tmp/${BUNDLEFOLDER}/data/log/karaf_console.log" "odl${i}_karaf_console.log" && ssh ${!CONTROLLERIP} rm -f "/tmp/${BUNDLEFOLDER}/data/log/karaf_console.log"
170     echo "Fetch GC logs"
171     # FIXME: Put member index in filename, instead of directory name.
172     mkdir -p "gclogs-${i}"
173     scp "${!CONTROLLERIP}:/tmp/${BUNDLEFOLDER}/data/log/*.log" "gclogs-${i}/" && ssh ${!CONTROLLERIP} rm -f "/tmp/${BUNDLEFOLDER}/data/log/*.log"
174 done
175
176 echo "Examine copied files"
177 ls -lt
178
179 true  # perhaps Jenkins is testing last exit code
180
181 # vim: ts=4 sw=4 sts=4 et ft=sh :