Chore: Remove stable/argon jobs
[releng/builder.git] / jjb / packaging / openstack-k8s-create-with-template.sh
1 #!/bin/bash -l
2 # SPDX-License-Identifier: EPL-1.0
3 ##############################################################################
4 # Copyright (c) 2021 The Linux Foundation and others.
5 #
6 # All rights reserved. This program and the accompanying materials
7 # are made available under the terms of the Eclipse Public License v1.0
8 # which accompanies this distribution, and is available at
9 # http://www.eclipse.org/legal/epl-v10.html
10 ##############################################################################
11 # shellcheck disable=SC2153,SC2034
echo "---> Create K8S cluster with pre-existing template"

# Strict mode: abort on errors, on unset variables and on failures anywhere
# in a pipeline; trace every command for the job console log.
set -o errexit -o nounset -o xtrace -o pipefail

# Load the shared helper library that provides lf-activate-venv.
# shellcheck disable=SC1090
. ~/lf-env.sh

# Python packages needed in the virtualenv used by this job.
k8s_job_pip_pkgs=(
    python-heatclient
    python-openstackclient
    urllib3~=1.26.15
    yq
)
lf-activate-venv --python python3 "${k8s_job_pip_pkgs[@]}"

# Retry/poll tuning and the overall result flag.
OS_TIMEOUT=20            # Upper bound on status polls per creation attempt (logged as minutes).
CLUSTER_RETRIES=3        # How many times cluster creation is attempted.
CLUSTER_SUCCESSFUL=false # Flipped to true once a cluster reaches CREATE_COMPLETE.

mkdir -p "${WORKSPACE}/archives"

# Map job parameters (environment) onto local lower-case names.
# Expansions without a ":-default" are mandatory: with nounset active the
# script stops at the first one that is missing.
boot_volume_size=${BOOT_VOLUME_SIZE}
cluster_name=${CLUSTER_NAME}
cluster_settle_time=${CLUSTER_SETTLE_TIME:-1m}
cluster_template_name=${CLUSTER_TEMPLATE_NAME}
cloud_provider_tag=${CLOUD_PROVIDER_TAG}
container_infra_prefix=${CONTAINER_INFRA_PREFIX}
etcd_volume_size=${ETCD_VOLUME_SIZE}
k8s_version=${K8S_VERSION}
keypair=${KEYPAIR}
kube_tag=${KUBE_TAG}
helm_client_url=${HELM_CLIENT_URL}
helm_sha256=${HELM_SHA256}
helm_version=${HELM_VERSION}
master_count=${MASTER_COUNT:-1}
master_flavor=${MASTER_FLAVOR}
master_lb_floating_ip_enabled=${MASTER_LB_FLOATING_IP_ENABLED:-false}
node_count=${NODE_COUNT:-2}
node_flavor=${NODE_FLAVOR}
os_cloud=${OS_CLOUD:-vex}

# Create the COE cluster from the pre-existing template, retrying the whole
# create/poll cycle up to CLUSTER_RETRIES times.
#
# Per attempt:
#   1. Submit the create request and extract the cluster UUID from the reply.
#   2. Poll the cluster status up to OS_TIMEOUT times, sleeping 90 seconds
#      before each poll.
#   3. On CREATE_COMPLETE set CLUSTER_SUCCESSFUL=true and stop.
#   4. On CREATE_FAILED dump diagnostics, delete the cluster, wait for the
#      delete to finish and move on to the next attempt.
#
# Reads:  os_cloud, cluster_name, cluster_template_name, keypair, master_count,
#         node_count, master_flavor, node_flavor and the label values below.
# Writes: CLUSTER_SUCCESSFUL, CLUSTER_STATUS, cluster_uuid.
# Exits 1 when the create request is not accepted or a failed cluster cannot
# be removed.
echo "INFO: Create a Cluster:${CLUSTER_NAME} for attempts:${CLUSTER_RETRIES}."
for ((try = 1; try <= CLUSTER_RETRIES; try++)); do
    # Comma-separated key=value list handed to --labels as a single word.
    # NOTE(review): master_lb_floating_ip_enabled is hard-coded to false here
    # although a variable of the same name is read from the environment
    # earlier in the script -- confirm whether that is intentional.
    cluster_labels="boot_volume_size=${boot_volume_size}"
    cluster_labels+=",container_infra_prefix=${container_infra_prefix}"
    cluster_labels+=",cloud_provider_tag=${cloud_provider_tag}"
    cluster_labels+=",helm_client_sha256=${helm_sha256}"
    cluster_labels+=",helm_client_tag=${helm_version}"
    cluster_labels+=",etcd_volume_size=${etcd_volume_size}"
    cluster_labels+=",kube_tag=${kube_tag}"
    cluster_labels+=",master_lb_floating_ip_enabled=false"
    cluster_labels+=",helm_client_url=${helm_client_url}"

    # Create the cluster using the pre-defined template. The reply is a
    # one-line status message that contains the cluster UUID.
    cluster_status=$(openstack --os-cloud "${os_cloud}" coe cluster create "${cluster_name}" \
        --cluster-template "${cluster_template_name}" \
        --keypair "${keypair}" \
        --master-count "${master_count}" \
        --node-count "${node_count}" \
        --master-flavor "${master_flavor}" \
        --flavor "${node_flavor}" \
        --labels "${cluster_labels}" \
        --floating-ip-disabled)

    # Validate the reply before polling: without an accepted request there is
    # no UUID to poll, and reusing one from a previous attempt would watch the
    # wrong cluster.
    if [[ -z "${cluster_status}" ]]; then
        echo "ERROR: Failed to create coe cluster ${cluster_name}"
        exit 1
    fi
    if [[ "${cluster_status}" != *accepted* ]]; then
        echo "ERROR: Unexpected response creating coe cluster ${cluster_name}: ${cluster_status}"
        exit 1
    fi
    # The UUID is the fifth whitespace-separated field of the reply.
    cluster_uuid=$(awk '{print $5}' <<<"${cluster_status}")
    if [[ -z "${cluster_uuid}" ]]; then
        echo "ERROR: Could not extract cluster uuid from response: ${cluster_status}"
        exit 1
    fi

    echo "INFO $try: Wait until ${OS_TIMEOUT} (in minutes) to rollout ${cluster_name}."
    for ((i = 1; i <= OS_TIMEOUT; i++)); do
        sleep 90

        CLUSTER_STATUS=$(openstack --os-cloud "${os_cloud}" coe cluster show "${cluster_uuid}" -c status -f value)
        echo "$i: ${CLUSTER_STATUS}"

        case "${CLUSTER_STATUS}" in
            CREATE_COMPLETE)
                echo "INFO: Cluster ${cluster_name} initialized on infrastructure successful."
                CLUSTER_SUCCESSFUL=true
                break
            ;;
            CREATE_FAILED)
                # Diagnostics are best effort: a failure to fetch them must not
                # abort the script (errexit) before the delete/retry below.
                reason=$(openstack --os-cloud "${os_cloud}" coe cluster show "${cluster_name}" \
                    -f value -c health_status_reason) || reason="<unavailable>"
                echo "ERROR: Failed to initialize infrastructure. Reason: ${reason}"
                openstack --os-cloud "${os_cloud}" coe cluster show "${cluster_name}" || true

                echo "INFO: Deleting cluster and re-try to create the cluster again ..."
                openstack --os-cloud "${os_cloud}" coe cluster delete "${cluster_name}"

                # Post delete, poll for up to 10 minutes (20 polls, 30 seconds
                # apart) to learn whether the cluster is fully removed.
                for ((j = 1; j <= 20; j++)); do
                    sleep 30

                    # "show" fails once the cluster no longer exists. Testing it
                    # in the condition keeps errexit from killing the script on
                    # what is actually the success case.
                    if ! delete_status=$(openstack --os-cloud "${os_cloud}" coe cluster show "${cluster_name}" \
                        -f value -c status); then
                        echo "INFO: Cluster show on ${cluster_name} came back empty. Assuming successful delete"
                        break
                    fi
                    echo "$j: ${delete_status}"

                    if [[ "${delete_status}" == "DELETE_FAILED" ]]; then
                        reason=$(openstack --os-cloud "${os_cloud}" coe cluster show "${cluster_name}" \
                            -f value -c health_status_reason) || reason="<unavailable>"
                        echo "ERROR: Failed to delete ${cluster_name}. Reason: ${reason}"

                        echo "INFO: Deleting failed cluster again: ${cluster_name}"
                        openstack --os-cloud "${os_cloud}" coe cluster delete "${cluster_name}"
                    fi
                done

                # If the cluster is still visible after polling, the delete has
                # not fully worked and a retry under the same name cannot
                # succeed, so exit forcefully.
                if openstack --os-cloud "${os_cloud}" coe cluster show "${cluster_name}" -f value -c stack_status; then
                    echo "ERROR: Cluster ${cluster_name} still in cloud output after polling. Quitting!"
                    exit 1
                fi
                # Leave the status-poll loop; the outer loop starts the next attempt.
                break
            ;;
            CREATE_IN_PROGRESS)
                echo "INFO: Waiting to initialize cluster infrastructure ..."
                continue
            ;;
            *)
                echo "ERROR: Unexpected status: ${CLUSTER_STATUS}"
                # DO NOT exit on unexpected status. Openstack cluster sometimes returns unexpected status
                # before returning an expected status. Just print the message and loop until we have
                # a confirmed state or timeout.
                # exit 1
            ;;
        esac
    done

    # Stop retrying as soon as one attempt produced a working cluster.
    if [[ "${CLUSTER_SUCCESSFUL}" == true ]]; then
        break
    fi
done