jjb/packaging/openstack-k8s-create-with-template.sh

   1 #!/bin/bash -l
   2 # SPDX-License-Identifier: EPL-1.0
   3 ##############################################################################
   4 # Copyright (c) 2021 The Linux Foundation and others.
   5 #
   6 # All rights reserved. This program and the accompanying materials
   7 # are made available under the terms of the Eclipse Public License v1.0
   8 # which accompanies this distribution, and is available at
   9 # http://www.eclipse.org/legal/epl-v10.html
  10 ##############################################################################
  11 # shellcheck disable=SC2153,SC2034
  12 echo "---> Create K8S cluster with pre-existing template"
  13
  14 set -eux -o pipefail
  15
  16 # shellcheck disable=SC1090
  17 . ~/lf-env.sh
  18
  19 lf-activate-venv --python python3 \
  20     python-heatclient \
  21     python-openstackclient \
  22     yq
  23
  24 OS_TIMEOUT=20       # Wait time in minutes for OpenStack cluster to come up.
  25 CLUSTER_RETRIES=3   # Number of times to retry creating a cluster.
  26 CLUSTER_SUCCESSFUL=false
  27
  28 mkdir -p "$WORKSPACE/archives"
  29
  30 boot_volume_size="${BOOT_VOLUME_SIZE}"
  31 cluster_name="${CLUSTER_NAME}"
  32 cluster_settle_time="${CLUSTER_SETTLE_TIME:-1m}"
  33 cluster_template_name="${CLUSTER_TEMPLATE_NAME}"
  34 cloud_provider_tag="${CLOUD_PROVIDER_TAG}"
  35 container_infra_prefix="${CONTAINER_INFRA_PREFIX}"
  36 etcd_volume_size="${ETCD_VOLUME_SIZE}"
  37 k8s_version="${K8S_VERSION}"
  38 keypair="${KEYPAIR}"
  39 kube_tag="${KUBE_TAG}"
  40 helm_client_url="${HELM_CLIENT_URL}"
  41 helm_sha256="${HELM_SHA256}"
  42 helm_version="${HELM_VERSION}"
  43 master_count="${MASTER_COUNT:-1}"
  44 master_flavor="${MASTER_FLAVOR}"
  45 master_lb_floating_ip_enabled="${MASTER_LB_FLOATING_IP_ENABLED:-false}"
  46 node_count="${NODE_COUNT:-2}"
  47 node_flavor="${NODE_FLAVOR}"
  48 os_cloud="${OS_CLOUD:-vex}"
  49
  50 echo "INFO: Create a Cluster:${CLUSTER_NAME} for attempts:${CLUSTER_RETRIES}."
  51 for try in $(seq $CLUSTER_RETRIES); do
  52     # shellcheck disable=SC1083
  53
  54     # Create the cluster using pre-defined template. Returns the status which includes the $cluster_uuid
  55     cluster_status=$(openstack --os-cloud "${os_cloud}" coe cluster create "${cluster_name}" \
  56         --cluster-template "${cluster_template_name}" \
  57         --keypair "${keypair}" \
  58         --master-count "${master_count}" \
  59         --node-count "${node_count}" \
  60         --master-flavor "${master_flavor}" \
  61         --flavor "${node_flavor}" \
  62         --labels \
  63 boot_volume_size="${boot_volume_size}",\
  64 container_infra_prefix="${container_infra_prefix}",\
  65 cloud_provider_tag="${cloud_provider_tag}",\
  66 helm_client_sha256="${helm_sha256}",\
  67 helm_client_tag="${helm_version}",\
  68 etcd_volume_size="${etcd_volume_size}",\
  69 kube_tag="${kube_tag}",\
  70 master_lb_floating_ip_enabled=false,\
  71 helm_client_url="${helm_client_url}" \
  72         --floating-ip-disabled)
  73
  74     # Check return status and extract the $cluster_uuid from return status
  75     if [[ -z "$cluster_status" ]]; then
  76         echo "ERROR: Failed to create coe cluster ${cluster_name}"
  77         exit 1
  78     elif [[ "${cluster_status}" =~ .*accepted.* ]]; then
  79         cluster_uuid=$(echo "${cluster_status}" | awk -F' ' '{print $5}')
  80     fi
  81
  82     echo "INFO $try: Wait until ${OS_TIMEOUT} (in minutes) to rollout ${cluster_name}."
  83     for i in $(seq $OS_TIMEOUT); do
  84         sleep 90
  85
  86         CLUSTER_STATUS=$(openstack --os-cloud "$os_cloud" coe cluster show "$cluster_uuid" -c status -f value)
  87         echo "$i: ${CLUSTER_STATUS}"
  88
  89         case "${CLUSTER_STATUS}" in
  90             CREATE_COMPLETE)
  91                 echo "INFO: Cluster ${cluster_name} initialized on infrastructure successful."
  92                 CLUSTER_SUCCESSFUL=true
  93                 break
  94             ;;
  95             CREATE_FAILED)
  96                 reason=$(openstack coe cluster show "${cluster_name}" -f value -c health_status_reason)
  97                 echo "ERROR: Failed to initialize infrastructure. Reason: ${reason}"
  98                 openstack ceo cluster show "${cluster_name}"
  99
 100                 echo "INFO: Deleting cluster and re-try to create the cluster again ..."
 101                 openstack coe cluster delete "${cluster_name}"
 102
 103                 # Post delete, poll for 5m to learn if cluster is fully removed
 104                 for j in $(seq 20); do
 105                     sleep 30
 106                     delete_status=$(openstack coe cluster show "${cluster_name}" -f value -c status)
 107                     echo "$j: ${delete_status}"
 108                     if [[ ${delete_status} == "DELETE_FAILED" ]]; then
 109                         reason=$(openstack coe cluster show "${cluster_name}" -f value -c health_status_reason)
 110                         echo "ERROR: Failed to delete ${cluster_name}. Reason: ${reason}"
 111
 112                         echo "INFO: Deleting failed cluster again: ${cluster_name}"
 113                         openstack coe cluster delete "${cluster_name}"
 114                     fi
 115
 116                     if ! openstack coe cluster show "${cluster_name}" -f value -c status; then
 117                         echo "INFO: Cluster show on ${cluster_name} came back empty. Assuming successful delete"
 118                         break
 119                     fi
 120                 done
 121
 122                 # If we still see $CLUSTER_NAME in `openstack coe cluster show` this infers the delete hasn't fully
 123                 # worked and we can exit forcefully
 124                 if openstack coe cluster show "${cluster_name}" -f value -c stack_status; then
 125                     echo "ERROR: Cluster ${cluster_name} still in cloud output after polling. Quitting!"
 126                     exit 1
 127                 fi
 128                 break
 129             ;;
 130             CREATE_IN_PROGRESS)
 131                 echo "INFO: Waiting to initialize cluster infrastructure ..."
 132                 continue
 133             ;;
 134             *)
 135                 echo "ERROR: Unexpected status: ${OS_STATUS}"
 136                 # DO NOT exit on unexpected status. Openstack cluster sometimes returns unexpected status
 137                 # before returning an expected status. Just print the message and loop until we have
 138                 # a confirmed state or timeout.
 139                 # exit 1
 140             ;;
 141         esac
 142     done
 143     if $CLUSTER_SUCCESSFUL; then
 144         break
 145     fi
 146 done