#!/bin/bash
# jjb/opendaylight-infra-stack.sh (post-change side of the diff, blob f5f5e9d38)
#
# Purpose:
#   Build a private virtualenv containing the OpenStack and Heat clients,
#   then create the Heat stack named by STACK_NAME, retrying a bounded
#   number of times. A stack that lands in CREATE_FAILED is deleted (and,
#   if the delete itself fails, abandoned through the orchestration REST
#   API) before the next attempt.
#
# Environment read by this script:
#   WORKSPACE     directory that holds the virtualenv and the
#                 opendaylight-infra-environment.yaml file
#   JOB_NAME      hashed with sum(1) to build the job_name stack parameter
#   BUILD_NUMBER  appended to that hash
#   SILO          passed through as the "silo" stack parameter
#   STACK_NAME    name of the stack to create
#
# Exit status:
#   0 when the stack reaches CREATE_COMPLETE, 1 otherwise.
#
# NOTE(review): the stack-template placeholder and the doubled braces in
# the awk program suggest this file is expanded by a templating step
# (presumably Jenkins Job Builder) before it runs -- confirm. If so, every
# literal brace must stay doubled, which is why this script deliberately
# avoids brace-style parameter expansion and function bodies.

# An empty STACK_NAME would make every substring check below match, so
# refuse to run without the two variables everything else depends on.
if [[ -z "$WORKSPACE" || -z "$STACK_NAME" ]]; then
    echo "ERROR: WORKSPACE and STACK_NAME must be set and non-empty." >&2
    exit 1
fi

virtualenv "$WORKSPACE/.venv-openstack" || exit 1
# shellcheck disable=SC1090
source "$WORKSPACE/.venv-openstack/bin/activate" || exit 1
PYTHON="$WORKSPACE/.venv-openstack/bin/python"
OPENSTACK="$WORKSPACE/.venv-openstack/bin/openstack"
# A failed pip self-upgrade is tolerable; a missing client is not.
"$PYTHON" -m pip install --upgrade pip
"$PYTHON" -m pip install --upgrade python-openstackclient python-heatclient || exit 1
"$PYTHON" -m pip freeze

cd /builder/openstack-hot || exit 1

JOB_SUM=$(echo "$JOB_NAME" | sum | awk '{{ print $1 }}')
VM_NAME="$JOB_SUM-$BUILD_NUMBER"

OS_TIMEOUT=10       # Minutes to wait for OpenStack VM to come online
STACK_RETRIES=3     # Number of times to retry creating a stack before fully giving up
STACK_SUCCESSFUL=false

# Print the current quota and rate limits so they show up in the console log.
"$PYTHON" "$OPENSTACK" limits show --absolute
"$PYTHON" "$OPENSTACK" limits show --rate

echo "Trying up to $STACK_RETRIES times to create $STACK_NAME."
for ((try = 1; try <= STACK_RETRIES; try++)); do
    "$PYTHON" "$OPENSTACK" stack create --timeout "$OS_TIMEOUT" \
        -t "{stack-template}" \
        -e "$WORKSPACE/opendaylight-infra-environment.yaml" \
        --parameter "job_name=$VM_NAME" \
        --parameter "silo=$SILO" \
        "$STACK_NAME"
    echo "$try: Waiting for $OS_TIMEOUT minutes to create $STACK_NAME."

    # Poll once a minute, for up to OS_TIMEOUT minutes, until the stack
    # reports a status other than CREATE_IN_PROGRESS.
    for ((i = 1; i <= OS_TIMEOUT; i++)); do
        sleep 60
        OS_STATUS=$("$PYTHON" "$OPENSTACK" stack show -f json -c stack_status "$STACK_NAME" | jq -r '.stack_status')
        echo "$i: $OS_STATUS"

        case "$OS_STATUS" in
            CREATE_COMPLETE)
                echo "Stack initialized on infrastructure successful."
                STACK_SUCCESSFUL=true
                break
                ;;
            CREATE_FAILED)
                echo "ERROR: Failed to initialize infrastructure. Deleting stack and possibly retrying to create..."
                "$PYTHON" "$OPENSTACK" stack delete --yes "$STACK_NAME"
                "$PYTHON" "$OPENSTACK" stack show "$STACK_NAME"
                # After stack delete, poll for 10m (20 x 30s) to learn when the
                # stack is fully removed. Once the "stack show" output no longer
                # contains the stack name we assume the delete succeeded and
                # break out to retry the create.
                for ((j = 1; j <= 20; j++)); do
                    sleep 30
                    STACK_SHOW=$("$PYTHON" "$OPENSTACK" stack show "$STACK_NAME")
                    echo "$j: $STACK_SHOW"
                    if [[ "$STACK_SHOW" == *"DELETE_FAILED"* ]]; then
                        echo "stack delete failed. trying to stack abandon now"
                        # stack abandon does not work on RS, therefore this
                        # acquires a token and uses the HTTP DELETE method to
                        # abandon DELETE_FAILED stacks.
                        # TODO: remove this workaround once RS fixes the issue upstream
                        # openstack stack abandon "$STACK_NAME"
                        STACK_ID=$("$PYTHON" "$OPENSTACK" stack show -f json -c "id" "$STACK_NAME" | jq -r '."id"')
                        TOKEN=$("$PYTHON" "$OPENSTACK" token issue -f json -c id | jq -r '.id')
                        # NOTE(review): the token is passed on the curl command
                        # line and is therefore visible in the process list
                        # while curl runs.
                        curl -si -X DELETE \
                            -H "Content-Type: application/json" \
                            -H "Accept: application/json" \
                            -H "x-auth-token: $TOKEN" \
                            "https://dfw.orchestration.api.rackspacecloud.com/v1/904885/stacks/$STACK_NAME/$STACK_ID/abandon"
                        STACK_SHOW=$("$PYTHON" "$OPENSTACK" stack show "$STACK_NAME")
                        echo "$STACK_SHOW"
                    fi
                    if [[ "$STACK_SHOW" != *"$STACK_NAME"* ]]; then
                        echo "stack show on $STACK_NAME came back empty. Assuming successful delete"
                        break
                    fi
                done
                # If the stack name still appears in the last "stack show"
                # output, the delete has not fully worked; give up.
                if [[ "$STACK_SHOW" == *"$STACK_NAME"* ]]; then
                    echo "stack $STACK_NAME still in stack show output after polling. Quitting!"
                    exit 1
                fi
                break
                ;;
            CREATE_IN_PROGRESS)
                echo "Waiting to initialize infrastructure."
                continue
                ;;
            *)
                echo "Unexpected status: $OS_STATUS"
                # DO NOT exit on unexpected status. Rackspace sometimes returns
                # an unexpected status before returning an expected one. Just
                # print the message and keep looping until we have a confirmed
                # state or time out.
                ;;
        esac
    done

    if [[ "$STACK_SUCCESSFUL" == true ]]; then
        break
    fi
done

# capture stack info in console logs
"$PYTHON" "$OPENSTACK" stack show "$STACK_NAME"

if [[ "$STACK_SUCCESSFUL" != true ]]; then
    exit 1
fi