Add retries to stacking 03/51703/8
authorJamo Luhrsen <jluhrsen@redhat.com>
Fri, 10 Feb 2017 22:14:19 +0000 (14:14 -0800)
committerJamo Luhrsen <jluhrsen@redhat.com>
Mon, 20 Feb 2017 05:38:12 +0000 (21:38 -0800)
Change-Id: I9d1c119ea4b60ee057b63ecb83192ff7fbbadf0f
Signed-off-by: Jamo Luhrsen <jluhrsen@redhat.com>
jjb/opendaylight-infra-stack.sh

index 8218cfff8e04e68d022836079abd5342f0c9495d..ae96b8c9872b3ed254973085bc2e225cfb2e2b8e 100644 (file)
@@ -10,36 +10,77 @@ cd /builder/openstack-hot
 JOB_SUM=`echo $JOB_NAME | sum | awk '{{ print $1 }}'`
 VM_NAME="$JOB_SUM-$BUILD_NUMBER"
 
+OS_TIMEOUT=10  # Minutes to wait for OpenStack VM to come online
+STACK_RETRIES=3  # Number of times to retry creating a stack before fully giving up
+STACK_SUCCESSFUL=false
 # seq X refers to waiting for X minutes for OpenStack to return
 # a status that is not CREATE_IN_PROGRESS before giving up.
-OS_TIMEOUT=15  # Minutes to wait for OpenStack VM to come online
 openstack --os-cloud rackspace limits show --absolute
 openstack --os-cloud rackspace limits show --rate
-openstack --os-cloud rackspace stack create --timeout $OS_TIMEOUT -t {stack-template} -e $WORKSPACE/opendaylight-infra-environment.yaml --parameter "job_name=$VM_NAME" --parameter "silo=$SILO" $STACK_NAME
-echo "Waiting for $OS_TIMEOUT minutes to create $STACK_NAME."
-for i in `seq $OS_TIMEOUT`; do
-    sleep 60
-    OS_STATUS=`openstack --os-cloud rackspace stack show -f json -c stack_status $STACK_NAME | jq -r '.stack_status'`
+echo "Trying up to $STACK_RETRIES times to create $STACK_NAME."
+for try in `seq $STACK_RETRIES`; do
+    openstack --os-cloud rackspace stack create --timeout $OS_TIMEOUT -t csit-2-instance-type.yaml -e $WORKSPACE/opendaylight-infra-environment.yaml --parameter "job_name=$VM_NAME" --parameter "silo=$SILO" $STACK_NAME
+    openstack --os-cloud rackspace stack list
+    echo "Waiting for $OS_TIMEOUT minutes to create $STACK_NAME."
+    for i in `seq $OS_TIMEOUT`; do
+        sleep 60
+        OS_STATUS=`openstack --os-cloud rackspace stack show -f json -c stack_status $STACK_NAME | jq -r '.stack_status'`
 
-    case "$OS_STATUS" in
-        CREATE_COMPLETE)
-            echo "Stack initialized on infrastructure successful."
-            break
-        ;;
-        CREATE_FAILED)
-            echo "ERROR: Failed to initialize infrastructure. Quitting..."
-            exit 1
-        ;;
-        CREATE_IN_PROGRESS)
-            echo "Waiting to initialize infrastructure."
-            continue
-        ;;
-        *)
-            echo "Unexpected status: $OS_STATUS"
-            exit 1
-        ;;
-    esac
+        case "$OS_STATUS" in
+            CREATE_COMPLETE)
+                echo "Stack initialized on infrastructure successful."
+                STACK_SUCCESSFUL=true
+                break
+            ;;
+            CREATE_FAILED)
+                echo "ERROR: Failed to initialize infrastructure. Deleting stack and possibly retrying to create..."
+                openstack --os-cloud rackspace stack list
+                openstack --os-cloud rackspace stack delete --yes $STACK_NAME
+                openstack --os-cloud rackspace stack show $STACK_NAME
+                # after stack delete, poll for 10m to know when stack is fully removed
+                # the logic here is that when "stack show $STACK_NAME" does not contain $STACK_NAME
+                # we assume it's successfully deleted and we can break to retry
+                for i in `seq 20`; do
+                    sleep 30;
+                    STACK_SHOW=$(openstack --os-cloud rackspace stack show $STACK_NAME)
+                    echo $STACK_SHOW
+                    if [[ $STACK_SHOW == *"DELETE_FAILED"* ]]; then
+                        echo "stack delete failed. trying to stack abandon now"
+                        openstack --os-cloud rackspace stack abandon $STACK_NAME
+                        STACK_SHOW=$(openstack --os-cloud rackspace stack show $STACK_NAME)
+                        echo $STACK_SHOW
+                    fi
+                    if [[ $STACK_SHOW != *"$STACK_NAME"* ]]; then
+                        echo "stack show on $STACK_NAME came back empty. Assuming successful delete"
+                        break
+                    fi
+                done
+                # if we still see $STACK_NAME in $STACK_SHOW it means the delete hasn't fully
+                # worked and we can exit forcefully
+                if [[ $STACK_SHOW == *"$STACK_NAME"* ]]; then
+                    echo "stack $STACK_NAME still in stack show output after polling. Quitting!"
+                    exit 1
+                fi
+                break
+            ;;
+            CREATE_IN_PROGRESS)
+                echo "Waiting to initialize infrastructure."
+                continue
+            ;;
+            *)
+                echo "Unexpected status: $OS_STATUS"
+                exit 1
+            ;;
+        esac
+    done
+    if $STACK_SUCCESSFUL; then
+        break
+    fi
 done
 
 # capture stack info in console logs
 openstack --os-cloud rackspace stack show $STACK_NAME
+
+if ! $STACK_SUCCESSFUL; then
+    exit 1
+fi