From 2b7c862e8c668d8fabc784680b05bec75baff2c6 Mon Sep 17 00:00:00 2001 From: Alexandru Avadanii Date: Sun, 31 Dec 2017 19:35:17 +0100 Subject: lib.sh: Extend wait_for function to catch no resp wait_for function should be able to also check for minions that did not return or not respond, in addition to the return code. To keep it backwards compatible, condition the new check on the max attempt number being specified in decimal format (e.g. '10.0' unlike old '10'). Change-Id: If2512cf9121cdd795638efe7362ef0485d4e8d91 Signed-off-by: Alexandru Avadanii (cherry picked from commit 3f559299c232bbb7639d02243c95d6256cdf94d4) --- mcp/config/states/baremetal_init | 3 +-- mcp/config/states/dpdk | 3 +-- mcp/config/states/maas | 2 +- mcp/config/states/virtual_control_plane | 13 +++++-------- mcp/scripts/lib.sh | 14 +++++++++++--- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/mcp/config/states/baremetal_init b/mcp/config/states/baremetal_init index 5674e6227..5a8cb49fc 100755 --- a/mcp/config/states/baremetal_init +++ b/mcp/config/states/baremetal_init @@ -29,7 +29,6 @@ salt -C 'kvm* or cmp*' service.force_reload salt-minion salt -C 'cmp*' state.apply linux.system salt -C 'cmp*' state.apply linux.network || true salt -C 'kvm* or cmp*' system.reboot -wait_for 90 "! salt -C 'kvm* or cmp*' test.ping | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 90.0 "salt -C 'kvm* or cmp*' test.ping" salt -C 'kvm* or cmp*' state.apply linux,ntp diff --git a/mcp/config/states/dpdk b/mcp/config/states/dpdk index eb00d7279..1b2d269bc 100755 --- a/mcp/config/states/dpdk +++ b/mcp/config/states/dpdk @@ -13,8 +13,7 @@ CI_DEBUG=${CI_DEBUG:-0}; [[ "${CI_DEBUG}" =~ (false|0) ]] || set -x source "$(dirname "${BASH_SOURCE[0]}")/../../scripts/lib.sh" salt -I 'nova:compute' system.reboot -wait_for 90 "! 
salt -I 'nova:compute' test.ping | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 90.0 "salt -I 'nova:compute' test.ping" salt -I 'nova:compute' state.sls linux.network # switch to UCA repos since fuel-infra packages have bugs diff --git a/mcp/config/states/maas b/mcp/config/states/maas index 2062cbabb..39f6badef 100755 --- a/mcp/config/states/maas +++ b/mcp/config/states/maas @@ -102,4 +102,4 @@ while [ $rc -ne 0 ] && [ ${attempt} -lt ${total_attempts} ]; do ((attempt+=1)) done -wait_for 10 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" +wait_for 10.0 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" diff --git a/mcp/config/states/virtual_control_plane b/mcp/config/states/virtual_control_plane index 039673ccf..33cc9dce5 100755 --- a/mcp/config/states/virtual_control_plane +++ b/mcp/config/states/virtual_control_plane @@ -27,11 +27,10 @@ if [ "${ERASE_ENV}" -eq 1 ]; then fi # KVM libvirt first, VCP deployment -wait_for 5 "salt -C 'kvm*' state.sls libvirt" +wait_for 5.0 "salt -C 'kvm*' state.sls libvirt" salt -C 'kvm* or cmp*' state.apply salt -wait_for 10 "! 
salt -C 'kvm*' state.sls salt.control | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 10.0 "salt -C 'kvm*' state.sls salt.control" vcp_nodes=$(salt --out yaml 'kvm01*' pillar.get salt:control:cluster:internal:node | \ awk '/\s+\w+:$/ {gsub(/:$/, "*"); print $1}') @@ -49,7 +48,7 @@ while [ $rc -ne 0 ] && [ ${attempt} -lt ${total_attempts} ]; do ((attempt+=1)) done -wait_for 10 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" +wait_for 10.0 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" # Propagate APT proxy config created by curtin on baremetal nodes to VCP VMs APT_CONF_D_CURTIN='/etc/apt/apt.conf.d/90curtin-aptproxy' @@ -61,8 +60,7 @@ salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' cp.get_file \ wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply salt" wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' service.force_reload salt-minion" -wait_for 10 "! salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply linux,ntp | " \ - "tee /dev/stderr | grep -Eq '(Not connected|No response)'" +wait_for 10.0 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply linux,ntp" wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' ssh.set_auth_key ${SUDO_USER} \ $(awk 'NR==1{print $2}' "$(eval echo "~${SUDO_USER}/.ssh/authorized_keys")")" @@ -71,5 +69,4 @@ wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' ssh.set_auth_key ${SUDO_US salt -C 'prx*' file.write /etc/dhcp/dhclient-enter-hooks.d/no-default-route \ args='unset new_routers' salt -C 'prx*' system.reboot -wait_for 30 "! 
salt -C 'prx*' test.ping | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 30.0 "salt -C 'prx*' test.ping" diff --git a/mcp/scripts/lib.sh b/mcp/scripts/lib.sh index e32f99511..210288022 100644 --- a/mcp/scripts/lib.sh +++ b/mcp/scripts/lib.sh @@ -436,13 +436,21 @@ function wait_for { local total_attempts=$1; shift local cmdstr=$* local sleep_time=10 - echo "[NOTE] Waiting for cmd to return success: ${cmdstr}" + echo "[wait_for] Waiting for cmd to return success: ${cmdstr}" # shellcheck disable=SC2034 for attempt in $(seq "${total_attempts}"); do - # shellcheck disable=SC2015 - eval "${cmdstr}" && return 0 || true + echo "[wait_for] Attempt ${attempt}/${total_attempts%.*} for: ${cmdstr}" + if [ "${total_attempts%.*}" = "${total_attempts}" ]; then + # shellcheck disable=SC2015 + eval "${cmdstr}" && echo "[wait_for] OK: ${cmdstr}" && return 0 || true + else + ( eval "${cmdstr}" || echo __fuel_wf_failure__ ) |& tee /dev/stderr | \ + grep -Eq '(Not connected|No response|__fuel_wf_failure__)' || \ + { echo "[wait_for] OK: ${cmdstr}"; return 0; } # grouped: a bare '&& return 0' would also run when grep matched a failure + fi echo -n '.'; sleep "${sleep_time}" done + echo "[wait_for] ERROR: Failed after max attempts: ${cmdstr}" return 1 ) } -- cgit 1.2.3-korg