From 3f559299c232bbb7639d02243c95d6256cdf94d4 Mon Sep 17 00:00:00 2001 From: Alexandru Avadanii Date: Sun, 31 Dec 2017 19:35:17 +0100 Subject: lib.sh: Extend wait_for function to catch no resp wait_for function should be able to also check for minions that did not return or not respond, in addition to the return code. To keep it backwards compatible, condition the new check on the max attempt number being specified in decimal format (e.g. '10.0' unlike old '10'). Change-Id: If2512cf9121cdd795638efe7362ef0485d4e8d91 Signed-off-by: Alexandru Avadanii --- mcp/config/states/baremetal_init | 3 +-- mcp/config/states/dpdk | 3 +-- mcp/config/states/maas | 2 +- mcp/config/states/virtual_control_plane | 13 +++++-------- mcp/scripts/lib.sh | 14 +++++++++++--- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/mcp/config/states/baremetal_init b/mcp/config/states/baremetal_init index eeb08187f..ef2f78b70 100755 --- a/mcp/config/states/baremetal_init +++ b/mcp/config/states/baremetal_init @@ -29,8 +29,7 @@ salt -C 'kvm* or cmp*' service.force_reload salt-minion salt -C 'cmp*' state.apply linux.system salt -C 'cmp*' state.apply linux.network || true salt -C 'kvm* or cmp*' system.reboot -wait_for 90 "! salt -C 'kvm* or cmp*' test.ping | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 90.0 "salt -C 'kvm* or cmp*' test.ping" salt -C 'kvm* or cmp*' state.apply linux,ntp salt -C 'kvm* or cmp*' pkg.upgrade refresh=False diff --git a/mcp/config/states/dpdk b/mcp/config/states/dpdk index 653ffc0ed..5ae2aac48 100755 --- a/mcp/config/states/dpdk +++ b/mcp/config/states/dpdk @@ -13,8 +13,7 @@ CI_DEBUG=${CI_DEBUG:-0}; [[ "${CI_DEBUG}" =~ (false|0) ]] || set -x source "$(dirname "${BASH_SOURCE[0]}")/../../scripts/lib.sh" salt -I 'nova:compute' system.reboot -wait_for 90 "! salt -I 'nova:compute' test.ping | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 90.0 "salt -I 'nova:compute' test.ping" salt -I 'nova:compute' alternatives.set ovs-vswitchd /usr/lib/openvswitch-switch-dpdk/ovs-vswitchd-dpdk salt -I 'nova:compute' service.restart openvswitch-switch diff --git a/mcp/config/states/maas b/mcp/config/states/maas index 02afd2c6a..8f7a86611 100755 --- a/mcp/config/states/maas +++ b/mcp/config/states/maas @@ -101,4 +101,4 @@ while [ $rc -ne 0 ] && [ ${attempt} -lt ${total_attempts} ]; do ((attempt+=1)) done -wait_for 10 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" +wait_for 10.0 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" diff --git a/mcp/config/states/virtual_control_plane b/mcp/config/states/virtual_control_plane index c7768f746..0607b318b 100755 --- a/mcp/config/states/virtual_control_plane +++ b/mcp/config/states/virtual_control_plane @@ -27,11 +27,10 @@ if [ "${ERASE_ENV}" -eq 1 ]; then fi # KVM libvirt first, VCP deployment -wait_for 5 "salt -C 'kvm*' state.sls libvirt" +wait_for 5.0 "salt -C 'kvm*' state.sls libvirt" salt -C 'kvm* or cmp*' state.apply salt -wait_for 10 "! salt -C 'kvm*' state.sls salt.control | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 10.0 "salt -C 'kvm*' state.sls salt.control" vcp_nodes=$(salt --out yaml 'kvm01*' pillar.get salt:control:cluster:internal:node | \ awk '/\s+\w+:$/ {gsub(/:$/, "*"); print $1}') @@ -49,7 +48,7 @@ while [ $rc -ne 0 ] && [ ${attempt} -lt ${total_attempts} ]; do ((attempt+=1)) done -wait_for 10 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" +wait_for 10.0 "salt -C '* and not cfg01* and not mas01*' saltutil.sync_all" # Propagate APT proxy config created by curtin on baremetal nodes to VCP VMs APT_CONF_D_CURTIN='/etc/apt/apt.conf.d/90curtin-aptproxy' @@ -61,8 +60,7 @@ salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' cp.get_file \ wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply salt" wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' service.force_reload salt-minion" -wait_for 10 "! salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply linux,ntp | " \ - "tee /dev/stderr | grep -Eq '(Not connected|No response)'" +wait_for 10.0 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' state.apply linux,ntp" wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' ssh.set_auth_key ${SUDO_USER} \ $(awk 'NR==1{print $2}' "$(eval echo "~${SUDO_USER}/.ssh/authorized_keys")")" @@ -71,7 +69,6 @@ wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' ssh.set_auth_key ${SUDO_US salt -C 'prx*' file.write /etc/dhcp/dhclient-enter-hooks.d/no-default-route \ args='unset new_routers' salt -C 'prx*' system.reboot -wait_for 30 "! salt -C 'prx*' test.ping | " \ - "tee /dev/stderr | grep -Fq 'Not connected'" +wait_for 30.0 "salt -C 'prx*' test.ping" salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' pkg.upgrade refresh=False diff --git a/mcp/scripts/lib.sh b/mcp/scripts/lib.sh index 63a5adc5a..e9dd30960 100644 --- a/mcp/scripts/lib.sh +++ b/mcp/scripts/lib.sh @@ -437,13 +437,21 @@ function wait_for { local total_attempts=$1; shift local cmdstr=$* local sleep_time=10 - echo "[NOTE] Waiting for cmd to return success: ${cmdstr}" + echo "[wait_for] Waiting for cmd to return success: ${cmdstr}" # shellcheck disable=SC2034 for attempt in $(seq "${total_attempts}"); do - # shellcheck disable=SC2015 - eval "${cmdstr}" && return 0 || true + echo "[wait_for] Attempt ${attempt}/${total_attempts%.*} for: ${cmdstr}" + if [ "${total_attempts%.*}" = "${total_attempts}" ]; then + # shellcheck disable=SC2015 + eval "${cmdstr}" && echo "[wait_for] OK: ${cmdstr}" && return 0 || true + else + ( eval "${cmdstr}" || echo __fuel_wf_failure__ ) |& tee /dev/stderr | \ + grep -Eq '(Not connected|No response|__fuel_wf_failure__)' || \ + echo "[wait_for] OK: ${cmdstr}" && return 0 + fi echo -n '.'; sleep "${sleep_time}" done + echo "[wait_for] ERROR: Failed after max attempts: ${cmdstr}" return 1 ) } -- cgit 1.2.3-korg