about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Alexandru Avadanii <Alexandru.Avadanii@enea.com> 2017-12-18 22:12:23 +0100
committer: Alexandru Avadanii <Alexandru.Avadanii@enea.com> 2017-12-31 01:17:18 +0100
commit: ae331cdf3f4773ade96b1dc7d63e7d575288bbb1 (patch)
tree: 8366ee8fdb8586f3585f5d7b501fbcb73cc8bc61
parent: f4b727ab3590be67acdb0e5a8405d91218870821 (diff)
[baremetal] MaaS: Reduce timeout values
`maas_fixup` is already re-entrant, so we can execute it more than once during a commissioning/deploy cycle. Reduce the timeout waiting for all nodes to reach a stable state, so nodes stuck in 'Ready' state instead of reaching 'Deploying' get dealt with sooner (~5 min vs old 30 min).

While at it, let `maas_fixup` handle machine deploy as well, so we can catch nodes stuck in 'Ready' state and re-trigger the deploy.

Change-Id: Id24cc97b17489835c5846288639a9a6032bd320a
Signed-off-by: Alexandru Avadanii <Alexandru.Avadanii@enea.com>
(cherry picked from commit 8da73521d3b9347a982ea6e77114bba0d0f0adeb)
-rwxr-xr-x mcp/config/states/maas 17
-rw-r--r-- mcp/patches/0003-maas-region-force-artifact-download.patch 4
-rw-r--r-- mcp/salt-formulas/maas/machines/mark_broken_fixed.sls 2
3 files changed, 11 insertions, 12 deletions
diff --git a/mcp/config/states/maas b/mcp/config/states/maas
index 9ad053805..bf2de28db 100755
--- a/mcp/config/states/maas
+++ b/mcp/config/states/maas
@@ -19,7 +19,7 @@ function maas_fixup() {
local statuscmd="salt 'mas01*' --out yaml state.apply maas.machines.status"
local ncount=$(salt --out yaml 'mas01*' pillar.get maas:region:machines | \
grep -cE '^\s{2}\w+:$')
- wait_for 180 "${statuscmd} | tee /dev/stderr | " \
+ wait_for 30 "${statuscmd} | tee /dev/stderr | " \
"grep -Eq '((Deployed|Ready): ${ncount}|status: (Failed|Allocated))'"
local statusout=$(eval "${statuscmd}")
@@ -28,7 +28,7 @@ function maas_fixup() {
for node_system_id in ${fcnodes}; do
salt -C 'mas01*' state.apply maas.machines.delete \
pillar="{'system_id': '${node_system_id}'}"
- sleep 30
+ sleep 10
done
if [ -n "${fcnodes}" ]; then
salt -C 'mas01*' state.apply maas.machines
@@ -37,12 +37,14 @@ function maas_fixup() {
local fdnodes=$(echo "${statusout}" | \
grep -Pzo 'status: (Failed deployment|Allocated)\n\s+system_id: \K.+\n')
+ local rnodes=$(echo "${statusout}" | \
+ grep -Pzo 'status: Ready\n\s+system_id: \K.+\n')
for node_system_id in ${fdnodes}; do
salt -C 'mas01*' state.apply maas.machines.mark_broken_fixed \
pillar="{'system_id': '${node_system_id}'}"
- sleep 30
+ sleep 10
done
- if [ -n "${fdnodes}" ]; then
+ if [ -n "${fdnodes}" ] || [ -n "${rnodes}" ]; then
salt -C 'mas01*' state.apply maas.machines.deploy
return 1
fi
@@ -72,16 +74,13 @@ salt -C 'cfg01*' state.apply maas.pxe_route
wait_for 10 "salt -C 'mas01*' state.apply maas.region"
salt -C 'mas01*' state.apply maas.machines
-wait_for 10 maas_fixup
+# MaaS node deployment
+wait_for 20 maas_fixup
# cleanup outdated salt keys
salt-key --out yaml | awk '!/^(minions|- cfg01|- mas01)/ {print $2}' | \
xargs -I{} salt-key -yd {}
-# MaaS node deployment
-salt -C 'mas01*' state.apply maas.machines.deploy
-wait_for 10 maas_fixup
-
salt -C 'mas01*' pillar.item\
maas:region:admin:username \
maas:region:admin:password
diff --git a/mcp/patches/0003-maas-region-force-artifact-download.patch b/mcp/patches/0003-maas-region-force-artifact-download.patch
index ecda80a02..56e3bd504 100644
--- a/mcp/patches/0003-maas-region-force-artifact-download.patch
+++ b/mcp/patches/0003-maas-region-force-artifact-download.patch
@@ -66,8 +66,8 @@ new file mode 100644
+maas login {{ region.admin.username }} \
+ http://{{ region.bind.host }}:5240/MAAS/api/2.0 - < \
+ /var/lib/maas/.maas_credentials || exit 1
-+# wait max 15 min for service up / image download, 5 min region to rack sync
-+wait_for 90 "grep -qzE '(Unable to probe for DHCP servers|DHCP probe complete).*Rack controller' /var/log/maas/rackd.log"
++# wait max 5 min for service up, 15 min image download, 5 min region to rack sync
++wait_for 30 "grep -qzE '(Unable to probe for DHCP servers|DHCP probe complete).*Rack controller' /var/log/maas/rackd.log"
+maas opnfv boot-resources import || exit 2
+wait_for 90 "! maas opnfv boot-resources is-importing | grep -q 'true'"
+maas opnfv rack-controllers import-boot-images || exit 3
diff --git a/mcp/salt-formulas/maas/machines/mark_broken_fixed.sls b/mcp/salt-formulas/maas/machines/mark_broken_fixed.sls
index e036d610d..17a7df8d8 100644
--- a/mcp/salt-formulas/maas/machines/mark_broken_fixed.sls
+++ b/mcp/salt-formulas/maas/machines/mark_broken_fixed.sls
@@ -14,6 +14,6 @@ maas_login_admin:
# TODO: implement mark_broken_fixed_machine via _modules/maas.py
mark_broken_fixed_machine:
cmd.run:
- - name: "maas login {{ region.admin.username }} http://{{ region.bind.host }}:5240/MAAS/api/2.0 - < /var/lib/maas/.maas_credentials && maas opnfv machine mark-broken {{ pillar['system_id'] }} && sleep 30 && maas opnfv machine mark-fixed {{ pillar['system_id'] }}"
+ - name: "maas login {{ region.admin.username }} http://{{ region.bind.host }}:5240/MAAS/api/2.0 - < /var/lib/maas/.maas_credentials && maas opnfv machine mark-broken {{ pillar['system_id'] }} && sleep 10 && maas opnfv machine mark-fixed {{ pillar['system_id'] }}"
- require:
- cmd: maas_login_admin