From 512968e7b23a077b7396e494624c94468afe9e38 Mon Sep 17 00:00:00 2001 From: Alexandru Avadanii Date: Sat, 22 Sep 2018 21:54:01 +0200 Subject: [maas] Adopt maas, maasng proposed functions JIRA: FUEL-364 Change-Id: Ia470fc8103713e7a06cd9647675b0edfb4342bf8 Signed-off-by: Alexandru Avadanii --- .../0001-maas-region-skip-credentials-update.patch | 2 +- .../0002-maas-region-allow-timeout-override.patch | 2 +- .../0003-Add-machines.delete-co-pxe_nat-sls.patch | 137 ----------- ...d-wait_for-maas.py-wait_for_-attempts-arg.patch | 258 +++++++++++++++++++++ 4 files changed, 260 insertions(+), 139 deletions(-) delete mode 100644 mcp/patches/salt-formula-maas/0003-Add-machines.delete-co-pxe_nat-sls.patch create mode 100644 mcp/patches/salt-formula-maas/0003-Extend-wait_for-maas.py-wait_for_-attempts-arg.patch (limited to 'mcp/patches') diff --git a/mcp/patches/salt-formula-maas/0001-maas-region-skip-credentials-update.patch b/mcp/patches/salt-formula-maas/0001-maas-region-skip-credentials-update.patch index 4c9fdf553..f238978f2 100644 --- a/mcp/patches/salt-formula-maas/0001-maas-region-skip-credentials-update.patch +++ b/mcp/patches/salt-formula-maas/0001-maas-region-skip-credentials-update.patch @@ -21,7 +21,7 @@ Signed-off-by: Alexandru Avadanii 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/maas/region.sls b/maas/region.sls -index fd54fb1..e0f65b0 100644 +index 066490c..de5054a 100644 --- a/maas/region.sls +++ b/maas/region.sls @@ -6,10 +6,9 @@ maas_region_packages: diff --git a/mcp/patches/salt-formula-maas/0002-maas-region-allow-timeout-override.patch b/mcp/patches/salt-formula-maas/0002-maas-region-allow-timeout-override.patch index 58bed30f7..10dfddf48 100644 --- a/mcp/patches/salt-formula-maas/0002-maas-region-allow-timeout-override.patch +++ b/mcp/patches/salt-formula-maas/0002-maas-region-allow-timeout-override.patch @@ -43,7 +43,7 @@ Signed-off-by: Alexandru Avadanii 1 file changed, 30 insertions(+) diff --git a/maas/region.sls b/maas/region.sls -index e0f65b0..5da3a7f 100644 +index de5054a..4a7f6cc 100644 --- a/maas/region.sls +++ b/maas/region.sls @@ -38,6 +38,36 @@ restore_maas_database_{{ region.database.name }}: diff --git a/mcp/patches/salt-formula-maas/0003-Add-machines.delete-co-pxe_nat-sls.patch b/mcp/patches/salt-formula-maas/0003-Add-machines.delete-co-pxe_nat-sls.patch deleted file mode 100644 index 9f13c4616..000000000 --- a/mcp/patches/salt-formula-maas/0003-Add-machines.delete-co-pxe_nat-sls.patch +++ /dev/null @@ -1,137 +0,0 @@ -From: Alexandru Avadanii -Date: Sun, 19 Aug 2018 05:38:27 +0200 -Subject: [PATCH] Add machines.delete & co, pxe_nat sls - -Signed-off-by: Alexandru Avadanii ---- - maas/machines/delete.sls | 20 ++++++++++++ - maas/machines/mark_broken_fixed.sls | 20 ++++++++++++ - maas/machines/override_failed_testing.sls | 20 ++++++++++++ - maas/pxe_nat.sls | 37 +++++++++++++++++++++++ - 4 files changed, 97 insertions(+) - create mode 100644 maas/machines/delete.sls - create mode 100644 maas/machines/mark_broken_fixed.sls - create mode 100644 maas/machines/override_failed_testing.sls - create mode 100644 maas/pxe_nat.sls - -diff --git a/maas/machines/delete.sls b/maas/machines/delete.sls -new file mode 100644 -index 0000000..2903f92 ---- /dev/null -+++ b/maas/machines/delete.sls -@@ -0,0 +1,20 @@ -+############################################################################## -+# Copyright (c) 2017 Mirantis Inc., Enea AB and others. -+# All rights reserved. This program and the accompanying materials -+# are made available under the terms of the Apache License, Version 2.0 -+# which accompanies this distribution, and is available at -+# http://www.apache.org/licenses/LICENSE-2.0 -+############################################################################## -+{%- from "maas/map.jinja" import region with context %} -+ -+maas_login_admin: -+ cmd.run: -+ - name: "maas-region apikey --username {{ region.admin.username }} > /var/lib/maas/.maas_credentials" -+ - unless: 'test -e /var/lib/maas/.maas_credentials' -+ -+# TODO: implement delete_machine via _modules/maas.py -+delete_machine: -+ cmd.run: -+ - name: "maas login {{ region.admin.username }} http://{{ region.bind.host }}:5240/MAAS/api/2.0 - < /var/lib/maas/.maas_credentials && maas opnfv machine delete {{ pillar['system_id'] }}" -+ - require: -+ - cmd: maas_login_admin -diff --git a/maas/machines/mark_broken_fixed.sls b/maas/machines/mark_broken_fixed.sls -new file mode 100644 -index 0000000..46691bb ---- /dev/null -+++ b/maas/machines/mark_broken_fixed.sls -@@ -0,0 +1,20 @@ -+############################################################################## -+# Copyright (c) 2017 Mirantis Inc., Enea AB and others. -+# All rights reserved. This program and the accompanying materials -+# are made available under the terms of the Apache License, Version 2.0 -+# which accompanies this distribution, and is available at -+# http://www.apache.org/licenses/LICENSE-2.0 -+############################################################################## -+{%- from "maas/map.jinja" import region with context %} -+ -+maas_login_admin: -+ cmd.run: -+ - name: "maas-region apikey --username {{ region.admin.username }} > /var/lib/maas/.maas_credentials" -+ - unless: 'test -e /var/lib/maas/.maas_credentials' -+ -+# TODO: implement mark_broken_fixed_machine via _modules/maas.py -+mark_broken_fixed_machine: -+ cmd.run: -+ - name: "maas login {{ region.admin.username }} http://{{ region.bind.host }}:5240/MAAS/api/2.0 - < /var/lib/maas/.maas_credentials && maas opnfv machine mark-broken {{ pillar['system_id'] }} && sleep 10 && maas opnfv machine mark-fixed {{ pillar['system_id'] }} && maas opnfv machine test {{ pillar['system_id'] }} testing_scripts=fio" -+ - require: -+ - cmd: maas_login_admin -diff --git a/maas/machines/override_failed_testing.sls b/maas/machines/override_failed_testing.sls -new file mode 100644 -index 0000000..e7fe1d2 ---- /dev/null -+++ b/maas/machines/override_failed_testing.sls -@@ -0,0 +1,20 @@ -+############################################################################## -+# Copyright (c) 2018 Mirantis Inc., Enea AB and others. -+# All rights reserved. This program and the accompanying materials -+# are made available under the terms of the Apache License, Version 2.0 -+# which accompanies this distribution, and is available at -+# http://www.apache.org/licenses/LICENSE-2.0 -+############################################################################## -+{%- from "maas/map.jinja" import region with context %} -+ -+maas_login_admin: -+ cmd.run: -+ - name: "maas-region apikey --username {{ region.admin.username }} > /var/lib/maas/.maas_credentials" -+ - unless: 'test -e /var/lib/maas/.maas_credentials' -+ -+# TODO: implement override_failed_testing via _modules/maas.py -+mark_broken_fixed_machine: -+ cmd.run: -+ - name: "maas login {{ region.admin.username }} http://{{ region.bind.host }}:5240/MAAS/api/2.0 - < /var/lib/maas/.maas_credentials && maas opnfv machine override-failed-testing {{ pillar['system_id'] }}" -+ - require: -+ - cmd: maas_login_admin -diff --git a/maas/pxe_nat.sls b/maas/pxe_nat.sls -new file mode 100644 -index 0000000..8a03c4f ---- /dev/null -+++ b/maas/pxe_nat.sls -@@ -0,0 +1,37 @@ -+############################################################################## -+# Copyright (c) 2017 Mirantis Inc., Enea AB and others. -+# All rights reserved. This program and the accompanying materials -+# are made available under the terms of the Apache License, Version 2.0 -+# which accompanies this distribution, and is available at -+# http://www.apache.org/licenses/LICENSE-2.0 -+############################################################################## -+net.ipv4.ip_forward: -+ sysctl.present: -+ - value: 1 -+ -+iptables_pxe_nat: -+ iptables.append: -+ - table: nat -+ - chain: POSTROUTING -+ - jump: MASQUERADE -+ - destination: 0/0 -+ - source: {{ salt['pillar.get']('_param:single_address') }}/{{ salt['pillar.get']('_param:opnfv_net_admin_mask') }} -+ - save: True -+ -+iptables_pxe_source: -+ iptables.append: -+ - table: filter -+ - chain: INPUT -+ - jump: ACCEPT -+ - destination: 0/0 -+ - source: {{ salt['pillar.get']('_param:single_address') }}/{{ salt['pillar.get']('_param:opnfv_net_admin_mask') }} -+ - save: True -+ -+iptables_pxe_destination: -+ iptables.append: -+ - table: filter -+ - chain: INPUT -+ - jump: ACCEPT -+ - destination: {{ salt['pillar.get']('_param:single_address') }}/{{ salt['pillar.get']('_param:opnfv_net_admin_mask') }} -+ - source: 0/0 -+ - save: True diff --git a/mcp/patches/salt-formula-maas/0003-Extend-wait_for-maas.py-wait_for_-attempts-arg.patch b/mcp/patches/salt-formula-maas/0003-Extend-wait_for-maas.py-wait_for_-attempts-arg.patch new file mode 100644 index 000000000..aa4d95deb --- /dev/null +++ b/mcp/patches/salt-formula-maas/0003-Extend-wait_for-maas.py-wait_for_-attempts-arg.patch @@ -0,0 +1,258 @@ +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +: Copyright (c) 2018 Mirantis Inc., Enea AB and others. +: +: All rights reserved. This program and the accompanying materials +: are made available under the terms of the Apache License, Version 2.0 +: which accompanies this distribution, and is available at +: http://www.apache.org/licenses/LICENSE-2.0 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +From: Alexandru Avadanii +Date: Sun, 23 Sep 2018 03:57:27 +0200 +Subject: [PATCH] Extend wait_for maas.py, wait_for_* attempts arg + +1. maas.py: Extend wait_for states with timeout param + +Extend the wait_for states with a timeout parameter. +The timeout value is taken from reclass pillar data if +defined. Oterwise, the states use the default value. +Based on Ting's PR [1], slightly refactored. + +2. maas.py: Extend `req_status` support to multiple values + +Previously, req_status could be one of the MaaS status strings, e.g. +'Ready'. Extend matching to '|'-separated statuses (e.g. +'Ready|Deployed') to allow idempotency in MaaS machine commissioning +and deployment cycles. + +Also provide a `maas.machines.wait_for_ready_or_deployed` sls. + +3. maas.py: wait_for_*: Add attempts arg + +Introduce a new parameter that allows a maximum number of automatic +recovery attempts for the common failures w/ machine operations. +If not present in pillar data, it defaults to 0 (OFF). + +Common error states, possible cause and automatic recovery pattern: +* New + - usually indicates issues with BMC connectivity (no network route, + but on rare occassions it happens due to MaaS API being flaky); + - fix: delete the machine, (re)process machine definitions; +* Failed commissioning + - various causes, usually a simple retry works; + - fix: delete the machine, (re)process machine definitions; +* Failed testing + - incompatible hardware, missing drivers etc. + - usually consistent and board-specific; + - fix: override failed testing +* Allocated + - on rare ocassions nodes get stuck in this state instead 'Deploy'; + - fix: mark-broken, mark-fixed, if it failed at least once before + perform a fio test (fixes another unrelated spurious issue with + encrypted disks from previous deployments), (re)deploy machines; +* Failed deployment + - various causes, usually a simple retry works; + - fix: same as for nodes stuck in 'Allocated'; + +[1] https://github.com/salt-formulas/salt-formula-maas/pull/34 + +Change-Id: Ifb7dd9f8fcfbbed557e47d8fdffb1f963604fb15 +Signed-off-by: ting wu +Signed-off-by: Alexandru Avadanii +--- + README.rst | 9 +++- + _modules/maas.py | 49 ++++++++++++++++++-- + maas/machines/wait_for_deployed.sls | 2 + + maas/machines/wait_for_ready.sls | 3 ++ + maas/machines/wait_for_ready_or_deployed.sls | 15 ++++++ + maas/map.jinja | 4 ++ + tests/pillar/maas_region.sls | 4 ++ + 7 files changed, 79 insertions(+), 7 deletions(-) + create mode 100644 maas/machines/wait_for_ready_or_deployed.sls + +diff --git a/README.rst b/README.rst +index 20da43e..78d8aef 100644 +--- a/README.rst ++++ b/README.rst +@@ -622,12 +622,16 @@ Wait for status of selected machine's: + machines: + - kvm01 + - kvm02 +- timeout: 1200 # in seconds ++ timeout: {{ region.timeout.ready }} ++ attempts: {{ region.timeout.attempts }} + req_status: "Ready" + - require: + - cmd: maas_login_admin + ... + ++The timeout setting is taken from the reclass pillar data. ++If the pillar data is not defined, it will use the default value. ++ + If module run w/\o any extra paremeters, + ``wait_for_machines_ready`` will wait for defined in salt + machines. In this case, it is usefull to skip some machines: +@@ -642,7 +646,8 @@ machines. In this case, it is usefull to skip some machines: + module.run: + - name: maas.wait_for_machine_status + - kwargs: +- timeout: 1200 # in seconds ++ timeout: {{ region.timeout.deployed }} ++ attempts: {{ region.timeout.attempts }} + req_status: "Deployed" + ignore_machines: + - kvm01 # in case it's broken or whatever +diff --git a/_modules/maas.py b/_modules/maas.py +index c02f104..28e46c5 100644 +--- a/_modules/maas.py ++++ b/_modules/maas.py +@@ -921,6 +921,7 @@ class MachinesStatus(MaasObject): + req_status: string; Polling status + machines: list; machine names + ignore_machines: list; machine names ++ attempts: max number of automatic hard retries + :ret: True + Exception - if something fail/timeout reached + """ +@@ -929,6 +930,8 @@ class MachinesStatus(MaasObject): + req_status = kwargs.get("req_status", "Ready") + to_discover = kwargs.get("machines", None) + ignore_machines = kwargs.get("ignore_machines", None) ++ attempts = kwargs.get("attempts", 0) ++ failed_attempts = {} + if not to_discover: + try: + to_discover = __salt__['config.get']('maas')['region'][ +@@ -943,11 +946,45 @@ class MachinesStatus(MaasObject): + while len(total) <= len(to_discover): + for m in to_discover: + for discovered in MachinesStatus.execute()['machines']: +- if m == discovered['hostname'] and \ +- discovered['status'].lower() == req_status.lower(): +- if m in total: ++ if m == discovered['hostname'] and m in total: ++ req_status_list = req_status.lower().split('|') ++ if discovered['status'].lower() in req_status_list: + total.remove(m) +- ++ elif attempts > 0 and (m not in failed_attempts or ++ failed_attempts[m] < attempts): ++ status = discovered['status'] ++ sid = discovered['system_id'] ++ cls._maas = _create_maas_client() ++ if status in ['Failed commissioning', 'New']: ++ LOG.info('Machine {0} deleted'.format(sid)) ++ cls._maas.delete(u'api/2.0/machines/{0}/' ++ .format(sid)) ++ Machine().process() ++ elif status in ['Failed testing']: ++ data = {} ++ LOG.info('Machine {0} overriden'.format(sid)) ++ action = 'override_failed_testing' ++ cls._maas.post(u'api/2.0/machines/{0}/' ++ .format(sid), action, **data) ++ elif status in ['Failed deployment', 'Allocated']: ++ data = {} ++ LOG.info('Machine {0} mark broken'.format(sid)) ++ cls._maas.post(u'api/2.0/machines/{0}/' ++ .format(sid), 'mark_broken', **data) ++ LOG.info('Machine {0} mark fixed'.format(sid)) ++ cls._maas.post(u'api/2.0/machines/{0}/' ++ .format(sid), 'mark_fixed', **data) ++ if m in failed_attempts and failed_attempts[m]: ++ LOG.info('Machine {0} fio test'.format(sid)) ++ data['testing_scripts'] = 'fio' ++ cls._maas.post(u'api/2.0/machines/{0}/' ++ .format(sid), 'commission', **data) ++ DeployMachines().process() ++ else: ++ continue ++ if m not in failed_attempts: ++ failed_attempts[m] = 0 ++ failed_attempts[m] = failed_attempts[m] + 1 + if len(total) <= 0: + LOG.debug( + "Machines:{} are:{}".format(to_discover, req_status)) +@@ -959,7 +996,9 @@ class MachinesStatus(MaasObject): + "Waiting status:{} " + "for machines:{}" + "\nsleep for:{}s " +- "Timeout:{}s".format(req_status, total, poll_time, timeout)) ++ "Timeout:{}s ({}s left)" ++ .format(req_status, total, poll_time, timeout, ++ timeout - (time.time() - started_at))) + time.sleep(poll_time) + + +diff --git a/maas/machines/wait_for_deployed.sls b/maas/machines/wait_for_deployed.sls +index ebeedac..a646fdb 100644 +--- a/maas/machines/wait_for_deployed.sls ++++ b/maas/machines/wait_for_deployed.sls +@@ -9,5 +9,7 @@ wait_for_machines_deployed: + - name: maas.wait_for_machine_status + - kwargs: + req_status: "Deployed" ++ timeout: {{ region.timeout.deployed }} ++ attempts: {{ region.timeout.attempts }} + - require: + - cmd: maas_login_admin +diff --git a/maas/machines/wait_for_ready.sls b/maas/machines/wait_for_ready.sls +index c5d3c28..d8a2963 100644 +--- a/maas/machines/wait_for_ready.sls ++++ b/maas/machines/wait_for_ready.sls +@@ -7,5 +7,8 @@ maas_login_admin: + wait_for_machines_ready: + module.run: + - name: maas.wait_for_machine_status ++ - kwargs: ++ timeout: {{ region.timeout.ready }} ++ attempts: {{ region.timeout.attempts }} + - require: + - cmd: maas_login_admin +diff --git a/maas/machines/wait_for_ready_or_deployed.sls b/maas/machines/wait_for_ready_or_deployed.sls +new file mode 100644 +index 0000000..db3dcc4 +--- /dev/null ++++ b/maas/machines/wait_for_ready_or_deployed.sls +@@ -0,0 +1,15 @@ ++{%- from "maas/map.jinja" import region with context %} ++ ++maas_login_admin: ++ cmd.run: ++ - name: "maas-region apikey --username {{ region.admin.username }} > /var/lib/maas/.maas_credentials" ++ ++wait_for_machines_ready_or_deployed: ++ module.run: ++ - name: maas.wait_for_machine_status ++ - kwargs: ++ req_status: "Ready|Deployed" ++ timeout: {{ region.timeout.ready }} ++ attempts: {{ region.timeout.attempts }} ++ - require: ++ - cmd: maas_login_admin +diff --git a/maas/map.jinja b/maas/map.jinja +index 0671435..1e6ac07 100644 +--- a/maas/map.jinja ++++ b/maas/map.jinja +@@ -22,6 +22,10 @@ Debian: + bind: + host: 0.0.0.0 + port: 80 ++ timeout: ++ ready: 1200 ++ deployed: 7200 ++ attempts: 0 + {%- endload %} + + {%- set region = salt['grains.filter_by'](region_defaults, merge=salt['pillar.get']('maas:region', {})) %} +diff --git a/tests/pillar/maas_region.sls b/tests/pillar/maas_region.sls +index d3325eb..d710216 100644 +--- a/tests/pillar/maas_region.sls ++++ b/tests/pillar/maas_region.sls +@@ -34,3 +34,7 @@ maas: + password: password + username: maas + salt_master_ip: 127.0.0.1 ++ timeout: ++ deployed: 900 ++ ready: 900 ++ attempts: 2 -- cgit 1.2.3-korg