-rw-r--r--  docs/conf.py | 1
-rw-r--r--  docs/development/index.rst | 14
-rw-r--r--  docs/development/overview/index.rst | 7
-rw-r--r--  docs/development/overview/overview.rst | 52
-rw-r--r--  docs/development/requirements/index.rst | 6
-rw-r--r--  docs/index.rst | 4
-rw-r--r--  docs/release/configguide/feature.configuration.rst | 54
-rw-r--r--  docs/release/configguide/index.rst | 6
-rw-r--r--  docs/release/index.rst | 12
-rw-r--r--  docs/release/installation/index.rst (renamed from docs/development/manuals/index.rst) | 12
-rw-r--r--  docs/release/installation/installation.rst | 44
-rw-r--r--  docs/release/release-notes/release-notes.rst | 61
-rw-r--r--  docs/release/release-notes/releasenotes_iruya.rst | 129
-rw-r--r--  docs/release/scenarios/fault_management/fault_management.rst | 90
-rw-r--r--  docs/release/scenarios/maintenance/images/Fault-management-design.png (renamed from docs/development/overview/functest_scenario/images/Fault-management-design.png) | bin 237110 -> 237110 bytes
-rw-r--r--  docs/release/scenarios/maintenance/images/LICENSE (renamed from docs/development/overview/functest_scenario/images/LICENSE) | 0
-rw-r--r--  docs/release/scenarios/maintenance/images/Maintenance-design.png (renamed from docs/development/overview/functest_scenario/images/Maintenance-design.png) | bin 316640 -> 316640 bytes
-rw-r--r--  docs/release/scenarios/maintenance/images/Maintenance-workflow.png (renamed from docs/development/overview/functest_scenario/images/Maintenance-workflow.png) | bin 81286 -> 81286 bytes
-rw-r--r--  docs/release/scenarios/maintenance/maintenance.rst (renamed from docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst) | 145
-rw-r--r--  docs/release/userguide/get-valid-server-state.rst (renamed from docs/development/manuals/get-valid-server-state.rst) | 0
-rw-r--r--  docs/release/userguide/index.rst | 3
-rw-r--r--  docs/release/userguide/mark-host-down_manual.rst (renamed from docs/development/manuals/mark-host-down_manual.rst) | 0
-rw-r--r--  docs/release/userguide/monitors.rst (renamed from docs/development/manuals/monitors.rst) | 0
-rw-r--r--  docs/testing/developer/index.rst | 13
-rw-r--r--  docs/testing/developer/testing.rst (renamed from docs/development/overview/testing.rst) | 45
-rw-r--r--  docs/testing/index.rst | 15
-rw-r--r--  docs/testing/user/index.rst | 13
-rw-r--r--  docs/testing/user/testing.rst | 30
-rw-r--r--  doctor_tests/admin_tool/fenix/Dockerfile | 2
-rw-r--r--  doctor_tests/app_manager/__init__.py | 8
-rw-r--r--  doctor_tests/app_manager/sample.py | 2
-rw-r--r--  doctor_tests/app_manager/vnfm.py | 441
-rw-r--r--  doctor_tests/common/utils.py | 22
-rw-r--r--  doctor_tests/image.py | 13
-rw-r--r--  doctor_tests/inspector/sample.py | 4
-rw-r--r--  doctor_tests/installer/__init__.py | 8
-rw-r--r--  doctor_tests/installer/base.py | 54
-rw-r--r--  doctor_tests/installer/common/set_compute_config.py | 4
-rw-r--r--  doctor_tests/installer/common/set_fenix.sh | 18
-rw-r--r--  doctor_tests/installer/devstack.py | 151
-rw-r--r--  doctor_tests/installer/local.py | 118
-rw-r--r--  doctor_tests/installer/mcp.py | 62
-rw-r--r--  doctor_tests/main.py | 84
-rw-r--r--  doctor_tests/scenario/maintenance.py | 44
-rw-r--r--  doctor_tests/user.py | 1
-rw-r--r--  tox.ini | 1
46 files changed, 1318 insertions(+), 475 deletions(-)
diff --git a/docs/conf.py b/docs/conf.py
index eb12e74b..3c9978bb 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1 +1,2 @@
from docs_conf.conf import * # noqa: F401,F403
+master_doc = 'index'
diff --git a/docs/development/index.rst b/docs/development/index.rst
index 2dc16a82..a7d2817b 100644
--- a/docs/development/index.rst
+++ b/docs/development/index.rst
@@ -2,18 +2,18 @@
.. http://creativecommons.org/licenses/by/4.0
.. (c) 2016 OPNFV.
+.. _development:
-======
-Doctor
-======
+===========
+Development
+===========
.. toctree::
:maxdepth: 2
- ./design/index.rst
- ./requirements/index.rst
- ./manuals/index.rst
- ./overview/functest_scenario/index.rst
+ ./design/index
+ ./overview/index
+ ./requirements/index
Indices
=======
diff --git a/docs/development/overview/index.rst b/docs/development/overview/index.rst
index 956e73e3..f6d78d57 100644
--- a/docs/development/overview/index.rst
+++ b/docs/development/overview/index.rst
@@ -3,11 +3,12 @@
.. _doctor-overview:
-************************
-Doctor Development Guide
-************************
+********
+Overview
+********
.. toctree::
:maxdepth: 2
+ overview.rst
testing.rst
diff --git a/docs/development/overview/overview.rst b/docs/development/overview/overview.rst
new file mode 100644
index 00000000..21f5439e
--- /dev/null
+++ b/docs/development/overview/overview.rst
@@ -0,0 +1,52 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Platform overview
+"""""""""""""""""
+
+The Doctor platform provides these features since the `Danube Release <https://wiki.opnfv.org/display/SWREL/Danube>`_:
+
+* Immediate Notification
+* Consistent resource state awareness for compute host down
+* Valid compute host status given to VM owner
+
+These features enable high availability of Network Services on top of
+the virtualized infrastructure. Immediate notification allows VNF managers
+(VNFM) to process recovery actions promptly once a failure has occurred.
+The same framework can also be utilized to make the VNFM aware of
+infrastructure maintenance.
+
+Consistency of resource state is necessary to execute recovery actions
+properly in the VIM.
+
+Ability to query host status gives VM owner the possibility to get
+consistent state information through an API in case of a compute host
+fault.
+
+The Doctor platform consists of the following components:
+
+* OpenStack Compute (Nova)
+* OpenStack Networking (Neutron)
+* OpenStack Telemetry (Ceilometer)
+* OpenStack Alarming (AODH)
+* Doctor Sample Inspector, OpenStack Congress or OpenStack Vitrage
+* Doctor Sample Monitor or any monitor supported by Congress or Vitrage
+
+.. note::
+ Doctor Sample Monitor is used in Doctor testing. However, in a real
+ implementation like Vitrage, several other monitors are supported.
+
+You can see an overview of the Doctor platform and how components interact in
+:numref:`figure-p1`.
+
+
+The maintenance use case provides these features since the `Iruya Release <https://wiki.opnfv.org/display/SWREL/Iruya>`_:
+
+* Infrastructure maintenance and upgrade workflow
+* Interaction between the VNFM and the infrastructure workflow
+
+Since the `Jerma Release <https://wiki.opnfv.org/display/SWREL/Jerma>`_ the
+maintenance use case also supports the 'ETSI FEAT03' implementation to have
+infrastructure maintenance and upgrade fully optimized while keeping zero
+impact on VNF service.
+
diff --git a/docs/development/requirements/index.rst b/docs/development/requirements/index.rst
index fceaebf0..ccc35cb8 100644
--- a/docs/development/requirements/index.rst
+++ b/docs/development/requirements/index.rst
@@ -3,9 +3,9 @@
.. _doctor-requirements:
-****************************************
-Doctor: Fault Management and Maintenance
-****************************************
+**********************************************
+Requirements: Fault Management and Maintenance
+**********************************************
:Project: Doctor, https://wiki.opnfv.org/doctor
:Editors: Ashiq Khan (NTT DOCOMO), Gerald Kunzmann (NTT DOCOMO)
diff --git a/docs/index.rst b/docs/index.rst
index 4dedb98d..b8e8bfd0 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -12,6 +12,6 @@ Fault Management and Maintenance (Doctor)
:numbered:
:maxdepth: 2
- release/index
development/index
-
+ release/index
+ testing/index
diff --git a/docs/release/configguide/feature.configuration.rst b/docs/release/configguide/feature.configuration.rst
index 64928eea..8fbff50e 100644
--- a/docs/release/configguide/feature.configuration.rst
+++ b/docs/release/configguide/feature.configuration.rst
@@ -159,3 +159,57 @@ You can configure the Sample Monitor as follows (Example for Apex deployment):
"http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
**Collectd Monitor**
+
+OpenStack components
+====================
+
+In OPNFV and with Doctor testing you can have all OpenStack components
+configured as needed. Here is a sample of the needed configuration
+modifications.
+
+Ceilometer
+----------
+
+The maintenance use case needs new alarm event definitions to be added to
+``/etc/ceilometer/event_definitions.yaml``:
+
+.. code-block:: yaml
+
+    - event_type: maintenance.scheduled
+      traits:
+        actions_at:
+          fields: payload.maintenance_at
+          type: datetime
+        allowed_actions:
+          fields: payload.allowed_actions
+        host_id:
+          fields: payload.host_id
+        instances:
+          fields: payload.instances
+        metadata:
+          fields: payload.metadata
+        project_id:
+          fields: payload.project_id
+        reply_url:
+          fields: payload.reply_url
+        session_id:
+          fields: payload.session_id
+        state:
+          fields: payload.state
+    - event_type: maintenance.host
+      traits:
+        host:
+          fields: payload.host
+        project_id:
+          fields: payload.project_id
+        session_id:
+          fields: payload.session_id
+        state:
+          fields: payload.state
+
+Both the maintenance and fault management use cases need the following
+notifier publishers to be added to ``/etc/ceilometer/event_pipeline.yaml``:
+
+.. code-block:: yaml
+
+    - notifier://
+    - notifier://?topic=alarm.all
+
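+For reference, here is a sketch (with hypothetical values) of how the traits
+defined above surface to an alarm consumer: AODH delivers
+``reason_data.event.traits`` as ``[name, dtype, value]`` triples, which the
+Doctor code decodes into a flat payload:
+
+.. code-block:: python
+
+    # sketch: a maintenance.scheduled alarm as seen by the consumer;
+    # the trait values below are hypothetical
+    data = {'reason_data': {'event': {'traits': [
+        ['state', 1, 'MAINTENANCE'],
+        ['session_id', 1, '76e55df8-1c51-11e8-9928-0242ac110002'],
+        ['reply_url', 1, 'http://0.0.0.0:12347/maintenance'],
+    ]}}}
+
+    # same decoding as used by the Doctor app manager and inspector
+    payload = {t[0]: t[2] for t in data['reason_data']['event']['traits']}
+    print(payload['state'])  # -> MAINTENANCE
+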
+Nova
+----
+
+Set the following in ``/etc/nova/nova.conf``:
+
+.. code-block:: ini
+
+    cpu_allocation_ratio=1.0
diff --git a/docs/release/configguide/index.rst b/docs/release/configguide/index.rst
index b1e7c33d..c2331115 100644
--- a/docs/release/configguide/index.rst
+++ b/docs/release/configguide/index.rst
@@ -3,9 +3,9 @@
.. _doctor-configguide:
-*************************
-Doctor Installation Guide
-*************************
+**************************
+Doctor Configuration Guide
+**************************
.. toctree::
:maxdepth: 2
diff --git a/docs/release/index.rst b/docs/release/index.rst
index 8a1bf405..67eb4c5f 100644
--- a/docs/release/index.rst
+++ b/docs/release/index.rst
@@ -2,14 +2,18 @@
.. http://creativecommons.org/licenses/by/4.0
.. (c) 2017 OPNFV.
+.. _release:
-======
-Doctor
-======
+=======
+Release
+=======
.. toctree::
:maxdepth: 2
+ ./configguide/index.rst
./installation/index.rst
+ ./release-notes/index.rst
+ ./scenarios/fault_management/fault_management.rst
+ ./scenarios/maintenance/maintenance.rst
./userguide/index.rst
-
diff --git a/docs/development/manuals/index.rst b/docs/release/installation/index.rst
index f705f94a..f6527e5d 100644
--- a/docs/development/manuals/index.rst
+++ b/docs/release/installation/index.rst
@@ -1,13 +1,13 @@
.. This work is licensed under a Creative Commons Attribution 4.0 International License.
.. http://creativecommons.org/licenses/by/4.0
-.. _doctor-manuals:
+.. _doctor-installation:
-*******
-Manuals
-*******
+*************************
+Doctor Installation Guide
+*************************
.. toctree::
+ :maxdepth: 2
-.. include:: mark-host-down_manual.rst
-.. include:: get-valid-server-state.rst
+ installation.rst
diff --git a/docs/release/installation/installation.rst b/docs/release/installation/installation.rst
new file mode 100644
index 00000000..564f19fd
--- /dev/null
+++ b/docs/release/installation/installation.rst
@@ -0,0 +1,44 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Doctor Installation
+===================
+
+You can clone the Doctor project on the OPNFV installer jumphost or, if you
+are not in an OPNFV environment, on the DevStack controller node:
+
+.. code-block:: bash
+
+    git clone https://gerrit.opnfv.org/gerrit/doctor
+
+On the DevStack controller, here is a sample of what Doctor testing will
+require for sample fault management testing and for maintenance testing
+using Fenix:
+
+.. code-block:: bash
+
+ git clone https://github.com/openstack/devstack -b stable/train
+
+.. code-block:: bash
+
+    cd devstack
+    vi local.conf
+
+.. code-block:: bash
+
+ [[local|localrc]]
+ GIT_BASE=https://git.openstack.org
+ HOST_IP=<host_ip>
+ ADMIN_PASSWORD=admin
+ DATABASE_PASSWORD=admin
+ RABBIT_PASSWORD=admin
+ SERVICE_PASSWORD=admin
+ LOGFILE=/opt/stack/stack.sh.log
+
+ PUBLIC_INTERFACE=eth0
+
+ CEILOMETER_EVENT_ALARM=True
+
+ ENABLED_SERVICES=key,rabbit,mysql,fenix-engine,fenix-api,aodh-evaluator,aodh-notifier,aodh-api
+
+ enable_plugin ceilometer https://git.openstack.org/openstack/ceilometer stable/train
+ enable_plugin aodh https://git.openstack.org/openstack/aodh stable/train
+ enable_plugin gnocchi https://github.com/openstack/gnocchi
+ enable_plugin fenix https://opendev.org/x/fenix master
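+
+After stacking succeeds you can, as a quick sanity check, verify that Fenix
+has registered its 'maintenance' service endpoint in Keystone. This is only
+a sketch; the credentials below are the sample values from the local.conf
+above and otherwise hypothetical:
+
+.. code-block:: python
+
+    from keystoneauth1 import loading
+    from keystoneauth1 import session
+    from keystoneclient.v3 import client
+
+    # credentials match the sample local.conf; adjust to your deployment
+    loader = loading.get_plugin_loader('password')
+    auth = loader.load_from_options(auth_url='http://<host_ip>/identity',
+                                    username='admin',
+                                    password='admin',
+                                    project_name='admin',
+                                    user_domain_id='default',
+                                    project_domain_id='default')
+    keystone = client.Client(session=session.Session(auth=auth))
+
+    # Fenix should appear as a service of type 'maintenance'
+    found = [s for s in keystone.services.list() if s.type == 'maintenance']
+    print('maintenance service registered: %s' % bool(found))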
diff --git a/docs/release/release-notes/release-notes.rst b/docs/release/release-notes/release-notes.rst
index 92775557..b525335e 100644
--- a/docs/release/release-notes/release-notes.rst
+++ b/docs/release/release-notes/release-notes.rst
@@ -7,33 +7,41 @@ This document provides the release notes for Iruya version of Doctor.
Important notes
===============
-In Iruya release there has not been many changes.
-
-All testing is now being made with Fuel installer. Maintenance use case
-is now only tested against latest upstream Fenix. Only sample inspector is
-tested as Fuel do not support Vitrage or Congress.
+The Jerma release has mainly been about finalizing maintenance use case
+testing, supporting the ETSI FEAT03 defined interaction between VNFM and
+infrastructure. The aim is to have infrastructure maintenance and upgrade
+operations optimized to run as fast as possible while keeping the VNFs on
+top with zero impact on their service.
+
+Furthermore, this is the final release of Doctor; deeper testing is moving
+to upstream projects such as Fenix for maintenance. In this release we have
+also made sure that all Doctor testing, and any deeper testing with the
+upstream projects, can be done in DevStack. This also makes DevStack the
+most important installer.
Summary
=======
-Iruya Doctor framework uses OpenStack Stein integrated into its test cases.
+Jerma Doctor framework uses OpenStack Train integrated into its test cases.
Release Data
============
Doctor changes
-- Maintenance use case updated to support latest version of Fenix running
- in container on controller node
-- Maintenance use case now support Fuel installer
-- Doctor updated to use OpenStack Stein and only python 3.6
-- Testing only sample inspector as lacking installer support for
- Vitrage and Congress
+- Maintenance use case updated to support the latest version of Fenix.
+- Maintenance use case now supports the ETSI FEAT03 optimization with Fenix.
+- Doctor testing is now preferably done in a DevStack environment, where one
+  can easily select an OpenStack release from Rocky to Ussuri to test Doctor
+  functionality. The latest OPNFV Fuel can also be used for the OpenStack
+  version it supports.
-Releng changes
+Doctor CI
-- Doctor testing running with python 3.6 and with sample inspector
-- Doctor is only tested with Fuel installer
+- Doctor is tested with the Fuel installer.
+- The fault management use case is tested with the sample inspector.
+- The maintenance use case is tested with the sample implementation and
+  against the latest Fenix version. This includes the new ETSI FEAT03
+  optimization.
Version change
^^^^^^^^^^^^^^
@@ -41,12 +49,13 @@ Version change
Module version changes
~~~~~~~~~~~~~~~~~~~~~~
-- OpenStack has changed from Rocky to Stein since previous Hunter release.
+- OpenStack has changed from Stein to Train since the previous Iruya release.
Document version changes
~~~~~~~~~~~~~~~~~~~~~~~~
-N/A
+All documentation has been updated to the OPNFV unified format according to
+the documentation guidelines. There are small updates in many documents.
Reason for version
^^^^^^^^^^^^^^^^^^
@@ -56,11 +65,14 @@ N/A
Feature additions
~~~~~~~~~~~~~~~~~
-+--------------------+--------------------------------------------------------------+
-| **JIRA REFERENCE** | **SLOGAN** |
-+--------------------+--------------------------------------------------------------+
-| DOCTOR-134 | Update Doctor maintenance use case to work with latest Fenix |
-+--------------------+--------------------------------------------------------------+
++--------------------+--------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------+
+| DOCTOR-137 | VNFM maintenance with ETSI changes |
++--------------------+--------------------------------------------+
+| DOCTOR-136 | DevStack support |
++--------------------+--------------------------------------------+
+
Deliverables
------------
@@ -127,3 +139,8 @@ References
For more information about the OPNFV Doctor latest work, please see:
https://wiki.opnfv.org/display/doctor/Doctor+Home
+
+Further information about ETSI FEAT03 optimization can be found from Fenix
+Documentation:
+
+https://fenix.readthedocs.io/en/latest
diff --git a/docs/release/release-notes/releasenotes_iruya.rst b/docs/release/release-notes/releasenotes_iruya.rst
new file mode 100644
index 00000000..92775557
--- /dev/null
+++ b/docs/release/release-notes/releasenotes_iruya.rst
@@ -0,0 +1,129 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+This document provides the release notes for Iruya version of Doctor.
+
+Important notes
+===============
+
+In the Iruya release there have not been many changes.
+
+All testing is now done with the Fuel installer. The maintenance use case
+is now only tested against the latest upstream Fenix. Only the sample
+inspector is tested, as Fuel does not support Vitrage or Congress.
+
+Summary
+=======
+
+Iruya Doctor framework uses OpenStack Stein integrated into its test cases.
+
+Release Data
+============
+
+Doctor changes
+
+- Maintenance use case updated to support the latest version of Fenix running
+  in a container on the controller node
+- Maintenance use case now supports the Fuel installer
+- Doctor updated to use OpenStack Stein and only Python 3.6
+- Testing only the sample inspector due to lacking installer support for
+  Vitrage and Congress
+
+Releng changes
+
+- Doctor testing runs with Python 3.6 and with the sample inspector
+- Doctor is only tested with the Fuel installer
+
+Version change
+^^^^^^^^^^^^^^
+
+Module version changes
+~~~~~~~~~~~~~~~~~~~~~~
+
+- OpenStack has changed from Rocky to Stein since previous Hunter release.
+
+Document version changes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+Reason for version
+^^^^^^^^^^^^^^^^^^
+
+N/A
+
+Feature additions
+~~~~~~~~~~~~~~~~~
+
++--------------------+--------------------------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------------------------+
+| DOCTOR-134 | Update Doctor maintenance use case to work with latest Fenix |
++--------------------+--------------------------------------------------------------+
+
+Deliverables
+------------
+
+Software deliverables
+=====================
+
+None
+
+Documentation deliverables
+==========================
+
+https://git.opnfv.org/doctor/tree/docs
+
+Known Limitations, Issues and Workarounds
+=========================================
+
+System Limitations
+^^^^^^^^^^^^^^^^^^
+
+Maintenance test case requirements:
+
+- Minimum number of nodes: 1 Controller, 3 Computes
+- Min number of VCPUs: 2 VCPUs for each compute
+
+Known issues
+^^^^^^^^^^^^
+
+None
+
+Workarounds
+^^^^^^^^^^^
+
+None
+
+Test Result
+===========
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
++--------------------------------------+--------------+
+
+Doctor CI results with TEST_CASE='maintenance' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
+| ADMIN_TOOL_TYPE='fenix' *) | |
++--------------------------------------+--------------+
+
+*) Sample implementation not updated according to latest upstream Fenix
+ and is currently not being tested.
+
+References
+==========
+
+For more information about the OPNFV Doctor latest work, please see:
+
+https://wiki.opnfv.org/display/doctor/Doctor+Home
diff --git a/docs/release/scenarios/fault_management/fault_management.rst b/docs/release/scenarios/fault_management/fault_management.rst
new file mode 100644
index 00000000..99371201
--- /dev/null
+++ b/docs/release/scenarios/fault_management/fault_management.rst
@@ -0,0 +1,90 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+Running test cases
+""""""""""""""""""
+
+Functest will call the "doctor_tests/main.py" in Doctor to run the test job.
+Doctor testing can also be triggered by tox on OPNFV installer jumphost. Tox
+is normally used for functional, module and coding style testing in Python
+project.
+
+Currently 'MCP' and 'devstack' installer are supported.
+
+
+Fault management use case
+"""""""""""""""""""""""""
+
+* A consumer of the NFVI wants to receive immediate notifications about faults
+ in the NFVI affecting the proper functioning of the virtual resources.
+ Therefore, such faults have to be detected as quickly as possible, and, when
+ a critical error is observed, the affected consumer is immediately informed
+ about the fault and can switch over to the STBY configuration.
+
+The faults to be monitored (and at which detection rate) will be configured by
+the consumer. Once a fault is detected, the Inspector in the Doctor
+architecture will check the resource map maintained by the Controller, to find
+out which virtual resources are affected and then update the resources state.
+The Notifier will receive the failure event requests sent from the Controller,
+and notify the consumer(s) of the affected resources according to the alarm
+configuration.
+
+Detailed workflow information is as follows:
+
+* Consumer(VNFM): (step 0) creates resources (network, server/instance) and an
+ event alarm on state down notification of that server/instance or Neutron
+ port.
+
+* Monitor: (step 1) periodically checks nodes, such as pinging from/to each
+  dplane NIC to/from the gateway of the node, and (step 2) once a check
+  fails it sends an event with "raw" fault information to the Inspector
+
+* Inspector: when it receives an event, it will (step 3) mark the host down
+  ("mark-host-down") and (step 4) map the PM to the VMs and change the VM
+  status to down. In the network failure case, the Neutron port is also
+  changed to down.
+
+* Controller: (step 5) sends out an instance update event to Ceilometer. In
+  the network failure case, the Neutron port is also changed to down and a
+  corresponding event is sent to Ceilometer.
+
+* Notifier: (step 6) Ceilometer transforms and passes the events to AODH,
+ (step 7) AODH will evaluate events with the registered alarm definitions,
+ then (step 8) it will fire the alarm to the "consumer" who owns the
+ instance
+
+* Consumer(VNFM): (step 9) receives the event (a minimal consumer endpoint is
+  sketched below) and (step 10) recreates the failed instance
+
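+The consumer in step 9 is just an HTTP endpoint registered as the alarm
+action. A minimal sketch of such an endpoint (names and port are
+illustrative, not the actual Doctor sample consumer) could look like:
+
+.. code-block:: python
+
+    from flask import Flask, request
+    import json
+    import time
+
+    app = Flask('consumer')
+
+    @app.route('/failure', methods=['POST'])
+    def failure_alarm():
+        # AODH POSTs the alarm body here (steps 8-9); record the receive
+        # time so the notification time can be calculated afterwards
+        alarm = json.loads(request.data.decode('utf8'))
+        print('%f alarm received: %s' % (time.time(),
+                                         alarm.get('alarm_name')))
+        return 'OK'
+
+    app.run(host='0.0.0.0', port=12346)
+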
+Fault management test case
+""""""""""""""""""""""""""
+
+Functest will call the 'doctor-test' command in Doctor to run the test job.
+
+The following steps are executed:
+
+Firstly, get the installer IP according to the installer type. Then ssh to
+the installer node to get the private key for accessing the cloud. For the
+'fuel' installer, ssh to the controller node to modify the Nova and
+Ceilometer configurations.
+
+Secondly, prepare an image for booting the VM, then create a test project
+and a test user (both default to doctor) for the Doctor tests.
+
+Thirdly, boot a VM under the doctor project and check the VM status to verify
+that the VM is launched completely. Then get the compute host info where the
+VM is launched to verify connectivity to the target compute host. Get the
+consumer IP according to the route to the compute IP and create an alarm
+event in Ceilometer using the consumer IP.
+
+Fourthly, the Doctor components are started, and, based on the above
+preparation, a failure is injected into the system, i.e. the network of the
+compute host is disabled for 3 minutes. To ensure the host is down, the
+status of the host will be checked.
+
+Finally, the notification time, i.e. the time between the execution of step 2
+(Monitor detects failure) and step 9 (Consumer receives failure notification)
+is calculated.
+
+According to the Doctor requirements, the Doctor test is successful if the
+notification time is below 1 second.
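+
+As a rough illustration of that final check (a sketch; the actual logic lives
+in doctor_tests, and the timestamps here are hypothetical):
+
+.. code-block:: python
+
+    detected_at = 1590000000.123  # monitor detects the failure (step 2)
+    notified_at = 1590000000.650  # consumer receives the alarm (step 9)
+
+    notification_time = notified_at - detected_at
+    assert notification_time < 1.0, 'Doctor requires notification in < 1s'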
diff --git a/docs/development/overview/functest_scenario/images/Fault-management-design.png b/docs/release/scenarios/maintenance/images/Fault-management-design.png
index 6d98cdec..6d98cdec 100644
--- a/docs/development/overview/functest_scenario/images/Fault-management-design.png
+++ b/docs/release/scenarios/maintenance/images/Fault-management-design.png
Binary files differ
diff --git a/docs/development/overview/functest_scenario/images/LICENSE b/docs/release/scenarios/maintenance/images/LICENSE
index 21a2d03d..21a2d03d 100644
--- a/docs/development/overview/functest_scenario/images/LICENSE
+++ b/docs/release/scenarios/maintenance/images/LICENSE
diff --git a/docs/development/overview/functest_scenario/images/Maintenance-design.png b/docs/release/scenarios/maintenance/images/Maintenance-design.png
index 8f21db6a..8f21db6a 100644
--- a/docs/development/overview/functest_scenario/images/Maintenance-design.png
+++ b/docs/release/scenarios/maintenance/images/Maintenance-design.png
Binary files differ
diff --git a/docs/development/overview/functest_scenario/images/Maintenance-workflow.png b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png
index 9b65fd59..9b65fd59 100644
--- a/docs/development/overview/functest_scenario/images/Maintenance-workflow.png
+++ b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png
Binary files differ
diff --git a/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst b/docs/release/scenarios/maintenance/maintenance.rst
index 4505dd8f..ecfe76b1 100644
--- a/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
+++ b/docs/release/scenarios/maintenance/maintenance.rst
@@ -2,142 +2,6 @@
.. http://creativecommons.org/licenses/by/4.0
-
-Platform overview
-"""""""""""""""""
-
-Doctor platform provides these features since `Danube Release <https://wiki.opnfv.org/display/SWREL/Danube>`_:
-
-* Immediate Notification
-* Consistent resource state awareness for compute host down
-* Valid compute host status given to VM owner
-
-These features enable high availability of Network Services on top of
-the virtualized infrastructure. Immediate notification allows VNF managers
-(VNFM) to process recovery actions promptly once a failure has occurred.
-Same framework can also be utilized to have VNFM awareness about
-infrastructure maintenance.
-
-Consistency of resource state is necessary to execute recovery actions
-properly in the VIM.
-
-Ability to query host status gives VM owner the possibility to get
-consistent state information through an API in case of a compute host
-fault.
-
-The Doctor platform consists of the following components:
-
-* OpenStack Compute (Nova)
-* OpenStack Networking (Neutron)
-* OpenStack Telemetry (Ceilometer)
-* OpenStack Alarming (AODH)
-* Doctor Sample Inspector, OpenStack Congress or OpenStack Vitrage
-* Doctor Sample Monitor or any monitor supported by Congress or Vitrage
-
-.. note::
- Doctor Sample Monitor is used in Doctor testing. However in real
- implementation like Vitrage, there are several other monitors supported.
-
-You can see an overview of the Doctor platform and how components interact in
-:numref:`figure-p1`.
-
-.. figure:: ./images/Fault-management-design.png
- :name: figure-p1
- :width: 100%
-
- Doctor platform and typical sequence
-
-Detailed information on the Doctor architecture can be found in the Doctor
-requirements documentation:
-http://artifacts.opnfv.org/doctor/docs/requirements/05-implementation.html
-
-Running test cases
-""""""""""""""""""
-
-Functest will call the "doctor_tests/main.py" in Doctor to run the test job.
-Doctor testing can also be triggered by tox on OPNFV installer jumphost. Tox
-is normally used for functional, module and coding style testing in Python
-project.
-
-Currently, 'Apex', 'MCP' and 'local' installer are supported.
-
-
-Fault management use case
-"""""""""""""""""""""""""
-
-* A consumer of the NFVI wants to receive immediate notifications about faults
- in the NFVI affecting the proper functioning of the virtual resources.
- Therefore, such faults have to be detected as quickly as possible, and, when
- a critical error is observed, the affected consumer is immediately informed
- about the fault and can switch over to the STBY configuration.
-
-The faults to be monitored (and at which detection rate) will be configured by
-the consumer. Once a fault is detected, the Inspector in the Doctor
-architecture will check the resource map maintained by the Controller, to find
-out which virtual resources are affected and then update the resources state.
-The Notifier will receive the failure event requests sent from the Controller,
-and notify the consumer(s) of the affected resources according to the alarm
-configuration.
-
-Detailed workflow information is as follows:
-
-* Consumer(VNFM): (step 0) creates resources (network, server/instance) and an
- event alarm on state down notification of that server/instance or Neutron
- port.
-
-* Monitor: (step 1) periodically checks nodes, such as ping from/to each
- dplane nic to/from gw of node, (step 2) once it fails to send out event
- with "raw" fault event information to Inspector
-
-* Inspector: when it receives an event, it will (step 3) mark the host down
- ("mark-host-down"), (step 4) map the PM to VM, and change the VM status to
- down. In network failure case, also Neutron port is changed to down.
-
-* Controller: (step 5) sends out instance update event to Ceilometer. In network
- failure case, also Neutron port is changed to down and corresponding event is
- sent to Ceilometer.
-
-* Notifier: (step 6) Ceilometer transforms and passes the events to AODH,
- (step 7) AODH will evaluate events with the registered alarm definitions,
- then (step 8) it will fire the alarm to the "consumer" who owns the
- instance
-
-* Consumer(VNFM): (step 9) receives the event and (step 10) recreates a new
- instance
-
-Fault management test case
-""""""""""""""""""""""""""
-
-Functest will call the 'doctor-test' command in Doctor to run the test job.
-
-The following steps are executed:
-
-Firstly, get the installer ip according to the installer type. Then ssh to
-the installer node to get the private key for accessing to the cloud. As
-'fuel' installer, ssh to the controller node to modify nova and ceilometer
-configurations.
-
-Secondly, prepare image for booting VM, then create a test project and test
-user (both default to doctor) for the Doctor tests.
-
-Thirdly, boot a VM under the doctor project and check the VM status to verify
-that the VM is launched completely. Then get the compute host info where the VM
-is launched to verify connectivity to the target compute host. Get the consumer
-ip according to the route to compute ip and create an alarm event in Ceilometer
-using the consumer ip.
-
-Fourthly, the Doctor components are started, and, based on the above preparation,
-a failure is injected to the system, i.e. the network of compute host is
-disabled for 3 minutes. To ensure the host is down, the status of the host
-will be checked.
-
-Finally, the notification time, i.e. the time between the execution of step 2
-(Monitor detects failure) and step 9 (Consumer receives failure notification)
-is calculated.
-
-According to the Doctor requirements, the Doctor test is successful if the
-notification time is below 1 second.
-
Maintenance use case
""""""""""""""""""""
@@ -249,7 +113,8 @@ After all computes are maintained, `admin tool` can send `MAINTENANCE_COMPLETE`
to tell maintenance/upgrade is now complete. For `app manager` this means he
can scale back to full capacity.
-This is the current sample implementation and test case. Real life
-implementation is started in OpenStack Fenix project and there we should
-eventually address requirements more deeply and update the test case with Fenix
-implementation.
+There is currently a sample implementation of the VNFM and a test case. On
+the infrastructure side there is a sample implementation of the 'admin_tool',
+and there is also support for OpenStack Fenix, which extends the use case to
+support 'ETSI FEAT03' for VNFM interaction and to optimize the whole
+infrastructure maintenance and upgrade.
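+
+For illustration, the VNFM reply to a maintenance state notification is a
+plain REST call. A minimal sketch (token handling simplified; see
+doctor_tests/app_manager/vnfm.py for the full logic):
+
+.. code-block:: python
+
+    import json
+    import requests
+
+    def ack_state(payload, token):
+        # payload is the decoded maintenance alarm; the reply goes back
+        # to the admin tool / Fenix via the embedded reply_url
+        headers = {'Content-Type': 'application/json',
+                   'Accept': 'application/json',
+                   'X-Auth-Token': token}
+        reply = {'state': 'ACK_' + payload['state']}
+        requests.put(payload['reply_url'], data=json.dumps(reply),
+                     headers=headers)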
diff --git a/docs/development/manuals/get-valid-server-state.rst b/docs/release/userguide/get-valid-server-state.rst
index 824ea3c2..824ea3c2 100644
--- a/docs/development/manuals/get-valid-server-state.rst
+++ b/docs/release/userguide/get-valid-server-state.rst
diff --git a/docs/release/userguide/index.rst b/docs/release/userguide/index.rst
index eee855dc..577072c7 100644
--- a/docs/release/userguide/index.rst
+++ b/docs/release/userguide/index.rst
@@ -11,3 +11,6 @@ Doctor User Guide
:maxdepth: 2
feature.userguide.rst
+ get-valid-server-state.rst
+ mark-host-down_manual.rst
+ monitors.rst
diff --git a/docs/development/manuals/mark-host-down_manual.rst b/docs/release/userguide/mark-host-down_manual.rst
index 3815205d..3815205d 100644
--- a/docs/development/manuals/mark-host-down_manual.rst
+++ b/docs/release/userguide/mark-host-down_manual.rst
diff --git a/docs/development/manuals/monitors.rst b/docs/release/userguide/monitors.rst
index eeb5e226..eeb5e226 100644
--- a/docs/development/manuals/monitors.rst
+++ b/docs/release/userguide/monitors.rst
diff --git a/docs/testing/developer/index.rst b/docs/testing/developer/index.rst
new file mode 100644
index 00000000..dfbcfa74
--- /dev/null
+++ b/docs/testing/developer/index.rst
@@ -0,0 +1,13 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+*********
+Developer
+*********
+
+.. toctree::
+ :numbered:
+ :maxdepth: 2
+
+ testing.rst
diff --git a/docs/development/overview/testing.rst b/docs/testing/developer/testing.rst
index 663d4c3f..6a929130 100644
--- a/docs/development/overview/testing.rst
+++ b/docs/testing/developer/testing.rst
@@ -38,11 +38,19 @@ export TEST_CASE with different values:
export TEST_CASE='fault_management'
#Maintenance (requires 3 compute nodes)
export TEST_CASE='maintenance'
- #Use Fenix in maintenance testing instead of sample admin_tool
- export ADMIN_TOOL_TYPE='fenix'
#Run both tests cases
export TEST_CASE='all'
+ #Use Fenix in maintenance testing instead of the sample admin_tool
+ #This only applies to the 'maintenance' test case
+ export ADMIN_TOOL_TYPE='fenix'
+ export APP_MANAGER_TYPE='vnfm'
+
+ #Run on a different installer jumphost: 'fuel' or 'apex'
+ #In multinode DevStack you run Doctor on the controller node
+ #with APP_MANAGER_TYPE=vnfm exported
+ export INSTALLER_TYPE='fuel'
+
Run Python Test Script
~~~~~~~~~~~~~~~~~~~~~~
@@ -59,7 +67,8 @@ environment and then run the test.
.. _doctor.sample.conf: https://git.opnfv.org/doctor/tree/etc/doctor.sample.conf
-In OPNFV Apex jumphost you can run Doctor testing as follows using tox:
+In an OPNFV testing environment jumphost you can run Doctor testing as
+follows using tox:
.. code-block:: bash
@@ -69,31 +78,5 @@ In OPNFV Apex jumphost you can run Doctor testing as follows using tox:
git clone https://gerrit.opnfv.org/gerrit/doctor
cd doctor
sudo -E tox
-
-Run Functest Suite
-==================
-
-Functest supports Doctor testing by triggering the test script above in a
-Functest container. You can run the Doctor test with the following steps:
-
-.. code-block:: bash
-
- DOCKER_TAG=latest
- docker pull docker.io/opnfv/functest-features:${DOCKER_TAG}
- docker run --privileged=true -id \
- -e INSTALLER_TYPE=${INSTALLER_TYPE} \
- -e INSTALLER_IP=${INSTALLER_IP} \
- -e INSPECTOR_TYPE=sample \
- docker.io/opnfv/functest-features:${DOCKER_TAG} /bin/bash
- docker exec <container_id> functest testcase run doctor-notification
-
-See `Functest Userguide`_ for more information.
-
-.. _Functest Userguide: :doc:`<functest:testing/user/userguide>`
-
-
-For testing with stable version, change DOCKER_TAG to 'stable' or other release
-tag identifier.
-
-Tips
-====
+
+Note! In DevStack you run Doctor on the controller node.
diff --git a/docs/testing/index.rst b/docs/testing/index.rst
new file mode 100644
index 00000000..3fae9568
--- /dev/null
+++ b/docs/testing/index.rst
@@ -0,0 +1,15 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+.. _testing:
+
+=======
+Testing
+=======
+
+.. toctree::
+ :maxdepth: 2
+
+ ./developer/index.rst
+ ./user/index.rst
diff --git a/docs/testing/user/index.rst b/docs/testing/user/index.rst
new file mode 100644
index 00000000..1be9c7eb
--- /dev/null
+++ b/docs/testing/user/index.rst
@@ -0,0 +1,13 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+****
+User
+****
+
+.. toctree::
+ :numbered:
+ :maxdepth: 2
+
+ testing.rst
diff --git a/docs/testing/user/testing.rst b/docs/testing/user/testing.rst
new file mode 100644
index 00000000..6172d26a
--- /dev/null
+++ b/docs/testing/user/testing.rst
@@ -0,0 +1,30 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Run Functest Suite (obsolete)
+=============================
+
+Functest supports Doctor testing by triggering the test script above in a
+Functest container. You can run the Doctor test with the following steps:
+
+.. code-block:: bash
+
+ DOCKER_TAG=latest
+ docker pull docker.io/opnfv/functest-features:${DOCKER_TAG}
+ docker run --privileged=true -id \
+ -e INSTALLER_TYPE=${INSTALLER_TYPE} \
+ -e INSTALLER_IP=${INSTALLER_IP} \
+ -e INSPECTOR_TYPE=sample \
+ docker.io/opnfv/functest-features:${DOCKER_TAG} /bin/bash
+ docker exec <container_id> functest testcase run doctor-notification
+
+See :doc:`Functest Userguide <functest:testing/user/userguide>` for more
+information.
+
+For testing with a stable version, change DOCKER_TAG to 'stable' or another
+release tag identifier.
diff --git a/doctor_tests/admin_tool/fenix/Dockerfile b/doctor_tests/admin_tool/fenix/Dockerfile
index 5804b207..202380eb 100644
--- a/doctor_tests/admin_tool/fenix/Dockerfile
+++ b/doctor_tests/admin_tool/fenix/Dockerfile
@@ -21,7 +21,7 @@ RUN apk --no-cache add ca-certificates && \
if [ ! -e /usr/bin/pip ]; then ln -s pip3 /usr/bin/pip ; fi && \
if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python3 /usr/bin/python; fi && \
pip3 install --upgrade pip && \
- pip3 install alembic aodhclient decorator flask Flask-RESTful eventlet \
+ pip3 install alembic aodhclient decorator flask Flask-RESTful eventlet jsonschema \
keystoneauth1 keystonemiddleware python-novaclient oslo.config pecan \
oslo.db oslo.log oslo.messaging oslo.serialization oslo.service oslo_policy \
oslotest oslo.utils pbr pymysql six sqlalchemy -cupper-constraints.txt && \
diff --git a/doctor_tests/app_manager/__init__.py b/doctor_tests/app_manager/__init__.py
index 717d6587..c2f75918 100644
--- a/doctor_tests/app_manager/__init__.py
+++ b/doctor_tests/app_manager/__init__.py
@@ -8,12 +8,13 @@
##############################################################################
from oslo_config import cfg
from oslo_utils import importutils
+import os
OPTS = [
cfg.StrOpt('type',
- default='sample',
- choices=['sample'],
+ default=os.environ.get('APP_MANAGER_TYPE', 'sample'),
+ choices=['sample', 'vnfm'],
help='the component of doctor app manager',
required=True),
cfg.StrOpt('ip',
@@ -28,7 +29,8 @@ OPTS = [
_app_manager_name_class_mapping = {
- 'sample': 'doctor_tests.app_manager.sample.SampleAppManager'
+ 'sample': 'doctor_tests.app_manager.sample.SampleAppManager',
+ 'vnfm': 'doctor_tests.app_manager.vnfm.VNFM',
}
diff --git a/doctor_tests/app_manager/sample.py b/doctor_tests/app_manager/sample.py
index 94049aa2..7ca35b97 100644
--- a/doctor_tests/app_manager/sample.py
+++ b/doctor_tests/app_manager/sample.py
@@ -165,7 +165,7 @@ class AppManager(Thread):
data = json.loads(request.data.decode('utf8'))
try:
payload = self._alarm_traits_decoder(data)
- except:
+ except Exception:
payload = ({t[0]: t[2] for t in
data['reason_data']['event']['traits']})
self.log.error('cannot parse alarm data: %s' % payload)
diff --git a/doctor_tests/app_manager/vnfm.py b/doctor_tests/app_manager/vnfm.py
new file mode 100644
index 00000000..68fdbb88
--- /dev/null
+++ b/doctor_tests/app_manager/vnfm.py
@@ -0,0 +1,441 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from flask import Flask
+from flask import request
+import json
+import requests
+from threading import Thread
+import time
+import uuid
+import yaml
+
+from doctor_tests.app_manager.base import BaseAppManager
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import neutron_client
+from doctor_tests.os_clients import nova_client
+from doctor_tests.os_clients import keystone_client
+
+
+class VNFM(BaseAppManager):
+
+ def __init__(self, stack, conf, log):
+ super(VNFM, self).__init__(conf, log)
+ self.stack = stack
+ self.app = None
+
+ def start(self):
+ self.log.info('VNFM start......')
+ self.app = VNFManager(self.stack, self.conf, self, self.log)
+ self.app.start()
+
+ def stop(self):
+ self.log.info('VNFM stop......')
+ if not self.app:
+ return
+ self.app.delete_constraints()
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ }
+ url = 'http://%s:%d/shutdown'\
+ % (self.conf.app_manager.ip,
+ self.conf.app_manager.port)
+ requests.post(url, data='', headers=headers)
+
+
+class VNFManager(Thread):
+
+ def __init__(self, stack, conf, app_manager, log):
+ Thread.__init__(self)
+ self.stack = stack
+ self.conf = conf
+ self.port = self.conf.app_manager.port
+ self.app_manager = app_manager
+ self.log = log
+ self.instance_ids = None
+ self.auth = get_identity_auth(project=self.conf.doctor_project)
+ self.session = get_session(auth=self.auth)
+ self.keystone = keystone_client(
+ self.conf.keystone_version, self.session)
+ self.nova = nova_client(self.conf.nova_version,
+ self.session)
+ self.neutron = neutron_client(session=self.session)
+ self.headers = {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json'}
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ self.orig_number_of_instances = self.number_of_instances()
+ # List of instances
+ self.ha_instances = []
+ self.nonha_instances = []
+ # Different instance_id specific constraints {instance_id: {}, ...}
+ self.instance_constraints = None
+ # Update existing instances to instance lists
+ self.update_instances()
+ nonha_instances = len(self.nonha_instances)
+ if nonha_instances < 7:
+ self.scale = 2
+ self.max_impacted = 2
+ else:
+ self.scale = int((nonha_instances) / 2)
+ self.max_impacted = self.scale - 1
+ self.log.info('Init nonha_instances: %s scale: %s: max_impacted %s' %
+ (nonha_instances, self.scale, self.max_impacted))
+ # Different instance groups constraints dict
+ self.ha_group = None
+ self.nonha_group = None
+ # Floating IP used in HA instance
+ self.floating_ip = None
+ # VNF project_id
+ self.project_id = None
+ # HA instance_id that is active / has floating IP
+ self.active_instance_id = self.active_instance_id()
+
+ services = self.keystone.services.list()
+ for service in services:
+ if service.type == 'maintenance':
+ self.log.info('maintenance service: %s:%s type %s'
+ % (service.name, service.id, service.type))
+ maint_id = service.id
+ self.maint_endpoint = [ep.url for ep in self.keystone.endpoints.list()
+ if ep.service_id == maint_id and
+ ep.interface == 'public'][0]
+ self.log.info('maintenance endpoint: %s' % self.maint_endpoint)
+ self.update_constraints_lock = False
+ self.update_constraints()
+
+ def delete_remote_instance_constraints(self, instance_id):
+ url = "%s/instance/%s" % (self.maint_endpoint, instance_id)
+ self.log.info('DELETE: %s' % url)
+ ret = requests.delete(url, data=None, headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def update_remote_instance_constraints(self, instance):
+ url = "%s/instance/%s" % (self.maint_endpoint, instance["instance_id"])
+ self.log.info('PUT: %s' % url)
+ ret = requests.put(url, data=json.dumps(instance),
+ headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def delete_remote_group_constraints(self, instance_group):
+ url = "%s/instance_group/%s" % (self.maint_endpoint,
+ instance_group["group_id"])
+ self.log.info('DELETE: %s' % url)
+ ret = requests.delete(url, data=None, headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def update_remote_group_constraints(self, instance_group):
+ url = "%s/instance_group/%s" % (self.maint_endpoint,
+ instance_group["group_id"])
+ self.log.info('PUT: %s' % url)
+ ret = requests.put(url, data=json.dumps(instance_group),
+ headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def delete_constraints(self):
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ for instance_id in self.instance_constraints:
+ self.delete_remote_instance_constraints(instance_id)
+ self.delete_remote_group_constraints(self.nonha_group)
+ self.delete_remote_group_constraints(self.ha_group)
+
+ def update_constraints(self):
+ while self.update_constraints_lock:
+ self.log.info('Waiting update_constraints_lock...')
+ time.sleep(1)
+ self.update_constraints_lock = True
+ self.log.info('Update constraints')
+ if self.project_id is None:
+ self.project_id = self.keystone.projects.list(
+ name=self.conf.doctor_project)[0].id
+ if self.nonha_group is None:
+ # Nova does not support grouping instances that do not belong to
+ # anti-affinity server_groups. Anyhow all instances need grouping
+ self.nonha_group = {
+ "group_id": str(uuid.uuid4()),
+ "project_id": self.project_id,
+ "group_name": "doctor_nonha_app_group",
+ "anti_affinity_group": False,
+ "max_instances_per_host": 0,
+ "max_impacted_members": self.max_impacted,
+ "recovery_time": 2,
+ "resource_mitigation": True}
+ self.log.info('create doctor_nonha_app_group constraints: %s'
+ % self.nonha_group)
+ self.update_remote_group_constraints(self.nonha_group)
+ if self.ha_group is None:
+ group_id = [sg.id for sg in self.nova.server_groups.list()
+ if sg.name == "doctor_ha_app_group"][0]
+ self.ha_group = {
+ "group_id": group_id,
+ "project_id": self.project_id,
+ "group_name": "doctor_ha_app_group",
+ "anti_affinity_group": True,
+ "max_instances_per_host": 1,
+ "max_impacted_members": 1,
+ "recovery_time": 4,
+ "resource_mitigation": True}
+ self.log.info('create doctor_ha_app_group constraints: %s'
+ % self.ha_group)
+ self.update_remote_group_constraints(self.ha_group)
+ instance_constraints = {}
+ for ha_instance in self.ha_instances:
+ instance = {
+ "instance_id": ha_instance.id,
+ "project_id": self.project_id,
+ "group_id": self.ha_group["group_id"],
+ "instance_name": ha_instance.name,
+ "max_interruption_time": 120,
+ "migration_type": "MIGRATE",
+ "resource_mitigation": True,
+ "lead_time": 40}
+ self.log.info('create ha instance constraints: %s'
+ % instance)
+ instance_constraints[ha_instance.id] = instance
+ for nonha_instance in self.nonha_instances:
+ instance = {
+ "instance_id": nonha_instance.id,
+ "project_id": self.project_id,
+ "group_id": self.nonha_group["group_id"],
+ "instance_name": nonha_instance.name,
+ "max_interruption_time": 120,
+ "migration_type": "MIGRATE",
+ "resource_mitigation": True,
+ "lead_time": 40}
+ self.log.info('create nonha instance constraints: %s'
+ % instance)
+ instance_constraints[nonha_instance.id] = instance
+ if not self.instance_constraints:
+ # Initial instance constraints
+ self.log.info('create initial instances constraints...')
+ for instance in [instance_constraints[i] for i
+ in instance_constraints]:
+ self.update_remote_instance_constraints(instance)
+ self.instance_constraints = instance_constraints.copy()
+ else:
+ self.log.info('check instances constraints changes...')
+ added = [i for i in instance_constraints.keys()
+ if i not in self.instance_constraints]
+ deleted = [i for i in self.instance_constraints.keys()
+ if i not in instance_constraints]
+ modified = [i for i in instance_constraints.keys()
+ if (i not in added and i not in deleted and
+ instance_constraints[i] !=
+ self.instance_constraints[i])]
+ for instance_id in deleted:
+ self.delete_remote_instance_constraints(instance_id)
+ updated = added + modified
+ for instance in [instance_constraints[i] for i in updated]:
+ self.update_remote_instance_constraints(instance)
+ if updated or deleted:
+ # Some instance constraints have changed
+ self.instance_constraints = instance_constraints.copy()
+ self.update_constraints_lock = False
+
+ def active_instance_id(self):
+ # Need retry as it takes time after the heat template is done before
+ # the floating IP is in place
+ retry = 5
+ while retry > 0:
+ for instance in self.ha_instances:
+ network_interfaces = next(iter(instance.addresses.values()))
+ for network_interface in network_interfaces:
+ _type = network_interface.get('OS-EXT-IPS:type')
+ if _type == "floating":
+ if not self.floating_ip:
+ self.floating_ip = network_interface.get('addr')
+ self.log.debug('active_instance: %s %s' %
+ (instance.name, instance.id))
+ return instance.id
+ time.sleep(2)
+ self.update_instances()
+ retry -= 1
+ raise Exception("No active instance found")
+
+ def switch_over_ha_instance(self):
+ for instance in self.ha_instances:
+ if instance.id != self.active_instance_id:
+ self.log.info('Switch over to: %s %s' % (instance.name,
+ instance.id))
+ # Deprecated, need to use neutron instead
+ # instance.add_floating_ip(self.floating_ip)
+ port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id'] # noqa
+ floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id'] # noqa
+ self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}}) # noqa
+ # Have to update ha_instances as floating_ip changed
+ self.update_instances()
+ self.active_instance_id = instance.id
+ break
+
+ def get_instance_ids(self):
+ ret = list()
+ for instance in self.nova.servers.list(detailed=False):
+ ret.append(instance.id)
+ return ret
+
+ def update_instances(self):
+ instances = self.nova.servers.list(detailed=True)
+ self.ha_instances = [i for i in instances
+ if "doctor_ha_app_" in i.name]
+ self.nonha_instances = [i for i in instances
+ if "doctor_nonha_app_" in i.name]
+
+ def _alarm_data_decoder(self, data):
+ if "[" in data or "{" in data:
+ # string to list or dict removing unicode
+ data = yaml.load(data.replace("u'", "'"))
+ return data
+
+ def _alarm_traits_decoder(self, data):
+ return ({str(t[0]): self._alarm_data_decoder(str(t[2]))
+ for t in data['reason_data']['event']['traits']})
+
+ def get_session_instance_ids(self, url, session_id):
+ ret = requests.get(url, data=None, headers=self.headers)
+ if ret.status_code != 200:
+ raise Exception(ret.text)
+ self.log.info('get_instance_ids %s' % ret.json())
+ return ret.json()['instance_ids']
+
+ def scale_instances(self, number_of_instances):
+ number_of_instances_before = self.number_of_instances()
+
+ parameters = self.stack.parameters
+ parameters['nonha_intances'] += number_of_instances
+ self.stack.update(self.stack.stack_name,
+ self.stack.stack_id,
+ self.stack.template,
+ parameters=parameters,
+ files=self.stack.files)
+
+ number_of_instances_after = self.number_of_instances()
+ if (number_of_instances_before + number_of_instances !=
+ number_of_instances_after):
+ self.log.error('scale_instances with: %d from: %d ends up to: %d'
+ % (number_of_instances, number_of_instances_before,
+ number_of_instances_after))
+ raise Exception('scale_instances failed')
+
+ self.log.info('scaled instances from %d to %d' %
+ (number_of_instances_before,
+ number_of_instances_after))
+
+ def number_of_instances(self):
+ return len(self.nova.servers.list(detailed=False))
+
+ def run(self):
+ app = Flask('VNFM')
+
+ @app.route('/maintenance', methods=['POST'])
+ def maintenance_alarm():
+ data = json.loads(request.data.decode('utf8'))
+ try:
+ payload = self._alarm_traits_decoder(data)
+ except Exception:
+ payload = ({t[0]: t[2] for t in
+ data['reason_data']['event']['traits']})
+ self.log.error('cannot parse alarm data: %s' % payload)
+ raise Exception('VNFM cannot parse alarm. '
+ 'Possibly trait data over 256 char')
+
+ self.log.info('VNFM received data = %s' % payload)
+
+ state = payload['state']
+ reply_state = None
+ reply = dict()
+
+ self.log.info('VNFM state: %s' % state)
+
+ if state == 'MAINTENANCE':
+ instance_ids = (self.get_session_instance_ids(
+ payload['instance_ids'],
+ payload['session_id']))
+ my_instance_ids = self.get_instance_ids()
+ invalid_instances = (
+ [instance_id for instance_id in instance_ids
+ if instance_id not in my_instance_ids])
+ if invalid_instances:
+ self.log.error('Invalid instances: %s' % invalid_instances)
+ reply_state = 'NACK_MAINTENANCE'
+ else:
+ reply_state = 'ACK_MAINTENANCE'
+
+ elif state == 'SCALE_IN':
+ # scale down "self.scale" instances that is VCPUS equaling
+ # at least a single compute node
+ self.scale_instances(-self.scale)
+ reply_state = 'ACK_SCALE_IN'
+
+ elif state == 'MAINTENANCE_COMPLETE':
+ # possibly need to upscale
+ number_of_instances = self.number_of_instances()
+ if self.orig_number_of_instances > number_of_instances:
+ scale_instances = (self.orig_number_of_instances -
+ number_of_instances)
+ self.scale_instances(scale_instances)
+ reply_state = 'ACK_MAINTENANCE_COMPLETE'
+
+ elif state == 'PREPARE_MAINTENANCE':
+ # TBD from constraints
+ if "MIGRATE" not in payload['allowed_actions']:
+ raise Exception('MIGRATE not supported')
+ instance_ids = payload['instance_ids'][0]
+ self.log.info('VNFM got instance: %s' % instance_ids)
+ if instance_ids == self.active_instance_id:
+ self.switch_over_ha_instance()
+ # optional also in constraints
+ reply['instance_action'] = "MIGRATE"
+ reply_state = 'ACK_PREPARE_MAINTENANCE'
+
+ elif state == 'PLANNED_MAINTENANCE':
+ # TBD from constraints
+ if "MIGRATE" not in payload['allowed_actions']:
+ raise Exception('MIGRATE not supported')
+ instance_ids = payload['instance_ids'][0]
+ self.log.info('VNFM got instance: %s' % instance_ids)
+ if instance_ids == self.active_instance_id:
+ self.switch_over_ha_instance()
+ # optional also in constraints
+ reply['instance_action'] = "MIGRATE"
+ reply_state = 'ACK_PLANNED_MAINTENANCE'
+
+ elif state == 'INSTANCE_ACTION_DONE':
+ # TBD was action done in allowed window
+ self.log.info('%s' % payload['instance_ids'])
+ else:
+ raise Exception('VNFM received event with'
+ ' unknown state %s' % state)
+
+ if reply_state:
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ reply['state'] = reply_state
+ url = payload['reply_url']
+ self.log.info('VNFM reply: %s' % reply)
+ requests.put(url, data=json.dumps(reply), headers=self.headers)
+
+ return 'OK'
+
+ @app.route('/shutdown', methods=['POST'])
+ def shutdown():
+ self.log.info('shutdown VNFM server at %s' % time.time())
+ func = request.environ.get('werkzeug.server.shutdown')
+ if func is None:
+ raise RuntimeError('Not running with the Werkzeug Server')
+ func()
+ return 'VNFM shutting down...'
+
+ app.run(host="0.0.0.0", port=self.port)
diff --git a/doctor_tests/common/utils.py b/doctor_tests/common/utils.py
index 1a8840dd..67ca4f4b 100644
--- a/doctor_tests/common/utils.py
+++ b/doctor_tests/common/utils.py
@@ -10,6 +10,7 @@ import json
import os
import paramiko
import re
+import subprocess
def load_json_file(full_path):
@@ -97,6 +98,27 @@ class SSHClient(object):
ftp.close()
+class LocalSSH(object):
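+    """SSHClient stand-in that executes commands locally.
+
+    Useful when Doctor runs on the node under test itself, for example
+    on a DevStack controller, so no real ssh connection is needed.
+    """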
+
+ def __init__(self, log):
+ self.log = log
+ self.log.info('Init local ssh client')
+
+ def ssh(self, cmd):
+ ret = 0
+ output = "%s failed!!!" % cmd
+ try:
+ output = subprocess.check_output(cmd, shell=True,
+ universal_newlines=True)
+ except subprocess.CalledProcessError:
+ ret = 1
+ return ret, output
+
+ def scp(self, src_file, dst_file):
+ return subprocess.check_output("cp %s %s" % (src_file, dst_file),
+ shell=True)
+
+
def run_async(func):
from threading import Thread
from functools import wraps
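The new LocalSSH helper mirrors the (ret, output) contract of SSHClient
while running commands on the local host via subprocess. A usage
sketch, assuming doctor_tests is importable:

    import logging

    from doctor_tests.common.utils import LocalSSH

    logging.basicConfig(level=logging.INFO)
    client = LocalSSH(logging.getLogger('doctor'))
    ret, output = client.ssh('hostname')  # ret is 0 on success
    if ret:
        raise Exception('local command failed: %s' % output)
    print(output.strip())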
diff --git a/doctor_tests/image.py b/doctor_tests/image.py
index 9961b22d..50841ef6 100644
--- a/doctor_tests/image.py
+++ b/doctor_tests/image.py
@@ -7,7 +7,11 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
import os
-import urllib.request
+try:
+ from urllib.request import urlopen
+except ImportError:
+ from urllib2 import urlopen
+
from oslo_config import cfg
@@ -46,11 +50,14 @@ class Image(object):
def create(self):
self.log.info('image create start......')
-
images = {image.name: image for image in self.glance.images.list()}
+ if self.conf.image_name == 'cirros':
+ cirros = [image for image in images if 'cirros' in image]
+ if cirros:
+ self.conf.image_name = cirros[0]
if self.conf.image_name not in images:
if not os.path.exists(self.conf.image_filename):
- resp = urllib.request.urlopen(self.conf.image_download_url)
+ resp = urlopen(self.conf.image_download_url)
with open(self.conf.image_filename, "wb") as file:
file.write(resp.read())
self.image = \
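The import fallback above keeps the image download working on both
Python 3 (urllib.request) and Python 2 (urllib2). A standalone sketch
of the same pattern; the URL and filename are illustrative:

    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2 fallback

    resp = urlopen('http://download.cirros-cloud.net/0.4.0/'
                   'cirros-0.4.0-x86_64-disk.img')
    with open('cirros.img', 'wb') as image_file:
        image_file.write(resp.read())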
diff --git a/doctor_tests/inspector/sample.py b/doctor_tests/inspector/sample.py
index 70156b20..c44db95d 100644
--- a/doctor_tests/inspector/sample.py
+++ b/doctor_tests/inspector/sample.py
@@ -52,7 +52,7 @@ class SampleInspector(BaseInspector):
driver='messaging',
topics=['notifications'])
self.notif = self.notif.prepare(publisher_id='sample')
- except:
+ except Exception:
self.notif = None
def _init_novaclients(self):
@@ -135,7 +135,7 @@ class SampleInspector(BaseInspector):
def maintenance(self, data):
try:
payload = self._alarm_traits_decoder(data)
- except:
+ except Exception:
payload = ({t[0]: t[2] for t in
data['reason_data']['event']['traits']})
self.log.error('cannot parse alarm data: %s' % payload)
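The fallback branch above rebuilds the payload straight from the raw
event traits, where each trait is a (name, dtype, value) triple, so a
dict comprehension over t[0]/t[2] recovers a name -> value mapping. A
sketch with illustrative sample data:

    data = {'reason_data': {'event': {'traits': [
        ('state', 1, 'MAINTENANCE'),
        ('session_id', 1, 'abc123'),
    ]}}}
    payload = {t[0]: t[2] for t in data['reason_data']['event']['traits']}
    assert payload['state'] == 'MAINTENANCE'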
diff --git a/doctor_tests/installer/__init__.py b/doctor_tests/installer/__init__.py
index 2b9ad83d..00a01667 100644
--- a/doctor_tests/installer/__init__.py
+++ b/doctor_tests/installer/__init__.py
@@ -13,8 +13,8 @@ from oslo_utils import importutils
OPTS = [
cfg.StrOpt('type',
- default=os.environ.get('INSTALLER_TYPE', 'local'),
- choices=['local', 'apex', 'daisy', 'fuel'],
+ default=os.environ.get('INSTALLER_TYPE', 'devstack'),
+ choices=['apex', 'daisy', 'fuel', 'devstack'],
help='the type of installer',
required=True),
cfg.StrOpt('ip',
@@ -28,10 +28,10 @@ OPTS = [
_installer_name_class_mapping = {
- 'local': 'doctor_tests.installer.local.LocalInstaller',
'apex': 'doctor_tests.installer.apex.ApexInstaller',
'daisy': 'doctor_tests.installer.daisy.DaisyInstaller',
- 'fuel': 'doctor_tests.installer.mcp.McpInstaller'
+ 'fuel': 'doctor_tests.installer.mcp.McpInstaller',
+ 'devstack': 'doctor_tests.installer.devstack.DevstackInstaller'
}
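The mapping above pairs each installer type with a dotted class path
that is imported lazily. A sketch of how such a mapping is typically
resolved with oslo.utils; get_installer and the conf object are
hypothetical stand-ins for the project's actual factory:

    from oslo_utils import importutils

    _mapping = {
        'devstack': 'doctor_tests.installer.devstack.DevstackInstaller',
    }

    def get_installer(conf, log):
        # Resolve the dotted path for the configured type and
        # instantiate the class dynamically.
        cls = importutils.import_class(_mapping[conf.installer.type])
        return cls(conf, log)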
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index b2270654..de4d2f2e 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -11,7 +11,6 @@ import getpass
import grp
import os
import pwd
-import re
import six
import stat
import subprocess
@@ -127,47 +126,9 @@ class BaseInstaller(object):
os.chmod(ssh_key, stat.S_IREAD)
return ssh_key
+ @abc.abstractmethod
def get_transport_url(self):
- client = utils.SSHClient(self.controllers[0], self.node_user_name,
- key_filename=self.key_file)
- if self.use_containers:
- ncbase = "/var/lib/config-data/puppet-generated/nova"
- else:
- ncbase = ""
- try:
- cmd = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase
- ret, url = client.ssh(cmd)
- if ret:
- raise Exception('Exec command to get transport from '
- 'controller(%s) failed, '
- 'ret=%s, output=%s'
- % (self.controllers[0], ret, url))
- elif self.controllers[0] not in url:
- # need to use ip instead of hostname
- ret = (re.sub("@.*:", "@%s:" % self.controllers[0],
- url[0].split("=", 1)[1]))
- except:
- cmd = 'grep -i "^rabbit" %s/etc/nova/nova.conf' % ncbase
- ret, lines = client.ssh(cmd)
- if ret:
- raise Exception('Exec command to get transport from '
- 'controller(%s) in Apex installer failed, '
- 'ret=%s, output=%s'
- % (self.controllers[0], ret, url))
- else:
- for line in lines.split('\n'):
- if line.startswith("rabbit_userid"):
- rabbit_userid = line.split("=")
- if line.startswith("rabbit_port"):
- rabbit_port = line.split("=")
- if line.startswith("rabbit_password"):
- rabbit_password = line.split("=")
- ret = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid,
- rabbit_password,
- self.controllers[0],
- rabbit_port)
- self.log.debug('get_transport_url %s' % ret)
- return ret
+ pass
def _run_cmd_remote(self, client, command):
self.log.info('Run command=%s in %s installer......'
@@ -199,14 +160,15 @@ class BaseInstaller(object):
def _run_apply_patches(self, client, restart_cmd, script_names,
python='python3'):
installer_dir = os.path.dirname(os.path.realpath(__file__))
-
if isinstance(script_names, list):
for script_name in script_names:
script_abs_path = '{0}/{1}/{2}'.format(installer_dir,
'common', script_name)
+ if self.conf.installer.type == "devstack":
+ script_name = "/opt/stack/%s" % script_name
try:
client.scp(script_abs_path, script_name)
- except:
+ except Exception:
client.scp(script_abs_path, script_name)
try:
if ".py" in script_name:
@@ -216,14 +178,14 @@ class BaseInstaller(object):
script_name)
ret, output = client.ssh(cmd)
self.log.info('Command %s output %s' % (cmd, output))
- except:
+ except Exception:
ret, output = client.ssh(cmd)
-
+ self.log.info('Command %s output %s' % (cmd, output))
if ret:
raise Exception('Do the command in remote'
' node failed, ret=%s, cmd=%s, output=%s'
% (ret, cmd, output))
- if 'nova' in restart_cmd:
+ if 'nova' in restart_cmd or 'devstack@n-' in restart_cmd:
# Make sure scheduler has proper cpu_allocation_ratio
time.sleep(5)
client.ssh(restart_cmd)
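With get_transport_url turned into an abstract method, every installer
subclass must now supply its own lookup. A minimal hypothetical
subclass under that contract:

    import abc

    import six

    @six.add_metaclass(abc.ABCMeta)
    class InstallerSketch(object):
        @abc.abstractmethod
        def get_transport_url(self):
            pass

    class StaticInstaller(InstallerSketch):
        def get_transport_url(self):
            # Hypothetical static value; real installers read it
            # from nova.conf on a controller.
            return 'rabbit://user:secret@192.0.2.10:5672/?ssl=0'

    print(StaticInstaller().get_transport_url())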
diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py
index 76ac649b..615f1895 100644
--- a/doctor_tests/installer/common/set_compute_config.py
+++ b/doctor_tests/installer/common/set_compute_config.py
@@ -26,9 +26,9 @@ def set_cpu_allocation_ratio():
found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio"
in ca])
fcheck.close()
+ change = False
+ found = False
if found_list and len(found_list):
- change = False
- found = False
for car in found_list:
if car.startswith('#'):
continue
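Hoisting the change/found flags out of the if branch matters because a
nova.conf with no cpu_allocation_ratio lines would otherwise leave them
undefined when read later. A distilled sketch of the fixed flow, with
illustrative data:

    found_list = []  # e.g. no cpu_allocation_ratio lines matched
    change = False
    found = False
    for car in found_list:
        if car.startswith('#'):
            continue
        found = True
    # Safe even when found_list was empty, since the flags are bound.
    if not found or change:
        print('would write a new cpu_allocation_ratio setting')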
diff --git a/doctor_tests/installer/common/set_fenix.sh b/doctor_tests/installer/common/set_fenix.sh
index aac376cd..bd1eae47 100644
--- a/doctor_tests/installer/common/set_fenix.sh
+++ b/doctor_tests/installer/common/set_fenix.sh
@@ -22,14 +22,15 @@ apt-get install -y docker-ce docker-ce-cli containerd.io
dpkg -r --force-depends golang-docker-credential-helpers
}
-docker ps | grep fenix >/dev/null && {
-REMOTE=`docker exec -ti fenix git rev-parse origin/master`
-LOCAL=`docker exec -ti fenix git rev-parse @`
-if [ $LOCAL = $REMOTE ]; then
- echo "Fenix start: Already running latest"
+docker ps | grep fenix -q && {
+REMOTE=`git ls-remote https://opendev.org/x/fenix HEAD | awk '{ print $1}'`
+LOCAL=`docker exec -t fenix git rev-parse @`
+if [[ "$LOCAL" =~ "$REMOTE" ]]; then
+ # The strings have different trailing characters, so they cannot be compared for equality
+ echo "Fenix start: Already running latest $LOCAL equals $REMOTE"
exit 0
else
- echo "Fenix container needs to be recreated..."
+ echo "Fenix container needs to be recreated $LOCAL not $REMOTE"
# Remove previous container
for img in `docker image list | grep "^fenix" | awk '{print $1}'`; do
for dock in `docker ps --all -f "ancestor=$img" | grep "$img" | awk '{print $1}'`; do
@@ -75,6 +76,11 @@ echo "password = $OS_PASSWORD" >> fenix-api.conf
echo "username = $OS_USERNAME" >> fenix-api.conf
echo "cafile = /opt/stack/data/ca-bundle.pem" >> fenix-api.conf
+openstack service list | grep -q maintenance || {
+openstack service create --name fenix --enable maintenance
+openstack endpoint create --region $OS_REGION_NAME --enable fenix public http://localhost:12347/v1
+}
+
# Mysql pw
# MYSQLPW=`cat /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.json | grep mysql | grep root_password | awk -F": " '{print $2}' | awk -F"\"" '{print $2}'`
MYSQLPW=root
diff --git a/doctor_tests/installer/devstack.py b/doctor_tests/installer/devstack.py
new file mode 100644
index 00000000..02f3601a
--- /dev/null
+++ b/doctor_tests/installer/devstack.py
@@ -0,0 +1,151 @@
+##############################################################################
+# Copyright (c) 2019 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import socket
+import time
+
+from doctor_tests.common.utils import SSHClient
+from doctor_tests.common.utils import LocalSSH
+from doctor_tests.identity_auth import get_session
+from doctor_tests.installer.base import BaseInstaller
+from doctor_tests.os_clients import nova_client
+
+
+class DevstackInstaller(BaseInstaller):
+ node_user_name = None
+ cm_set_script = 'set_config.py'
+ nc_set_compute_script = 'set_compute_config.py'
+ cm_restore_script = 'restore_config.py'
+ nc_restore_compute_script = 'restore_compute_config.py'
+ ac_restart_script = 'restart_aodh.py'
+ ac_restore_script = 'restore_aodh.py'
+ python = 'python'
+
+ def __init__(self, conf, log):
+ super(DevstackInstaller, self).__init__(conf, log)
+ # Run Doctor under the user's home; sudo hides other env params that would be used
+ home, self.node_user_name = (iter(os.environ.get('VIRTUAL_ENV')
+ .split('/', 3)[1:3]))
+ # Migration needs to work, so ssh should have a proper key defined
+ self.key_file = '/%s/%s/.ssh/id_rsa' % (home, self.node_user_name)
+ self.log.info('ssh uses: %s and %s' % (self.node_user_name,
+ self.key_file))
+ self.controllers = ([ip for ip in
+ socket.gethostbyname_ex(socket.gethostname())[2]
+ if not ip.startswith('127.')] or
+ [[(s.connect(('8.8.8.8', 53)),
+ s.getsockname()[0], s.close())
+ for s in [socket.socket(socket.AF_INET,
+ socket.SOCK_DGRAM)]][0][1]])
+ conf.admin_tool.ip = self.controllers[0]
+ self.computes = list()
+ self.nova = nova_client(conf.nova_version, get_session())
+
+ def setup(self):
+ self.log.info('Setup Devstack installer start......')
+ self._get_devstack_conf()
+ self.create_flavor()
+ self.set_apply_patches()
+
+ def cleanup(self):
+ self.restore_apply_patches()
+
+ def get_ssh_key_from_installer(self):
+ return self.key_file
+
+ def get_transport_url(self):
+ client = LocalSSH(self.log)
+ cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf'
+ ret, url = client.ssh(cmd)
+ url = url.split("= ", 1)[1][:-1]
+ self.log.info('get_transport_url %s' % url)
+ return url
+
+ def get_host_ip_from_hostname(self, hostname):
+ return [hvisor.__getattr__('host_ip') for hvisor in self.hvisors
+ if hvisor.__getattr__('hypervisor_hostname') == hostname][0]
+
+ def _get_devstack_conf(self):
+ self.log.info('Get devstack config details for Devstack installer'
+ '......')
+ self.hvisors = self.nova.hypervisors.list(detailed=True)
+ self.log.info('checking hypervisors.......')
+ self.computes = [hvisor.__getattr__('host_ip') for hvisor in
+ self.hvisors]
+ self.use_containers = False
+ self.log.info('controller_ips:%s' % self.controllers)
+ self.log.info('compute_ips:%s' % self.computes)
+ self.log.info('use_containers:%s' % self.use_containers)
+
+ def _set_docker_restart_cmd(self, service):
+ # There can be multiple instances running, so restart all of them
+ cmd = "for container in `sudo docker ps | grep "
+ cmd += service
+ cmd += " | awk '{print $1}'`; do sudo docker restart $container; \
+ done;"
+ return cmd
+
+ def set_apply_patches(self):
+ self.log.info('Set apply patches start......')
+
+ set_scripts = [self.cm_set_script]
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@ceilometer-anotification.service'
+
+ client = LocalSSH(self.log)
+ self._run_apply_patches(client,
+ restart_cmd,
+ set_scripts,
+ python=self.python)
+ time.sleep(7)
+
+ self.log.info('Set apply patches for computes start......')
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@n-cpu.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(client,
+ restart_cmd,
+ [self.nc_set_compute_script],
+ python=self.python)
+ time.sleep(7)
+
+ def restore_apply_patches(self):
+ self.log.info('restore apply patches start......')
+
+ restore_scripts = [self.cm_restore_script]
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@ceilometer-anotification.service'
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd += ' devstack@n-sch.service'
+ restore_scripts.append(self.nc_restore_compute_script)
+
+ client = LocalSSH(self.log)
+ self._run_apply_patches(client,
+ restart_cmd,
+ restore_scripts,
+ python=self.python)
+
+ if self.conf.test_case != 'fault_management':
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@n-cpu.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(
+ client, restart_cmd,
+ [self.nc_restore_compute_script],
+ python=self.python)
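The controller discovery in __init__ above first collects non-loopback
addresses for the local hostname and, failing that, opens a UDP socket
toward a public resolver just to read the local socket name (no packet
is sent). A standalone sketch of the same trick:

    import socket

    ips = [ip for ip in socket.gethostbyname_ex(socket.gethostname())[2]
           if not ip.startswith('127.')]
    if not ips:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('8.8.8.8', 53))  # connect() on UDP only picks a route
        ips = [s.getsockname()[0]]
        s.close()
    print(ips[0])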
diff --git a/doctor_tests/installer/local.py b/doctor_tests/installer/local.py
deleted file mode 100644
index fee14f33..00000000
--- a/doctor_tests/installer/local.py
+++ /dev/null
@@ -1,118 +0,0 @@
-##############################################################################
-# Copyright (c) 2017 ZTE Corporation and others.
-#
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Apache License, Version 2.0
-# which accompanies this distribution, and is available at
-# http://www.apache.org/licenses/LICENSE-2.0
-##############################################################################
-import os
-import shutil
-import subprocess
-
-from doctor_tests.installer.base import BaseInstaller
-from doctor_tests.installer.common.vitrage import \
- set_vitrage_host_down_template
-from doctor_tests.common.constants import Inspector
-from doctor_tests.common.utils import load_json_file
-from doctor_tests.common.utils import write_json_file
-
-
-class LocalInstaller(BaseInstaller):
- node_user_name = 'root'
-
- nova_policy_file = '/etc/nova/policy.json'
- nova_policy_file_backup = '%s%s' % (nova_policy_file, '.bak')
-
- def __init__(self, conf, log):
- super(LocalInstaller, self).__init__(conf, log)
- self.policy_modified = False
- self.add_policy_file = False
-
- def setup(self):
- self.get_ssh_key_from_installer()
- self.set_apply_patches()
-
- def cleanup(self):
- self.restore_apply_patches()
-
- def get_ssh_key_from_installer(self):
- self.log.info('Assuming SSH keys already exchanged with computer'
- 'for local installer type')
- return None
-
- def get_host_ip_from_hostname(self, hostname):
- self.log.info('Get host ip from host name in local installer......')
-
- cmd = "getent hosts %s | awk '{ print $1 }'" % (hostname)
- server = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
- stdout, stderr = server.communicate()
- host_ip = stdout.strip().decode("utf-8")
-
- self.log.info('Get host_ip:%s from host_name:%s in local installer'
- % (host_ip, hostname))
- return host_ip
-
- def set_apply_patches(self):
- self._set_nova_policy()
- if self.conf.inspector.type == Inspector.VITRAGE:
- set_vitrage_host_down_template()
- os.system('sudo systemctl restart devstack@vitrage-graph.service')
-
- def restore_apply_patches(self):
- self._restore_nova_policy()
-
- def _set_nova_policy(self):
- host_status_policy = 'os_compute_api:servers:show:host_status'
- host_status_rule = 'rule:admin_or_owner'
- policy_data = {
- 'context_is_admin': 'role:admin',
- 'owner': 'user_id:%(user_id)s',
- 'admin_or_owner': 'rule:context_is_admin or rule:owner',
- host_status_policy: host_status_rule
- }
-
- if os.path.isfile(self.nova_policy_file):
- data = load_json_file(self.nova_policy_file)
- if host_status_policy in data:
- rule_origion = data[host_status_policy]
- if host_status_rule == rule_origion:
- self.log.info('Do not need to modify nova policy.')
- self.policy_modified = False
- else:
- # update the host_status_policy
- data[host_status_policy] = host_status_rule
- self.policy_modified = True
- else:
- # add the host_status_policy, if the admin_or_owner is not
- # defined, add it also
- for policy, rule in policy_data.items():
- if policy not in data:
- data[policy] = rule
- self.policy_modified = True
- if self.policy_modified:
- self.log.info('Nova policy is Modified.')
- shutil.copyfile(self.nova_policy_file,
- self.nova_policy_file_backup)
- else:
- # file does not exit, create a new one and add the policy
- self.log.info('Nova policy file not exist. Creating a new one')
- data = policy_data
- self.add_policy_file = True
-
- if self.policy_modified or self.add_policy_file:
- write_json_file(self.nova_policy_file, data)
- os.system('sudo systemctl restart devstack@n-api.service')
-
- def _restore_nova_policy(self):
- if self.policy_modified:
- shutil.copyfile(self.nova_policy_file_backup,
- self.nova_policy_file)
- os.remove(self.nova_policy_file_backup)
- elif self.add_policy_file:
- os.remove(self.nova_policy_file)
-
- if self.add_policy_file or self.policy_modified:
- os.system('sudo systemctl restart devstack@n-api.service')
- self.add_policy_file = False
- self.policy_modified = False
diff --git a/doctor_tests/installer/mcp.py b/doctor_tests/installer/mcp.py
index 65c8ed70..7659c9e2 100644
--- a/doctor_tests/installer/mcp.py
+++ b/doctor_tests/installer/mcp.py
@@ -7,6 +7,7 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
from os.path import isfile
+import re
import time
from doctor_tests.common.constants import is_fenix
@@ -60,6 +61,45 @@ class McpInstaller(BaseInstaller):
mcp_key = '/var/lib/opnfv/mcp.rsa'
return mcp_key if isfile(mcp_key) else ssh_key
+ def get_transport_url(self):
+ client = SSHClient(self.controllers[0], self.node_user_name,
+ key_filename=self.key_file)
+ try:
+ cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf'
+ ret, url = client.ssh(cmd)
+
+ if ret:
+ raise Exception('Exec command to get transport from '
+ 'controller(%s) in MCP installer failed, '
+ 'ret=%s, output=%s'
+ % (self.controllers[0], ret, url))
+ elif self.controllers[0] not in url:
+ # need to use ip instead of hostname
+ url = (re.sub("@.*:", "@%s:" % self.controllers[0],
+ url[0].split("=", 1)[1]))
+ except Exception:
+ cmd = 'grep -i "^rabbit" /etc/nova/nova.conf'
+ ret, lines = client.ssh(cmd)
+ if ret:
+ raise Exception('Exec command to get transport from '
+ 'controller(%s) in MCP installer failed, '
+ 'ret=%s, output=%s'
+ % (self.controllers[0], ret, url))
+ else:
+ for line in lines.split('\n'):
+ if line.startswith("rabbit_userid"):
+ rabbit_userid = line.split("=")
+ if line.startswith("rabbit_port"):
+ rabbit_port = line.split("=")
+ if line.startswith("rabbit_password"):
+ rabbit_password = line.split("=")
+ url = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid,
+ rabbit_password,
+ self.controllers[0],
+ rabbit_port)
+ self.log.info('get_transport_url %s' % url)
+ return url
+
def _copy_overcloudrc_to_controllers(self):
for ip in self.controllers:
cmd = "scp overcloudrc %s@%s:" % (self.node_user_name, ip)
@@ -89,8 +129,8 @@ class McpInstaller(BaseInstaller):
def set_apply_patches(self):
self.log.info('Set apply patches start......')
fenix_files = None
-
set_scripts = [self.cm_set_script]
+ thrs = []
restart_cmd = 'sudo systemctl restart' \
' ceilometer-agent-notification.service'
@@ -112,10 +152,10 @@ class McpInstaller(BaseInstaller):
'admin_tool/fenix',
fenix_file)
client.scp(src_file, fenix_file)
- self._run_apply_patches(client,
- restart_cmd,
- set_scripts,
- python=self.python)
+ thrs.append(self._run_apply_patches(client,
+ restart_cmd,
+ set_scripts,
+ python=self.python))
time.sleep(5)
self.log.info('Set apply patches start......')
@@ -125,11 +165,15 @@ class McpInstaller(BaseInstaller):
for node_ip in self.computes:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self._run_apply_patches(client,
- restart_cmd,
- [self.nc_set_compute_script],
- python=self.python)
+ thrs.append(self._run_apply_patches(
+ client,
+ restart_cmd,
+ [self.nc_set_compute_script],
+ python=self.python))
time.sleep(5)
+ # If the Fenix container is being built, it needs to be ready before continuing
+ for thr in thrs:
+ thr.join()
def restore_apply_patches(self):
self.log.info('restore apply patches start......')
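The hostname-to-IP rewrite in get_transport_url above relies on a
single greedy re.sub over the host:port part of the URL. A worked
sketch with illustrative values:

    import re

    controller_ip = '192.0.2.10'
    line = 'transport_url=rabbit://openstack:secret@ctl01:5672/?ssl=0'
    url = re.sub('@.*:', '@%s:' % controller_ip,
                 line.split('=', 1)[1])
    assert url == 'rabbit://openstack:secret@192.0.2.10:5672/?ssl=0'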
diff --git a/doctor_tests/main.py b/doctor_tests/main.py
index cdb4af55..7573faec 100644
--- a/doctor_tests/main.py
+++ b/doctor_tests/main.py
@@ -43,7 +43,6 @@ class DoctorTest(object):
def setup(self):
# prepare the cloud env
self.installer.setup()
-
# preparing VM image...
self.image.create()
@@ -51,39 +50,50 @@ class DoctorTest(object):
self.user.create()
def test_fault_management(self):
- try:
- LOG.info('doctor fault management test starting.......')
- transport_url = self.installer.get_transport_url()
- self.fault_management = \
- FaultManagement(self.conf, self.installer, self.user, LOG,
- transport_url)
-
- # prepare test env
- self.fault_management.setup()
-
- # wait for aodh alarms are updated in caches for event evaluator,
- # sleep time should be larger than event_alarm_cache_ttl
- # (default 60)
- # (tojuvone) Fraser currently needs 120
- time.sleep(120)
-
- # injecting host failure...
- # NOTE (umar) add INTERFACE_NAME logic to host injection
- self.fault_management.start()
- time.sleep(30)
-
- # verify the test results
- # NOTE (umar) copy remote monitor.log file when monitor=collectd
- self.fault_management.check_host_status('down')
- self.fault_management.check_notification_time()
-
- except Exception as e:
- LOG.error('doctor fault management test failed, '
- 'Exception=%s' % e)
- LOG.error(format_exc())
- sys.exit(1)
- finally:
- self.fault_management.cleanup()
+ retry = 2
+ # Retry once if notified_time is None
+ while retry > 0:
+ try:
+ self.fault_management = None
+ LOG.info('doctor fault management test starting.......')
+ transport_url = self.installer.get_transport_url()
+ self.fault_management = \
+ FaultManagement(self.conf, self.installer, self.user, LOG,
+ transport_url)
+
+ # prepare test env
+ self.fault_management.setup()
+
+ # wait until aodh alarms are updated in caches for the event
+ # evaluator; sleep time should be larger than
+ # event_alarm_cache_ttl (default 60)
+ # (tojuvone) Fraser currently needs 120
+ time.sleep(120)
+
+ # injecting host failure...
+ # NOTE (umar) add INTERFACE_NAME logic to host injection
+ self.fault_management.start()
+ time.sleep(30)
+
+ # verify the test results
+ # NOTE (umar) copy remote monitor.log file when
+ # monitor=collectd
+ self.fault_management.check_host_status('down')
+ self.fault_management.check_notification_time()
+ retry = 0
+
+ except Exception as e:
+ LOG.error('doctor fault management test failed, '
+ 'Exception=%s' % e)
+ if 'notified_time=None' in str(e):
+ retry -= 1
+ LOG.info('doctor fault management retry')
+ continue
+ LOG.error(format_exc())
+ sys.exit(1)
+ finally:
+ if self.fault_management is not None:
+ self.fault_management.cleanup()
def _amount_compute_nodes(self):
services = self.nova.services.list(binary='nova-compute')
@@ -96,11 +106,12 @@ class DoctorTest(object):
LOG.info('not enough compute nodes, skipping doctor '
'maintenance test')
return
- elif self.conf.installer.type not in ['apex', 'fuel']:
+ elif self.conf.installer.type not in ['apex', 'fuel', 'devstack']:
LOG.info('not supported installer, skipping doctor '
'maintenance test')
return
try:
+ maintenance = None
LOG.info('doctor maintenance test starting.......')
trasport_url = self.installer.get_transport_url()
maintenance = Maintenance(trasport_url, self.conf, LOG)
@@ -122,7 +133,8 @@ class DoctorTest(object):
LOG.error(format_exc())
sys.exit(1)
finally:
- maintenance.cleanup_maintenance()
+ if maintenance is not None:
+ maintenance.cleanup_maintenance()
def run(self):
"""run doctor tests"""
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
index 2e40529f..e6cdcccd 100644
--- a/doctor_tests/scenario/maintenance.py
+++ b/doctor_tests/scenario/maintenance.py
@@ -35,11 +35,16 @@ class Maintenance(object):
auth = get_identity_auth(project=self.conf.doctor_project)
self.neutron = neutron_client(get_session(auth=auth))
self.stack = Stack(self.conf, self.log)
+ if self.conf.installer.type == "devstack":
+ self.endpoint_ip = trasport_url.split("@", 1)[1].split(":", 1)[0]
+ else:
+ self.endpoint_ip = self.conf.admin_tool.ip
+ self.endpoint = "http://%s:12347/" % self.endpoint_ip
if self.conf.admin_tool.type == 'sample':
self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
- self.endpoint = 'maintenance'
+ self.endpoint += 'maintenance'
else:
- self.endpoint = 'v1/maintenance'
+ self.endpoint += 'v1/maintenance'
self.app_manager = get_app_manager(self.stack, self.conf, self.log)
self.inspector = get_inspector(self.conf, self.log, trasport_url)
@@ -128,8 +133,9 @@ class Maintenance(object):
else:
# TBD Now we expect Fenix is running in self.conf.admin_tool.port
pass
- self.app_manager.start()
+ # Start the inspector before app_manager, as the floating ip might come up late
self.inspector.start()
+ self.app_manager.start()
def start_maintenance(self):
self.log.info('start maintenance.......')
@@ -138,17 +144,13 @@ class Maintenance(object):
for hvisor in hvisors:
hostname = hvisor.__getattr__('hypervisor_hostname')
maintenance_hosts.append(hostname)
-
- url = ('http://%s:%s/%s' %
- (self.conf.admin_tool.ip,
- self.conf.admin_tool.port,
- self.endpoint))
+ url = self.endpoint
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
if self.conf.admin_tool.type == 'fenix':
headers['X-Auth-Token'] = self.admin_session.get_token()
- self.log.info('headers %s' % headers)
+ self.log.info('url %s headers %s' % (url, headers))
retries = 12
ret = None
while retries > 0:
@@ -160,8 +162,12 @@ class Maintenance(object):
data = {'state': 'MAINTENANCE',
'maintenance_at': maintenance_at,
- 'metadata': {'openstack_version': 'Rocky'},
- 'workflow': 'default'}
+ 'metadata': {'openstack_version': 'Train'}}
+
+ if self.conf.app_manager.type == 'vnfm':
+ data['workflow'] = 'vnf'
+ else:
+ data['workflow'] = 'default'
if self.conf.admin_tool.type == 'sample':
data['hosts'] = maintenance_hosts
@@ -170,7 +176,7 @@ class Maintenance(object):
try:
ret = requests.post(url, data=json.dumps(data),
headers=headers)
- except:
+ except Exception:
if retries == 0:
raise Exception('admin tool did not respond in 120s')
else:
@@ -187,11 +193,8 @@ class Maintenance(object):
def remove_maintenance_session(self, session_id):
self.log.info('remove maintenance session %s.......' % session_id)
- url = ('http://%s:%s/%s/%s' %
- (self.conf.admin_tool.ip,
- self.conf.admin_tool.port,
- self.endpoint,
- session_id))
+
+ url = ('%s/%s' % (self.endpoint, session_id))
headers = {
'Content-Type': 'application/json',
@@ -205,11 +208,8 @@ class Maintenance(object):
raise Exception(ret.text)
def get_maintenance_state(self, session_id):
- url = ('http://%s:%s/%s/%s' %
- (self.conf.admin_tool.ip,
- self.conf.admin_tool.port,
- self.endpoint,
- session_id))
+
+ url = ('%s/%s' % (self.endpoint, session_id))
headers = {
'Content-Type': 'application/json',
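On devstack the admin-tool endpoint is now derived from the messaging
transport_url host instead of conf.admin_tool.ip. A worked sketch of
that string surgery, with an illustrative URL (the trasport_url
spelling matches the surrounding code):

    trasport_url = 'rabbit://openstack:secret@192.0.2.10:5672/?ssl=0'
    endpoint_ip = trasport_url.split('@', 1)[1].split(':', 1)[0]
    endpoint = 'http://%s:12347/' % endpoint_ip + 'v1/maintenance'
    assert endpoint == 'http://192.0.2.10:12347/v1/maintenance'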
diff --git a/doctor_tests/user.py b/doctor_tests/user.py
index 29aa004b..2cd9757f 100644
--- a/doctor_tests/user.py
+++ b/doctor_tests/user.py
@@ -129,7 +129,6 @@ class User(object):
def _add_user_role_in_project(self, is_admin=False):
"""add test user with test role in test project"""
-
project = self.projects.get(self.conf.doctor_project)
user_name = 'admin' if is_admin else self.conf.doctor_user
diff --git a/tox.ini b/tox.ini
index 30feecf8..2937c329 100644
--- a/tox.ini
+++ b/tox.ini
@@ -32,6 +32,7 @@ passenv =
ADMIN_TOOL_TYPE
TEST_CASE
SSH_KEY
+ APP_MANAGER_TYPE
changedir = {toxinidir}/doctor_tests
commands = doctor-test
/usr/bin/find {toxinidir} -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete