-rw-r--r--  INFO | 27
-rw-r--r--  INFO.yaml | 24
-rw-r--r--  devstack/README.rst | 4
-rw-r--r--  devstack/local.conf.sample | 120
-rw-r--r--  docs/conf.py | 1
-rw-r--r--  docs/development/index.rst | 14
-rw-r--r--  docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst | 253
-rw-r--r--  docs/development/overview/index.rst | 7
-rw-r--r--  docs/development/overview/overview.rst | 52
-rw-r--r--  docs/development/requirements/index.rst | 6
-rw-r--r--  docs/index.rst | 4
-rw-r--r--  docs/release/configguide/feature.configuration.rst | 54
-rw-r--r--  docs/release/configguide/index.rst | 6
-rw-r--r--  docs/release/index.rst | 12
-rw-r--r--  docs/release/installation/index.rst (renamed from docs/development/manuals/index.rst) | 12
-rw-r--r--  docs/release/installation/installation.rst | 44
-rw-r--r--  docs/release/release-notes/release-notes.rst | 253
-rw-r--r--  docs/release/release-notes/releasenotes_gambia.rst | 303
-rw-r--r--  docs/release/release-notes/releasenotes_iruya.rst | 129
-rw-r--r--  docs/release/scenarios/fault_management/fault_management.rst | 90
-rw-r--r--  docs/release/scenarios/maintenance/images/Fault-management-design.png (renamed from docs/development/overview/functest_scenario/images/Fault-management-design.png) | bin 237110 -> 237110 bytes
-rw-r--r--  docs/release/scenarios/maintenance/images/LICENSE (renamed from docs/development/overview/functest_scenario/images/LICENSE) | 0
-rw-r--r--  docs/release/scenarios/maintenance/images/Maintenance-design.png (renamed from docs/development/overview/functest_scenario/images/Maintenance-design.png) | bin 316640 -> 316640 bytes
-rw-r--r--  docs/release/scenarios/maintenance/images/Maintenance-workflow.png (renamed from docs/development/overview/functest_scenario/images/Maintenance-workflow.png) | bin 81286 -> 81286 bytes
-rw-r--r--  docs/release/scenarios/maintenance/maintenance.rst | 120
-rw-r--r--  docs/release/userguide/get-valid-server-state.rst (renamed from docs/development/manuals/get-valid-server-state.rst) | 0
-rw-r--r--  docs/release/userguide/index.rst | 3
-rw-r--r--  docs/release/userguide/mark-host-down_manual.rst (renamed from docs/development/manuals/mark-host-down_manual.rst) | 0
-rw-r--r--  docs/release/userguide/monitors.rst (renamed from docs/development/manuals/monitors.rst) | 0
-rw-r--r--  docs/testing/developer/index.rst | 13
-rw-r--r--  docs/testing/developer/testing.rst (renamed from docs/development/overview/testing.rst) | 44
-rw-r--r--  docs/testing/index.rst | 15
-rw-r--r--  docs/testing/user/index.rst | 13
-rw-r--r--  docs/testing/user/testing.rst | 30
-rw-r--r--  doctor_tests/admin_tool/__init__.py | 8
-rw-r--r--  doctor_tests/admin_tool/fenix/Dockerfile | 34
-rwxr-xr-x  doctor_tests/admin_tool/fenix/run | 32
-rw-r--r--  doctor_tests/admin_tool/sample.py | 185
-rw-r--r--  doctor_tests/app_manager/__init__.py | 8
-rw-r--r--  doctor_tests/app_manager/sample.py | 28
-rw-r--r--  doctor_tests/app_manager/vnfm.py | 441
-rw-r--r--  doctor_tests/common/constants.py | 4
-rw-r--r--  doctor_tests/common/utils.py | 22
-rw-r--r--  doctor_tests/image.py | 13
-rw-r--r--  doctor_tests/inspector/__init__.py | 8
-rw-r--r--  doctor_tests/inspector/sample.py | 48
-rw-r--r--  doctor_tests/installer/__init__.py | 8
-rw-r--r--  doctor_tests/installer/apex.py | 98
-rw-r--r--  doctor_tests/installer/base.py | 58
-rw-r--r--  doctor_tests/installer/common/restore_compute_config.py | 22
-rw-r--r--  doctor_tests/installer/common/set_compute_config.py | 34
-rw-r--r--  doctor_tests/installer/common/set_config.py | 1
-rw-r--r--  doctor_tests/installer/common/set_fenix.sh | 106
-rw-r--r--  doctor_tests/installer/devstack.py | 151
-rw-r--r--  doctor_tests/installer/local.py | 118
-rw-r--r--  doctor_tests/installer/mcp.py | 179
-rw-r--r--  doctor_tests/main.py | 85
-rw-r--r--  doctor_tests/scenario/fault_management.py | 13
-rw-r--r--  doctor_tests/scenario/maintenance.py | 134
-rw-r--r--  doctor_tests/stack.py | 2
-rw-r--r--  doctor_tests/user.py | 1
-rw-r--r--  tox.ini | 9
62 files changed, 2490 insertions, 1013 deletions
diff --git a/INFO b/INFO
deleted file mode 100644
index f722e870..00000000
--- a/INFO
+++ /dev/null
@@ -1,27 +0,0 @@
-Project: Fault Management project (doctor)
-Project Creation Date: December 2, 2014
-Project Category: Requirement
-Lifecycle State: Mature
-Primary Contact: Tomi Juvonen (tomi.juvonen@nokia.com)
-Project Lead: Tomi Juvonen (tomi.juvonen@nokia.com)
-Jira Project Name: Fault Management project
-Jira Project Prefix: DOCTOR
-Mailing list tag: [doctor]
-IRC: Server:freenode.net Channel:#opnfv-doctor
-Repository: doctor
-
-Committers:
-Ashiq Khan (NTT DOCOMO, khan@nttdocomo.com)
-Bertrand Souville (NTT DOCOMO, souville@docomolab-euro.com)
-Dong Wenjuan (ZTE, dong.wenjuan@zte.com.cn)
-Gerald Kunzmann (NTT DOCOMO, kunzmann@docomolab-euro.com)
-Ryota Mibu (NEC, r-mibu@cq.jp.nec.com)
-Serge Manning (Sprint, Serge.Manning@sprint.com)
-Tomi Juvonen (Nokia, tomi.juvonen@nokia.com)
-
-Link to TSC approval of the project: http://meetbot.opnfv.org/meetings/opnfv-meeting/2014/opnfv-meeting.2014-12-02-14.58.html
-Link(s) to approval of committer update:
-http://lists.opnfv.org/pipermail/opnfv-tsc/2015-June/000905.html
-http://lists.opnfv.org/pipermail/opnfv-tech-discuss/2015-June/003165.html
-http://lists.opnfv.org/pipermail/opnfv-tech-discuss/2016-June/011245.html
-http://lists.opnfv.org/pipermail/opnfv-tech-discuss/2016-July/011771.html
diff --git a/INFO.yaml b/INFO.yaml
index 3b9a3101..97acb69f 100644
--- a/INFO.yaml
+++ b/INFO.yaml
@@ -34,31 +34,12 @@ repositories:
- 'doctor'
committers:
- <<: *opnfv_doctor_ptl
- - name: 'Ashiq Khan'
- email: 'khan@nttdocomo.com'
- company: 'NTT DOCOMO'
- id: 'ashiq.khan'
- - name: 'Serge Manning'
- email: 'serge.manning@sprint.com'
- company: 'Sprint'
- id: 'sergem913'
- - name: 'Gerald Kunzmann'
- email: 'kunzmann@docomolab-euro.com'
- company: 'DOCOMO Euro-Labs'
- id: 'kunzmann'
- name: 'wenjuan dong'
email: 'dong.wenjuan@zte.com.cn'
company: 'ZTE'
id: 'dongwenjuan'
- - name: 'Bertrand Souville'
- email: 'souville@docomolab-euro.com'
- company: 'DOCOMO Euro-Labs'
- id: 'bertys'
- - name: 'Ryota Mibu'
- email: 'r-mibu@cq.jp.nec.com'
- company: 'NEC'
- id: 'r-mibu'
tsc:
+ # yamllint disable rule:line-length
approval: 'http//meetbot.opnfv.org/meetings/opnfv-meeting/2014/opnfv-meeting.2014-12-02-14.58.html'
changes:
- type: 'removal'
@@ -100,3 +81,6 @@ tsc:
- type: 'removal'
name: 'Peter Lee'
link: 'https://lists.opnfv.org/pipermail/opnfv-tsc/2018-March/004190.html'
+ - type: 'removal'
+ name: 'Bertrand Souville'
+ link: 'https://lists.opnfv.org/g/opnfv-tech-discuss/message/22344'
diff --git a/devstack/README.rst b/devstack/README.rst
index 91e8abfe..aaa18a7f 100644
--- a/devstack/README.rst
+++ b/devstack/README.rst
@@ -18,7 +18,9 @@ OPNFV Doctor in DevStack.
enable_plugin osprofiler https://git.openstack.org/openstack/osprofiler
enable_plugin doctor https://git.opnfv.org/doctor
-to the ``[[local|localrc]]`` section.
+to the ``[[local|localrc]]`` section. Alternatively, copy ``local.conf.sample``::
+
+ cp /<path-to-doctor>/devstack/local.conf.sample ${DEVSTACK_DIR}/local.conf
.. note:: The order of enabling plugins matters.
diff --git a/devstack/local.conf.sample b/devstack/local.conf.sample
new file mode 100644
index 00000000..2967714a
--- /dev/null
+++ b/devstack/local.conf.sample
@@ -0,0 +1,120 @@
+# Sample ``local.conf`` for user-configurable variables in ``stack.sh``
+
+# NOTE: Copy this file to the root DevStack directory for it to work properly.
+
+# ``local.conf`` is a user-maintained settings file that is sourced from ``stackrc``.
+# This gives it the ability to override any variables set in ``stackrc``.
+# Also, most of the settings in ``stack.sh`` are written to only be set if no
+# value has already been set; this lets ``local.conf`` effectively override the
+# default values.
+
+# This is a collection of some of the settings we have found to be useful
+# in our DevStack development environments. Additional settings are described
+# in https://docs.openstack.org/devstack/latest/configuration.html#local-conf
+# These should be considered as samples and are unsupported DevStack code.
+
+# The ``localrc`` section replaces the old ``localrc`` configuration file.
+# Note that if ``localrc`` is present it will be used in favor of this section.
+[[local|localrc]]
+
+# Minimal Contents
+# ----------------
+
+# While ``stack.sh`` is happy to run without ``localrc``, devlife is better when
+# there are a few minimal variables set:
+
+# If the ``*_PASSWORD`` variables are not set here you will be prompted to enter
+# values for them by ``stack.sh`` and they will be added to ``local.conf``.
+ADMIN_PASSWORD=devstack
+DATABASE_PASSWORD=$ADMIN_PASSWORD
+RABBIT_PASSWORD=$ADMIN_PASSWORD
+SERVICE_PASSWORD=$ADMIN_PASSWORD
+
+# ``HOST_IP`` and ``HOST_IPV6`` should be set manually for best results if
+# the NIC configuration of the host is unusual, i.e. ``eth1`` has the default
+# route but ``eth0`` is the public interface. They are auto-detected in
+# ``stack.sh`` but often is indeterminate on later runs due to the IP moving
+# from an Ethernet interface to a bridge on the host. Setting it here also
+# makes it available for ``openrc`` to include when setting ``OS_AUTH_URL``.
+# Neither is set by default.
+HOST_IP=127.0.0.1
+#HOST_IPV6=2001:db8::7
+
+
+# Logging
+# -------
+
+# By default ``stack.sh`` output only goes to the terminal where it runs. It can
+# be configured to additionally log to a file by setting ``LOGFILE`` to the full
+# path of the destination log file. A timestamp will be appended to the given name.
+LOGFILE=$DEST/logs/stack.sh.log
+
+# Old log files are automatically removed after 7 days to keep things neat. Change
+# the number of days by setting ``LOGDAYS``.
+LOGDAYS=2
+
+# Nova logs will be colorized if ``SYSLOG`` is not set; turn this off by setting
+# ``LOG_COLOR`` false.
+#LOG_COLOR=False
+
+
+# Using milestone-proposed branches
+# ---------------------------------
+
+# Uncomment these to grab the milestone-proposed branches from the
+# repos:
+#CINDER_BRANCH=milestone-proposed
+#GLANCE_BRANCH=milestone-proposed
+#HORIZON_BRANCH=milestone-proposed
+#KEYSTONE_BRANCH=milestone-proposed
+#KEYSTONECLIENT_BRANCH=milestone-proposed
+#NOVA_BRANCH=milestone-proposed
+#NOVACLIENT_BRANCH=milestone-proposed
+#NEUTRON_BRANCH=milestone-proposed
+#SWIFT_BRANCH=milestone-proposed
+
+# Using git versions of clients
+# -----------------------------
+# By default clients are installed from pip. See LIBS_FROM_GIT in
+# stackrc for details on getting clients from specific branches or
+# revisions. e.g.
+# LIBS_FROM_GIT="python-ironicclient"
+# IRONICCLIENT_BRANCH=refs/changes/44/2.../1
+
+# Swift
+# -----
+
+# Swift is now used as the back-end for the S3-like object store. Setting the
+# hash value is required and you will be prompted for it if Swift is enabled
+# so just set it to something already:
+SWIFT_HASH=66a3d6b56c1f479c8b4e70ab5c2000f5
+
+# For development purposes the default of 3 replicas is usually not required.
+# Set this to 1 to save some resources:
+SWIFT_REPLICAS=1
+
+# The data for Swift is stored by default in (``$DEST/data/swift``),
+# or (``$DATA_DIR/swift``) if ``DATA_DIR`` has been set, and can be
+# moved by setting ``SWIFT_DATA_DIR``. The directory will be created
+# if it does not exist.
+SWIFT_DATA_DIR=$DEST/data
+
+# OPNFV Doctor
+# ------------
+
+# Enable the required plugins
+# The order of enabling plugins matters
+enable_plugin aodh http://git.openstack.org/openstack/aodh
+enable_plugin panko https://git.openstack.org/openstack/panko
+enable_plugin ceilometer https://git.openstack.org/openstack/ceilometer
+enable_plugin osprofiler https://git.openstack.org/openstack/osprofiler
+enable_plugin doctor https://git.opnfv.org/doctor
+
+# To enable Python 3
+# USE_PYTHON3=True
+
+# To enable Congress as Doctor Inspector
+# enable_plugin congress https://git.openstack.org/openstack/congress
+
+# To enable Neutron port data plane status
+# Q_ML2_PLUGIN_EXT_DRIVERS=data_plane_status
diff --git a/docs/conf.py b/docs/conf.py
index eb12e74b..3c9978bb 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1 +1,2 @@
from docs_conf.conf import * # noqa: F401,F403
+master_doc = 'index'
diff --git a/docs/development/index.rst b/docs/development/index.rst
index 2dc16a82..a7d2817b 100644
--- a/docs/development/index.rst
+++ b/docs/development/index.rst
@@ -2,18 +2,18 @@
.. http://creativecommons.org/licenses/by/4.0
.. (c) 2016 OPNFV.
+.. _development:
-======
-Doctor
-======
+===========
+Development
+===========
.. toctree::
:maxdepth: 2
- ./design/index.rst
- ./requirements/index.rst
- ./manuals/index.rst
- ./overview/functest_scenario/index.rst
+ ./design/index
+ ./overview/index
+ ./requirements/index
Indices
=======
diff --git a/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst b/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
deleted file mode 100644
index 9f92b5bf..00000000
--- a/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
+++ /dev/null
@@ -1,253 +0,0 @@
-.. This work is licensed under a Creative Commons Attribution 4.0 International License.
-.. http://creativecommons.org/licenses/by/4.0
-
-
-
-Platform overview
-"""""""""""""""""
-
-Doctor platform provides these features since `Danube Release <https://wiki.opnfv.org/display/SWREL/Danube>`_:
-
-* Immediate Notification
-* Consistent resource state awareness for compute host down
-* Valid compute host status given to VM owner
-
-These features enable high availability of Network Services on top of
-the virtualized infrastructure. Immediate notification allows VNF managers
-(VNFM) to process recovery actions promptly once a failure has occurred.
-Same framework can also be utilized to have VNFM awareness about
-infrastructure maintenance.
-
-Consistency of resource state is necessary to execute recovery actions
-properly in the VIM.
-
-Ability to query host status gives VM owner the possibility to get
-consistent state information through an API in case of a compute host
-fault.
-
-The Doctor platform consists of the following components:
-
-* OpenStack Compute (Nova)
-* OpenStack Networking (Neutron)
-* OpenStack Telemetry (Ceilometer)
-* OpenStack Alarming (AODH)
-* Doctor Sample Inspector, OpenStack Congress or OpenStack Vitrage
-* Doctor Sample Monitor or any monitor supported by Congress or Vitrage
-
-.. note::
- Doctor Sample Monitor is used in Doctor testing. However in real
- implementation like Vitrage, there are several other monitors supported.
-
-You can see an overview of the Doctor platform and how components interact in
-:numref:`figure-p1`.
-
-.. figure:: ./images/Fault-management-design.png
- :name: figure-p1
- :width: 100%
-
- Doctor platform and typical sequence
-
-Detailed information on the Doctor architecture can be found in the Doctor
-requirements documentation:
-http://artifacts.opnfv.org/doctor/docs/requirements/05-implementation.html
-
-Running test cases
-""""""""""""""""""
-
-Functest will call the "doctor_tests/main.py" in Doctor to run the test job.
-Doctor testing can also be triggered by tox on OPNFV installer jumphost. Tox
-is normally used for functional, module and coding style testing in Python
-project.
-
-Currently, 'Apex', 'Daisy', 'Fuel' and 'local' installer are supported.
-
-
-Fault management use case
-"""""""""""""""""""""""""
-
-* A consumer of the NFVI wants to receive immediate notifications about faults
- in the NFVI affecting the proper functioning of the virtual resources.
- Therefore, such faults have to be detected as quickly as possible, and, when
- a critical error is observed, the affected consumer is immediately informed
- about the fault and can switch over to the STBY configuration.
-
-The faults to be monitored (and at which detection rate) will be configured by
-the consumer. Once a fault is detected, the Inspector in the Doctor
-architecture will check the resource map maintained by the Controller, to find
-out which virtual resources are affected and then update the resources state.
-The Notifier will receive the failure event requests sent from the Controller,
-and notify the consumer(s) of the affected resources according to the alarm
-configuration.
-
-Detailed workflow information is as follows:
-
-* Consumer(VNFM): (step 0) creates resources (network, server/instance) and an
- event alarm on state down notification of that server/instance or Neutron
- port.
-
-* Monitor: (step 1) periodically checks nodes, such as ping from/to each
- dplane nic to/from gw of node, (step 2) once it fails to send out event
- with "raw" fault event information to Inspector
-
-* Inspector: when it receives an event, it will (step 3) mark the host down
- ("mark-host-down"), (step 4) map the PM to VM, and change the VM status to
- down. In network failure case, also Neutron port is changed to down.
-
-* Controller: (step 5) sends out instance update event to Ceilometer. In network
- failure case, also Neutron port is changed to down and corresponding event is
- sent to Ceilometer.
-
-* Notifier: (step 6) Ceilometer transforms and passes the events to AODH,
- (step 7) AODH will evaluate events with the registered alarm definitions,
- then (step 8) it will fire the alarm to the "consumer" who owns the
- instance
-
-* Consumer(VNFM): (step 9) receives the event and (step 10) recreates a new
- instance
-
-Fault management test case
-""""""""""""""""""""""""""
-
-Functest will call the 'doctor-test' command in Doctor to run the test job.
-
-The following steps are executed:
-
-Firstly, get the installer ip according to the installer type. Then ssh to
-the installer node to get the private key for accessing to the cloud. As
-'fuel' installer, ssh to the controller node to modify nova and ceilometer
-configurations.
-
-Secondly, prepare image for booting VM, then create a test project and test
-user (both default to doctor) for the Doctor tests.
-
-Thirdly, boot a VM under the doctor project and check the VM status to verify
-that the VM is launched completely. Then get the compute host info where the VM
-is launched to verify connectivity to the target compute host. Get the consumer
-ip according to the route to compute ip and create an alarm event in Ceilometer
-using the consumer ip.
-
-Fourthly, the Doctor components are started, and, based on the above preparation,
-a failure is injected to the system, i.e. the network of compute host is
-disabled for 3 minutes. To ensure the host is down, the status of the host
-will be checked.
-
-Finally, the notification time, i.e. the time between the execution of step 2
-(Monitor detects failure) and step 9 (Consumer receives failure notification)
-is calculated.
-
-According to the Doctor requirements, the Doctor test is successful if the
-notification time is below 1 second.
-
-Maintenance use case
-""""""""""""""""""""
-
-* A consumer of the NFVI wants to interact with NFVI maintenance, upgrade,
- scaling and to have graceful retirement. Receiving notifications over these
- NFVI events and responding to those within given time window, consumer can
- guarantee zero downtime to his service.
-
-The maintenance use case adds the Doctor platform an `admin tool` and an
-`app manager` component. Overview of maintenance components can be seen in
-:numref:`figure-p2`.
-
-.. figure:: ./images/Maintenance-design.png
- :name: figure-p2
- :width: 100%
-
- Doctor platform components in maintenance use case
-
-In maintenance use case, `app manager` (VNFM) will subscribe to maintenance
-notifications triggered by project specific alarms through AODH. This is the way
-it gets to know different NFVI maintenance, upgrade and scaling operations that
-effect to its instances. The `app manager` can do actions depicted in `green
-color` or tell `admin tool` to do admin actions depicted in `orange color`
-
-Any infrastructure component like `Inspector` can subscribe to maintenance
-notifications triggered by host specific alarms through AODH. Subscribing to the
-notifications needs admin privileges and can tell when a host is out of use as
-in maintenance and when it is taken back to production.
-
-Maintenance test case
-"""""""""""""""""""""
-
-Maintenance test case is currently running in our Apex CI and executed by tox.
-This is because the special limitation mentioned below and also the fact we
-currently have only sample implementation as a proof of concept. Environmental
-variable TEST_CASE='maintenance' needs to be used when executing
-"doctor_tests/main.py". Test case workflow can be seen in :numref:`figure-p3`.
-
-.. figure:: ./images/Maintenance-workflow.png
- :name: figure-p3
- :width: 100%
-
- Maintenance test case workflow
-
-In test case all compute capacity will be consumed with project (VNF) instances.
-For redundant services on instances and an empty compute needed for maintenance,
-test case will need at least 3 compute nodes in system. There will be 2
-instances on each compute, so minimum number of VCPUs is also 2. Depending on
-how many compute nodes there is application will always have 2 redundant
-instances (ACT-STDBY) on different compute nodes and rest of the compute
-capacity will be filled with non-redundant instances.
-
-For each project specific maintenance message there is a time window for
-`app manager` to make any needed action. This will guarantee zero
-down time for his service. All replies back are done by calling `admin tool` API
-given in the message.
-
-The following steps are executed:
-
-Infrastructure admin will call `admin tool` API to trigger maintenance for
-compute hosts having instances belonging to a VNF.
-
-Project specific `MAINTENANCE` notification is triggered to tell `app manager`
-that his instances are going to hit by infrastructure maintenance at a specific
-point in time. `app manager` will call `admin tool` API to answer back
-`ACK_MAINTENANCE`.
-
-When the time comes to start the actual maintenance workflow in `admin tool`,
-a `DOWN_SCALE` notification is triggered as there is no empty compute node for
-maintenance (or compute upgrade). Project receives corresponding alarm and scales
-down instances and call `admin tool` API to answer back `ACK_DOWN_SCALE`.
-
-As it might happen instances are not scaled down (removed) from a single
-compute node, `admin tool` might need to figure out what compute node should be
-made empty first and send `PREPARE_MAINTENANCE` to project telling which instance
-needs to be migrated to have the needed empty compute. `app manager` makes sure
-he is ready to migrate instance and call `admin tool` API to answer back
-`ACK_PREPARE_MAINTENANCE`. `admin tool` will make the migration and answer
-`ADMIN_ACTION_DONE`, so `app manager` knows instance can be again used.
-
-:numref:`figure-p3` has next a light blue section of actions to be done for each
-compute. However as we now have one empty compute, we will maintain/upgrade that
-first. So on first round, we can straight put compute in maintenance and send
-admin level host specific `IN_MAINTENANCE` message. This is caught by `Inspector`
-to know host is down for maintenance. `Inspector` can now disable any automatic
-fault management actions for the host as it can be down for a purpose. After
-`admin tool` has completed maintenance/upgrade `MAINTENANCE_COMPLETE` message
-is sent to tell host is back in production.
-
-Next rounds we always have instances on compute, so we need to have
-`PLANNED_MAINTANANCE` message to tell that those instances are now going to hit
-by maintenance. When `app manager` now receives this message, he knows instances
-to be moved away from compute will now move to already maintained/upgraded host.
-In test case no upgrade is done on application side to upgrade instances
-according to new infrastructure capabilities, but this could be done here as
-this information is also passed in the message. This might be just upgrading
-some RPMs, but also totally re-instantiating instance with a new flavor. Now if
-application runs an active side of a redundant instance on this compute,
-a switch over will be done. After `app manager` is ready he will call
-`admin tool` API to answer back `ACK_PLANNED_MAINTENANCE`. In test case the
-answer is `migrate`, so `admin tool` will migrate instances and reply
-`ADMIN_ACTION_DONE` and then `app manager` knows instances can be again used.
-Then we are ready to make the actual maintenance as previously trough
-`IN_MAINTENANCE` and `MAINTENANCE_COMPLETE` steps.
-
-After all computes are maintained, `admin tool` can send `MAINTENANCE_COMPLETE`
-to tell maintenance/upgrade is now complete. For `app manager` this means he
-can scale back to full capacity.
-
-This is the current sample implementation and test case. Real life
-implementation is started in OpenStack Fenix project and there we should
-eventually address requirements more deeply and update the test case with Fenix
-implementation.
diff --git a/docs/development/overview/index.rst b/docs/development/overview/index.rst
index 956e73e3..f6d78d57 100644
--- a/docs/development/overview/index.rst
+++ b/docs/development/overview/index.rst
@@ -3,11 +3,12 @@
.. _doctor-overview:
-************************
-Doctor Development Guide
-************************
+********
+Overview
+********
.. toctree::
:maxdepth: 2
+ overview.rst
testing.rst
diff --git a/docs/development/overview/overview.rst b/docs/development/overview/overview.rst
new file mode 100644
index 00000000..21f5439e
--- /dev/null
+++ b/docs/development/overview/overview.rst
@@ -0,0 +1,52 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Platform overview
+"""""""""""""""""
+
+Doctor platform provides these features since `Danube Release <https://wiki.opnfv.org/display/SWREL/Danube>`_:
+
+* Immediate Notification
+* Consistent resource state awareness for compute host down
+* Valid compute host status given to VM owner
+
+These features enable high availability of Network Services on top of
+the virtualized infrastructure. Immediate notification allows VNF managers
+(VNFM) to process recovery actions promptly once a failure has occurred.
+The same framework can also be utilized to make the VNFM aware of
+infrastructure maintenance.
+
+Consistency of resource state is necessary to execute recovery actions
+properly in the VIM.
+
+The ability to query host status gives the VM owner the possibility to get
+consistent state information through an API in case of a compute host
+fault.
+
+The Doctor platform consists of the following components:
+
+* OpenStack Compute (Nova)
+* OpenStack Networking (Neutron)
+* OpenStack Telemetry (Ceilometer)
+* OpenStack Alarming (AODH)
+* Doctor Sample Inspector, OpenStack Congress or OpenStack Vitrage
+* Doctor Sample Monitor or any monitor supported by Congress or Vitrage
+
+.. note::
+    Doctor Sample Monitor is used in Doctor testing. However, a real
+    implementation such as Vitrage supports several other monitors.
+
+You can see an overview of the Doctor platform and how components interact in
+:numref:`figure-p1`.
+
+
+The maintenance use case provides these features since the `Iruya Release <https://wiki.opnfv.org/display/SWREL/Iruya>`_:
+
+* Infrastructure maintenance and upgrade workflow
+* Interaction between the VNFM and the infrastructure workflow
+
+Since the `Jerma Release <https://wiki.opnfv.org/display/SWREL/Jerma>`_, the
+maintenance use case also supports the 'ETSI FEAT03' implementation to have
+infrastructure maintenance and upgrade fully optimized while keeping zero
+impact on the VNF service.
+
diff --git a/docs/development/requirements/index.rst b/docs/development/requirements/index.rst
index fceaebf0..ccc35cb8 100644
--- a/docs/development/requirements/index.rst
+++ b/docs/development/requirements/index.rst
@@ -3,9 +3,9 @@
.. _doctor-requirements:
-****************************************
-Doctor: Fault Management and Maintenance
-****************************************
+**********************************************
+Requirements: Fault Management and Maintenance
+**********************************************
:Project: Doctor, https://wiki.opnfv.org/doctor
:Editors: Ashiq Khan (NTT DOCOMO), Gerald Kunzmann (NTT DOCOMO)
diff --git a/docs/index.rst b/docs/index.rst
index 4dedb98d..b8e8bfd0 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -12,6 +12,6 @@ Fault Management and Maintenance (Doctor)
:numbered:
:maxdepth: 2
- release/index
development/index
-
+ release/index
+ testing/index
diff --git a/docs/release/configguide/feature.configuration.rst b/docs/release/configguide/feature.configuration.rst
index 64928eea..8fbff50e 100644
--- a/docs/release/configguide/feature.configuration.rst
+++ b/docs/release/configguide/feature.configuration.rst
@@ -159,3 +159,57 @@ You can configure the Sample Monitor as follows (Example for Apex deployment):
"http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
**Collectd Monitor**
+
+OpenStack components
+====================
+
+In OPNFV, and with Doctor testing, you can have all OpenStack components
+configured as needed. Here is a sample of the needed configuration
+modifications.
+
+Ceilometer
+----------
+
+/etc/ceilometer/event_definitions.yaml:
+# Maintenance use case needs new alarm definitions to be added
+- event_type: maintenance.scheduled
+ traits:
+ actions_at:
+ fields: payload.maintenance_at
+ type: datetime
+ allowed_actions:
+ fields: payload.allowed_actions
+ host_id:
+ fields: payload.host_id
+ instances:
+ fields: payload.instances
+ metadata:
+ fields: payload.metadata
+ project_id:
+ fields: payload.project_id
+ reply_url:
+ fields: payload.reply_url
+ session_id:
+ fields: payload.session_id
+ state:
+ fields: payload.state
+- event_type: maintenance.host
+ traits:
+ host:
+ fields: payload.host
+ project_id:
+ fields: payload.project_id
+ session_id:
+ fields: payload.session_id
+ state:
+ fields: payload.state
+
+/etc/ceilometer/event_pipeline.yaml:
+# Maintenance and fault management both need these to be added
+ - notifier://
+ - notifier://?topic=alarm.all
+
+Nova
+----
+
+/etc/nova/nova.conf
+cpu_allocation_ratio=1.0
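A hedged sketch of applying the Nova setting above; ``crudini``, the ``[DEFAULT]`` section, and the DevStack unit names are assumptions, not something this change prescribes.

.. code-block:: bash

   # Assumes cpu_allocation_ratio lives in the [DEFAULT] section and crudini is installed.
   sudo crudini --set /etc/nova/nova.conf DEFAULT cpu_allocation_ratio 1.0
   # Restart the Nova services afterwards; unit names vary per environment,
   # e.g. devstack@n-sch and devstack@n-cpu on DevStack.
   sudo systemctl restart devstack@n-sch devstack@n-cpu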
diff --git a/docs/release/configguide/index.rst b/docs/release/configguide/index.rst
index b1e7c33d..c2331115 100644
--- a/docs/release/configguide/index.rst
+++ b/docs/release/configguide/index.rst
@@ -3,9 +3,9 @@
.. _doctor-configguide:
-*************************
-Doctor Installation Guide
-*************************
+**************************
+Doctor Configuration Guide
+**************************
.. toctree::
:maxdepth: 2
diff --git a/docs/release/index.rst b/docs/release/index.rst
index 8a1bf405..67eb4c5f 100644
--- a/docs/release/index.rst
+++ b/docs/release/index.rst
@@ -2,14 +2,18 @@
.. http://creativecommons.org/licenses/by/4.0
.. (c) 2017 OPNFV.
+.. _release:
-======
-Doctor
-======
+=======
+Release
+=======
.. toctree::
:maxdepth: 2
+ ./configguide/index.rst
./installation/index.rst
+ ./release-notes/index.rst
+ ./scenarios/fault_management/fault_management.rst
+ ./scenarios/maintenance/maintenance.rst
./userguide/index.rst
-
diff --git a/docs/development/manuals/index.rst b/docs/release/installation/index.rst
index f705f94a..f6527e5d 100644
--- a/docs/development/manuals/index.rst
+++ b/docs/release/installation/index.rst
@@ -1,13 +1,13 @@
.. This work is licensed under a Creative Commons Attribution 4.0 International License.
.. http://creativecommons.org/licenses/by/4.0
-.. _doctor-manuals:
+.. _doctor-installation:
-*******
-Manuals
-*******
+*************************
+Doctor Installation Guide
+*************************
.. toctree::
+ :maxdepth: 2
-.. include:: mark-host-down_manual.rst
-.. include:: get-valid-server-state.rst
+ installation.rst
diff --git a/docs/release/installation/installation.rst b/docs/release/installation/installation.rst
new file mode 100644
index 00000000..564f19fd
--- /dev/null
+++ b/docs/release/installation/installation.rst
@@ -0,0 +1,44 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Doctor Installation
+====================
+
+You can clone the Doctor project on the OPNFV installer jumphost or, if you
+are not in an OPNFV environment, on the DevStack controller node::
+
+   git clone https://gerrit.opnfv.org/gerrit/doctor
+
+On the DevStack controller, here is a sample of what Doctor testing requires
+for the sample fault management test case and for maintenance testing using
+Fenix.
+
+.. code-block:: bash
+
+   git clone https://github.com/openstack/devstack -b stable/train
+   cd devstack
+   vi local.conf
+
+.. code-block:: bash
+
+ [[local|localrc]]
+ GIT_BASE=https://git.openstack.org
+ HOST_IP=<host_ip>
+ ADMIN_PASSWORD=admin
+ DATABASE_PASSWORD=admin
+ RABBIT_PASSWORD=admin
+ SERVICE_PASSWORD=admin
+ LOGFILE=/opt/stack/stack.sh.log
+
+ PUBLIC_INTERFACE=eth0
+
+ CEILOMETER_EVENT_ALARM=True
+
+ ENABLED_SERVICES=key,rabbit,mysql,fenix-engine,fenix-api,aodh-evaluator,aodh-notifier,aodh-api
+
+ enable_plugin ceilometer https://git.openstack.org/openstack/ceilometer stable/train
+ enable_plugin aodh https://git.openstack.org/openstack/aodh stable/train
+ enable_plugin gnocchi https://github.com/openstack/gnocchi
+ enable_plugin fenix https://opendev.org/x/fenix master
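A rough sketch of running the Doctor tests once stacking has completed; the exported values below (e.g. ``INSTALLER_TYPE=devstack``) are assumptions based on the environment variables mentioned elsewhere in these documents, not a verified recipe.

.. code-block:: bash

   # In the Doctor repository cloned above, after ./stack.sh has finished.
   export INSTALLER_TYPE=devstack   # assumption; 'local' is another documented value
   export INSPECTOR_TYPE=sample
   export TEST_CASE=maintenance
   sudo -E tox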
diff --git a/docs/release/release-notes/release-notes.rst b/docs/release/release-notes/release-notes.rst
index 142bfacf..b525335e 100644
--- a/docs/release/release-notes/release-notes.rst
+++ b/docs/release/release-notes/release-notes.rst
@@ -2,140 +2,46 @@
.. http://creativecommons.org/licenses/by/4.0
-This document provides the release notes for Gambia of Doctor.
+This document provides the release notes for the Jerma version of Doctor.
Important notes
===============
-In Gambia release, Doctor has been working with our second use case over
-maintenance. Design guideline is now done and test case exists with sample
-maintenance workflow code implemented in Doctor. Work has also started to have
-the real implementation done in the OpenStack Fenix project
-https://wiki.openstack.org/wiki/Fenix.
-
-Doctor CI testing has now moved to use tox on jumphots instead of running test
-through features container. Also in Apex we use OpenStack services running in
-containers. Functest daily testing supports Doctor fault management test case
-for Apex, Daisy and Fuel installers. This testing is done through features
-container.
-
-In this release, Doctor has not been working with the fault management use case as
-the basic framework has been already done. However, we might need to get back to
-it later to better meet the tough industry requirements as well as requirements
-from edge, containers and 5G.
+The Jerma release has mainly been about finalizing maintenance use case testing
+that supports the ETSI FEAT03 defined interaction between VNFM and
+infrastructure. This is mainly to have infrastructure maintenance and upgrade
+operations optimized to be as fast as possible while keeping zero impact on the
+VNF services running on top.
+
+Furthermore, this is the final release of Doctor; deeper testing is moving to
+upstream projects such as Fenix for maintenance. In this release we have also
+made sure that all Doctor testing, and any deeper testing with the upstream
+projects, can be done in DevStack. This also makes DevStack the most important
+installer.
Summary
=======
-Gambia Doctor framework uses OpenStack Queens integrated into its test cases.
-Compared to the previous release, the Heat project is also being used in the
-maintenance test case.
+Jerma Doctor framework uses OpenStack Train integrated into its test cases.
Release Data
============
Doctor changes
-+------------------------------------------+----------------------------------------------------------+
-| **commit-ID** | **Subject** |
-+------------------------------------------+----------------------------------------------------------+
-| 5b3f5937e7b861fca46b2a6b2d6708866b800f95 | fix building docs |
-+------------------------------------------+----------------------------------------------------------+
-| 2ca5924081ce4784f599437707bd32807aa155ce | Fix SSH client connection reset |
-+------------------------------------------+----------------------------------------------------------+
-| baac6579556f8216b36db0d0f87f9c2d4f8b4ef5 | Support Apex with services in containers |
-+------------------------------------------+----------------------------------------------------------+
-| 23bf63c4616040cb0d69cd26238af2a4a7c00a90 | fix the username to login undercloud in Apex |
-+------------------------------------------+----------------------------------------------------------+
-| 61eb3927ada784cc3dffb5ddd17f66e47871f708 | Local Documentation Builds |
-+------------------------------------------+----------------------------------------------------------+
-| 0f1dd4314b9e0247d9af7af6df2410462423aeca | Updated from global requirements |
-+------------------------------------------+----------------------------------------------------------+
-| 2d4a9f0c0a93797da6534583f6e74553a4b634be | Fix links to remove references to submodules |
-+------------------------------------------+----------------------------------------------------------+
-| 3ddc2392b0ed364eede49ff006d64df3ea456350 | Gambia release notes |
-+------------------------------------------+----------------------------------------------------------+
-| 825a0a0dd5e8028129b782ed21c549586257b1c5 | delete doctor datasource in congress when cleanup |
-+------------------------------------------+----------------------------------------------------------+
-| fcf53129ab2b18b84571faff13d7cb118b3a41b3 | run profile even the notification time is larger than 1S |
-+------------------------------------------+----------------------------------------------------------+
-| 495965d0336d42fc36494c81fd15cee2f34c96e9 | Update and add test case |
-+------------------------------------------+----------------------------------------------------------+
-| da25598a6a31abe0579ffed12d1719e5ff75f9a7 | bugfix: add doctor datasource in congress |
-+------------------------------------------+----------------------------------------------------------+
-| f9e1e3b1ae4be80bc2dc61d9c4213c81c091ea72 | Update the maintenance design document |
-+------------------------------------------+----------------------------------------------------------+
-| 4639f15e6db2f1480b41f6fbfd11d70312d4e421 | Add maintenance test code |
-+------------------------------------------+----------------------------------------------------------+
-| b54cbc5dd2d32fcb27238680b4657ed384d021c5 | Add setup and cleanup for maintenance test |
-+------------------------------------------+----------------------------------------------------------+
-| b2bb504032ac81a2ed3f404113b097d9ce3d7f14 | bugfix: kill the stunnel when cleanup |
-+------------------------------------------+----------------------------------------------------------+
-| eaeb3c0f9dc9e6645a159d0a78b9fc181fce53d4 | add ssh_keyfile for connect to installer in Apex |
-+------------------------------------------+----------------------------------------------------------+
-| dcbe7bf1c26052b0e95d209254e7273aa1eaace1 | Add tox and test case to testing document |
-+------------------------------------------+----------------------------------------------------------+
-| 0f607cb5efd91ee497346b7f792dfa844d15595c | enlarge the time of link down |
-+------------------------------------------+----------------------------------------------------------+
-| 1351038a65739b8d799820de515178326ad05f7b | bugfix: fix the filename of ssh tunnel |
-+------------------------------------------+----------------------------------------------------------+
-| e70bf248daac03eee6b449cd1654d2ee6265dd8c | Use py34 instead of py35 |
-+------------------------------------------+----------------------------------------------------------+
-| 2a60d460eaf018951456451077b7118b60219b32 | add INSPECTOR_TYPE and TEST_CASE to tox env |
-+------------------------------------------+----------------------------------------------------------+
-| 2043ceeb08c1eca849daeb2b3696d385425ba061 | [consumer] fix default value for port number |
-+------------------------------------------+----------------------------------------------------------+
-
-Releng changes
-
-+------------------------------------------+-----------------------------------------------------------------------+
-| **commit-ID** | **Subject** |
-+------------------------------------------+-----------------------------------------------------------------------+
-| c87309f5a75ccc5d595f708817b97793c24c4387 | Add Doctor maintenance job |
-+------------------------------------------+-----------------------------------------------------------------------+
-| bd16a9756ffd0743e143f0f2f966da8dd666c7a3 | remove congress test in Daisy |
-+------------------------------------------+-----------------------------------------------------------------------+
-| c47aaaa53c91aae93877f2532c72374beaa4eabe | remove fuel job in Doctor |
-+------------------------------------------+-----------------------------------------------------------------------+
-| ab2fed2522eaf82ea7c63dd05008a37c56e825d0 | use 'workspace-cleanup' plugin in publisher |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 3aaed5cf40092744f1b87680b9205a2901baecf3 | clean the workspace in the publisher |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 50151eb3717edd4ddd996f3705fbe1732de7f3b7 | run tox with 'sudo' |
-+------------------------------------------+-----------------------------------------------------------------------+
-| a3adc85ecb52f5d19ec4e9c49ca1ac35aa429ff9 | remove inspector variable form job template |
-+------------------------------------------+-----------------------------------------------------------------------+
-| adfbaf2a3e8487e4c9152bf864a653a0425b8582 | run doctor tests with different inspectors in sequence |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 2e98e56224cd550cb3bf9798e420eece28139bd9 | add the ssh_key info if the key_file is exist |
-+------------------------------------------+-----------------------------------------------------------------------+
-| c109c271018e9a85d94be1b9b468338d64589684 | prepare installer info for doctor test |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 57cbefc7160958eae1d49e4753779180a25864af | use py34 for tox |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 3547754e808a581b09c9d22e013a7d986d9f6cd1 | specify the cacert file when it exits |
-+------------------------------------------+-----------------------------------------------------------------------+
-| ef4f36aa1c2ff0819d73cde44f84b99a42e15c7e | bugfix: wrong usage of '!include-raw' |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 0e0e0d4cb71fb27b1789a2bef2d3c4ff313e67ff | use tox instead of functest for doctor CI jobs |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 5b22f1b95feacaec0380f6a7543cbf510b628451 | pass value to parameters |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 44ab0cea07fa2a734c4f6b80776ad48fd006d1b8 | Doctor job bugfix: fix the scenario |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 17617f1c0a78c7bdad0d11d329a6c7e119cbbddd | bugfix: run doctor tests parallelly |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 811e4ef7f4c37b7bc246afc34ff880c014ecc05d | delete 'opnfv-build-ubuntu-defaults' parameters for doctor verify job |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 0705f31ab5bc54c073df120cbe0fe62cf10f9a81 | delete the 'node' parameter in 'doctor-slave-parameter' macro |
-+------------------------------------------+-----------------------------------------------------------------------+
-| 304151b15f9d7241db8c5fea067cafe048287d84 | fix the default node label for doctor test |
-+------------------------------------------+-----------------------------------------------------------------------+
-| a6963f92f015a33b44b27199886952205499b44c | Fix project name |
-+------------------------------------------+-----------------------------------------------------------------------+
-| f122bfed998b3b0e0178106a7538377c609c6512 | add a default value for SSH_KEY |
-+------------------------------------------+-----------------------------------------------------------------------+
+- The maintenance use case was updated to support the latest version of Fenix.
+- The maintenance use case now supports the ETSI FEAT03 optimization with Fenix.
+- Doctor testing is now preferably done in a DevStack environment, where one
+  can easily select an OpenStack release from Rocky to Ussuri to test Doctor
+  functionality. The latest OPNFV Fuel can also be used for the OpenStack
+  version it supports.
+
+Doctor CI
+
+- Doctor is tested with the Fuel installer.
+- The fault management use case is tested with the sample inspector.
+- The maintenance use case is tested with the sample implementation and
+  against the latest Fenix version. This includes the new ETSI FEAT03
+  optimization.
Version change
^^^^^^^^^^^^^^
@@ -143,49 +49,34 @@ Version change
Module version changes
~~~~~~~~~~~~~~~~~~~~~~
-- OpenStack has changed from Pike-1 to Queens-1
+- OpenStack has been changed to Train
Document version changes
~~~~~~~~~~~~~~~~~~~~~~~~
-These documents have been updated in Gambia release
-
-- Testing document
- docs/development/overview/testing.rst
-- Doctor scenario in functest
- docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
-- Maintenance design guideline
- docs/development/design/maintenance-design-guideline.rst
+All documentation has been updated to the OPNFV unified format according to the
+documentation guidelines, with small updates in many documents.
Reason for version
^^^^^^^^^^^^^^^^^^
-Documentation is updated due to tox usage in testing and adding maintenance
-use case related documentation.
+N/A
Feature additions
~~~~~~~~~~~~~~~~~
-+--------------------+--------------------------------------------------------+
-| **JIRA REFERENCE** | **SLOGAN** |
-+--------------------+--------------------------------------------------------+
-| DOCTOR-106 | Maintenance scenario |
-+--------------------+--------------------------------------------------------+
-| DOCTOR-125 | Maintenance design document according to our test case |
-+--------------------+--------------------------------------------------------+
-| DOCTOR-126 | Use Tox instead of Functest for doctor CI jobs |
-+--------------------+--------------------------------------------------------+
-| DOCTOR-127 | Maintenance test POD |
-+--------------------+--------------------------------------------------------+
-| DOCTOR-130 | Apex with containers |
-+--------------------+--------------------------------------------------------+
-
++--------------------+--------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------+
+| DOCTOR-137 | VNFM maintenance with ETSI changes |
++--------------------+--------------------------------------------+
+| DOCTOR-136 | DevStack support |
++--------------------+--------------------------------------------+
Deliverables
------------
-
Software deliverables
=====================
@@ -226,74 +117,21 @@ Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+--------------------------------------+--------------+
| **TEST-SUITE** | **Results:** |
+--------------------------------------+--------------+
-| INSTALLER_TYPE='Apex' | SUCCESS |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Compass' | N/A |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Daisy' | SUCCESS |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Fuel' | No POD |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Joid' | N/A |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Local' | N/A |
+| INSTALLER_TYPE='fuel' | SUCCESS |
+--------------------------------------+--------------+
-Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=congress
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Doctor CI results with TEST_CASE='maintenance' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+--------------------------------------+--------------+
| **TEST-SUITE** | **Results:** |
+--------------------------------------+--------------+
-| INSTALLER_TYPE='Apex' | FAILED |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Compass' | N/A |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Daisy' | N/A |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Fuel' | No POD |
+| INSTALLER_TYPE='fuel' | SUCCESS |
+| ADMIN_TOOL_TYPE='fenix' *) | |
+--------------------------------------+--------------+
-| INSTALLER_TYPE='Joid' | N/A |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Local' | N/A |
-+--------------------------------------+--------------+
-
-Doctor Functest results with TEST_CASE='fault_management'
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-+--------------------------------------+--------------+
-| **TEST-SUITE** | **Results:** |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Apex' | skipped |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Compass' | N/A |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Daisy' | skipped |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Fuel' | skipped |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Joid' | N/A |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Local' | N/A |
-+--------------------------------------+--------------+
-
-Note: Installer Functest does not currently test features or skips running the
-project test cases
-
-Doctor CI results with TEST_CASE='maintenance'
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-+--------------------------------------+--------------+
-| **TEST-SUITE** | **Results:** |
-+--------------------------------------+--------------+
-| INSTALLER_TYPE='Apex' | SUCCESS |
-+--------------------------------------+--------------+
-
-Doctor Functest results with TEST_CASE='maintenance'
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-N/A - Needs special target and currently there is only sample implementation
+*) The sample implementation has not been updated to match the latest upstream
+   Fenix and is currently not being tested.
References
==========
@@ -301,3 +139,8 @@ References
For more information about the OPNFV Doctor latest work, please see:
https://wiki.opnfv.org/display/doctor/Doctor+Home
+
+Further information about the ETSI FEAT03 optimization can be found in the
+Fenix documentation:
+
+https://fenix.readthedocs.io/en/latest
diff --git a/docs/release/release-notes/releasenotes_gambia.rst b/docs/release/release-notes/releasenotes_gambia.rst
new file mode 100644
index 00000000..142bfacf
--- /dev/null
+++ b/docs/release/release-notes/releasenotes_gambia.rst
@@ -0,0 +1,303 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+This document provides the release notes for Gambia of Doctor.
+
+Important notes
+===============
+
+In Gambia release, Doctor has been working with our second use case over
+maintenance. Design guideline is now done and test case exists with sample
+maintenance workflow code implemented in Doctor. Work has also started to have
+the real implementation done in the OpenStack Fenix project
+https://wiki.openstack.org/wiki/Fenix.
+
+Doctor CI testing has now moved to use tox on jumphosts instead of running test
+through features container. Also in Apex we use OpenStack services running in
+containers. Functest daily testing supports Doctor fault management test case
+for Apex, Daisy and Fuel installers. This testing is done through features
+container.
+
+In this release, Doctor has not been working with the fault management use case as
+the basic framework has been already done. However, we might need to get back to
+it later to better meet the tough industry requirements as well as requirements
+from edge, containers and 5G.
+
+
+Summary
+=======
+
+Gambia Doctor framework uses OpenStack Queens integrated into its test cases.
+Compared to the previous release, the Heat project is also being used in the
+maintenance test case.
+
+Release Data
+============
+
+Doctor changes
+
++------------------------------------------+----------------------------------------------------------+
+| **commit-ID** | **Subject** |
++------------------------------------------+----------------------------------------------------------+
+| 5b3f5937e7b861fca46b2a6b2d6708866b800f95 | fix building docs |
++------------------------------------------+----------------------------------------------------------+
+| 2ca5924081ce4784f599437707bd32807aa155ce | Fix SSH client connection reset |
++------------------------------------------+----------------------------------------------------------+
+| baac6579556f8216b36db0d0f87f9c2d4f8b4ef5 | Support Apex with services in containers |
++------------------------------------------+----------------------------------------------------------+
+| 23bf63c4616040cb0d69cd26238af2a4a7c00a90 | fix the username to login undercloud in Apex |
++------------------------------------------+----------------------------------------------------------+
+| 61eb3927ada784cc3dffb5ddd17f66e47871f708 | Local Documentation Builds |
++------------------------------------------+----------------------------------------------------------+
+| 0f1dd4314b9e0247d9af7af6df2410462423aeca | Updated from global requirements |
++------------------------------------------+----------------------------------------------------------+
+| 2d4a9f0c0a93797da6534583f6e74553a4b634be | Fix links to remove references to submodules |
++------------------------------------------+----------------------------------------------------------+
+| 3ddc2392b0ed364eede49ff006d64df3ea456350 | Gambia release notes |
++------------------------------------------+----------------------------------------------------------+
+| 825a0a0dd5e8028129b782ed21c549586257b1c5 | delete doctor datasource in congress when cleanup |
++------------------------------------------+----------------------------------------------------------+
+| fcf53129ab2b18b84571faff13d7cb118b3a41b3 | run profile even the notification time is larger than 1S |
++------------------------------------------+----------------------------------------------------------+
+| 495965d0336d42fc36494c81fd15cee2f34c96e9 | Update and add test case |
++------------------------------------------+----------------------------------------------------------+
+| da25598a6a31abe0579ffed12d1719e5ff75f9a7 | bugfix: add doctor datasource in congress |
++------------------------------------------+----------------------------------------------------------+
+| f9e1e3b1ae4be80bc2dc61d9c4213c81c091ea72 | Update the maintenance design document |
++------------------------------------------+----------------------------------------------------------+
+| 4639f15e6db2f1480b41f6fbfd11d70312d4e421 | Add maintenance test code |
++------------------------------------------+----------------------------------------------------------+
+| b54cbc5dd2d32fcb27238680b4657ed384d021c5 | Add setup and cleanup for maintenance test |
++------------------------------------------+----------------------------------------------------------+
+| b2bb504032ac81a2ed3f404113b097d9ce3d7f14 | bugfix: kill the stunnel when cleanup |
++------------------------------------------+----------------------------------------------------------+
+| eaeb3c0f9dc9e6645a159d0a78b9fc181fce53d4 | add ssh_keyfile for connect to installer in Apex |
++------------------------------------------+----------------------------------------------------------+
+| dcbe7bf1c26052b0e95d209254e7273aa1eaace1 | Add tox and test case to testing document |
++------------------------------------------+----------------------------------------------------------+
+| 0f607cb5efd91ee497346b7f792dfa844d15595c | enlarge the time of link down |
++------------------------------------------+----------------------------------------------------------+
+| 1351038a65739b8d799820de515178326ad05f7b | bugfix: fix the filename of ssh tunnel |
++------------------------------------------+----------------------------------------------------------+
+| e70bf248daac03eee6b449cd1654d2ee6265dd8c | Use py34 instead of py35 |
++------------------------------------------+----------------------------------------------------------+
+| 2a60d460eaf018951456451077b7118b60219b32 | add INSPECTOR_TYPE and TEST_CASE to tox env |
++------------------------------------------+----------------------------------------------------------+
+| 2043ceeb08c1eca849daeb2b3696d385425ba061 | [consumer] fix default value for port number |
++------------------------------------------+----------------------------------------------------------+
+
+Releng changes
+
++------------------------------------------+-----------------------------------------------------------------------+
+| **commit-ID** | **Subject** |
++------------------------------------------+-----------------------------------------------------------------------+
+| c87309f5a75ccc5d595f708817b97793c24c4387 | Add Doctor maintenance job |
++------------------------------------------+-----------------------------------------------------------------------+
+| bd16a9756ffd0743e143f0f2f966da8dd666c7a3 | remove congress test in Daisy |
++------------------------------------------+-----------------------------------------------------------------------+
+| c47aaaa53c91aae93877f2532c72374beaa4eabe | remove fuel job in Doctor |
++------------------------------------------+-----------------------------------------------------------------------+
+| ab2fed2522eaf82ea7c63dd05008a37c56e825d0 | use 'workspace-cleanup' plugin in publisher |
++------------------------------------------+-----------------------------------------------------------------------+
+| 3aaed5cf40092744f1b87680b9205a2901baecf3 | clean the workspace in the publisher |
++------------------------------------------+-----------------------------------------------------------------------+
+| 50151eb3717edd4ddd996f3705fbe1732de7f3b7 | run tox with 'sudo' |
++------------------------------------------+-----------------------------------------------------------------------+
+| a3adc85ecb52f5d19ec4e9c49ca1ac35aa429ff9 | remove inspector variable form job template |
++------------------------------------------+-----------------------------------------------------------------------+
+| adfbaf2a3e8487e4c9152bf864a653a0425b8582 | run doctor tests with different inspectors in sequence |
++------------------------------------------+-----------------------------------------------------------------------+
+| 2e98e56224cd550cb3bf9798e420eece28139bd9 | add the ssh_key info if the key_file is exist |
++------------------------------------------+-----------------------------------------------------------------------+
+| c109c271018e9a85d94be1b9b468338d64589684 | prepare installer info for doctor test |
++------------------------------------------+-----------------------------------------------------------------------+
+| 57cbefc7160958eae1d49e4753779180a25864af | use py34 for tox |
++------------------------------------------+-----------------------------------------------------------------------+
+| 3547754e808a581b09c9d22e013a7d986d9f6cd1 | specify the cacert file when it exits |
++------------------------------------------+-----------------------------------------------------------------------+
+| ef4f36aa1c2ff0819d73cde44f84b99a42e15c7e | bugfix: wrong usage of '!include-raw' |
++------------------------------------------+-----------------------------------------------------------------------+
+| 0e0e0d4cb71fb27b1789a2bef2d3c4ff313e67ff | use tox instead of functest for doctor CI jobs |
++------------------------------------------+-----------------------------------------------------------------------+
+| 5b22f1b95feacaec0380f6a7543cbf510b628451 | pass value to parameters |
++------------------------------------------+-----------------------------------------------------------------------+
+| 44ab0cea07fa2a734c4f6b80776ad48fd006d1b8 | Doctor job bugfix: fix the scenario |
++------------------------------------------+-----------------------------------------------------------------------+
+| 17617f1c0a78c7bdad0d11d329a6c7e119cbbddd | bugfix: run doctor tests parallelly |
++------------------------------------------+-----------------------------------------------------------------------+
+| 811e4ef7f4c37b7bc246afc34ff880c014ecc05d | delete 'opnfv-build-ubuntu-defaults' parameters for doctor verify job |
++------------------------------------------+-----------------------------------------------------------------------+
+| 0705f31ab5bc54c073df120cbe0fe62cf10f9a81 | delete the 'node' parameter in 'doctor-slave-parameter' macro |
++------------------------------------------+-----------------------------------------------------------------------+
+| 304151b15f9d7241db8c5fea067cafe048287d84 | fix the default node label for doctor test |
++------------------------------------------+-----------------------------------------------------------------------+
+| a6963f92f015a33b44b27199886952205499b44c | Fix project name |
++------------------------------------------+-----------------------------------------------------------------------+
+| f122bfed998b3b0e0178106a7538377c609c6512 | add a default value for SSH_KEY |
++------------------------------------------+-----------------------------------------------------------------------+
+
+Version change
+^^^^^^^^^^^^^^
+
+Module version changes
+~~~~~~~~~~~~~~~~~~~~~~
+
+- OpenStack has changed from Pike-1 to Queens-1
+
+Document version changes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These documents have been updated in the Gambia release:
+
+- Testing document
+ docs/development/overview/testing.rst
+- Doctor scenario in functest
+ docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
+- Maintenance design guideline
+ docs/development/design/maintenance-design-guideline.rst
+
+Reason for version
+^^^^^^^^^^^^^^^^^^
+
+Documentation is updated due to the switch to tox for testing and the addition
+of maintenance use case related documentation.
+
+Feature additions
+~~~~~~~~~~~~~~~~~
+
++--------------------+--------------------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------------------+
+| DOCTOR-106 | Maintenance scenario |
++--------------------+--------------------------------------------------------+
+| DOCTOR-125 | Maintenance design document according to our test case |
++--------------------+--------------------------------------------------------+
+| DOCTOR-126 | Use Tox instead of Functest for doctor CI jobs |
++--------------------+--------------------------------------------------------+
+| DOCTOR-127 | Maintenance test POD |
++--------------------+--------------------------------------------------------+
+| DOCTOR-130 | Apex with containers |
++--------------------+--------------------------------------------------------+
+
+
+
+Deliverables
+------------
+
+
+Software deliverables
+=====================
+
+None
+
+Documentation deliverables
+==========================
+
+https://git.opnfv.org/doctor/tree/docs
+
+Known Limitations, Issues and Workarounds
+=========================================
+
+System Limitations
+^^^^^^^^^^^^^^^^^^
+
+Maintenance test case requirements:
+
+- Minimum number of nodes: 1 Controller, 3 Computes
+- Minimum number of VCPUs: 2 per compute node
+
+Known issues
+^^^^^^^^^^^^
+
+None
+
+Workarounds
+^^^^^^^^^^^
+
+None
+
+Test Result
+===========
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | SUCCESS |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Compass' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Daisy' | SUCCESS |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Fuel' | No POD |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Joid' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Local' | N/A |
++--------------------------------------+--------------+
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=congress
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | FAILED |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Compass' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Daisy' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Fuel' | No POD |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Joid' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Local' | N/A |
++--------------------------------------+--------------+
+
+
+Doctor Functest results with TEST_CASE='fault_management'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | skipped |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Compass' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Daisy' | skipped |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Fuel' | skipped |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Joid' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Local' | N/A |
++--------------------------------------+--------------+
+
+Note: Functest as run by the installers currently either does not test feature
+projects or skips running the project test cases
+
+Doctor CI results with TEST_CASE='maintenance'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | SUCCESS |
++--------------------------------------+--------------+
+
+Doctor Functest results with TEST_CASE='maintenance'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+N/A - Needs a special target and currently there is only a sample implementation
+
+References
+==========
+
+For more information about the latest OPNFV Doctor work, please see:
+
+https://wiki.opnfv.org/display/doctor/Doctor+Home
diff --git a/docs/release/release-notes/releasenotes_iruya.rst b/docs/release/release-notes/releasenotes_iruya.rst
new file mode 100644
index 00000000..92775557
--- /dev/null
+++ b/docs/release/release-notes/releasenotes_iruya.rst
@@ -0,0 +1,129 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+This document provides the release notes for the Iruya version of Doctor.
+
+Important notes
+===============
+
+In the Iruya release there have not been many changes.
+
+All testing is now done with the Fuel installer. The maintenance use case is
+now only tested against the latest upstream Fenix. Only the sample inspector is
+tested, as Fuel does not support Vitrage or Congress.
+
+Summary
+=======
+
+The Iruya Doctor framework uses OpenStack Stein in its test cases.
+
+Release Data
+============
+
+Doctor changes
+
+- Maintenance use case updated to support the latest version of Fenix running
+  in a container on the controller node
+- Maintenance use case now supports the Fuel installer
+- Doctor updated to use OpenStack Stein and Python 3.6 only
+- Only the sample inspector is tested, due to lacking installer support for
+  Vitrage and Congress
+
+Releng changes
+
+- Doctor testing runs with Python 3.6 and with the sample inspector
+- Doctor is only tested with the Fuel installer
+
+Version change
+^^^^^^^^^^^^^^
+
+Module version changes
+~~~~~~~~~~~~~~~~~~~~~~
+
+- OpenStack has changed from Rocky to Stein since the previous Hunter release.
+
+Document version changes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+Reason for version
+^^^^^^^^^^^^^^^^^^
+
+N/A
+
+Feature additions
+~~~~~~~~~~~~~~~~~
+
++--------------------+--------------------------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------------------------+
+| DOCTOR-134 | Update Doctor maintenance use case to work with latest Fenix |
++--------------------+--------------------------------------------------------------+
+
+Deliverables
+------------
+
+Software deliverables
+=====================
+
+None
+
+Documentation deliverables
+==========================
+
+https://git.opnfv.org/doctor/tree/docs
+
+Known Limitations, Issues and Workarounds
+=========================================
+
+System Limitations
+^^^^^^^^^^^^^^^^^^
+
+Maintenance test case requirements:
+
+- Minimum number of nodes: 1 Controller, 3 Computes
+- Minimum number of VCPUs: 2 per compute node
+
+Known issues
+^^^^^^^^^^^^
+
+None
+
+Workarounds
+^^^^^^^^^^^
+
+None
+
+Test Result
+===========
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
++--------------------------------------+--------------+
+
+Doctor CI results with TEST_CASE='maintenance' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
+| ADMIN_TOOL_TYPE='fenix' *) | |
++--------------------------------------+--------------+
+
+*) The sample implementation is not updated according to the latest upstream
+   Fenix and is currently not being tested.
+
+References
+==========
+
+For more information about the latest OPNFV Doctor work, please see:
+
+https://wiki.opnfv.org/display/doctor/Doctor+Home
diff --git a/docs/release/scenarios/fault_management/fault_management.rst b/docs/release/scenarios/fault_management/fault_management.rst
new file mode 100644
index 00000000..99371201
--- /dev/null
+++ b/docs/release/scenarios/fault_management/fault_management.rst
@@ -0,0 +1,90 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+Running test cases
+""""""""""""""""""
+
+Functest will call "doctor_tests/main.py" in Doctor to run the test job.
+Doctor testing can also be triggered by tox on the OPNFV installer jumphost.
+Tox is normally used for functional, module and coding style testing in Python
+projects.
+
+Currently the 'MCP' and 'devstack' installers are supported.
+
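+For example, the fault management test can be triggered on the jumphost roughly
+as follows (a sketch; the exported values are assumptions and depend on the
+deployment):
+
+.. code-block:: bash
+
+    export INSTALLER_TYPE='mcp'
+    export INSTALLER_IP=<installer-vm-ip>
+    export INSPECTOR_TYPE='sample'
+    export TEST_CASE='fault_management'
+    git clone https://gerrit.opnfv.org/gerrit/doctor
+    cd doctor
+    sudo -E tox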
+
+Fault management use case
+"""""""""""""""""""""""""
+
+* A consumer of the NFVI wants to receive immediate notifications about faults
+ in the NFVI affecting the proper functioning of the virtual resources.
+ Therefore, such faults have to be detected as quickly as possible, and, when
+ a critical error is observed, the affected consumer is immediately informed
+ about the fault and can switch over to the STBY configuration.
+
+The faults to be monitored (and at which detection rate) will be configured by
+the consumer. Once a fault is detected, the Inspector in the Doctor
+architecture will check the resource map maintained by the Controller to find
+out which virtual resources are affected, and then update the state of those
+resources. The Notifier will receive the failure event requests sent from the
+Controller, and notify the consumer(s) of the affected resources according to
+the alarm configuration.
+
+Detailed workflow information is as follows:
+
+* Consumer (VNFM): (step 0) creates resources (network, server/instance) and an
+  event alarm on the state down notification of that server/instance or Neutron
+  port (a sketch of such an alarm follows this list).
+
+* Monitor: (step 1) periodically checks nodes, e.g. by pinging from/to each
+  data plane NIC to/from the gateway of the node, and (step 2) once such a
+  check fails, sends an event with "raw" fault information to the Inspector
+
+* Inspector: when it receives an event, it will (step 3) mark the host down
+  ("mark-host-down"), (step 4) map the PM to the VMs, and change the VM status
+  to down. In the network failure case, the Neutron port is also set to down.
+
+* Controller: (step 5) sends out an instance update event to Ceilometer. In the
+  network failure case, the Neutron port is also set to down and a
+  corresponding event is sent to Ceilometer.
+
+* Notifier: (step 6) Ceilometer transforms and passes the events to AODH,
+  (step 7) AODH evaluates the events against the registered alarm definitions,
+  then (step 8) it fires the alarm to the "consumer" who owns the
+  instance
+
+* Consumer (VNFM): (step 9) receives the event and (step 10) recreates the
+  instance
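+
+As a minimal sketch of the alarm created in step 0 (the name, port, URL and
+UUID below are placeholders; the exact options depend on the AODH version in
+use):
+
+.. code-block:: bash
+
+    aodh alarm create --type event --name doctor_alarm \
+        --event-type "compute.instance.update" \
+        --query "traits.state=string::error; traits.instance_id=string::<vm-uuid>" \
+        --alarm-action "http://<consumer-ip>:12346/failure"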
+
+Fault management test case
+""""""""""""""""""""""""""
+
+Functest will call "doctor_tests/main.py" in Doctor to run the test job.
+
+The following steps are executed:
+
+Firstly, get the installer IP according to the installer type. Then ssh to
+the installer node to get the private key for accessing the cloud. For the
+'fuel' installer, ssh to the controller node to modify the nova and ceilometer
+configurations.
+
+Secondly, prepare an image for booting the VM, then create a test project and
+a test user (both default to doctor) for the Doctor tests.
+
+Thirdly, boot a VM under the doctor project and check the VM status to verify
+that the VM is launched completely. Then get the info of the compute host where
+the VM is launched to verify connectivity to the target compute host. Get the
+consumer IP according to the route to the compute IP and create an event alarm
+in Ceilometer using the consumer IP.
+
+Fourthly, the Doctor components are started, and, based on the above
+preparation, a failure is injected into the system, i.e. the network of the
+compute host is disabled for 3 minutes. To ensure the host is down, the status
+of the host is checked.
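+
+The injected failure is essentially a link down on the compute host, roughly
+equivalent to the following sketch run on that host (the interface name is a
+placeholder):
+
+.. code-block:: bash
+
+    ip link set <data-plane-interface> down
+    sleep 180
+    ip link set <data-plane-interface> up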
+
+Finally, the notification time, i.e. the time between the execution of step 2
+(Monitor detects failure) and step 9 (Consumer receives failure notification),
+is calculated.
+
+According to the Doctor requirements, the Doctor test is successful if the
+notification time is below 1 second.
diff --git a/docs/development/overview/functest_scenario/images/Fault-management-design.png b/docs/release/scenarios/maintenance/images/Fault-management-design.png
index 6d98cdec..6d98cdec 100644
--- a/docs/development/overview/functest_scenario/images/Fault-management-design.png
+++ b/docs/release/scenarios/maintenance/images/Fault-management-design.png
Binary files differ
diff --git a/docs/development/overview/functest_scenario/images/LICENSE b/docs/release/scenarios/maintenance/images/LICENSE
index 21a2d03d..21a2d03d 100644
--- a/docs/development/overview/functest_scenario/images/LICENSE
+++ b/docs/release/scenarios/maintenance/images/LICENSE
diff --git a/docs/development/overview/functest_scenario/images/Maintenance-design.png b/docs/release/scenarios/maintenance/images/Maintenance-design.png
index 8f21db6a..8f21db6a 100644
--- a/docs/development/overview/functest_scenario/images/Maintenance-design.png
+++ b/docs/release/scenarios/maintenance/images/Maintenance-design.png
Binary files differ
diff --git a/docs/development/overview/functest_scenario/images/Maintenance-workflow.png b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png
index 9b65fd59..9b65fd59 100644
--- a/docs/development/overview/functest_scenario/images/Maintenance-workflow.png
+++ b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png
Binary files differ
diff --git a/docs/release/scenarios/maintenance/maintenance.rst b/docs/release/scenarios/maintenance/maintenance.rst
new file mode 100644
index 00000000..ecfe76b1
--- /dev/null
+++ b/docs/release/scenarios/maintenance/maintenance.rst
@@ -0,0 +1,120 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+Maintenance use case
+""""""""""""""""""""
+
+* A consumer of the NFVI wants to interact with NFVI maintenance, upgrade,
+  scaling and graceful retirement. By receiving notifications about these
+  NFVI events and responding to them within a given time window, the consumer
+  can guarantee zero downtime for its service.
+
+The maintenance use case adds an `admin tool` and an `app manager` component
+to the Doctor platform. An overview of the maintenance components can be seen
+in :numref:`figure-p2`.
+
+.. figure:: ./images/Maintenance-design.png
+ :name: figure-p2
+ :width: 100%
+
+ Doctor platform components in maintenance use case
+
+In the maintenance use case, the `app manager` (VNFM) subscribes to maintenance
+notifications triggered by project specific alarms through AODH. This is how it
+learns about the NFVI maintenance, upgrade and scaling operations that affect
+its instances. The `app manager` can perform the actions depicted in `green
+color` or tell the `admin tool` to perform the admin actions depicted in
+`orange color`.
+
+Any infrastructure component like the `Inspector` can subscribe to maintenance
+notifications triggered by host specific alarms through AODH. Subscribing to
+these notifications requires admin privileges; the notifications tell when a
+host is out of use for maintenance and when it is taken back into production.
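+
+As a sketch, such subscriptions can be realized with AODH event alarms; the
+event types below follow the sample implementation (`maintenance.scheduled`
+for project notifications, `maintenance.host` for admin level host
+notifications), while the endpoints are placeholders:
+
+.. code-block:: bash
+
+    aodh alarm create --type event --name project_maintenance_alarm \
+        --event-type "maintenance.scheduled" \
+        --alarm-action "http://<app-manager-ip>:<port>/maintenance"
+    aodh alarm create --type event --name host_maintenance_alarm \
+        --event-type "maintenance.host" \
+        --alarm-action "http://<inspector-ip>:<port>/maintenance"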
+
+Maintenance test case
+"""""""""""""""""""""
+
+The maintenance test case currently runs in our Apex CI and is executed by tox.
+This is because of the special limitation mentioned below, and because we
+currently have only a sample implementation as a proof of concept, while also
+supporting the unofficial OpenStack project Fenix. The environment variable
+TEST_CASE='maintenance' needs to be used when executing "doctor_tests/main.py",
+and ADMIN_TOOL_TYPE='fenix' if you want to test with Fenix instead of the
+sample implementation. The test case workflow can be seen in :numref:`figure-p3`.
+
+.. figure:: ./images/Maintenance-workflow.png
+ :name: figure-p3
+ :width: 100%
+
+ Maintenance test case workflow
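+
+For example, the maintenance test can be launched from the Doctor repository as
+follows (a sketch; the exported values are assumptions and depend on the
+deployment):
+
+.. code-block:: bash
+
+    export INSTALLER_TYPE='apex'
+    export TEST_CASE='maintenance'
+    export ADMIN_TOOL_TYPE='fenix'    # omit to use the sample implementation
+    sudo -E tox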
+
+In the test case all compute capacity is consumed by project (VNF) instances.
+To have redundant services on instances and an empty compute node for
+maintenance, the test case needs at least 3 compute nodes in the system. There
+will be 2 instances on each compute node, so the minimum number of VCPUs is
+also 2. Regardless of how many compute nodes there are, the application always
+has 2 redundant instances (ACT-STDBY) on different compute nodes, and the rest
+of the compute capacity is filled with non-redundant instances.
+
+For each project specific maintenance message there is a time window for the
+`app manager` to take any needed action. This guarantees zero downtime for its
+service. All replies are made by calling the `admin tool` API given in the
+message.
+
+The following steps are executed:
+
+The infrastructure admin calls the `admin tool` API to trigger maintenance for
+the compute hosts having instances belonging to a VNF.
+
+A project specific `MAINTENANCE` notification is triggered to tell the
+`app manager` that its instances are going to be hit by infrastructure
+maintenance at a specific point in time. The `app manager` calls the
+`admin tool` API to answer back with `ACK_MAINTENANCE`.
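+
+As an illustration, such a reply can be sent with a plain HTTP PUT to the
+`admin tool` API; the endpoint format below follows the sample implementation
+in ``doctor_tests/admin_tool/sample.py``, and the ids and addresses are
+placeholders:
+
+.. code-block:: bash
+
+    curl -X PUT "http://<admin-tool-ip>:<port>/maintenance/<session_id>/<project_id>" \
+         -H "Content-Type: application/json" \
+         -d '{"state": "ACK_MAINTENANCE", "instance_ids": ["<instance-uuid>"]}'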
+
+When the time comes to start the actual maintenance workflow in the
+`admin tool`, a `DOWN_SCALE` notification is triggered, as there is no empty
+compute node for maintenance (or compute upgrade). The project receives the
+corresponding alarm, scales down its instances and calls the `admin tool` API
+to answer back with `ACK_DOWN_SCALE`.
+
+As it might happen that instances are not scaled down (removed) from a single
+compute node, the `admin tool` might need to figure out which compute node
+should be made empty first and send `PREPARE_MAINTENANCE` to the project,
+telling which instance needs to be migrated to obtain the needed empty compute
+node. The `app manager` makes sure it is ready to migrate the instance and
+calls the `admin tool` API to answer back with `ACK_PREPARE_MAINTENANCE`. The
+`admin tool` performs the migration and answers `ADMIN_ACTION_DONE`, so the
+`app manager` knows the instance can be used again.
+
+:numref:`figure-p3` next has a light blue section of actions to be done for
+each compute node. However, as we now have one empty compute node, we
+maintain/upgrade that one first. So on the first round we can directly put the
+compute node into maintenance and send the admin level, host specific
+`IN_MAINTENANCE` message. This is caught by the `Inspector`, which then knows
+the host is down for maintenance. The `Inspector` can now disable any automatic
+fault management actions for the host, as it may be down on purpose. After the
+`admin tool` has completed the maintenance/upgrade, a `MAINTENANCE_COMPLETE`
+message is sent to tell the host is back in production.
+
+On the next rounds we always have instances on the compute node, so we need a
+`PLANNED_MAINTENANCE` message to tell that those instances are now going to be
+hit by maintenance. When the `app manager` receives this message, it knows that
+the instances to be moved away from the compute node will now move to an
+already maintained/upgraded host. In the test case no upgrade is done on the
+application side to upgrade instances according to the new infrastructure
+capabilities, but this could be done here as this information is also passed
+in the message. This might be just upgrading some RPMs, but also totally
+re-instantiating an instance with a new flavor. If the application runs the
+active side of a redundant instance on this compute node, a switch over is
+done. After the `app manager` is ready, it calls the `admin tool` API to answer
+back with `ACK_PLANNED_MAINTENANCE`. In the test case the answer is `migrate`,
+so the `admin tool` migrates the instances and replies `ADMIN_ACTION_DONE`, and
+then the `app manager` knows the instances can be used again. Then we are ready
+to perform the actual maintenance as previously, through the `IN_MAINTENANCE`
+and `MAINTENANCE_COMPLETE` steps.
+
+After all compute nodes are maintained, the `admin tool` can send
+`MAINTENANCE_COMPLETE` to tell that the maintenance/upgrade is now complete.
+For the `app manager` this means it can scale back to full capacity.
+
+There is currently a sample implementation of the VNFM and the test case. On
+the infrastructure side there is a sample implementation of the 'admin_tool',
+and there is also support for OpenStack Fenix, which extends the use case to
+support 'ETSI FEAT03' for VNFM interaction and to optimize the whole
+infrastructure maintenance and upgrade.
diff --git a/docs/development/manuals/get-valid-server-state.rst b/docs/release/userguide/get-valid-server-state.rst
index 824ea3c2..824ea3c2 100644
--- a/docs/development/manuals/get-valid-server-state.rst
+++ b/docs/release/userguide/get-valid-server-state.rst
diff --git a/docs/release/userguide/index.rst b/docs/release/userguide/index.rst
index eee855dc..577072c7 100644
--- a/docs/release/userguide/index.rst
+++ b/docs/release/userguide/index.rst
@@ -11,3 +11,6 @@ Doctor User Guide
:maxdepth: 2
feature.userguide.rst
+ get-valid-server-state.rst
+ mark-host-down_manual.rst
+ monitors.rst
diff --git a/docs/development/manuals/mark-host-down_manual.rst b/docs/release/userguide/mark-host-down_manual.rst
index 3815205d..3815205d 100644
--- a/docs/development/manuals/mark-host-down_manual.rst
+++ b/docs/release/userguide/mark-host-down_manual.rst
diff --git a/docs/development/manuals/monitors.rst b/docs/release/userguide/monitors.rst
index eeb5e226..eeb5e226 100644
--- a/docs/development/manuals/monitors.rst
+++ b/docs/release/userguide/monitors.rst
diff --git a/docs/testing/developer/index.rst b/docs/testing/developer/index.rst
new file mode 100644
index 00000000..dfbcfa74
--- /dev/null
+++ b/docs/testing/developer/index.rst
@@ -0,0 +1,13 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+*********
+Developer
+*********
+
+.. toctree::
+ :numbered:
+ :maxdepth: 2
+
+ testing.rst
diff --git a/docs/development/overview/testing.rst b/docs/testing/developer/testing.rst
index a4b9ffa6..6a929130 100644
--- a/docs/development/overview/testing.rst
+++ b/docs/testing/developer/testing.rst
@@ -41,6 +41,16 @@ export TEST_CASE with different values:
#Run both tests cases
export TEST_CASE='all'
+ #Use Fenix in maintenance testing instead of sample admin_tool
+   #This is only for the 'maintenance' test case
+ export ADMIN_TOOL_TYPE='fenix'
+ export APP_MANAGER_TYPE='vnfm'
+
+ #Run in different installer jumphost 'fuel' or 'apex'
+ #In multinode DevStack you run Doctor in controller node
+ #with value export APP_MANAGER_TYPE=vnfm
+ export INSTALLER_TYPE='fuel'
+
Run Python Test Script
~~~~~~~~~~~~~~~~~~~~~~
@@ -57,42 +67,16 @@ environment and then run the test.
.. _doctor.sample.conf: https://git.opnfv.org/doctor/tree/etc/doctor.sample.conf
-In OPNFV Apex jumphost you can run Doctor testing as follows using tox:
+On the OPNFV testing environment jumphost you can run Doctor testing as follows
+using tox:
.. code-block:: bash
- #Before Gambia: overcloudrc.v3
source overcloudrc
export INSTALLER_IP=${INSTALLER_IP}
export INSTALLER_TYPE=${INSTALLER_TYPE}
git clone https://gerrit.opnfv.org/gerrit/doctor
cd doctor
sudo -E tox
-
-Run Functest Suite
-==================
-
-Functest supports Doctor testing by triggering the test script above in a
-Functest container. You can run the Doctor test with the following steps:
-
-.. code-block:: bash
-
- DOCKER_TAG=latest
- docker pull docker.io/opnfv/functest-features:${DOCKER_TAG}
- docker run --privileged=true -id \
- -e INSTALLER_TYPE=${INSTALLER_TYPE} \
- -e INSTALLER_IP=${INSTALLER_IP} \
- -e INSPECTOR_TYPE=sample \
- docker.io/opnfv/functest-features:${DOCKER_TAG} /bin/bash
- docker exec <container_id> functest testcase run doctor-notification
-
-See `Functest Userguide`_ for more information.
-
-.. _Functest Userguide: :doc:`<functest:testing/user/userguide>`
-
-
-For testing with stable version, change DOCKER_TAG to 'stable' or other release
-tag identifier.
-
-Tips
-====
+
+Note! In DevStack you run Doctor on the controller node.
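+
+For example, a run on a DevStack controller could look as follows (a sketch;
+the values are assumptions and depend on the deployment):
+
+.. code-block:: bash
+
+    export INSTALLER_TYPE='devstack'
+    export APP_MANAGER_TYPE='vnfm'
+    export TEST_CASE='maintenance'
+    sudo -E tox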
diff --git a/docs/testing/index.rst b/docs/testing/index.rst
new file mode 100644
index 00000000..3fae9568
--- /dev/null
+++ b/docs/testing/index.rst
@@ -0,0 +1,15 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+.. _testing:
+
+=======
+Testing
+=======
+
+.. toctree::
+ :maxdepth: 2
+
+ ./developer/index.rst
+ ./user/index.rst
diff --git a/docs/testing/user/index.rst b/docs/testing/user/index.rst
new file mode 100644
index 00000000..1be9c7eb
--- /dev/null
+++ b/docs/testing/user/index.rst
@@ -0,0 +1,13 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+****
+User
+****
+
+.. toctree::
+ :numbered:
+ :maxdepth: 2
+
+ testing.rst
diff --git a/docs/testing/user/testing.rst b/docs/testing/user/testing.rst
new file mode 100644
index 00000000..6172d26a
--- /dev/null
+++ b/docs/testing/user/testing.rst
@@ -0,0 +1,30 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Run Functest Suite (obsolete)
+=============================
+
+Functest supports Doctor testing by triggering the test script above in a
+Functest container. You can run the Doctor test with the following steps:
+
+.. code-block:: bash
+
+ DOCKER_TAG=latest
+ docker pull docker.io/opnfv/functest-features:${DOCKER_TAG}
+ docker run --privileged=true -id \
+ -e INSTALLER_TYPE=${INSTALLER_TYPE} \
+ -e INSTALLER_IP=${INSTALLER_IP} \
+ -e INSPECTOR_TYPE=sample \
+ docker.io/opnfv/functest-features:${DOCKER_TAG} /bin/bash
+ docker exec <container_id> functest testcase run doctor-notification
+
+See `Functest Userguide`_ for more information.
+
+.. _Functest Userguide: :doc:`<functest:testing/user/userguide>`
+
+
+For testing with a stable version, change DOCKER_TAG to 'stable' or another
+release tag identifier.
+
+Tips
+====
diff --git a/doctor_tests/admin_tool/__init__.py b/doctor_tests/admin_tool/__init__.py
index e8b12817..3417a334 100644
--- a/doctor_tests/admin_tool/__init__.py
+++ b/doctor_tests/admin_tool/__init__.py
@@ -8,16 +8,16 @@
##############################################################################
from oslo_config import cfg
from oslo_utils import importutils
-
+import os
OPTS = [
cfg.StrOpt('type',
- default='sample',
- choices=['sample'],
+ default=os.environ.get('ADMIN_TOOL_TYPE', 'sample'),
+ choices=['sample', 'fenix'],
help='the component of doctor admin_tool',
required=True),
cfg.StrOpt('ip',
- default='127.0.0.1',
+ default='0.0.0.0',
help='the ip of admin_tool',
required=True),
cfg.IntOpt('port',
diff --git a/doctor_tests/admin_tool/fenix/Dockerfile b/doctor_tests/admin_tool/fenix/Dockerfile
new file mode 100644
index 00000000..202380eb
--- /dev/null
+++ b/doctor_tests/admin_tool/fenix/Dockerfile
@@ -0,0 +1,34 @@
+FROM gliderlabs/alpine:3.6
+
+ARG BRANCH=master
+ARG OPENSTACK=master
+
+EXPOSE 12347
+
+RUN echo "Building Fenix container against OpenStack $OPENSTACK" && \
+ echo "Building Fenix with $BRANCH" && \
+ mkdir /etc/fenix && \
+ mkdir -p /var/tmp/fenix
+WORKDIR /var/tmp/fenix
+COPY fenix*.conf /etc/fenix/
+
+RUN apk --no-cache add ca-certificates && \
+ apk --no-cache add --update python3 sshpass py-pip git curl && \
+ apk --no-cache add --virtual .build-deps --update \
+ python3-dev build-base linux-headers libffi-dev \
+ openssl-dev libjpeg-turbo-dev && \
+ curl https://opendev.org/openstack/requirements/raw/branch/$OPENSTACK/upper-constraints.txt > upper-constraints.txt && \
+ if [ ! -e /usr/bin/pip ]; then ln -s pip3 /usr/bin/pip ; fi && \
+ if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python3 /usr/bin/python; fi && \
+ pip3 install --upgrade pip && \
+ pip3 install alembic aodhclient decorator flask Flask-RESTful eventlet jsonschema \
+ keystoneauth1 keystonemiddleware python-novaclient oslo.config pecan \
+ oslo.db oslo.log oslo.messaging oslo.serialization oslo.service oslo_policy \
+ oslotest oslo.utils pbr pymysql six sqlalchemy -cupper-constraints.txt && \
+ git clone https://opendev.org/x/fenix -b $BRANCH /fenix && \
+ rm -fr /var/tmp/fenix
+COPY run /fenix
+COPY keystonercv3 /fenix
+WORKDIR /fenix
+RUN python3 setup.py install
+CMD ./run
diff --git a/doctor_tests/admin_tool/fenix/run b/doctor_tests/admin_tool/fenix/run
new file mode 100755
index 00000000..50ae68e7
--- /dev/null
+++ b/doctor_tests/admin_tool/fenix/run
@@ -0,0 +1,32 @@
+#!/bin/sh
+. keystonercv3
+
+# Start the first process
+nohup python3 /fenix/fenix/cmd/engine.py > /var/log/fenix-engine.log&
+status=$?
+if [ $status -ne 0 ]; then
+ echo "Failed to start engine.py: $status"
+ exit $status
+fi
+
+# Start the second process
+nohup python3 /fenix/fenix/cmd/api.py > /var/log/fenix-api.log&
+status=$?
+if [ $status -ne 0 ]; then
+ echo "Failed to start api.py: $status"
+ exit $status
+fi
+
+echo "started Fenix: engine and api"
+while sleep 60; do
+ ps aux |grep "cmd/engine.py" |grep -q -v grep
+ PROCESS_1_STATUS=$?
+ ps aux |grep "cmd/api.py" |grep -q -v grep
+ PROCESS_2_STATUS=$?
+ # If the greps above find anything, they exit with 0 status
+ # If they are not both 0, then something is wrong
+ if [ $PROCESS_1_STATUS -ne 0 -o $PROCESS_2_STATUS -ne 0 ]; then
+ echo "One of the processes has already exited."
+ exit 1
+ fi
+done
diff --git a/doctor_tests/admin_tool/sample.py b/doctor_tests/admin_tool/sample.py
index 892a4c83..a71f43a1 100644
--- a/doctor_tests/admin_tool/sample.py
+++ b/doctor_tests/admin_tool/sample.py
@@ -59,7 +59,7 @@ class AdminMain(Thread):
self.parent = parent
self.log = log
self.conf = conf
- self.url = 'http://0.0.0.0:%s' % conf.admin_tool.port
+ self.url = 'http://%s:%s' % (conf.admin_tool.ip, conf.admin_tool.port)
self.projects_state = dict() # current state for each project
self.proj_server_actions = dict() # actions for each project server
self.projects_servers = dict() # servers processed in current state
@@ -86,6 +86,7 @@ class AdminMain(Thread):
driver='messaging',
topics=['notifications'])
self.notif_admin = self.notif_admin.prepare(publisher_id='admin_tool')
+ self.stopped = False
self.log.info('Admin tool session %s initialized' % self.session_id)
def cleanup(self):
@@ -116,14 +117,15 @@ class AdminMain(Thread):
if self._projects_not_in_wanted_states(wanted_states):
self.log.error('Admin tool session %s: projects in invalid states '
'%s' % (self.session_id, self.projects_state))
- raise Exception('Admin tool session %s: not all projects in states'
- ' %s' % (self.session_id, wanted_states))
+ return False
else:
self.log.info('all projects replied')
+ return True
def _project_notify(self, project_id, instance_ids, allowed_actions,
actions_at, state, metadata):
- reply_url = '%s/%s/maintenance' % (self.url, project_id)
+ reply_url = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project_id)
payload = dict(project_id=project_id,
instance_ids=instance_ids,
@@ -148,11 +150,12 @@ class AdminMain(Thread):
self.notif_admin.info({'some': 'context'}, 'maintenance.host', payload)
- def down_scale(self):
+ def in_scale(self):
for project in self.projects_servers:
- self.log.info('DOWN_SCALE to project %s' % project)
+ self.log.info('SCALE_IN to project %s' % project)
self.log.debug('instance_ids %s' % self.projects_servers[project])
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = []
wait_seconds = 120
actions_at = (datetime.datetime.utcnow() +
@@ -163,18 +166,20 @@ class AdminMain(Thread):
self._project_notify(project, instance_ids,
allowed_actions, actions_at, state,
metadata)
- allowed_states = ['ACK_DOWN_SCALE', 'NACK_DOWN_SCALE']
- self.wait_projects_state(allowed_states, wait_seconds)
- if self.projects_not_in_state('ACK_DOWN_SCALE'):
- raise Exception('Admin tool session %s: all states not '
- 'ACK_DOWN_SCALE %s' %
- (self.session_id, self.projects_state))
+ allowed_states = ['ACK_SCALE_IN', 'NACK_SCALE_IN']
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
+ if self.projects_not_in_state('ACK_SCALE_IN'):
+ self.log.error('%s: all states not ACK_SCALE_IN' %
+ self.session_id)
+ self.state = 'MAINTENANCE_FAILED'
def maintenance(self):
for project in self.projects_servers:
self.log.info('\nMAINTENANCE to project %s\n' % project)
self.log.debug('instance_ids %s' % self.projects_servers[project])
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = []
actions_at = self.maintenance_at
state = self.state
@@ -190,16 +195,18 @@ class AdminMain(Thread):
allowed_actions, actions_at, state,
metadata)
allowed_states = ['ACK_MAINTENANCE', 'NACK_MAINTENANCE']
- self.wait_projects_state(allowed_states, wait_seconds)
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
if self.projects_not_in_state('ACK_MAINTENANCE'):
- raise Exception('Admin tool session %s: all states not '
- 'ACK_MAINTENANCE %s' %
- (self.session_id, self.projects_state))
+ self.log.error('%s: all states not ACK_MAINTENANCE' %
+ self.session_id)
+ self.state = 'MAINTENANCE_FAILED'
def maintenance_complete(self):
for project in self.projects_servers:
self.log.info('MAINTENANCE_COMPLETE to project %s' % project)
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = []
wait_seconds = 120
actions_at = (datetime.datetime.utcnow() +
@@ -212,13 +219,14 @@ class AdminMain(Thread):
metadata)
allowed_states = ['ACK_MAINTENANCE_COMPLETE',
'NACK_MAINTENANCE_COMPLETE']
- self.wait_projects_state(allowed_states, wait_seconds)
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
if self.projects_not_in_state('ACK_MAINTENANCE_COMPLETE'):
- raise Exception('Admin tool session %s: all states not '
- 'ACK_MAINTENANCE_COMPLETE %s' %
- (self.session_id, self.projects_state))
+ self.log.error('%s: all states not ACK_MAINTENANCE_COMPLETE' %
+ self.session_id)
+ self.state = 'MAINTENANCE_FAILED'
- def need_down_scale(self, host_servers):
+ def need_in_scale(self, host_servers):
room_for_instances = 0
for host in host_servers:
instances = 0
@@ -267,7 +275,8 @@ class AdminMain(Thread):
self.projects_servers[project] = projects_servers[project].copy()
self.log.info('%s to project %s' % (state, project))
self.project_servers_log_info(project, projects_servers)
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = ['MIGRATE', 'LIVE_MIGRATE', 'OWN_ACTION']
wait_seconds = 120
actions_at = (datetime.datetime.utcnow() +
@@ -278,11 +287,14 @@ class AdminMain(Thread):
allowed_actions, actions_at, state,
metadata)
allowed_states = [state_ack, state_nack]
- self.wait_projects_state(allowed_states, wait_seconds)
- if self.projects_not_in_state(state_ack):
- raise Exception('Admin tool session %s: all states not %s %s' %
- (self.session_id, state_ack, self.projects_state))
- self.actions_to_have_empty_host(host)
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
+ elif self.projects_not_in_state(state_ack):
+ self.log.error('%s: all states not %s' %
+ (self.session_id, state_ack))
+ self.state = 'MAINTENANCE_FAILED'
+ else:
+ self.actions_to_have_empty_host(host)
def notify_action_done(self, project, instance_id):
instance_ids = instance_id
@@ -463,7 +475,8 @@ class AdminMain(Thread):
time.sleep(5)
def run(self):
- while self.state != 'MAINTENANCE_COMPLETE':
+ while (self.state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+ not self.stopped):
self.log.info('--==session %s: processing state %s==--' %
(self.session_id, self.state))
if self.state == 'MAINTENANCE':
@@ -474,7 +487,8 @@ class AdminMain(Thread):
raise Exception('all projects do not listen maintenance '
'alarm')
self.maintenance()
-
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
maint_at = self.str_to_datetime(self.maintenance_at)
if maint_at > datetime.datetime.utcnow():
time_now = (datetime.datetime.utcnow().strftime(
@@ -492,14 +506,14 @@ class AdminMain(Thread):
# True -> PLANNED_MAINTENANCE
# False -> check if we can migrate VMs to get empty host
# True -> PREPARE_MAINTENANCE
- # False -> DOWN_SCALE
+ # False -> SCALE_IN
maintenance_empty_hosts = ([h for h in self.hosts if h not in
host_servers])
if len(maintenance_empty_hosts) == 0:
- if self.need_down_scale(host_servers):
+ if self.need_in_scale(host_servers):
self.log.info('Need to down scale')
- self.state = 'DOWN_SCALE'
+ self.state = 'SCALE_IN'
else:
self.log.info('Free capacity, but need empty host')
self.state = 'PREPARE_MAINTENANCE'
@@ -508,14 +522,17 @@ class AdminMain(Thread):
self.state = 'PLANNED_MAINTENANCE'
self.log.info('--==State change from MAINTENANCE to %s==--'
% self.state)
- elif self.state == 'DOWN_SCALE':
+ elif self.state == 'SCALE_IN':
# Test case is hard coded to have all compute capacity used
# We need to down scale to have one empty compute host
- self.down_scale()
+ self.update_server_info()
+ self.in_scale()
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
self.state = 'PREPARE_MAINTENANCE'
host_servers = self.update_server_info()
self.servers_log_info(host_servers)
- self.log.info('--==State change from DOWN_SCALE to'
+ self.log.info('--==State change from SCALE_IN to'
' %s==--' % self.state)
elif self.state == 'PREPARE_MAINTENANCE':
@@ -527,7 +544,7 @@ class AdminMain(Thread):
host_servers])
if len(maintenance_empty_hosts) == 0:
self.log.info('no empty hosts for maintenance')
- if self.need_down_scale(host_servers):
+ if self.need_in_scale(host_servers):
raise Exception('Admin tool session %s: Not enough '
'free capacity for maintenance' %
self.session_id)
@@ -535,6 +552,8 @@ class AdminMain(Thread):
if host:
self.make_compute_host_empty(host, host_servers[host],
'PREPARE_MAINTENANCE')
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
else:
# We do not currently support another down scale if
# first was not enough
@@ -566,6 +585,7 @@ class AdminMain(Thread):
maintenance_empty_hosts.append(host)
self.log.info('--==Start to maintain empty hosts==--\n%s' %
maintenance_empty_hosts)
+ self.update_server_info()
for host in maintenance_empty_hosts:
# scheduler has problems, let's see if just down scaled
# host is really empty
@@ -586,6 +606,8 @@ class AdminMain(Thread):
self.log.info('PLANNED_MAINTENANCE host %s' % host)
self.make_compute_host_empty(host, host_servers[host],
'PLANNED_MAINTENANCE')
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
self.log.info('IN_MAINTENANCE host %s' % host)
self._admin_notify(admin_project, host, 'IN_MAINTENANCE',
self.session_id)
@@ -603,14 +625,16 @@ class AdminMain(Thread):
self.log.info('Projects still need to up scale back to full '
'capcity')
self.maintenance_complete()
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
host_servers = self.update_server_info()
self.servers_log_info(host_servers)
- self.state = 'MAINTENANCE_COMPLETE'
+ self.state = 'MAINTENANCE_DONE'
else:
raise Exception('Admin tool session %s: session in invalid '
'state %s' % (self.session_id, self.state))
- self.log.info('--==Maintenance session %s: '
- 'MAINTENANCE SESSION COMPLETE==--' % self.session_id)
+ self.log.info('--==Maintenance session %s: %s==--' %
+ (self.session_id, self.state))
def project_input(self, project_id, data):
self.log.debug('Admin tool session %s: project %s input' %
@@ -637,7 +661,6 @@ class AdminTool(Thread):
self.admin_tool = admin_tool
self.log = log
self.conf = conf
- self.port = self.conf.admin_tool.port
self.maint_sessions = {}
self.projects = {}
self.maintenance_hosts = []
@@ -650,63 +673,55 @@ class AdminTool(Thread):
def admin_maintenance_api_post():
data = json.loads(request.data.decode('utf8'))
self.log.info('maintenance message: %s' % data)
- if 'session_id' in data:
- if data['state'] == 'REMOVE_MAINTENANCE_SESSION':
- session_id = data['session_id']
- self.log.info('remove session %s'
- % session_id)
- self.maint_sessions[session_id].cleanup()
- self.maint_sessions[session_id].stop()
- del self.maint_sessions[session_id]
- else:
- session_id = str(generate_uuid())
- self.log.info('creating session: %s' % session_id)
- self.maint_sessions[session_id] = (
- AdminMain(self.trasport_url,
- session_id,
- data,
- self,
- self.conf,
- self.log))
- self.maint_sessions[session_id].start()
+ session_id = str(generate_uuid())
+ self.log.info('creating session: %s' % session_id)
+ self.maint_sessions[session_id] = (
+ AdminMain(self.trasport_url,
+ session_id,
+ data,
+ self,
+ self.conf,
+ self.log))
+ self.maint_sessions[session_id].start()
reply = json.dumps({'session_id': session_id,
'state': 'ACK_%s' % data['state']})
self.log.debug('reply: %s' % reply)
return reply, 200, None
- @app.route('/maintenance', methods=['GET'])
- def admin_maintenance_api_get():
- data = json.loads(request.data.decode('utf8'))
- self.log.debug('Admin get maintenance: %s' % data)
- session_id = data['session_id']
+ @app.route('/maintenance/<session_id>', methods=['GET'])
+ def admin_maintenance_api_get(session_id=None):
+ self.log.debug('Admin get maintenance')
reply = json.dumps({'state':
self.maint_sessions[session_id].state})
- self.log.debug('reply: %s' % reply)
+ self.log.info('reply: %s' % reply)
return reply, 200, None
- @app.route('/<projet_id>/maintenance', methods=['PUT'])
- def project_maintenance_api_put(projet_id=None):
+ @app.route('/maintenance/<session_id>/<projet_id>', methods=['PUT'])
+ def project_maintenance_api_put(session_id=None, projet_id=None):
data = json.loads(request.data.decode('utf8'))
self.log.debug('%s project put: %s' % (projet_id, data))
- self.project_input(projet_id, data)
+ self.project_input(session_id, projet_id, data)
return 'OK'
- @app.route('/<projet_id>/maintenance', methods=['GET'])
- def project_maintenance_api_get(projet_id=None):
- data = json.loads(request.data.decode('utf8'))
- self.log.debug('%s project get %s' % (projet_id, data))
- instances = self.project_get_instances(projet_id, data)
+ @app.route('/maintenance/<session_id>/<projet_id>', methods=['GET'])
+ def project_maintenance_api_get(session_id=None, projet_id=None):
+ self.log.debug('%s project get %s' % (projet_id, session_id))
+ instances = self.project_get_instances(session_id, projet_id)
reply = json.dumps({'instance_ids': instances})
self.log.debug('%s reply: %s' % (projet_id, reply))
return reply, 200, None
+ @app.route('/maintenance/<session_id>', methods=['DELETE'])
+ def remove_session(session_id=None):
+ self.log.info('remove session %s'
+ % session_id)
+ self.maint_sessions[session_id].cleanup()
+ self.maint_sessions[session_id].stop()
+ del self.maint_sessions[session_id]
+ return 'OK'
+
@app.route('/shutdown', methods=['POST'])
def shutdown():
- for session in self.maint_sessions:
- self.log.info('shutdown admin tool session %s thread' %
- session)
- self.maint_sessions[session].cleanup()
- self.maint_sessions[session].stop()
self.log.info('shutdown admin_tool server at %s' % time.time())
func = request.environ.get('werkzeug.server.shutdown')
if func is None:
@@ -714,13 +729,11 @@ class AdminTool(Thread):
func()
return 'admin_tool app shutting down...'
- app.run(host='0.0.0.0', port=self.port)
+ app.run(host=self.conf.admin_tool.ip, port=self.conf.admin_tool.port)
- def project_input(self, project_id, data):
- session_id = data['session_id']
+ def project_input(self, session_id, project_id, data):
self.maint_sessions[session_id].project_input(project_id, data)
- def project_get_instances(self, project_id, data):
- session_id = data['session_id']
+ def project_get_instances(self, session_id, project_id):
return self.maint_sessions[session_id].project_get_instances(
project_id)
diff --git a/doctor_tests/app_manager/__init__.py b/doctor_tests/app_manager/__init__.py
index 717d6587..c2f75918 100644
--- a/doctor_tests/app_manager/__init__.py
+++ b/doctor_tests/app_manager/__init__.py
@@ -8,12 +8,13 @@
##############################################################################
from oslo_config import cfg
from oslo_utils import importutils
+import os
OPTS = [
cfg.StrOpt('type',
- default='sample',
- choices=['sample'],
+ default=os.environ.get('APP_MANAGER_TYPE', 'sample'),
+ choices=['sample', 'vnfm'],
help='the component of doctor app manager',
required=True),
cfg.StrOpt('ip',
@@ -28,7 +29,8 @@ OPTS = [
_app_manager_name_class_mapping = {
- 'sample': 'doctor_tests.app_manager.sample.SampleAppManager'
+ 'sample': 'doctor_tests.app_manager.sample.SampleAppManager',
+ 'vnfm': 'doctor_tests.app_manager.vnfm.VNFM',
}
diff --git a/doctor_tests/app_manager/sample.py b/doctor_tests/app_manager/sample.py
index 94926ee2..7ca35b97 100644
--- a/doctor_tests/app_manager/sample.py
+++ b/doctor_tests/app_manager/sample.py
@@ -17,6 +17,7 @@ import requests
from doctor_tests.app_manager.base import BaseAppManager
from doctor_tests.identity_auth import get_identity_auth
from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import neutron_client
from doctor_tests.os_clients import nova_client
@@ -56,12 +57,16 @@ class AppManager(Thread):
self.app_manager = app_manager
self.log = log
self.intance_ids = None
+ self.auth = get_identity_auth(project=self.conf.doctor_project)
+ self.session = get_session(auth=self.auth)
+ self.nova = nova_client(self.conf.nova_version,
+ self.session)
+ self.neutron = neutron_client(session=self.session)
self.headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
- self.auth = get_identity_auth(project=self.conf.doctor_project)
- self.nova = nova_client(self.conf.nova_version,
- get_session(auth=self.auth))
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
self.orig_number_of_instances = self.number_of_instances()
self.ha_instances = self.get_ha_instances()
self.floating_ip = None
@@ -85,7 +90,13 @@ class AppManager(Thread):
if instance.id != self.active_instance_id:
self.log.info('Switch over to: %s %s' % (instance.name,
instance.id))
- instance.add_floating_ip(self.floating_ip)
+ # Deprecated, need to use neutron instead
+ # instance.add_floating_ip(self.floating_ip)
+ port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id'] # noqa
+ floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id'] # noqa
+ self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}}) # noqa
+ # Have to update ha_instances as floating_ip changed
+ self.ha_instances = self.get_ha_instances()
self.active_instance_id = instance.id
break
@@ -114,8 +125,7 @@ class AppManager(Thread):
for t in data['reason_data']['event']['traits']})
def get_session_instance_ids(self, url, session_id):
- data = {'session_id': session_id}
- ret = requests.get(url, data=json.dumps(data), headers=self.headers)
+ ret = requests.get(url, data=None, headers=self.headers)
if ret.status_code != 200:
raise Exception(ret.text)
self.log.info('get_instance_ids %s' % ret.json())
@@ -155,7 +165,7 @@ class AppManager(Thread):
data = json.loads(request.data.decode('utf8'))
try:
payload = self._alarm_traits_decoder(data)
- except:
+ except Exception:
payload = ({t[0]: t[2] for t in
data['reason_data']['event']['traits']})
self.log.error('cannot parse alarm data: %s' % payload)
@@ -177,12 +187,12 @@ class AppManager(Thread):
reply['instance_ids'] = instance_ids
reply_state = 'ACK_MAINTENANCE'
- elif state == 'DOWN_SCALE':
+ elif state == 'SCALE_IN':
# scale down 2 isntances that is VCPUS equaling to single
# compute node
self.scale_instances(-2)
reply['instance_ids'] = self.get_instance_ids()
- reply_state = 'ACK_DOWN_SCALE'
+ reply_state = 'ACK_SCALE_IN'
elif state == 'MAINTENANCE_COMPLETE':
# possibly need to upscale
diff --git a/doctor_tests/app_manager/vnfm.py b/doctor_tests/app_manager/vnfm.py
new file mode 100644
index 00000000..68fdbb88
--- /dev/null
+++ b/doctor_tests/app_manager/vnfm.py
@@ -0,0 +1,441 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from flask import Flask
+from flask import request
+import json
+import requests
+from threading import Thread
+import time
+import uuid
+import yaml
+
+from doctor_tests.app_manager.base import BaseAppManager
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import neutron_client
+from doctor_tests.os_clients import nova_client
+from doctor_tests.os_clients import keystone_client
+
+
+class VNFM(BaseAppManager):
+
+ def __init__(self, stack, conf, log):
+ super(VNFM, self).__init__(conf, log)
+ self.stack = stack
+ self.app = None
+
+ def start(self):
+ self.log.info('VNFM start......')
+ self.app = VNFManager(self.stack, self.conf, self, self.log)
+ self.app.start()
+
+ def stop(self):
+ self.log.info('VNFM stop......')
+ if not self.app:
+ return
+ self.app.delete_constraints()
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ }
+ url = 'http://%s:%d/shutdown'\
+ % (self.conf.app_manager.ip,
+ self.conf.app_manager.port)
+ requests.post(url, data='', headers=headers)
+
+
+class VNFManager(Thread):
+
+ def __init__(self, stack, conf, app_manager, log):
+ Thread.__init__(self)
+ self.stack = stack
+ self.conf = conf
+ self.port = self.conf.app_manager.port
+ self.app_manager = app_manager
+ self.log = log
+ self.instance_ids = None
+ self.auth = get_identity_auth(project=self.conf.doctor_project)
+ self.session = get_session(auth=self.auth)
+ self.keystone = keystone_client(
+ self.conf.keystone_version, self.session)
+ self.nova = nova_client(self.conf.nova_version,
+ self.session)
+ self.neutron = neutron_client(session=self.session)
+ self.headers = {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json'}
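+ # The Fenix admin API is Keystone-authenticated, so keep a token in the request headers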
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ self.orig_number_of_instances = self.number_of_instances()
+ # List of instances
+ self.ha_instances = []
+ self.nonha_instances = []
+ # Different instance_id specific constraints {instance_id: {},...}
+ self.instance_constraints = None
+ # Update existing instances to instance lists
+ self.update_instances()
+ nonha_instances = len(self.nonha_instances)
+ if nonha_instances < 7:
+ self.scale = 2
+ self.max_impacted = 2
+ else:
+ self.scale = int((nonha_instances) / 2)
+ self.max_impacted = self.scale - 1
+ self.log.info('Init nonha_instances: %s scale: %s: max_impacted %s' %
+ (nonha_instances, self.scale, self.max_impacted))
+ # Different instance groups constraints dict
+ self.ha_group = None
+ self.nonha_group = None
+ # Floating IP used in HA instance
+ self.floating_ip = None
+ # VNF project_id
+ self.project_id = None
+ # HA instance_id that is active / has floating IP
+ self.active_instance_id = self.active_instance_id()
+
+ services = self.keystone.services.list()
+ for service in services:
+ if service.type == 'maintenance':
+ self.log.info('maintenance service: %s:%s type %s'
+ % (service.name, service.id, service.type))
+ maint_id = service.id
+ self.maint_endpoint = [ep.url for ep in self.keystone.endpoints.list()
+ if ep.service_id == maint_id and
+ ep.interface == 'public'][0]
+ self.log.info('maintenance endpoint: %s' % self.maint_endpoint)
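+ # Simple flag used as a lock so concurrent update_constraints() calls do not interleave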
+ self.update_constraints_lock = False
+ self.update_constraints()
+
+ def delete_remote_instance_constraints(self, instance_id):
+ url = "%s/instance/%s" % (self.maint_endpoint, instance_id)
+ self.log.info('DELETE: %s' % url)
+ ret = requests.delete(url, data=None, headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def update_remote_instance_constraints(self, instance):
+ url = "%s/instance/%s" % (self.maint_endpoint, instance["instance_id"])
+ self.log.info('PUT: %s' % url)
+ ret = requests.put(url, data=json.dumps(instance),
+ headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def delete_remote_group_constraints(self, instance_group):
+ url = "%s/instance_group/%s" % (self.maint_endpoint,
+ instance_group["group_id"])
+ self.log.info('DELETE: %s' % url)
+ ret = requests.delete(url, data=None, headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def update_remote_group_constraints(self, instance_group):
+ url = "%s/instance_group/%s" % (self.maint_endpoint,
+ instance_group["group_id"])
+ self.log.info('PUT: %s' % url)
+ ret = requests.put(url, data=json.dumps(instance_group),
+ headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def delete_constraints(self):
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ for instance_id in self.instance_constraints:
+ self.delete_remote_instance_constraints(instance_id)
+ self.delete_remote_group_constraints(self.nonha_group)
+ self.delete_remote_group_constraints(self.ha_group)
+
+ def update_constraints(self):
+ while self.update_constraints_lock:
+ self.log.info('Waiting update_constraints_lock...')
+ time.sleep(1)
+ self.update_constraints_lock = True
+ self.log.info('Update constraints')
+ if self.project_id is None:
+ self.project_id = self.keystone.projects.list(
+ name=self.conf.doctor_project)[0].id
+ if self.nonha_group is None:
+ # Nova does not support grouping instances that do not belong to
+ # anti-affinity server_groups. However, all instances need grouping
+ self.nonha_group = {
+ "group_id": str(uuid.uuid4()),
+ "project_id": self.project_id,
+ "group_name": "doctor_nonha_app_group",
+ "anti_affinity_group": False,
+ "max_instances_per_host": 0,
+ "max_impacted_members": self.max_impacted,
+ "recovery_time": 2,
+ "resource_mitigation": True}
+ self.log.info('create doctor_nonha_app_group constraints: %s'
+ % self.nonha_group)
+ self.update_remote_group_constraints(self.nonha_group)
+ if self.ha_group is None:
+ group_id = [sg.id for sg in self.nova.server_groups.list()
+ if sg.name == "doctor_ha_app_group"][0]
+ self.ha_group = {
+ "group_id": group_id,
+ "project_id": self.project_id,
+ "group_name": "doctor_ha_app_group",
+ "anti_affinity_group": True,
+ "max_instances_per_host": 1,
+ "max_impacted_members": 1,
+ "recovery_time": 4,
+ "resource_mitigation": True}
+ self.log.info('create doctor_ha_app_group constraints: %s'
+ % self.ha_group)
+ self.update_remote_group_constraints(self.ha_group)
+ instance_constraints = {}
+ for ha_instance in self.ha_instances:
+ instance = {
+ "instance_id": ha_instance.id,
+ "project_id": self.project_id,
+ "group_id": self.ha_group["group_id"],
+ "instance_name": ha_instance.name,
+ "max_interruption_time": 120,
+ "migration_type": "MIGRATE",
+ "resource_mitigation": True,
+ "lead_time": 40}
+ self.log.info('create ha instance constraints: %s'
+ % instance)
+ instance_constraints[ha_instance.id] = instance
+ for nonha_instance in self.nonha_instances:
+ instance = {
+ "instance_id": nonha_instance.id,
+ "project_id": self.project_id,
+ "group_id": self.nonha_group["group_id"],
+ "instance_name": nonha_instance.name,
+ "max_interruption_time": 120,
+ "migration_type": "MIGRATE",
+ "resource_mitigation": True,
+ "lead_time": 40}
+ self.log.info('create nonha instance constraints: %s'
+ % instance)
+ instance_constraints[nonha_instance.id] = instance
+ if not self.instance_constraints:
+ # Initial instance constraints
+ self.log.info('create initial instances constraints...')
+ for instance in [instance_constraints[i] for i
+ in instance_constraints]:
+ self.update_remote_instance_constraints(instance)
+ self.instance_constraints = instance_constraints.copy()
+ else:
+ self.log.info('check instances constraints changes...')
+ added = [i for i in instance_constraints.keys()
+ if i not in self.instance_constraints]
+ deleted = [i for i in self.instance_constraints.keys()
+ if i not in instance_constraints]
+ modified = [i for i in instance_constraints.keys()
+ if (i not in added and i not in deleted and
+ instance_constraints[i] !=
+ self.instance_constraints[i])]
+ for instance_id in deleted:
+ self.delete_remote_instance_constraints(instance_id)
+ updated = added + modified
+ for instance in [instance_constraints[i] for i in updated]:
+ self.update_remote_instance_constraints(instance)
+ if updated or deleted:
+ # Some instance constraints have changed
+ self.instance_constraints = instance_constraints.copy()
+ self.update_constraints_lock = False
+
+ def active_instance_id(self):
+ # Need to retry as it takes time after the Heat template completes
+ # before the floating IP is in place
+ retry = 5
+ while retry > 0:
+ for instance in self.ha_instances:
+ network_interfaces = next(iter(instance.addresses.values()))
+ for network_interface in network_interfaces:
+ _type = network_interface.get('OS-EXT-IPS:type')
+ if _type == "floating":
+ if not self.floating_ip:
+ self.floating_ip = network_interface.get('addr')
+ self.log.debug('active_instance: %s %s' %
+ (instance.name, instance.id))
+ return instance.id
+ time.sleep(2)
+ self.update_instances()
+ retry -= 1
+ raise Exception("No active instance found")
+
+ def switch_over_ha_instance(self):
+ for instance in self.ha_instances:
+ if instance.id != self.active_instance_id:
+ self.log.info('Switch over to: %s %s' % (instance.name,
+ instance.id))
+ # Deprecated, need to use neutron instead
+ # instance.add_floating_ip(self.floating_ip)
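+ # Re-associate the floating IP with the standby instance's port via Neutron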
+ port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id'] # noqa
+ floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id'] # noqa
+ self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}}) # noqa
+ # Have to update ha_instances as floating_ip changed
+ self.update_instances()
+ self.active_instance_id = instance.id
+ break
+
+ def get_instance_ids(self):
+ ret = list()
+ for instance in self.nova.servers.list(detailed=False):
+ ret.append(instance.id)
+ return ret
+
+ def update_instances(self):
+ instances = self.nova.servers.list(detailed=True)
+ self.ha_instances = [i for i in instances
+ if "doctor_ha_app_" in i.name]
+ self.nonha_instances = [i for i in instances
+ if "doctor_nonha_app_" in i.name]
+
+ def _alarm_data_decoder(self, data):
+ if "[" in data or "{" in data:
+ # string to list or dict removing unicode
+ data = yaml.load(data.replace("u'", "'"))
+ return data
+
+ def _alarm_traits_decoder(self, data):
+ return ({str(t[0]): self._alarm_data_decoder(str(t[2]))
+ for t in data['reason_data']['event']['traits']})
+
+ def get_session_instance_ids(self, url, session_id):
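+ # Ask the maintenance session (via its url) which instance ids it covers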
+ ret = requests.get(url, data=None, headers=self.headers)
+ if ret.status_code != 200:
+ raise Exception(ret.text)
+ self.log.info('get_instance_ids %s' % ret.json())
+ return ret.json()['instance_ids']
+
+ def scale_instances(self, number_of_instances):
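+ # Scale the VNF by adjusting the Heat stack parameter that controls the number of non-HA instances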
+ number_of_instances_before = self.number_of_instances()
+
+ parameters = self.stack.parameters
+ parameters['nonha_intances'] += number_of_instances
+ self.stack.update(self.stack.stack_name,
+ self.stack.stack_id,
+ self.stack.template,
+ parameters=parameters,
+ files=self.stack.files)
+
+ number_of_instances_after = self.number_of_instances()
+ if (number_of_instances_before + number_of_instances !=
+ number_of_instances_after):
+ self.log.error('scale_instances with: %d from: %d ends up to: %d'
+ % (number_of_instances, number_of_instances_before,
+ number_of_instances_after))
+ raise Exception('scale_instances failed')
+
+ self.log.info('scaled instances from %d to %d' %
+ (number_of_instances_before,
+ number_of_instances_after))
+
+ def number_of_instances(self):
+ return len(self.nova.servers.list(detailed=False))
+
+ def run(self):
+ app = Flask('VNFM')
+
+ @app.route('/maintenance', methods=['POST'])
+ def maintenance_alarm():
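+ # Callback for maintenance state notifications delivered to the VNFM as alarms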
+ data = json.loads(request.data.decode('utf8'))
+ try:
+ payload = self._alarm_traits_decoder(data)
+ except Exception:
+ payload = ({t[0]: t[2] for t in
+ data['reason_data']['event']['traits']})
+ self.log.error('cannot parse alarm data: %s' % payload)
+ raise Exception('VNFM cannot parse alarm.'
+ ' Possibly trait data over 256 char')
+
+ self.log.info('VNFM received data = %s' % payload)
+
+ state = payload['state']
+ reply_state = None
+ reply = dict()
+
+ self.log.info('VNFM state: %s' % state)
+
+ if state == 'MAINTENANCE':
+ instance_ids = (self.get_session_instance_ids(
+ payload['instance_ids'],
+ payload['session_id']))
+ my_instance_ids = self.get_instance_ids()
+ invalid_instances = (
+ [instance_id for instance_id in instance_ids
+ if instance_id not in my_instance_ids])
+ if invalid_instances:
+ self.log.error('Invalid instances: %s' % invalid_instances)
+ reply_state = 'NACK_MAINTENANCE'
+ else:
+ reply_state = 'ACK_MAINTENANCE'
+
+ elif state == 'SCALE_IN':
+ # scale down "self.scale" instances that is VCPUS equaling
+ # at least a single compute node
+ self.scale_instances(-self.scale)
+ reply_state = 'ACK_SCALE_IN'
+
+ elif state == 'MAINTENANCE_COMPLETE':
+ # possibly need to upscale
+ number_of_instances = self.number_of_instances()
+ if self.orig_number_of_instances > number_of_instances:
+ scale_instances = (self.orig_number_of_instances -
+ number_of_instances)
+ self.scale_instances(scale_instances)
+ reply_state = 'ACK_MAINTENANCE_COMPLETE'
+
+ elif state == 'PREPARE_MAINTENANCE':
+ # TBD from constraints
+ if "MIGRATE" not in payload['allowed_actions']:
+ raise Exception('MIGRATE not supported')
+ instance_ids = payload['instance_ids'][0]
+ self.log.info('VNFM got instance: %s' % instance_ids)
+ if instance_ids == self.active_instance_id:
+ self.switch_over_ha_instance()
+ # optional also in constraints
+ reply['instance_action'] = "MIGRATE"
+ reply_state = 'ACK_PREPARE_MAINTENANCE'
+
+ elif state == 'PLANNED_MAINTENANCE':
+ # TBD from constraints
+ if "MIGRATE" not in payload['allowed_actions']:
+ raise Exception('MIGRATE not supported')
+ instance_ids = payload['instance_ids'][0]
+ self.log.info('VNFM got instance: %s' % instance_ids)
+ if instance_ids == self.active_instance_id:
+ self.switch_over_ha_instance()
+ # optional also in constraints
+ reply['instance_action'] = "MIGRATE"
+ reply_state = 'ACK_PLANNED_MAINTENANCE'
+
+ elif state == 'INSTANCE_ACTION_DONE':
+ # TBD was action done in allowed window
+ self.log.info('%s' % payload['instance_ids'])
+ else:
+ raise Exception('VNFM received event with'
+ ' unknown state %s' % state)
+
+ if reply_state:
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ reply['state'] = reply_state
+ url = payload['reply_url']
+ self.log.info('VNFM reply: %s' % reply)
+ requests.put(url, data=json.dumps(reply), headers=self.headers)
+
+ return 'OK'
+
+ @app.route('/shutdown', methods=['POST'])
+ def shutdown():
+ self.log.info('shutdown VNFM server at %s' % time.time())
+ func = request.environ.get('werkzeug.server.shutdown')
+ if func is None:
+ raise RuntimeError('Not running with the Werkzeug Server')
+ func()
+ return 'VNFM shutting down...'
+
+ app.run(host="0.0.0.0", port=self.port)
diff --git a/doctor_tests/common/constants.py b/doctor_tests/common/constants.py
index 088ff633..201f3fc4 100644
--- a/doctor_tests/common/constants.py
+++ b/doctor_tests/common/constants.py
@@ -12,6 +12,10 @@ from collections import namedtuple
Host = namedtuple('Host', ['name', 'ip'])
+def is_fenix(conf):
+ return conf.admin_tool.type == 'fenix'
+
+
class Inspector(object):
CONGRESS = 'congress'
SAMPLE = 'sample'
diff --git a/doctor_tests/common/utils.py b/doctor_tests/common/utils.py
index 1a8840dd..67ca4f4b 100644
--- a/doctor_tests/common/utils.py
+++ b/doctor_tests/common/utils.py
@@ -10,6 +10,7 @@ import json
import os
import paramiko
import re
+import subprocess
def load_json_file(full_path):
@@ -97,6 +98,27 @@ class SSHClient(object):
ftp.close()
+class LocalSSH(object):
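+ # Minimal stand-in for SSHClient that runs commands and copies files on the local host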
+
+ def __init__(self, log):
+ self.log = log
+ self.log.info('Init local ssh client')
+
+ def ssh(self, cmd):
+ ret = 0
+ output = "%s failed!!!" % cmd
+ try:
+ output = subprocess.check_output((cmd), shell=True,
+ universal_newlines=True)
+ except subprocess.CalledProcessError:
+ ret = 1
+ return ret, output
+
+ def scp(self, src_file, dst_file):
+ return subprocess.check_output("cp %s %s" % (src_file, dst_file),
+ shell=True)
+
+
def run_async(func):
from threading import Thread
from functools import wraps
diff --git a/doctor_tests/image.py b/doctor_tests/image.py
index 9961b22d..50841ef6 100644
--- a/doctor_tests/image.py
+++ b/doctor_tests/image.py
@@ -7,7 +7,11 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
import os
-import urllib.request
+try:
+ from urllib.request import urlopen
+except Exception:
+ from urllib2 import urlopen
+
from oslo_config import cfg
@@ -46,11 +50,14 @@ class Image(object):
def create(self):
self.log.info('image create start......')
-
images = {image.name: image for image in self.glance.images.list()}
+ if self.conf.image_name == 'cirros':
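+ # Reuse an existing cirros image already registered in Glance instead of downloading a new one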
+ cirros = [image for image in images if 'cirros' in image]
+ if cirros:
+ self.conf.image_name = cirros[0]
if self.conf.image_name not in images:
if not os.path.exists(self.conf.image_filename):
- resp = urllib.request.urlopen(self.conf.image_download_url)
+ resp = urlopen(self.conf.image_download_url)
with open(self.conf.image_filename, "wb") as file:
file.write(resp.read())
self.image = \
diff --git a/doctor_tests/inspector/__init__.py b/doctor_tests/inspector/__init__.py
index 31291baf..50365a61 100644
--- a/doctor_tests/inspector/__init__.py
+++ b/doctor_tests/inspector/__init__.py
@@ -42,6 +42,10 @@ _inspector_name_class_mapping = {
}
-def get_inspector(conf, log):
+def get_inspector(conf, log, transport_url=None):
inspector_class = _inspector_name_class_mapping[conf.inspector.type]
- return importutils.import_object(inspector_class, conf, log)
+ if conf.inspector.type == 'sample':
+ return importutils.import_object(inspector_class, conf, log,
+ transport_url)
+ else:
+ return importutils.import_object(inspector_class, conf, log)
diff --git a/doctor_tests/inspector/sample.py b/doctor_tests/inspector/sample.py
index a55a12b7..c44db95d 100644
--- a/doctor_tests/inspector/sample.py
+++ b/doctor_tests/inspector/sample.py
@@ -10,6 +10,7 @@ import collections
from flask import Flask
from flask import request
import json
+import oslo_messaging
import time
from threading import Thread
import requests
@@ -26,7 +27,7 @@ from doctor_tests.inspector.base import BaseInspector
class SampleInspector(BaseInspector):
event_type = 'compute.host.down'
- def __init__(self, conf, log):
+ def __init__(self, conf, log, transport_url):
super(SampleInspector, self).__init__(conf, log)
self.inspector_url = self.get_inspector_url()
self.novaclients = list()
@@ -43,6 +44,17 @@ class SampleInspector(BaseInspector):
self.hostnames = list()
self.app = None
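+ # Optional notifier for emitting compute.instance.update notifications on host failure;
+ # left as None if the transport cannot be set up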
+ try:
+ transport = oslo_messaging.get_notification_transport(self.conf,
+ transport_url)
+ self.notif = oslo_messaging.Notifier(transport,
+ 'compute.instance.update',
+ driver='messaging',
+ topics=['notifications'])
+ self.notif = self.notif.prepare(publisher_id='sample')
+ except Exception:
+ self.notif = None
+
def _init_novaclients(self):
self.NUMBER_OF_CLIENTS = self.conf.instance_count
auth = get_identity_auth(project=self.conf.doctor_project)
@@ -54,13 +66,13 @@ class SampleInspector(BaseInspector):
def _init_servers_list(self):
self.servers.clear()
opts = {'all_tenants': True}
- servers = self.nova.servers.list(search_opts=opts)
+ servers = self.nova.servers.list(detailed=True, search_opts=opts)
for server in servers:
try:
host = server.__dict__.get('OS-EXT-SRV-ATTR:host')
self.servers[host].append(server)
self.log.debug('get hostname=%s from server=%s'
- % (host, server))
+ % (host, str(server.name)))
except Exception as e:
self.log.info('can not get hostname from server=%s, error=%s'
% (server, e))
@@ -97,10 +109,14 @@ class SampleInspector(BaseInspector):
event_type = event['type']
if event_type == self.event_type:
self.hostnames.append(hostname)
+ if self.notif is not None:
+ thr0 = self._send_notif(hostname)
thr1 = self._disable_compute_host(hostname)
thr2 = self._vms_reset_state('error', hostname)
if self.conf.inspector.update_neutron_port_dp_status:
thr3 = self._set_ports_data_plane_status('DOWN', hostname)
+ if self.notif is not None:
+ thr0.join()
thr1.join()
thr2.join()
if self.conf.inspector.update_neutron_port_dp_status:
@@ -119,7 +135,7 @@ class SampleInspector(BaseInspector):
def maintenance(self, data):
try:
payload = self._alarm_traits_decoder(data)
- except:
+ except Exception:
payload = ({t[0]: t[2] for t in
data['reason_data']['event']['traits']})
self.log.error('cannot parse alarm data: %s' % payload)
@@ -156,8 +172,8 @@ class SampleInspector(BaseInspector):
nova.servers.reset_state(server, state)
vmdown_time = time.time()
self.vm_down_time = vmdown_time
- self.log.info('doctor mark vm(%s) error at %s'
- % (server, vmdown_time))
+ self.log.info('doctor mark vm(%s) %s at %s'
+ % (server, state, vmdown_time))
thrs = []
for nova, server in zip(self.novaclients, self.servers[hostname]):
@@ -167,6 +183,26 @@ class SampleInspector(BaseInspector):
t.join()
@utils.run_async
+ def _send_notif(self, hostname):
+
+ @utils.run_async
+ def _send_notif(server):
+ payload = dict(tenant_id=server.tenant_id,
+ instance_id=server.id,
+ state="error")
+ self.notif.info({'some': 'context'}, 'compute.instance.update',
+ payload)
+ self.log.info('doctor compute.instance.update vm(%s) error %s'
+ % (server, time.time()))
+
+ thrs = []
+ for server in self.servers[hostname]:
+ t = _send_notif(server)
+ thrs.append(t)
+ for t in thrs:
+ t.join()
+
+ @utils.run_async
def _set_ports_data_plane_status(self, status, hostname):
body = {'data_plane_status': status}
diff --git a/doctor_tests/installer/__init__.py b/doctor_tests/installer/__init__.py
index 2b9ad83d..00a01667 100644
--- a/doctor_tests/installer/__init__.py
+++ b/doctor_tests/installer/__init__.py
@@ -13,8 +13,8 @@ from oslo_utils import importutils
OPTS = [
cfg.StrOpt('type',
- default=os.environ.get('INSTALLER_TYPE', 'local'),
- choices=['local', 'apex', 'daisy', 'fuel'],
+ default=os.environ.get('INSTALLER_TYPE', 'devstack'),
+ choices=['apex', 'daisy', 'fuel', 'devstack'],
help='the type of installer',
required=True),
cfg.StrOpt('ip',
@@ -28,10 +28,10 @@ OPTS = [
_installer_name_class_mapping = {
- 'local': 'doctor_tests.installer.local.LocalInstaller',
'apex': 'doctor_tests.installer.apex.ApexInstaller',
'daisy': 'doctor_tests.installer.daisy.DaisyInstaller',
- 'fuel': 'doctor_tests.installer.mcp.McpInstaller'
+ 'fuel': 'doctor_tests.installer.mcp.McpInstaller',
+ 'devstack': 'doctor_tests.installer.devstack.DevstackInstaller'
}
diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py
index 2aa81ff9..3ec2100c 100644
--- a/doctor_tests/installer/apex.py
+++ b/doctor_tests/installer/apex.py
@@ -6,10 +6,11 @@
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
-import re
import time
from doctor_tests.common.constants import Inspector
+from doctor_tests.common.constants import is_fenix
+from doctor_tests.common.utils import get_doctor_test_root_dir
from doctor_tests.common.utils import SSHClient
from doctor_tests.installer.base import BaseInstaller
@@ -20,6 +21,7 @@ class ApexInstaller(BaseInstaller):
cm_set_script = 'set_config.py'
nc_set_compute_script = 'set_compute_config.py'
cg_set_script = 'set_congress.py'
+ fe_set_script = 'set_fenix.sh'
cm_restore_script = 'restore_config.py'
nc_restore_compute_script = 'restore_compute_config.py'
cg_restore_script = 'restore_congress.py'
@@ -36,13 +38,13 @@ class ApexInstaller(BaseInstaller):
self.key_file = None
self.controllers = list()
self.computes = list()
- self.controller_clients = list()
- self.compute_clients = list()
def setup(self):
self.log.info('Setup Apex installer start......')
self.key_file = self.get_ssh_key_from_installer()
self._get_overcloud_conf()
+ if is_fenix(self.conf):
+ self._copy_overcloudrc_to_controllers()
self.create_flavor()
self.set_apply_patches()
self.setup_stunnel()
@@ -56,6 +58,11 @@ class ApexInstaller(BaseInstaller):
key_path = '/home/stack/.ssh/id_rsa'
return self._get_ssh_key(self.client, key_path)
+ def _copy_overcloudrc_to_controllers(self):
+ for ip in self.controllers:
+ cmd = "scp overcloudrc %s@%s:" % (self.node_user_name, ip)
+ self._run_cmd_remote(self.client, cmd)
+
def _get_overcloud_conf(self):
self.log.info('Get overcloud config details from Apex installer'
'......')
@@ -83,26 +90,6 @@ class ApexInstaller(BaseInstaller):
host_ips = self._run_cmd_remote(self.client, command)
return host_ips[0]
- def get_transport_url(self):
- client = SSHClient(self.controllers[0], self.node_user_name,
- key_filename=self.key_file)
- if self.use_containers:
- ncbase = "/var/lib/config-data/puppet-generated/nova"
- else:
- ncbase = ""
- command = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase
-
- ret, url = client.ssh(command)
- if ret:
- raise Exception('Exec command to get host ip from controller(%s)'
- 'in Apex installer failed, ret=%s, output=%s'
- % (self.controllers[0], ret, url))
- # need to use ip instead of hostname
- ret = (re.sub("@.*:", "@%s:" % self.controllers[0],
- url[0].split("=", 1)[1]))
- self.log.debug('get_transport_url %s' % ret)
- return ret
-
def _set_docker_restart_cmd(self, service):
# There can be multiple instances running so need to restart all
cmd = "for container in `sudo docker ps | grep "
@@ -113,6 +100,7 @@ class ApexInstaller(BaseInstaller):
def set_apply_patches(self):
self.log.info('Set apply patches start......')
+ fenix_files = None
set_scripts = [self.cm_set_script]
@@ -127,6 +115,10 @@ class ApexInstaller(BaseInstaller):
if self.conf.test_case != 'fault_management':
if self.use_containers:
restart_cmd += self._set_docker_restart_cmd("nova-scheduler")
+ if is_fenix(self.conf):
+ set_scripts.append(self.fe_set_script)
+ testdir = get_doctor_test_root_dir()
+ fenix_files = ["Dockerfile", "run"]
else:
restart_cmd += ' openstack-nova-scheduler.service'
set_scripts.append(self.nc_set_compute_script)
@@ -141,29 +133,34 @@ class ApexInstaller(BaseInstaller):
for node_ip in self.controllers:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self.controller_clients.append(client)
+ if fenix_files is not None:
+ for fenix_file in fenix_files:
+ src_file = '{0}/{1}/{2}'.format(testdir,
+ 'admin_tool/fenix',
+ fenix_file)
+ client.scp(src_file, fenix_file)
self._run_apply_patches(client,
restart_cmd,
set_scripts,
python=self.python)
+ time.sleep(5)
+
+ self.log.info('Set apply patches to compute nodes......')
if self.conf.test_case != 'fault_management':
if self.use_containers:
- restart_cmd = self._set_docker_restart_cmd("nova-compute")
+ restart_cmd = self._set_docker_restart_cmd("nova")
else:
restart_cmd = 'sudo systemctl restart' \
' openstack-nova-compute.service'
for node_ip in self.computes:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self.compute_clients.append(client)
self._run_apply_patches(client,
restart_cmd,
[self.nc_set_compute_script],
python=self.python)
-
- if self.conf.test_case != 'fault_management':
- time.sleep(10)
+ time.sleep(5)
def restore_apply_patches(self):
self.log.info('restore apply patches start......')
@@ -192,39 +189,22 @@ class ApexInstaller(BaseInstaller):
restart_cmd += ' openstack-congress-server.service'
restore_scripts.append(self.cg_restore_script)
- for client, node_ip in zip(self.controller_clients, self.controllers):
- retry = 0
- while retry < 2:
- try:
- self._run_apply_patches(client,
- restart_cmd,
- restore_scripts,
- python=self.python)
- except Exception:
- if retry > 0:
- raise Exception("SSHClient to %s feiled" % node_ip)
- client = SSHClient(node_ip, self.node_user_name,
- key_filename=self.key_file)
- retry += 1
- break
+ for node_ip in self.controllers:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(client,
+ restart_cmd,
+ restore_scripts,
+ python=self.python)
+
if self.conf.test_case != 'fault_management':
if self.use_containers:
restart_cmd = self._set_docker_restart_cmd("nova-compute")
else:
restart_cmd = 'sudo systemctl restart' \
' openstack-nova-compute.service'
- for client, node_ip in zip(self.compute_clients, self.computes):
- retry = 0
- while retry < 2:
- try:
- self._run_apply_patches(
- client, restart_cmd,
- [self.nc_restore_compute_script],
- python=self.python)
- except Exception:
- if retry > 0:
- raise Exception("SSHClient to %s feiled" % node_ip)
- client = SSHClient(node_ip, self.node_user_name,
- key_filename=self.key_file)
- retry += 1
- break
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(
+ client, restart_cmd,
+ [self.nc_restore_compute_script],
+ python=self.python)
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index 30435931..de4d2f2e 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -14,8 +14,9 @@ import pwd
import six
import stat
import subprocess
+import time
-from doctor_tests.common.utils import get_doctor_test_root_dir
+from doctor_tests.common import utils
from doctor_tests.identity_auth import get_session
from doctor_tests.os_clients import nova_client
@@ -75,7 +76,7 @@ class BaseInstaller(object):
cmd = ("ssh -o UserKnownHostsFile=/dev/null"
" -o StrictHostKeyChecking=no"
" -i %s %s@%s -R %s:localhost:%s"
- " sleep %s > ssh_tunnel.%s"
+ " sleep %s > ssh_tunnel.%s.%s"
" 2>&1 < /dev/null "
% (self.key_file,
self.node_user_name,
@@ -83,9 +84,28 @@ class BaseInstaller(object):
port,
port,
tunnel_uptime,
- node_ip))
+ node_ip,
+ port))
server = subprocess.Popen('exec ' + cmd, shell=True)
self.servers.append(server)
+ if self.conf.admin_tool.type == 'fenix':
+ port = self.conf.admin_tool.port
+ self.log.info('tunnel for port %s' % port)
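+ # Forward the local admin tool port to the Fenix service running on the controller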
+ cmd = ("ssh -o UserKnownHostsFile=/dev/null"
+ " -o StrictHostKeyChecking=no"
+ " -i %s %s@%s -L %s:localhost:%s"
+ " sleep %s > ssh_tunnel.%s.%s"
+ " 2>&1 < /dev/null "
+ % (self.key_file,
+ self.node_user_name,
+ node_ip,
+ port,
+ port,
+ tunnel_uptime,
+ node_ip,
+ port))
+ server = subprocess.Popen('exec ' + cmd, shell=True)
+ self.servers.append(server)
def _get_ssh_key(self, client, key_path):
self.log.info('Get SSH keys from %s installer......'
@@ -96,7 +116,8 @@ class BaseInstaller(object):
% self.conf.installer.type)
return self.key_file
- ssh_key = '{0}/{1}'.format(get_doctor_test_root_dir(), 'instack_key')
+ ssh_key = '{0}/{1}'.format(utils.get_doctor_test_root_dir(),
+ 'instack_key')
client.scp(key_path, ssh_key, method='get')
user = getpass.getuser()
uid = pwd.getpwnam(user).pw_uid
@@ -105,6 +126,10 @@ class BaseInstaller(object):
os.chmod(ssh_key, stat.S_IREAD)
return ssh_key
+ @abc.abstractmethod
+ def get_transport_url(self):
+ pass
+
def _run_cmd_remote(self, client, command):
self.log.info('Run command=%s in %s installer......'
% (command, self.conf.installer.type))
@@ -131,19 +156,36 @@ class BaseInstaller(object):
ret = False
return ret
+ @utils.run_async
def _run_apply_patches(self, client, restart_cmd, script_names,
python='python3'):
installer_dir = os.path.dirname(os.path.realpath(__file__))
-
if isinstance(script_names, list):
for script_name in script_names:
script_abs_path = '{0}/{1}/{2}'.format(installer_dir,
'common', script_name)
- client.scp(script_abs_path, script_name)
- cmd = 'sudo %s %s' % (python, script_name)
- ret, output = client.ssh(cmd)
+ if self.conf.installer.type == "devstack":
+ script_name = "/opt/stack/%s" % script_name
+ try:
+ client.scp(script_abs_path, script_name)
+ except Exception:
+ client.scp(script_abs_path, script_name)
+ try:
+ if ".py" in script_name:
+ cmd = 'sudo %s %s' % (python, script_name)
+ else:
+ cmd = 'sudo chmod 700 %s;sudo ./%s' % (script_name,
+ script_name)
+ ret, output = client.ssh(cmd)
+ self.log.info('Command %s output %s' % (cmd, output))
+ except Exception:
+ ret, output = client.ssh(cmd)
+ self.log.info('Command %s output %s' % (cmd, output))
if ret:
raise Exception('Do the command in remote'
' node failed, ret=%s, cmd=%s, output=%s'
% (ret, cmd, output))
+ if 'nova' in restart_cmd or 'devstack@n-' in restart_cmd:
+ # Make sure scheduler has proper cpu_allocation_ratio
+ time.sleep(5)
client.ssh(restart_cmd)
diff --git a/doctor_tests/installer/common/restore_compute_config.py b/doctor_tests/installer/common/restore_compute_config.py
index 0e9939fd..82e10a66 100644
--- a/doctor_tests/installer/common/restore_compute_config.py
+++ b/doctor_tests/installer/common/restore_compute_config.py
@@ -11,18 +11,16 @@ import shutil
def restore_cpu_allocation_ratio():
- nova_base = "/var/lib/config-data/puppet-generated/nova"
- if not os.path.isdir(nova_base):
- nova_base = ""
- nova_file = nova_base + '/etc/nova/nova.conf'
- nova_file_bak = nova_base + '/etc/nova/nova.bak'
-
- if not os.path.isfile(nova_file_bak):
- print('Bak_file:%s does not exist.' % nova_file_bak)
- else:
- print('restore: %s' % nova_file)
- shutil.copyfile(nova_file_bak, nova_file)
- os.remove(nova_file_bak)
+ for nova_file_bak in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.bak", # noqa
+ "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.bak", # noqa
+ "/etc/nova/nova.bak"]:
+ if os.path.isfile(nova_file_bak):
+ nova_file = nova_file_bak.replace(".bak", ".conf")
+ print('restoring nova.bak.')
+ shutil.copyfile(nova_file_bak, nova_file)
+ os.remove(nova_file_bak)
+ return
+ print('nova.bak does not exist.')
return
restore_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py
index 86266085..615f1895 100644
--- a/doctor_tests/installer/common/set_compute_config.py
+++ b/doctor_tests/installer/common/set_compute_config.py
@@ -10,37 +10,25 @@ import os
import shutil
-def make_initial_config(service, dest):
- for mk in ["", "/etc", "/%s" % service]:
- dest += mk
- os.mkdir(dest)
- src = "/etc/%s/%s.conf" % (service, service)
- dest += "/%s.conf" % service
- shutil.copyfile(src, dest)
-
-
def set_cpu_allocation_ratio():
- docker_conf_base_dir = "/var/lib/config-data/puppet-generated"
- if not os.path.isdir(docker_conf_base_dir):
- nova_base = ""
- else:
- nova_base = "%s/nova" % docker_conf_base_dir
- if not os.path.isdir(nova_base):
- # nova.conf to be used might not exist
- make_initial_config("nova", nova_base)
- nova_file = nova_base + '/etc/nova/nova.conf'
- nova_file_bak = nova_base + '/etc/nova/nova.bak'
+ nova_file_bak = None
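+ # Probe the containerized (TripleO) nova.conf locations first, then the plain host path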
+ for nova_file in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf", # noqa
+ "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf", # noqa
+ "/etc/nova/nova.conf"]:
+ if os.path.isfile(nova_file):
+ nova_file_bak = nova_file.replace(".conf", ".bak")
+ break
- if not os.path.isfile(nova_file):
- raise Exception("File doesn't exist: %s." % nova_file)
+ if nova_file_bak is None:
+ raise Exception("Could not find nova.conf")
# TODO (tojuvone): Unfortunately ConfigParser did not produce working conf
fcheck = open(nova_file)
found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio"
in ca])
fcheck.close()
+ change = False
+ found = False
if found_list and len(found_list):
- change = False
- found = False
for car in found_list:
if car.startswith('#'):
continue
diff --git a/doctor_tests/installer/common/set_config.py b/doctor_tests/installer/common/set_config.py
index 3dc6cd9a..e66d4c2c 100644
--- a/doctor_tests/installer/common/set_config.py
+++ b/doctor_tests/installer/common/set_config.py
@@ -125,6 +125,7 @@ def set_event_definitions():
'reply_url': {'fields': 'payload.reply_url'},
'actions_at': {'fields': 'payload.actions_at',
'type': 'datetime'},
+ 'reply_at': {'fields': 'payload.reply_at', 'type': 'datetime'},
'state': {'fields': 'payload.state'},
'session_id': {'fields': 'payload.session_id'},
'project_id': {'fields': 'payload.project_id'},
diff --git a/doctor_tests/installer/common/set_fenix.sh b/doctor_tests/installer/common/set_fenix.sh
new file mode 100644
index 00000000..bd1eae47
--- /dev/null
+++ b/doctor_tests/installer/common/set_fenix.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+##############################################################################
+# Copyright (c) 2019 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+# Make sure docker is installed
+docker -v >/dev/null || {
+echo "Fenix needs docker to be installed..."
+ver=`grep "UBUNTU_CODENAME" /etc/os-release | cut -d '=' -f 2`
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $ver stable"
+apt install apt-transport-https ca-certificates curl software-properties-common
+apt update
+apt-cache policy docker-ce
+apt-get install -y docker-ce docker-ce-cli containerd.io
+dpkg -r --force-depends golang-docker-credential-helpers
+}
+
+docker ps | grep fenix -q && {
+REMOTE=`git ls-remote https://opendev.org/x/fenix HEAD | awk '{ print $1}'`
+LOCAL=`docker exec -t fenix git rev-parse @`
+if [[ "$LOCAL" =~ "$REMOTE" ]]; then
+ # The strings differ in trailing characters, so use a substring match instead of equality
+ echo "Fenix start: Already running latest $LOCAL equals $REMOTE"
+ exit 0
+else
+ echo "Fenix container needs to be recreated $LOCAL not $REMOTE"
+ # Remove previous container
+ for img in `docker image list | grep "^fenix" | awk '{print $1}'`; do
+ for dock in `docker ps --all -f "ancestor=$img" | grep "$img" | awk '{print $1}'`; do
+ docker stop $dock; docker rm $dock;
+ done;
+ docker image rm $img;
+ done
+fi
+} || echo "Fenix container needs to be created..."
+
+cp /root/keystonercv3 .
+
+transport=`grep -m1 "^transport" /etc/nova/nova.conf`
+. keystonercv3
+
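+# Write minimal Fenix engine and API configs, reusing Nova's transport and database host
+# and the admin credentials sourced above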
+echo "[DEFAULT]" > fenix.conf
+echo "port = 12347" >> fenix.conf
+echo $transport >> fenix.conf
+
+echo "[database]" >> fenix.conf
+MYSQLIP=`grep -m1 "^connection" /etc/nova/nova.conf | sed -e "s/.*@//;s/\/.*//"`
+echo "connection = mysql+pymysql://fenix:fenix@$MYSQLIP/fenix" >> fenix.conf
+
+echo "[service_user]" >> fenix.conf
+echo "os_auth_url = $OS_AUTH_URL" >> fenix.conf
+echo "os_username = $OS_USERNAME" >> fenix.conf
+echo "os_password = $OS_PASSWORD" >> fenix.conf
+echo "os_user_domain_name = $OS_USER_DOMAIN_NAME" >> fenix.conf
+echo "os_project_name = $OS_PROJECT_NAME" >> fenix.conf
+echo "os_project_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix.conf
+
+echo "[DEFAULT]" > fenix-api.conf
+echo "port = 12347" >> fenix-api.conf
+echo $transport >> fenix-api.conf
+
+echo "[keystone_authtoken]" >> fenix-api.conf
+echo "auth_url = $OS_AUTH_URL" >> fenix-api.conf
+echo "auth_type = password" >> fenix-api.conf
+echo "project_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix-api.conf
+echo "project_name = $OS_PROJECT_NAME" >> fenix-api.conf
+echo "user_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix-api.conf
+echo "password = $OS_PASSWORD" >> fenix-api.conf
+echo "username = $OS_USERNAME" >> fenix-api.conf
+echo "cafile = /opt/stack/data/ca-bundle.pem" >> fenix-api.conf
+
+openstack service list | grep -q maintenance || {
+openstack service create --name fenix --enable maintenance
+openstack endpoint create --region $OS_REGION_NAME --enable fenix public http://localhost:12347/v1
+}
+
+# Mysql pw
+# MYSQLPW=`cat /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.json | grep mysql | grep root_password | awk -F": " '{print $2}' | awk -F"\"" '{print $2}'`
+MYSQLPW=root
+
+# Fenix DB
+[ `mysql -uroot -p$MYSQLPW -e "SELECT host, user FROM mysql.user;" | grep fenix | wc -l` -eq 0 ] && {
+ mysql -uroot -p$MYSQLPW -hlocalhost -e "CREATE USER 'fenix'@'localhost' IDENTIFIED BY 'fenix';"
+ mysql -uroot -p$MYSQLPW -hlocalhost -e "GRANT ALL PRIVILEGES ON fenix.* TO 'fenix'@'' identified by 'fenix';FLUSH PRIVILEGES;"
+}
+mysql -ufenix -pfenix -hlocalhost -e "DROP DATABASE IF EXISTS fenix;"
+mysql -ufenix -pfenix -hlocalhost -e "CREATE DATABASE fenix CHARACTER SET utf8;"
+
+# Build Fenix container and run it
+chmod 700 run
+docker build --build-arg OPENSTACK=master --build-arg BRANCH=master --network host $PWD -t fenix | tail -1
+docker run --network host -d --name fenix -p 12347:12347 -ti fenix
+if [ $? -eq 0 ]; then
+ echo "Fenix start: OK"
+else
+ echo "Fenix start: FAILED"
+fi
+# To debug check log from fenix container
+# docker exec -ti fenix tail -f /var/log/fenix-engine.log
diff --git a/doctor_tests/installer/devstack.py b/doctor_tests/installer/devstack.py
new file mode 100644
index 00000000..02f3601a
--- /dev/null
+++ b/doctor_tests/installer/devstack.py
@@ -0,0 +1,151 @@
+##############################################################################
+# Copyright (c) 2019 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import socket
+import time
+
+from doctor_tests.common.utils import SSHClient
+from doctor_tests.common.utils import LocalSSH
+from doctor_tests.identity_auth import get_session
+from doctor_tests.installer.base import BaseInstaller
+from doctor_tests.os_clients import nova_client
+
+
+class DevstackInstaller(BaseInstaller):
+ node_user_name = None
+ cm_set_script = 'set_config.py'
+ nc_set_compute_script = 'set_compute_config.py'
+ cm_restore_script = 'restore_config.py'
+ nc_restore_compute_script = 'restore_compute_config.py'
+ ac_restart_script = 'restart_aodh.py'
+ ac_restore_script = 'restore_aodh.py'
+ python = 'python'
+
+ def __init__(self, conf, log):
+ super(DevstackInstaller, self).__init__(conf, log)
+ # Run Doctor under the user's home; sudo hides other env params that would be needed
+ home, self.node_user_name = (iter(os.environ.get('VIRTUAL_ENV')
+ .split('/', 3)[1:3]))
+ # Migration needs to work so ssh should have proper key defined
+ self.key_file = '/%s/%s/.ssh/id_rsa' % (home, self.node_user_name)
+ self.log.info('ssh uses: %s and %s' % (self.node_user_name,
+ self.key_file))
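+ # Use the first non-loopback address as the controller IP; as a fallback, open a UDP
+ # socket towards a public DNS server to learn the outgoing interface address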
+ self.controllers = ([ip for ip in
+ socket.gethostbyname_ex(socket.gethostname())[2]
+ if not ip.startswith('127.')] or
+ [[(s.connect(('8.8.8.8', 53)),
+ s.getsockname()[0], s.close())
+ for s in [socket.socket(socket.AF_INET,
+ socket.SOCK_DGRAM)]][0][1]])
+ conf.admin_tool.ip = self.controllers[0]
+ self.computes = list()
+ self.nova = nova_client(conf.nova_version, get_session())
+
+ def setup(self):
+ self.log.info('Setup Devstack installer start......')
+ self._get_devstack_conf()
+ self.create_flavor()
+ self.set_apply_patches()
+
+ def cleanup(self):
+ self.restore_apply_patches()
+
+ def get_ssh_key_from_installer(self):
+ return self.key_file
+
+ def get_transport_url(self):
+ client = LocalSSH(self.log)
+ cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf'
+ ret, url = client.ssh(cmd)
+ url = url.split("= ", 1)[1][:-1]
+ self.log.info('get_transport_url %s' % url)
+ return url
+
+ def get_host_ip_from_hostname(self, hostname):
+ return [hvisor.__getattr__('host_ip') for hvisor in self.hvisors
+ if hvisor.__getattr__('hypervisor_hostname') == hostname][0]
+
+ def _get_devstack_conf(self):
+ self.log.info('Get devstack config details for Devstack installer'
+ '......')
+ self.hvisors = self.nova.hypervisors.list(detailed=True)
+ self.log.info('checking hypervisors.......')
+ self.computes = [hvisor.__getattr__('host_ip') for hvisor in
+ self.hvisors]
+ self.use_containers = False
+ self.log.info('controller_ips:%s' % self.controllers)
+ self.log.info('compute_ips:%s' % self.computes)
+ self.log.info('use_containers:%s' % self.use_containers)
+
+ def _set_docker_restart_cmd(self, service):
+ # There can be multiple instances running so need to restart all
+ cmd = "for container in `sudo docker ps | grep "
+ cmd += service
+ cmd += " | awk '{print $1}'`; do sudo docker restart $container; \
+ done;"
+ return cmd
+
+ def set_apply_patches(self):
+ self.log.info('Set apply patches start......')
+
+ set_scripts = [self.cm_set_script]
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@ceilometer-anotification.service'
+
+ client = LocalSSH(self.log)
+ self._run_apply_patches(client,
+ restart_cmd,
+ set_scripts,
+ python=self.python)
+ time.sleep(7)
+
+ self.log.info('Set apply patches to compute nodes......')
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@n-cpu.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(client,
+ restart_cmd,
+ [self.nc_set_compute_script],
+ python=self.python)
+ time.sleep(7)
+
+ def restore_apply_patches(self):
+ self.log.info('restore apply patches start......')
+
+ restore_scripts = [self.cm_restore_script]
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@ceilometer-anotification.service'
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd += ' devstack@n-sch.service'
+ restore_scripts.append(self.nc_restore_compute_script)
+
+ client = LocalSSH(self.log)
+ self._run_apply_patches(client,
+ restart_cmd,
+ restore_scripts,
+ python=self.python)
+
+ if self.conf.test_case != 'fault_management':
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@n-cpu.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(
+ client, restart_cmd,
+ [self.nc_restore_compute_script],
+ python=self.python)
diff --git a/doctor_tests/installer/local.py b/doctor_tests/installer/local.py
deleted file mode 100644
index fee14f33..00000000
--- a/doctor_tests/installer/local.py
+++ /dev/null
@@ -1,118 +0,0 @@
-##############################################################################
-# Copyright (c) 2017 ZTE Corporation and others.
-#
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Apache License, Version 2.0
-# which accompanies this distribution, and is available at
-# http://www.apache.org/licenses/LICENSE-2.0
-##############################################################################
-import os
-import shutil
-import subprocess
-
-from doctor_tests.installer.base import BaseInstaller
-from doctor_tests.installer.common.vitrage import \
- set_vitrage_host_down_template
-from doctor_tests.common.constants import Inspector
-from doctor_tests.common.utils import load_json_file
-from doctor_tests.common.utils import write_json_file
-
-
-class LocalInstaller(BaseInstaller):
- node_user_name = 'root'
-
- nova_policy_file = '/etc/nova/policy.json'
- nova_policy_file_backup = '%s%s' % (nova_policy_file, '.bak')
-
- def __init__(self, conf, log):
- super(LocalInstaller, self).__init__(conf, log)
- self.policy_modified = False
- self.add_policy_file = False
-
- def setup(self):
- self.get_ssh_key_from_installer()
- self.set_apply_patches()
-
- def cleanup(self):
- self.restore_apply_patches()
-
- def get_ssh_key_from_installer(self):
- self.log.info('Assuming SSH keys already exchanged with computer'
- 'for local installer type')
- return None
-
- def get_host_ip_from_hostname(self, hostname):
- self.log.info('Get host ip from host name in local installer......')
-
- cmd = "getent hosts %s | awk '{ print $1 }'" % (hostname)
- server = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
- stdout, stderr = server.communicate()
- host_ip = stdout.strip().decode("utf-8")
-
- self.log.info('Get host_ip:%s from host_name:%s in local installer'
- % (host_ip, hostname))
- return host_ip
-
- def set_apply_patches(self):
- self._set_nova_policy()
- if self.conf.inspector.type == Inspector.VITRAGE:
- set_vitrage_host_down_template()
- os.system('sudo systemctl restart devstack@vitrage-graph.service')
-
- def restore_apply_patches(self):
- self._restore_nova_policy()
-
- def _set_nova_policy(self):
- host_status_policy = 'os_compute_api:servers:show:host_status'
- host_status_rule = 'rule:admin_or_owner'
- policy_data = {
- 'context_is_admin': 'role:admin',
- 'owner': 'user_id:%(user_id)s',
- 'admin_or_owner': 'rule:context_is_admin or rule:owner',
- host_status_policy: host_status_rule
- }
-
- if os.path.isfile(self.nova_policy_file):
- data = load_json_file(self.nova_policy_file)
- if host_status_policy in data:
- rule_origion = data[host_status_policy]
- if host_status_rule == rule_origion:
- self.log.info('Do not need to modify nova policy.')
- self.policy_modified = False
- else:
- # update the host_status_policy
- data[host_status_policy] = host_status_rule
- self.policy_modified = True
- else:
- # add the host_status_policy, if the admin_or_owner is not
- # defined, add it also
- for policy, rule in policy_data.items():
- if policy not in data:
- data[policy] = rule
- self.policy_modified = True
- if self.policy_modified:
- self.log.info('Nova policy is Modified.')
- shutil.copyfile(self.nova_policy_file,
- self.nova_policy_file_backup)
- else:
- # file does not exit, create a new one and add the policy
- self.log.info('Nova policy file not exist. Creating a new one')
- data = policy_data
- self.add_policy_file = True
-
- if self.policy_modified or self.add_policy_file:
- write_json_file(self.nova_policy_file, data)
- os.system('sudo systemctl restart devstack@n-api.service')
-
- def _restore_nova_policy(self):
- if self.policy_modified:
- shutil.copyfile(self.nova_policy_file_backup,
- self.nova_policy_file)
- os.remove(self.nova_policy_file_backup)
- elif self.add_policy_file:
- os.remove(self.nova_policy_file)
-
- if self.add_policy_file or self.policy_modified:
- os.system('sudo systemctl restart devstack@n-api.service')
- self.add_policy_file = False
- self.policy_modified = False
diff --git a/doctor_tests/installer/mcp.py b/doctor_tests/installer/mcp.py
index 9cfff92d..7659c9e2 100644
--- a/doctor_tests/installer/mcp.py
+++ b/doctor_tests/installer/mcp.py
@@ -1,5 +1,5 @@
##############################################################################
-# Copyright (c) 2018 ZTE Corporation and others.
+# Copyright (c) 2019 ZTE Corporation and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
@@ -7,15 +7,26 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
from os.path import isfile
+import re
+import time
+from doctor_tests.common.constants import is_fenix
+from doctor_tests.common.utils import get_doctor_test_root_dir
from doctor_tests.common.utils import SSHClient
from doctor_tests.installer.base import BaseInstaller
class McpInstaller(BaseInstaller):
node_user_name = 'ubuntu'
- cm_set_script = 'set_ceilometer.py'
- cm_restore_script = 'restore_ceilometer.py'
+
+ cm_set_script = 'set_config.py'
+ nc_set_compute_script = 'set_compute_config.py'
+ fe_set_script = 'set_fenix.sh'
+ cm_restore_script = 'restore_config.py'
+ nc_restore_compute_script = 'restore_compute_config.py'
+ ac_restart_script = 'restart_aodh.py'
+ ac_restore_script = 'restore_aodh.py'
+ python = 'python3'
def __init__(self, conf, log):
super(McpInstaller, self).__init__(conf, log)
@@ -26,40 +37,87 @@ class McpInstaller(BaseInstaller):
look_for_keys=True)
self.controllers = list()
self.controller_clients = list()
+ self.computes = list()
def setup(self):
self.log.info('Setup MCP installer start......')
-
- self.controllers = self.get_controller_ips()
+ self.get_node_ips()
self.create_flavor()
- self.set_apply_patches()
+ if is_fenix(self.conf):
+ self.set_apply_patches()
self.setup_stunnel()
def cleanup(self):
- self.restore_apply_patches()
+ if is_fenix(self.conf):
+ self.restore_apply_patches()
for server in self.servers:
server.terminate()
def get_ssh_key_from_installer(self):
self.log.info('Get SSH keys from MCP......')
- # Assuming mcp.rsa is already mapped to functest container
- # if not, only the test runs on jumphost can get the ssh_key
- # default in path /var/lib/opnfv/mcp.rsa
+ # Default in path /var/lib/opnfv/mcp.rsa
ssh_key = '/root/.ssh/id_rsa'
mcp_key = '/var/lib/opnfv/mcp.rsa'
- return ssh_key if isfile(ssh_key) else mcp_key
-
- def get_controller_ips(self):
- self.log.info('Get controller ips from Mcp installer......')
-
- command = "sudo salt --out yaml 'ctl*' " \
- "pillar.get _param:openstack_control_address |" \
- "awk '{print $2}'"
- controllers = self._run_cmd_remote(self.client, command)
- self.log.info('Get controller_ips:%s from Mcp installer'
- % controllers)
- return controllers
+ return mcp_key if isfile(mcp_key) else ssh_key
+
+ def get_transport_url(self):
+ client = SSHClient(self.controllers[0], self.node_user_name,
+ key_filename=self.key_file)
+ try:
+ cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf'
+ ret, url = client.ssh(cmd)
+
+ if ret:
+ raise Exception('Exec command to get transport from '
+ 'controller(%s) in MCP installer failed, '
+ 'ret=%s, output=%s'
+ % (self.controllers[0], ret, url))
+ elif self.controllers[0] not in url:
+ # need to use ip instead of hostname
+ url = (re.sub("@.*:", "@%s:" % self.controllers[0],
+ url[0].split("=", 1)[1]))
+ except Exception:
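+ # Fall back to composing the URL from the legacy rabbit_* options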
+ cmd = 'grep -i "^rabbit" /etc/nova/nova.conf'
+ ret, lines = client.ssh(cmd)
+ if ret:
+ raise Exception('Exec command to get transport from '
+ 'controller(%s) in MCP installer failed, '
+ 'ret=%s, output=%s'
+ % (self.controllers[0], ret, lines))
+ else:
+ for line in lines.split('\n'):
+ if line.startswith("rabbit_userid"):
+ rabbit_userid = line.split("=")
+ if line.startswith("rabbit_port"):
+ rabbit_port = line.split("=")
+ if line.startswith("rabbit_password"):
+ rabbit_password = line.split("=")
+ url = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid,
+ rabbit_password,
+ self.controllers[0],
+ rabbit_port)
+ self.log.info('get_transport_url %s' % url)
+ return url
+
+ def _copy_overcloudrc_to_controllers(self):
+ for ip in self.controllers:
+ cmd = "scp overcloudrc %s@%s:" % (self.node_user_name, ip)
+ self._run_cmd_remote(self.client, cmd)
+
+ def get_node_ips(self):
+ self.log.info('Get node ips from Mcp installer......')
+
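+ # Salt output lines look like "<node>: <ip>"; "ctl*" nodes are controllers, "cmp*" are computes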
+ command = 'sudo salt "*" --out yaml pillar.get _param:single_address'
+ node_details = self._run_cmd_remote(self.client, command)
+
+ self.controllers = [line.split()[1] for line in node_details
+ if line.startswith("ctl")]
+ self.computes = [line.split()[1] for line in node_details
+ if line.startswith("cmp")]
+
+ self.log.info('controller_ips:%s' % self.controllers)
+ self.log.info('compute_ips:%s' % self.computes)
def get_host_ip_from_hostname(self, hostname):
command = "sudo salt --out yaml '%s*' " \
@@ -70,21 +128,80 @@ class McpInstaller(BaseInstaller):
def set_apply_patches(self):
self.log.info('Set apply patches start......')
+ fenix_files = None
+ set_scripts = [self.cm_set_script]
+ thrs = []
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' ceilometer-agent-notification.service'
+
+ if self.conf.test_case != 'fault_management':
+ if is_fenix(self.conf):
+ set_scripts.append(self.fe_set_script)
+ testdir = get_doctor_test_root_dir()
+ fenix_files = ["Dockerfile", "run"]
+ restart_cmd += ' nova-scheduler.service'
+ set_scripts.append(self.nc_set_compute_script)
- restart_cm_cmd = 'sudo service ceilometer-agent-notification restart'
for node_ip in self.controllers:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self.controller_clients.append(client)
- self._run_apply_patches(client,
- restart_cm_cmd,
- [self.cm_set_script])
+ if fenix_files is not None:
+ for fenix_file in fenix_files:
+ src_file = '{0}/{1}/{2}'.format(testdir,
+ 'admin_tool/fenix',
+ fenix_file)
+ client.scp(src_file, fenix_file)
+ thrs.append(self._run_apply_patches(client,
+ restart_cmd,
+ set_scripts,
+ python=self.python))
+ time.sleep(5)
+
+ self.log.info('Set apply patches to compute nodes......')
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd = 'sudo systemctl restart nova-compute.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ thrs.append(self._run_apply_patches(
+ client,
+ restart_cmd,
+ [self.nc_set_compute_script],
+ python=self.python))
+ time.sleep(5)
+ # If the Fenix container is built, it needs to be ready before continuing
+ for thr in thrs:
+ thr.join()
def restore_apply_patches(self):
self.log.info('restore apply patches start......')
- restart_cm_cmd = 'sudo service ceilometer-agent-notification restart'
- for client in self.controller_clients:
+ restore_scripts = [self.cm_restore_script]
+
+ restore_scripts.append(self.ac_restore_script)
+ restart_cmd = 'sudo systemctl restart' \
+ ' ceilometer-agent-notification.service'
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd += ' nova-scheduler.service'
+ restore_scripts.append(self.nc_restore_compute_script)
+
+ for node_ip in self.controllers:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
self._run_apply_patches(client,
- restart_cm_cmd,
- [self.cm_restore_script])
+ restart_cmd,
+ restore_scripts,
+ python=self.python)
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd = 'sudo systemctl restart nova-compute.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(
+ client, restart_cmd,
+ [self.nc_restore_compute_script],
+ python=self.python)
diff --git a/doctor_tests/main.py b/doctor_tests/main.py
index 438d8324..7573faec 100644
--- a/doctor_tests/main.py
+++ b/doctor_tests/main.py
@@ -1,5 +1,5 @@
##############################################################################
-# Copyright (c) 2017 ZTE Corporation and others.
+# Copyright (c) 2019 ZTE Corporation and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
@@ -43,7 +43,6 @@ class DoctorTest(object):
def setup(self):
# prepare the cloud env
self.installer.setup()
-
# preparing VM image...
self.image.create()
@@ -51,37 +50,50 @@ class DoctorTest(object):
self.user.create()
def test_fault_management(self):
- try:
- LOG.info('doctor fault management test starting.......')
-
- self.fault_management = \
- FaultManagement(self.conf, self.installer, self.user, LOG)
-
- # prepare test env
- self.fault_management.setup()
-
- # wait for aodh alarms are updated in caches for event evaluator,
- # sleep time should be larger than event_alarm_cache_ttl
- # (default 60)
- # (tojuvone) Fraser currently needs 120
- time.sleep(120)
-
- # injecting host failure...
- # NOTE (umar) add INTERFACE_NAME logic to host injection
- self.fault_management.start()
- time.sleep(30)
-
- # verify the test results
- # NOTE (umar) copy remote monitor.log file when monitor=collectd
- self.fault_management.check_host_status('down')
- self.fault_management.check_notification_time()
-
- except Exception as e:
- LOG.error('doctor fault management test failed, '
- 'Exception=%s' % e)
- sys.exit(1)
- finally:
- self.fault_management.cleanup()
+ retry = 2
+ # Retry once if notified_time is None
+ while retry > 0:
+ try:
+ self.fault_management = None
+ LOG.info('doctor fault management test starting.......')
+ transport_url = self.installer.get_transport_url()
+ self.fault_management = \
+ FaultManagement(self.conf, self.installer, self.user, LOG,
+ transport_url)
+
+ # prepare test env
+ self.fault_management.setup()
+
+ # wait for aodh alarms to be updated in the caches of the event
+ # evaluator; sleep time should be larger than
+ # event_alarm_cache_ttl (default 60)
+ # (tojuvone) Fraser currently needs 120
+ time.sleep(120)
+
+ # injecting host failure...
+ # NOTE (umar) add INTERFACE_NAME logic to host injection
+ self.fault_management.start()
+ time.sleep(30)
+
+ # verify the test results
+ # NOTE (umar) copy remote monitor.log file when
+ # monitor=collectd
+ self.fault_management.check_host_status('down')
+ self.fault_management.check_notification_time()
+ retry = 0
+
+ except Exception as e:
+ LOG.error('doctor fault management test failed, '
+ 'Exception=%s' % e)
+ if 'notified_time=None' in str(e):
+ retry -= 1
+ LOG.info('doctor fault management retry')
+ continue
+ LOG.error(format_exc())
+ sys.exit(1)
+ finally:
+ if self.fault_management is not None:
+ self.fault_management.cleanup()
def _amount_compute_nodes(self):
services = self.nova.services.list(binary='nova-compute')
@@ -94,11 +106,12 @@ class DoctorTest(object):
LOG.info('not enough compute nodes, skipping doctor '
'maintenance test')
return
- elif self.conf.installer.type != 'apex':
+ elif self.conf.installer.type not in ['apex', 'fuel', 'devstack']:
LOG.info('not supported installer, skipping doctor '
'maintenance test')
return
try:
+ maintenance = None
LOG.info('doctor maintenance test starting.......')
trasport_url = self.installer.get_transport_url()
maintenance = Maintenance(trasport_url, self.conf, LOG)
@@ -120,7 +133,8 @@ class DoctorTest(object):
LOG.error(format_exc())
sys.exit(1)
finally:
- maintenance.cleanup_maintenance()
+ if maintenance is not None:
+ maintenance.cleanup_maintenance()
def run(self):
"""run doctor tests"""
@@ -143,6 +157,7 @@ class DoctorTest(object):
% function)
except Exception as e:
LOG.error('doctor test failed, Exception=%s' % e)
+ LOG.error(format_exc())
sys.exit(1)
finally:
self.cleanup()
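The reworked test_fault_management() retries once when the failure is the known 'notified_time=None' race and exits on any other exception. The control flow, reduced to a standalone sketch with a hypothetical run_test() in place of the scenario:

import sys

def run_test(attempt):
    # Hypothetical stand-in for the fault management scenario.
    if attempt == 0:
        raise Exception('notified_time=None')
    print('test passed on retry')

retry = 2
attempt = 0
while retry > 0:
    try:
        run_test(attempt)
        retry = 0                      # success, leave the loop
    except Exception as e:
        if 'notified_time=None' in str(e):
            retry -= 1                 # retry once on the known race
            attempt += 1
            continue
        sys.exit(1)                    # any other failure is fatal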
diff --git a/doctor_tests/scenario/fault_management.py b/doctor_tests/scenario/fault_management.py
index 869311bd..0271dffe 100644
--- a/doctor_tests/scenario/fault_management.py
+++ b/doctor_tests/scenario/fault_management.py
@@ -40,7 +40,7 @@ sleep 1
class FaultManagement(object):
- def __init__(self, conf, installer, user, log):
+ def __init__(self, conf, installer, user, log, transport_url):
self.conf = conf
self.log = log
self.user = user
@@ -55,7 +55,7 @@ class FaultManagement(object):
self.network = Network(self.conf, log)
self.instance = Instance(self.conf, log)
self.alarm = Alarm(self.conf, log)
- self.inspector = get_inspector(self.conf, log)
+ self.inspector = get_inspector(self.conf, log, transport_url)
self.monitor = get_monitor(self.conf,
self.inspector.get_inspector_url(),
log)
@@ -111,7 +111,10 @@ class FaultManagement(object):
server = servers.get(vm_name)
if not server:
raise Exception('Can not find instance: vm_name(%s)' % vm_name)
- host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
+ # use the hostname without the domain name, which is mapped to the cell
+ hostname = \
+ server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
+ host_name = hostname.split('.')[0]
host_ip = self.installer.get_host_ip_from_hostname(host_name)
self.log.info('Get host info(name:%s, ip:%s) which vm(%s) launched at'
@@ -209,6 +212,10 @@ class FaultManagement(object):
detected = self.monitor.detected_time
notified = self.consumer.notified_time
+ if None in [vmdown, hostdown, detected, notified]:
+ self.log.info('one of the time for profiler is None, return')
+ return
+
# TODO(yujunz) check the actual delay to verify time sync status
# expected ~1s delay from $trigger to $linkdown
relative_start = linkdown
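Two small behaviours introduced above are easy to see in isolation: the hypervisor hostname reported by Nova may carry the cell's domain suffix, so only the short name is used for the host IP lookup, and profiling is skipped when any timestamp is still missing. A trivial sketch with made-up values:

# Hypothetical FQDN as returned in 'OS-EXT-SRV-ATTR:hypervisor_hostname'.
hostname = 'cmp001.cell1.mcp.local'
host_name = hostname.split('.')[0]   # keep the short name, drop the cell domain
print(host_name)                     # cmp001

# Guard used before profiling: skip if any timestamp is still missing.
vmdown, hostdown, detected, notified = None, 2.1, 2.3, None
if None in [vmdown, hostdown, detected, notified]:
    print('a profiler timestamp is None, skipping profiling')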
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
index 9fcd4128..e6cdcccd 100644
--- a/doctor_tests/scenario/maintenance.py
+++ b/doctor_tests/scenario/maintenance.py
@@ -1,5 +1,5 @@
##############################################################################
-# Copyright (c) 2018 Nokia Corporation and others.
+# Copyright (c) 2019 Nokia Corporation and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
@@ -28,15 +28,25 @@ class Maintenance(object):
def __init__(self, trasport_url, conf, log):
self.conf = conf
self.log = log
+ self.admin_session = get_session()
self.keystone = keystone_client(
self.conf.keystone_version, get_session())
self.nova = nova_client(conf.nova_version, get_session())
auth = get_identity_auth(project=self.conf.doctor_project)
self.neutron = neutron_client(get_session(auth=auth))
self.stack = Stack(self.conf, self.log)
- self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+ if self.conf.installer.type == "devstack":
+ self.endpoint_ip = trasport_url.split("@", 1)[1].split(":", 1)[0]
+ else:
+ self.endpoint_ip = self.conf.admin_tool.ip
+ self.endpoint = "http://%s:12347/" % self.endpoint_ip
+ if self.conf.admin_tool.type == 'sample':
+ self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+ self.endpoint += 'maintenance'
+ else:
+ self.endpoint += 'v1/maintenance'
self.app_manager = get_app_manager(self.stack, self.conf, self.log)
- self.inspector = get_inspector(self.conf, self.log)
+ self.inspector = get_inspector(self.conf, self.log, trasport_url)
def get_external_network(self):
ext_net = None
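The new __init__ derives the admin-tool endpoint either from the transport URL (devstack) or from the configured admin_tool IP, then picks the sample or Fenix URL path. A standalone sketch of that derivation; the transport URL, addresses, and configuration values below are made up.

# Hypothetical RabbitMQ transport URL of the usual OpenStack form.
trasport_url = 'rabbit://openstack:secret@192.0.2.10:5672/'

installer_type = 'devstack'      # assumed configuration values
admin_tool_type = 'fenix'
admin_tool_ip = '192.0.2.20'

if installer_type == 'devstack':
    # host part between '@' and the first ':' after it
    endpoint_ip = trasport_url.split('@', 1)[1].split(':', 1)[0]
else:
    endpoint_ip = admin_tool_ip

endpoint = 'http://%s:12347/' % endpoint_ip
endpoint += 'maintenance' if admin_tool_type == 'sample' else 'v1/maintenance'
print(endpoint)   # http://192.0.2.10:12347/v1/maintenance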
@@ -64,8 +74,16 @@ class Maintenance(object):
raise Exception('not enough vcpus (%d) on %s' %
(vcpus, hostname))
if vcpus_used > 0:
- raise Exception('%d vcpus used on %s'
- % (vcpus_used, hostname))
+ if self.conf.test_case == 'all':
+ # VCPU might not yet be free after fault_management test
+ self.log.info('%d vcpus used on %s, retry...'
+ % (vcpus_used, hostname))
+ time.sleep(15)
+ hvisor = self.nova.hypervisors.get(hvisor.id)
+ vcpus_used = hvisor.__getattr__('vcpus_used')
+ if vcpus_used > 0:
+ raise Exception('%d vcpus used on %s'
+ % (vcpus_used, hostname))
if prev_vcpus != 0 and prev_vcpus != vcpus:
raise Exception('%d vcpus on %s does not match to'
'%d on %s'
@@ -110,9 +128,14 @@ class Maintenance(object):
parameters=parameters,
files=files)
- self.admin_tool.start()
- self.app_manager.start()
+ if self.conf.admin_tool.type == 'sample':
+ self.admin_tool.start()
+ else:
+ # TBD: Fenix is expected to already be running on self.conf.admin_tool.port
+ pass
+ # Start the inspector before the app_manager, as the floating IP might come late
self.inspector.start()
+ self.app_manager.start()
def start_maintenance(self):
self.log.info('start maintenance.......')
@@ -121,22 +144,49 @@ class Maintenance(object):
for hvisor in hvisors:
hostname = hvisor.__getattr__('hypervisor_hostname')
maintenance_hosts.append(hostname)
-
- url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
- # let's start maintenance 20sec from now, so projects will have
- # time to ACK to it before that
- maintenance_at = (datetime.datetime.utcnow() +
- datetime.timedelta(seconds=20)
- ).strftime('%Y-%m-%d %H:%M:%S')
- data = {'hosts': maintenance_hosts,
- 'state': 'MAINTENANCE',
- 'maintenance_at': maintenance_at,
- 'metadata': {'openstack_version': 'Pike'}}
+ url = self.endpoint
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
-
- ret = requests.post(url, data=json.dumps(data), headers=headers)
+ if self.conf.admin_tool.type == 'fenix':
+ headers['X-Auth-Token'] = self.admin_session.get_token()
+ self.log.info('url %s headers %s' % (url, headers))
+ retries = 12
+ ret = None
+ while retries > 0:
+ # let's start maintenance 30s from now, so projects will have
+ # time to ACK it before then
+ maintenance_at = (datetime.datetime.utcnow() +
+ datetime.timedelta(seconds=30)
+ ).strftime('%Y-%m-%d %H:%M:%S')
+
+ data = {'state': 'MAINTENANCE',
+ 'maintenance_at': maintenance_at,
+ 'metadata': {'openstack_version': 'Train'}}
+
+ if self.conf.app_manager.type == 'vnfm':
+ data['workflow'] = 'vnf'
+ else:
+ data['workflow'] = 'default'
+
+ if self.conf.admin_tool.type == 'sample':
+ data['hosts'] = maintenance_hosts
+ else:
+ data['hosts'] = []
+ try:
+ ret = requests.post(url, data=json.dumps(data),
+ headers=headers)
+ except Exception:
+ if retries == 0:
+ raise Exception('admin tool did not respond in 120s')
+ else:
+ self.log.info('admin tool not ready, retry in 10s')
+ retries = retries - 1
+ time.sleep(10)
+ continue
+ break
+ if not ret:
+ raise Exception("admin tool did not respond")
if ret.status_code != 200:
raise Exception(ret.text)
return ret.json()['session_id']
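start_maintenance() now keeps posting the session request until the admin tool answers, and adds the Keystone token when the admin tool is Fenix. A reduced sketch of that request loop; the endpoint, token, and the narrower ConnectionError handling are assumptions of this sketch, not the exact patch.

import datetime
import json
import time

import requests

endpoint = 'http://192.0.2.10:12347/v1/maintenance'     # hypothetical
headers = {'Content-Type': 'application/json',
           'Accept': 'application/json',
           'X-Auth-Token': 'gAAAA-example-token'}       # hypothetical token

ret = None
for attempt in range(12):
    # start maintenance 30s from now so projects can ACK before then
    maintenance_at = (datetime.datetime.utcnow() +
                      datetime.timedelta(seconds=30)
                      ).strftime('%Y-%m-%d %H:%M:%S')
    data = {'state': 'MAINTENANCE',
            'maintenance_at': maintenance_at,
            'metadata': {'openstack_version': 'Train'},
            'workflow': 'default',
            'hosts': []}             # Fenix discovers the hosts itself
    try:
        ret = requests.post(endpoint, data=json.dumps(data), headers=headers)
        break
    except requests.exceptions.ConnectionError:
        time.sleep(10)               # admin tool not ready yet, retry
if ret is None:
    raise Exception('admin tool did not respond in 120s')
print(ret.json()['session_id'])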
@@ -144,48 +194,56 @@ class Maintenance(object):
def remove_maintenance_session(self, session_id):
self.log.info('remove maintenance session %s.......' % session_id)
- url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
+ url = ('%s/%s' % (self.endpoint, session_id))
- data = {'state': 'REMOVE_MAINTENANCE_SESSION',
- 'session_id': session_id}
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
- ret = requests.post(url, data=json.dumps(data), headers=headers)
+ if self.conf.admin_tool.type == 'fenix':
+ headers['X-Auth-Token'] = self.admin_session.get_token()
+
+ ret = requests.delete(url, data=None, headers=headers)
if ret.status_code != 200:
raise Exception(ret.text)
def get_maintenance_state(self, session_id):
- url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
- data = {'session_id': session_id}
+
+ url = ('%s/%s' % (self.endpoint, session_id))
+
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
- ret = requests.get(url, data=json.dumps(data), headers=headers)
+
+ if self.conf.admin_tool.type == 'fenix':
+ headers['X-Auth-Token'] = self.admin_session.get_token()
+
+ ret = requests.get(url, data=None, headers=headers)
if ret.status_code != 200:
raise Exception(ret.text)
return ret.json()['state']
def wait_maintenance_complete(self, session_id):
- retries = 66
+ retries = 90
state = None
- time.sleep(540)
- while state != 'MAINTENANCE_COMPLETE' and retries > 0:
+ time.sleep(300)
+ while (state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+ retries > 0):
time.sleep(10)
state = self.get_maintenance_state(session_id)
retries = retries - 1
- if retries == 0 and state != 'MAINTENANCE_COMPLETE':
- raise Exception('maintenance %s not completed within 20min, status'
- ' %s' % (session_id, state))
- elif state == 'MAINTENANCE_COMPLETE':
- self.log.info('maintenance %s %s' % (session_id, state))
- self.remove_maintenance_session(session_id)
- elif state == 'MAINTENANCE_FAILED':
+ self.remove_maintenance_session(session_id)
+ self.log.info('maintenance %s ended with state %s' %
+ (session_id, state))
+ if state == 'MAINTENANCE_FAILED':
raise Exception('maintenance %s failed' % session_id)
+ elif retries == 0:
+ raise Exception('maintenance %s not completed within 20min' %
+ session_id)
def cleanup_maintenance(self):
- self.admin_tool.stop()
+ if self.conf.admin_tool.type == 'sample':
+ self.admin_tool.stop()
self.app_manager.stop()
self.inspector.stop()
self.log.info('stack delete start.......')
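wait_maintenance_complete() now polls the session state, removes the session whatever the outcome, and fails if the state is MAINTENANCE_FAILED or the time budget runs out. A compressed sketch of the polling loop, with a hypothetical get_state() replacing the REST call and small wait values for the demo call:

import time

def get_state(session_id):
    # Hypothetical stand-in for GET <endpoint>/<session_id>.
    return 'MAINTENANCE_DONE'

def wait_maintenance_complete(session_id, initial_wait=300, retries=90, poll=10):
    time.sleep(initial_wait)          # the workflow needs several minutes anyway
    state = None
    while (state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED']
           and retries > 0):
        time.sleep(poll)
        state = get_state(session_id)
        retries -= 1
    # the session is removed here regardless of the outcome
    print('maintenance %s ended with state %s' % (session_id, state))
    if state == 'MAINTENANCE_FAILED':
        raise Exception('maintenance %s failed' % session_id)
    elif retries == 0:
        raise Exception('maintenance %s not completed in time' % session_id)

wait_maintenance_complete('hypothetical-session-id', initial_wait=0, poll=1)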
diff --git a/doctor_tests/stack.py b/doctor_tests/stack.py
index ee586fa8..8a921beb 100644
--- a/doctor_tests/stack.py
+++ b/doctor_tests/stack.py
@@ -94,7 +94,7 @@ class Stack(object):
# It might not always work at first
self.log.info('retry creating maintenance stack.......')
self.delete()
- time.sleep(3)
+ time.sleep(5)
stack = self.heat.stacks.create(stack_name=self.stack_name,
files=files,
template=template,
diff --git a/doctor_tests/user.py b/doctor_tests/user.py
index 29aa004b..2cd9757f 100644
--- a/doctor_tests/user.py
+++ b/doctor_tests/user.py
@@ -129,7 +129,6 @@ class User(object):
def _add_user_role_in_project(self, is_admin=False):
"""add test user with test role in test project"""
-
project = self.projects.get(self.conf.doctor_project)
user_name = 'admin' if is_admin else self.conf.doctor_user
diff --git a/tox.ini b/tox.ini
index 6e0d8b44..2937c329 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,12 +1,12 @@
[tox]
minversion = 2.3.1
-envlist = py34, pep8,docs,docs-linkcheck
+envlist = py36,pep8,docs,docs-linkcheck
skipsdist = True
[testenv]
usedevelop = True
-install_command = pip install \
- -chttps://git.openstack.org/cgit/openstack/requirements/plain/upper-constraints.txt?h=stable/pike \
+install_command = pip3 install \
+ -chttps://git.openstack.org/cgit/openstack/requirements/plain/upper-constraints.txt?h=stable/stein \
{opts} {packages}
setenv = VIRTUAL_ENV={envdir}
deps = -r{toxinidir}/requirements.txt
@@ -29,10 +29,13 @@ passenv =
INSTALLER_TYPE
INSTALLER_IP
INSPECTOR_TYPE
+ ADMIN_TOOL_TYPE
TEST_CASE
SSH_KEY
+ APP_MANAGER_TYPE
changedir = {toxinidir}/doctor_tests
commands = doctor-test
+ /usr/bin/find {toxinidir} -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete
[testenv:pep8]
changedir = {toxinidir}