-rw-r--r--  .gitignore | 2
-rw-r--r--  INFO | 27
-rw-r--r--  INFO.yaml | 24
-rw-r--r--  devstack/README.rst | 4
-rw-r--r--  devstack/local.conf.sample | 120
-rw-r--r--  docs/conf.py | 2
-rw-r--r--  docs/conf.yaml | 3
-rw-r--r--  docs/development/index.rst | 14
-rw-r--r--  docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst | 253
-rw-r--r--  docs/development/overview/index.rst | 7
-rw-r--r--  docs/development/overview/overview.rst | 52
-rw-r--r--  docs/development/requirements/index.rst | 6
-rw-r--r--  docs/index.rst | 17
-rw-r--r--  docs/release/configguide/feature.configuration.rst | 54
-rw-r--r--  docs/release/configguide/index.rst | 6
-rw-r--r--  docs/release/index.rst | 12
-rw-r--r--  docs/release/installation/index.rst (renamed from docs/development/manuals/index.rst) | 12
-rw-r--r--  docs/release/installation/installation.rst | 44
-rw-r--r--  docs/release/release-notes/index.rst | 2
-rw-r--r--  docs/release/release-notes/release-notes.rst | 146
-rw-r--r--  docs/release/release-notes/releasenotes_fraser.rst (renamed from docs/release/release-notes/releasenotes.rst) | 0
-rw-r--r--  docs/release/release-notes/releasenotes_gambia.rst | 303
-rw-r--r--  docs/release/release-notes/releasenotes_iruya.rst | 129
-rw-r--r--  docs/release/scenarios/fault_management/fault_management.rst | 90
-rw-r--r--  docs/release/scenarios/maintenance/images/Fault-management-design.png (renamed from docs/development/overview/functest_scenario/images/Fault-management-design.png) | bin 237110 -> 237110 bytes
-rw-r--r--  docs/release/scenarios/maintenance/images/LICENSE (renamed from docs/development/overview/functest_scenario/images/LICENSE) | 0
-rw-r--r--  docs/release/scenarios/maintenance/images/Maintenance-design.png (renamed from docs/development/overview/functest_scenario/images/Maintenance-design.png) | bin 316640 -> 316640 bytes
-rw-r--r--  docs/release/scenarios/maintenance/images/Maintenance-workflow.png (renamed from docs/development/overview/functest_scenario/images/Maintenance-workflow.png) | bin 81286 -> 81286 bytes
-rw-r--r--  docs/release/scenarios/maintenance/maintenance.rst | 120
-rw-r--r--  docs/release/userguide/get-valid-server-state.rst (renamed from docs/development/manuals/get-valid-server-state.rst) | 0
-rw-r--r--  docs/release/userguide/index.rst | 3
-rw-r--r--  docs/release/userguide/mark-host-down_manual.rst (renamed from docs/development/manuals/mark-host-down_manual.rst) | 0
-rw-r--r--  docs/release/userguide/monitors.rst (renamed from docs/development/manuals/monitors.rst) | 3
-rw-r--r--  docs/requirements.txt | 2
-rw-r--r--  docs/testing/developer/index.rst | 13
-rw-r--r--  docs/testing/developer/testing.rst (renamed from docs/development/overview/testing.rst) | 43
-rw-r--r--  docs/testing/index.rst | 15
-rw-r--r--  docs/testing/user/index.rst | 13
-rw-r--r--  docs/testing/user/testing.rst | 30
-rw-r--r--  doctor_tests/admin_tool/__init__.py | 8
-rw-r--r--  doctor_tests/admin_tool/fenix/Dockerfile | 34
-rwxr-xr-x  doctor_tests/admin_tool/fenix/run | 32
-rw-r--r--  doctor_tests/admin_tool/sample.py | 185
-rw-r--r--  doctor_tests/app_manager/__init__.py | 8
-rw-r--r--  doctor_tests/app_manager/sample.py | 28
-rw-r--r--  doctor_tests/app_manager/vnfm.py | 441
-rw-r--r--  doctor_tests/common/constants.py | 4
-rw-r--r--  doctor_tests/common/utils.py | 26
-rw-r--r--  doctor_tests/image.py | 13
-rw-r--r--  doctor_tests/inspector/__init__.py | 8
-rw-r--r--  doctor_tests/inspector/sample.py | 48
-rw-r--r--  doctor_tests/installer/__init__.py | 12
-rw-r--r--  doctor_tests/installer/apex.py | 150
-rw-r--r--  doctor_tests/installer/base.py | 76
-rw-r--r--  doctor_tests/installer/common/restart_aodh.py | 42
-rw-r--r--  doctor_tests/installer/common/restore_aodh.py | 32
-rw-r--r--  doctor_tests/installer/common/restore_compute_config.py | 19
-rw-r--r--  doctor_tests/installer/common/restore_config.py | 33
-rw-r--r--  doctor_tests/installer/common/restore_congress.py | 7
-rw-r--r--  doctor_tests/installer/common/set_compute_config.py | 17
-rw-r--r--  doctor_tests/installer/common/set_config.py | 136
-rw-r--r--  doctor_tests/installer/common/set_congress.py | 14
-rw-r--r--  doctor_tests/installer/common/set_fenix.sh | 106
-rw-r--r--  doctor_tests/installer/common/vitrage.py | 5
-rw-r--r--  doctor_tests/installer/daisy.py | 3
-rw-r--r--  doctor_tests/installer/devstack.py | 151
-rw-r--r--  doctor_tests/installer/local.py | 118
-rw-r--r--  doctor_tests/installer/mcp.py | 179
-rw-r--r--  doctor_tests/main.py | 85
-rw-r--r--  doctor_tests/scenario/fault_management.py | 20
-rw-r--r--  doctor_tests/scenario/maintenance.py | 134
-rw-r--r--  doctor_tests/stack.py | 16
-rw-r--r--  doctor_tests/user.py | 1
-rw-r--r--  requirements.txt | 34
-rw-r--r--  tox.ini | 22
75 files changed, 2927 insertions(+), 891 deletions(-)
diff --git a/.gitignore b/.gitignore
index 6fc25628..b21ec655 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,6 @@
/*.egg-info/
.eggs/
/build/
-/docs_build/
/docs_output/
/releng/
/tests/*.img
@@ -16,3 +15,4 @@
#Build results
.tox
+docs/_build/*
diff --git a/INFO b/INFO
deleted file mode 100644
index f722e870..00000000
--- a/INFO
+++ /dev/null
@@ -1,27 +0,0 @@
-Project: Fault Management project (doctor)
-Project Creation Date: December 2, 2014
-Project Category: Requirement
-Lifecycle State: Mature
-Primary Contact: Tomi Juvonen (tomi.juvonen@nokia.com)
-Project Lead: Tomi Juvonen (tomi.juvonen@nokia.com)
-Jira Project Name: Fault Management project
-Jira Project Prefix: DOCTOR
-Mailing list tag: [doctor]
-IRC: Server:freenode.net Channel:#opnfv-doctor
-Repository: doctor
-
-Committers:
-Ashiq Khan (NTT DOCOMO, khan@nttdocomo.com)
-Bertrand Souville (NTT DOCOMO, souville@docomolab-euro.com)
-Dong Wenjuan (ZTE, dong.wenjuan@zte.com.cn)
-Gerald Kunzmann (NTT DOCOMO, kunzmann@docomolab-euro.com)
-Ryota Mibu (NEC, r-mibu@cq.jp.nec.com)
-Serge Manning (Sprint, Serge.Manning@sprint.com)
-Tomi Juvonen (Nokia, tomi.juvonen@nokia.com)
-
-Link to TSC approval of the project: http://meetbot.opnfv.org/meetings/opnfv-meeting/2014/opnfv-meeting.2014-12-02-14.58.html
-Link(s) to approval of committer update:
-http://lists.opnfv.org/pipermail/opnfv-tsc/2015-June/000905.html
-http://lists.opnfv.org/pipermail/opnfv-tech-discuss/2015-June/003165.html
-http://lists.opnfv.org/pipermail/opnfv-tech-discuss/2016-June/011245.html
-http://lists.opnfv.org/pipermail/opnfv-tech-discuss/2016-July/011771.html
diff --git a/INFO.yaml b/INFO.yaml
index 3b9a3101..97acb69f 100644
--- a/INFO.yaml
+++ b/INFO.yaml
@@ -34,31 +34,12 @@ repositories:
- 'doctor'
committers:
- <<: *opnfv_doctor_ptl
- - name: 'Ashiq Khan'
- email: 'khan@nttdocomo.com'
- company: 'NTT DOCOMO'
- id: 'ashiq.khan'
- - name: 'Serge Manning'
- email: 'serge.manning@sprint.com'
- company: 'Sprint'
- id: 'sergem913'
- - name: 'Gerald Kunzmann'
- email: 'kunzmann@docomolab-euro.com'
- company: 'DOCOMO Euro-Labs'
- id: 'kunzmann'
- name: 'wenjuan dong'
email: 'dong.wenjuan@zte.com.cn'
company: 'ZTE'
id: 'dongwenjuan'
- - name: 'Bertrand Souville'
- email: 'souville@docomolab-euro.com'
- company: 'DOCOMO Euro-Labs'
- id: 'bertys'
- - name: 'Ryota Mibu'
- email: 'r-mibu@cq.jp.nec.com'
- company: 'NEC'
- id: 'r-mibu'
tsc:
+ # yamllint disable rule:line-length
approval: 'http//meetbot.opnfv.org/meetings/opnfv-meeting/2014/opnfv-meeting.2014-12-02-14.58.html'
changes:
- type: 'removal'
@@ -100,3 +81,6 @@ tsc:
- type: 'removal'
name: 'Peter Lee'
link: 'https://lists.opnfv.org/pipermail/opnfv-tsc/2018-March/004190.html'
+ - type: 'removal'
+ name: 'Bertrand Souville'
+ link: 'https://lists.opnfv.org/g/opnfv-tech-discuss/message/22344'
diff --git a/devstack/README.rst b/devstack/README.rst
index 91e8abfe..aaa18a7f 100644
--- a/devstack/README.rst
+++ b/devstack/README.rst
@@ -18,7 +18,9 @@ OPNFV Doctor in DevStack.
enable_plugin osprofiler https://git.openstack.org/openstack/osprofiler
enable_plugin doctor https://git.opnfv.org/doctor
-to the ``[[local|localrc]]`` section.
+to the ``[[local|localrc]]`` section. Alternatively, you can copy the provided ``local.conf.sample``::
+
+ cp /<path-to-doctor>/devstack/local.conf.sample ${DEVSTACK_DIR}/local.conf
.. note:: The order of enabling plugins matters.
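For reference, a minimal sketch of the flow this README describes (paths are
illustrative; this assumes Doctor and DevStack are cloned under ``/opt/stack``):

.. code-block:: bash

    # clone both repositories
    git clone https://git.opnfv.org/doctor /opt/stack/doctor
    git clone https://github.com/openstack/devstack /opt/stack/devstack

    # start from the provided sample and adjust HOST_IP / passwords as needed
    cp /opt/stack/doctor/devstack/local.conf.sample /opt/stack/devstack/local.conf

    # bring up DevStack with the Doctor plugin enabled
    cd /opt/stack/devstack && ./stack.sh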
diff --git a/devstack/local.conf.sample b/devstack/local.conf.sample
new file mode 100644
index 00000000..2967714a
--- /dev/null
+++ b/devstack/local.conf.sample
@@ -0,0 +1,120 @@
+# Sample ``local.conf`` for user-configurable variables in ``stack.sh``
+
+# NOTE: Copy this file to the root DevStack directory for it to work properly.
+
+# ``local.conf`` is a user-maintained settings file that is sourced from ``stackrc``.
+# This gives it the ability to override any variables set in ``stackrc``.
+# Also, most of the settings in ``stack.sh`` are written to only be set if no
+# value has already been set; this lets ``local.conf`` effectively override the
+# default values.
+
+# This is a collection of some of the settings we have found to be useful
+# in our DevStack development environments. Additional settings are described
+# in https://docs.openstack.org/devstack/latest/configuration.html#local-conf
+# These should be considered as samples and are unsupported DevStack code.
+
+# The ``localrc`` section replaces the old ``localrc`` configuration file.
+# Note that if ``localrc`` is present it will be used in favor of this section.
+[[local|localrc]]
+
+# Minimal Contents
+# ----------------
+
+# While ``stack.sh`` is happy to run without ``localrc``, devlife is better when
+# there are a few minimal variables set:
+
+# If the ``*_PASSWORD`` variables are not set here you will be prompted to enter
+# values for them by ``stack.sh`` and they will be added to ``local.conf``.
+ADMIN_PASSWORD=devstack
+DATABASE_PASSWORD=$ADMIN_PASSWORD
+RABBIT_PASSWORD=$ADMIN_PASSWORD
+SERVICE_PASSWORD=$ADMIN_PASSWORD
+
+# ``HOST_IP`` and ``HOST_IPV6`` should be set manually for best results if
+# the NIC configuration of the host is unusual, i.e. ``eth1`` has the default
+# route but ``eth0`` is the public interface. They are auto-detected in
+# ``stack.sh`` but often is indeterminate on later runs due to the IP moving
+# from an Ethernet interface to a bridge on the host. Setting it here also
+# makes it available for ``openrc`` to include when setting ``OS_AUTH_URL``.
+# Neither is set by default.
+HOST_IP=127.0.0.1
+#HOST_IPV6=2001:db8::7
+
+
+# Logging
+# -------
+
+# By default ``stack.sh`` output only goes to the terminal where it runs. It can
+# be configured to additionally log to a file by setting ``LOGFILE`` to the full
+# path of the destination log file. A timestamp will be appended to the given name.
+LOGFILE=$DEST/logs/stack.sh.log
+
+# Old log files are automatically removed after 7 days to keep things neat. Change
+# the number of days by setting ``LOGDAYS``.
+LOGDAYS=2
+
+# Nova logs will be colorized if ``SYSLOG`` is not set; turn this off by setting
+# ``LOG_COLOR`` false.
+#LOG_COLOR=False
+
+
+# Using milestone-proposed branches
+# ---------------------------------
+
+# Uncomment these to grab the milestone-proposed branches from the
+# repos:
+#CINDER_BRANCH=milestone-proposed
+#GLANCE_BRANCH=milestone-proposed
+#HORIZON_BRANCH=milestone-proposed
+#KEYSTONE_BRANCH=milestone-proposed
+#KEYSTONECLIENT_BRANCH=milestone-proposed
+#NOVA_BRANCH=milestone-proposed
+#NOVACLIENT_BRANCH=milestone-proposed
+#NEUTRON_BRANCH=milestone-proposed
+#SWIFT_BRANCH=milestone-proposed
+
+# Using git versions of clients
+# -----------------------------
+# By default clients are installed from pip. See LIBS_FROM_GIT in
+# stackrc for details on getting clients from specific branches or
+# revisions. e.g.
+# LIBS_FROM_GIT="python-ironicclient"
+# IRONICCLIENT_BRANCH=refs/changes/44/2.../1
+
+# Swift
+# -----
+
+# Swift is now used as the back-end for the S3-like object store. Setting the
+# hash value is required and you will be prompted for it if Swift is enabled
+# so just set it to something already:
+SWIFT_HASH=66a3d6b56c1f479c8b4e70ab5c2000f5
+
+# For development purposes the default of 3 replicas is usually not required.
+# Set this to 1 to save some resources:
+SWIFT_REPLICAS=1
+
+# The data for Swift is stored by default in (``$DEST/data/swift``),
+# or (``$DATA_DIR/swift``) if ``DATA_DIR`` has been set, and can be
+# moved by setting ``SWIFT_DATA_DIR``. The directory will be created
+# if it does not exist.
+SWIFT_DATA_DIR=$DEST/data
+
+# OPNFV Doctor
+# ------------
+
+# Enable the required plugins
+# The order of enabling plugins matters
+enable_plugin aodh http://git.openstack.org/openstack/aodh
+enable_plugin panko https://git.openstack.org/openstack/panko
+enable_plugin ceilometer https://git.openstack.org/openstack/ceilometer
+enable_plugin osprofiler https://git.openstack.org/openstack/osprofiler
+enable_plugin doctor https://git.opnfv.org/doctor
+
+# To enable Python 3
+# USE_PYTHON3=True
+
+# To enable Congress as Doctor Inspector
+# enable_plugin congress https://git.openstack.org/openstack/congress
+
+# To enable Neutron port data plane status
+# Q_ML2_PLUGIN_EXT_DRIVERS=data_plane_status
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 00000000..3c9978bb
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,2 @@
+from docs_conf.conf import * # noqa: F401,F403
+master_doc = 'index'
diff --git a/docs/conf.yaml b/docs/conf.yaml
new file mode 100644
index 00000000..e08a41ab
--- /dev/null
+++ b/docs/conf.yaml
@@ -0,0 +1,3 @@
+---
+project_cfg: opnfv
+project: DOCTOR
diff --git a/docs/development/index.rst b/docs/development/index.rst
index 2dc16a82..a7d2817b 100644
--- a/docs/development/index.rst
+++ b/docs/development/index.rst
@@ -2,18 +2,18 @@
.. http://creativecommons.org/licenses/by/4.0
.. (c) 2016 OPNFV.
+.. _development:
-======
-Doctor
-======
+===========
+Development
+===========
.. toctree::
:maxdepth: 2
- ./design/index.rst
- ./requirements/index.rst
- ./manuals/index.rst
- ./overview/functest_scenario/index.rst
+ ./design/index
+ ./overview/index
+ ./requirements/index
Indices
=======
diff --git a/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst b/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
deleted file mode 100644
index 9f92b5bf..00000000
--- a/docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
+++ /dev/null
@@ -1,253 +0,0 @@
-.. This work is licensed under a Creative Commons Attribution 4.0 International License.
-.. http://creativecommons.org/licenses/by/4.0
-
-
-
-Platform overview
-"""""""""""""""""
-
-Doctor platform provides these features since `Danube Release <https://wiki.opnfv.org/display/SWREL/Danube>`_:
-
-* Immediate Notification
-* Consistent resource state awareness for compute host down
-* Valid compute host status given to VM owner
-
-These features enable high availability of Network Services on top of
-the virtualized infrastructure. Immediate notification allows VNF managers
-(VNFM) to process recovery actions promptly once a failure has occurred.
-Same framework can also be utilized to have VNFM awareness about
-infrastructure maintenance.
-
-Consistency of resource state is necessary to execute recovery actions
-properly in the VIM.
-
-Ability to query host status gives VM owner the possibility to get
-consistent state information through an API in case of a compute host
-fault.
-
-The Doctor platform consists of the following components:
-
-* OpenStack Compute (Nova)
-* OpenStack Networking (Neutron)
-* OpenStack Telemetry (Ceilometer)
-* OpenStack Alarming (AODH)
-* Doctor Sample Inspector, OpenStack Congress or OpenStack Vitrage
-* Doctor Sample Monitor or any monitor supported by Congress or Vitrage
-
-.. note::
- Doctor Sample Monitor is used in Doctor testing. However in real
- implementation like Vitrage, there are several other monitors supported.
-
-You can see an overview of the Doctor platform and how components interact in
-:numref:`figure-p1`.
-
-.. figure:: ./images/Fault-management-design.png
- :name: figure-p1
- :width: 100%
-
- Doctor platform and typical sequence
-
-Detailed information on the Doctor architecture can be found in the Doctor
-requirements documentation:
-http://artifacts.opnfv.org/doctor/docs/requirements/05-implementation.html
-
-Running test cases
-""""""""""""""""""
-
-Functest will call the "doctor_tests/main.py" in Doctor to run the test job.
-Doctor testing can also be triggered by tox on OPNFV installer jumphost. Tox
-is normally used for functional, module and coding style testing in Python
-project.
-
-Currently, 'Apex', 'Daisy', 'Fuel' and 'local' installer are supported.
-
-
-Fault management use case
-"""""""""""""""""""""""""
-
-* A consumer of the NFVI wants to receive immediate notifications about faults
- in the NFVI affecting the proper functioning of the virtual resources.
- Therefore, such faults have to be detected as quickly as possible, and, when
- a critical error is observed, the affected consumer is immediately informed
- about the fault and can switch over to the STBY configuration.
-
-The faults to be monitored (and at which detection rate) will be configured by
-the consumer. Once a fault is detected, the Inspector in the Doctor
-architecture will check the resource map maintained by the Controller, to find
-out which virtual resources are affected and then update the resources state.
-The Notifier will receive the failure event requests sent from the Controller,
-and notify the consumer(s) of the affected resources according to the alarm
-configuration.
-
-Detailed workflow information is as follows:
-
-* Consumer(VNFM): (step 0) creates resources (network, server/instance) and an
- event alarm on state down notification of that server/instance or Neutron
- port.
-
-* Monitor: (step 1) periodically checks nodes, such as ping from/to each
- dplane nic to/from gw of node, (step 2) once it fails to send out event
- with "raw" fault event information to Inspector
-
-* Inspector: when it receives an event, it will (step 3) mark the host down
- ("mark-host-down"), (step 4) map the PM to VM, and change the VM status to
- down. In network failure case, also Neutron port is changed to down.
-
-* Controller: (step 5) sends out instance update event to Ceilometer. In network
- failure case, also Neutron port is changed to down and corresponding event is
- sent to Ceilometer.
-
-* Notifier: (step 6) Ceilometer transforms and passes the events to AODH,
- (step 7) AODH will evaluate events with the registered alarm definitions,
- then (step 8) it will fire the alarm to the "consumer" who owns the
- instance
-
-* Consumer(VNFM): (step 9) receives the event and (step 10) recreates a new
- instance
-
-Fault management test case
-""""""""""""""""""""""""""
-
-Functest will call the 'doctor-test' command in Doctor to run the test job.
-
-The following steps are executed:
-
-Firstly, get the installer ip according to the installer type. Then ssh to
-the installer node to get the private key for accessing to the cloud. As
-'fuel' installer, ssh to the controller node to modify nova and ceilometer
-configurations.
-
-Secondly, prepare image for booting VM, then create a test project and test
-user (both default to doctor) for the Doctor tests.
-
-Thirdly, boot a VM under the doctor project and check the VM status to verify
-that the VM is launched completely. Then get the compute host info where the VM
-is launched to verify connectivity to the target compute host. Get the consumer
-ip according to the route to compute ip and create an alarm event in Ceilometer
-using the consumer ip.
-
-Fourthly, the Doctor components are started, and, based on the above preparation,
-a failure is injected to the system, i.e. the network of compute host is
-disabled for 3 minutes. To ensure the host is down, the status of the host
-will be checked.
-
-Finally, the notification time, i.e. the time between the execution of step 2
-(Monitor detects failure) and step 9 (Consumer receives failure notification)
-is calculated.
-
-According to the Doctor requirements, the Doctor test is successful if the
-notification time is below 1 second.
-
-Maintenance use case
-""""""""""""""""""""
-
-* A consumer of the NFVI wants to interact with NFVI maintenance, upgrade,
- scaling and to have graceful retirement. Receiving notifications over these
- NFVI events and responding to those within given time window, consumer can
- guarantee zero downtime to his service.
-
-The maintenance use case adds the Doctor platform an `admin tool` and an
-`app manager` component. Overview of maintenance components can be seen in
-:numref:`figure-p2`.
-
-.. figure:: ./images/Maintenance-design.png
- :name: figure-p2
- :width: 100%
-
- Doctor platform components in maintenance use case
-
-In maintenance use case, `app manager` (VNFM) will subscribe to maintenance
-notifications triggered by project specific alarms through AODH. This is the way
-it gets to know different NFVI maintenance, upgrade and scaling operations that
-effect to its instances. The `app manager` can do actions depicted in `green
-color` or tell `admin tool` to do admin actions depicted in `orange color`
-
-Any infrastructure component like `Inspector` can subscribe to maintenance
-notifications triggered by host specific alarms through AODH. Subscribing to the
-notifications needs admin privileges and can tell when a host is out of use as
-in maintenance and when it is taken back to production.
-
-Maintenance test case
-"""""""""""""""""""""
-
-Maintenance test case is currently running in our Apex CI and executed by tox.
-This is because the special limitation mentioned below and also the fact we
-currently have only sample implementation as a proof of concept. Environmental
-variable TEST_CASE='maintenance' needs to be used when executing
-"doctor_tests/main.py". Test case workflow can be seen in :numref:`figure-p3`.
-
-.. figure:: ./images/Maintenance-workflow.png
- :name: figure-p3
- :width: 100%
-
- Maintenance test case workflow
-
-In test case all compute capacity will be consumed with project (VNF) instances.
-For redundant services on instances and an empty compute needed for maintenance,
-test case will need at least 3 compute nodes in system. There will be 2
-instances on each compute, so minimum number of VCPUs is also 2. Depending on
-how many compute nodes there is application will always have 2 redundant
-instances (ACT-STDBY) on different compute nodes and rest of the compute
-capacity will be filled with non-redundant instances.
-
-For each project specific maintenance message there is a time window for
-`app manager` to make any needed action. This will guarantee zero
-down time for his service. All replies back are done by calling `admin tool` API
-given in the message.
-
-The following steps are executed:
-
-Infrastructure admin will call `admin tool` API to trigger maintenance for
-compute hosts having instances belonging to a VNF.
-
-Project specific `MAINTENANCE` notification is triggered to tell `app manager`
-that his instances are going to hit by infrastructure maintenance at a specific
-point in time. `app manager` will call `admin tool` API to answer back
-`ACK_MAINTENANCE`.
-
-When the time comes to start the actual maintenance workflow in `admin tool`,
-a `DOWN_SCALE` notification is triggered as there is no empty compute node for
-maintenance (or compute upgrade). Project receives corresponding alarm and scales
-down instances and call `admin tool` API to answer back `ACK_DOWN_SCALE`.
-
-As it might happen instances are not scaled down (removed) from a single
-compute node, `admin tool` might need to figure out what compute node should be
-made empty first and send `PREPARE_MAINTENANCE` to project telling which instance
-needs to be migrated to have the needed empty compute. `app manager` makes sure
-he is ready to migrate instance and call `admin tool` API to answer back
-`ACK_PREPARE_MAINTENANCE`. `admin tool` will make the migration and answer
-`ADMIN_ACTION_DONE`, so `app manager` knows instance can be again used.
-
-:numref:`figure-p3` has next a light blue section of actions to be done for each
-compute. However as we now have one empty compute, we will maintain/upgrade that
-first. So on first round, we can straight put compute in maintenance and send
-admin level host specific `IN_MAINTENANCE` message. This is caught by `Inspector`
-to know host is down for maintenance. `Inspector` can now disable any automatic
-fault management actions for the host as it can be down for a purpose. After
-`admin tool` has completed maintenance/upgrade `MAINTENANCE_COMPLETE` message
-is sent to tell host is back in production.
-
-Next rounds we always have instances on compute, so we need to have
-`PLANNED_MAINTANANCE` message to tell that those instances are now going to hit
-by maintenance. When `app manager` now receives this message, he knows instances
-to be moved away from compute will now move to already maintained/upgraded host.
-In test case no upgrade is done on application side to upgrade instances
-according to new infrastructure capabilities, but this could be done here as
-this information is also passed in the message. This might be just upgrading
-some RPMs, but also totally re-instantiating instance with a new flavor. Now if
-application runs an active side of a redundant instance on this compute,
-a switch over will be done. After `app manager` is ready he will call
-`admin tool` API to answer back `ACK_PLANNED_MAINTENANCE`. In test case the
-answer is `migrate`, so `admin tool` will migrate instances and reply
-`ADMIN_ACTION_DONE` and then `app manager` knows instances can be again used.
-Then we are ready to make the actual maintenance as previously trough
-`IN_MAINTENANCE` and `MAINTENANCE_COMPLETE` steps.
-
-After all computes are maintained, `admin tool` can send `MAINTENANCE_COMPLETE`
-to tell maintenance/upgrade is now complete. For `app manager` this means he
-can scale back to full capacity.
-
-This is the current sample implementation and test case. Real life
-implementation is started in OpenStack Fenix project and there we should
-eventually address requirements more deeply and update the test case with Fenix
-implementation.
diff --git a/docs/development/overview/index.rst b/docs/development/overview/index.rst
index 956e73e3..f6d78d57 100644
--- a/docs/development/overview/index.rst
+++ b/docs/development/overview/index.rst
@@ -3,11 +3,12 @@
.. _doctor-overview:
-************************
-Doctor Development Guide
-************************
+********
+Overview
+********
.. toctree::
:maxdepth: 2
+ overview.rst
testing.rst
diff --git a/docs/development/overview/overview.rst b/docs/development/overview/overview.rst
new file mode 100644
index 00000000..21f5439e
--- /dev/null
+++ b/docs/development/overview/overview.rst
@@ -0,0 +1,52 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Platform overview
+"""""""""""""""""
+
+Doctor platform provides these features since `Danube Release <https://wiki.opnfv.org/display/SWREL/Danube>`_:
+
+* Immediate Notification
+* Consistent resource state awareness for compute host down
+* Valid compute host status given to VM owner
+
+These features enable high availability of Network Services on top of
+the virtualized infrastructure. Immediate notification allows VNF managers
+(VNFM) to process recovery actions promptly once a failure has occurred.
+The same framework can also be utilized to make the VNFM aware of
+infrastructure maintenance.
+
+Consistency of resource state is necessary to execute recovery actions
+properly in the VIM.
+
+Ability to query host status gives VM owner the possibility to get
+consistent state information through an API in case of a compute host
+fault.
+
+The Doctor platform consists of the following components:
+
+* OpenStack Compute (Nova)
+* OpenStack Networking (Neutron)
+* OpenStack Telemetry (Ceilometer)
+* OpenStack Alarming (AODH)
+* Doctor Sample Inspector, OpenStack Congress or OpenStack Vitrage
+* Doctor Sample Monitor or any monitor supported by Congress or Vitrage
+
+.. note::
+    Doctor Sample Monitor is used in Doctor testing. However, in a real
+    implementation such as Vitrage, several other monitors are supported.
+
+You can see an overview of the Doctor platform and how components interact in
+:numref:`figure-p1`.
+
+
+The maintenance use case provides these features since the `Iruya Release <https://wiki.opnfv.org/display/SWREL/Iruya>`_:
+
+* Infrastructure maintenance and upgrade workflow
+* Interaction between VNFM and the infrastructure workflow
+
+Since the `Jerma Release <https://wiki.opnfv.org/display/SWREL/Jerma>`_ the
+maintenance use case also supports the 'ETSI FEAT03' implementation to have
+infrastructure maintenance and upgrade fully optimized while keeping zero
+impact on the VNF service.
+
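As an illustration of the "valid compute host status" feature listed above, the
VM owner can read the ``host_status`` field exposed by Nova (a sketch; assumes
compute API microversion 2.16 or later and a server named ``my-server``):

.. code-block:: bash

    # host_status changes to DOWN once the host has been marked down
    # (the "mark-host-down" step performed by the Doctor Inspector)
    openstack --os-compute-api-version 2.16 server show my-server -c host_status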
diff --git a/docs/development/requirements/index.rst b/docs/development/requirements/index.rst
index fceaebf0..ccc35cb8 100644
--- a/docs/development/requirements/index.rst
+++ b/docs/development/requirements/index.rst
@@ -3,9 +3,9 @@
.. _doctor-requirements:
-****************************************
-Doctor: Fault Management and Maintenance
-****************************************
+**********************************************
+Requirements: Fault Management and Maintenance
+**********************************************
:Project: Doctor, https://wiki.opnfv.org/doctor
:Editors: Ashiq Khan (NTT DOCOMO), Gerald Kunzmann (NTT DOCOMO)
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 00000000..b8e8bfd0
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,17 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+.. _doctor:
+
+=========================================
+Fault Management and Maintenance (Doctor)
+=========================================
+
+.. toctree::
+ :numbered:
+ :maxdepth: 2
+
+ development/index
+ release/index
+ testing/index
diff --git a/docs/release/configguide/feature.configuration.rst b/docs/release/configguide/feature.configuration.rst
index 64928eea..8fbff50e 100644
--- a/docs/release/configguide/feature.configuration.rst
+++ b/docs/release/configguide/feature.configuration.rst
@@ -159,3 +159,57 @@ You can configure the Sample Monitor as follows (Example for Apex deployment):
"http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
**Collectd Monitor**
+
+OpenStack components
+====================
+
+In OPNFV, and with Doctor testing, you can have all OpenStack components configured
+as needed. Here is a sample of the needed configuration modifications.
+
+Ceilometer
+----------
+
+/etc/ceilometer/event_definitions.yaml:
+# Maintenance use case needs new alarm definitions to be added
+- event_type: maintenance.scheduled
+ traits:
+ actions_at:
+ fields: payload.maintenance_at
+ type: datetime
+ allowed_actions:
+ fields: payload.allowed_actions
+ host_id:
+ fields: payload.host_id
+ instances:
+ fields: payload.instances
+ metadata:
+ fields: payload.metadata
+ project_id:
+ fields: payload.project_id
+ reply_url:
+ fields: payload.reply_url
+ session_id:
+ fields: payload.session_id
+ state:
+ fields: payload.state
+- event_type: maintenance.host
+ traits:
+ host:
+ fields: payload.host
+ project_id:
+ fields: payload.project_id
+ session_id:
+ fields: payload.session_id
+ state:
+ fields: payload.state
+
+/etc/ceilometer/event_pipeline.yaml:
+# Maintenance and Fault management both need these to be added
+ - notifier://
+ - notifier://?topic=alarm.all
+
+Nova
+----
+
+/etc/nova/nova.conf
+cpu_allocation_ratio=1.0
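With the event definitions and event pipeline above in place, a consumer such as
the `app manager` could subscribe to the project-specific maintenance
notifications with an Aodh event alarm, for example (a sketch; the alarm-action
URL is illustrative):

.. code-block:: bash

    openstack alarm create --name maintenance_scheduled --type event \
        --event-type maintenance.scheduled \
        --alarm-action http://<app_manager_ip>:<port>/maintenance \
        --repeat-actions True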
diff --git a/docs/release/configguide/index.rst b/docs/release/configguide/index.rst
index b1e7c33d..c2331115 100644
--- a/docs/release/configguide/index.rst
+++ b/docs/release/configguide/index.rst
@@ -3,9 +3,9 @@
.. _doctor-configguide:
-*************************
-Doctor Installation Guide
-*************************
+**************************
+Doctor Configuration Guide
+**************************
.. toctree::
:maxdepth: 2
diff --git a/docs/release/index.rst b/docs/release/index.rst
index 8a1bf405..67eb4c5f 100644
--- a/docs/release/index.rst
+++ b/docs/release/index.rst
@@ -2,14 +2,18 @@
.. http://creativecommons.org/licenses/by/4.0
.. (c) 2017 OPNFV.
+.. _release:
-======
-Doctor
-======
+=======
+Release
+=======
.. toctree::
:maxdepth: 2
+ ./configguide/index.rst
./installation/index.rst
+ ./release-notes/index.rst
+ ./scenarios/fault_management/fault_management.rst
+ ./scenarios/maintenance/maintenance.rst
./userguide/index.rst
-
diff --git a/docs/development/manuals/index.rst b/docs/release/installation/index.rst
index f705f94a..f6527e5d 100644
--- a/docs/development/manuals/index.rst
+++ b/docs/release/installation/index.rst
@@ -1,13 +1,13 @@
.. This work is licensed under a Creative Commons Attribution 4.0 International License.
.. http://creativecommons.org/licenses/by/4.0
-.. _doctor-manuals:
+.. _doctor-installation:
-*******
-Manuals
-*******
+*************************
+Doctor Installation Guide
+*************************
.. toctree::
+ :maxdepth: 2
-.. include:: mark-host-down_manual.rst
-.. include:: get-valid-server-state.rst
+ installation.rst
diff --git a/docs/release/installation/installation.rst b/docs/release/installation/installation.rst
new file mode 100644
index 00000000..564f19fd
--- /dev/null
+++ b/docs/release/installation/installation.rst
@@ -0,0 +1,44 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Doctor Installation
+====================
+
+You can clone the doctor project on the OPNFV installer jumphost or, if you are
+not in an OPNFV environment, clone Doctor to the DevStack controller node::
+
+    git clone https://gerrit.opnfv.org/gerrit/doctor
+
+Below is a sample of what Doctor testing requires on a DevStack controller for
+sample fault management testing and for maintenance testing using Fenix.
+
+.. code-block:: bash
+
+ git clone https://github.com/openstack/devstack -b stable/train
+
+.. code-block:: bash
+
+    cd devstack
+    vi local.conf
+
+.. code-block:: bash
+
+ [[local|localrc]]
+ GIT_BASE=https://git.openstack.org
+ HOST_IP=<host_ip>
+ ADMIN_PASSWORD=admin
+ DATABASE_PASSWORD=admin
+ RABBIT_PASSWORD=admin
+ SERVICE_PASSWORD=admin
+ LOGFILE=/opt/stack/stack.sh.log
+
+ PUBLIC_INTERFACE=eth0
+
+ CEILOMETER_EVENT_ALARM=True
+
+ ENABLED_SERVICES=key,rabbit,mysql,fenix-engine,fenix-api,aodh-evaluator,aodh-notifier,aodh-api
+
+ enable_plugin ceilometer https://git.openstack.org/openstack/ceilometer stable/train
+ enable_plugin aodh https://git.openstack.org/openstack/aodh stable/train
+ enable_plugin gnocchi https://github.com/openstack/gnocchi
+ enable_plugin fenix https://opendev.org/x/fenix master
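Once the environment is up, Doctor tests are run through ``doctor_tests/main.py``,
typically via tox; a sketch of a local run (the environment variable values are
examples, see ``tox.ini`` and ``doctor_tests/main.py`` for the supported options):

.. code-block:: bash

    cd doctor
    export INSTALLER_TYPE=devstack
    export INSPECTOR_TYPE=sample
    export TEST_CASE=maintenance    # or fault_management
    tox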
diff --git a/docs/release/release-notes/index.rst b/docs/release/release-notes/index.rst
index 2e6d46e1..a0e30501 100644
--- a/docs/release/release-notes/index.rst
+++ b/docs/release/release-notes/index.rst
@@ -10,4 +10,4 @@ Doctor Release Notes
.. toctree::
:maxdepth: 2
- releasenotes.rst
+ release-notes.rst
diff --git a/docs/release/release-notes/release-notes.rst b/docs/release/release-notes/release-notes.rst
new file mode 100644
index 00000000..b525335e
--- /dev/null
+++ b/docs/release/release-notes/release-notes.rst
@@ -0,0 +1,146 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+This document provides the release notes for the Jerma version of Doctor.
+
+Important notes
+===============
+
+The Jerma release has mainly been about finalizing maintenance use case testing,
+supporting the ETSI FEAT03 defined interaction between VNFM and infrastructure.
+This is mainly to have infrastructure maintenance and upgrade operations
+optimized to run as fast as possible while keeping the VNFs on top with zero
+impact on their service.
+
+Furthermore, this is the final release of Doctor, and the deeper testing is
+moving to upstream projects like Fenix for the maintenance use case. Also, in
+this release we have made sure that all Doctor testing, and any deeper testing
+with the upstream projects, can be done in DevStack. This also makes DevStack
+the most important installer.
+
+Summary
+=======
+
+Jerma Doctor framework uses OpenStack Train integrated into its test cases.
+
+Release Data
+============
+
+Doctor changes
+
+- Maintenance use case updated to support latest version of Fenix.
+- Maintenance use case now supports ETSI FEAT03 optimization with Fenix.
+- Doctor testing is now preferred to be done in DevStack environment
+ where one can easily select OpenStack release from Rocky to Ussuri to
+ test Doctor functionality. Latest OPNFV Fuel can also be used for the
+ OpenStack version it supports.
+
+Doctor CI
+
+- Doctor tested with fuel installer.
+- Fault management use case is tested with sample inspector.
+- Maintenance use case is tested with sample implementation and towards
+  the latest Fenix version. This includes the new ETSI FEAT03 optimization.
+
+Version change
+^^^^^^^^^^^^^^
+
+Module version changes
+~~~~~~~~~~~~~~~~~~~~~~
+
+- OpenStack has changed from Stein to Train
+
+Document version changes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+All documentation is updated to OPNFV unified format according to
+documentation guidelines. Small updates in many documents.
+
+Reason for version
+^^^^^^^^^^^^^^^^^^
+
+N/A
+
+Feature additions
+~~~~~~~~~~~~~~~~~
+
++--------------------+--------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------+
+| DOCTOR-137 | VNFM maintenance with ETSI changes |
++--------------------+--------------------------------------------+
+| DOCTOR-136 | DevStack support |
++--------------------+--------------------------------------------+
+
+
+Deliverables
+------------
+
+Software deliverables
+=====================
+
+None
+
+Documentation deliverables
+==========================
+
+https://git.opnfv.org/doctor/tree/docs
+
+Known Limitations, Issues and Workarounds
+=========================================
+
+System Limitations
+^^^^^^^^^^^^^^^^^^
+
+Maintenance test case requirements:
+
+- Minimum number of nodes: 1 Controller, 3 Computes
+- Min number of VCPUs: 2 VCPUs for each compute
+
+Known issues
+^^^^^^^^^^^^
+
+None
+
+Workarounds
+^^^^^^^^^^^
+
+None
+
+Test Result
+===========
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
++--------------------------------------+--------------+
+
+Doctor CI results with TEST_CASE='maintenance' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
+| ADMIN_TOOL_TYPE='fenix' *) | |
++--------------------------------------+--------------+
+
+*) Sample implementation not updated according to latest upstream Fenix
+ and is currently not being tested.
+
+References
+==========
+
+For more information about the OPNFV Doctor latest work, please see:
+
+https://wiki.opnfv.org/display/doctor/Doctor+Home
+
+Further information about the ETSI FEAT03 optimization can be found in the
+Fenix documentation:
+
+https://fenix.readthedocs.io/en/latest
diff --git a/docs/release/release-notes/releasenotes.rst b/docs/release/release-notes/releasenotes_fraser.rst
index f1cf9d7e..f1cf9d7e 100644
--- a/docs/release/release-notes/releasenotes.rst
+++ b/docs/release/release-notes/releasenotes_fraser.rst
diff --git a/docs/release/release-notes/releasenotes_gambia.rst b/docs/release/release-notes/releasenotes_gambia.rst
new file mode 100644
index 00000000..142bfacf
--- /dev/null
+++ b/docs/release/release-notes/releasenotes_gambia.rst
@@ -0,0 +1,303 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+This document provides the release notes for the Gambia release of Doctor.
+
+Important notes
+===============
+
+In the Gambia release, Doctor has been working on our second use case,
+maintenance. The design guideline is now done and a test case exists with sample
+maintenance workflow code implemented in Doctor. Work has also started to have
+the real implementation done in the OpenStack Fenix project
+https://wiki.openstack.org/wiki/Fenix.
+
+Doctor CI testing has now moved to use tox on the jumphost instead of running
+tests through the features container. Also, in Apex we use OpenStack services
+running in containers. Functest daily testing supports the Doctor fault
+management test case for the Apex, Daisy and Fuel installers. This testing is
+done through the features container.
+
+In this release, Doctor has not been working on the fault management use case, as
+the basic framework is already done. However, we might need to get back to
+it later to better meet the tough industry requirements as well as requirements
+from edge, containers and 5G.
+
+
+Summary
+=======
+
+Gambia Doctor framework uses OpenStack Queens integrated into its test cases.
+Compared to the previous release, the Heat project is also being used in the
+maintenance test case.
+
+Release Data
+============
+
+Doctor changes
+
++------------------------------------------+----------------------------------------------------------+
+| **commit-ID** | **Subject** |
++------------------------------------------+----------------------------------------------------------+
+| 5b3f5937e7b861fca46b2a6b2d6708866b800f95 | fix building docs |
++------------------------------------------+----------------------------------------------------------+
+| 2ca5924081ce4784f599437707bd32807aa155ce | Fix SSH client connection reset |
++------------------------------------------+----------------------------------------------------------+
+| baac6579556f8216b36db0d0f87f9c2d4f8b4ef5 | Support Apex with services in containers |
++------------------------------------------+----------------------------------------------------------+
+| 23bf63c4616040cb0d69cd26238af2a4a7c00a90 | fix the username to login undercloud in Apex |
++------------------------------------------+----------------------------------------------------------+
+| 61eb3927ada784cc3dffb5ddd17f66e47871f708 | Local Documentation Builds |
++------------------------------------------+----------------------------------------------------------+
+| 0f1dd4314b9e0247d9af7af6df2410462423aeca | Updated from global requirements |
++------------------------------------------+----------------------------------------------------------+
+| 2d4a9f0c0a93797da6534583f6e74553a4b634be | Fix links to remove references to submodules |
++------------------------------------------+----------------------------------------------------------+
+| 3ddc2392b0ed364eede49ff006d64df3ea456350 | Gambia release notes |
++------------------------------------------+----------------------------------------------------------+
+| 825a0a0dd5e8028129b782ed21c549586257b1c5 | delete doctor datasource in congress when cleanup |
++------------------------------------------+----------------------------------------------------------+
+| fcf53129ab2b18b84571faff13d7cb118b3a41b3 | run profile even the notification time is larger than 1S |
++------------------------------------------+----------------------------------------------------------+
+| 495965d0336d42fc36494c81fd15cee2f34c96e9 | Update and add test case |
++------------------------------------------+----------------------------------------------------------+
+| da25598a6a31abe0579ffed12d1719e5ff75f9a7 | bugfix: add doctor datasource in congress |
++------------------------------------------+----------------------------------------------------------+
+| f9e1e3b1ae4be80bc2dc61d9c4213c81c091ea72 | Update the maintenance design document |
++------------------------------------------+----------------------------------------------------------+
+| 4639f15e6db2f1480b41f6fbfd11d70312d4e421 | Add maintenance test code |
++------------------------------------------+----------------------------------------------------------+
+| b54cbc5dd2d32fcb27238680b4657ed384d021c5 | Add setup and cleanup for maintenance test |
++------------------------------------------+----------------------------------------------------------+
+| b2bb504032ac81a2ed3f404113b097d9ce3d7f14 | bugfix: kill the stunnel when cleanup |
++------------------------------------------+----------------------------------------------------------+
+| eaeb3c0f9dc9e6645a159d0a78b9fc181fce53d4 | add ssh_keyfile for connect to installer in Apex |
++------------------------------------------+----------------------------------------------------------+
+| dcbe7bf1c26052b0e95d209254e7273aa1eaace1 | Add tox and test case to testing document |
++------------------------------------------+----------------------------------------------------------+
+| 0f607cb5efd91ee497346b7f792dfa844d15595c | enlarge the time of link down |
++------------------------------------------+----------------------------------------------------------+
+| 1351038a65739b8d799820de515178326ad05f7b | bugfix: fix the filename of ssh tunnel |
++------------------------------------------+----------------------------------------------------------+
+| e70bf248daac03eee6b449cd1654d2ee6265dd8c | Use py34 instead of py35 |
++------------------------------------------+----------------------------------------------------------+
+| 2a60d460eaf018951456451077b7118b60219b32 | add INSPECTOR_TYPE and TEST_CASE to tox env |
++------------------------------------------+----------------------------------------------------------+
+| 2043ceeb08c1eca849daeb2b3696d385425ba061 | [consumer] fix default value for port number |
++------------------------------------------+----------------------------------------------------------+
+
+Releng changes
+
++------------------------------------------+-----------------------------------------------------------------------+
+| **commit-ID** | **Subject** |
++------------------------------------------+-----------------------------------------------------------------------+
+| c87309f5a75ccc5d595f708817b97793c24c4387 | Add Doctor maintenance job |
++------------------------------------------+-----------------------------------------------------------------------+
+| bd16a9756ffd0743e143f0f2f966da8dd666c7a3 | remove congress test in Daisy |
++------------------------------------------+-----------------------------------------------------------------------+
+| c47aaaa53c91aae93877f2532c72374beaa4eabe | remove fuel job in Doctor |
++------------------------------------------+-----------------------------------------------------------------------+
+| ab2fed2522eaf82ea7c63dd05008a37c56e825d0 | use 'workspace-cleanup' plugin in publisher |
++------------------------------------------+-----------------------------------------------------------------------+
+| 3aaed5cf40092744f1b87680b9205a2901baecf3 | clean the workspace in the publisher |
++------------------------------------------+-----------------------------------------------------------------------+
+| 50151eb3717edd4ddd996f3705fbe1732de7f3b7 | run tox with 'sudo' |
++------------------------------------------+-----------------------------------------------------------------------+
+| a3adc85ecb52f5d19ec4e9c49ca1ac35aa429ff9 | remove inspector variable form job template |
++------------------------------------------+-----------------------------------------------------------------------+
+| adfbaf2a3e8487e4c9152bf864a653a0425b8582 | run doctor tests with different inspectors in sequence |
++------------------------------------------+-----------------------------------------------------------------------+
+| 2e98e56224cd550cb3bf9798e420eece28139bd9 | add the ssh_key info if the key_file is exist |
++------------------------------------------+-----------------------------------------------------------------------+
+| c109c271018e9a85d94be1b9b468338d64589684 | prepare installer info for doctor test |
++------------------------------------------+-----------------------------------------------------------------------+
+| 57cbefc7160958eae1d49e4753779180a25864af | use py34 for tox |
++------------------------------------------+-----------------------------------------------------------------------+
+| 3547754e808a581b09c9d22e013a7d986d9f6cd1 | specify the cacert file when it exits |
++------------------------------------------+-----------------------------------------------------------------------+
+| ef4f36aa1c2ff0819d73cde44f84b99a42e15c7e | bugfix: wrong usage of '!include-raw' |
++------------------------------------------+-----------------------------------------------------------------------+
+| 0e0e0d4cb71fb27b1789a2bef2d3c4ff313e67ff | use tox instead of functest for doctor CI jobs |
++------------------------------------------+-----------------------------------------------------------------------+
+| 5b22f1b95feacaec0380f6a7543cbf510b628451 | pass value to parameters |
++------------------------------------------+-----------------------------------------------------------------------+
+| 44ab0cea07fa2a734c4f6b80776ad48fd006d1b8 | Doctor job bugfix: fix the scenario |
++------------------------------------------+-----------------------------------------------------------------------+
+| 17617f1c0a78c7bdad0d11d329a6c7e119cbbddd | bugfix: run doctor tests parallelly |
++------------------------------------------+-----------------------------------------------------------------------+
+| 811e4ef7f4c37b7bc246afc34ff880c014ecc05d | delete 'opnfv-build-ubuntu-defaults' parameters for doctor verify job |
++------------------------------------------+-----------------------------------------------------------------------+
+| 0705f31ab5bc54c073df120cbe0fe62cf10f9a81 | delete the 'node' parameter in 'doctor-slave-parameter' macro |
++------------------------------------------+-----------------------------------------------------------------------+
+| 304151b15f9d7241db8c5fea067cafe048287d84 | fix the default node label for doctor test |
++------------------------------------------+-----------------------------------------------------------------------+
+| a6963f92f015a33b44b27199886952205499b44c | Fix project name |
++------------------------------------------+-----------------------------------------------------------------------+
+| f122bfed998b3b0e0178106a7538377c609c6512 | add a default value for SSH_KEY |
++------------------------------------------+-----------------------------------------------------------------------+
+
+Version change
+^^^^^^^^^^^^^^
+
+Module version changes
+~~~~~~~~~~~~~~~~~~~~~~
+
+- OpenStack has changed from Pike-1 to Queens-1
+
+Document version changes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These documents have been updated in the Gambia release:
+
+- Testing document
+ docs/development/overview/testing.rst
+- Doctor scenario in functest
+ docs/development/overview/functest_scenario/doctor-scenario-in-functest.rst
+- Maintenance design guideline
+ docs/development/design/maintenance-design-guideline.rst
+
+Reason for version
+^^^^^^^^^^^^^^^^^^
+
+Documentation is updated due to tox usage in testing and adding maintenance
+use case related documentation.
+
+Feature additions
+~~~~~~~~~~~~~~~~~
+
++--------------------+--------------------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------------------+
+| DOCTOR-106 | Maintenance scenario |
++--------------------+--------------------------------------------------------+
+| DOCTOR-125 | Maintenance design document according to our test case |
++--------------------+--------------------------------------------------------+
+| DOCTOR-126 | Use Tox instead of Functest for doctor CI jobs |
++--------------------+--------------------------------------------------------+
+| DOCTOR-127 | Maintenance test POD |
++--------------------+--------------------------------------------------------+
+| DOCTOR-130 | Apex with containers |
++--------------------+--------------------------------------------------------+
+
+
+
+Deliverables
+------------
+
+
+Software deliverables
+=====================
+
+None
+
+Documentation deliverables
+==========================
+
+https://git.opnfv.org/doctor/tree/docs
+
+Known Limitations, Issues and Workarounds
+=========================================
+
+System Limitations
+^^^^^^^^^^^^^^^^^^
+
+Maintenance test case requirements:
+
+- Minimum number of nodes: 1 Controller, 3 Computes
+- Min number of VCPUs: 2 VCPUs for each compute
+
+Known issues
+^^^^^^^^^^^^
+
+None
+
+Workarounds
+^^^^^^^^^^^
+
+None
+
+Test Result
+===========
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | SUCCESS |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Compass' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Daisy' | SUCCESS |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Fuel' | No POD |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Joid' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Local' | N/A |
++--------------------------------------+--------------+
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=congress
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | FAILED |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Compass' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Daisy' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Fuel' | No POD |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Joid' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Local' | N/A |
++--------------------------------------+--------------+
+
+
+Doctor Functest results with TEST_CASE='fault_management'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | skipped |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Compass' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Daisy' | skipped |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Fuel' | skipped |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Joid' | N/A |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Local' | N/A |
++--------------------------------------+--------------+
+
+Note: For these installers, Functest currently either does not test features or
+skips running the project test cases.
+
+Doctor CI results with TEST_CASE='maintenance'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='Apex' | SUCCESS |
++--------------------------------------+--------------+
+
+Doctor Functest results with TEST_CASE='maintenance'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+N/A - Needs special target and currently there is only sample implementation
+
+References
+==========
+
+For more information about the OPNFV Doctor latest work, please see:
+
+https://wiki.opnfv.org/display/doctor/Doctor+Home
diff --git a/docs/release/release-notes/releasenotes_iruya.rst b/docs/release/release-notes/releasenotes_iruya.rst
new file mode 100644
index 00000000..92775557
--- /dev/null
+++ b/docs/release/release-notes/releasenotes_iruya.rst
@@ -0,0 +1,129 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+This document provides the release notes for Iruya version of Doctor.
+
+Important notes
+===============
+
+In the Iruya release there have not been many changes.
+
+All testing is now done with the Fuel installer. The maintenance use case
+is now only tested against the latest upstream Fenix. Only the sample inspector
+is tested, as Fuel does not support Vitrage or Congress.
+
+Summary
+=======
+
+The Iruya Doctor framework uses OpenStack Stein in its test cases.
+
+Release Data
+============
+
+Doctor changes
+
+- Maintenance use case updated to support the latest version of Fenix, running
+  in a container on the controller node
+- Maintenance use case now supports the Fuel installer
+- Doctor updated to use OpenStack Stein and Python 3.6 only
+- Only the sample inspector is tested, due to lacking installer support for
+  Vitrage and Congress
+
+Releng changes
+
+- Doctor testing runs with Python 3.6 and the sample inspector
+- Doctor is only tested with the Fuel installer
+
+Version change
+^^^^^^^^^^^^^^
+
+Module version changes
+~~~~~~~~~~~~~~~~~~~~~~
+
+- OpenStack has changed from Rocky to Stein since the previous Hunter release.
+
+Document version changes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+Reason for version
+^^^^^^^^^^^^^^^^^^
+
+N/A
+
+Feature additions
+~~~~~~~~~~~~~~~~~
+
++--------------------+--------------------------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN** |
++--------------------+--------------------------------------------------------------+
+| DOCTOR-134 | Update Doctor maintenance use case to work with latest Fenix |
++--------------------+--------------------------------------------------------------+
+
+Deliverables
+------------
+
+Software deliverables
+=====================
+
+None
+
+Documentation deliverables
+==========================
+
+https://git.opnfv.org/doctor/tree/docs
+
+Known Limitations, Issues and Workarounds
+=========================================
+
+System Limitations
+^^^^^^^^^^^^^^^^^^
+
+Maintenance test case requirements:
+
+- Minimum number of nodes: 1 Controller, 3 Computes
+- Minimum number of VCPUs: 2 per compute node
+
+Known issues
+^^^^^^^^^^^^
+
+None
+
+Workarounds
+^^^^^^^^^^^
+
+None
+
+Test Result
+===========
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
++--------------------------------------+--------------+
+
+Doctor CI results with TEST_CASE='maintenance' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE** | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel' | SUCCESS |
+| ADMIN_TOOL_TYPE='fenix' *) | |
++--------------------------------------+--------------+
+
+*) The sample implementation has not been updated to match the latest upstream
+   Fenix and is currently not being tested.
+
+References
+==========
+
+For more information about the OPNFV Doctor latest work, please see:
+
+https://wiki.opnfv.org/display/doctor/Doctor+Home
diff --git a/docs/release/scenarios/fault_management/fault_management.rst b/docs/release/scenarios/fault_management/fault_management.rst
new file mode 100644
index 00000000..99371201
--- /dev/null
+++ b/docs/release/scenarios/fault_management/fault_management.rst
@@ -0,0 +1,90 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+Running test cases
+""""""""""""""""""
+
+Functest will call "doctor_tests/main.py" in Doctor to run the test job.
+Doctor testing can also be triggered by tox on the OPNFV installer jumphost.
+Tox is normally used for functional, module and coding style testing in
+Python projects.
+
+Currently the 'MCP' and 'devstack' installers are supported.
+
+
+Fault management use case
+"""""""""""""""""""""""""
+
+* A consumer of the NFVI wants to receive immediate notifications about faults
+ in the NFVI affecting the proper functioning of the virtual resources.
+ Therefore, such faults have to be detected as quickly as possible, and, when
+ a critical error is observed, the affected consumer is immediately informed
+ about the fault and can switch over to the STBY configuration.
+
+The faults to be monitored (and at which detection rate) will be configured by
+the consumer. Once a fault is detected, the Inspector in the Doctor
+architecture will check the resource map maintained by the Controller to find
+out which virtual resources are affected, and then update their state.
+The Notifier will receive the failure event requests sent from the Controller,
+and notify the consumer(s) of the affected resources according to the alarm
+configuration.
+
+Detailed workflow information is as follows:
+
+* Consumer(VNFM): (step 0) creates resources (network, server/instance) and an
+  event alarm on the "state down" notification of that server/instance or
+  Neutron port (see the sketch after this list).
+
+* Monitor: (step 1) periodically checks the nodes, e.g. by pinging from/to
+  each dataplane NIC to/from the gateway of the node, and (step 2) once a
+  check fails, sends out an event with the "raw" fault information to the
+  Inspector
+
+* Inspector: when it receives an event, it will (step 3) mark the host down
+ ("mark-host-down"), (step 4) map the PM to VM, and change the VM status to
+  down. In the network failure case, the Neutron port is also set to down.
+
+* Controller: (step 5) sends out an instance update event to Ceilometer. In the
+  network failure case, the Neutron port is also set to down and the
+  corresponding event is sent to Ceilometer.
+
+* Notifier: (step 6) Ceilometer transforms and passes the events to AODH,
+  (step 7) AODH evaluates the events against the registered alarm definitions,
+ then (step 8) it will fire the alarm to the "consumer" who owns the
+ instance
+
+* Consumer(VNFM): (step 9) receives the event and (step 10) recreates a new
+ instance
+
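+The step 0 event alarm is created through the AODH API. The following is a
+minimal sketch using python-aodhclient, assuming an authenticated keystoneauth1
+session; the event type, trait query and consumer callback URL are illustrative
+values, not necessarily the exact ones used by the Doctor test code:
+
+.. code-block:: python
+
+    from aodhclient import client as aodh_client
+
+    def create_state_down_alarm(session, server_id, consumer_url):
+        # Event alarm that fires when the given server is reported in error
+        # state; AODH then POSTs the alarm payload to consumer_url (step 8).
+        aodh = aodh_client.Client('2', session=session)
+        return aodh.alarm.create({
+            'name': 'doctor_alarm_%s' % server_id,
+            'type': 'event',
+            'event_rule': {
+                'event_type': 'compute.instance.update',
+                'query': [
+                    {'field': 'traits.instance_id', 'op': 'eq',
+                     'value': server_id},
+                    {'field': 'traits.state', 'op': 'eq', 'value': 'error'},
+                ],
+            },
+            'alarm_actions': [consumer_url],
+        })
+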
+Fault management test case
+""""""""""""""""""""""""""
+
+Functest will call the 'doctor-test' command in Doctor to run the test job.
+
+The following steps are executed:
+
+Firstly, get the installer IP according to the installer type. Then ssh to
+the installer node to get the private key for accessing the cloud. For the
+'fuel' installer, ssh to the controller node to modify the nova and ceilometer
+configurations.
+
+Secondly, prepare an image for booting the VM, then create a test project and
+a test user (both default to doctor) for the Doctor tests.
+
+Thirdly, boot a VM under the doctor project and check the VM status to verify
+that the VM is launched completely. Then get the info of the compute host where
+the VM is launched, to verify connectivity to the target compute host. Get the
+consumer IP according to the route to the compute IP and create an alarm event
+in Ceilometer using the consumer IP.
+
+Fourthly, the Doctor components are started, and, based on the above
+preparation, a failure is injected into the system, i.e. the network of the
+compute host is disabled for 3 minutes. To ensure the host is down, the status
+of the host is checked.
+
+Finally, the notification time, i.e. the time between the execution of step 2
+(Monitor detects failure) and step 9 (Consumer receives failure notification)
+is calculated.
+
+According to the Doctor requirements, the Doctor test is successful if the
+notification time is below 1 second.
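+
+As a minimal sketch of that criterion (the timestamps are assumed to be taken
+on time-synchronized hosts, as the monitor and the consumer may run on
+different machines):
+
+.. code-block:: python
+
+    def check_notification_time(detected_at, notified_at, limit=1.0):
+        # detected_at: timestamp logged by the monitor at step 2
+        # notified_at: timestamp logged by the consumer at step 9
+        notification_time = notified_at - detected_at
+        print('notification_time = %.3f s' % notification_time)
+        return notification_time < limit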
diff --git a/docs/development/overview/functest_scenario/images/Fault-management-design.png b/docs/release/scenarios/maintenance/images/Fault-management-design.png
index 6d98cdec..6d98cdec 100644
--- a/docs/development/overview/functest_scenario/images/Fault-management-design.png
+++ b/docs/release/scenarios/maintenance/images/Fault-management-design.png
Binary files differ
diff --git a/docs/development/overview/functest_scenario/images/LICENSE b/docs/release/scenarios/maintenance/images/LICENSE
index 21a2d03d..21a2d03d 100644
--- a/docs/development/overview/functest_scenario/images/LICENSE
+++ b/docs/release/scenarios/maintenance/images/LICENSE
diff --git a/docs/development/overview/functest_scenario/images/Maintenance-design.png b/docs/release/scenarios/maintenance/images/Maintenance-design.png
index 8f21db6a..8f21db6a 100644
--- a/docs/development/overview/functest_scenario/images/Maintenance-design.png
+++ b/docs/release/scenarios/maintenance/images/Maintenance-design.png
Binary files differ
diff --git a/docs/development/overview/functest_scenario/images/Maintenance-workflow.png b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png
index 9b65fd59..9b65fd59 100644
--- a/docs/development/overview/functest_scenario/images/Maintenance-workflow.png
+++ b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png
Binary files differ
diff --git a/docs/release/scenarios/maintenance/maintenance.rst b/docs/release/scenarios/maintenance/maintenance.rst
new file mode 100644
index 00000000..ecfe76b1
--- /dev/null
+++ b/docs/release/scenarios/maintenance/maintenance.rst
@@ -0,0 +1,120 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+Maintenance use case
+""""""""""""""""""""
+
+* A consumer of the NFVI wants to interact with NFVI maintenance, upgrade and
+  scaling, and to have graceful retirement. By receiving notifications about
+  these NFVI events and responding to them within a given time window, the
+  consumer can guarantee zero downtime for its service.
+
+The maintenance use case adds an `admin tool` and an `app manager` component
+to the Doctor platform. An overview of the maintenance components can be seen
+in :numref:`figure-p2`.
+
+.. figure:: ./images/Maintenance-design.png
+ :name: figure-p2
+ :width: 100%
+
+ Doctor platform components in maintenance use case
+
+In the maintenance use case, the `app manager` (VNFM) subscribes to maintenance
+notifications triggered by project specific alarms through AODH. This is how it
+learns about the NFVI maintenance, upgrade and scaling operations that affect
+its instances. The `app manager` can perform the actions depicted in `green
+color` or tell the `admin tool` to perform the admin actions depicted in
+`orange color`.
+
+Any infrastructure component, like the `Inspector`, can subscribe to
+maintenance notifications triggered by host specific alarms through AODH.
+Subscribing to these notifications requires admin privileges; they tell when a
+host is out of use for maintenance and when it is taken back into production.
+
+Maintenance test case
+"""""""""""""""""""""
+
+The maintenance test case is currently run in our Apex CI and executed by tox.
+This is because of the special limitation mentioned below, and because we
+currently have only a sample implementation as a proof of concept, plus
+support for the unofficial OpenStack project Fenix. The environment variable
+TEST_CASE='maintenance' needs to be used when executing "doctor_tests/main.py",
+and ADMIN_TOOL_TYPE='fenix' if you want to test with Fenix instead of the
+sample implementation. The test case workflow can be seen in :numref:`figure-p3`.
+
+.. figure:: ./images/Maintenance-workflow.png
+ :name: figure-p3
+ :width: 100%
+
+ Maintenance test case workflow
+
+In the test case all compute capacity is consumed by project (VNF) instances.
+To have redundant services on instances and an empty compute node available
+for maintenance, the test case needs at least 3 compute nodes in the system.
+There will be 2 instances on each compute node, so the minimum number of VCPUs
+per compute node is also 2. Regardless of how many compute nodes there are,
+the application always has 2 redundant instances (ACT-STDBY) on different
+compute nodes, and the rest of the compute capacity is filled with
+non-redundant instances.
+
+For each project specific maintenance message there is a time window for the
+`app manager` to take any needed action. This guarantees zero downtime for its
+service. All replies are done by calling the `admin tool` API given in the
+message, as sketched below.
+
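+As a minimal sketch of such a reply (mirroring the sample VNFM in this change;
+the alarm payload is assumed to carry 'state' and a 'reply_url', and the
+X-Auth-Token header is only needed when the Fenix admin tool is used):
+
+.. code-block:: python
+
+    import json
+    import requests
+
+    def reply_to_admin_tool(payload, token=None, instance_ids=None):
+        # Acknowledge the received maintenance state, e.g. MAINTENANCE ->
+        # ACK_MAINTENANCE, within the given time window.
+        reply = {'state': 'ACK_%s' % payload['state']}
+        if instance_ids is not None:
+            reply['instance_ids'] = instance_ids
+        headers = {'Content-Type': 'application/json',
+                   'Accept': 'application/json'}
+        if token:
+            headers['X-Auth-Token'] = token
+        requests.put(payload['reply_url'], data=json.dumps(reply),
+                     headers=headers)
+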
+The following steps are executed:
+
+The infrastructure admin will call the `admin tool` API to trigger maintenance
+for compute hosts having instances belonging to a VNF.
+
+A project specific `MAINTENANCE` notification is triggered to tell the
+`app manager` that its instances are going to be hit by infrastructure
+maintenance at a specific point in time. The `app manager` calls the
+`admin tool` API to answer back `ACK_MAINTENANCE`.
+
+When the time comes to start the actual maintenance workflow in the
+`admin tool`, a `SCALE_IN` notification is triggered, as there is no empty
+compute node for maintenance (or compute upgrade). The project receives the
+corresponding alarm, scales down its instances and calls the `admin tool` API
+to answer back `ACK_SCALE_IN`.
+
+As it might happen that instances are not scaled down (removed) from a single
+compute node, the `admin tool` might need to figure out which compute node
+should be made empty first and send `PREPARE_MAINTENANCE` to the project,
+telling which instance needs to be migrated to get the needed empty compute
+node. The `app manager` makes sure it is ready to migrate the instance and
+calls the `admin tool` API to answer back `ACK_PREPARE_MAINTENANCE`. The
+`admin tool` performs the migration and answers `ADMIN_ACTION_DONE`, so the
+`app manager` knows the instance can be used again.
+
+:numref:`figure-p3` next has a light blue section of actions to be done for
+each compute node. However, as we now have one empty compute node, we will
+maintain/upgrade that one first. So on the first round, we can directly put the
+compute node into maintenance and send the admin level, host specific
+`IN_MAINTENANCE` message. This is caught by the `Inspector`, so it knows the
+host is down for maintenance. The `Inspector` can now disable any automatic
+fault management actions for the host, as it is down on purpose (see the
+sketch below). After the `admin tool` has completed the maintenance/upgrade, a
+`MAINTENANCE_COMPLETE` message is sent to tell that the host is back in
+production.
+
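+A minimal sketch of that `Inspector` side behaviour (the payload fields and
+the helper names here are illustrative, not the actual sample Inspector code):
+
+.. code-block:: python
+
+    hosts_in_maintenance = set()
+
+    def handle_host_maintenance_alarm(payload):
+        # Admin level, host specific notification received through AODH.
+        host = payload['host']
+        if payload['state'] == 'IN_MAINTENANCE':
+            # Host is down on purpose; suppress automatic fault handling.
+            hosts_in_maintenance.add(host)
+        elif payload['state'] == 'MAINTENANCE_COMPLETE':
+            # Host is back in production; resume normal fault handling.
+            hosts_in_maintenance.discard(host)
+
+    def should_handle_fault(host):
+        return host not in hosts_in_maintenance
+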
+On the next rounds we always have instances on the compute node, so we need a
+`PLANNED_MAINTENANCE` message to tell that those instances are now going to be
+hit by maintenance. When the `app manager` receives this message, it knows that
+the instances to be moved away from the compute node will now move to an
+already maintained/upgraded host. In the test case no upgrade is done on the
+application side to adapt the instances to the new infrastructure
+capabilities, but this could be done here, as this information is also passed
+in the message. This might be just upgrading some RPMs, but could also be a
+complete re-instantiation of the instance with a new flavor. If the
+application runs the active side of a redundant instance on this compute node,
+a switch over is done. After the `app manager` is ready, it calls the
+`admin tool` API to answer back `ACK_PLANNED_MAINTENANCE`. In the test case the
+answer is `migrate`, so the `admin tool` will migrate the instances and reply
+`ADMIN_ACTION_DONE`, and then the `app manager` knows the instances can be used
+again. Then we are ready to do the actual maintenance as previously, through
+the `IN_MAINTENANCE` and `MAINTENANCE_COMPLETE` steps.
+
+After all compute nodes are maintained, the `admin tool` can send
+`MAINTENANCE_COMPLETE` to tell that the maintenance/upgrade is now complete.
+For the `app manager` this means it can scale back to full capacity, as in the
+sketch below.
+
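+A minimal sketch of that scale back step, following the sample VNFM logic in
+this change ('number_of_instances' and 'scale_instances' stand for the
+application manager's own helpers):
+
+.. code-block:: python
+
+    def on_maintenance_complete(orig_number_of_instances,
+                                number_of_instances, scale_instances):
+        # Re-create the instances that were removed in the SCALE_IN step.
+        current = number_of_instances()
+        if orig_number_of_instances > current:
+            scale_instances(orig_number_of_instances - current)
+        return 'ACK_MAINTENANCE_COMPLETE'
+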
+There is currently a sample implementation of the VNFM and the test case. On
+the infrastructure side there is a sample implementation of the 'admin_tool',
+and there is also support for OpenStack Fenix, which extends the use case to
+support 'ETSI FEAT03' for VNFM interaction and to optimize the whole
+infrastructure maintenance and upgrade.
diff --git a/docs/development/manuals/get-valid-server-state.rst b/docs/release/userguide/get-valid-server-state.rst
index 824ea3c2..824ea3c2 100644
--- a/docs/development/manuals/get-valid-server-state.rst
+++ b/docs/release/userguide/get-valid-server-state.rst
diff --git a/docs/release/userguide/index.rst b/docs/release/userguide/index.rst
index eee855dc..577072c7 100644
--- a/docs/release/userguide/index.rst
+++ b/docs/release/userguide/index.rst
@@ -11,3 +11,6 @@ Doctor User Guide
:maxdepth: 2
feature.userguide.rst
+ get-valid-server-state.rst
+ mark-host-down_manual.rst
+ monitors.rst
diff --git a/docs/development/manuals/mark-host-down_manual.rst b/docs/release/userguide/mark-host-down_manual.rst
index 3815205d..3815205d 100644
--- a/docs/development/manuals/mark-host-down_manual.rst
+++ b/docs/release/userguide/mark-host-down_manual.rst
diff --git a/docs/development/manuals/monitors.rst b/docs/release/userguide/monitors.rst
index 0d22b1de..eeb5e226 100644
--- a/docs/development/manuals/monitors.rst
+++ b/docs/release/userguide/monitors.rst
@@ -23,7 +23,8 @@ calculated by using the difference of time at which compute node sends notificat
control node and the time at which consumer is notified. The time on control and compute
node has to be synchronized for this reason. For further details on setting up collectd
on the compute node, use the following link:
-http://docs.opnfv.org/en/stable-danube/submodules/barometer/docs/release/userguide/feature.userguide.html#id18
+:doc:`barometer:release/userguide/feature.userguide`
+
Collectd monitors an interface managed by OVS. If the interface is not be assigned
an IP, the user has to provide the name of interface to be monitored. The command to
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..9fde2df2
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+lfdocs-conf
+sphinx_opnfv_theme
diff --git a/docs/testing/developer/index.rst b/docs/testing/developer/index.rst
new file mode 100644
index 00000000..dfbcfa74
--- /dev/null
+++ b/docs/testing/developer/index.rst
@@ -0,0 +1,13 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+*********
+Developer
+*********
+
+.. toctree::
+ :numbered:
+ :maxdepth: 2
+
+ testing.rst
diff --git a/docs/development/overview/testing.rst b/docs/testing/developer/testing.rst
index ba0546eb..6a929130 100644
--- a/docs/development/overview/testing.rst
+++ b/docs/testing/developer/testing.rst
@@ -41,6 +41,16 @@ export TEST_CASE with different values:
#Run both tests cases
export TEST_CASE='all'
+ #Use Fenix in maintenance testing instead of sample admin_tool
+ #This is only for the 'maintenance' test case
+ export ADMIN_TOOL_TYPE='fenix'
+ export APP_MANAGER_TYPE='vnfm'
+
+ #Run on a different installer jumphost: 'fuel' or 'apex'
+ #In a multinode DevStack you run Doctor on the controller node
+ #with APP_MANAGER_TYPE='vnfm' exported as above
+ export INSTALLER_TYPE='fuel'
+
Run Python Test Script
~~~~~~~~~~~~~~~~~~~~~~
@@ -57,41 +67,16 @@ environment and then run the test.
.. _doctor.sample.conf: https://git.opnfv.org/doctor/tree/etc/doctor.sample.conf
-In OPNFV Apex jumphost you can run Doctor testing as follows using tox:
+On an OPNFV testing environment jumphost you can run Doctor testing as follows
+using tox:
.. code-block:: bash
- #Before Gambia: overcloudrc.v3
source overcloudrc
export INSTALLER_IP=${INSTALLER_IP}
export INSTALLER_TYPE=${INSTALLER_TYPE}
git clone https://gerrit.opnfv.org/gerrit/doctor
cd doctor
sudo -E tox
-
-Run Functest Suite
-==================
-
-Functest supports Doctor testing by triggering the test script above in a
-Functest container. You can run the Doctor test with the following steps:
-
-.. code-block:: bash
-
- DOCKER_TAG=latest
- docker pull docker.io/opnfv/functest-features:${DOCKER_TAG}
- docker run --privileged=true -id \
- -e INSTALLER_TYPE=${INSTALLER_TYPE} \
- -e INSTALLER_IP=${INSTALLER_IP} \
- -e INSPECTOR_TYPE=sample \
- docker.io/opnfv/functest-features:${DOCKER_TAG} /bin/bash
- docker exec <container_id> functest testcase run doctor-notification
-
-See `Functest Userguide`_ for more information.
-
-.. _Functest Userguide: http://docs.opnfv.org/en/latest/submodules/functest/docs/testing/user/userguide/index.html
-
-For testing with stable version, change DOCKER_TAG to 'stable' or other release
-tag identifier.
-
-Tips
-====
+
+Note! In DevStack you run Doctor on the controller node.
diff --git a/docs/testing/index.rst b/docs/testing/index.rst
new file mode 100644
index 00000000..3fae9568
--- /dev/null
+++ b/docs/testing/index.rst
@@ -0,0 +1,15 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+.. _testing:
+
+=======
+Testing
+=======
+
+.. toctree::
+ :maxdepth: 2
+
+ ./developer/index.rst
+ ./user/index.rst
diff --git a/docs/testing/user/index.rst b/docs/testing/user/index.rst
new file mode 100644
index 00000000..1be9c7eb
--- /dev/null
+++ b/docs/testing/user/index.rst
@@ -0,0 +1,13 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. SPDX-License-Identifier: CC-BY-4.0
+.. (c) Open Platform for NFV Project, Inc. and its contributors
+
+****
+User
+****
+
+.. toctree::
+ :numbered:
+ :maxdepth: 2
+
+ testing.rst
diff --git a/docs/testing/user/testing.rst b/docs/testing/user/testing.rst
new file mode 100644
index 00000000..6172d26a
--- /dev/null
+++ b/docs/testing/user/testing.rst
@@ -0,0 +1,30 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Run Functest Suite (obsolete)
+=============================
+
+Functest supports Doctor testing by triggering the test script above in a
+Functest container. You can run the Doctor test with the following steps:
+
+.. code-block:: bash
+
+ DOCKER_TAG=latest
+ docker pull docker.io/opnfv/functest-features:${DOCKER_TAG}
+ docker run --privileged=true -id \
+ -e INSTALLER_TYPE=${INSTALLER_TYPE} \
+ -e INSTALLER_IP=${INSTALLER_IP} \
+ -e INSPECTOR_TYPE=sample \
+ docker.io/opnfv/functest-features:${DOCKER_TAG} /bin/bash
+ docker exec <container_id> functest testcase run doctor-notification
+
+See the Functest user guide (:doc:`functest:testing/user/userguide`) for more
+information.
+
+
+For testing with a stable version, change DOCKER_TAG to 'stable' or another
+release tag identifier.
+
+Tips
+====
diff --git a/doctor_tests/admin_tool/__init__.py b/doctor_tests/admin_tool/__init__.py
index e8b12817..3417a334 100644
--- a/doctor_tests/admin_tool/__init__.py
+++ b/doctor_tests/admin_tool/__init__.py
@@ -8,16 +8,16 @@
##############################################################################
from oslo_config import cfg
from oslo_utils import importutils
-
+import os
OPTS = [
cfg.StrOpt('type',
- default='sample',
- choices=['sample'],
+ default=os.environ.get('ADMIN_TOOL_TYPE', 'sample'),
+ choices=['sample', 'fenix'],
help='the component of doctor admin_tool',
required=True),
cfg.StrOpt('ip',
- default='127.0.0.1',
+ default='0.0.0.0',
help='the ip of admin_tool',
required=True),
cfg.IntOpt('port',
diff --git a/doctor_tests/admin_tool/fenix/Dockerfile b/doctor_tests/admin_tool/fenix/Dockerfile
new file mode 100644
index 00000000..202380eb
--- /dev/null
+++ b/doctor_tests/admin_tool/fenix/Dockerfile
@@ -0,0 +1,34 @@
+FROM gliderlabs/alpine:3.6
+
+ARG BRANCH=master
+ARG OPENSTACK=master
+
+EXPOSE 12347
+
+RUN echo "Building Fenix container against OpenStack $OPENSTACK" && \
+ echo "Building Fenix with $BRANCH" && \
+ mkdir /etc/fenix && \
+ mkdir -p /var/tmp/fenix
+WORKDIR /var/tmp/fenix
+COPY fenix*.conf /etc/fenix/
+
+RUN apk --no-cache add ca-certificates && \
+ apk --no-cache add --update python3 sshpass py-pip git curl && \
+ apk --no-cache add --virtual .build-deps --update \
+ python3-dev build-base linux-headers libffi-dev \
+ openssl-dev libjpeg-turbo-dev && \
+ curl https://opendev.org/openstack/requirements/raw/branch/$OPENSTACK/upper-constraints.txt > upper-constraints.txt && \
+ if [ ! -e /usr/bin/pip ]; then ln -s pip3 /usr/bin/pip ; fi && \
+ if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python3 /usr/bin/python; fi && \
+ pip3 install --upgrade pip && \
+ pip3 install alembic aodhclient decorator flask Flask-RESTful eventlet jsonschema \
+ keystoneauth1 keystonemiddleware python-novaclient oslo.config pecan \
+ oslo.db oslo.log oslo.messaging oslo.serialization oslo.service oslo_policy \
+ oslotest oslo.utils pbr pymysql six sqlalchemy -cupper-constraints.txt && \
+ git clone https://opendev.org/x/fenix -b $BRANCH /fenix && \
+ rm -fr /var/tmp/fenix
+COPY run /fenix
+COPY keystonercv3 /fenix
+WORKDIR /fenix
+RUN python3 setup.py install
+CMD ./run
diff --git a/doctor_tests/admin_tool/fenix/run b/doctor_tests/admin_tool/fenix/run
new file mode 100755
index 00000000..50ae68e7
--- /dev/null
+++ b/doctor_tests/admin_tool/fenix/run
@@ -0,0 +1,32 @@
+#!/bin/sh
+. keystonercv3
+
+# Start the first process
+nohup python3 /fenix/fenix/cmd/engine.py > /var/log/fenix-engine.log&
+status=$?
+if [ $status -ne 0 ]; then
+ echo "Failed to start engine.py: $status"
+ exit $status
+fi
+
+# Start the second process
+nohup python3 /fenix/fenix/cmd/api.py > /var/log/fenix-api.log&
+status=$?
+if [ $status -ne 0 ]; then
+ echo "Failed to start api.py: $status"
+ exit $status
+fi
+
+echo "started Fenix: engine and api"
+while sleep 60; do
+ ps aux |grep "cmd/engine.py" |grep -q -v grep
+ PROCESS_1_STATUS=$?
+ ps aux |grep "cmd/api.py" |grep -q -v grep
+ PROCESS_2_STATUS=$?
+ # If the greps above find anything, they exit with 0 status
+ # If they are not both 0, then something is wrong
+ if [ $PROCESS_1_STATUS -ne 0 -o $PROCESS_2_STATUS -ne 0 ]; then
+ echo "One of the processes has already exited."
+ exit 1
+ fi
+done
diff --git a/doctor_tests/admin_tool/sample.py b/doctor_tests/admin_tool/sample.py
index 892a4c83..a71f43a1 100644
--- a/doctor_tests/admin_tool/sample.py
+++ b/doctor_tests/admin_tool/sample.py
@@ -59,7 +59,7 @@ class AdminMain(Thread):
self.parent = parent
self.log = log
self.conf = conf
- self.url = 'http://0.0.0.0:%s' % conf.admin_tool.port
+ self.url = 'http://%s:%s' % (conf.admin_tool.ip, conf.admin_tool.port)
self.projects_state = dict() # current state for each project
self.proj_server_actions = dict() # actions for each project server
self.projects_servers = dict() # servers processed in current state
@@ -86,6 +86,7 @@ class AdminMain(Thread):
driver='messaging',
topics=['notifications'])
self.notif_admin = self.notif_admin.prepare(publisher_id='admin_tool')
+ self.stopped = False
self.log.info('Admin tool session %s initialized' % self.session_id)
def cleanup(self):
@@ -116,14 +117,15 @@ class AdminMain(Thread):
if self._projects_not_in_wanted_states(wanted_states):
self.log.error('Admin tool session %s: projects in invalid states '
'%s' % (self.session_id, self.projects_state))
- raise Exception('Admin tool session %s: not all projects in states'
- ' %s' % (self.session_id, wanted_states))
+ return False
else:
self.log.info('all projects replied')
+ return True
def _project_notify(self, project_id, instance_ids, allowed_actions,
actions_at, state, metadata):
- reply_url = '%s/%s/maintenance' % (self.url, project_id)
+ reply_url = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project_id)
payload = dict(project_id=project_id,
instance_ids=instance_ids,
@@ -148,11 +150,12 @@ class AdminMain(Thread):
self.notif_admin.info({'some': 'context'}, 'maintenance.host', payload)
- def down_scale(self):
+ def in_scale(self):
for project in self.projects_servers:
- self.log.info('DOWN_SCALE to project %s' % project)
+ self.log.info('SCALE_IN to project %s' % project)
self.log.debug('instance_ids %s' % self.projects_servers[project])
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = []
wait_seconds = 120
actions_at = (datetime.datetime.utcnow() +
@@ -163,18 +166,20 @@ class AdminMain(Thread):
self._project_notify(project, instance_ids,
allowed_actions, actions_at, state,
metadata)
- allowed_states = ['ACK_DOWN_SCALE', 'NACK_DOWN_SCALE']
- self.wait_projects_state(allowed_states, wait_seconds)
- if self.projects_not_in_state('ACK_DOWN_SCALE'):
- raise Exception('Admin tool session %s: all states not '
- 'ACK_DOWN_SCALE %s' %
- (self.session_id, self.projects_state))
+ allowed_states = ['ACK_SCALE_IN', 'NACK_SCALE_IN']
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
+ if self.projects_not_in_state('ACK_SCALE_IN'):
+ self.log.error('%s: all states not ACK_SCALE_IN' %
+ self.session_id)
+ self.state = 'MAINTENANCE_FAILED'
def maintenance(self):
for project in self.projects_servers:
self.log.info('\nMAINTENANCE to project %s\n' % project)
self.log.debug('instance_ids %s' % self.projects_servers[project])
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = []
actions_at = self.maintenance_at
state = self.state
@@ -190,16 +195,18 @@ class AdminMain(Thread):
allowed_actions, actions_at, state,
metadata)
allowed_states = ['ACK_MAINTENANCE', 'NACK_MAINTENANCE']
- self.wait_projects_state(allowed_states, wait_seconds)
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
if self.projects_not_in_state('ACK_MAINTENANCE'):
- raise Exception('Admin tool session %s: all states not '
- 'ACK_MAINTENANCE %s' %
- (self.session_id, self.projects_state))
+ self.log.error('%s: all states not ACK_MAINTENANCE' %
+ self.session_id)
+ self.state = 'MAINTENANCE_FAILED'
def maintenance_complete(self):
for project in self.projects_servers:
self.log.info('MAINTENANCE_COMPLETE to project %s' % project)
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = []
wait_seconds = 120
actions_at = (datetime.datetime.utcnow() +
@@ -212,13 +219,14 @@ class AdminMain(Thread):
metadata)
allowed_states = ['ACK_MAINTENANCE_COMPLETE',
'NACK_MAINTENANCE_COMPLETE']
- self.wait_projects_state(allowed_states, wait_seconds)
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
if self.projects_not_in_state('ACK_MAINTENANCE_COMPLETE'):
- raise Exception('Admin tool session %s: all states not '
- 'ACK_MAINTENANCE_COMPLETE %s' %
- (self.session_id, self.projects_state))
+ self.log.error('%s: all states not ACK_MAINTENANCE_COMPLETE' %
+ self.session_id)
+ self.state = 'MAINTENANCE_FAILED'
- def need_down_scale(self, host_servers):
+ def need_in_scale(self, host_servers):
room_for_instances = 0
for host in host_servers:
instances = 0
@@ -267,7 +275,8 @@ class AdminMain(Thread):
self.projects_servers[project] = projects_servers[project].copy()
self.log.info('%s to project %s' % (state, project))
self.project_servers_log_info(project, projects_servers)
- instance_ids = '%s/%s/maintenance' % (self.url, project)
+ instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+ project)
allowed_actions = ['MIGRATE', 'LIVE_MIGRATE', 'OWN_ACTION']
wait_seconds = 120
actions_at = (datetime.datetime.utcnow() +
@@ -278,11 +287,14 @@ class AdminMain(Thread):
allowed_actions, actions_at, state,
metadata)
allowed_states = [state_ack, state_nack]
- self.wait_projects_state(allowed_states, wait_seconds)
- if self.projects_not_in_state(state_ack):
- raise Exception('Admin tool session %s: all states not %s %s' %
- (self.session_id, state_ack, self.projects_state))
- self.actions_to_have_empty_host(host)
+ if not self.wait_projects_state(allowed_states, wait_seconds):
+ self.state = 'MAINTENANCE_FAILED'
+ elif self.projects_not_in_state(state_ack):
+ self.log.error('%s: all states not %s' %
+ (self.session_id, state_ack))
+ self.state = 'MAINTENANCE_FAILED'
+ else:
+ self.actions_to_have_empty_host(host)
def notify_action_done(self, project, instance_id):
instance_ids = instance_id
@@ -463,7 +475,8 @@ class AdminMain(Thread):
time.sleep(5)
def run(self):
- while self.state != 'MAINTENANCE_COMPLETE':
+ while (self.state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+ not self.stopped):
self.log.info('--==session %s: processing state %s==--' %
(self.session_id, self.state))
if self.state == 'MAINTENANCE':
@@ -474,7 +487,8 @@ class AdminMain(Thread):
raise Exception('all projects do not listen maintenance '
'alarm')
self.maintenance()
-
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
maint_at = self.str_to_datetime(self.maintenance_at)
if maint_at > datetime.datetime.utcnow():
time_now = (datetime.datetime.utcnow().strftime(
@@ -492,14 +506,14 @@ class AdminMain(Thread):
# True -> PLANNED_MAINTENANCE
# False -> check if we can migrate VMs to get empty host
# True -> PREPARE_MAINTENANCE
- # False -> DOWN_SCALE
+ # False -> SCALE_IN
maintenance_empty_hosts = ([h for h in self.hosts if h not in
host_servers])
if len(maintenance_empty_hosts) == 0:
- if self.need_down_scale(host_servers):
+ if self.need_in_scale(host_servers):
self.log.info('Need to down scale')
- self.state = 'DOWN_SCALE'
+ self.state = 'SCALE_IN'
else:
self.log.info('Free capacity, but need empty host')
self.state = 'PREPARE_MAINTENANCE'
@@ -508,14 +522,17 @@ class AdminMain(Thread):
self.state = 'PLANNED_MAINTENANCE'
self.log.info('--==State change from MAINTENANCE to %s==--'
% self.state)
- elif self.state == 'DOWN_SCALE':
+ elif self.state == 'SCALE_IN':
# Test case is hard coded to have all compute capacity used
# We need to down scale to have one empty compute host
- self.down_scale()
+ self.update_server_info()
+ self.in_scale()
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
self.state = 'PREPARE_MAINTENANCE'
host_servers = self.update_server_info()
self.servers_log_info(host_servers)
- self.log.info('--==State change from DOWN_SCALE to'
+ self.log.info('--==State change from SCALE_IN to'
' %s==--' % self.state)
elif self.state == 'PREPARE_MAINTENANCE':
@@ -527,7 +544,7 @@ class AdminMain(Thread):
host_servers])
if len(maintenance_empty_hosts) == 0:
self.log.info('no empty hosts for maintenance')
- if self.need_down_scale(host_servers):
+ if self.need_in_scale(host_servers):
raise Exception('Admin tool session %s: Not enough '
'free capacity for maintenance' %
self.session_id)
@@ -535,6 +552,8 @@ class AdminMain(Thread):
if host:
self.make_compute_host_empty(host, host_servers[host],
'PREPARE_MAINTENANCE')
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
else:
# We do not currently support another down scale if
# first was not enough
@@ -566,6 +585,7 @@ class AdminMain(Thread):
maintenance_empty_hosts.append(host)
self.log.info('--==Start to maintain empty hosts==--\n%s' %
maintenance_empty_hosts)
+ self.update_server_info()
for host in maintenance_empty_hosts:
# scheduler has problems, let's see if just down scaled
# host is really empty
@@ -586,6 +606,8 @@ class AdminMain(Thread):
self.log.info('PLANNED_MAINTENANCE host %s' % host)
self.make_compute_host_empty(host, host_servers[host],
'PLANNED_MAINTENANCE')
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
self.log.info('IN_MAINTENANCE host %s' % host)
self._admin_notify(admin_project, host, 'IN_MAINTENANCE',
self.session_id)
@@ -603,14 +625,16 @@ class AdminMain(Thread):
self.log.info('Projects still need to up scale back to full '
'capcity')
self.maintenance_complete()
+ if self.state == 'MAINTENANCE_FAILED':
+ continue
host_servers = self.update_server_info()
self.servers_log_info(host_servers)
- self.state = 'MAINTENANCE_COMPLETE'
+ self.state = 'MAINTENANCE_DONE'
else:
raise Exception('Admin tool session %s: session in invalid '
'state %s' % (self.session_id, self.state))
- self.log.info('--==Maintenance session %s: '
- 'MAINTENANCE SESSION COMPLETE==--' % self.session_id)
+ self.log.info('--==Maintenance session %s: %s==--' %
+ (self.session_id, self.state))
def project_input(self, project_id, data):
self.log.debug('Admin tool session %s: project %s input' %
@@ -637,7 +661,6 @@ class AdminTool(Thread):
self.admin_tool = admin_tool
self.log = log
self.conf = conf
- self.port = self.conf.admin_tool.port
self.maint_sessions = {}
self.projects = {}
self.maintenance_hosts = []
@@ -650,63 +673,55 @@ class AdminTool(Thread):
def admin_maintenance_api_post():
data = json.loads(request.data.decode('utf8'))
self.log.info('maintenance message: %s' % data)
- if 'session_id' in data:
- if data['state'] == 'REMOVE_MAINTENANCE_SESSION':
- session_id = data['session_id']
- self.log.info('remove session %s'
- % session_id)
- self.maint_sessions[session_id].cleanup()
- self.maint_sessions[session_id].stop()
- del self.maint_sessions[session_id]
- else:
- session_id = str(generate_uuid())
- self.log.info('creating session: %s' % session_id)
- self.maint_sessions[session_id] = (
- AdminMain(self.trasport_url,
- session_id,
- data,
- self,
- self.conf,
- self.log))
- self.maint_sessions[session_id].start()
+ session_id = str(generate_uuid())
+ self.log.info('creating session: %s' % session_id)
+ self.maint_sessions[session_id] = (
+ AdminMain(self.trasport_url,
+ session_id,
+ data,
+ self,
+ self.conf,
+ self.log))
+ self.maint_sessions[session_id].start()
reply = json.dumps({'session_id': session_id,
'state': 'ACK_%s' % data['state']})
self.log.debug('reply: %s' % reply)
return reply, 200, None
- @app.route('/maintenance', methods=['GET'])
- def admin_maintenance_api_get():
- data = json.loads(request.data.decode('utf8'))
- self.log.debug('Admin get maintenance: %s' % data)
- session_id = data['session_id']
+ @app.route('/maintenance/<session_id>', methods=['GET'])
+ def admin_maintenance_api_get(session_id=None):
+ self.log.debug('Admin get maintenance')
reply = json.dumps({'state':
self.maint_sessions[session_id].state})
- self.log.debug('reply: %s' % reply)
+ self.log.info('reply: %s' % reply)
return reply, 200, None
- @app.route('/<projet_id>/maintenance', methods=['PUT'])
- def project_maintenance_api_put(projet_id=None):
+ @app.route('/maintenance/<session_id>/<projet_id>', methods=['PUT'])
+ def project_maintenance_api_put(session_id=None, projet_id=None):
data = json.loads(request.data.decode('utf8'))
self.log.debug('%s project put: %s' % (projet_id, data))
- self.project_input(projet_id, data)
+ self.project_input(session_id, projet_id, data)
return 'OK'
- @app.route('/<projet_id>/maintenance', methods=['GET'])
- def project_maintenance_api_get(projet_id=None):
- data = json.loads(request.data.decode('utf8'))
- self.log.debug('%s project get %s' % (projet_id, data))
- instances = self.project_get_instances(projet_id, data)
+ @app.route('/maintenance/<session_id>/<projet_id>', methods=['GET'])
+ def project_maintenance_api_get(session_id=None, projet_id=None):
+ self.log.debug('%s project get %s' % (projet_id, session_id))
+ instances = self.project_get_instances(session_id, projet_id)
reply = json.dumps({'instance_ids': instances})
self.log.debug('%s reply: %s' % (projet_id, reply))
return reply, 200, None
+ @app.route('/maintenance/<session_id>', methods=['DELETE'])
+ def remove_session(session_id=None):
+ self.log.info('remove session %s'
+ % session_id)
+ self.maint_sessions[session_id].cleanup()
+ self.maint_sessions[session_id].stop()
+ del self.maint_sessions[session_id]
+ return 'OK'
+
@app.route('/shutdown', methods=['POST'])
def shutdown():
- for session in self.maint_sessions:
- self.log.info('shutdown admin tool session %s thread' %
- session)
- self.maint_sessions[session].cleanup()
- self.maint_sessions[session].stop()
self.log.info('shutdown admin_tool server at %s' % time.time())
func = request.environ.get('werkzeug.server.shutdown')
if func is None:
@@ -714,13 +729,11 @@ class AdminTool(Thread):
func()
return 'admin_tool app shutting down...'
- app.run(host='0.0.0.0', port=self.port)
+ app.run(host=self.conf.admin_tool.ip, port=self.conf.admin_tool.port)
- def project_input(self, project_id, data):
- session_id = data['session_id']
+ def project_input(self, session_id, project_id, data):
self.maint_sessions[session_id].project_input(project_id, data)
- def project_get_instances(self, project_id, data):
- session_id = data['session_id']
+ def project_get_instances(self, session_id, project_id):
return self.maint_sessions[session_id].project_get_instances(
project_id)
diff --git a/doctor_tests/app_manager/__init__.py b/doctor_tests/app_manager/__init__.py
index 717d6587..c2f75918 100644
--- a/doctor_tests/app_manager/__init__.py
+++ b/doctor_tests/app_manager/__init__.py
@@ -8,12 +8,13 @@
##############################################################################
from oslo_config import cfg
from oslo_utils import importutils
+import os
OPTS = [
cfg.StrOpt('type',
- default='sample',
- choices=['sample'],
+ default=os.environ.get('APP_MANAGER_TYPE', 'sample'),
+ choices=['sample', 'vnfm'],
help='the component of doctor app manager',
required=True),
cfg.StrOpt('ip',
@@ -28,7 +29,8 @@ OPTS = [
_app_manager_name_class_mapping = {
- 'sample': 'doctor_tests.app_manager.sample.SampleAppManager'
+ 'sample': 'doctor_tests.app_manager.sample.SampleAppManager',
+ 'vnfm': 'doctor_tests.app_manager.vnfm.VNFM',
}
diff --git a/doctor_tests/app_manager/sample.py b/doctor_tests/app_manager/sample.py
index 94926ee2..7ca35b97 100644
--- a/doctor_tests/app_manager/sample.py
+++ b/doctor_tests/app_manager/sample.py
@@ -17,6 +17,7 @@ import requests
from doctor_tests.app_manager.base import BaseAppManager
from doctor_tests.identity_auth import get_identity_auth
from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import neutron_client
from doctor_tests.os_clients import nova_client
@@ -56,12 +57,16 @@ class AppManager(Thread):
self.app_manager = app_manager
self.log = log
self.intance_ids = None
+ self.auth = get_identity_auth(project=self.conf.doctor_project)
+ self.session = get_session(auth=self.auth)
+ self.nova = nova_client(self.conf.nova_version,
+ self.session)
+ self.neutron = neutron_client(session=self.session)
self.headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
- self.auth = get_identity_auth(project=self.conf.doctor_project)
- self.nova = nova_client(self.conf.nova_version,
- get_session(auth=self.auth))
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
self.orig_number_of_instances = self.number_of_instances()
self.ha_instances = self.get_ha_instances()
self.floating_ip = None
@@ -85,7 +90,13 @@ class AppManager(Thread):
if instance.id != self.active_instance_id:
self.log.info('Switch over to: %s %s' % (instance.name,
instance.id))
- instance.add_floating_ip(self.floating_ip)
+ # Deprecated, need to use neutron instead
+ # instance.add_floating_ip(self.floating_ip)
+ port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id'] # noqa
+ floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id'] # noqa
+ self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}}) # noqa
+ # Have to update ha_instances as floating_ip changed
+ self.ha_instances = self.get_ha_instances()
self.active_instance_id = instance.id
break
@@ -114,8 +125,7 @@ class AppManager(Thread):
for t in data['reason_data']['event']['traits']})
def get_session_instance_ids(self, url, session_id):
- data = {'session_id': session_id}
- ret = requests.get(url, data=json.dumps(data), headers=self.headers)
+ ret = requests.get(url, data=None, headers=self.headers)
if ret.status_code != 200:
raise Exception(ret.text)
self.log.info('get_instance_ids %s' % ret.json())
@@ -155,7 +165,7 @@ class AppManager(Thread):
data = json.loads(request.data.decode('utf8'))
try:
payload = self._alarm_traits_decoder(data)
- except:
+ except Exception:
payload = ({t[0]: t[2] for t in
data['reason_data']['event']['traits']})
self.log.error('cannot parse alarm data: %s' % payload)
@@ -177,12 +187,12 @@ class AppManager(Thread):
reply['instance_ids'] = instance_ids
reply_state = 'ACK_MAINTENANCE'
- elif state == 'DOWN_SCALE':
+ elif state == 'SCALE_IN':
# scale down 2 isntances that is VCPUS equaling to single
# compute node
self.scale_instances(-2)
reply['instance_ids'] = self.get_instance_ids()
- reply_state = 'ACK_DOWN_SCALE'
+ reply_state = 'ACK_SCALE_IN'
elif state == 'MAINTENANCE_COMPLETE':
# possibly need to upscale
diff --git a/doctor_tests/app_manager/vnfm.py b/doctor_tests/app_manager/vnfm.py
new file mode 100644
index 00000000..68fdbb88
--- /dev/null
+++ b/doctor_tests/app_manager/vnfm.py
@@ -0,0 +1,441 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from flask import Flask
+from flask import request
+import json
+import requests
+from threading import Thread
+import time
+import uuid
+import yaml
+
+from doctor_tests.app_manager.base import BaseAppManager
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import neutron_client
+from doctor_tests.os_clients import nova_client
+from doctor_tests.os_clients import keystone_client
+
+
+class VNFM(BaseAppManager):
+
+ def __init__(self, stack, conf, log):
+ super(VNFM, self).__init__(conf, log)
+ self.stack = stack
+ self.app = None
+
+ def start(self):
+ self.log.info('VNFM start......')
+ self.app = VNFManager(self.stack, self.conf, self, self.log)
+ self.app.start()
+
+ def stop(self):
+ self.log.info('VNFM stop......')
+ if not self.app:
+ return
+ self.app.delete_constraints()
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ }
+ url = 'http://%s:%d/shutdown'\
+ % (self.conf.app_manager.ip,
+ self.conf.app_manager.port)
+ requests.post(url, data='', headers=headers)
+
+
+class VNFManager(Thread):
+
+ def __init__(self, stack, conf, app_manager, log):
+ Thread.__init__(self)
+ self.stack = stack
+ self.conf = conf
+ self.port = self.conf.app_manager.port
+ self.app_manager = app_manager
+ self.log = log
+ self.intance_ids = None
+ self.auth = get_identity_auth(project=self.conf.doctor_project)
+ self.session = get_session(auth=self.auth)
+ self.keystone = keystone_client(
+ self.conf.keystone_version, self.session)
+ self.nova = nova_client(self.conf.nova_version,
+ self.session)
+ self.neutron = neutron_client(session=self.session)
+ self.headers = {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json'}
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ self.orig_number_of_instances = self.number_of_instances()
+ # List of instances
+ self.ha_instances = []
+ self.nonha_instances = []
+ # Different instance_id specific constraints {instance_id: {}, ...}
+ self.instance_constraints = None
+ # Update existing instances to instance lists
+ self.update_instances()
+ nonha_instances = len(self.nonha_instances)
+ if nonha_instances < 7:
+ self.scale = 2
+ self.max_impacted = 2
+ else:
+ self.scale = int((nonha_instances) / 2)
+ self.max_impacted = self.scale - 1
+ self.log.info('Init nonha_instances: %s scale: %s: max_impacted %s' %
+ (nonha_instances, self.scale, self.max_impacted))
+ # Different instance groups constraints dict
+ self.ha_group = None
+ self.nonha_group = None
+ # Floating IP used in HA instance
+ self.floating_ip = None
+ # VNF project_id
+ self.project_id = None
+ # HA instance_id that is active / has floating IP
+ self.active_instance_id = self.active_instance_id()
+
+ services = self.keystone.services.list()
+ for service in services:
+ if service.type == 'maintenance':
+ self.log.info('maintenance service: %s:%s type %s'
+ % (service.name, service.id, service.type))
+ maint_id = service.id
+ self.maint_endpoint = [ep.url for ep in self.keystone.endpoints.list()
+ if ep.service_id == maint_id and
+ ep.interface == 'public'][0]
+ self.log.info('maintenance endpoint: %s' % self.maint_endpoint)
+ self.update_constraints_lock = False
+ self.update_constraints()
+
+ def delete_remote_instance_constraints(self, instance_id):
+ url = "%s/instance/%s" % (self.maint_endpoint, instance_id)
+ self.log.info('DELETE: %s' % url)
+ ret = requests.delete(url, data=None, headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def update_remote_instance_constraints(self, instance):
+ url = "%s/instance/%s" % (self.maint_endpoint, instance["instance_id"])
+ self.log.info('PUT: %s' % url)
+ ret = requests.put(url, data=json.dumps(instance),
+ headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def delete_remote_group_constraints(self, instance_group):
+ url = "%s/instance_group/%s" % (self.maint_endpoint,
+ instance_group["group_id"])
+ self.log.info('DELETE: %s' % url)
+ ret = requests.delete(url, data=None, headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def update_remote_group_constraints(self, instance_group):
+ url = "%s/instance_group/%s" % (self.maint_endpoint,
+ instance_group["group_id"])
+ self.log.info('PUT: %s' % url)
+ ret = requests.put(url, data=json.dumps(instance_group),
+ headers=self.headers)
+ if ret.status_code != 200 and ret.status_code != 204:
+ raise Exception(ret.text)
+
+ def delete_constraints(self):
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ for instance_id in self.instance_constraints:
+ self.delete_remote_instance_constraints(instance_id)
+ self.delete_remote_group_constraints(self.nonha_group)
+ self.delete_remote_group_constraints(self.ha_group)
+
+ def update_constraints(self):
+ while self.update_constraints_lock:
+ self.log.info('Waiting update_constraints_lock...')
+ time.sleep(1)
+ self.update_constraints_lock = True
+ self.log.info('Update constraints')
+ if self.project_id is None:
+ self.project_id = self.keystone.projects.list(
+ name=self.conf.doctor_project)[0].id
+ if self.nonha_group is None:
+ # Nova does not support grouping instances that do not belong to
+ # anti-affinity server_groups. Anyhow, all instances need grouping
+ self.nonha_group = {
+ "group_id": str(uuid.uuid4()),
+ "project_id": self.project_id,
+ "group_name": "doctor_nonha_app_group",
+ "anti_affinity_group": False,
+ "max_instances_per_host": 0,
+ "max_impacted_members": self.max_impacted,
+ "recovery_time": 2,
+ "resource_mitigation": True}
+ self.log.info('create doctor_nonha_app_group constraints: %s'
+ % self.nonha_group)
+ self.update_remote_group_constraints(self.nonha_group)
+ if self.ha_group is None:
+ group_id = [sg.id for sg in self.nova.server_groups.list()
+ if sg.name == "doctor_ha_app_group"][0]
+ self.ha_group = {
+ "group_id": group_id,
+ "project_id": self.project_id,
+ "group_name": "doctor_ha_app_group",
+ "anti_affinity_group": True,
+ "max_instances_per_host": 1,
+ "max_impacted_members": 1,
+ "recovery_time": 4,
+ "resource_mitigation": True}
+ self.log.info('create doctor_ha_app_group constraints: %s'
+ % self.ha_group)
+ self.update_remote_group_constraints(self.ha_group)
+ instance_constraints = {}
+ for ha_instance in self.ha_instances:
+ instance = {
+ "instance_id": ha_instance.id,
+ "project_id": self.project_id,
+ "group_id": self.ha_group["group_id"],
+ "instance_name": ha_instance.name,
+ "max_interruption_time": 120,
+ "migration_type": "MIGRATE",
+ "resource_mitigation": True,
+ "lead_time": 40}
+ self.log.info('create ha instance constraints: %s'
+ % instance)
+ instance_constraints[ha_instance.id] = instance
+ for nonha_instance in self.nonha_instances:
+ instance = {
+ "instance_id": nonha_instance.id,
+ "project_id": self.project_id,
+ "group_id": self.nonha_group["group_id"],
+ "instance_name": nonha_instance.name,
+ "max_interruption_time": 120,
+ "migration_type": "MIGRATE",
+ "resource_mitigation": True,
+ "lead_time": 40}
+ self.log.info('create nonha instance constraints: %s'
+ % instance)
+ instance_constraints[nonha_instance.id] = instance
+ if not self.instance_constraints:
+ # Initial instance constraints
+ self.log.info('create initial instances constraints...')
+ for instance in [instance_constraints[i] for i
+ in instance_constraints]:
+ self.update_remote_instance_constraints(instance)
+ self.instance_constraints = instance_constraints.copy()
+ else:
+ self.log.info('check instances constraints changes...')
+ added = [i for i in instance_constraints.keys()
+ if i not in self.instance_constraints]
+ deleted = [i for i in self.instance_constraints.keys()
+ if i not in instance_constraints]
+ modified = [i for i in instance_constraints.keys()
+ if (i not in added and i not in deleted and
+ instance_constraints[i] !=
+ self.instance_constraints[i])]
+ for instance_id in deleted:
+ self.delete_remote_instance_constraints(instance_id)
+ updated = added + modified
+ for instance in [instance_constraints[i] for i in updated]:
+ self.update_remote_instance_constraints(instance)
+ if updated or deleted:
+ # Some instance constraints have changed
+ self.instance_constraints = instance_constraints.copy()
+ self.update_constraints_lock = False
+
+ def active_instance_id(self):
+ # Need to retry as it takes time after the heat template is done
+ # before the floating IP is in place
+ retry = 5
+ while retry > 0:
+ for instance in self.ha_instances:
+ network_interfaces = next(iter(instance.addresses.values()))
+ for network_interface in network_interfaces:
+ _type = network_interface.get('OS-EXT-IPS:type')
+ if _type == "floating":
+ if not self.floating_ip:
+ self.floating_ip = network_interface.get('addr')
+ self.log.debug('active_instance: %s %s' %
+ (instance.name, instance.id))
+ return instance.id
+ time.sleep(2)
+ self.update_instances()
+ retry -= 1
+ raise Exception("No active instance found")
+
+ def switch_over_ha_instance(self):
+ for instance in self.ha_instances:
+ if instance.id != self.active_instance_id:
+ self.log.info('Switch over to: %s %s' % (instance.name,
+ instance.id))
+ # Deprecated, need to use neutron instead
+ # instance.add_floating_ip(self.floating_ip)
+ port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id'] # noqa
+ floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id'] # noqa
+ self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}}) # noqa
+ # Have to update ha_instances as floating_ip changed
+ self.update_instances()
+ self.active_instance_id = instance.id
+ break
+
+ def get_instance_ids(self):
+ ret = list()
+ for instance in self.nova.servers.list(detailed=False):
+ ret.append(instance.id)
+ return ret
+
+ def update_instances(self):
+ instances = self.nova.servers.list(detailed=True)
+ self.ha_instances = [i for i in instances
+ if "doctor_ha_app_" in i.name]
+ self.nonha_instances = [i for i in instances
+ if "doctor_nonha_app_" in i.name]
+
+ def _alarm_data_decoder(self, data):
+ if "[" in data or "{" in data:
+ # string to list or dict removing unicode
+ data = yaml.load(data.replace("u'", "'"))
+ return data
+
+ def _alarm_traits_decoder(self, data):
+ return ({str(t[0]): self._alarm_data_decoder(str(t[2]))
+ for t in data['reason_data']['event']['traits']})
+
+ def get_session_instance_ids(self, url, session_id):
+ ret = requests.get(url, data=None, headers=self.headers)
+ if ret.status_code != 200:
+ raise Exception(ret.text)
+ self.log.info('get_instance_ids %s' % ret.json())
+ return ret.json()['instance_ids']
+
+ def scale_instances(self, number_of_instances):
+ number_of_instances_before = self.number_of_instances()
+
+ parameters = self.stack.parameters
+ parameters['nonha_intances'] += number_of_instances
+ self.stack.update(self.stack.stack_name,
+ self.stack.stack_id,
+ self.stack.template,
+ parameters=parameters,
+ files=self.stack.files)
+
+ number_of_instances_after = self.number_of_instances()
+ if (number_of_instances_before + number_of_instances !=
+ number_of_instances_after):
+ self.log.error('scale_instances with: %d from: %d ends up to: %d'
+ % (number_of_instances, number_of_instances_before,
+ number_of_instances_after))
+ raise Exception('scale_instances failed')
+
+ self.log.info('scaled instances from %d to %d' %
+ (number_of_instances_before,
+ number_of_instances_after))
+
+ def number_of_instances(self):
+ return len(self.nova.servers.list(detailed=False))
+
+ def run(self):
+ app = Flask('VNFM')
+
+ @app.route('/maintenance', methods=['POST'])
+ def maintenance_alarm():
+ data = json.loads(request.data.decode('utf8'))
+ try:
+ payload = self._alarm_traits_decoder(data)
+ except Exception:
+ payload = ({t[0]: t[2] for t in
+ data['reason_data']['event']['traits']})
+ self.log.error('cannot parse alarm data: %s' % payload)
+ raise Exception('VNFM cannot parse alarm.'
+ 'Possibly trait data over 256 char')
+
+ self.log.info('VNFM received data = %s' % payload)
+
+ state = payload['state']
+ reply_state = None
+ reply = dict()
+
+ self.log.info('VNFM state: %s' % state)
+
+ if state == 'MAINTENANCE':
+ instance_ids = (self.get_session_instance_ids(
+ payload['instance_ids'],
+ payload['session_id']))
+ my_instance_ids = self.get_instance_ids()
+ invalid_instances = (
+ [instance_id for instance_id in instance_ids
+ if instance_id not in my_instance_ids])
+ if invalid_instances:
+ self.log.error('Invalid instances: %s' % invalid_instances)
+ reply_state = 'NACK_MAINTENANCE'
+ else:
+ reply_state = 'ACK_MAINTENANCE'
+
+ elif state == 'SCALE_IN':
+                # scale down by "self.scale" instances, i.e. by the VCPUs
+                # of at least a single compute node
+ self.scale_instances(-self.scale)
+ reply_state = 'ACK_SCALE_IN'
+
+ elif state == 'MAINTENANCE_COMPLETE':
+ # possibly need to upscale
+ number_of_instances = self.number_of_instances()
+ if self.orig_number_of_instances > number_of_instances:
+ scale_instances = (self.orig_number_of_instances -
+ number_of_instances)
+ self.scale_instances(scale_instances)
+ reply_state = 'ACK_MAINTENANCE_COMPLETE'
+
+ elif state == 'PREPARE_MAINTENANCE':
+                # TBD: derive from constraints
+ if "MIGRATE" not in payload['allowed_actions']:
+ raise Exception('MIGRATE not supported')
+ instance_ids = payload['instance_ids'][0]
+ self.log.info('VNFM got instance: %s' % instance_ids)
+ if instance_ids == self.active_instance_id:
+ self.switch_over_ha_instance()
+                # optional, also defined in constraints
+ reply['instance_action'] = "MIGRATE"
+ reply_state = 'ACK_PREPARE_MAINTENANCE'
+
+ elif state == 'PLANNED_MAINTENANCE':
+                # TBD: derive from constraints
+ if "MIGRATE" not in payload['allowed_actions']:
+ raise Exception('MIGRATE not supported')
+ instance_ids = payload['instance_ids'][0]
+ self.log.info('VNFM got instance: %s' % instance_ids)
+ if instance_ids == self.active_instance_id:
+ self.switch_over_ha_instance()
+                # optional, also defined in constraints
+ reply['instance_action'] = "MIGRATE"
+ reply_state = 'ACK_PLANNED_MAINTENANCE'
+
+ elif state == 'INSTANCE_ACTION_DONE':
+                # TBD: check the action was done within the allowed window
+ self.log.info('%s' % payload['instance_ids'])
+ else:
+ raise Exception('VNFM received event with'
+ ' unknown state %s' % state)
+
+ if reply_state:
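+                # Fenix expects a valid keystone token with the reply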
+ if self.conf.admin_tool.type == 'fenix':
+ self.headers['X-Auth-Token'] = self.session.get_token()
+ reply['state'] = reply_state
+ url = payload['reply_url']
+ self.log.info('VNFM reply: %s' % reply)
+ requests.put(url, data=json.dumps(reply), headers=self.headers)
+
+ return 'OK'
+
+ @app.route('/shutdown', methods=['POST'])
+ def shutdown():
+ self.log.info('shutdown VNFM server at %s' % time.time())
+ func = request.environ.get('werkzeug.server.shutdown')
+ if func is None:
+ raise RuntimeError('Not running with the Werkzeug Server')
+ func()
+ return 'VNFM shutting down...'
+
+ app.run(host="0.0.0.0", port=self.port)
diff --git a/doctor_tests/common/constants.py b/doctor_tests/common/constants.py
index 088ff633..201f3fc4 100644
--- a/doctor_tests/common/constants.py
+++ b/doctor_tests/common/constants.py
@@ -12,6 +12,10 @@ from collections import namedtuple
Host = namedtuple('Host', ['name', 'ip'])
+def is_fenix(conf):
+ return conf.admin_tool.type == 'fenix'
+
+
class Inspector(object):
CONGRESS = 'congress'
SAMPLE = 'sample'
diff --git a/doctor_tests/common/utils.py b/doctor_tests/common/utils.py
index 1a84c824..67ca4f4b 100644
--- a/doctor_tests/common/utils.py
+++ b/doctor_tests/common/utils.py
@@ -10,6 +10,7 @@ import json
import os
import paramiko
import re
+import subprocess
def load_json_file(full_path):
@@ -67,7 +68,7 @@ class SSHClient(object):
def __del__(self):
self.client.close()
- def ssh(self, command):
+ def ssh(self, command, raise_enabled=True):
if self.log:
self.log.info("Executing: %s" % command)
stdin, stdout, stderr = self.client.exec_command(command)
@@ -75,7 +76,7 @@ class SSHClient(object):
output = list()
for line in stdout.read().splitlines():
output.append(line.decode('utf-8'))
- if ret:
+ if ret and raise_enabled:
if self.log:
self.log.info("*** FAILED to run command %s (%s)"
% (command, ret))
@@ -97,6 +98,27 @@ class SSHClient(object):
ftp.close()
+class LocalSSH(object):
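+    """Minimal SSHClient look-alike that runs commands on the local host."""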
+
+ def __init__(self, log):
+ self.log = log
+ self.log.info('Init local ssh client')
+
+ def ssh(self, cmd):
+ ret = 0
+ output = "%s failed!!!" % cmd
+ try:
+ output = subprocess.check_output((cmd), shell=True,
+ universal_newlines=True)
+ except subprocess.CalledProcessError:
+ ret = 1
+ return ret, output
+
+ def scp(self, src_file, dst_file):
+ return subprocess.check_output("cp %s %s" % (src_file, dst_file),
+ shell=True)
+
+
def run_async(func):
from threading import Thread
from functools import wraps
diff --git a/doctor_tests/image.py b/doctor_tests/image.py
index 9961b22d..50841ef6 100644
--- a/doctor_tests/image.py
+++ b/doctor_tests/image.py
@@ -7,7 +7,11 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
import os
-import urllib.request
+try:
+ from urllib.request import urlopen
+except Exception:
+ from urllib2 import urlopen
+
from oslo_config import cfg
@@ -46,11 +50,14 @@ class Image(object):
def create(self):
self.log.info('image create start......')
-
images = {image.name: image for image in self.glance.images.list()}
+ if self.conf.image_name == 'cirros':
+ cirros = [image for image in images if 'cirros' in image]
+ if cirros:
+ self.conf.image_name = cirros[0]
if self.conf.image_name not in images:
if not os.path.exists(self.conf.image_filename):
- resp = urllib.request.urlopen(self.conf.image_download_url)
+ resp = urlopen(self.conf.image_download_url)
with open(self.conf.image_filename, "wb") as file:
file.write(resp.read())
self.image = \
diff --git a/doctor_tests/inspector/__init__.py b/doctor_tests/inspector/__init__.py
index 31291baf..50365a61 100644
--- a/doctor_tests/inspector/__init__.py
+++ b/doctor_tests/inspector/__init__.py
@@ -42,6 +42,10 @@ _inspector_name_class_mapping = {
}
-def get_inspector(conf, log):
+def get_inspector(conf, log, transport_url=None):
inspector_class = _inspector_name_class_mapping[conf.inspector.type]
- return importutils.import_object(inspector_class, conf, log)
+ if conf.inspector.type == 'sample':
+ return importutils.import_object(inspector_class, conf, log,
+ transport_url)
+ else:
+ return importutils.import_object(inspector_class, conf, log)
diff --git a/doctor_tests/inspector/sample.py b/doctor_tests/inspector/sample.py
index a55a12b7..c44db95d 100644
--- a/doctor_tests/inspector/sample.py
+++ b/doctor_tests/inspector/sample.py
@@ -10,6 +10,7 @@ import collections
from flask import Flask
from flask import request
import json
+import oslo_messaging
import time
from threading import Thread
import requests
@@ -26,7 +27,7 @@ from doctor_tests.inspector.base import BaseInspector
class SampleInspector(BaseInspector):
event_type = 'compute.host.down'
- def __init__(self, conf, log):
+    def __init__(self, conf, log, transport_url):
super(SampleInspector, self).__init__(conf, log)
self.inspector_url = self.get_inspector_url()
self.novaclients = list()
@@ -43,6 +44,17 @@ class SampleInspector(BaseInspector):
self.hostnames = list()
self.app = None
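+        # Prepare an oslo.messaging notifier for sending fake
+        # compute.instance.update events; leave it as None on failure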
+ try:
+            transport = oslo_messaging.get_notification_transport(
+                self.conf, transport_url)
+ self.notif = oslo_messaging.Notifier(transport,
+ 'compute.instance.update',
+ driver='messaging',
+ topics=['notifications'])
+ self.notif = self.notif.prepare(publisher_id='sample')
+ except Exception:
+ self.notif = None
+
def _init_novaclients(self):
self.NUMBER_OF_CLIENTS = self.conf.instance_count
auth = get_identity_auth(project=self.conf.doctor_project)
@@ -54,13 +66,13 @@ class SampleInspector(BaseInspector):
def _init_servers_list(self):
self.servers.clear()
opts = {'all_tenants': True}
- servers = self.nova.servers.list(search_opts=opts)
+ servers = self.nova.servers.list(detailed=True, search_opts=opts)
for server in servers:
try:
host = server.__dict__.get('OS-EXT-SRV-ATTR:host')
self.servers[host].append(server)
self.log.debug('get hostname=%s from server=%s'
- % (host, server))
+ % (host, str(server.name)))
except Exception as e:
self.log.info('can not get hostname from server=%s, error=%s'
% (server, e))
@@ -97,10 +109,14 @@ class SampleInspector(BaseInspector):
event_type = event['type']
if event_type == self.event_type:
self.hostnames.append(hostname)
+ if self.notif is not None:
+ thr0 = self._send_notif(hostname)
thr1 = self._disable_compute_host(hostname)
thr2 = self._vms_reset_state('error', hostname)
if self.conf.inspector.update_neutron_port_dp_status:
thr3 = self._set_ports_data_plane_status('DOWN', hostname)
+ if self.notif is not None:
+ thr0.join()
thr1.join()
thr2.join()
if self.conf.inspector.update_neutron_port_dp_status:
@@ -119,7 +135,7 @@ class SampleInspector(BaseInspector):
def maintenance(self, data):
try:
payload = self._alarm_traits_decoder(data)
- except:
+ except Exception:
payload = ({t[0]: t[2] for t in
data['reason_data']['event']['traits']})
self.log.error('cannot parse alarm data: %s' % payload)
@@ -156,8 +172,8 @@ class SampleInspector(BaseInspector):
nova.servers.reset_state(server, state)
vmdown_time = time.time()
self.vm_down_time = vmdown_time
- self.log.info('doctor mark vm(%s) error at %s'
- % (server, vmdown_time))
+ self.log.info('doctor mark vm(%s) %s at %s'
+ % (server, state, vmdown_time))
thrs = []
for nova, server in zip(self.novaclients, self.servers[hostname]):
@@ -167,6 +183,26 @@ class SampleInspector(BaseInspector):
t.join()
@utils.run_async
+ def _send_notif(self, hostname):
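+        # Emit a compute.instance.update "error" notification for every
+        # server on the failed host, each in its own thread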
+
+ @utils.run_async
+ def _send_notif(server):
+ payload = dict(tenant_id=server.tenant_id,
+ instance_id=server.id,
+ state="error")
+ self.notif.info({'some': 'context'}, 'compute.instance.update',
+ payload)
+ self.log.info('doctor compute.instance.update vm(%s) error %s'
+ % (server, time.time()))
+
+ thrs = []
+ for server in self.servers[hostname]:
+ t = _send_notif(server)
+ thrs.append(t)
+ for t in thrs:
+ t.join()
+
+ @utils.run_async
def _set_ports_data_plane_status(self, status, hostname):
body = {'data_plane_status': status}
diff --git a/doctor_tests/installer/__init__.py b/doctor_tests/installer/__init__.py
index ee44018c..00a01667 100644
--- a/doctor_tests/installer/__init__.py
+++ b/doctor_tests/installer/__init__.py
@@ -13,17 +13,13 @@ from oslo_utils import importutils
OPTS = [
cfg.StrOpt('type',
- default=os.environ.get('INSTALLER_TYPE', 'local'),
- choices=['local', 'apex', 'daisy', 'fuel'],
+ default=os.environ.get('INSTALLER_TYPE', 'devstack'),
+ choices=['apex', 'daisy', 'fuel', 'devstack'],
help='the type of installer',
required=True),
cfg.StrOpt('ip',
default=os.environ.get('INSTALLER_IP', '127.0.0.1'),
help='the ip of installer'),
- cfg.StrOpt('username',
- default='root',
- help='the user name for login installer server',
- required=True),
cfg.StrOpt('key_file',
default=os.environ.get('SSH_KEY', None),
help='the key for user to login installer server',
@@ -32,10 +28,10 @@ OPTS = [
_installer_name_class_mapping = {
- 'local': 'doctor_tests.installer.local.LocalInstaller',
'apex': 'doctor_tests.installer.apex.ApexInstaller',
'daisy': 'doctor_tests.installer.daisy.DaisyInstaller',
- 'fuel': 'doctor_tests.installer.mcp.McpInstaller'
+ 'fuel': 'doctor_tests.installer.mcp.McpInstaller',
+ 'devstack': 'doctor_tests.installer.devstack.DevstackInstaller'
}
diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py
index 694adb88..3ec2100c 100644
--- a/doctor_tests/installer/apex.py
+++ b/doctor_tests/installer/apex.py
@@ -6,39 +6,45 @@
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
-import re
import time
from doctor_tests.common.constants import Inspector
+from doctor_tests.common.constants import is_fenix
+from doctor_tests.common.utils import get_doctor_test_root_dir
from doctor_tests.common.utils import SSHClient
from doctor_tests.installer.base import BaseInstaller
class ApexInstaller(BaseInstaller):
node_user_name = 'heat-admin'
+ installer_username = 'stack'
cm_set_script = 'set_config.py'
nc_set_compute_script = 'set_compute_config.py'
cg_set_script = 'set_congress.py'
+ fe_set_script = 'set_fenix.sh'
cm_restore_script = 'restore_config.py'
nc_restore_compute_script = 'restore_compute_config.py'
cg_restore_script = 'restore_congress.py'
+ ac_restart_script = 'restart_aodh.py'
+ ac_restore_script = 'restore_aodh.py'
+ python = 'python'
def __init__(self, conf, log):
super(ApexInstaller, self).__init__(conf, log)
self.client = SSHClient(self.conf.installer.ip,
- self.conf.installer.username,
+ self.installer_username,
key_filename=self.conf.installer.key_file,
look_for_keys=True)
self.key_file = None
self.controllers = list()
self.computes = list()
- self.controller_clients = list()
- self.compute_clients = list()
def setup(self):
self.log.info('Setup Apex installer start......')
self.key_file = self.get_ssh_key_from_installer()
- self._get_and_set_ips()
+ self._get_overcloud_conf()
+ if is_fenix(self.conf):
+ self._copy_overcloudrc_to_controllers()
self.create_flavor()
self.set_apply_patches()
self.setup_stunnel()
@@ -52,8 +58,13 @@ class ApexInstaller(BaseInstaller):
key_path = '/home/stack/.ssh/id_rsa'
return self._get_ssh_key(self.client, key_path)
- def _get_and_set_ips(self):
- self.log.info('Get controller and compute ips from Apex installer'
+ def _copy_overcloudrc_to_controllers(self):
+ for ip in self.controllers:
+ cmd = "scp overcloudrc %s@%s:" % (self.node_user_name, ip)
+ self._run_cmd_remote(self.client, cmd)
+
+ def _get_overcloud_conf(self):
+ self.log.info('Get overcloud config details from Apex installer'
'......')
command = "source stackrc; nova list | grep ' overcloud-'"
@@ -64,8 +75,11 @@ class ApexInstaller(BaseInstaller):
self.controllers.append(ip)
elif 'overcloud-novacompute-' in line:
self.computes.append(ip)
+ command = "grep docker /home/stack/deploy_command"
+ self.use_containers = self._check_cmd_remote(self.client, command)
self.log.info('controller_ips:%s' % self.controllers)
self.log.info('compute_ips:%s' % self.computes)
+ self.log.info('use_containers:%s' % self.use_containers)
def get_host_ip_from_hostname(self, hostname):
self.log.info('Get host ip by hostname=%s from Apex installer......'
@@ -76,83 +90,121 @@ class ApexInstaller(BaseInstaller):
host_ips = self._run_cmd_remote(self.client, command)
return host_ips[0]
- def get_transport_url(self):
- client = SSHClient(self.controllers[0], self.node_user_name,
- key_filename=self.key_file)
-
- command = 'sudo grep "^transport_url" /etc/nova/nova.conf'
- ret, url = client.ssh(command)
- if ret:
- raise Exception('Exec command to get host ip from controller(%s)'
- 'in Apex installer failed, ret=%s, output=%s'
- % (self.controllers[0], ret, url))
- # need to use ip instead of hostname
- ret = (re.sub("@.*:", "@%s:" % self.controllers[0],
- url[0].split("=", 1)[1]))
- self.log.debug('get_transport_url %s' % ret)
- return ret
+ def _set_docker_restart_cmd(self, service):
+ # There can be multiple instances running so need to restart all
+ cmd = "for container in `sudo docker ps | grep "
+ cmd += service
+ cmd += " | awk '{print $1}'`; do sudo docker restart $container; \
+ done;"
+ return cmd
def set_apply_patches(self):
self.log.info('Set apply patches start......')
-
- restart_cmd = 'sudo systemctl restart' \
- ' openstack-ceilometer-notification.service'
+ fenix_files = None
set_scripts = [self.cm_set_script]
+ if self.use_containers:
+ restart_cmd = (self._set_docker_restart_cmd(
+ "ceilometer-notification"))
+ set_scripts.append(self.ac_restart_script)
+ else:
+ restart_cmd = 'sudo systemctl restart' \
+ ' openstack-ceilometer-notification.service'
+
if self.conf.test_case != 'fault_management':
- restart_cmd += ' openstack-nova-scheduler.service'
+ if self.use_containers:
+ restart_cmd += self._set_docker_restart_cmd("nova-scheduler")
+ if is_fenix(self.conf):
+ set_scripts.append(self.fe_set_script)
+ testdir = get_doctor_test_root_dir()
+ fenix_files = ["Dockerfile", "run"]
+ else:
+ restart_cmd += ' openstack-nova-scheduler.service'
+ set_scripts.append(self.nc_set_compute_script)
if self.conf.inspector.type == Inspector.CONGRESS:
- restart_cmd += ' openstack-congress-server.service'
+ if self.use_containers:
+ restart_cmd += self._set_docker_restart_cmd("congress-server")
+ else:
+ restart_cmd += ' openstack-congress-server.service'
set_scripts.append(self.cg_set_script)
for node_ip in self.controllers:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self.controller_clients.append(client)
+ if fenix_files is not None:
+ for fenix_file in fenix_files:
+ src_file = '{0}/{1}/{2}'.format(testdir,
+ 'admin_tool/fenix',
+ fenix_file)
+ client.scp(src_file, fenix_file)
self._run_apply_patches(client,
restart_cmd,
- set_scripts)
+ set_scripts,
+ python=self.python)
+ time.sleep(5)
+
+        self.log.info('Set apply patches to computes start......')
if self.conf.test_case != 'fault_management':
- restart_cmd = 'sudo systemctl restart' \
- ' openstack-nova-compute.service'
+ if self.use_containers:
+ restart_cmd = self._set_docker_restart_cmd("nova")
+ else:
+ restart_cmd = 'sudo systemctl restart' \
+ ' openstack-nova-compute.service'
for node_ip in self.computes:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self.compute_clients.append(client)
self._run_apply_patches(client,
restart_cmd,
- [self.nc_set_compute_script])
-
- if self.conf.test_case != 'fault_management':
- time.sleep(10)
+ [self.nc_set_compute_script],
+ python=self.python)
+ time.sleep(5)
def restore_apply_patches(self):
self.log.info('restore apply patches start......')
- restart_cmd = 'sudo systemctl restart' \
- ' openstack-ceilometer-notification.service'
-
restore_scripts = [self.cm_restore_script]
+ if self.use_containers:
+ restart_cmd = (self._set_docker_restart_cmd(
+ "ceilometer-notification"))
+ restore_scripts.append(self.ac_restore_script)
+ else:
+ restart_cmd = 'sudo systemctl restart' \
+ ' openstack-ceilometer-notification.service'
+
if self.conf.test_case != 'fault_management':
- restart_cmd += ' openstack-nova-scheduler.service'
+ if self.use_containers:
+ restart_cmd += self._set_docker_restart_cmd("nova-scheduler")
+ else:
+ restart_cmd += ' openstack-nova-scheduler.service'
+ restore_scripts.append(self.nc_restore_compute_script)
if self.conf.inspector.type == Inspector.CONGRESS:
- restart_cmd += ' openstack-congress-server.service'
+ if self.use_containers:
+ restart_cmd += self._set_docker_restart_cmd("congress-server")
+ else:
+ restart_cmd += ' openstack-congress-server.service'
restore_scripts.append(self.cg_restore_script)
- for client in self.controller_clients:
+ for node_ip in self.controllers:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
self._run_apply_patches(client,
restart_cmd,
- restore_scripts)
+ restore_scripts,
+ python=self.python)
if self.conf.test_case != 'fault_management':
- restart_cmd = 'sudo systemctl restart' \
- ' openstack-nova-compute.service'
- for client in self.compute_clients:
- self._run_apply_patches(client,
- restart_cmd,
- [self.nc_restore_compute_script])
+ if self.use_containers:
+ restart_cmd = self._set_docker_restart_cmd("nova-compute")
+ else:
+ restart_cmd = 'sudo systemctl restart' \
+ ' openstack-nova-compute.service'
+            for node_ip in self.computes:
+                client = SSHClient(node_ip, self.node_user_name,
+                                   key_filename=self.key_file)
+                self._run_apply_patches(
+                    client, restart_cmd,
+                    [self.nc_restore_compute_script],
+                    python=self.python)
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index 953b36d9..de4d2f2e 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -14,8 +14,9 @@ import pwd
import six
import stat
import subprocess
+import time
-from doctor_tests.common.utils import get_doctor_test_root_dir
+from doctor_tests.common import utils
from doctor_tests.identity_auth import get_session
from doctor_tests.os_clients import nova_client
@@ -26,6 +27,7 @@ class BaseInstaller(object):
self.conf = conf
self.log = log
self.servers = list()
+ self.use_containers = False
@abc.abstractproperty
def node_user_name(self):
@@ -74,7 +76,7 @@ class BaseInstaller(object):
cmd = ("ssh -o UserKnownHostsFile=/dev/null"
" -o StrictHostKeyChecking=no"
" -i %s %s@%s -R %s:localhost:%s"
- " sleep %s > ssh_tunnel.%s"
+ " sleep %s > ssh_tunnel.%s.%s"
" 2>&1 < /dev/null "
% (self.key_file,
self.node_user_name,
@@ -82,9 +84,28 @@ class BaseInstaller(object):
port,
port,
tunnel_uptime,
- node_ip))
+ node_ip,
+ port))
server = subprocess.Popen('exec ' + cmd, shell=True)
self.servers.append(server)
+ if self.conf.admin_tool.type == 'fenix':
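+            # Forward the Fenix port from the controller to the local host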
+ port = self.conf.admin_tool.port
+ self.log.info('tunnel for port %s' % port)
+ cmd = ("ssh -o UserKnownHostsFile=/dev/null"
+ " -o StrictHostKeyChecking=no"
+ " -i %s %s@%s -L %s:localhost:%s"
+ " sleep %s > ssh_tunnel.%s.%s"
+ " 2>&1 < /dev/null "
+ % (self.key_file,
+ self.node_user_name,
+ node_ip,
+ port,
+ port,
+ tunnel_uptime,
+ node_ip,
+ port))
+ server = subprocess.Popen('exec ' + cmd, shell=True)
+ self.servers.append(server)
def _get_ssh_key(self, client, key_path):
self.log.info('Get SSH keys from %s installer......'
@@ -95,7 +116,8 @@ class BaseInstaller(object):
% self.conf.installer.type)
return self.key_file
- ssh_key = '{0}/{1}'.format(get_doctor_test_root_dir(), 'instack_key')
+ ssh_key = '{0}/{1}'.format(utils.get_doctor_test_root_dir(),
+ 'instack_key')
client.scp(key_path, ssh_key, method='get')
user = getpass.getuser()
uid = pwd.getpwnam(user).pw_uid
@@ -104,6 +126,10 @@ class BaseInstaller(object):
os.chmod(ssh_key, stat.S_IREAD)
return ssh_key
+ @abc.abstractmethod
+ def get_transport_url(self):
+ pass
+
def _run_cmd_remote(self, client, command):
self.log.info('Run command=%s in %s installer......'
% (command, self.conf.installer.type))
@@ -118,18 +144,48 @@ class BaseInstaller(object):
% (output, command, self.conf.installer.type))
return output
- def _run_apply_patches(self, client, restart_cmd, script_names):
- installer_dir = os.path.dirname(os.path.realpath(__file__))
+ def _check_cmd_remote(self, client, command):
+ self.log.info('Check command=%s return in %s installer......'
+ % (command, self.conf.installer.type))
+
+ ret, output = client.ssh(command, raise_enabled=False)
+ self.log.info('return %s' % ret)
+        return ret == 0
+
+ @utils.run_async
+ def _run_apply_patches(self, client, restart_cmd, script_names,
+ python='python3'):
+ installer_dir = os.path.dirname(os.path.realpath(__file__))
if isinstance(script_names, list):
for script_name in script_names:
script_abs_path = '{0}/{1}/{2}'.format(installer_dir,
'common', script_name)
- client.scp(script_abs_path, script_name)
- cmd = 'sudo python3 %s' % script_name
- ret, output = client.ssh(cmd)
+ if self.conf.installer.type == "devstack":
+ script_name = "/opt/stack/%s" % script_name
+ try:
+ client.scp(script_abs_path, script_name)
+ except Exception:
+ client.scp(script_abs_path, script_name)
+ try:
+ if ".py" in script_name:
+ cmd = 'sudo %s %s' % (python, script_name)
+ else:
+ cmd = 'sudo chmod 700 %s;sudo ./%s' % (script_name,
+ script_name)
+ ret, output = client.ssh(cmd)
+ self.log.info('Command %s output %s' % (cmd, output))
+ except Exception:
+ ret, output = client.ssh(cmd)
+ self.log.info('Command %s output %s' % (cmd, output))
if ret:
- raise Exception('Do the command in controller'
+ raise Exception('Do the command in remote'
' node failed, ret=%s, cmd=%s, output=%s'
% (ret, cmd, output))
+ if 'nova' in restart_cmd or 'devstack@n-' in restart_cmd:
+ # Make sure scheduler has proper cpu_allocation_ratio
+ time.sleep(5)
client.ssh(restart_cmd)
diff --git a/doctor_tests/installer/common/restart_aodh.py b/doctor_tests/installer/common/restart_aodh.py
new file mode 100644
index 00000000..4473bdca
--- /dev/null
+++ b/doctor_tests/installer/common/restart_aodh.py
@@ -0,0 +1,42 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import socket
+import subprocess
+
+
+def restart_aodh_event_alarm():
+ # Restart aodh-evaluator docker with localhost as controller host ip
+ # This makes our alarm sending look the same as without container
+
+ orig_docker_id = subprocess.check_output("docker ps | grep aodh-evaluator "
+ "| awk '{print $1}'", shell=True)
+ get_docker_startup = (
+ 'docker inspect --format=\'{{range .Config.Env}} -e "{{.}}" {{end}} '
+ '{{range .Mounts}} -v {{.Source}}:{{.Destination}}{{if .Mode}}:'
+ '{{.Mode}}{{end}}{{end}} -ti {{.Config.Image}}\''
+ )
+ docker_start = subprocess.check_output("%s %s" % (get_docker_startup,
+ orig_docker_id), shell=True)
+ with open("orig_docker_id", 'w') as oid:
+ oid.write(orig_docker_id)
+ oid.close()
+ subprocess.check_output("docker stop %s" % orig_docker_id, shell=True)
+ ip = socket.gethostbyname(socket.gethostname())
+
+ ae_start = '-d --add-host="localhost:%s" %s' % (ip, docker_start)
+ subprocess.check_output("docker run %s" % ae_start, shell=True)
+ new_docker_id = subprocess.check_output("docker ps | grep aodh-evaluator "
+ " | awk '{print $1}'", shell=True)
+ if orig_docker_id == new_docker_id:
+ raise Exception("Docker ids matching!")
+ with open("new_docker_id", 'w') as nid:
+ nid.write(new_docker_id)
+ nid.close()
+
+restart_aodh_event_alarm()
diff --git a/doctor_tests/installer/common/restore_aodh.py b/doctor_tests/installer/common/restore_aodh.py
new file mode 100644
index 00000000..b55eae8d
--- /dev/null
+++ b/doctor_tests/installer/common/restore_aodh.py
@@ -0,0 +1,32 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import subprocess
+
+
+def restore_aodh_event_alarm():
+ # Remove modified docker and restore original
+ orig = "orig_docker_id"
+ new = "new_docker_id"
+ if os.path.isfile(orig):
+ with open("orig_docker_id", 'r') as oid:
+ orig_docker_id = oid.read()
+ oid.close()
+ if os.path.isfile(new):
+ with open("new_docker_id", 'r') as nid:
+ new_docker_id = nid.read()
+ nid.close()
+ subprocess.check_output("docker stop %s" % new_docker_id,
+ shell=True)
+ subprocess.check_output("docker rm %s" % new_docker_id, shell=True)
+ os.remove(new)
+ subprocess.check_output("docker start %s" % orig_docker_id, shell=True)
+ os.remove(orig)
+
+restore_aodh_event_alarm()
diff --git a/doctor_tests/installer/common/restore_compute_config.py b/doctor_tests/installer/common/restore_compute_config.py
index 0971d12b..82e10a66 100644
--- a/doctor_tests/installer/common/restore_compute_config.py
+++ b/doctor_tests/installer/common/restore_compute_config.py
@@ -11,15 +11,16 @@ import shutil
def restore_cpu_allocation_ratio():
- nova_file = '/etc/nova/nova.conf'
- nova_file_bak = '/etc/nova/nova.bak'
-
- if not os.path.isfile(nova_file_bak):
- print('Bak_file:%s does not exist.' % nova_file_bak)
- else:
- print('restore: %s' % nova_file)
- shutil.copyfile(nova_file_bak, nova_file)
- os.remove(nova_file_bak)
+ for nova_file_bak in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.bak", # noqa
+ "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.bak", # noqa
+ "/etc/nova/nova.bak"]:
+ if os.path.isfile(nova_file_bak):
+ nova_file = nova_file_bak.replace(".bak", ".conf")
+ print('restoring nova.bak.')
+ shutil.copyfile(nova_file_bak, nova_file)
+ os.remove(nova_file_bak)
+ return
+ print('nova.bak does not exist.')
return
restore_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/restore_config.py b/doctor_tests/installer/common/restore_config.py
index c1f919c1..5cb83b27 100644
--- a/doctor_tests/installer/common/restore_config.py
+++ b/doctor_tests/installer/common/restore_config.py
@@ -9,11 +9,15 @@
import os
import shutil
-ep_file = '/etc/ceilometer/event_pipeline.yaml'
-ep_file_bak = '/etc/ceilometer/event_pipeline.yaml.bak'
+
+cbase = "/var/lib/config-data/puppet-generated/ceilometer"
+if not os.path.isdir(cbase):
+ cbase = ""
def restore_ep_config():
+ ep_file = cbase + '/etc/ceilometer/event_pipeline.yaml'
+ ep_file_bak = cbase + '/etc/ceilometer/event_pipeline.yaml.bak'
if not os.path.isfile(ep_file_bak):
print('Bak_file:%s does not exist.' % ep_file_bak)
@@ -25,31 +29,20 @@ def restore_ep_config():
def restore_ed_config():
-
- ed_file = '/etc/ceilometer/event_definitions.yaml'
- ed_file_bak = '/etc/ceilometer/event_definitions.bak'
+ ed_file = cbase + '/etc/ceilometer/event_definitions.yaml'
+ ed_file_bak = cbase + '/etc/ceilometer/event_definitions.bak'
if not os.path.isfile(ed_file_bak):
print("Bak_file doesn't exist: %s." % ed_file_bak)
else:
print('restore: %s' % ed_file)
- shutil.copyfile(ed_file_bak, ed_file)
+ if os.stat(ed_file_bak).st_size == 0:
+ print('Bak_file empty, so removing also: %s' % ed_file)
+ os.remove(ed_file)
+ else:
+ shutil.copyfile(ed_file_bak, ed_file)
os.remove(ed_file_bak)
return
-
-def restore_cpu_allocation_ratio():
- nova_file = '/etc/nova/nova.conf'
- nova_file_bak = '/etc/nova/nova.bak'
-
- if not os.path.isfile(nova_file_bak):
- print('Bak_file:%s does not exist.' % nova_file_bak)
- else:
- print('restore: %s' % nova_file)
- shutil.copyfile(nova_file_bak, nova_file)
- os.remove(nova_file_bak)
- return
-
restore_ep_config()
restore_ed_config()
-restore_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/restore_congress.py b/doctor_tests/installer/common/restore_congress.py
index b5efb1ef..576f1b16 100644
--- a/doctor_tests/installer/common/restore_congress.py
+++ b/doctor_tests/installer/common/restore_congress.py
@@ -11,8 +11,11 @@ import shutil
def restore_drivers_config():
- co_conf = "/etc/congress/congress.conf"
- co_conf_bak = "/etc/congress/congress.conf.bak"
+ co_base = "/var/lib/config-data/puppet-generated/congress"
+ if not os.path.isdir(co_base):
+ co_base = ""
+ co_conf = co_base + "/etc/congress/congress.conf"
+ co_conf_bak = co_base + "/etc/congress/congress.conf.bak"
if not os.path.isfile(co_conf_bak):
print('Bak_file:%s does not exist.' % co_conf_bak)
diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py
index 07db1e16..615f1895 100644
--- a/doctor_tests/installer/common/set_compute_config.py
+++ b/doctor_tests/installer/common/set_compute_config.py
@@ -11,19 +11,24 @@ import shutil
def set_cpu_allocation_ratio():
- nova_file = '/etc/nova/nova.conf'
- nova_file_bak = '/etc/nova/nova.bak'
+ nova_file_bak = None
+ for nova_file in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf", # noqa
+ "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf", # noqa
+ "/etc/nova/nova.conf"]:
+ if os.path.isfile(nova_file):
+ nova_file_bak = nova_file.replace(".conf", ".bak")
+ break
- if not os.path.isfile(nova_file):
- raise Exception("File doesn't exist: %s." % nova_file)
+ if nova_file_bak is None:
+ raise Exception("Could not find nova.conf")
# TODO (tojuvone): Unfortunately ConfigParser did not produce working conf
fcheck = open(nova_file)
found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio"
in ca])
fcheck.close()
+ change = False
+ found = False
if found_list and len(found_list):
- change = False
- found = False
for car in found_list:
if car.startswith('#'):
continue
diff --git a/doctor_tests/installer/common/set_config.py b/doctor_tests/installer/common/set_config.py
index 42465247..e66d4c2c 100644
--- a/doctor_tests/installer/common/set_config.py
+++ b/doctor_tests/installer/common/set_config.py
@@ -10,12 +10,16 @@ import os
import shutil
import yaml
-ep_file = '/etc/ceilometer/event_pipeline.yaml'
-ep_file_bak = '/etc/ceilometer/event_pipeline.yaml.bak'
-event_notifier_topic = 'notifier://?topic=alarm.all'
+
+cbase = "/var/lib/config-data/puppet-generated/ceilometer"
+if not os.path.isdir(cbase):
+ cbase = ""
def set_notifier_topic():
+ ep_file = cbase + '/etc/ceilometer/event_pipeline.yaml'
+ ep_file_bak = cbase + '/etc/ceilometer/event_pipeline.yaml.bak'
+ event_notifier_topic = 'notifier://?topic=alarm.all'
config_modified = False
if not os.path.isfile(ep_file):
@@ -42,24 +46,77 @@ def set_notifier_topic():
file.write(yaml.safe_dump(config))
-def set_maintenance_event_definitions():
- ed_file = '/etc/ceilometer/event_definitions.yaml'
- ed_file_bak = '/etc/ceilometer/event_definitions.bak'
+def set_event_definitions():
+ ed_file = cbase + '/etc/ceilometer/event_definitions.yaml'
+ ed_file_bak = cbase + '/etc/ceilometer/event_definitions.bak'
+ orig_ed_file_exist = True
+ modify_config = False
if not os.path.isfile(ed_file):
- raise Exception("File doesn't exist: %s." % ed_file)
-
- with open(ed_file, 'r') as file:
- config = yaml.safe_load(file)
+        # Deployment has not modified the file; use the stock one if any
+ src_file = '/etc/ceilometer/event_definitions.yaml'
+ if not os.path.isfile(src_file):
+ config = []
+ orig_ed_file_exist = False
+ else:
+ shutil.copyfile('/etc/ceilometer/event_definitions.yaml', ed_file)
+ if orig_ed_file_exist:
+ with open(ed_file, 'r') as file:
+ config = yaml.safe_load(file)
et_list = [et['event_type'] for et in config]
+ if 'compute.instance.update' in et_list:
+        print('NOTE: compute.instance.update already configured')
+ else:
+ print('NOTE: add compute.instance.update to event_definitions.yaml')
+ modify_config = True
+ instance_update = {
+ 'event_type': 'compute.instance.update',
+ 'traits': {
+ 'deleted_at': {'fields': 'payload.deleted_at',
+ 'type': 'datetime'},
+ 'disk_gb': {'fields': 'payload.disk_gb',
+ 'type': 'int'},
+ 'display_name': {'fields': 'payload.display_name'},
+ 'ephemeral_gb': {'fields': 'payload.ephemeral_gb',
+ 'type': 'int'},
+ 'host': {'fields': 'publisher_id.`split(., 1, 1)`'},
+ 'instance_id': {'fields': 'payload.instance_id'},
+ 'instance_type': {'fields': 'payload.instance_type'},
+ 'instance_type_id': {'fields': 'payload.instance_type_id',
+ 'type': 'int'},
+ 'launched_at': {'fields': 'payload.launched_at',
+ 'type': 'datetime'},
+ 'memory_mb': {'fields': 'payload.memory_mb',
+ 'type': 'int'},
+ 'old_state': {'fields': 'payload.old_state'},
+ 'os_architecture': {
+ 'fields':
+ "payload.image_meta.'org.openstack__1__architecture'"},
+ 'os_distro': {
+ 'fields':
+ "payload.image_meta.'org.openstack__1__os_distro'"},
+ 'os_version': {
+ 'fields':
+ "payload.image_meta.'org.openstack__1__os_version'"},
+ 'resource_id': {'fields': 'payload.instance_id'},
+ 'root_gb': {'fields': 'payload.root_gb',
+ 'type': 'int'},
+ 'service': {'fields': 'publisher_id.`split(., 0, -1)`'},
+ 'state': {'fields': 'payload.state'},
+ 'tenant_id': {'fields': 'payload.tenant_id'},
+ 'user_id': {'fields': 'payload.user_id'},
+ 'vcpus': {'fields': 'payload.vcpus', 'type': 'int'}
+ }
+ }
+ config.append(instance_update)
+
if 'maintenance.scheduled' in et_list:
- add_mscheduled = False
print('NOTE: maintenance.scheduled allready configured')
else:
print('NOTE: add maintenance.scheduled to event_definitions.yaml')
- add_mscheduled = True
+ modify_config = True
mscheduled = {
'event_type': 'maintenance.scheduled',
'traits': {
@@ -68,20 +125,20 @@ def set_maintenance_event_definitions():
'reply_url': {'fields': 'payload.reply_url'},
'actions_at': {'fields': 'payload.actions_at',
'type': 'datetime'},
+ 'reply_at': {'fields': 'payload.reply_at', 'type': 'datetime'},
'state': {'fields': 'payload.state'},
'session_id': {'fields': 'payload.session_id'},
'project_id': {'fields': 'payload.project_id'},
'metadata': {'fields': 'payload.metadata'}
+ }
}
- }
config.append(mscheduled)
if 'maintenance.host' in et_list:
- add_mhost = False
print('NOTE: maintenance.host allready configured')
else:
print('NOTE: add maintenance.host to event_definitions.yaml')
- add_mhost = True
+ modify_config = True
mhost = {
'event_type': 'maintenance.host',
'traits': {
@@ -89,51 +146,18 @@ def set_maintenance_event_definitions():
'project_id': {'fields': 'payload.project_id'},
'state': {'fields': 'payload.state'},
'session_id': {'fields': 'payload.session_id'}
+ }
}
- }
config.append(mhost)
- if add_mscheduled or add_mhost:
- shutil.copyfile(ed_file, ed_file_bak)
+ if modify_config:
+ if orig_ed_file_exist:
+ shutil.copyfile(ed_file, ed_file_bak)
+ else:
+ with open(ed_file_bak, 'w+') as file:
+ file.close()
with open(ed_file, 'w+') as file:
file.write(yaml.safe_dump(config))
-
-def set_cpu_allocation_ratio():
- nova_file = '/etc/nova/nova.conf'
- nova_file_bak = '/etc/nova/nova.bak'
-
- if not os.path.isfile(nova_file):
- raise Exception("File doesn't exist: %s." % nova_file)
- # TODO (tojuvone): Unfortunately ConfigParser did not produce working conf
- fcheck = open(nova_file)
- found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio"
- in ca])
- fcheck.close()
- if found_list and len(found_list):
- change = False
- found = False
- for car in found_list:
- if car.startswith('#'):
- continue
- if car.startswith('cpu_allocation_ratio'):
- found = True
- if "1.0" not in car.split('=')[1]:
- change = True
- if not found or change:
- # need to add or change
- shutil.copyfile(nova_file, nova_file_bak)
- fin = open(nova_file_bak)
- fout = open(nova_file, "wt")
- for line in fin:
- if change and line.startswith("cpu_allocation_ratio"):
- line = "cpu_allocation_ratio=1.0"
- if not found and line.startswith("[DEFAULT]"):
- line += "cpu_allocation_ratio=1.0\n"
- fout.write(line)
- fin.close()
- fout.close()
-
set_notifier_topic()
-set_maintenance_event_definitions()
-set_cpu_allocation_ratio()
+set_event_definitions()
diff --git a/doctor_tests/installer/common/set_congress.py b/doctor_tests/installer/common/set_congress.py
index d8438701..7961df32 100644
--- a/doctor_tests/installer/common/set_congress.py
+++ b/doctor_tests/installer/common/set_congress.py
@@ -6,25 +6,29 @@
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
-import configparser
+from six.moves import configparser
+import os
import shutil
def set_drivers_config():
- co_conf = "/etc/congress/congress.conf"
- co_conf_bak = "/etc/congress/congress.conf.bak"
+ co_base = "/var/lib/config-data/puppet-generated/congress"
+ if not os.path.isdir(co_base):
+ co_base = ""
+ co_conf = co_base + "/etc/congress/congress.conf"
+ co_conf_bak = co_base + "/etc/congress/congress.conf.bak"
doctor_driver = "congress.datasources.doctor_driver.DoctorDriver"
config_modified = False
config = configparser.ConfigParser()
config.read(co_conf)
- drivers = config['DEFAULT']['drivers']
+ drivers = config.get('DEFAULT', 'drivers')
if doctor_driver not in drivers:
config_modified = True
drivers += ',' + doctor_driver
- config['DEFAULT']['drivers'] = drivers
+ config.set('DEFAULT', 'drivers', drivers)
if config_modified:
shutil.copyfile(co_conf, co_conf_bak)
diff --git a/doctor_tests/installer/common/set_fenix.sh b/doctor_tests/installer/common/set_fenix.sh
new file mode 100644
index 00000000..bd1eae47
--- /dev/null
+++ b/doctor_tests/installer/common/set_fenix.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+##############################################################################
+# Copyright (c) 2019 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+# Install docker if it is not already installed
+docker -v >/dev/null || {
+echo "Fenix needs docker to be installed..."
+ver=`grep "UBUNTU_CODENAME" /etc/os-release | cut -d '=' -f 2`
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $ver stable"
+apt install apt-transport-https ca-certificates curl software-properties-common
+apt update
+apt-cache policy docker-ce
+apt-get install -y docker-ce docker-ce-cli containerd.io
+dpkg -r --force-depends golang-docker-credential-helpers
+}
+
+docker ps | grep fenix -q && {
+REMOTE=`git ls-remote https://opendev.org/x/fenix HEAD | awk '{ print $1}'`
+LOCAL=`docker exec -t fenix git rev-parse @`
+if [[ "$LOCAL" =~ "$REMOTE" ]]; then
+    # The strings differ in line endings, so use a substring match
+ echo "Fenix start: Already running latest $LOCAL equals $REMOTE"
+ exit 0
+else
+ echo "Fenix container needs to be recreated $LOCAL not $REMOTE"
+ # Remove previous container
+ for img in `docker image list | grep "^fenix" | awk '{print $1}'`; do
+ for dock in `docker ps --all -f "ancestor=$img" | grep "$img" | awk '{print $1}'`; do
+ docker stop $dock; docker rm $dock;
+ done;
+ docker image rm $img;
+ done
+fi
+} || echo "Fenix container needs to be created..."
+
+cp /root/keystonercv3 .
+
+transport=`grep -m1 "^transport" /etc/nova/nova.conf`
+. keystonercv3
+
+echo "[DEFAULT]" > fenix.conf
+echo "port = 12347" >> fenix.conf
+echo $transport >> fenix.conf
+
+echo "[database]" >> fenix.conf
+MYSQLIP=`grep -m1 "^connection" /etc/nova/nova.conf | sed -e "s/.*@//;s/\/.*//"`
+echo "connection = mysql+pymysql://fenix:fenix@$MYSQLIP/fenix" >> fenix.conf
+
+echo "[service_user]" >> fenix.conf
+echo "os_auth_url = $OS_AUTH_URL" >> fenix.conf
+echo "os_username = $OS_USERNAME" >> fenix.conf
+echo "os_password = $OS_PASSWORD" >> fenix.conf
+echo "os_user_domain_name = $OS_USER_DOMAIN_NAME" >> fenix.conf
+echo "os_project_name = $OS_PROJECT_NAME" >> fenix.conf
+echo "os_project_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix.conf
+
+echo "[DEFAULT]" > fenix-api.conf
+echo "port = 12347" >> fenix-api.conf
+echo $transport >> fenix-api.conf
+
+echo "[keystone_authtoken]" >> fenix-api.conf
+echo "auth_url = $OS_AUTH_URL" >> fenix-api.conf
+echo "auth_type = password" >> fenix-api.conf
+echo "project_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix-api.conf
+echo "project_name = $OS_PROJECT_NAME" >> fenix-api.conf
+echo "user_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix-api.conf
+echo "password = $OS_PASSWORD" >> fenix-api.conf
+echo "username = $OS_USERNAME" >> fenix-api.conf
+echo "cafile = /opt/stack/data/ca-bundle.pem" >> fenix-api.conf
+
+openstack service list | grep -q maintenance || {
+openstack service create --name fenix --enable maintenance
+openstack endpoint create --region $OS_REGION_NAME --enable fenix public http://localhost:12347/v1
+}
+
+# Mysql pw
+# MYSQLPW=`cat /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.json | grep mysql | grep root_password | awk -F": " '{print $2}' | awk -F"\"" '{print $2}'`
+MYSQLPW=root
+
+# Fenix DB
+[ `mysql -uroot -p$MYSQLPW -e "SELECT host, user FROM mysql.user;" | grep fenix | wc -l` -eq 0 ] && {
+ mysql -uroot -p$MYSQLPW -hlocalhost -e "CREATE USER 'fenix'@'localhost' IDENTIFIED BY 'fenix';"
+ mysql -uroot -p$MYSQLPW -hlocalhost -e "GRANT ALL PRIVILEGES ON fenix.* TO 'fenix'@'' identified by 'fenix';FLUSH PRIVILEGES;"
+}
+mysql -ufenix -pfenix -hlocalhost -e "DROP DATABASE IF EXISTS fenix;"
+mysql -ufenix -pfenix -hlocalhost -e "CREATE DATABASE fenix CHARACTER SET utf8;"
+
+# Build Fenix container and run it
+chmod 700 run
+docker build --build-arg OPENSTACK=master --build-arg BRANCH=master --network host $PWD -t fenix | tail -1
+docker run --network host -d --name fenix -p 12347:12347 -ti fenix
+if [ $? -eq 0 ]; then
+ echo "Fenix start: OK"
+else
+ echo "Fenix start: FAILED"
+fi
+# To debug check log from fenix container
+# docker exec -ti fenix tail -f /var/log/fenix-engine.log
diff --git a/doctor_tests/installer/common/vitrage.py b/doctor_tests/installer/common/vitrage.py
index 30a73f5d..801adff5 100644
--- a/doctor_tests/installer/common/vitrage.py
+++ b/doctor_tests/installer/common/vitrage.py
@@ -9,8 +9,11 @@
import os
+vi_base = "/var/lib/config-data/puppet-generated/vitrage"
+if not os.path.isdir(vi_base):
+ vi_base = ""
vitrage_template_file = \
- '/etc/vitrage/templates/vitrage_host_down_scenarios.yaml'
+ vi_base + '/etc/vitrage/templates/vitrage_host_down_scenarios.yaml'
template = """
metadata:
diff --git a/doctor_tests/installer/daisy.py b/doctor_tests/installer/daisy.py
index 52ccb7c8..e4499d9c 100644
--- a/doctor_tests/installer/daisy.py
+++ b/doctor_tests/installer/daisy.py
@@ -12,11 +12,12 @@ from doctor_tests.installer.base import BaseInstaller
class DaisyInstaller(BaseInstaller):
node_user_name = 'root'
+ installer_username = 'root'
def __init__(self, conf, log):
super(DaisyInstaller, self).__init__(conf, log)
self.client = SSHClient(self.conf.installer.ip,
- self.conf.installer.username,
+ self.installer_username,
password='r00tme')
self.key_file = None
self.controllers = list()
diff --git a/doctor_tests/installer/devstack.py b/doctor_tests/installer/devstack.py
new file mode 100644
index 00000000..02f3601a
--- /dev/null
+++ b/doctor_tests/installer/devstack.py
@@ -0,0 +1,151 @@
+##############################################################################
+# Copyright (c) 2019 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import socket
+import time
+
+from doctor_tests.common.utils import SSHClient
+from doctor_tests.common.utils import LocalSSH
+from doctor_tests.identity_auth import get_session
+from doctor_tests.installer.base import BaseInstaller
+from doctor_tests.os_clients import nova_client
+
+
+class DevstackInstaller(BaseInstaller):
+ node_user_name = None
+ cm_set_script = 'set_config.py'
+ nc_set_compute_script = 'set_compute_config.py'
+ cm_restore_script = 'restore_config.py'
+ nc_restore_compute_script = 'restore_compute_config.py'
+ ac_restart_script = 'restart_aodh.py'
+ ac_restore_script = 'restore_aodh.py'
+ python = 'python'
+
+ def __init__(self, conf, log):
+ super(DevstackInstaller, self).__init__(conf, log)
+        # Run Doctor under the user's home; sudo hides needed env params
+ home, self.node_user_name = (iter(os.environ.get('VIRTUAL_ENV')
+ .split('/', 3)[1:3]))
+ # Migration needs to work so ssh should have proper key defined
+ self.key_file = '/%s/%s/.ssh/id_rsa' % (home, self.node_user_name)
+ self.log.info('ssh uses: %s and %s' % (self.node_user_name,
+ self.key_file))
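+        # Use the first non-loopback address of this host; if none is
+        # found, learn the local IP from a UDP socket towards 8.8.8.8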
+ self.controllers = ([ip for ip in
+ socket.gethostbyname_ex(socket.gethostname())[2]
+ if not ip.startswith('127.')] or
+ [[(s.connect(('8.8.8.8', 53)),
+ s.getsockname()[0], s.close())
+ for s in [socket.socket(socket.AF_INET,
+ socket.SOCK_DGRAM)]][0][1]])
+ conf.admin_tool.ip = self.controllers[0]
+ self.computes = list()
+ self.nova = nova_client(conf.nova_version, get_session())
+
+ def setup(self):
+ self.log.info('Setup Devstack installer start......')
+ self._get_devstack_conf()
+ self.create_flavor()
+ self.set_apply_patches()
+
+ def cleanup(self):
+ self.restore_apply_patches()
+
+ def get_ssh_key_from_installer(self):
+ return self.key_file
+
+ def get_transport_url(self):
+ client = LocalSSH(self.log)
+ cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf'
+ ret, url = client.ssh(cmd)
+ url = url.split("= ", 1)[1][:-1]
+ self.log.info('get_transport_url %s' % url)
+ return url
+
+ def get_host_ip_from_hostname(self, hostname):
+ return [hvisor.__getattr__('host_ip') for hvisor in self.hvisors
+ if hvisor.__getattr__('hypervisor_hostname') == hostname][0]
+
+ def _get_devstack_conf(self):
+ self.log.info('Get devstack config details for Devstack installer'
+ '......')
+ self.hvisors = self.nova.hypervisors.list(detailed=True)
+ self.log.info('checking hypervisors.......')
+ self.computes = [hvisor.__getattr__('host_ip') for hvisor in
+ self.hvisors]
+ self.use_containers = False
+ self.log.info('controller_ips:%s' % self.controllers)
+ self.log.info('compute_ips:%s' % self.computes)
+ self.log.info('use_containers:%s' % self.use_containers)
+
+ def _set_docker_restart_cmd(self, service):
+ # There can be multiple instances running so need to restart all
+ cmd = "for container in `sudo docker ps | grep "
+ cmd += service
+ cmd += " | awk '{print $1}'`; do sudo docker restart $container; \
+ done;"
+ return cmd
+
+ def set_apply_patches(self):
+ self.log.info('Set apply patches start......')
+
+ set_scripts = [self.cm_set_script]
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@ceilometer-anotification.service'
+
+ client = LocalSSH(self.log)
+ self._run_apply_patches(client,
+ restart_cmd,
+ set_scripts,
+ python=self.python)
+ time.sleep(7)
+
+        self.log.info('Set apply patches to computes start......')
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@n-cpu.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(client,
+ restart_cmd,
+ [self.nc_set_compute_script],
+ python=self.python)
+ time.sleep(7)
+
+ def restore_apply_patches(self):
+ self.log.info('restore apply patches start......')
+
+ restore_scripts = [self.cm_restore_script]
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@ceilometer-anotification.service'
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd += ' devstack@n-sch.service'
+ restore_scripts.append(self.nc_restore_compute_script)
+
+ client = LocalSSH(self.log)
+ self._run_apply_patches(client,
+ restart_cmd,
+ restore_scripts,
+ python=self.python)
+
+ if self.conf.test_case != 'fault_management':
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' devstack@n-cpu.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(
+ client, restart_cmd,
+ [self.nc_restore_compute_script],
+ python=self.python)
diff --git a/doctor_tests/installer/local.py b/doctor_tests/installer/local.py
deleted file mode 100644
index fee14f33..00000000
--- a/doctor_tests/installer/local.py
+++ /dev/null
@@ -1,118 +0,0 @@
-##############################################################################
-# Copyright (c) 2017 ZTE Corporation and others.
-#
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Apache License, Version 2.0
-# which accompanies this distribution, and is available at
-# http://www.apache.org/licenses/LICENSE-2.0
-##############################################################################
-import os
-import shutil
-import subprocess
-
-from doctor_tests.installer.base import BaseInstaller
-from doctor_tests.installer.common.vitrage import \
- set_vitrage_host_down_template
-from doctor_tests.common.constants import Inspector
-from doctor_tests.common.utils import load_json_file
-from doctor_tests.common.utils import write_json_file
-
-
-class LocalInstaller(BaseInstaller):
- node_user_name = 'root'
-
- nova_policy_file = '/etc/nova/policy.json'
- nova_policy_file_backup = '%s%s' % (nova_policy_file, '.bak')
-
- def __init__(self, conf, log):
- super(LocalInstaller, self).__init__(conf, log)
- self.policy_modified = False
- self.add_policy_file = False
-
- def setup(self):
- self.get_ssh_key_from_installer()
- self.set_apply_patches()
-
- def cleanup(self):
- self.restore_apply_patches()
-
- def get_ssh_key_from_installer(self):
- self.log.info('Assuming SSH keys already exchanged with computer'
- 'for local installer type')
- return None
-
- def get_host_ip_from_hostname(self, hostname):
- self.log.info('Get host ip from host name in local installer......')
-
- cmd = "getent hosts %s | awk '{ print $1 }'" % (hostname)
- server = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
- stdout, stderr = server.communicate()
- host_ip = stdout.strip().decode("utf-8")
-
- self.log.info('Get host_ip:%s from host_name:%s in local installer'
- % (host_ip, hostname))
- return host_ip
-
- def set_apply_patches(self):
- self._set_nova_policy()
- if self.conf.inspector.type == Inspector.VITRAGE:
- set_vitrage_host_down_template()
- os.system('sudo systemctl restart devstack@vitrage-graph.service')
-
- def restore_apply_patches(self):
- self._restore_nova_policy()
-
- def _set_nova_policy(self):
- host_status_policy = 'os_compute_api:servers:show:host_status'
- host_status_rule = 'rule:admin_or_owner'
- policy_data = {
- 'context_is_admin': 'role:admin',
- 'owner': 'user_id:%(user_id)s',
- 'admin_or_owner': 'rule:context_is_admin or rule:owner',
- host_status_policy: host_status_rule
- }
-
- if os.path.isfile(self.nova_policy_file):
- data = load_json_file(self.nova_policy_file)
- if host_status_policy in data:
- rule_origion = data[host_status_policy]
- if host_status_rule == rule_origion:
- self.log.info('Do not need to modify nova policy.')
- self.policy_modified = False
- else:
- # update the host_status_policy
- data[host_status_policy] = host_status_rule
- self.policy_modified = True
- else:
- # add the host_status_policy, if the admin_or_owner is not
- # defined, add it also
- for policy, rule in policy_data.items():
- if policy not in data:
- data[policy] = rule
- self.policy_modified = True
- if self.policy_modified:
- self.log.info('Nova policy is Modified.')
- shutil.copyfile(self.nova_policy_file,
- self.nova_policy_file_backup)
- else:
- # file does not exit, create a new one and add the policy
- self.log.info('Nova policy file not exist. Creating a new one')
- data = policy_data
- self.add_policy_file = True
-
- if self.policy_modified or self.add_policy_file:
- write_json_file(self.nova_policy_file, data)
- os.system('sudo systemctl restart devstack@n-api.service')
-
- def _restore_nova_policy(self):
- if self.policy_modified:
- shutil.copyfile(self.nova_policy_file_backup,
- self.nova_policy_file)
- os.remove(self.nova_policy_file_backup)
- elif self.add_policy_file:
- os.remove(self.nova_policy_file)
-
- if self.add_policy_file or self.policy_modified:
- os.system('sudo systemctl restart devstack@n-api.service')
- self.add_policy_file = False
- self.policy_modified = False
diff --git a/doctor_tests/installer/mcp.py b/doctor_tests/installer/mcp.py
index 9cfff92d..7659c9e2 100644
--- a/doctor_tests/installer/mcp.py
+++ b/doctor_tests/installer/mcp.py
@@ -1,5 +1,5 @@
##############################################################################
-# Copyright (c) 2018 ZTE Corporation and others.
+# Copyright (c) 2019 ZTE Corporation and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
@@ -7,15 +7,26 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
from os.path import isfile
+import re
+import time
+from doctor_tests.common.constants import is_fenix
+from doctor_tests.common.utils import get_doctor_test_root_dir
from doctor_tests.common.utils import SSHClient
from doctor_tests.installer.base import BaseInstaller
class McpInstaller(BaseInstaller):
node_user_name = 'ubuntu'
- cm_set_script = 'set_ceilometer.py'
- cm_restore_script = 'restore_ceilometer.py'
+
+ cm_set_script = 'set_config.py'
+ nc_set_compute_script = 'set_compute_config.py'
+ fe_set_script = 'set_fenix.sh'
+ cm_restore_script = 'restore_config.py'
+ nc_restore_compute_script = 'restore_compute_config.py'
+ ac_restart_script = 'restart_aodh.py'
+ ac_restore_script = 'restore_aodh.py'
+ python = 'python3'
def __init__(self, conf, log):
super(McpInstaller, self).__init__(conf, log)
@@ -26,40 +37,87 @@ class McpInstaller(BaseInstaller):
look_for_keys=True)
self.controllers = list()
self.controller_clients = list()
+ self.computes = list()
def setup(self):
self.log.info('Setup MCP installer start......')
-
- self.controllers = self.get_controller_ips()
+ self.get_node_ips()
self.create_flavor()
- self.set_apply_patches()
+ if is_fenix(self.conf):
+ self.set_apply_patches()
self.setup_stunnel()
def cleanup(self):
- self.restore_apply_patches()
+ if is_fenix(self.conf):
+ self.restore_apply_patches()
for server in self.servers:
server.terminate()
def get_ssh_key_from_installer(self):
self.log.info('Get SSH keys from MCP......')
- # Assuming mcp.rsa is already mapped to functest container
- # if not, only the test runs on jumphost can get the ssh_key
- # default in path /var/lib/opnfv/mcp.rsa
+ # Default path: /var/lib/opnfv/mcp.rsa
ssh_key = '/root/.ssh/id_rsa'
mcp_key = '/var/lib/opnfv/mcp.rsa'
- return ssh_key if isfile(ssh_key) else mcp_key
-
- def get_controller_ips(self):
- self.log.info('Get controller ips from Mcp installer......')
-
- command = "sudo salt --out yaml 'ctl*' " \
- "pillar.get _param:openstack_control_address |" \
- "awk '{print $2}'"
- controllers = self._run_cmd_remote(self.client, command)
- self.log.info('Get controller_ips:%s from Mcp installer'
- % controllers)
- return controllers
+ return mcp_key if isfile(mcp_key) else ssh_key
+
+ def get_transport_url(self):
+ client = SSHClient(self.controllers[0], self.node_user_name,
+ key_filename=self.key_file)
+ try:
+ cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf'
+ ret, url = client.ssh(cmd)
+
+ if ret:
+ raise Exception('Exec command to get transport from '
+ 'controller(%s) in MCP installer failed, '
+ 'ret=%s, output=%s'
+ % (self.controllers[0], ret, url))
+ elif self.controllers[0] not in url:
+ # need to use ip instead of hostname
+ url = (re.sub("@.*:", "@%s:" % self.controllers[0],
+ url[0].split("=", 1)[1]))
+ except Exception:
+ cmd = 'grep -i "^rabbit" /etc/nova/nova.conf'
+ ret, lines = client.ssh(cmd)
+ if ret:
+ raise Exception('Exec command to get transport from '
+ 'controller(%s) in MCP installer failed, '
+ 'ret=%s, output=%s'
+ % (self.controllers[0], ret, lines))
+ else:
+ for line in lines.split('\n'):
+ if line.startswith("rabbit_userid"):
+ rabbit_userid = line.split("=")[1].strip()
+ if line.startswith("rabbit_port"):
+ rabbit_port = line.split("=")[1].strip()
+ if line.startswith("rabbit_password"):
+ rabbit_password = line.split("=")[1].strip()
+ url = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid,
+ rabbit_password,
+ self.controllers[0],
+ rabbit_port)
+ self.log.info('get_transport_url %s' % url)
+ return url
+
+ def _copy_overcloudrc_to_controllers(self):
+ for ip in self.controllers:
+ cmd = "scp overcloudrc %s@%s:" % (self.node_user_name, ip)
+ self._run_cmd_remote(self.client, cmd)
+
+ def get_node_ips(self):
+ self.log.info('Get node ips from Mcp installer......')
+
+ command = 'sudo salt "*" --out yaml pillar.get _param:single_address'
+ node_details = self._run_cmd_remote(self.client, command)
+
+ self.controllers = [line.split()[1] for line in node_details
+ if line.startswith("ctl")]
+ self.computes = [line.split()[1] for line in node_details
+ if line.startswith("cmp")]
+
+ self.log.info('controller_ips:%s' % self.controllers)
+ self.log.info('compute_ips:%s' % self.computes)
def get_host_ip_from_hostname(self, hostname):
command = "sudo salt --out yaml '%s*' " \
@@ -70,21 +128,80 @@ class McpInstaller(BaseInstaller):
def set_apply_patches(self):
self.log.info('Set apply patches start......')
+ fenix_files = None
+ set_scripts = [self.cm_set_script]
+ thrs = []
+
+ restart_cmd = 'sudo systemctl restart' \
+ ' ceilometer-agent-notification.service'
+
+ if self.conf.test_case != 'fault_management':
+ if is_fenix(self.conf):
+ set_scripts.append(self.fe_set_script)
+ testdir = get_doctor_test_root_dir()
+ fenix_files = ["Dockerfile", "run"]
+ restart_cmd += ' nova-scheduler.service'
+ set_scripts.append(self.nc_set_compute_script)
- restart_cm_cmd = 'sudo service ceilometer-agent-notification restart'
for node_ip in self.controllers:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self.controller_clients.append(client)
- self._run_apply_patches(client,
- restart_cm_cmd,
- [self.cm_set_script])
+ if fenix_files is not None:
+ for fenix_file in fenix_files:
+ src_file = '{0}/{1}/{2}'.format(testdir,
+ 'admin_tool/fenix',
+ fenix_file)
+ client.scp(src_file, fenix_file)
+ thrs.append(self._run_apply_patches(client,
+ restart_cmd,
+ set_scripts,
+ python=self.python))
+ time.sleep(5)
+
+ self.log.info('Set apply patches to compute nodes......')
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd = 'sudo systemctl restart nova-compute.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ thrs.append(self._run_apply_patches(
+ client,
+ restart_cmd,
+ [self.nc_set_compute_script],
+ python=self.python))
+ time.sleep(5)
+ # If the Fenix container is built, it needs to be ready before continuing
+ for thr in thrs:
+ thr.join()
def restore_apply_patches(self):
self.log.info('restore apply patches start......')
- restart_cm_cmd = 'sudo service ceilometer-agent-notification restart'
- for client in self.controller_clients:
+ restore_scripts = [self.cm_restore_script]
+
+ restore_scripts.append(self.ac_restore_script)
+ restart_cmd = 'sudo systemctl restart' \
+ ' ceilometer-agent-notification.service'
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd += ' nova-scheduler.service'
+ restore_scripts.append(self.nc_restore_compute_script)
+
+ for node_ip in self.controllers:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
self._run_apply_patches(client,
- restart_cm_cmd,
- [self.cm_restore_script])
+ restart_cmd,
+ restore_scripts,
+ python=self.python)
+
+ if self.conf.test_case != 'fault_management':
+ restart_cmd = 'sudo systemctl restart nova-compute.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(
+ client, restart_cmd,
+ [self.nc_restore_compute_script],
+ python=self.python)
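In the mcp.py changes above, get_node_ips() replaces the controller-only salt query with a single pillar.get call and then splits the YAML output by hostname prefix. A minimal sketch of that parsing, using a made-up sample of the salt output:

    # Hypothetical output lines of:
    #   sudo salt "*" --out yaml pillar.get _param:single_address
    node_details = [
        "ctl01.mcp.local: 172.16.10.101",
        "ctl02.mcp.local: 172.16.10.102",
        "cmp001.mcp.local: 172.16.10.105",
    ]

    controllers = [line.split()[1] for line in node_details
                   if line.startswith("ctl")]
    computes = [line.split()[1] for line in node_details
                if line.startswith("cmp")]

    print(controllers)  # ['172.16.10.101', '172.16.10.102']
    print(computes)     # ['172.16.10.105']

The same hostname-prefix convention (ctl*/cmp*) is what the removed get_controller_ips() relied on, only for controllers.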
diff --git a/doctor_tests/main.py b/doctor_tests/main.py
index 438d8324..7573faec 100644
--- a/doctor_tests/main.py
+++ b/doctor_tests/main.py
@@ -1,5 +1,5 @@
##############################################################################
-# Copyright (c) 2017 ZTE Corporation and others.
+# Copyright (c) 2019 ZTE Corporation and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
@@ -43,7 +43,6 @@ class DoctorTest(object):
def setup(self):
# prepare the cloud env
self.installer.setup()
-
# preparing VM image...
self.image.create()
@@ -51,37 +50,50 @@ class DoctorTest(object):
self.user.create()
def test_fault_management(self):
- try:
- LOG.info('doctor fault management test starting.......')
-
- self.fault_management = \
- FaultManagement(self.conf, self.installer, self.user, LOG)
-
- # prepare test env
- self.fault_management.setup()
-
- # wait for aodh alarms are updated in caches for event evaluator,
- # sleep time should be larger than event_alarm_cache_ttl
- # (default 60)
- # (tojuvone) Fraser currently needs 120
- time.sleep(120)
-
- # injecting host failure...
- # NOTE (umar) add INTERFACE_NAME logic to host injection
- self.fault_management.start()
- time.sleep(30)
-
- # verify the test results
- # NOTE (umar) copy remote monitor.log file when monitor=collectd
- self.fault_management.check_host_status('down')
- self.fault_management.check_notification_time()
-
- except Exception as e:
- LOG.error('doctor fault management test failed, '
- 'Exception=%s' % e)
- sys.exit(1)
- finally:
- self.fault_management.cleanup()
+ retry = 2
+ # Retry once if notified_time is None
+ while retry > 0:
+ try:
+ self.fault_management = None
+ LOG.info('doctor fault management test starting.......')
+ transport_url = self.installer.get_transport_url()
+ self.fault_management = \
+ FaultManagement(self.conf, self.installer, self.user, LOG,
+ transport_url)
+
+ # prepare test env
+ self.fault_management.setup()
+
+ # wait for aodh alarms to be updated in the caches of the
+ # event evaluator; sleep time should be larger than
+ # event_alarm_cache_ttl (default 60)
+ # (tojuvone) Fraser currently needs 120
+ time.sleep(120)
+
+ # injecting host failure...
+ # NOTE (umar) add INTERFACE_NAME logic to host injection
+ self.fault_management.start()
+ time.sleep(30)
+
+ # verify the test results
+ # NOTE (umar) copy remote monitor.log file when
+ # monitor=collectd
+ self.fault_management.check_host_status('down')
+ self.fault_management.check_notification_time()
+ retry = 0
+
+ except Exception as e:
+ LOG.error('doctor fault management test failed, '
+ 'Exception=%s' % e)
+ if 'notified_time=None' in str(e):
+ retry -= 1
+ LOG.info('doctor fault management retry')
+ continue
+ LOG.error(format_exc())
+ sys.exit(1)
+ finally:
+ if self.fault_management is not None:
+ self.fault_management.cleanup()
def _amount_compute_nodes(self):
services = self.nova.services.list(binary='nova-compute')
@@ -94,11 +106,12 @@ class DoctorTest(object):
LOG.info('not enough compute nodes, skipping doctor '
'maintenance test')
return
- elif self.conf.installer.type != 'apex':
+ elif self.conf.installer.type not in ['apex', 'fuel', 'devstack']:
LOG.info('not supported installer, skipping doctor '
'maintenance test')
return
try:
+ maintenance = None
LOG.info('doctor maintenance test starting.......')
trasport_url = self.installer.get_transport_url()
maintenance = Maintenance(trasport_url, self.conf, LOG)
@@ -120,7 +133,8 @@ class DoctorTest(object):
LOG.error(format_exc())
sys.exit(1)
finally:
- maintenance.cleanup_maintenance()
+ if maintenance is not None:
+ maintenance.cleanup_maintenance()
def run(self):
"""run doctor tests"""
@@ -143,6 +157,7 @@ class DoctorTest(object):
% function)
except Exception as e:
LOG.error('doctor test failed, Exception=%s' % e)
+ LOG.error(format_exc())
sys.exit(1)
finally:
self.cleanup()
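The reworked test_fault_management() above wraps the whole scenario in a retry loop and retries once when the failure indicates that no notification time was recorded. A cleaned-up sketch of that pattern, with illustrative names:

    # Sketch only: retry a test once when the error message signals a transient condition.
    def run_with_one_retry(test_fn, log, attempts=2):
        while attempts > 0:
            try:
                test_fn()
                return
            except Exception as e:
                if 'notified_time=None' in str(e) and attempts > 1:
                    log.info('transient failure, retrying once')
                    attempts -= 1
                    continue
                raise

Unlike the sketch, the code in the diff exits the process on non-transient failures and always runs cleanup in the finally block.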
diff --git a/doctor_tests/scenario/fault_management.py b/doctor_tests/scenario/fault_management.py
index ee3bf5f1..0271dffe 100644
--- a/doctor_tests/scenario/fault_management.py
+++ b/doctor_tests/scenario/fault_management.py
@@ -40,7 +40,7 @@ sleep 1
class FaultManagement(object):
- def __init__(self, conf, installer, user, log):
+ def __init__(self, conf, installer, user, log, transport_url):
self.conf = conf
self.log = log
self.user = user
@@ -55,7 +55,7 @@ class FaultManagement(object):
self.network = Network(self.conf, log)
self.instance = Instance(self.conf, log)
self.alarm = Alarm(self.conf, log)
- self.inspector = get_inspector(self.conf, log)
+ self.inspector = get_inspector(self.conf, log, transport_url)
self.monitor = get_monitor(self.conf,
self.inspector.get_inspector_url(),
log)
@@ -111,7 +111,10 @@ class FaultManagement(object):
server = servers.get(vm_name)
if not server:
raise Exception('Can not find instance: vm_name(%s)' % vm_name)
- host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
+ # use the short hostname (without domain name), which is what is mapped to the cell
+ hostname = \
+ server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
+ host_name = hostname.split('.')[0]
host_ip = self.installer.get_host_ip_from_hostname(host_name)
self.log.info('Get host info(name:%s, ip:%s) which vm(%s) launched at'
@@ -180,9 +183,12 @@ class FaultManagement(object):
notification_time = \
self.consumer.notified_time - \
self.monitor.detected_time
+
+ self.log.info('doctor fault management notification_time=%s'
+ % notification_time)
+
if notification_time < 1 and notification_time > 0:
- self.log.info('doctor fault management test successfully,'
- 'notification_time=%s' % notification_time)
+ self.log.info('doctor fault management test succeeded')
else:
if self.conf.profiler_type:
self.log.info('run doctor fault management profile.......')
@@ -206,6 +212,10 @@ class FaultManagement(object):
detected = self.monitor.detected_time
notified = self.consumer.notified_time
+ if None in [vmdown, hostdown, detected, notified]:
+ self.log.info('one of the profiler timestamps is None, skipping profiling')
+ return
+
# TODO(yujunz) check the actual delay to verify time sync status
# expected ~1s delay from $trigger to $linkdown
relative_start = linkdown
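The notification check in the hunk above computes the time between the monitor detecting the failure and the consumer receiving the inspector's notification, and requires it to stay below one second. Illustrative arithmetic with made-up timestamps:

    # Timestamps are hypothetical UNIX times in seconds.
    detected_time = 1561020000.25   # monitor detected the host/link failure
    notified_time = 1561020000.95   # consumer received the notification

    notification_time = notified_time - detected_time   # 0.70 s
    assert 0 < notification_time < 1, 'Doctor expects sub-second notification'

If any of the profiler timestamps is missing, the new guard simply skips profiling instead of raising.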
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
index 54244d79..e6cdcccd 100644
--- a/doctor_tests/scenario/maintenance.py
+++ b/doctor_tests/scenario/maintenance.py
@@ -1,5 +1,5 @@
##############################################################################
-# Copyright (c) 2018 Nokia Corporation and others.
+# Copyright (c) 2019 Nokia Corporation and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
@@ -28,15 +28,25 @@ class Maintenance(object):
def __init__(self, trasport_url, conf, log):
self.conf = conf
self.log = log
+ self.admin_session = get_session()
self.keystone = keystone_client(
self.conf.keystone_version, get_session())
self.nova = nova_client(conf.nova_version, get_session())
auth = get_identity_auth(project=self.conf.doctor_project)
self.neutron = neutron_client(get_session(auth=auth))
self.stack = Stack(self.conf, self.log)
- self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+ if self.conf.installer.type == "devstack":
+ self.endpoint_ip = trasport_url.split("@", 1)[1].split(":", 1)[0]
+ else:
+ self.endpoint_ip = self.conf.admin_tool.ip
+ self.endpoint = "http://%s:12347/" % self.endpoint_ip
+ if self.conf.admin_tool.type == 'sample':
+ self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+ self.endpoint += 'maintenance'
+ else:
+ self.endpoint += 'v1/maintenance'
self.app_manager = get_app_manager(self.stack, self.conf, self.log)
- self.inspector = get_inspector(self.conf, self.log)
+ self.inspector = get_inspector(self.conf, self.log, trasport_url)
def get_external_network(self):
ext_net = None
@@ -64,8 +74,16 @@ class Maintenance(object):
raise Exception('not enough vcpus (%d) on %s' %
(vcpus, hostname))
if vcpus_used > 0:
- raise Exception('%d vcpus used on %s'
- % (vcpus_used, hostname))
+ if self.conf.test_case == 'all':
+ # vCPUs might not yet be free after the fault_management test
+ self.log.info('%d vcpus used on %s, retry...'
+ % (vcpus_used, hostname))
+ time.sleep(15)
+ hvisor = self.nova.hypervisors.get(hvisor.id)
+ vcpus_used = hvisor.__getattr__('vcpus_used')
+ if vcpus_used > 0:
+ raise Exception('%d vcpus used on %s'
+ % (vcpus_used, hostname))
if prev_vcpus != 0 and prev_vcpus != vcpus:
raise Exception('%d vcpus on %s does not match to'
'%d on %s'
@@ -110,9 +128,14 @@ class Maintenance(object):
parameters=parameters,
files=files)
- self.admin_tool.start()
- self.app_manager.start()
+ if self.conf.admin_tool.type == 'sample':
+ self.admin_tool.start()
+ else:
+ # TBD: for now we expect Fenix to be running on self.conf.admin_tool.port
+ pass
+ # Start the inspector before app_manager, as the floating IP might come late
self.inspector.start()
+ self.app_manager.start()
def start_maintenance(self):
self.log.info('start maintenance.......')
@@ -121,22 +144,49 @@ class Maintenance(object):
for hvisor in hvisors:
hostname = hvisor.__getattr__('hypervisor_hostname')
maintenance_hosts.append(hostname)
-
- url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
- # let's start maintenance 20sec from now, so projects will have
- # time to ACK to it before that
- maintenance_at = (datetime.datetime.utcnow() +
- datetime.timedelta(seconds=20)
- ).strftime('%Y-%m-%d %H:%M:%S')
- data = {'hosts': maintenance_hosts,
- 'state': 'MAINTENANCE',
- 'maintenance_at': maintenance_at,
- 'metadata': {'openstack_version': 'Pike'}}
+ url = self.endpoint
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
-
- ret = requests.post(url, data=json.dumps(data), headers=headers)
+ if self.conf.admin_tool.type == 'fenix':
+ headers['X-Auth-Token'] = self.admin_session.get_token()
+ self.log.info('url %s headers %s' % (url, headers))
+ retries = 12
+ ret = None
+ while retries > 0:
+ # let's start maintenance 30sec from now, so projects will have
+ # time to ACK it before that
+ maintenance_at = (datetime.datetime.utcnow() +
+ datetime.timedelta(seconds=30)
+ ).strftime('%Y-%m-%d %H:%M:%S')
+
+ data = {'state': 'MAINTENANCE',
+ 'maintenance_at': maintenance_at,
+ 'metadata': {'openstack_version': 'Train'}}
+
+ if self.conf.app_manager.type == 'vnfm':
+ data['workflow'] = 'vnf'
+ else:
+ data['workflow'] = 'default'
+
+ if self.conf.admin_tool.type == 'sample':
+ data['hosts'] = maintenance_hosts
+ else:
+ data['hosts'] = []
+ try:
+ ret = requests.post(url, data=json.dumps(data),
+ headers=headers)
+ except Exception:
+ if retries == 0:
+ raise Exception('admin tool did not respond in 120s')
+ else:
+ self.log.info('admin tool not ready, retry in 10s')
+ retries = retries - 1
+ time.sleep(10)
+ continue
+ break
+ if not ret:
+ raise Exception("admin tool did not respond")
if ret.status_code != 200:
raise Exception(ret.text)
return ret.json()['session_id']
@@ -144,48 +194,56 @@ class Maintenance(object):
def remove_maintenance_session(self, session_id):
self.log.info('remove maintenance session %s.......' % session_id)
- url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
+ url = ('%s/%s' % (self.endpoint, session_id))
- data = {'state': 'REMOVE_MAINTENANCE_SESSION',
- 'session_id': session_id}
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
- ret = requests.post(url, data=json.dumps(data), headers=headers)
+ if self.conf.admin_tool.type == 'fenix':
+ headers['X-Auth-Token'] = self.admin_session.get_token()
+
+ ret = requests.delete(url, data=None, headers=headers)
if ret.status_code != 200:
raise Exception(ret.text)
def get_maintenance_state(self, session_id):
- url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
- data = {'session_id': session_id}
+
+ url = ('%s/%s' % (self.endpoint, session_id))
+
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json'}
- ret = requests.get(url, data=json.dumps(data), headers=headers)
+
+ if self.conf.admin_tool.type == 'fenix':
+ headers['X-Auth-Token'] = self.admin_session.get_token()
+
+ ret = requests.get(url, data=None, headers=headers)
if ret.status_code != 200:
raise Exception(ret.text)
return ret.json()['state']
def wait_maintenance_complete(self, session_id):
- retries = 60
+ retries = 90
state = None
- time.sleep(600)
- while state != 'MAINTENANCE_COMPLETE' and retries > 0:
+ time.sleep(300)
+ while (state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+ retries > 0):
time.sleep(10)
state = self.get_maintenance_state(session_id)
retries = retries - 1
- if retries == 0 and state != 'MAINTENANCE_COMPLETE':
- raise Exception('maintenance %s not completed within 20min, status'
- ' %s' % (session_id, state))
- elif state == 'MAINTENANCE_COMPLETE':
- self.log.info('maintenance %s %s' % (session_id, state))
- self.remove_maintenance_session(session_id)
- elif state == 'MAINTENANCE_FAILED':
+ self.remove_maintenance_session(session_id)
+ self.log.info('maintenance %s ended with state %s' %
+ (session_id, state))
+ if state == 'MAINTENANCE_FAILED':
raise Exception('maintenance %s failed' % session_id)
+ elif retries == 0:
+ raise Exception('maintenance %s not completed within 20min' %
+ session_id)
def cleanup_maintenance(self):
- self.admin_tool.stop()
+ if self.conf.admin_tool.type == 'sample':
+ self.admin_tool.stop()
self.app_manager.stop()
self.inspector.stop()
self.log.info('stack delete start.......')
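The maintenance scenario above now talks to the admin tool over REST: the test assumes port 12347, Fenix uses the /v1/maintenance path and a keystone token, while the sample admin tool keeps the old /maintenance path. A hedged sketch of starting a session, with a made-up endpoint address and token:

    import datetime
    import json

    import requests

    endpoint = 'http://192.0.2.10:12347/v1/maintenance'    # hypothetical Fenix endpoint
    headers = {'Content-Type': 'application/json',
               'Accept': 'application/json',
               'X-Auth-Token': '<keystone-token>'}         # only needed for admin_tool.type 'fenix'

    maintenance_at = (datetime.datetime.utcnow() +
                      datetime.timedelta(seconds=30)).strftime('%Y-%m-%d %H:%M:%S')
    data = {'state': 'MAINTENANCE',
            'maintenance_at': maintenance_at,
            'metadata': {'openstack_version': 'Train'},
            'workflow': 'default',
            'hosts': []}                                   # left empty for Fenix, per the diff above

    ret = requests.post(endpoint, data=json.dumps(data), headers=headers)
    ret.raise_for_status()
    session_id = ret.json()['session_id']

State polling and session removal follow the same pattern, using GET and DELETE on '%s/%s' % (endpoint, session_id).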
diff --git a/doctor_tests/stack.py b/doctor_tests/stack.py
index 688c2050..8a921beb 100644
--- a/doctor_tests/stack.py
+++ b/doctor_tests/stack.py
@@ -44,7 +44,7 @@ class Stack(object):
action_failed = '%s_FAILED' % action
status = action_in_progress
- stack_retries = 150
+ stack_retries = 160
while status == action_in_progress and stack_retries > 0:
time.sleep(2)
try:
@@ -88,7 +88,19 @@ class Stack(object):
template=template,
parameters=parameters)
self.stack_id = stack['stack']['id']
- self.wait_stack_create()
+ try:
+ self.wait_stack_create()
+ except Exception:
+ # stack creation might not always succeed on the first attempt
+ self.log.info('retry creating maintenance stack.......')
+ self.delete()
+ time.sleep(5)
+ stack = self.heat.stacks.create(stack_name=self.stack_name,
+ files=files,
+ template=template,
+ parameters=parameters)
+ self.stack_id = stack['stack']['id']
+ self.wait_stack_create()
def update(self, stack_name, stack_id, template, parameters={}, files={}):
self.heat.stacks.update(stack_name=stack_name,
diff --git a/doctor_tests/user.py b/doctor_tests/user.py
index 29aa004b..2cd9757f 100644
--- a/doctor_tests/user.py
+++ b/doctor_tests/user.py
@@ -129,7 +129,6 @@ class User(object):
def _add_user_role_in_project(self, is_admin=False):
"""add test user with test role in test project"""
-
project = self.projects.get(self.conf.doctor_project)
user_name = 'admin' if is_admin else self.conf.doctor_user
diff --git a/requirements.txt b/requirements.txt
index 50ce80fc..1eab2a04 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,22 +1,22 @@
# The order of packages is significant, because pip processes them in the order
# of appearance. Changing the order has an impact on the overall integration
# process, which may cause wedges in the gate later.
-Flask!=0.11,<1.0,>=0.10 # BSD
-paramiko>=2.0.0 # LGPLv2.1+
+Flask!=0.11 # BSD
+paramiko # LGPLv2.1+
scp
-requests>=2.14.2 # Apache-2.0
-oslo.config>=5.2.0 # Apache-2.0
-python-openstackclient>=3.12.0 # Apache-2.0
-oslo.messaging>=5.30.2 # Apache-2.0
-oslo.versionedobjects>=1.26.1 # Apache-2.0
-python-ceilometerclient>=2.5.0 # Apache-2.0
-aodhclient>=0.9.0 # Apache-2.0
-python-keystoneclient>=3.8.0 # Apache-2.0
-python-neutronclient>=6.7.0 # Apache-2.0
-python-novaclient>=9.1.0 # Apache-2.0
-python-congressclient<2000,>=1.9.0 # Apache-2.0
-python-glanceclient>=2.8.0 # Apache-2.0
-python-vitrageclient>=2.0.0 # Apache-2.0
-virtualenv>=14.0.6 # MIT
-python-heatclient>=1.8.2 # Apache-2.0
+requests # Apache-2.0
+oslo.config!=4.3.0,!=4.4.0 # Apache-2.0
+python-openstackclient # Apache-2.0
+oslo.messaging # Apache-2.0
+oslo.versionedobjects # Apache-2.0
+python-ceilometerclient # Apache-2.0
+aodhclient # Apache-2.0
+python-keystoneclient!=2.1.0 # Apache-2.0
+python-neutronclient # Apache-2.0
+python-novaclient # Apache-2.0
+python-congressclient<2000 # Apache-2.0
+python-glanceclient # Apache-2.0
+python-vitrageclient # Apache-2.0
+virtualenv # MIT
+python-heatclient # Apache-2.0
flake8<2.6.0,>=2.5.4 # MIT
diff --git a/tox.ini b/tox.ini
index 832a1dae..2937c329 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,12 +1,12 @@
[tox]
minversion = 2.3.1
-envlist = py34, pep8
+envlist = py36,pep8,docs,docs-linkcheck
skipsdist = True
[testenv]
usedevelop = True
-install_command = pip install \
- -chttps://git.openstack.org/cgit/openstack/requirements/plain/upper-constraints.txt?h=stable/pike \
+install_command = pip3 install \
+ -chttps://git.openstack.org/cgit/openstack/requirements/plain/upper-constraints.txt?h=stable/stein \
{opts} {packages}
setenv = VIRTUAL_ENV={envdir}
deps = -r{toxinidir}/requirements.txt
@@ -29,10 +29,13 @@ passenv =
INSTALLER_TYPE
INSTALLER_IP
INSPECTOR_TYPE
+ ADMIN_TOOL_TYPE
TEST_CASE
SSH_KEY
+ APP_MANAGER_TYPE
changedir = {toxinidir}/doctor_tests
commands = doctor-test
+ /usr/bin/find {toxinidir} -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete
[testenv:pep8]
changedir = {toxinidir}
@@ -49,3 +52,16 @@ enable-extensions=H106,H203
builtins = _
filename = *.py,app.wsgi
exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build,tests
+
+[testenv:docs]
+changedir = {toxinidir}
+deps = -rdocs/requirements.txt
+commands =
+ sphinx-build -b html -n -d {envtmpdir}/doctrees ./docs/ {toxinidir}/docs/_build/html
+ echo "Generated docs available in {toxinidir}/docs/_build/html"
+whitelist_externals = echo
+
+[testenv:docs-linkcheck]
+changedir = {toxinidir}
+deps = -rdocs/requirements.txt
+commands = sphinx-build -b linkcheck -d {envtmpdir}/doctrees ./docs/ {toxinidir}/docs/_build/linkcheck