From 72a1f8c92f1692f1ea8dcb5bc706ec9939c30e0a Mon Sep 17 00:00:00 2001
From: Tomi Juvonen <tomi.juvonen@nokia.com>
Date: Tue, 13 Oct 2020 16:37:57 +0300
Subject: Documents up-to-date

According to document guidelines
Release notes
ETSI FEAT03 support and other minor enhancements

JIRA: DOCTOR-143

Signed-off-by: Tomi Juvonen <tomi.juvonen@nokia.com>
Change-Id: Iefa74004dfada376d1ab05c0149029a26f822275
---
 docs/release/configguide/feature.configuration.rst |  54 +++++++++
 docs/release/configguide/index.rst                 |   6 +-
 docs/release/index.rst                             |  12 +-
 docs/release/installation/index.rst                |  13 +++
 docs/release/installation/installation.rst         |  44 +++++++
 docs/release/release-notes/release-notes.rst       |  61 ++++++----
 docs/release/release-notes/releasenotes_iruya.rst  | 129 +++++++++++++++++++++
 .../fault_management/fault_management.rst          |  90 ++++++++++++++
 .../maintenance/images/Fault-management-design.png | Bin 0 -> 237110 bytes
 docs/release/scenarios/maintenance/images/LICENSE  |  14 +++
 .../maintenance/images/Maintenance-design.png      | Bin 0 -> 316640 bytes
 .../maintenance/images/Maintenance-workflow.png    | Bin 0 -> 81286 bytes
 docs/release/scenarios/maintenance/maintenance.rst | 120 +++++++++++++++++++
 docs/release/userguide/get-valid-server-state.rst  | 125 ++++++++++++++++++++
 docs/release/userguide/index.rst                   |   3 +
 docs/release/userguide/mark-host-down_manual.rst   | 122 +++++++++++++++++++
 docs/release/userguide/monitors.rst                |  37 ++++++
 17 files changed, 801 insertions(+), 29 deletions(-)
 create mode 100644 docs/release/installation/index.rst
 create mode 100644 docs/release/installation/installation.rst
 create mode 100644 docs/release/release-notes/releasenotes_iruya.rst
 create mode 100644 docs/release/scenarios/fault_management/fault_management.rst
 create mode 100644 docs/release/scenarios/maintenance/images/Fault-management-design.png
 create mode 100644 docs/release/scenarios/maintenance/images/LICENSE
 create mode 100644 docs/release/scenarios/maintenance/images/Maintenance-design.png
 create mode 100644 docs/release/scenarios/maintenance/images/Maintenance-workflow.png
 create mode 100644 docs/release/scenarios/maintenance/maintenance.rst
 create mode 100644 docs/release/userguide/get-valid-server-state.rst
 create mode 100644 docs/release/userguide/mark-host-down_manual.rst
 create mode 100644 docs/release/userguide/monitors.rst

(limited to 'docs/release')

diff --git a/docs/release/configguide/feature.configuration.rst b/docs/release/configguide/feature.configuration.rst
index 64928eea..8fbff50e 100644
--- a/docs/release/configguide/feature.configuration.rst
+++ b/docs/release/configguide/feature.configuration.rst
@@ -159,3 +159,57 @@ You can configure the Sample Monitor as follows (Example for Apex deployment):
         "http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
 
 **Collectd Monitor**
+
+OpenStack components
+====================
+
+In OPNFV and with Doctor testing you can have all OpenStack components configured
+as needed. Here is a sample of the needed configuration modifications.
+
+Ceilometer
+----------
+
+Maintenance use case needs new alarm definitions to be added to ``/etc/ceilometer/event_definitions.yaml``::
+
+    - event_type: maintenance.scheduled
+      traits:
+        actions_at:
+          fields: payload.maintenance_at
+          type: datetime
+        allowed_actions:
+          fields: payload.allowed_actions
+        host_id:
+          fields: payload.host_id
+        instances:
+          fields: payload.instances
+        metadata:
+          fields: payload.metadata
+        project_id:
+          fields: payload.project_id
+        reply_url:
+          fields: payload.reply_url
+        session_id:
+          fields: payload.session_id
+        state:
+          fields: payload.state
+    - event_type: maintenance.host
+      traits:
+        host:
+          fields: payload.host
+        project_id:
+          fields: payload.project_id
+        session_id:
+          fields: payload.session_id
+        state:
+          fields: payload.state
+
+Maintenance and fault management both need these to be added to ``/etc/ceilometer/event_pipeline.yaml``::
+
+    - notifier://
+    - notifier://?topic=alarm.all
+
+Nova
+----
+
+Set the following in ``/etc/nova/nova.conf``:
+``cpu_allocation_ratio=1.0``
diff --git a/docs/release/configguide/index.rst b/docs/release/configguide/index.rst
index b1e7c33d..c2331115 100644
--- a/docs/release/configguide/index.rst
+++ b/docs/release/configguide/index.rst
@@ -3,9 +3,9 @@
 
 .. _doctor-configguide:
 
-*************************
-Doctor Installation Guide
-*************************
+**************************
+Doctor Configuration Guide
+**************************
 
 .. toctree::
     :maxdepth: 2
diff --git a/docs/release/index.rst b/docs/release/index.rst
index 8a1bf405..67eb4c5f 100644
--- a/docs/release/index.rst
+++ b/docs/release/index.rst
@@ -2,14 +2,18 @@
 .. http://creativecommons.org/licenses/by/4.0
 .. (c) 2017 OPNFV.
 
+.. _release:
 
-======
-Doctor
-======
+=======
+Release
+=======
 
 .. toctree::
    :maxdepth: 2
 
+   ./configguide/index.rst
    ./installation/index.rst
+   ./release-notes/index.rst
+   ./scenarios/fault_management/fault_management.rst
+   ./scenarios/maintenance/maintenance.rst
    ./userguide/index.rst
-
diff --git a/docs/release/installation/index.rst b/docs/release/installation/index.rst
new file mode 100644
index 00000000..f6527e5d
--- /dev/null
+++ b/docs/release/installation/index.rst
@@ -0,0 +1,13 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+.. _doctor-installation:
+
+*************************
+Doctor Installation Guide
+*************************
+
+.. toctree::
+    :maxdepth: 2
+
+    installation.rst
diff --git a/docs/release/installation/installation.rst b/docs/release/installation/installation.rst
new file mode 100644
index 00000000..564f19fd
--- /dev/null
+++ b/docs/release/installation/installation.rst
@@ -0,0 +1,44 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Doctor Installation
+====================
+
+You can clone the Doctor project on the OPNFV installer jumphost or, if you are
+not in an OPNFV environment, you can clone Doctor to the DevStack controller node::
+
+    git clone https://gerrit.opnfv.org/gerrit/doctor
+
+On the DevStack controller, here is a sample of including what Doctor testing
+will require for sample fault management testing and for maintenance
+testing using Fenix:
+
+.. code-block:: bash
+
+    git clone https://github.com/openstack/devstack -b stable/train
+
+.. code-block:: bash
+
+    cd devstack && vi local.conf
+
+.. code-block:: bash
+
+    [[local|localrc]]
+    GIT_BASE=https://git.openstack.org
+    HOST_IP=<host_ip>
+    ADMIN_PASSWORD=admin
+    DATABASE_PASSWORD=admin
+    RABBIT_PASSWORD=admin
+    SERVICE_PASSWORD=admin
+    LOGFILE=/opt/stack/stack.sh.log
+
+    PUBLIC_INTERFACE=eth0
+
+    CEILOMETER_EVENT_ALARM=True
+
+    ENABLED_SERVICES=key,rabbit,mysql,fenix-engine,fenix-api,aodh-evaluator,aodh-notifier,aodh-api
+
+    enable_plugin ceilometer https://git.openstack.org/openstack/ceilometer stable/train
+    enable_plugin aodh https://git.openstack.org/openstack/aodh stable/train
+    enable_plugin gnocchi https://github.com/openstack/gnocchi
+    enable_plugin fenix https://opendev.org/x/fenix master
diff --git a/docs/release/release-notes/release-notes.rst b/docs/release/release-notes/release-notes.rst
index 92775557..b525335e 100644
--- a/docs/release/release-notes/release-notes.rst
+++ b/docs/release/release-notes/release-notes.rst
@@ -7,33 +7,41 @@ This document provides the release notes for Iruya version of Doctor.
 Important notes
 ===============
 
-In Iruya release there has not been many changes.
-
-All testing is now being made with Fuel installer. Maintenance use case
-is now only tested against latest upstream Fenix. Only sample inspector is
-tested as Fuel do not support Vitrage or Congress.
+Jerma release has mainly been for finalizing maintenance use case testing
+supporting the ETSI FEAT03 defined interaction between VNFM and infrastructure.
+This is mainly to have infrastructure maintenance and upgrade operations
+optimized to be as fast as they can while keeping VNFs on top with zero impact
+on their service.
+
+Furthermore, this is the final release of Doctor and the deeper testing is
+moving more to upstream projects like Fenix for the maintenance. Also in
+this release we have made sure that all Doctor testing and any deeper testing
+with the upstream projects can be done in DevStack. This also makes DevStack
+the most important installer.
 
 Summary
 =======
 
-Iruya Doctor framework uses OpenStack Stein integrated into its test cases.
+Jerma Doctor framework uses OpenStack Train integrated into its test cases.
 
 Release Data
 ============
 
 Doctor changes
 
-- Maintenance use case updated to support latest version of Fenix running
-  in container on controller node
-- Maintenance use case now support Fuel installer
-- Doctor updated to use OpenStack Stein and only python 3.6
-- Testing only sample inspector as lacking installer support for
-  Vitrage and Congress
+- Maintenance use case updated to support latest version of Fenix.
+- Maintenance use case now supports ETSI FEAT03 optimization with Fenix.
+- Doctor testing is now preferred to be done in DevStack environment
+  where one can easily select OpenStack release from Rocky to Ussuri to
+  test Doctor functionality. Latest OPNFV Fuel can also be used for the
+  OpenStack version it supports.
 
-Releng changes
+Doctor CI
 
-- Doctor testing running with python 3.6 and with sample inspector
-- Doctor is only tested with Fuel installer
+- Doctor is tested with the Fuel installer.
+- Fault management use case is tested with sample inspector.
+- Maintenance use case is tested with sample implementation and towards
+  the latest Fenix version. This includes the new ETSI FEAT03 optimization.
 
 Version change
 ^^^^^^^^^^^^^^
@@ -41,12 +49,13 @@ Version change
 Module version changes
 ~~~~~~~~~~~~~~~~~~~~~~
 
-- OpenStack has changed from Rocky to Stein since previous Hunter release.
+- OpenStack has changed to Train
 
 Document version changes
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-N/A
+All documentation is updated to OPNFV unified format according to
+documentation guidelines. Small updates in many documents.
 
 Reason for version
 ^^^^^^^^^^^^^^^^^^
@@ -56,11 +65,14 @@ N/A
 Feature additions
 ~~~~~~~~~~~~~~~~~
 
-+--------------------+--------------------------------------------------------------+
-| **JIRA REFERENCE** | **SLOGAN**                                                   |
-+--------------------+--------------------------------------------------------------+
-| DOCTOR-134         | Update Doctor maintenance use case to work with latest Fenix |
-+--------------------+--------------------------------------------------------------+
++--------------------+--------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN**                                 |
++--------------------+--------------------------------------------+
+| DOCTOR-137         | VNFM maintenance with ETSI changes         |
++--------------------+--------------------------------------------+
+| DOCTOR-136         | DevStack support                           |
++--------------------+--------------------------------------------+
+
 
 Deliverables
 ------------
@@ -127,3 +139,8 @@ References
 For more information about the OPNFV Doctor latest work, please see:
 
 https://wiki.opnfv.org/display/doctor/Doctor+Home
+
+Further information about ETSI FEAT03 optimization can be found in the Fenix
+documentation:
+
+https://fenix.readthedocs.io/en/latest
diff --git a/docs/release/release-notes/releasenotes_iruya.rst b/docs/release/release-notes/releasenotes_iruya.rst
new file mode 100644
index 00000000..92775557
--- /dev/null
+++ b/docs/release/release-notes/releasenotes_iruya.rst
@@ -0,0 +1,129 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+This document provides the release notes for Iruya version of Doctor.
+
+Important notes
+===============
+
+In Iruya release there has not been many changes.
+
+All testing is now being made with Fuel installer. Maintenance use case
+is now only tested against latest upstream Fenix. Only sample inspector is
+tested as Fuel do not support Vitrage or Congress.
+
+Summary
+=======
+
+Iruya Doctor framework uses OpenStack Stein integrated into its test cases.
+
+Release Data
+============
+
+Doctor changes
+
+- Maintenance use case updated to support latest version of Fenix running
+  in container on controller node
+- Maintenance use case now support Fuel installer
+- Doctor updated to use OpenStack Stein and only python 3.6
+- Testing only sample inspector as lacking installer support for
+  Vitrage and Congress
+
+Releng changes
+
+- Doctor testing running with python 3.6 and with sample inspector
+- Doctor is only tested with Fuel installer
+
+Version change
+^^^^^^^^^^^^^^
+
+Module version changes
+~~~~~~~~~~~~~~~~~~~~~~
+
+- OpenStack has changed from Rocky to Stein since previous Hunter release.
+
+Document version changes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+Reason for version
+^^^^^^^^^^^^^^^^^^
+
+N/A
+
+Feature additions
+~~~~~~~~~~~~~~~~~
+
++--------------------+--------------------------------------------------------------+
+| **JIRA REFERENCE** | **SLOGAN**                                                   |
++--------------------+--------------------------------------------------------------+
+| DOCTOR-134         | Update Doctor maintenance use case to work with latest Fenix |
++--------------------+--------------------------------------------------------------+
+
+Deliverables
+------------
+
+Software deliverables
+=====================
+
+None
+
+Documentation deliverables
+==========================
+
+https://git.opnfv.org/doctor/tree/docs
+
+Known Limitations, Issues and Workarounds
+=========================================
+
+System Limitations
+^^^^^^^^^^^^^^^^^^
+
+Maintenance test case requirements:
+
+- Minimum number of nodes:   1 Controller, 3 Computes
+- Min number of VCPUs:       2 VCPUs for each compute
+
+Known issues
+^^^^^^^^^^^^
+
+None
+
+Workarounds
+^^^^^^^^^^^
+
+None
+
+Test Result
+===========
+
+Doctor CI results with TEST_CASE='fault_management' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE**                       | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel'                | SUCCESS      |
++--------------------------------------+--------------+
+
+Doctor CI results with TEST_CASE='maintenance' and INSPECTOR_TYPE=sample
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------------+--------------+
+| **TEST-SUITE**                       | **Results:** |
++--------------------------------------+--------------+
+| INSTALLER_TYPE='fuel'                | SUCCESS      |
+| ADMIN_TOOL_TYPE='fenix' *)           |              |
++--------------------------------------+--------------+
+
+*) Sample implementation not updated according to latest upstream Fenix
+   and is currently not being tested.
+
+References
+==========
+
+For more information about the OPNFV Doctor latest work, please see:
+
+https://wiki.opnfv.org/display/doctor/Doctor+Home
diff --git a/docs/release/scenarios/fault_management/fault_management.rst b/docs/release/scenarios/fault_management/fault_management.rst
new file mode 100644
index 00000000..99371201
--- /dev/null
+++ b/docs/release/scenarios/fault_management/fault_management.rst
@@ -0,0 +1,90 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+Running test cases
+""""""""""""""""""
+
+Functest will call the "doctor_tests/main.py" in Doctor to run the test job.
+Doctor testing can also be triggered by tox on OPNFV installer jumphost. Tox
+is normally used for functional, module and coding style testing in Python
+project.
+
+Currently 'MCP' and 'devstack' installers are supported.
+
+
+Fault management use case
+"""""""""""""""""""""""""
+
+* A consumer of the NFVI wants to receive immediate notifications about faults
+  in the NFVI affecting the proper functioning of the virtual resources.
+  Therefore, such faults have to be detected as quickly as possible, and, when
+  a critical error is observed, the affected consumer is immediately informed
+  about the fault and can switch over to the STBY configuration.
+
+The faults to be monitored (and at which detection rate) will be configured by
+the consumer. Once a fault is detected, the Inspector in the Doctor
+architecture will check the resource map maintained by the Controller, to find
+out which virtual resources are affected and then update the resources state.
+The Notifier will receive the failure event requests sent from the Controller,
+and notify the consumer(s) of the affected resources according to the alarm
+configuration.
+
+Detailed workflow information is as follows:
+
+* Consumer(VNFM): (step 0) creates resources (network, server/instance) and an
+  event alarm on state down notification of that server/instance or Neutron
+  port.
+
+* Monitor: (step 1) periodically checks nodes, such as ping from/to each
+  dplane nic to/from gw of node, (step 2) once a check fails, sends out an
+  event with "raw" fault event information to Inspector
+
+* Inspector: when it receives an event, it will (step 3) mark the host down
+  ("mark-host-down"), (step 4) map the PM to VM, and change the VM status to
+  down. In network failure case, also Neutron port is changed to down.
+
+* Controller: (step 5) sends out instance update event to Ceilometer. In network
+  failure case, also Neutron port is changed to down and corresponding event is
+  sent to Ceilometer.
+
+* Notifier: (step 6) Ceilometer transforms and passes the events to AODH,
+  (step 7) AODH will evaluate events with the registered alarm definitions,
+  then (step 8) it will fire the alarm to the "consumer" who owns the
+  instance
+
+* Consumer(VNFM): (step 9) receives the event and (step 10) recreates a new
+  instance
+
+Fault management test case
+""""""""""""""""""""""""""
+
+Functest will call the 'doctor-test' command in Doctor to run the test job.
+
+The following steps are executed:
+
+Firstly, get the installer ip according to the installer type. Then ssh to
+the installer node to get the private key for accessing the cloud. For the
+'fuel' installer, ssh to the controller node to modify nova and ceilometer
+configurations.
+
+Secondly, prepare image for booting VM, then create a test project and test
+user (both default to doctor) for the Doctor tests.
+
+Thirdly, boot a VM under the doctor project and check the VM status to verify
+that the VM is launched completely. Then get the compute host info where the VM
+is launched to verify connectivity to the target compute host. Get the consumer
+ip according to the route to compute ip and create an alarm event in Ceilometer
+using the consumer ip.
+
+Fourthly, the Doctor components are started, and, based on the above preparation,
+a failure is injected to the system, i.e. the network of compute host is
+disabled for 3 minutes. To ensure the host is down, the status of the host
+will be checked.
+
+Finally, the notification time, i.e. the time between the execution of step 2
+(Monitor detects failure) and step 9 (Consumer receives failure notification)
+is calculated.
+
+According to the Doctor requirements, the Doctor test is successful if the
+notification time is below 1 second.
diff --git a/docs/release/scenarios/maintenance/images/Fault-management-design.png b/docs/release/scenarios/maintenance/images/Fault-management-design.png
new file mode 100644
index 00000000..6d98cdec
Binary files /dev/null and b/docs/release/scenarios/maintenance/images/Fault-management-design.png differ
diff --git a/docs/release/scenarios/maintenance/images/LICENSE b/docs/release/scenarios/maintenance/images/LICENSE
new file mode 100644
index 00000000..21a2d03d
--- /dev/null
+++ b/docs/release/scenarios/maintenance/images/LICENSE
@@ -0,0 +1,14 @@
+Copyright 2017 Open Platform for NFV Project, Inc. and its contributors
+
+Open Platform for NFV Project Documentation License
+===================================================
+Any documentation developed by the "Open Platform for NFV Project"
+is licensed under a Creative Commons Attribution 4.0 International License.
+You should have received a copy of the license along with this. If not,
+see <http://creativecommons.org/licenses/by/4.0/>.
+
+Unless required by applicable law or agreed to in writing, documentation
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/docs/release/scenarios/maintenance/images/Maintenance-design.png b/docs/release/scenarios/maintenance/images/Maintenance-design.png
new file mode 100644
index 00000000..8f21db6a
Binary files /dev/null and b/docs/release/scenarios/maintenance/images/Maintenance-design.png differ
diff --git a/docs/release/scenarios/maintenance/images/Maintenance-workflow.png b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png
new file mode 100644
index 00000000..9b65fd59
Binary files /dev/null and b/docs/release/scenarios/maintenance/images/Maintenance-workflow.png differ
diff --git a/docs/release/scenarios/maintenance/maintenance.rst b/docs/release/scenarios/maintenance/maintenance.rst
new file mode 100644
index 00000000..ecfe76b1
--- /dev/null
+++ b/docs/release/scenarios/maintenance/maintenance.rst
@@ -0,0 +1,120 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+Maintenance use case
+""""""""""""""""""""
+
+* A consumer of the NFVI wants to interact with NFVI maintenance, upgrade,
+  scaling and to have graceful retirement. Receiving notifications over these
+  NFVI events and responding to those within given time window, consumer can
+  guarantee zero downtime to his service.
+
+The maintenance use case adds the Doctor platform an `admin tool` and an
+`app manager` component. Overview of maintenance components can be seen in
+:numref:`figure-p2`.
+
+.. figure:: ./images/Maintenance-design.png
+    :name: figure-p2
+    :width: 100%
+
+    Doctor platform components in maintenance use case
+
+In maintenance use case, `app manager` (VNFM) will subscribe to maintenance
+notifications triggered by project specific alarms through AODH. This is the way
+it gets to know different NFVI maintenance, upgrade and scaling operations that
+affect its instances. The `app manager` can do actions depicted in `green
+color` or tell `admin tool` to do admin actions depicted in `orange color`.
+
+Any infrastructure component like `Inspector` can subscribe to maintenance
+notifications triggered by host specific alarms through AODH. Subscribing to the
+notifications needs admin privileges and can tell when a host is out of use as
+in maintenance and when it is taken back to production.
+
+Maintenance test case
+"""""""""""""""""""""
+
+Maintenance test case is currently running in our Apex CI and executed by tox.
+This is because the special limitation mentioned below and also the fact we
+currently have only sample implementation as a proof of concept and we also
+support unofficial OpenStack project Fenix. Environment variable
+TEST_CASE='maintenance' needs to be used when executing "doctor_tests/main.py"
+and ADMIN_TOOL_TYPE='fenix' if you want to test with Fenix instead of sample
+implementation. Test case workflow can be seen in :numref:`figure-p3`.
+
+.. figure:: ./images/Maintenance-workflow.png
+    :name: figure-p3
+    :width: 100%
+
+    Maintenance test case workflow
+
+In test case all compute capacity will be consumed with project (VNF) instances.
+For redundant services on instances and an empty compute needed for maintenance,
+test case will need at least 3 compute nodes in system. There will be 2
+instances on each compute, so minimum number of VCPUs is also 2. Depending on
+how many compute nodes there are, the application will always have 2 redundant
+instances (ACT-STDBY) on different compute nodes and rest of the compute
+capacity will be filled with non-redundant instances.
+
+For each project specific maintenance message there is a time window for
+`app manager` to make any needed action. This will guarantee zero
+down time for his service. All replies back are done by calling `admin tool` API
+given in the message.
+
+The following steps are executed:
+
+Infrastructure admin will call `admin tool` API to trigger maintenance for
+compute hosts having instances belonging to a VNF.
+
+Project specific `MAINTENANCE` notification is triggered to tell `app manager`
+that his instances are going to be hit by infrastructure maintenance at a
+specific point in time. `app manager` will call `admin tool` API to answer back
+`ACK_MAINTENANCE`.
+
+When the time comes to start the actual maintenance workflow in `admin tool`,
+a `DOWN_SCALE` notification is triggered as there is no empty compute node for
+maintenance (or compute upgrade). Project receives corresponding alarm and scales
+down instances and call `admin tool` API to answer back `ACK_DOWN_SCALE`.
+
+As it might happen instances are not scaled down (removed) from a single
+compute node, `admin tool` might need to figure out what compute node should be
+made empty first and send `PREPARE_MAINTENANCE` to project telling which instance
+needs to be migrated to have the needed empty compute. `app manager` makes sure
+he is ready to migrate instance and call `admin tool` API to answer back
+`ACK_PREPARE_MAINTENANCE`. `admin tool` will make the migration and answer
+`ADMIN_ACTION_DONE`, so `app manager` knows instance can be again used.
+
+:numref:`figure-p3` has next a light blue section of actions to be done for each
+compute. However as we now have one empty compute, we will maintain/upgrade that
+first. So on first round, we can straight put compute in maintenance and send
+admin level host specific `IN_MAINTENANCE` message. This is caught by `Inspector`
+to know host is down for maintenance. `Inspector` can now disable any automatic
+fault management actions for the host as it can be down for a purpose. After
+`admin tool` has completed maintenance/upgrade `MAINTENANCE_COMPLETE` message
+is sent to tell host is back in production.
+
+Next rounds we always have instances on compute, so we need to have
+`PLANNED_MAINTENANCE` message to tell that those instances are now going to be hit
+by maintenance. When `app manager` now receives this message, he knows instances
+to be moved away from compute will now move to already maintained/upgraded host.
+In test case no upgrade is done on application side to upgrade instances
+according to new infrastructure capabilities, but this could be done here as
+this information is also passed in the message. This might be just upgrading
+some RPMs, but also totally re-instantiating instance with a new flavor. Now if
+application runs an active side of a redundant instance on this compute,
+a switch over will be done. After `app manager` is ready he will call
+`admin tool` API to answer back `ACK_PLANNED_MAINTENANCE`. In test case the
+answer is `migrate`, so `admin tool` will migrate instances and reply
+`ADMIN_ACTION_DONE` and then `app manager` knows instances can be again used.
+Then we are ready to make the actual maintenance as previously through
+`IN_MAINTENANCE` and `MAINTENANCE_COMPLETE` steps.
+
+After all computes are maintained, `admin tool` can send `MAINTENANCE_COMPLETE`
+to tell maintenance/upgrade is now complete. For `app manager` this means he
+can scale back to full capacity.
+
+There is currently a sample implementation for the VNFM and test case. On the
+infrastructure side there is a sample implementation of 'admin_tool' and
+there is also support for the OpenStack Fenix that extends the use case to
+support 'ETSI FEAT03' for VNFM interaction and to optimize the whole
+infrastructure maintenance and upgrade.
diff --git a/docs/release/userguide/get-valid-server-state.rst b/docs/release/userguide/get-valid-server-state.rst
new file mode 100644
index 00000000..824ea3c2
--- /dev/null
+++ b/docs/release/userguide/get-valid-server-state.rst
@@ -0,0 +1,125 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+======================
+Get valid server state
+======================
+
+Related Blueprints:
+===================
+
+https://blueprints.launchpad.net/nova/+spec/get-valid-server-state
+
+Problem description
+===================
+
+Previously when the owner of a VM has queried his VMs, he has not received
+enough state information, states have not changed fast enough in the VIM and
+they have not been accurate in some scenarios. With this change this gap is now
+closed.
+
+A typical case is that, in case of a fault of a host, the user of a high
+availability service running on top of that host, needs to make an immediate
+switch over from the faulty host to an active standby host. Now, if the compute
+host is forced down [1] as a result of that fault, the user has to be notified
+about this state change such that the user can react accordingly. Similarly,
+a change of the host state to "maintenance" should also be notified to the
+users.
+
+What is changed
+===============
+
+A new ``host_status`` parameter is added to the ``/servers/{server_id}`` and
+``/servers/detail`` endpoints in microversion 2.16. By this new parameter
+user can get additional state information about the host.
+
+``host_status`` possible values where next value in list can override the
+previous:
+
+- ``UP`` if nova-compute is up.
+- ``UNKNOWN`` if nova-compute status was not reported by servicegroup driver
+  within configured time period. Default is within 60 seconds,
+  but can be changed with ``service_down_time`` in nova.conf.
+- ``DOWN`` if nova-compute was forced down.
+- ``MAINTENANCE`` if nova-compute was disabled. MAINTENANCE in API directly
+  means nova-compute service is disabled. Different wording is used to avoid
+  the impression that the whole host is down, as only scheduling of new VMs
+  is disabled.
+- Empty string indicates there is no host for server.
+
+``host_status`` is returned in the response in case the policy permits. By
+default the policy is for admin only in Nova policy.json::
+
+  "os_compute_api:servers:show:host_status": "rule:admin_api"
+
+For an NFV use case this has to also be enabled for the owner of the VM::
+
+  "os_compute_api:servers:show:host_status": "rule:admin_or_owner"
+
+REST API examples:
+==================
+
+Case where nova-compute is enabled and reporting normally::
+
+    GET /v2.1/{tenant_id}/servers/{server_id}
+
+    200 OK
+    {
+      "server": {
+        "host_status": "UP",
+        ...
+      }
+    }
+
+Case where nova-compute is enabled, but not reporting normally::
+
+    GET /v2.1/{tenant_id}/servers/{server_id}
+
+    200 OK
+    {
+      "server": {
+        "host_status": "UNKNOWN",
+        ...
+      }
+    }
+
+Case where nova-compute is enabled, but forced_down::
+
+    GET /v2.1/{tenant_id}/servers/{server_id}
+
+    200 OK
+    {
+      "server": {
+        "host_status": "DOWN",
+        ...
+      }
+    }
+
+Case where nova-compute is disabled::
+
+    GET /v2.1/{tenant_id}/servers/{server_id}
+
+    200 OK
+    {
+      "server": {
+        "host_status": "MAINTENANCE",
+        ...
+      }
+    }
+
+Host Status is also visible in python-novaclient::
+
+  +-------+------+--------+------------+-------------+----------+-------------+
+  | ID    | Name | Status | Task State | Power State | Networks | Host Status |
+  +-------+------+--------+------------+-------------+----------+-------------+
+  | 9a... | vm1  | ACTIVE | -          | RUNNING     | xnet=... | UP          |
+  +-------+------+--------+------------+-------------+----------+-------------+
+
+Links:
+======
+
+[1] Manual for OpenStack NOVA API for marking host down
+http://artifacts.opnfv.org/doctor/docs/manuals/mark-host-down_manual.html
+
+[2] OpenStack compute manual page
+http://developer.openstack.org/api-ref-compute-v2.1.html#compute-v2.1
diff --git a/docs/release/userguide/index.rst b/docs/release/userguide/index.rst
index eee855dc..577072c7 100644
--- a/docs/release/userguide/index.rst
+++ b/docs/release/userguide/index.rst
@@ -11,3 +11,6 @@ Doctor User Guide
     :maxdepth: 2
 
     feature.userguide.rst
+    get-valid-server-state.rst
+    mark-host-down_manual.rst
+    monitors.rst
diff --git a/docs/release/userguide/mark-host-down_manual.rst b/docs/release/userguide/mark-host-down_manual.rst
new file mode 100644
index 00000000..3815205d
--- /dev/null
+++ b/docs/release/userguide/mark-host-down_manual.rst
@@ -0,0 +1,122 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+=========================================
+OpenStack NOVA API for marking host down.
+=========================================
+
+Related Blueprints:
+===================
+
+  * https://blueprints.launchpad.net/nova/+spec/mark-host-down
+  * https://blueprints.launchpad.net/python-novaclient/+spec/support-force-down-service
+
+What the API is for
+===================
+
+  This API gives an external fault monitoring system a way to quickly tell
+  OpenStack Nova that a compute host is down. This immediately enables
+  evacuation of any VM on the host, which in turn enables faster HA
+  actions.
+
+What this API does
+==================
+
+  In OpenStack the nova-compute service state can represent the compute host
+  state, and this new API is used to force this service down. It is assumed
+  that the caller of this API has made sure the host is also fenced or powered
+  down. This is important, so that the same VM instance cannot appear twice
+  after being evacuated to a new compute host. When the host is recovered by
+  any means, the external system is responsible for calling the API again to
+  disable the forced_down flag and let the host's nova-compute service report
+  the host as up again. If a network-fenced host comes up again, it should not
+  boot the VMs it had if it finds that they were evacuated to another compute
+  host. The decision of deleting or booting the VMs that used to be on the
+  host should be enhanced later to be more reliable by the Nova blueprint:
+  https://blueprints.launchpad.net/nova/+spec/robustify-evacuate
+
+REST API for forcing down:
+==========================
+
+  Parameter explanations:
+  tenant_id:       Identifier of the tenant.
+  binary:          Compute service binary name.
+  host:            Compute host name.
+  forced_down:     Compute service forced down flag.
+  token:           Token received after successful authentication.
+  service_host_ip: Serving controller node ip.
+
+  request:
+  PUT /v2.1/{tenant_id}/os-services/force-down
+  {
+  "binary": "nova-compute",
+  "host": "compute1",
+  "forced_down": true
+  }
+
+  response:
+  200 OK
+  {
+  "service": {
+  "host": "compute1",
+  "binary": "nova-compute",
+  "forced_down": true
+  }
+  }
+
+  Example::
+
+    curl -g -i -X PUT http://{service_host_ip}:8774/v2.1/{tenant_id}/os-services/force-down \
+      -H "Content-Type: application/json" -H "Accept: application/json" -H "X-OpenStack-Nova-API-Version: 2.11" \
+      -H "X-Auth-Token: {token}" -d '{"binary": "nova-compute", "host": "compute1", "forced_down": true}'
+
+CLI for forcing down:
+=====================
+
+  nova service-force-down <hostname> nova-compute
+
+  Example:
+  nova service-force-down compute1 nova-compute
+
+REST API for disabling forced down:
+===================================
+
+  Parameter explanations:
+  tenant_id:       Identifier of the tenant.
+  binary:          Compute service binary name.
+  host:            Compute host name.
+  forced_down:     Compute service forced down flag.
+  token:           Token received after successful authentication.
+  service_host_ip: Serving controller node ip.
+
+  request:
+  PUT /v2.1/{tenant_id}/os-services/force-down
+  {
+  "binary": "nova-compute",
+  "host": "compute1",
+  "forced_down": false
+  }
+
+  response:
+  200 OK
+  {
+  "service": {
+  "host": "compute1",
+  "binary": "nova-compute",
+  "forced_down": false
+  }
+  }
+
+  Example::
+
+    curl -g -i -X PUT http://{service_host_ip}:8774/v2.1/{tenant_id}/os-services/force-down \
+      -H "Content-Type: application/json" -H "Accept: application/json" -H "X-OpenStack-Nova-API-Version: 2.11" \
+      -H "X-Auth-Token: {token}" -d '{"binary": "nova-compute", "host": "compute1", "forced_down": false}'
+
+CLI for disabling forced down:
+==============================
+
+  nova service-force-down --unset <hostname> nova-compute
+
+  Example:
+  nova service-force-down --unset compute1 nova-compute
diff --git a/docs/release/userguide/monitors.rst b/docs/release/userguide/monitors.rst
new file mode 100644
index 00000000..eeb5e226
--- /dev/null
+++ b/docs/release/userguide/monitors.rst
@@ -0,0 +1,37 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+Monitor Types and Limitations
+=============================
+
+Currently there are two monitor types supported: sample and collectd.
+
+Sample Monitor
+--------------
+
+Sample monitor type pings the compute host from the control host and calculates the
+notification time after the ping timeout.
+Also if inspector type is sample, the compute node needs to communicate with the control
+node on port 12345. This port needs to be opened for incoming traffic on the control node.
+
+Collectd Monitor
+----------------
+
+Collectd monitor type uses collectd daemon running ovs_events plugin. Collectd runs on
+compute to send instant notification to the control node. The notification time is
+calculated by using the difference of time at which compute node sends notification to
+control node and the time at which consumer is notified. The time on control and compute
+node has to be synchronized for this reason. For further details on setting up collectd
+on the compute node, use the following link:
+:doc:`Barometer user guide <barometer:release/userguide/feature.userguide>`
+
+
+Collectd monitors an interface managed by OVS. If the interface is not assigned
+an IP, the user has to provide the name of interface to be monitored. The command to
+launch the doctor test in that case is:
+``MONITOR_TYPE=collectd INSPECTOR_TYPE=sample INTERFACE_NAME=example_iface ./run.sh``
+
+If the interface name or IP is not provided, the collectd monitor type will monitor the
+default management interface. This may result in the failure of doctor run.sh test case.
+The test case sets the monitored interface down and if the inspector (sample or congress)
+is running on the same subnet, collectd monitor will not be able to communicate with it.
-- 
cgit