summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--INFO1
-rw-r--r--UPSTREAM39
-rw-r--r--docs/design/index.rst2
-rw-r--r--docs/design/inspector-design-guideline.rst46
-rw-r--r--docs/design/performance-profiler.rst118
-rw-r--r--docs/requirements/02-use_cases.rst2
-rw-r--r--docs/requirements/03-architecture.rst34
-rw-r--r--docs/requirements/04-gaps.rst55
-rw-r--r--docs/requirements/05-implementation.rst117
-rw-r--r--docs/requirements/07-annex.rst2
-rw-r--r--[-rwxr-xr-x]docs/requirements/images/figure1.pngbin977880 -> 79420 bytes
-rw-r--r--[-rwxr-xr-x]docs/requirements/images/figure2.pngbin1043699 -> 82010 bytes
-rw-r--r--tests/consumer.py9
-rw-r--r--tests/functions-common72
-rw-r--r--tests/inspector.py26
-rw-r--r--tests/lib/inspector30
-rw-r--r--tests/lib/inspectors/congress69
-rw-r--r--tests/lib/inspectors/sample16
-rw-r--r--tests/lib/installer34
-rw-r--r--tests/lib/installers/apex24
-rw-r--r--tests/lib/installers/fuel107
-rw-r--r--tests/lib/installers/local21
-rw-r--r--tests/logger.py47
-rw-r--r--tests/monitor.py45
-rw-r--r--tests/profiler-poc.py87
-rwxr-xr-xtests/run.sh350
26 files changed, 958 insertions, 395 deletions
diff --git a/INFO b/INFO
index 08c017fc..bb91b30e 100644
--- a/INFO
+++ b/INFO
@@ -21,7 +21,6 @@ Peter Lee (Corenova Technologies, peter@corenova.com)
Ryota Mibu (NEC, r-mibu@cq.jp.nec.com)
Serge Manning (Sprint, Serge.Manning@sprint.com)
Tomi Juvonen (Nokia, tomi.juvonen@nokia.com)
-Tommy Lindgren (Ericsson, tommy.lindgren@ericsson.com)
Uli Kleber (Huawei, ulrich.kleber@huawei.com)
Link to TSC approval of the project: http://meetbot.opnfv.org/meetings/opnfv-meeting/2014/opnfv-meeting.2014-12-02-14.58.html
diff --git a/UPSTREAM b/UPSTREAM
new file mode 100644
index 00000000..d8810404
--- /dev/null
+++ b/UPSTREAM
@@ -0,0 +1,39 @@
+# Upstream contributions, bitergia will crawl this and extract the relevant information
+# system is one of Gerrit, Bugzilla, Launchpad (insert more)
+---
+-
+ url: https://blueprints.launchpad.net/ceilometer/+spec/event-alarm-evaluator
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/mark-host-down
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/python-novaclient/+spec/support-force-down-service
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/get-valid-server-state
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/servers-by-host-status
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/maintenance-reason-to-server
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/service-status-notification
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/congress/+spec/push-type-datasource-driver
+ system: Launchpad
+#-
+# url: https://review.openstack.org/#/c/314915/
+# system: Gerrit
+-
+ url: https://blueprints.launchpad.net/cinder/+spec/mark-services-down
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/python-cinderclient/+spec/mark-service-down-cli
+ system: Launchpad
+#-
+# url: https://bugs.launchpad.net/neutron/+bug/1513144
+# system: Launchpad-bug
diff --git a/docs/design/index.rst b/docs/design/index.rst
index 4efbef17..963002a0 100644
--- a/docs/design/index.rst
+++ b/docs/design/index.rst
@@ -23,3 +23,5 @@ See also https://wiki.opnfv.org/requirements_projects .
notification-alarm-evaluator.rst
rfe-port-status-update.rst
port-data-plane-status.rst
+ inspector-design-guideline.rst
+ performance-profiler.rst
diff --git a/docs/design/inspector-design-guideline.rst b/docs/design/inspector-design-guideline.rst
new file mode 100644
index 00000000..4add8c0f
--- /dev/null
+++ b/docs/design/inspector-design-guideline.rst
@@ -0,0 +1,46 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+==========================
+Inspector Design Guideline
+==========================
+
+.. NOTE::
+ This is a draft spec of the design guideline for the inspector component.
+ JIRA ticket to track the update and collect comments: `DOCTOR-73`_.
+
+This document summarizes best practices for designing a high-performance
+inspector to meet the requirements of the `OPNFV Doctor project`_.
+
+Problem Description
+===================
+
+Some pitfalls have been detected during development of the sample inspector, e.g.
+we suffered a significant `performance degrading in listing VMs in a host`_.
+
+A `patch set for caching the list`_ has been committed to solve this issue. When a
+new inspector is integrated, it would be nice to have an evaluation of existing
+design and give recommendations for improvements.
+
+This document can be treated as a source of related blueprints in inspector
+projects.
+
+Guidelines
+==========
+
+Host specific VMs list
+----------------------
+
+TBD, see `DOCTOR-76`_.
+
+Parallel execution
+------------------
+
+TBD, see `discussion in mailing list`_.
+
+.. _DOCTOR-73: https://jira.opnfv.org/browse/DOCTOR-73
+.. _OPNFV Doctor project: https://wiki.opnfv.org/doctor
+.. _performance degrading in listing VMs in a host: https://lists.opnfv.org/pipermail/opnfv-tech-discuss/2016-September/012591.html
+.. _patch set for caching the list: https://gerrit.opnfv.org/gerrit/#/c/20877/
+.. _DOCTOR-76: https://jira.opnfv.org/browse/DOCTOR-76
+.. _discussion in mailing list: https://lists.opnfv.org/pipermail/opnfv-tech-discuss/2016-October/013036.html
diff --git a/docs/design/performance-profiler.rst b/docs/design/performance-profiler.rst
new file mode 100644
index 00000000..f834a915
--- /dev/null
+++ b/docs/design/performance-profiler.rst
@@ -0,0 +1,118 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+====================
+Performance Profiler
+====================
+
+https://goo.gl/98Osig
+
+This blueprint proposes to create a performance profiler for doctor scenarios.
+
+Problem Description
+===================
+
+In the verification job for notification time, we have encountered some
+performance issues, such as
+
+1. In an environment deployed by APEX the criteria are met, while in one deployed
+by Fuel the performance is much poorer.
+2. Significant performance degradation was spotted when we increased the total
+number of VMs.
+
+It takes time to dig through the logs and analyse the cause. People have to
+collect timestamps at each checkpoint manually to find the bottleneck. A
+performance profiler will automate this process.
+
+Proposed Change
+===============
+
+Current Doctor scenario covers the inspector and notifier in the whole fault
+management cycle::
+
+ start end
+ + + + + + +
+ | | | | | |
+ |monitor|inspector|notifier|manager|controller|
+ +------>+ | | | |
+ occurred +-------->+ | | |
+ | detected +------->+ | |
+ | | identified +-------+ |
+ | | notified +--------->+
+ | | | processed resolved
+ | | | |
+ | +<-----doctor----->+ |
+ | |
+ | |
+ +<---------------fault management------------>+
+
+The notification time can be split into several parts and visualized as a
+timeline::
+
+ start end
+ 0----5---10---15---20---25---30---35---40---45--> (x 10ms)
+ + + + + + + + + + + +
+ 0-hostdown | | | | | | | | |
+ +--->+ | | | | | | | | |
+ | 1-raw failure | | | | | | |
+ | +-->+ | | | | | | | |
+ | | 2-found affected | | | | |
+ | | +-->+ | | | | | | |
+ | | 3-marked host down| | | | |
+ | | +-->+ | | | | | |
+ | | 4-set VM error| | | | |
+ | | +--->+ | | | | |
+ | | | 5-notified VM error | |
+ | | | +----->| | | | |
+ | | | | 6-transformed event
+ | | | | +-->+ | | |
+ | | | | | 7-evaluated event
+ | | | | | +-->+ | |
+ | | | | | 8-fired alarm
+ | | | | | +-->+ |
+ | | | | | 9-received alarm
+ | | | | | +-->+
+ sample | sample | | | |10-handled alarm
+ monitor| inspector |nova| c/m | aodh |
+ | |
+ +<-----------------doctor--------------->+
+
+Note: c/m = ceilometer
+
+And a table of components sorted by time cost from most to least
+
++----------+---------+----------+
+|Component |Time Cost|Percentage|
++==========+=========+==========+
+|inspector |160ms | 40% |
++----------+---------+----------+
+|aodh |110ms | 30% |
++----------+---------+----------+
+|monitor |50ms | 14% |
++----------+---------+----------+
+|... | | |
++----------+---------+----------+
+|... | | |
++----------+---------+----------+
+
+Note: data in the table is for demonstration only, not actual measurement
+
+Timestamps can be collected from various sources
+
+1. log files
+2. trace point in code
+
+The performance profiler will be integrated into the verification job to provide
+detailed results of the test. It can also be deployed independently to diagnose
+performance issues in a specified environment.
+
+Working Items
+=============
+
+1. PoC with limited checkpoints
+2. Integration with verification job
+3. Collect timestamps at all checkpoints
+4. Display the profiling result in console
+5. Report the profiling result to test database
+6. Independent package which can be installed in a specified environment
diff --git a/docs/requirements/02-use_cases.rst b/docs/requirements/02-use_cases.rst
index 424a3c6e..0a1f6413 100644
--- a/docs/requirements/02-use_cases.rst
+++ b/docs/requirements/02-use_cases.rst
@@ -136,7 +136,7 @@ the same as in the "Fault management using ACT-STBY configuration" use case,
except in this case, the Consumer of a VM/VNF switches to STBY configuration
based on a predicted fault, rather than an occurred fault.
-NVFI Maintenance
+NFVI Maintenance
----------------
VM Retirement
diff --git a/docs/requirements/03-architecture.rst b/docs/requirements/03-architecture.rst
index 8ff5dacf..b7417691 100644
--- a/docs/requirements/03-architecture.rst
+++ b/docs/requirements/03-architecture.rst
@@ -191,11 +191,15 @@ fencing, but there has not been any progress. The general description is
available here:
https://wiki.openstack.org/wiki/Fencing_Instances_of_an_Unreachable_Host
-As OpenStack does not cover fencing it is in the responsibility of the Doctor
-project to make sure fencing is done by using tools like pacemaker and by
-calling OpenStack APIs. Only after fencing is done OpenStack resources can be
-marked as down. In case there are gaps in OpenStack projects to have all
-relevant resources marked as down, those gaps need to be identified and fixed.
+OpenStack provides some mechanisms that allow fencing of faulty resources. Some
+are automatically invoked by the platform itself (e.g. Nova disables the
+compute service when libvirtd stops running, preventing new VMs from being scheduled
+to that node), while other mechanisms are consumer trigger-based actions (e.g.
+Neutron port admin-state-up). For other fencing actions not supported by
+OpenStack, the Doctor project may suggest ways to address the gap (e.g. by
+resorting to external tools and orchestration methods), or document or
+implement them upstream.
+
The Doctor Inspector component will be responsible of marking resources down in
the OpenStack and back up if necessary.
@@ -206,18 +210,18 @@ In the basic :ref:`uc-fault1` use case, no automatic actions will be taken by
the VIM, but all recovery actions executed by the VIM and the NFVI will be
instructed and coordinated by the Consumer.
-In a more advanced use case, the VIM shall be able to recover the failed virtual
+In a more advanced use case, the VIM may be able to recover the failed virtual
resources according to a pre-defined behavior for that resource. In principle
this means that the owner of the resource (i.e., its consumer or administrator)
can define which recovery actions shall be taken by the VIM. Examples are a
-restart of the VM, migration/evacuation of the VM, or no action.
+restart of the VM or migration/evacuation of the VM.
High level northbound interface specification
---------------------------------------------
-Fault management
+Fault Management
^^^^^^^^^^^^^^^^
This interface allows the Consumer to subscribe to fault notification from the
@@ -261,7 +265,8 @@ physical resource from 'enabled' to 'going-to-maintenance' and a timeout [#timeo
After receiving the MaintenanceRequest,the VIM decides on the actions to be taken
based on maintenance policies predefined by the affected Consumer(s).
-.. [#timeout] Timeout is set by the Administrator and corresponds to the maximum time to empty the physical resources.
+.. [#timeout] Timeout is set by the Administrator and corresponds to the maximum time
+ to empty the physical resources.
.. figure:: images/figure5a.png
:name: figure5a
@@ -321,12 +326,13 @@ An example of a high level message flow to cover the failed NFVI maintenance cas
shown in :numref:`figure5c`.
It consists of the following steps:
-5. The Consumer C3 switches to standby configuration (STDBY).
-6. Instructions from Consumers C2/C3 are shared to VIM requesting certain actions to be performed (steps 6a, 6b).
- The VIM executes the requested actions and sends back a NACK to consumer C2 (step 6d) as the
- migration of the virtual resource(s) is not completed by the given timeout.
+5. The Consumer C3 switches to standby configuration (STBY).
+6. Instructions from Consumers C2/C3 are shared to VIM requesting certain actions to be performed
+ (steps 6a, 6b). The VIM executes the requested actions and sends back a NACK to consumer C2
+ (step 6d) as the migration of the virtual resource(s) is not completed by the given timeout.
7. The VIM switches the physical resources to "enabled" state.
-8. MaintenanceResponse is sent from VIM to inform the Administrator that the maintenance action cannot start.
+8. MaintenanceNotification is sent from VIM to inform the Administrator that the maintenance action
+ cannot start.
..
diff --git a/docs/requirements/04-gaps.rst b/docs/requirements/04-gaps.rst
index 154f8e43..b8ff7f2e 100644
--- a/docs/requirements/04-gaps.rst
+++ b/docs/requirements/04-gaps.rst
@@ -61,6 +61,13 @@ Immediate Notification
- Fault notifications cannot be received immediately by Ceilometer.
+* Solved by
+
+ + Event Alarm Evaluator:
+ https://specs.openstack.org/openstack/ceilometer-specs/specs/liberty/event-alarm-evaluator.html
+ + New OpenStack alarms and notifications project AODH:
+ http://docs.openstack.org/developer/aodh/
+
Maintenance Notification
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -98,7 +105,7 @@ Maintenance Notification
- VIM user cannot receive maintenance notifications.
-* Related blueprints
+* Solved by
+ https://blueprints.launchpad.net/nova/+spec/service-status-notification
@@ -126,6 +133,10 @@ Normalization of data collection models
- Normalized data format does not exist.
+* Solved by
+
+ + Specification in Section :ref:`southbound`.
+
OpenStack
---------
@@ -157,7 +168,7 @@ ________________________________
- Ceilometer seems to be unsuitable for monitoring medium and large scale
NFVI deployments.
-* Related blueprints
+* Solved by
+ Usage of Zabbix for fault aggregation [ZABB]_. Zabbix can support a much
higher number of fault events (up to 15 thousand events per second, but
@@ -189,13 +200,14 @@ ___________________________________
- OpenStack Ceilometer does not monitor hardware and software to capture
faults.
- + Gap
+ + Gap
- - Ceilometer is not able to detect and handle all faults listed in the Annex.
+ - Ceilometer is not able to detect and handle all faults listed in the Annex.
-* Related blueprints / workarounds
+* Solved by
- - Use other dedicated monitoring tools like Zabbix or Monasca
+ + Use of dedicated monitoring tools like Zabbix or Monasca.
+ See :ref:`nfvi_faults`.
Nova
^^^^
@@ -218,15 +230,14 @@ ________________________________________
+ To-be
- - There needs to be API to change VM power_State in case host has failed.
- - There needs to be API to change nova-compute state.
+ - The API shall support changing the VM power state in case the host has failed.
+ - The API shall support changing the nova-compute state.
- There could be single API to change different VM states for all VMs
- belonging to specific host.
- - As external system monitoring the infra calls these APIs change can be
- fast and reliable.
- - Correlation actions can be faster and automated as states are reliable.
- - User will be able to read states from OpenStack and trust they are
- correct.
+ belonging to a specific host.
+ - Support external systems that are monitoring the infrastructure and resources
+ and are able to call the API fast and reliably.
+ - Resource states are reliable such that correlation actions can be fast and automated.
+ - User shall be able to read states from OpenStack and trust they are correct.
+ As-is
@@ -240,12 +251,11 @@ ________________________________________
+ Gap
- OpenStack does not change its states fast and reliably enough.
- - There is API missing to have external system to change states and to
- trust the states are then reliable (external system has fenced failed
- host).
+ - The API does not allow an external system to change states and to
+ trust that the states are reliable (the external system has fenced the failed host).
- User cannot read all the states from OpenStack nor trust they are right.
-* Related blueprints
+* Solved by
+ https://blueprints.launchpad.net/nova/+spec/mark-host-down
+ https://blueprints.launchpad.net/python-novaclient/+spec/support-force-down-service
@@ -309,7 +319,7 @@ _________________
underlying root cause of failure. Knowing the root cause can help filter
out unnecessary and overwhelming alarms.
-* Related blueprints / workarounds
+* Status
+ Monasca as of now lacks this feature, although the community is aware and
working toward supporting it.
@@ -334,7 +344,7 @@ _________________
- Sensor monitoring is very important. It provides operators status
on the state of the physical infrastructure (e.g. temperature, fans).
-* Related blueprints / workarounds
+* Addressed by
+ Monasca can be configured to use third-party monitoring solutions (e.g.
Nagios, Cacti) for retrieving additional data.
@@ -370,7 +380,10 @@ _____________________________
+ Gap
- - Cause of the delay needs to be identified and fixed
+ - The cause of the delay is periodic evaluation and notification. The period defaults
+ to 30s and can be reduced to 5s, but not below.
+ https://github.com/zabbix/zabbix/blob/trunk/conf/zabbix_server.conf#L329
+
..
vim: set tabstop=4 expandtab textwidth=80:
diff --git a/docs/requirements/05-implementation.rst b/docs/requirements/05-implementation.rst
index 4c89fdf5..84979772 100644
--- a/docs/requirements/05-implementation.rst
+++ b/docs/requirements/05-implementation.rst
@@ -672,47 +672,81 @@ and correlated alarms. Instead the AODH alarm class has attributes for actions,
rules and user and project id.
-+------------------------+------------------------+------------------------+
-| ETSI NFV Alarm Type | OPNFV Doctor Req Spec | AODH Alarm Type |
-+========================+========================+========================+
-| AlarmId | FaultId | Alarm Id |
-+------------------------+------------------------+------------------------+
-| managedObjectId | virtualResourceId | (N/A) |
-+------------------------+------------------------+------------------------+
-| \- | \- | User_Id, Project_Id |
-+------------------------+------------------------+------------------------+
-| alarmRaisedTime | \- | (N/A) |
-+------------------------+------------------------+------------------------+
-| alarmChangedTime | \- | (N/A) |
-+------------------------+------------------------+------------------------+
-| alarmClearedTime | \- | (N/A) |
-+------------------------+------------------------+------------------------+
-| alarmState: | virtualResourceState | State: ok, alarm, |
-| New, Updated, Cleared | (e.g. normal, | insufficient data |
-| | maintenance, down, | |
-| | error) | |
-+------------------------+------------------------+------------------------+
-| vrPerceivedSeverity: | Severity (Integer) | Severity: low, |
-| Critical, Major, Minor,| | moderate, critical |
-| Warning, Indeterminate,| | |
-| Cleared | | |
-+------------------------+------------------------+------------------------+
-| eventTime (unclear?) | EventTime | (N/A) |
-+------------------------+------------------------+------------------------+
-| faultType | FaultType | type |
-+------------------------+------------------------+------------------------+
-| probableCause | ProbableCause | description |
-+------------------------+------------------------+------------------------+
-| isRootCause | IsRootCause | \- |
-+------------------------+------------------------+------------------------+
-| correlatedAlarmId | CorrelatedFaultId | \- |
-+------------------------+------------------------+------------------------+
-| faultDetails | FaultDetails | \- |
-+------------------------+------------------------+------------------------+
-| \- | \- | actions, rule, time |
-| | | constraints |
-+------------------------+------------------------+------------------------+
-
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| ETSI NFV Alarm Type | OPNFV Doctor | AODH Event Alarm | Description / Comment | Recommendations |
+| | Requirement Specs | Notification | | |
++========================+========================+=====================+=============================================+=======================================+
+| alarmId | FaultId | alarm_id | Identifier of an alarm. | \- |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| \- | \- | alarm_name | Human readable alarm name. | May be added in ETSI NFV Stage 3. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| managedObjectId | VirtualResourceId | (reason) | Identifier of the affected virtual resource | \- |
+| | | | is part of the AODH reason parameter. | |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| \- | \- | user_id, project_id | User and project identifiers. | May be added in ETSI NFV Stage 3. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| alarmRaisedTime | \- | \- | Timestamp when alarm was raised. | To be added to Doctor and AODH. May |
+| | | | | be derived (e.g. in a shimlayer) from |
+| | | | | the AODH alarm history. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| alarmChangedTime | \- | \- | Timestamp when alarm was changed/updated. | see above |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| alarmClearedTime | \- | \- | Timestamp when alarm was cleared. | see above |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| eventTime | \- | \- | Timestamp when alarm was first observed by | see above |
+| | | | the Monitor. | |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| \- | EventTime | generated | Timestamp of the Notification. | Update parameter name in Doctor spec. |
+| | | | | May be added in ETSI NFV Stage 3. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| state: | VirtualResourceState: | current: ok, alarm, | ETSI NFV IFA 005/006 lists example alarm | Maintenance state is missing in AODH. |
+| E.g. Fired, Updated | E.g. normal, down | insufficient_data | states. | List of alarm states will be |
+| Cleared | maintenance, error | | | specified in ETSI NFV Stage 3. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| perceivedSeverity: | Severity (Integer) | Severity: | ETSI NFV IFA 005/006 lists example | List of alarm states will be |
+| E.g. Critical, Major, | | low (default), | perceived severity values. | specified in ETSI NFV Stage 3. |
+| Minor, Warning, | | moderate, critical | | |
+| Indeterminate, Cleared | | | | **OPNFV: Severity (Integer)**: |
+| | | | | * update OPNFV Doctor specification |
+| | | | | to *Enum* |
+| | | | | |
+| | | | | **perceivedSeverity=Indeterminate**: |
+| | | | | * remove value *Indeterminate* in |
+| | | | | IFA and map undefined values to |
+| | | | | “minor” severity, or |
+| | | | | * add value *indeterminate* in AODH |
+| | | | | and make it the default value. |
+| | | | | |
+| | | | | **perceivedSeverity=Cleared**: |
+| | | | | * remove value *Cleared* in IFA as |
+| | | | | the information about a cleared |
+| | | | | alarm can be derived from |
+| | | | | the alarm state parameter, or |
+| | | | | * add value *cleared* in AODH and |
+| | | | | set a rule that the severity is |
+| | | | | “cleared” when the state is *ok*. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| faultType | FaultType | event_type in | Type of the fault, e.g. “CPU failure” of a | OpenStack Alarming (Aodh) can use a |
+| | | reason_data | compute resource, in machine interpretable | fuzzy matching with wildcard string, |
+| | | | format. | "compute.cpu.failure". |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| N/A | N/A | type = "event" | Type of the notification. For fault | \- |
+| | | | notifications the type in AODH is “event”. | |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| probableCause | ProbableCause | \- | Probable cause of the alarm. | May be provided (e.g. in a shimlayer) |
+| | | | | based on Vitrage topology awareness / |
+| | | | | root-cause-analysis. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| isRootCause | IsRootCause | \- | Boolean indicating whether the fault is the | see above |
+| | | | root cause of other faults. | |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| correlatedAlarmId | CorrelatedFaultId | \- | List of IDs of correlated faults. | see above |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| faultDetails | FaultDetails | \- | Additional details about the fault/alarm. | FaultDetails information element will |
+| | | | | be specified in ETSI NFV Stage 3. |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
+| \- | \- | action, previous | Additional AODH alarm related parameters. | \- |
++------------------------+------------------------+---------------------+---------------------------------------------+---------------------------------------+
Table: Comparison of alarm attributes
@@ -728,6 +762,7 @@ Other areas that need alignment is the so called alarm state in NFV. Here we mus
however consider what can be attributes of the notification vs. what should be a
property of the alarm instance. This will be analyzed later.
+.. _southbound:
Detailed southbound interface specification
-------------------------------------------
diff --git a/docs/requirements/07-annex.rst b/docs/requirements/07-annex.rst
index 8cb19612..2ebba0d8 100644
--- a/docs/requirements/07-annex.rst
+++ b/docs/requirements/07-annex.rst
@@ -1,6 +1,8 @@
.. This work is licensed under a Creative Commons Attribution 4.0 International License.
.. http://creativecommons.org/licenses/by/4.0
+.. _nfvi_faults:
+
Annex: NFVI Faults
=================================================
diff --git a/docs/requirements/images/figure1.png b/docs/requirements/images/figure1.png
index dacf0dd4..267ddddc 100755..100644
--- a/docs/requirements/images/figure1.png
+++ b/docs/requirements/images/figure1.png
Binary files differ
diff --git a/docs/requirements/images/figure2.png b/docs/requirements/images/figure2.png
index 3c8a2bf1..9a3b166d 100755..100644
--- a/docs/requirements/images/figure2.png
+++ b/docs/requirements/images/figure2.png
Binary files differ
diff --git a/tests/consumer.py b/tests/consumer.py
index 9b3230fe..3c012b4f 100644
--- a/tests/consumer.py
+++ b/tests/consumer.py
@@ -11,17 +11,20 @@ import argparse
from flask import Flask
from flask import request
import json
+import logger as doctor_log
import os
import time
+LOG = doctor_log.Logger('doctor_consumer').getLogger()
+
app = Flask(__name__)
@app.route('/failure', methods=['POST'])
def event_posted():
- app.logger.debug('doctor consumer notified at %s' % time.time())
- app.logger.debug('received data = %s' % request.data)
+ LOG.info('doctor consumer notified at %s' % time.time())
+ LOG.info('received data = %s' % request.data)
d = json.loads(request.data)
return "OK"
@@ -35,7 +38,7 @@ def get_args():
def main():
args = get_args()
- app.run(host="0.0.0.0", port=args.port, debug=True)
+ app.run(host="0.0.0.0", port=args.port)
if __name__ == '__main__':
diff --git a/tests/functions-common b/tests/functions-common
new file mode 100644
index 00000000..db2565a3
--- /dev/null
+++ b/tests/functions-common
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# Test if the named environment variable is set and not zero length
+# is_set env-var
+function is_set {
+ local var=\$"$1"
+ eval "[ -n \"$var\" ]"
+}
+
+# Prints backtrace info
+# filename:lineno:function
+# backtrace level
+function backtrace {
+ local level=$1
+ local deep
+ deep=$((${#BASH_SOURCE[@]} - 1))
+ echo "[Call Trace]"
+ while [ $level -le $deep ]; do
+ echo "${BASH_SOURCE[$deep]}:${BASH_LINENO[$deep-1]}:${FUNCNAME[$deep-1]}"
+ deep=$((deep - 1))
+ done
+}
+
+# Prints line number and "message" in error format
+# err $LINENO "message"
+function err {
+ local exitcode=$?
+ local xtrace
+ xtrace=$(set +o | grep xtrace)
+ set +o xtrace
+ local msg="[ERROR] ${BASH_SOURCE[2]}:$1 $2"
+ echo $msg 1>&2;
+ if [[ -n ${LOGDIR} ]]; then
+ echo $msg >> "${LOGDIR}/error.log"
+ fi
+ $xtrace
+ return $exitcode
+}
+
+# Prints line number and "message" then exits
+# die $LINENO "message"
+function die {
+ local exitcode=$?
+ set +o xtrace
+ local line=$1; shift
+ if [ $exitcode == 0 ]; then
+ exitcode=1
+ fi
+ backtrace 2
+ err $line "$*"
+ # Give buffers a second to flush
+ sleep 1
+ exit $exitcode
+}
+
+# Checks whether an environment variable is unset or has length 0, OR whether
+# the previous exit code is non-zero; if so, prints "message" and exits
+# NOTE: env-var is the variable name without a '$'
+# die_if_not_set $LINENO env-var "message"
+function die_if_not_set {
+ local exitcode=$?
+ local xtrace
+ xtrace=$(set +o | grep xtrace)
+ set +o xtrace
+ local line=$1; shift
+ local evar=$1; shift
+ if ! is_set $evar || [ $exitcode != 0 ]; then
+ die $line "$*"
+ fi
+ $xtrace
+}
+
diff --git a/tests/inspector.py b/tests/inspector.py
index 62614158..c1f95697 100644
--- a/tests/inspector.py
+++ b/tests/inspector.py
@@ -12,6 +12,7 @@ import collections
from flask import Flask
from flask import request
import json
+import logger as doctor_log
import os
import time
@@ -19,6 +20,8 @@ import novaclient.client as novaclient
import nova_force_down
+LOG = doctor_log.Logger('doctor_inspector').getLogger()
+
class DoctorInspectorSample(object):
@@ -44,13 +47,14 @@ class DoctorInspectorSample(object):
try:
host=server.__dict__.get('OS-EXT-SRV-ATTR:host')
self.servers[host].append(server)
- app.logger.debug('get hostname=%s from server=%s' % (host, server))
+ LOG.debug('get hostname=%s from server=%s' % (host, server))
except Exception as e:
- app.logger.debug('can not get hostname from server=%s' % server)
+ LOG.error('can not get hostname from server=%s' % server)
def disable_compute_host(self, hostname):
for server in self.servers[hostname]:
self.nova.servers.reset_state(server, 'error')
+ LOG.info('doctor mark vm(%s) error at %s' % (server, time.time()))
# NOTE: We use our own client here instead of this novaclient for a
# workaround. Once keystone provides v2.1 nova api endpoint
@@ -60,23 +64,24 @@ class DoctorInspectorSample(object):
# self.nova.services.force_down(hostname, 'nova-compute', True)
#
nova_force_down.force_down(hostname)
+ LOG.info('doctor mark host(%s) down at %s' % (hostname, time.time()))
app = Flask(__name__)
-app.debug = True
inspector = DoctorInspectorSample()
@app.route('/events', methods=['POST'])
def event_posted():
- app.logger.debug('event posted at %s' % time.time())
- app.logger.debug('inspector = %s' % inspector)
- app.logger.debug('received data = %s' % request.data)
+ LOG.info('event posted at %s' % time.time())
+ LOG.info('inspector = %s' % inspector)
+ LOG.info('received data = %s' % request.data)
d = json.loads(request.data)
- hostname = d['hostname']
- event_type = d['type']
- if event_type == 'compute.host.down':
- inspector.disable_compute_host(hostname)
+ for event in d:
+ hostname = event['details']['hostname']
+ event_type = event['type']
+ if event_type == 'compute.host.down':
+ inspector.disable_compute_host(hostname)
return "OK"
@@ -91,5 +96,6 @@ def main():
args = get_args()
app.run(port=args.port)
+
if __name__ == '__main__':
main()
diff --git a/tests/lib/inspector b/tests/lib/inspector
new file mode 100644
index 00000000..2fb7c409
--- /dev/null
+++ b/tests/lib/inspector
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+INSPECTOR_TYPE=${INSPECTOR_TYPE:-sample}
+
+# Succeeds when a driver file for the given inspector type exists under
+# TOP_DIR/lib/inspectors
+# Usage: is_inspector_supported inspector-type
+function is_inspector_supported {
+    local driver="${TOP_DIR}/lib/inspectors/${1}"
+    test -f "$driver"
+}
+
+# Succeeds when the given name matches the configured INSPECTOR_TYPE
+# Usage: is_inspector inspector-type
+function is_inspector {
+    local wanted=$1
+    [[ $wanted == $INSPECTOR_TYPE ]] && return 0
+    return 1
+}
+
+# Loads the driver file for INSPECTOR_TYPE and starts that inspector
+# Dies when no driver file exists for the configured type
+function start_inspector {
+    if ! is_inspector_supported "$INSPECTOR_TYPE"; then
+        # The space after $LINENO matters: die takes the line number and
+        # the message as separate arguments.
+        die $LINENO "INSPECTOR_TYPE=$INSPECTOR_TYPE is not supported."
+    fi
+
+    source "$TOP_DIR/lib/inspectors/$INSPECTOR_TYPE"
+    start_inspector_$INSPECTOR_TYPE
+}
+
+# Stops the inspector by delegating to the driver function for INSPECTOR_TYPE
+function stop_inspector {
+    local handler=stop_inspector_$INSPECTOR_TYPE
+    $handler
+}
+
+# Cleans up after the inspector by delegating to the driver function for
+# INSPECTOR_TYPE
+function cleanup_inspector {
+    local handler=cleanup_inspector_$INSPECTOR_TYPE
+    $handler
+}
diff --git a/tests/lib/inspectors/congress b/tests/lib/inspectors/congress
new file mode 100644
index 00000000..04825252
--- /dev/null
+++ b/tests/lib/inspectors/congress
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Creates a named Congress policy rule unless a rule with that name is
+# already listed for the policy
+# _congress_add_rule name policy rule
+function _congress_add_rule {
+    # Keep the arguments local so they cannot clobber caller variables.
+    local name=$1
+    local policy=$2
+    local rule=$3
+
+    if ! openstack congress policy rule list "$policy" | grep -q -e "// Name: $name$" ; then
+        openstack congress policy rule create --name "$name" "$policy" "$rule"
+    fi
+}
+
+# Deletes a named Congress policy rule when it is listed for the policy
+# _congress_del_rule name policy
+function _congress_del_rule {
+    # Keep the arguments local so they cannot clobber caller variables.
+    local name=$1
+    local policy=$2
+
+    if openstack congress policy rule list "$policy" | grep -q -e "^// Name: $name$" ; then
+        openstack congress policy rule delete "$policy" "$name"
+    fi
+}
+
+function _congress_add_rules {
+ _congress_add_rule host_down classification \
+ 'host_down(host) :-
+ doctor:events(hostname=host, type="compute.host.down", status="down")'
+
+ _congress_add_rule active_instance_in_host classification \
+ 'active_instance_in_host(vmid, host) :-
+ nova:servers(id=vmid, host_name=host, status="ACTIVE")'
+
+ _congress_add_rule host_force_down classification \
+ 'execute[nova:services.force_down(host, "nova-compute", "True")] :-
+ host_down(host)'
+
+ _congress_add_rule error_vm_states classification \
+ 'execute[nova:servers.reset_state(vmid, "error")] :-
+ host_down(host),
+ active_instance_in_host(vmid, host)'
+}
+
+# Prepares Congress to act as the inspector
+# Exits with status 1 when the Nova datasource API version reported by
+# Congress is older than 2.11. Otherwise creates the doctor datasource if
+# it is not listed yet and installs the doctor policy rules.
+function start_inspector_congress {
+    nova_api_min_version="2.11"
+    nova_api_version=$(openstack congress datasource list | \
+        grep nova | grep -Po "(?<='api_version': ')[^']*")
+    # No version reported: fall back to 2.0
+    [[ -z $nova_api_version ]] && nova_api_version="2.0"
+    # Compare as versions, not as strings: a plain string comparison ranks
+    # "2.9" above "2.11". sort -V lists the older version first.
+    local oldest
+    oldest=$(printf '%s\n' "$nova_api_min_version" "$nova_api_version" | sort -V | head -n 1)
+    if [[ "$oldest" != "$nova_api_min_version" ]]; then
+        echo "ERROR: Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)"
+        exit 1
+    fi
+    # NOTE(review): the result of this check is not used here; presumably
+    # it relies on errexit being enabled by the caller - confirm.
+    openstack congress driver list | grep -q " doctor "
+    openstack congress datasource list | grep -q " doctor " || {
+        openstack congress datasource create doctor doctor
+    }
+    _congress_add_rules
+
+}
+
+function stop_inspector_congress {
+ _congress_del_rule host_force_down classification
+ _congress_del_rule error_vm_states classification
+ _congress_del_rule active_instance_in_host classification
+ _congress_del_rule host_down classification
+
+}
+
+# No-op hook: the congress inspector has nothing to clean up
+function cleanup_inspector_congress {
+    # Hand back the status that was current on entry, untouched.
+    return $?
+}
diff --git a/tests/lib/inspectors/sample b/tests/lib/inspectors/sample
new file mode 100644
index 00000000..cd21a008
--- /dev/null
+++ b/tests/lib/inspectors/sample
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+function start_inspector_sample {
+ pgrep -f "python inspector.py" && return 0
+ python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 &
+}
+
+function stop_inspector_sample {
+ pgrep -f "python inspector.py" || return 0
+ kill $(pgrep -f "python inspector.py")
+}
+
+# No-op hook: the sample inspector has nothing to clean up
+function cleanup_inspector_sample {
+    # Hand back the status that was current on entry, untouched.
+    return $?
+}
diff --git a/tests/lib/installer b/tests/lib/installer
new file mode 100644
index 00000000..cdde6eff
--- /dev/null
+++ b/tests/lib/installer
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+INSTALLER_TYPE=${INSTALLER_TYPE:-local}
+INSTALLER_IP=${INSTALLER_IP:-none}
+ssh_opts_cpu="$ssh_opts"
+
+# Succeeds when a driver file for the given installer type exists under
+# TOP_DIR/lib/installers
+# Usage: is_installer_supported installer-type
+function is_installer_supported {
+    local driver="${TOP_DIR}/lib/installers/${1}"
+    test -f "$driver"
+}
+
+# Succeeds when the given name matches the configured INSTALLER_TYPE
+# Usage: is_installer installer-type
+function is_installer {
+    local wanted=$1
+    [[ $wanted == $INSTALLER_TYPE ]] && return 0
+    return 1
+}
+
+# Loads the driver file for INSTALLER_TYPE, discovers the installer address
+# when none was given, then fetches the ssh keys and applies the patches
+# Dies when no driver file exists for the configured type
+function setup_installer {
+    if ! is_installer_supported "$INSTALLER_TYPE"; then
+        # The space after $LINENO matters: die takes the line number and
+        # the message as separate arguments.
+        die $LINENO "INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
+    fi
+
+    source "$TOP_DIR/lib/installers/$INSTALLER_TYPE"
+
+    # INSTALLER_IP defaults to the placeholder "none", which is_set alone
+    # would accept as a real value, so test for the placeholder as well.
+    if ! is_set INSTALLER_IP || [[ "$INSTALLER_IP" == "none" ]]; then
+        get_installer_ip
+    fi
+
+    installer_get_ssh_keys
+    installer_apply_patches
+}
+
+# Cleans up after the installer by delegating to the driver function for
+# INSTALLER_TYPE
+function cleanup_installer {
+    local handler=cleanup_installer_$INSTALLER_TYPE
+    $handler
+}
diff --git a/tests/lib/installers/apex b/tests/lib/installers/apex
new file mode 100644
index 00000000..54b3dce2
--- /dev/null
+++ b/tests/lib/installers/apex
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Discovers the installer address and stores it in INSTALLER_IP
+# Takes the MAC that virsh lists for the instack domain on the default
+# network and looks that MAC up in the ARP table.
+# Dies when no address was found.
+function get_installer_ip {
+    # Declare first, assign second, so local does not mask the status.
+    local instack_mac
+    instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
+    INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}')
+    # die_if_not_set expects the variable NAME, not its value.
+    die_if_not_set $LINENO INSTALLER_IP "No installer IP"
+}
+
+# Copies the private key of the stack user from the installer node into
+# ./instack_key, hands the file to the current user, makes it read-only
+# and adds it to ssh_opts_cpu
+function installer_get_ssh_keys {
+    local me
+    me=$(whoami)
+    sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
+    sudo chown $me:$me instack_key
+    chmod 400 instack_key
+    ssh_opts_cpu="$ssh_opts_cpu -i instack_key"
+}
+
+# No-op hook: nothing is patched for the apex installer
+function installer_apply_patches {
+    # Hand back the status that was current on entry, untouched.
+    return $?
+}
+
+# No-op hook: nothing to clean up for the apex installer
+function cleanup_installer_apex {
+    # Hand back the status that was current on entry, untouched.
+    return $?
+}
diff --git a/tests/lib/installers/fuel b/tests/lib/installers/fuel
new file mode 100644
index 00000000..34a86922
--- /dev/null
+++ b/tests/lib/installers/fuel
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+# Discovers the installer address and stores it in INSTALLER_IP
+# Takes the MAC that virsh lists for the fuel-opnfv domain on the pxebr
+# network and looks that MAC up in the ARP table.
+# Dies when no address was found.
+function get_installer_ip {
+    # Declare first, assign second, so local does not mask the status.
+    local instack_mac
+    instack_mac=$(sudo virsh domiflist fuel-opnfv | awk '/pxebr/{print $5}')
+    INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}')
+    # die_if_not_set expects the variable NAME, not its value.
+    die_if_not_set $LINENO INSTALLER_IP "No installer IP"
+}
+
+# Copies the private key of root from the installer node into
+# ./instack_key, hands the file to the current user, makes it read-only
+# and adds it to ssh_opts_cpu
+function installer_get_ssh_keys {
+    local me
+    me=$(whoami)
+    sshpass -p r00tme scp $ssh_opts root@${INSTALLER_IP}:.ssh/id_rsa instack_key
+    sudo chown $me:$me instack_key
+    chmod 400 instack_key
+    ssh_opts_cpu="$ssh_opts_cpu -i instack_key"
+}
+
+function installer_apply_patches {
+ cat > set_conf.sh << 'END_TXT'
+#!/bin/bash
+if [ -e /etc/ceilometer/event_pipeline.yaml ]; then
+ if ! grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then
+ sed -i 's|- notifier://|- notifier://?topic=alarm.all|' /etc/ceilometer/event_pipeline.yaml
+ echo "modify the ceilometer config"
+ service ceilometer-agent-notification restart
+ fi
+else
+ echo "ceilometer event_pipeline.yaml file does not exist"
+ exit 1
+fi
+if [ -e /etc/nova/nova.conf ]; then
+ if ! grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then
+ sed -i -r 's/notification_driver=/notification_driver=messaging/g' /etc/nova/nova.conf
+ echo "modify nova config"
+ service nova-api restart
+ fi
+else
+ echo "nova.conf file does not exist"
+ exit 1
+fi
+exit 0
+END_TXT
+
+ chmod +x set_conf.sh
+ CONTROLLER_IP=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \
+ "fuel node | grep controller | cut -d '|' -f 5|xargs")
+ for node in $CONTROLLER_IP;do
+ scp $ssh_opts_cpu set_conf.sh "root@$node:"
+ ssh $ssh_opts_cpu "root@$node" './set_conf.sh > set_conf.log 2>&1 &'
+ sleep 1
+ scp $ssh_opts_cpu "root@$node:set_conf.log" set_conf_$node.log
+ done
+
+ if grep -q "modify the ceilometer config" set_conf_*.log ; then
+ NEED_TO_RESTORE_CEILOMETER=true
+ fi
+ if grep -q "modify nova config" set_conf_*.log ; then
+ NEED_TO_RESTORE_NOVA=true
+ fi
+
+ echo "waiting service restart..."
+ sleep 60
+
+}
+
+# Reverts the configuration changes recorded by NEED_TO_RESTORE_CEILOMETER
+# and NEED_TO_RESTORE_NOVA on every node in CONTROLLER_IP
+# Generates restore_conf.sh with the two flags substituted in, copies it
+# to each node, runs it there in the background and waits 60 seconds.
+# Returns at once when neither flag is true.
+function cleanup_installer_fuel {
+    # Either flag may be unset when nothing was patched. Normalise both to
+    # a literal true/false so that they are never run as empty commands
+    # here and never leave an empty condition in the generated script.
+    local restore_ceilometer=false
+    local restore_nova=false
+    [[ "${NEED_TO_RESTORE_CEILOMETER:-}" == "true" ]] && restore_ceilometer=true
+    [[ "${NEED_TO_RESTORE_NOVA:-}" == "true" ]] && restore_nova=true
+
+    if ! ($restore_ceilometer || $restore_nova) ; then
+        echo "Don't need to restore config"
+        # Return instead of exit, so that the caller can carry on with the
+        # rest of its cleanup and keep its own exit status.
+        return 0
+    fi
+
+    echo "restore the configuration..."
+    # The quoted delimiter keeps the script text literal; the two
+    # @...@ placeholders are filled in by sed below.
+    cat > restore_conf.sh << 'END_TXT'
+#!/bin/bash
+if @NEED_TO_RESTORE_CEILOMETER@ ; then
+ if [ -e /etc/ceilometer/event_pipeline.yaml ]; then
+ if grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then
+ sed -i 's|- notifier://?topic=alarm.all|- notifier://|' /etc/ceilometer/event_pipeline.yaml
+ service ceilometer-agent-notification restart
+ fi
+ else
+ echo "ceilometer event_pipeline.yaml file does not exist"
+ exit 1
+ fi
+fi
+if @NEED_TO_RESTORE_NOVA@ ; then
+ if [ -e /etc/nova/nova.conf ]; then
+ if grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then
+ sed -i -r 's/notification_driver=messaging/notification_driver=/g' /etc/nova/nova.conf
+ service nova-api restart
+ fi
+ else
+ echo "nova.conf file does not exist"
+ exit 1
+ fi
+fi
+exit 0
+END_TXT
+    sed -i -e "s/@NEED_TO_RESTORE_CEILOMETER@/$restore_ceilometer/" restore_conf.sh
+    sed -i -e "s/@NEED_TO_RESTORE_NOVA@/$restore_nova/" restore_conf.sh
+    chmod +x restore_conf.sh
+    for node in $CONTROLLER_IP;do
+        scp $ssh_opts_cpu restore_conf.sh "root@$node:"
+        ssh $ssh_opts_cpu "root@$node" './restore_conf.sh > restore_conf.log 2>&1 &'
+    done
+
+    echo "waiting service restart..."
+    sleep 60
+}
diff --git a/tests/lib/installers/local b/tests/lib/installers/local
new file mode 100644
index 00000000..e7aed14f
--- /dev/null
+++ b/tests/lib/installers/local
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# No-op hook: no installer address is looked up for the local installer
+function get_installer_ip {
+    # Hand back the status that was current on entry, untouched.
+    return $?
+}
+
+# Fetches no keys for the local installer; only reports the assumption
+# that ssh access to COMPUTE_HOST is already in place
+function installer_get_ssh_keys {
+    echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
+}
+
+# No-op hook: nothing is patched for the local installer
+function installer_apply_patches {
+    # Hand back the status that was current on entry, untouched.
+    return $?
+}
+
+# No-op hook: nothing to clean up for the local installer
+function cleanup_installer_local {
+    # Hand back the status that was current on entry, untouched.
+    return $?
+}
diff --git a/tests/logger.py b/tests/logger.py
new file mode 100644
index 00000000..a4f33234
--- /dev/null
+++ b/tests/logger.py
@@ -0,0 +1,47 @@
+##############################################################################
+# Copyright (c) 2016 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+# Usage:
+# import logger as doctor_log
+# logger = doctor_log.Logger("script_name").getLogger()
+# logger.info("message to be shown with - INFO - ")
+# logger.debug("message to be shown with - DEBUG -")
+
+import logging
+import os
+
+
+class Logger:
+ def __init__(self, logger_name):
+
+ CI_DEBUG = os.getenv('CI_DEBUG')
+
+ self.logger = logging.getLogger(logger_name)
+ self.logger.propagate = 0
+ self.logger.setLevel(logging.DEBUG)
+
+ formatter = logging.Formatter('%(asctime)s %(filename)s %(lineno)d '
+ '%(levelname)-6s %(message)s')
+
+ ch = logging.StreamHandler()
+ ch.setFormatter(formatter)
+ if CI_DEBUG is not None and CI_DEBUG.lower() == "true":
+ ch.setLevel(logging.DEBUG)
+ else:
+ ch.setLevel(logging.INFO)
+ self.logger.addHandler(ch)
+
+ file_handler = logging.FileHandler('%s.log' % logger_name)
+ file_handler.setFormatter(formatter)
+ file_handler.setLevel(logging.DEBUG)
+ self.logger.addHandler(file_handler)
+
+
+ def getLogger(self):
+ return self.logger
+
diff --git a/tests/monitor.py b/tests/monitor.py
index caf4c321..75d82392 100644
--- a/tests/monitor.py
+++ b/tests/monitor.py
@@ -10,6 +10,7 @@
import argparse
from datetime import datetime
import json
+import logger as doctor_log
import os
import requests
import socket
@@ -26,6 +27,8 @@ ICMP_ECHO_MESSAGE = '\x08\x00\xf7\xff\x00\x00\x00\x00'
SUPPORTED_INSPECTOR_TYPES = ['sample', 'congress']
+LOG = doctor_log.Logger('doctor_monitor').getLogger()
+
class DoctorMonitorSample(object):
interval = 0.1 # second
@@ -58,8 +61,8 @@ class DoctorMonitorSample(object):
(congress_endpoint, doctor_ds['id']))
def start_loop(self):
- print "start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname,
- 'i': self.ip_addr}
+ LOG.debug("start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname,
+ 'i': self.ip_addr})
sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
socket.IPPROTO_ICMP)
sock.settimeout(self.timeout)
@@ -68,40 +71,38 @@ class DoctorMonitorSample(object):
sock.sendto(ICMP_ECHO_MESSAGE, (self.ip_addr, 0))
data = sock.recv(4096)
except socket.timeout:
- print "doctor monitor detected at %s" % time.time()
+ LOG.info("doctor monitor detected at %s" % time.time())
self.report_error()
- print "ping timeout, quit monitoring..."
+ LOG.info("ping timeout, quit monitoring...")
return
time.sleep(self.interval)
def report_error(self):
+ payload = [
+ {
+ 'id': 'monitor_sample_id1',
+ 'time': datetime.now().isoformat(),
+ 'type': self.event_type,
+ 'details': {
+ 'hostname': self.hostname,
+ 'status': 'down',
+ 'monitor': 'monitor_sample',
+ 'monitor_event_id': 'monitor_sample_event1'
+ },
+ },
+ ]
+ data = json.dumps(payload)
+
if self.inspector_type == 'sample':
- payload = {"type": self.event_type, "hostname": self.hostname}
- data = json.dumps(payload)
headers = {'content-type': 'application/json'}
requests.post(self.inspector_url, data=data, headers=headers)
elif self.inspector_type == 'congress':
- data = [
- {
- 'id': 'monitor_sample_id1',
- 'time': datetime.now().isoformat(),
- 'type': self.event_type,
- 'details': {
- 'hostname': self.hostname,
- 'status': 'down',
- 'monitor': 'monitor_sample',
- 'monitor_event_id': 'monitor_sample_event1'
- },
- },
- ]
-
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json',
'X-Auth-Token':self.session.get_token(),
}
-
- requests.put(self.inspector_url, data=json.dumps(data), headers=headers)
+ requests.put(self.inspector_url, data=data, headers=headers)
def get_args():
diff --git a/tests/profiler-poc.py b/tests/profiler-poc.py
new file mode 100644
index 00000000..71034781
--- /dev/null
+++ b/tests/profiler-poc.py
@@ -0,0 +1,87 @@
+##############################################################################
+# Copyright (c) 2016 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+"""
+PoC of performance profiler for OPNFV doctor project
+
+Usage:
+
+Export environment variables to set timestamp at each checkpoint in millisecond.
+Valid check points are: DOCTOR_PROFILER_T{00-09}
+
+See also: https://goo.gl/98Osig
+"""
+
+import json
+import os
+
+# Base name of the JSON file that main() writes; '.json' is appended
+LOGFILE = 'performance-profile'
+# Prefix of the environment variables holding the check point values
+PREFIX = 'DOCTOR_PROFILER'
+# Number of check points, named T00 up to T09
+TOTAL_CHECK_POINTS = 10
+# Each pair of neighbours in this list bounds one module (M00, M01, ...)
+MODULE_CHECK_POINTS = ['T00', 'T01', 'T04', 'T05', 'T06', 'T09']
+# Every tag is left-aligned and padded to at least five characters
+TAG_FORMAT = "{:<5}"
+# Inspired by https://github.com/reorx/httpstat
+TEMPLATE = """
+Total time cost: {total}(ms)
+==============================================================================>
+ |Monitor|Inspector |Controller|Notifier|Evaluator |
+ |{M00} |{M01} |{M02} |{M03} |{M04} |
+ | | | | | | | | | |
+host down:{T00}| | | | | | | | |
+ raw failure:{T01}| | | | | | | |
+ found affected:{T02}| | | | | | |
+ set VM error:{T03}| | | | | |
+ marked host down:{T04}| | | | |
+ notified VM error:{T05} | | | |
+ transformed event:{T06}| | |
+ evaluated event:{T07}| |
+ fired alarm:{T08}|
+ received alarm:{T09}
+"""
+
+
+def main():
+ check_points = ["T{:02d}".format(i) for i in range(TOTAL_CHECK_POINTS)]
+ module_map = {"M{:02d}".format(i):
+ (MODULE_CHECK_POINTS[i], MODULE_CHECK_POINTS[i + 1])
+ for i in range(len(MODULE_CHECK_POINTS) - 1)}
+
+ # check point tags
+ elapsed_ms = {cp: os.getenv("{}_{}".format(PREFIX, cp))
+ for cp in check_points}
+
+ def format_tag(tag):
+ return TAG_FORMAT.format(tag or '?')
+
+ tags = {cp: format_tag(ms) for cp, ms in elapsed_ms.iteritems()}
+
+ def time_cost(cp):
+ if elapsed_ms[cp[0]] and elapsed_ms[cp[1]]:
+ return int(elapsed_ms[cp[1]]) - int(elapsed_ms[cp[0]])
+ else:
+ return None
+
+ # module time cost tags
+ modules_cost_ms = {module: time_cost(cp)
+ for module, cp in module_map.iteritems()}
+
+ tags.update({module: format_tag(cost)
+ for module, cost in modules_cost_ms.iteritems()})
+
+ tags.update({'total': time_cost((check_points[0], check_points[-1]))})
+
+ profile = TEMPLATE.format(**tags)
+
+ logfile = open('{}.json'.format(LOGFILE), 'w')
+ logfile.write(json.dumps(tags))
+
+ print profile
+
+if __name__ == '__main__':
+ main()
diff --git a/tests/run.sh b/tests/run.sh
index 99e8feff..206f6a40 100755
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -8,6 +8,8 @@
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
+# Configuration
+
[[ "${CI_DEBUG:-true}" == [Tt]rue ]] && set -x
IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
@@ -25,231 +27,71 @@ DOCTOR_PW=doctor
DOCTOR_PROJECT=doctor
#TODO: change back to `_member_` when JIRA DOCTOR-55 is done
DOCTOR_ROLE=admin
+PROFILER_TYPE=${PROFILER_TYPE:-none}
-SUPPORTED_INSTALLER_TYPES="apex fuel local"
-INSTALLER_TYPE=${INSTALLER_TYPE:-local}
-INSTALLER_IP=${INSTALLER_IP:-none}
-
-SUPPORTED_INSPECTOR_TYPES="sample congress"
-INSPECTOR_TYPE=${INSPECTOR_TYPE:-sample}
+TOP_DIR=$(cd "$(dirname "$0")" && pwd)
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
as_doctor_user="--os-username $DOCTOR_USER --os-password $DOCTOR_PW
--os-tenant-name $DOCTOR_PROJECT"
-if [[ ! "$SUPPORTED_INSTALLER_TYPES" =~ "$INSTALLER_TYPE" ]] ; then
- echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
- exit 1
-fi
-
-if [[ ! "$SUPPORTED_INSPECTOR_TYPES" =~ "$INSPECTOR_TYPE" ]] ; then
- echo "ERROR: INSPECTOR_TYPE=$INSPECTOR_TYPE is not supported."
- exit 1
-fi
-
-get_installer_ip() {
- if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
- if [[ "$INSTALLER_IP" == "none" ]] ; then
- instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
- INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}')
- fi
- elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then
- if [[ "$INSTALLER_IP" == "none" ]] ; then
- instack_mac=$(sudo virsh domiflist fuel-opnfv | awk '/pxebr/{print $5}')
- INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}')
- fi
- fi
-
- if [[ "$INSTALLER_TYPE" != "local" ]] ; then
- if [[ -z "$INSTALLER_IP" ]] ; then
- echo "ERROR: no installer ip"
- exit 1
- fi
- fi
-}
-
-prepare_ssh_to_cloud() {
- ssh_opts_cpu="$ssh_opts"
-
- # get ssh key from installer node
- if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
- sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
- sudo chown $(whoami):$(whoami) instack_key
- chmod 400 instack_key
- ssh_opts_cpu+=" -i instack_key"
- elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then
- sshpass -p r00tme scp $ssh_opts root@${INSTALLER_IP}:.ssh/id_rsa instack_key
- sudo chown $(whoami):$(whoami) instack_key
- chmod 400 instack_key
- ssh_opts_cpu+=" -i instack_key"
- elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
- echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
- fi
-}
-prepare_test_env() {
- #TODO delete it when fuel support the configuration
- if [[ "$INSTALLER_TYPE" == "fuel" ]] ; then
- echo "modify the configuration..."
- cat > set_conf.sh << 'END_TXT'
-#!/bin/bash
-if [ -e /etc/ceilometer/event_pipeline.yaml ]; then
- if ! grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then
- sed -i 's|- notifier://|- notifier://?topic=alarm.all|' /etc/ceilometer/event_pipeline.yaml
- echo "modify the ceilometer config"
- service ceilometer-agent-notification restart
- fi
-else
- echo "ceilometer event_pipeline.yaml file does not exist"
- exit 1
-fi
-if [ -e /etc/nova/nova.conf ]; then
- if ! grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then
- sed -i -r 's/notification_driver=/notification_driver=messaging/g' /etc/nova/nova.conf
- echo "modify nova config"
- service nova-api restart
- fi
-else
- echo "nova.conf file does not exist"
- exit 1
-fi
-exit 0
-END_TXT
- chmod +x set_conf.sh
- CONTROLLER_IP=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \
- "fuel node | grep controller | cut -d '|' -f 5|xargs")
- for node in $CONTROLLER_IP;do
- scp $ssh_opts_cpu set_conf.sh "root@$node:"
- ssh $ssh_opts_cpu "root@$node" './set_conf.sh > set_conf.log 2>&1 &'
- sleep 1
- scp $ssh_opts_cpu "root@$node:set_conf.log" set_conf_$node.log
- done
-
- if grep -q "modify the ceilometer config" set_conf_*.log ; then
- NEED_TO_RESTORE_CEILOMETER=true
- fi
- if grep -q "modify nova config" set_conf_*.log ; then
- NEED_TO_RESTORE_NOVA=true
- fi
-
- echo "waiting service restart..."
- sleep 60
- fi
-}
-
-restore_test_env() {
- #TODO delete it when fuel support the configuration
- if [[ "$INSTALLER_TYPE" == "fuel" ]] ; then
- if ! ($NEED_TO_RESTORE_CEILOMETER || $NEED_TO_RESTORE_NOVA) ; then
- echo "Don't need to restore config"
- exit 0
- fi
-
- echo "restore the configuration..."
- cat > restore_conf.sh << 'END_TXT'
-#!/bin/bash
-if @NEED_TO_RESTORE_CEILOMETER@ ; then
- if [ -e /etc/ceilometer/event_pipeline.yaml ]; then
- if grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then
- sed -i 's|- notifier://?topic=alarm.all|- notifier://|' /etc/ceilometer/event_pipeline.yaml
- service ceilometer-agent-notification restart
- fi
- else
- echo "ceilometer event_pipeline.yaml file does not exist"
- exit 1
- fi
-fi
-if @NEED_TO_RESTORE_NOVA@ ; then
- if [ -e /etc/nova/nova.conf ]; then
- if grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then
- sed -i -r 's/notification_driver=messaging/notification_driver=/g' /etc/nova/nova.conf
- service nova-api restart
- fi
- else
- echo "nova.conf file does not exist"
- exit 1
- fi
-fi
-exit 0
-END_TXT
- sed -i -e "s/@NEED_TO_RESTORE_CEILOMETER@/$NEED_TO_RESTORE_CEILOMETER/" restore_conf.sh
- sed -i -e "s/@NEED_TO_RESTORE_NOVA@/$NEED_TO_RESTORE_NOVA/" restore_conf.sh
- chmod +x restore_conf.sh
- for node in $CONTROLLER_IP;do
- scp $ssh_opts_cpu restore_conf.sh "root@$node:"
- ssh $ssh_opts_cpu "root@$node" './restore_conf.sh > restore_conf.log 2>&1 &'
- done
-
- echo "waiting service restart..."
- sleep 60
- fi
-}
+# Functions
get_compute_host_info() {
# get computer host info which VM boot in
COMPUTE_HOST=$(openstack $as_doctor_user server show $VM_NAME |
grep "OS-EXT-SRV-ATTR:host" | awk '{ print $4 }')
compute_host_in_undercloud=${COMPUTE_HOST%%.*}
- if [[ -z "$COMPUTE_HOST" ]] ; then
- echo "ERROR: failed to get compute hostname"
- exit 1
- fi
+ die_if_not_set $LINENO COMPUTE_HOST "Failed to get compute hostname"
- if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
+ if is_installer apex; then
COMPUTE_USER=${COMPUTE_USER:-heat-admin}
COMPUTE_IP=$(sudo ssh $ssh_opts $INSTALLER_IP \
"source stackrc; \
nova show $compute_host_in_undercloud \
| awk '/ ctlplane network /{print \$5}'")
- elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then
+ elif is_installer fuel; then
COMPUTE_USER=${COMPUTE_USER:-root}
node_id=$(echo $compute_host_in_undercloud | cut -d "-" -f 2)
COMPUTE_IP=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \
"fuel node|awk -F '|' -v id=$node_id '{if (\$1 == id) print \$5}' |xargs")
- elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
+ elif is_installer local; then
COMPUTE_USER=${COMPUTE_USER:-$(whoami)}
COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
fi
- if [[ -z "$COMPUTE_IP" ]]; then
- echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
- exit 1
- fi
+ die_if_not_set $LINENO COMPUTE_IP "Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
echo "COMPUTE_HOST=$COMPUTE_HOST"
echo "COMPUTE_IP=$COMPUTE_IP"
# verify connectivity to target compute host
ping -c 1 "$COMPUTE_IP"
if [[ $? -ne 0 ]] ; then
- echo "ERROR: can not ping to computer host"
- exit 1
+ die $LINENO "Can not ping to computer host"
fi
# verify ssh to target compute host
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'exit'
if [[ $? -ne 0 ]] ; then
- echo "ERROR: can not ssh to computer host"
- exit 1
+ die $LINENO "Can not ssh to computer host"
fi
}
get_consumer_ip() {
local get_consumer_command="ip route get $COMPUTE_IP | awk '/ src /{print \$NF}'"
- if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
+ if is_installer apex; then
CONSUMER_IP=$(sudo ssh $ssh_opts root@$INSTALLER_IP \
"$get_consumer_command")
- elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then
+ elif is_installer fuel; then
CONSUMER_IP=$(sudo sshpass -p r00tme ssh $ssh_opts root@${INSTALLER_IP} \
"$get_consumer_command")
- elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
+ elif is_installer local; then
CONSUMER_IP=`$get_consumer_command`
fi
echo "CONSUMER_IP=$CONSUMER_IP"
- if [[ -z "$CONSUMER_IP" ]]; then
- echo "ERROR: Could not get CONSUMER_IP."
- exit 1
- fi
+ die_if_not_set $LINENO CONSUMER_IP "Could not get CONSUMER_IP."
}
download_image() {
@@ -312,12 +154,6 @@ create_alarm() {
-q "traits.state=string::error; traits.instance_id=string::$vm_id"
}
-print_log() {
- log_file=$1
- echo "$log_file:"
- sed -e 's/^/ /' "$log_file"
-}
-
start_monitor() {
pgrep -f "python monitor.py" && return 0
sudo -E python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" "$INSPECTOR_TYPE" \
@@ -327,79 +163,6 @@ start_monitor() {
stop_monitor() {
pgrep -f "python monitor.py" || return 0
sudo kill $(pgrep -f "python monitor.py")
- print_log monitor.log
-}
-
-congress_add_rule() {
- name=$1
- policy=$2
- rule=$3
-
- if ! openstack congress policy rule list $policy | grep -q -e "// Name: $name$" ; then
- openstack congress policy rule create --name $name $policy "$rule"
- fi
-}
-
-congress_del_rule() {
- name=$1
- policy=$2
-
- if openstack congress policy rule list $policy | grep -q -e "^// Name: $name$" ; then
- openstack congress policy rule delete $policy $name
- fi
-}
-
-congress_setup_rules() {
- congress_add_rule host_down classification \
- 'host_down(host) :-
- doctor:events(hostname=host, type="compute.host.down", status="down")'
-
- congress_add_rule active_instance_in_host classification \
- 'active_instance_in_host(vmid, host) :-
- nova:servers(id=vmid, host_name=host, status="ACTIVE")'
-
- congress_add_rule host_force_down classification \
- 'execute[nova:services.force_down(host, "nova-compute", "True")] :-
- host_down(host)'
-
- congress_add_rule error_vm_states classification \
- 'execute[nova:servers.reset_state(vmid, "error")] :-
- host_down(host),
- active_instance_in_host(vmid, host)'
-}
-
-start_inspector() {
- if [[ "$INSPECTOR_TYPE" == "sample" ]] ; then
- pgrep -f "python inspector.py" && return 0
- python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 &
- elif [[ "$INSPECTOR_TYPE" == "congress" ]] ; then
- nova_api_min_version="2.11"
- nova_api_version=$(openstack congress datasource list | \
- grep nova | grep -Po "(?<='api_version': ')[^']*")
- [[ -z $nova_api_version ]] && nova_api_version="2.0"
- if [[ "$nova_api_version" < "$nova_api_min_version" ]]; then
- echo "ERROR: Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)"
- exit 1
- fi
- openstack congress driver list | grep -q " doctor "
- openstack congress datasource list | grep -q " doctor " || {
- openstack congress datasource create doctor doctor
- }
- congress_setup_rules
- fi
-}
-
-stop_inspector() {
- if [[ "$INSPECTOR_TYPE" == "sample" ]] ; then
- pgrep -f "python inspector.py" || return 0
- kill $(pgrep -f "python inspector.py")
- print_log inspector.log
- elif [[ "$INSPECTOR_TYPE" == "congress" ]] ; then
- congress_del_rule host_force_down classification
- congress_del_rule error_vm_states classification
- congress_del_rule active_instance_in_host classification
- congress_del_rule host_down classification
- fi
}
start_consumer() {
@@ -409,21 +172,18 @@ start_consumer() {
# NOTE(r-mibu): create tunnel to the controller nodes, so that we can
# avoid some network problems dpends on infra and installers.
# This tunnel will be terminated by stop_consumer() or after 10 mins passed.
- if [[ "$INSTALLER_TYPE" != "local" ]] ; then
- if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
+ if ! is_installer local; then
+ if is_installer apex; then
CONTROLLER_IPS=$(sudo ssh $ssh_opts $INSTALLER_IP \
"source stackrc; \
nova list | grep ' overcloud-controller-[0-9] ' \
| sed -e 's/^.*ctlplane=//' -e 's/ *|\$//'")
- elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then
+ elif is_installer fuel; then
CONTROLLER_IPS=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \
"fuel node | grep controller | cut -d '|' -f 5|xargs")
fi
- if [[ -z "$CONTROLLER_IPS" ]]; then
- echo "ERROR: Could not get CONTROLLER_IPS."
- exit 1
- fi
+ die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS."
for ip in $CONTROLLER_IPS
do
forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT"
@@ -436,16 +196,14 @@ start_consumer() {
stop_consumer() {
pgrep -f "python consumer.py" || return 0
kill $(pgrep -f "python consumer.py")
- print_log consumer.log
# NOTE(r-mibu): terminate tunnels to the controller nodes
- if [[ "$INSTALLER_TYPE" != "local" ]] ; then
+ if ! is_installer local; then
for ip in $CONTROLLER_IPS
do
forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT"
tunnel_command="sudo ssh $ssh_opts_cpu $COMPUTE_USER@$ip $forward_rule sleep 600"
kill $(pgrep -f "$tunnel_command")
- print_log "ssh_tunnel.${ip}.log"
done
fi
}
@@ -463,12 +221,13 @@ wait_for_vm_launch() {
sleep 5
return 0
fi
- [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1
+ if [[ "$state" == "ERROR" ]]; then
+ die $LINENO "vm state is ERROR"
+ fi
count=$(($count+1))
sleep 1
done
- echo "ERROR: time out while waiting for vm launch"
- exit 1
+ die $LINENO "Time out while waiting for VM launch"
}
inject_failure() {
@@ -478,6 +237,7 @@ inject_failure() {
dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $7}')
sleep 1
sudo ip link set $dev down
+echo "doctor set host down at" $(date "+%s.%N")
sleep 180
sudo ip link set $dev up
sleep 1
@@ -488,13 +248,40 @@ END_TXT
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
}
+profile_performance_poc() {
+ triggered=$(grep "^doctor set host down at" disable_network.log |\
+ sed -e "s/^.* at //")
+ vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
+ sed -e "s/^.* at //")
+ hostdown=$(grep "doctor mark host.* down at" inspector.log |\
+ sed -e "s/^.* at //")
+
+ # Calculate each interval relative to the trigger time (T00)
+ export DOCTOR_PROFILER_T00=0
+ export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc)
+ export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc)
+ export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc)
+ export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc)
+
+ python profiler-poc.py
+}
+
calculate_notification_time() {
- detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $5}')
- notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $5}')
if ! grep -q "doctor consumer notified at" consumer.log ; then
- echo "ERROR: consumer hasn't received fault notification."
- exit 1
+ die $LINENO "Consumer hasn't received fault notification."
+ fi
+
+ # Log lines keep 'at' as the last keyword right before the value, so
+ # extract the value with a regex instead of relying on a fixed column
+ detected=$(grep "doctor monitor detected at" monitor.log |\
+ sed -e "s/^.* at //")
+ notified=$(grep "doctor consumer notified at" consumer.log |\
+ sed -e "s/^.* at //")
+
+ if [[ "$PROFILER_TYPE" == "poc" ]]; then
+ profile_performance_poc
fi
+
echo "$notified $detected" | \
awk '{
d = $1 - $2;
@@ -509,14 +296,11 @@ check_host_status() {
host_status_line=$(openstack $as_doctor_user --os-compute-api-version 2.16 \
server show $VM_NAME | grep "host_status")
host_status=$(echo $host_status_line | awk '{print $4}')
- if [ -z "$host_status" ] ; then
- echo "ERROR: host_status not reported by: nova show $VM_NAME"
- exit 1
- elif [[ "$expected_state" =~ "$host_status" ]] ; then
+ die_if_not_set $LINENO host_status "host_status not reported by: nova show $VM_NAME"
+ if [[ "$expected_state" =~ "$host_status" ]] ; then
echo "$VM_NAME showing host_status: $host_status"
else
- echo "ERROR: host_status:$host_status not equal to expected_state: $expected_state"
- exit 1
+ die $LINENO "host_status:$host_status not equal to expected_state: $expected_state"
fi
}
@@ -532,7 +316,6 @@ cleanup() {
sleep 240
check_host_status "UP"
scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" .
- print_log disable_network.log
openstack $as_doctor_user server list | grep -q " $VM_NAME " && openstack $as_doctor_user server delete "$VM_NAME"
sleep 1
@@ -552,18 +335,21 @@ cleanup() {
openstack project delete "$DOCTOR_PROJECT"
openstack user delete "$DOCTOR_USER"
- restore_test_env
+ cleanup_installer
+ cleanup_inspector
}
+# Main process
echo "Note: doctor/tests/run.sh has been executed."
trap cleanup EXIT
-echo "preparing test env..."
-get_installer_ip
-prepare_ssh_to_cloud
-prepare_test_env
+source $TOP_DIR/functions-common
+source $TOP_DIR/lib/installer
+source $TOP_DIR/lib/inspector
+
+setup_installer
echo "preparing VM image..."
download_image