From 843ea84d9da3a4a20a7ad388d94ae8c861380254 Mon Sep 17 00:00:00 2001 From: Maryam Tahhan Date: Fri, 2 Dec 2016 17:32:22 +0000 Subject: src: build all dependencies and plugins Build all dependencies and plugins. Provide sample plugin configurations and update user guide. Change-Id: I5170c84529e514e12bb1bd4dc34ecdd93eb764d7 Signed-off-by: Maryam Tahhan --- docs/configguide/index.rst | 4 - docs/index.rst | 12 +- docs/requirements/02-collectd.rst | 103 +++++++++++ docs/requirements/03-dpdk.rst | 170 ++++++++++++++++++ docs/requirements/dpdk_ka.png | Bin 0 -> 100808 bytes docs/requirements/index.rst | 2 + docs/requirements/stats_and_timestamps.png | Bin 0 -> 52193 bytes docs/userguide/collectd.userguide.rst | 195 +++++---------------- docs/userguide/dpdk_ka.png | Bin 100808 -> 0 bytes docs/userguide/index.rst | 5 +- docs/userguide/keepalive.userguide.rst | 128 -------------- docs/userguide/stats_and_timestamps.png | Bin 52193 -> 0 bytes src/Makefile | 1 + src/collectd/Makefile | 27 ++- src/collectd/collectd_sample_configs/csv.conf | 19 ++ src/collectd/collectd_sample_configs/dpdkstat.conf | 24 +++ src/collectd/collectd_sample_configs/exec.conf | 20 +++ .../collectd_sample_configs/hugepages.conf | 23 +++ .../collectd_sample_configs/ovs_events.conf | 25 +++ src/collectd/collectd_sample_configs/rdt.conf | 20 +++ .../collectd_sample_configs/write_notification.sh | 19 ++ src/collectd/include_config.sh | 12 ++ src/install_build_deps.sh | 74 ++++++++ src/libpqos/Makefile | 77 ++++++++ src/package-list.mk | 5 +- 25 files changed, 669 insertions(+), 296 deletions(-) create mode 100755 docs/requirements/02-collectd.rst create mode 100644 docs/requirements/03-dpdk.rst create mode 100644 docs/requirements/dpdk_ka.png create mode 100644 docs/requirements/stats_and_timestamps.png delete mode 100644 docs/userguide/dpdk_ka.png delete mode 100644 docs/userguide/keepalive.userguide.rst delete mode 100644 docs/userguide/stats_and_timestamps.png create mode 100644 
src/collectd/collectd_sample_configs/csv.conf create mode 100644 src/collectd/collectd_sample_configs/dpdkstat.conf create mode 100644 src/collectd/collectd_sample_configs/exec.conf create mode 100644 src/collectd/collectd_sample_configs/hugepages.conf create mode 100644 src/collectd/collectd_sample_configs/ovs_events.conf create mode 100644 src/collectd/collectd_sample_configs/rdt.conf create mode 100755 src/collectd/collectd_sample_configs/write_notification.sh create mode 100755 src/collectd/include_config.sh create mode 100755 src/install_build_deps.sh create mode 100644 src/libpqos/Makefile diff --git a/docs/configguide/index.rst b/docs/configguide/index.rst index d5b161f6..d97583c2 100644 --- a/docs/configguide/index.rst +++ b/docs/configguide/index.rst @@ -2,10 +2,6 @@ .. http://creativecommons.org/licenses/by/4.0 .. Copyright (c) 2016 Open Platform for NFV Project, Inc. and its contributors -================================== -Barometer Guides and Installation -================================== - .. toctree:: :numbered: :maxdepth: 3 diff --git a/docs/index.rst b/docs/index.rst index 00e13366..a6d3b939 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,9 +2,9 @@ .. http://creativecommons.org/licenses/by/4.0 .. (c) OPNFV, Intel Corporation and others. -********** +========= Barometer -********** +========= :Project: Barometer, https://wiki.opnfv.org/display/fastpath/Barometer+Home :Authors: Maryam Tahhan @@ -24,7 +24,7 @@ Table of Contents: :maxdepth: 4 :numbered: - requirements/index.rst - configguide/index.rst - userguide/index.rst - release/index.rst + ./requirements/index.rst + ./configguide/index.rst + ./userguide/index.rst + ./release/index.rst diff --git a/docs/requirements/02-collectd.rst b/docs/requirements/02-collectd.rst new file mode 100755 index 00000000..2303fadc --- /dev/null +++ b/docs/requirements/02-collectd.rst @@ -0,0 +1,103 @@ +.. This work is licensed under a Creative Commons Attribution 4.0 International License. +.. 
http://creativecommons.org/licenses/by/4.0 +.. (c) OPNFV, Intel Corporation and others. + +collectd +~~~~~~~~ +collectd is a daemon which collects system performance statistics periodically +and provides a variety of mechanisms to publish the collected metrics. It +supports more than 90 different input and output plugins. Input plugins retrieve +metrics and publish them to the collectd daemon, while output plugins publish +the data they receive to an end point. collectd also has infrastructure to +support thresholding and notification. + +collectd statistics and Notifications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Within collectd notifications and performance data are dispatched in the same +way. There are producer plugins (plugins that create notifications/metrics), +and consumer plugins (plugins that receive notifications/metrics and do +something with them). + +Statistics in collectd consist of a value list. A value list includes: + +* Values, can be one of: + + * Derive: used for values where a change in the value since it was last + read is of interest. Can be used to calculate and store a rate. + + * Counter: similar to derive values, but takes the possibility of a counter + wrap around into consideration. + + * Gauge: used for values that are stored as is. + + * Absolute: used for counters that are reset after reading. + +* Value length: the number of values in the data set. + +* Time: timestamp at which the value was collected. + +* Interval: interval at which to expect a new value. + +* Host: used to identify the host. + +* Plugin: used to identify the plugin. + +* Plugin instance (optional): used to group a set of values together, e.g. + values belonging to a DPDK interface. + +* Type: unit used to measure a value. In other words used to refer to a data + set. + +* Type instance (optional): used to distinguish between values that have an + identical type. 
+ +* meta data: an opaque data structure that enables the passing of additional + information about a value list. "Meta data in the global cache can be used to + store arbitrary information about an identifier" [7]. + +Host, plugin, plugin instance, type and type instance uniquely identify a +collectd value. + +Value lists are often accompanied by data sets that describe the values in more +detail. Data sets consist of: + +* A type: a name which uniquely identifies a data set. + +* One or more data sources (entries in a data set) which include: + + * The name of the data source. If there is only a single data source this is + set to "value". + + * The type of the data source, one of: counter, gauge, absolute or derive. + + * A min and a max value. + +Types in collectd are defined in types.db. Examples of types in types.db: + +.. code-block:: console + + bitrate value:GAUGE:0:4294967295 + counter value:COUNTER:U:U + if_octets rx:COUNTER:0:4294967295, tx:COUNTER:0:4294967295 + +In the example above if_octets has two data sources: tx and rx. + +Notifications in collectd are generic messages containing: + +* An associated severity, which can be one of OKAY, WARNING, and FAILURE. + +* A time. + +* A message. + +* A host. + +* A plugin. + +* A plugin instance (optional). + +* A type. + +* A type instance (optional). + +* Meta-data. diff --git a/docs/requirements/03-dpdk.rst b/docs/requirements/03-dpdk.rst new file mode 100644 index 00000000..ad7c8c78 --- /dev/null +++ b/docs/requirements/03-dpdk.rst @@ -0,0 +1,170 @@ +.. This work is licensed under a Creative Commons Attribution 4.0 International License. +.. http://creativecommons.org/licenses/by/4.0 +.. (c) OPNFV, Intel Corporation and others. + +DPDK Enhancements +================== +This section will discuss the Barometer features that were integrated with DPDK. 
+ +Measuring Telco Traffic and Performance KPIs +-------------------------------------------- +This section will discuss the Barometer features that enable Measuring Telco Traffic +and Performance KPIs. + +.. Figure:: stats_and_timestamps.png + + Measuring Telco Traffic and Performance KPIs + +* The very first thing Barometer enabled was a call-back API in DPDK and an + associated application that used the API to demonstrate how to timestamp + packets and measure packet latency in DPDK (the sample app is called + rxtx_callbacks). This was upstreamed to DPDK 2.0 and is represented by + the interfaces 1 and 2 in Figure 1.2. + +* The second thing Barometer implemented in DPDK is the extended NIC statistics API, + which exposes NIC stats including error stats to the DPDK user by reading the + registers on the NIC. This is represented by interface 3 in Figure 1.2. + + * For DPDK 2.1 this API was only implemented for the ixgbe (10Gb) NIC driver, + in association with a sample application that runs as a DPDK secondary + process and retrieves the extended NIC stats. + + * For DPDK 2.2 the API was implemented for igb, i40e and all the Virtual + Functions (VFs) for all drivers. + + * For DPDK 16.07 the API migrated from using string value pairs to using id + value pairs, improving the overall performance of the API. + +Monitoring DPDK interfaces +-------------------------- +With the features Barometer enabled in DPDK to enable measuring Telco traffic and +performance KPIs, we can now retrieve NIC statistics including error stats and +relay them to a DPDK user. The next step is to enable monitoring of the DPDK +interfaces based on the stats that we are retrieving from the NICs, by relaying +the information to a higher level Fault Management entity. To enable this Barometer +has been enabling a number of plugins for collectd. 
+ +DPDK Keep Alive description +--------------------------- +SFQM aims to enable fault detection within DPDK; the very first feature to +meet this goal is the DPDK Keep Alive Sample app that is part of DPDK 2.2. + +DPDK Keep Alive or KA is a sample application that acts as a heartbeat/watchdog +for DPDK packet processing cores, to detect application thread failure. The +application supports the detection of ‘failed’ DPDK cores and notification to a +HA/SA middleware. The purpose is to detect Packet Processing Core failures (e.g. +infinite loop) and ensure the failure of the core does not result in a fault +that is not detectable by a management entity. + +.. Figure:: dpdk_ka.png + + DPDK Keep Alive Sample Application + +Essentially the app demonstrates how to detect 'silent outages' on DPDK packet +processing cores. The application can be decomposed into two specific parts: +detection and notification. + +* The detection period is programmable/configurable but defaults to 5ms if no + timeout is specified. +* The Notification support is enabled by simply having a hook function that + can provide 'call back support' for a fault management application with a compliant + heartbeat mechanism. + +DPDK Keep Alive Sample App Internals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This section provides some explanation of the Keep-Alive/'Liveliness' +conceptual scheme as well as the DPDK Keep Alive App. The initialization and +run-time paths are very similar to those of the L2 forwarding application (see +`L2 Forwarding Sample Application (in Real and Virtualized Environments)`_ for more +information). + +There are two types of cores: a Keep Alive Monitor Agent Core (master DPDK core) +and Worker cores (Tx/Rx/Forwarding cores). The Keep Alive Monitor Agent Core +will supervise worker cores and report any failure (2 successive missed pings). +The Keep-Alive/'Liveliness' conceptual scheme is: + +* DPDK worker cores mark their liveliness as they forward traffic. 
+* A Keep Alive Monitor Agent Core runs a function every N Milliseconds to + inspect worker core liveliness. +* If keep-alive agent detects time-outs, it notifies the fault management + entity through a call-back function. + +**Note:** Only the worker cores' state is monitored. There is no mechanism or agent +to monitor the Keep Alive Monitor Agent Core. + +DPDK Keep Alive Sample App Code Internals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The following section provides some explanation of the code aspects that are +specific to the Keep Alive sample application. + +The heartbeat functionality is initialized with a struct rte_heartbeat and the +callback function to invoke in the case of a timeout. + +.. code:: c + + rte_global_keepalive_info = rte_keepalive_create(&dead_core, NULL); + if (rte_global_keepalive_info == NULL) + rte_exit(EXIT_FAILURE, "keepalive_create() failed"); + +The function that issues the pings hbeat_dispatch_pings() is configured to run +every check_period milliseconds. + +.. code:: c + + if (rte_timer_reset(&hb_timer, + (check_period * rte_get_timer_hz()) / 1000, + PERIODICAL, + rte_lcore_id(), + &hbeat_dispatch_pings, rte_global_keepalive_info + ) != 0 ) + rte_exit(EXIT_FAILURE, "Keepalive setup failure.\n"); + +The rest of the initialization and run-time path follows the same paths as the +L2 forwarding application. The only addition to the main processing loop is +the mark alive functionality and the example random failures. + +.. code:: c + + rte_keepalive_mark_alive(&rte_global_hbeat_info); + cur_tsc = rte_rdtsc(); + + /* Die randomly within 7 secs for demo purposes.. */ + if (cur_tsc - tsc_initial > tsc_lifetime) + break; + +The rte_keepalive_mark_alive() function simply sets the core state to alive. + +.. 
code:: c + + static inline void + rte_keepalive_mark_alive(struct rte_heartbeat *keepcfg) + { + keepcfg->state_flags[rte_lcore_id()] = 1; + } + +Keep Alive Monitor Agent Core Monitoring Options +The application can run on either a host or a guest. As such there are a number +of options for monitoring the Keep Alive Monitor Agent Core through a Local +Agent on the compute node: + + ====================== ========== ============= + Application Location DPDK KA LOCAL AGENT + ====================== ========== ============= + HOST X HOST/GUEST + GUEST X HOST/GUEST + ====================== ========== ============= + + +For the first implementation of a Local Agent SFQM will enable: + + ====================== ========== ============= + Application Location DPDK KA LOCAL AGENT + ====================== ========== ============= + HOST X HOST + ====================== ========== ============= + +Through extending the dpdkstat plugin for collectd with KA functionality, and +integrating the extended plugin with Monasca for high performing, resilient, +and scalable fault detection. + +.. 
_L2 Forwarding Sample Application (in Real and Virtualized Environments): http://dpdk.org/doc/guides/sample_app_ug/l2_forward_real_virtual.html diff --git a/docs/requirements/dpdk_ka.png b/docs/requirements/dpdk_ka.png new file mode 100644 index 00000000..4a45e10c Binary files /dev/null and b/docs/requirements/dpdk_ka.png differ diff --git a/docs/requirements/index.rst b/docs/requirements/index.rst index a9be153d..b36f4513 100644 --- a/docs/requirements/index.rst +++ b/docs/requirements/index.rst @@ -7,3 +7,5 @@ :numbered: 01-intro.rst + 02-collectd.rst + 03-dpdk.rst diff --git a/docs/requirements/stats_and_timestamps.png b/docs/requirements/stats_and_timestamps.png new file mode 100644 index 00000000..84aef726 Binary files /dev/null and b/docs/requirements/stats_and_timestamps.png differ diff --git a/docs/userguide/collectd.userguide.rst b/docs/userguide/collectd.userguide.rst index 5151befc..8bf666e2 100644 --- a/docs/userguide/collectd.userguide.rst +++ b/docs/userguide/collectd.userguide.rst @@ -2,153 +2,8 @@ .. http://creativecommons.org/licenses/by/4.0 .. (c) OPNFV, Intel Corporation and others. -collectd plugins description -============================ -The SFQM collectd plugins enable the ability to monitor DPDK interfaces by -exposing stats and the relevant events to higher level telemetry and fault -management applications. The following sections will discuss the SFQM features -in detail. - -Measuring Telco Traffic and Performance KPIs --------------------------------------------- -This section will discuss the SFQM features that enable Measuring Telco Traffic -and Performance KPIs. - -.. Figure:: stats_and_timestamps.png - - Measuring Telco Traffic and Performance KPIs - -* The very first thing SFQM enabled was a call-back API in DPDK and an - associated application that used the API to demonstrate how to timestamp - packets and measure packet latency in DPDK (the sample app is called - rxtx_callbacks). 
This was upstreamed to DPDK 2.0 and is represented by - the interfaces 1 and 2 in Figure 1.2. - -* The second thing SFQM implemented in DPDK is the extended NIC statistics API, - which exposes NIC stats including error stats to the DPDK user by reading the - registers on the NIC. This is represented by interface 3 in Figure 1.2. - - * For DPDK 2.1 this API was only implemented for the ixgbe (10Gb) NIC driver, - in association with a sample application that runs as a DPDK secondary - process and retrieves the extended NIC stats. - - * For DPDK 2.2 the API was implemented for igb, i40e and all the Virtual - Functions (VFs) for all drivers. - - * For DPDK 16.07 the API migrated from using string value pairs to using id - value pairs, improving the overall performance of the API. - -Monitoring DPDK interfaces --------------------------- -With the features SFQM enabled in DPDK to enable measuring Telco traffic and -performance KPIs, we can now retrieve NIC statistics including error stats and -relay them to a DPDK user. The next step is to enable monitoring of the DPDK -interfaces based on the stats that we are retrieving from the NICs, by relaying -the information to a higher level Fault Management entity. To enable this SFQM -has been enabling a number of plugins for collectd. - -collectd -~~~~~~~~ -collectd is a daemon which collects system performance statistics periodically -and provides a variety of mechanisms to publish the collected metrics. It -supports more than 90 different input and output plugins. Input plugins retrieve -metrics and publish them to the collectd deamon, while output plugins publish -the data they receive to an end point. collectd also has infrastructure to -support thresholding and notification. - -collectd statistics and Notifications -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Within collectd notifications and performance data are dispatched in the same -way. 
There are producer plugins (plugins that create notifications/metrics), -and consumer plugins (plugins that receive notifications/metrics and do -something with them). - -Statistics in collectd consist of a value list. A value list includes: - -* Values, can be one of: - - * Derive: used for values where a change in the value since it's last been - read is of interest. Can be used to calculate and store a rate. - - * Counter: similar to derive values, but take the possibility of a counter - wrap around into consideration. - - * Gauge: used for values that are stored as is. - - * Absolute: used for counters that are reset after reading. - -* Value length: the number of values in the data set. - -* Time: timestamp at which the value was collected. - -* Interval: interval at which to expect a new value. - -* Host: used to identify the host. - -* Plugin: used to identify the plugin. - -* Plugin instance (optional): used to group a set of values together. For e.g. - values belonging to a DPDK interface. - -* Type: unit used to measure a value. In other words used to refer to a data - set. - -* Type instance (optional): used to distinguish between values that have an - identical type. - -* meta data: an opaque data structure that enables the passing of additional - information about a value list. "Meta data in the global cache can be used to - store arbitrary information about an identifier" [7]. - -Host, plugin, plugin instance, type and type instance uniquely identify a -collectd value. - -Values lists are often accompanied by data sets that describe the values in more -detail. Data sets consist of: - -* A type: a name which uniquely identifies a data set. - -* One or more data sources (entries in a data set) which include: - - * The name of the data source. If there is only a single data source this is - set to "value". - - * The type of the data source, one of: counter, gauge, absolute or derive. - - * A min and a max value. 
- -Types in collectd are defined in types.db. Examples of types in types.db: - -.. code-block:: console - - bitrate value:GAUGE:0:4294967295 - counter value:COUNTER:U:U - if_octets rx:COUNTER:0:4294967295, tx:COUNTER:0:4294967295 - -In the example above if_octets has two data sources: tx and rx. - -Notifications in collectd are generic messages containing: - -* An associated severity, which can be one of OKAY, WARNING, and FAILURE. - -* A time. - -* A Message - -* A host. - -* A plugin. - -* A plugin instance (optional). - -* A type. - -* A types instance (optional). - -* Meta-data. - collectd plugins ----------------- +================= Barometer has enabled the following collectd plugins: * dpdkstat plugin: A read plugin that retrieve stats from the DPDK extended @@ -165,12 +20,14 @@ Barometer has enabled the following collectd plugins: * RDT plugin: A read plugin that provides the last level cache utilitzation and memory bandwidth utilization +* Open vSwitch events Plugin: A read plugin that retrieves events from OVS. + All the plugins above are available on the collectd master, except for the ceilometer plugin as it's a python based plugin and only C plugins are accepted by the collectd community. The ceilometer plugin lives in the OpenStack repositories. -Other plugins in progress: +Other plugins under development or existing as a pull request into collectd master: * dpdkevents: A read plugin that retrieves DPDK link status and DPDK forwarding cores liveliness status (DPDK Keep Alive). @@ -178,8 +35,6 @@ Other plugins in progress: * Open vSwitch stats Plugin: A read plugin that retrieve flow and interface stats from OVS. -* Open vSwitch events Plugin: A read plugin that retrieves events from OVS. - * mcelog plugin: A read plugin that uses mcelog client protocol to check for memory Machine Check Exceptions and sends the stats for reported exceptions. 
@@ -190,6 +45,44 @@ Other plugins in progress: * Legacy/IPMI: A read plugin that will report platform thermals, voltages, fanspeed.... +Building collectd with the Barometer plugins and installing the dependencies +============================================================================= +The plugins that have been merged to master can all be built and configured through +the barometer repository. + +**Note**: sudo permissions are required to install collectd. + +**Note**: These are instructions for Ubuntu 16.04. + +To build and install these dependencies, clone the barometer repo: + +.. code:: bash + + $ git clone https://gerrit.opnfv.org/gerrit/barometer + +Install the build dependencies: + +.. code:: bash + + $ ./barometer/src/install_build_deps.sh + +To install collectd as a service and install all its dependencies: + +.. code:: bash + + $ cd barometer/src && sudo make && sudo make install + +This will install collectd as a service and the base install directory +is /opt/collectd. + +Sample configuration files can be found in '/opt/collectd/etc/collectd.conf.d' + +Please note if you are using any Open vSwitch plugins you need to run: + +.. code:: bash + + $ sudo ovs-vsctl set-manager ptcp:6640 + Monitoring Interfaces and Openstack Support ------------------------------------------- .. Figure:: monitoring_interfaces.png @@ -201,7 +94,7 @@ node, sending and receiving traffic. collectd is also running on this compute node retrieving the stats periodically from DPDK through the dpdkstat plugin and publishing the retrieved stats to Ceilometer through the ceilometer plugin. -To see this demo in action please checkout: `SFQM OPNFV Summit demo`_ +To see this demo in action please check out: `Barometer OPNFV Summit demo`_ References ---------- @@ -213,5 +106,5 @@ References [6] https://collectd.org/wiki/index.php/Data_source [7] https://collectd.org/wiki/index.php/Meta_Data_Interface -.. 
_SFQM OPNFV Summit demo: https://prezi.com/kjv6o8ixs6se/software-fastpath-service-quality-metrics-demo/ +.. _Barometer OPNFV Summit demo: https://prezi.com/kjv6o8ixs6se/software-fastpath-service-quality-metrics-demo/ .. _ceilometer plugin: https://github.com/openstack/collectd-ceilometer-plugin/tree/stable/mitaka diff --git a/docs/userguide/dpdk_ka.png b/docs/userguide/dpdk_ka.png deleted file mode 100644 index 4a45e10c..00000000 Binary files a/docs/userguide/dpdk_ka.png and /dev/null differ diff --git a/docs/userguide/index.rst b/docs/userguide/index.rst index ae459d0a..5f019181 100644 --- a/docs/userguide/index.rst +++ b/docs/userguide/index.rst @@ -1,6 +1,6 @@ .. This work is licensed under a Creative Commons Attribution 4.0 International License. - .. http://creativecommons.org/licenses/by/4.0 - .. (c) +.. http://creativecommons.org/licenses/by/4.0 +.. (c) Intel Corporation and OPNFV ==================== Barometer user guide @@ -16,4 +16,3 @@ Barometer user guide :maxdepth: 3 collectd.userguide.rst - keepalive.userguide.rst diff --git a/docs/userguide/keepalive.userguide.rst b/docs/userguide/keepalive.userguide.rst deleted file mode 100644 index 4b6e990d..00000000 --- a/docs/userguide/keepalive.userguide.rst +++ /dev/null @@ -1,128 +0,0 @@ -.. This work is licensed under a Creative Commons Attribution 4.0 International License. -.. http://creativecommons.org/licenses/by/4.0 -.. (c) OPNFV, Intel Corporation and others. - -DPDK Keep Alive description -=========================== -SFQM aims to enable fault detection within DPDK, the very first feature to -meet this goal is the DPDK Keep Alive Sample app that is part of DPDK 2.2. - -DPDK Keep Alive or KA is a sample application that acts as a heartbeat/watchdog -for DPDK packet processing cores, to detect application thread failure. The -application supports the detection of ‘failed’ DPDK cores and notification to a -HA/SA middleware. The purpose is to detect Packet Processing Core fails (e.g. 
-infinite loop) and ensure the failure of the core does not result in a fault -that is not detectable by a management entity. - -.. Figure:: dpdk_ka.png - - DPDK Keep Alive Sample Application - -Essentially the app demonstrates how to detect 'silent outages' on DPDK packet -processing cores. The application can be decomposed into two specific parts: -detection and notification. - -* The detection period is programmable/configurable but defaults to 5ms if no - timeout is specified. -* The Notification support is enabled by simply having a hook function that where this - can be 'call back support' for a fault management application with a compliant - heartbeat mechanism. - -DPDK Keep Alive Sample App Internals ------------------------------------- -This section provides some explanation of the The Keep-Alive/'Liveliness' -conceptual scheme as well as the DPDK Keep Alive App. The initialization and -run-time paths are very similar to those of the L2 forwarding application (see -`L2 Forwarding Sample Application (in Real and Virtualized Environments)`_ for more -information). - -There are two types of cores: a Keep Alive Monitor Agent Core (master DPDK core) -and Worker cores (Tx/Rx/Forwarding cores). The Keep Alive Monitor Agent Core -will supervise worker cores and report any failure (2 successive missed pings). -The Keep-Alive/'Liveliness' conceptual scheme is: - -* DPDK worker cores mark their liveliness as they forward traffic. -* A Keep Alive Monitor Agent Core runs a function every N Milliseconds to - inspect worker core liveliness. -* If keep-alive agent detects time-outs, it notifies the fault management - entity through a call-back function. - -**Note:** Only the worker cores state is monitored. There is no mechanism or agent -to monitor the Keep Alive Monitor Agent Core. 
- -DPDK Keep Alive Sample App Code Internals ------------------------------------------ -The following section provides some explanation of the code aspects that are -specific to the Keep Alive sample application. - -The heartbeat functionality is initialized with a struct rte_heartbeat and the -callback function to invoke in the case of a timeout. - -.. code:: c - - rte_global_keepalive_info = rte_keepalive_create(&dead_core, NULL); - if (rte_global_hbeat_info == NULL) - rte_exit(EXIT_FAILURE, "keepalive_create() failed"); - -The function that issues the pings hbeat_dispatch_pings() is configured to run -every check_period milliseconds. - -.. code:: c - - if (rte_timer_reset(&hb_timer, - (check_period * rte_get_timer_hz()) / 1000, - PERIODICAL, - rte_lcore_id(), - &hbeat_dispatch_pings, rte_global_keepalive_info - ) != 0 ) - rte_exit(EXIT_FAILURE, "Keepalive setup failure.\n"); - -The rest of the initialization and run-time path follows the same paths as the -the L2 forwarding application. The only addition to the main processing loop is -the mark alive functionality and the example random failures. - -.. code:: c - - rte_keepalive_mark_alive(&rte_global_hbeat_info); - cur_tsc = rte_rdtsc(); - - /* Die randomly within 7 secs for demo purposes.. */ - if (cur_tsc - tsc_initial > tsc_lifetime) - break; - -The rte_keepalive_mark_alive() function simply sets the core state to alive. - -.. code:: c - - static inline void - rte_keepalive_mark_alive(struct rte_heartbeat *keepcfg) - { - keepcfg->state_flags[rte_lcore_id()] = 1; - } - -Keep Alive Monitor Agent Core Monitoring Options -The application can run on either a host or a guest. 
As such there are a number -of options for monitoring the Keep Alive Monitor Agent Core through a Local -Agent on the compute node: - - ====================== ========== ============= - Application Location DPDK KA LOCAL AGENT - ====================== ========== ============= - HOST X HOST/GUEST - GUEST X HOST/GUEST - ====================== ========== ============= - - -For the first implementation of a Local Agent SFQM will enable: - - ====================== ========== ============= - Application Location DPDK KA LOCAL AGENT - ====================== ========== ============= - HOST X HOST - ====================== ========== ============= - -Through extending the dpdkstat plugin for collectd with KA functionality, and -integrating the extended plugin with Monasca for high performing, resilient, -and scalable fault detection. - -.. _L2 Forwarding Sample Application (in Real and Virtualized Environments): http://dpdk.org/doc/guides/sample_app_ug/l2_forward_real_virtual.html diff --git a/docs/userguide/stats_and_timestamps.png b/docs/userguide/stats_and_timestamps.png deleted file mode 100644 index 84aef726..00000000 Binary files a/docs/userguide/stats_and_timestamps.png and /dev/null differ diff --git a/src/Makefile b/src/Makefile index 73e3508a..48d79e9a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -35,6 +35,7 @@ export WITH_DPDK include mk/master.mk SUBDIRS = SUBDIRS += dpdk +SUBDIRS += libpqos SUBDIRS += collectd SUBDIRS += collectd-ceilometer-plugin diff --git a/src/collectd/Makefile b/src/collectd/Makefile index 1cff9364..0e16cfac 100644 --- a/src/collectd/Makefile +++ b/src/collectd/Makefile @@ -24,6 +24,7 @@ include ../mk/master.mk include ../package-list.mk WORK_DIR = collectd +SAMPLE_CONFIG = $(CURDIR)/collectd_sample_configs TAG_DONE_FLAG = $(WORK_DIR)/.$(COLLECTD_TAG).done BUILD_CMD = ./build.sh CONFIG_CMD = @@ -35,14 +36,25 @@ ifeq ($(DPDK_DIR),) DPDK_DIR = /usr endif +LIBPQOS_DIR = $(shell echo $$LIBPQOS_DIR) +ifeq ($(LIBPQOS_DIR),) +LIBPQOS_DIR = /usr 
+endif + + CONFIG_CMD += --enable-syslog CONFIG_CMD += --enable-logfile CONFIG_CMD += --with-libdpdk=$(DPDK_DIR) +CONFIG_CMD += --with-libpqos=$(LIBPQOS_DIR) +CONFIG_CMD += --with-hugepages +CONFIG_CMD += --enable-exec +CONFIG_CMD += --enable-python +CONFIG_CMD += --enable-threshold .PHONY: install force_install config force_make # install depends on make -force_install: force_make +#force_install: force_make all: force_make @echo "Finished making $(WORK_DIR) " @@ -56,11 +68,20 @@ INSTALL_TARGET = force_install force_make force_make: $(WORK_DIR)/Makefile $(AT)cd $(WORK_DIR) && git pull $(COLLECTD_URL) $(COLLECTD_TAG) @echo "git pull done" - $(AT)$(MAKE) -C $(WORK_DIR) $(MORE_MAKE_FLAGS) + $(AT) $(MAKE) -C $(WORK_DIR) $(MORE_MAKE_FLAGS) @echo "Make done" force_install: - $(AT)sudo $(MAKE) -C $(WORK_DIR) install + $(AT)$(MAKE) -C $(WORK_DIR) install + $(AT)cp $(WORK_DIR)/contrib/systemd.collectd.service /etc/systemd/system/ + $(AT)mv /etc/systemd/system/systemd.collectd.service /etc/systemd/system/collectd.service + $(AT)chmod +x /etc/systemd/system/collectd.service + $(AT)sed -i -e 's/ExecStart=\/usr\/sbin\/collectd/ExecStart=\/opt\/collectd\/sbin\/collectd/g' /etc/systemd/system/collectd.service + $(AT)sed -i -e 's/CapabilityBoundingSet=/CapabilityBoundingSet=CAP_SETUID CAP_SETGID/g' /etc/systemd/system/collectd.service + $(AT)systemctl daemon-reload + $(AT)sudo mkdir -p /opt/collectd/etc/collectd.conf.d + $(AT)sudo cp $(SAMPLE_CONFIG)/* /opt/collectd/etc/collectd.conf.d + $(AT)sudo ./include_config.sh install: $(INSTALL_TARGET) diff --git a/src/collectd/collectd_sample_configs/csv.conf b/src/collectd/collectd_sample_configs/csv.conf new file mode 100644 index 00000000..e2350039 --- /dev/null +++ b/src/collectd/collectd_sample_configs/csv.conf @@ -0,0 +1,19 @@ +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LoadPlugin csv +<Plugin csv> + DataDir "/tmp/collectd/csv" + StoreRates false +</Plugin> diff --git a/src/collectd/collectd_sample_configs/dpdkstat.conf b/src/collectd/collectd_sample_configs/dpdkstat.conf new file mode 100644 index 00000000..201f9d37 --- /dev/null +++ b/src/collectd/collectd_sample_configs/dpdkstat.conf @@ -0,0 +1,26 @@ +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Sample collectd configuration for the dpdkstat plugin. +LoadPlugin dpdkstat + +<Plugin dpdkstat> +# Coremask "0xf" +# ProcessType "secondary" +# FilePrefix "rte" + EnabledPortMask 0xffff +# PortName "interface1" +# PortName "interface2" +</Plugin> + diff --git a/src/collectd/collectd_sample_configs/exec.conf b/src/collectd/collectd_sample_configs/exec.conf new file mode 100644 index 00000000..5b11fd94 --- /dev/null +++ b/src/collectd/collectd_sample_configs/exec.conf @@ -0,0 +1,20 @@ +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +LoadPlugin exec +<Plugin exec> +# Exec "user:group" "/path/to/exec" + NotificationExec "" "write_notification.sh" +</Plugin> diff --git a/src/collectd/collectd_sample_configs/hugepages.conf b/src/collectd/collectd_sample_configs/hugepages.conf new file mode 100644 index 00000000..4e4d9152 --- /dev/null +++ b/src/collectd/collectd_sample_configs/hugepages.conf @@ -0,0 +1,25 @@ +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Sample collectd configuration for the hugepages plugin. +LoadPlugin hugepages + +<Plugin hugepages> + ReportPerNodeHP true + ReportRootHP true + ValuesPages true + ValuesBytes false + ValuesPercentage false +</Plugin> + diff --git a/src/collectd/collectd_sample_configs/ovs_events.conf b/src/collectd/collectd_sample_configs/ovs_events.conf new file mode 100644 index 00000000..a52842ca --- /dev/null +++ b/src/collectd/collectd_sample_configs/ovs_events.conf @@ -0,0 +1,25 @@ +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +<LoadPlugin ovs_events> + Interval 1 +</LoadPlugin> + +<Plugin ovs_events> +# Port "6640" +# Address "127.0.0.1" +# Socket "/var/run/openvswitch/db.sock" +# Interfaces "br0" "veth0" + SendNotification true +</Plugin> diff --git a/src/collectd/collectd_sample_configs/rdt.conf b/src/collectd/collectd_sample_configs/rdt.conf new file mode 100644 index 00000000..4ff0a77a --- /dev/null +++ b/src/collectd/collectd_sample_configs/rdt.conf @@ -0,0 +1,23 @@ +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Sample collectd configuration for the RDT plugin. +# NOTE(review): the plugin name intel_rdt is assumed - confirm against the collectd build. +<LoadPlugin intel_rdt> + Interval 1 +</LoadPlugin> + +<Plugin "intel_rdt"> + Cores "" +</Plugin> diff --git a/src/collectd/collectd_sample_configs/write_notification.sh b/src/collectd/collectd_sample_configs/write_notification.sh new file mode 100755 index 00000000..28198f9d --- /dev/null +++ b/src/collectd/collectd_sample_configs/write_notification.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +rm -f /tmp/notifications +while read -r x y +do + echo "$x$y" >> /tmp/notifications +done diff --git a/src/collectd/include_config.sh b/src/collectd/include_config.sh new file mode 100755 index 00000000..95b052f3 --- /dev/null +++ b/src/collectd/include_config.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Append an <Include> block for collectd.conf.d to collectd.conf unless it is +# already there; exits 0 in both cases so the calling make recipe can re-run. +COLLECTD_CONF_FILE=/opt/collectd/etc/collectd.conf +INCLUDE_CONF="<Include \"/opt/collectd/etc/collectd.conf.d\">" + +function write_include { + echo "$INCLUDE_CONF" | sudo tee -a "$COLLECTD_CONF_FILE"; + echo " Filter \"*.conf\"" | sudo tee -a "$COLLECTD_CONF_FILE"; + echo "</Include>" | sudo tee -a "$COLLECTD_CONF_FILE" +} + +# -F matches the opening tag literally; || keeps the exit status 0 when it is found. +grep -qF "$INCLUDE_CONF" "$COLLECTD_CONF_FILE" || write_include + diff --git a/src/install_build_deps.sh b/src/install_build_deps.sh new file mode 100755 index 00000000..7c6a89a2 --- /dev/null +++ b/src/install_build_deps.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+set -eux +sudo apt-get -y install build-essential dh-autoreconf fakeroot devscripts dpkg-dev git wget + +sudo apt-get -y install \ + debhelper dpkg-dev po-debconf dh-systemd dh-strip-nondeterminism \ + bison flex autotools-dev libltdl-dev pkg-config \ + iptables-dev \ + javahelper \ + libatasmart-dev \ + libcap-dev \ + libcurl4-gnutls-dev \ + libdbi0-dev \ + libesmtp-dev \ + libganglia1-dev \ + libgcrypt11-dev \ + libglib2.0-dev \ + libgps-dev \ + libhiredis-dev \ + libi2c-dev \ + libldap2-dev \ + liblua5.3-dev \ + liblvm2-dev \ + libmemcached-dev \ + libmodbus-dev \ + libmnl-dev \ + libmosquitto-dev \ + libmysqlclient-dev \ + libnotify-dev \ + libopenipmi-dev \ + liboping-dev \ + libow-dev \ + libpcap0.8-dev \ + libpcap-dev\ + libperl-dev \ + libpq-dev \ + libprotobuf-c-dev \ + libriemann-client-dev \ + librdkafka-dev \ + librabbitmq-dev \ + librrd-dev \ + libsensors4-dev \ + libsigrok-dev \ + libsnmp-dev \ + libsnmp9-dev \ + perl \ + libtokyocabinet-dev \ + libtokyotyrant-dev \ + libudev-dev \ + libupsclient-dev \ + libvarnishapi-dev \ + libvirt-dev \ + libxen-dev \ + libxml2-dev \ + libyajl-dev \ + linux-libc-dev \ + default-jdk \ + protobuf-c-compiler \ + python-dev \ + openvswitch-switch + diff --git a/src/libpqos/Makefile b/src/libpqos/Makefile new file mode 100644 index 00000000..14bb7882 --- /dev/null +++ b/src/libpqos/Makefile @@ -0,0 +1,77 @@ +# makefile to manage collectd package +# + +# Copyright 2016 OPNFV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# +# Contributors: +# Aihua Li, Huawei Technologies. +# Maryam Tahhan, Intel Corporation. + +include ../mk/master.mk +include ../package-list.mk + +WORK_DIR = intel-cmt-cat +TAG_DONE_FLAG = $(WORK_DIR)/.$(LIBPQOS_TAG).done +BUILD_CMD = + +.PHONY: all install force_install force_make clean clobber distclean test sanity + +# install depends on make +force_install: force_make + +all: force_make + @echo "Finished making $(WORK_DIR) " + +INSTALL_TARGET = force_install force_make + +force_make: $(WORK_DIR) + $(AT)cd $(WORK_DIR) && git pull $(LIBPQOS_URL) $(LIBPQOS_TAG) + @echo "git pull done" + $(AT)$(MAKE) -C $(WORK_DIR) $(MORE_MAKE_FLAGS) + @echo "Make done" + +force_install: + $(AT)sudo $(MAKE) -C $(WORK_DIR) install PREFIX=/usr + $(AT)sudo modprobe msr + +install: $(INSTALL_TARGET) + +# hard way to clean and clobber +clean: + $(AT)cd $(WORK_DIR) && git clean -xfd *.o +clobber: + $(AT)rm -rf $(WORK_DIR) + +# distclean is for developers who would like to keep the +# cloned git repo, saving the time to fetch it again from the url +distclean: + $(AT)cd $(WORK_DIR) && git clean -xfd && git checkout -f + +test: + @echo "Make test in $(WORK_DIR) (stub) " + +sanity: + @echo "Make sanity in $(WORK_DIR) (stub) " + +$(WORK_DIR): + $(AT)git clone $(LIBPQOS_URL) $(WORK_DIR) + +$(TAG_DONE_FLAG): $(WORK_DIR) + $(AT)cd $(WORK_DIR) && git checkout $(LIBPQOS_TAG) +ifneq ($(PATCH_FILE),) + $(AT)cd $(WORK_DIR) && patch -p1 < ../$(PATCH_FILE) +endif + $(AT)touch $@ diff --git a/src/package-list.mk b/src/package-list.mk index a64489da..3ec60210 100644 --- a/src/package-list.mk +++ b/src/package-list.mk @@ -6,7 +6,11 @@ # dpdk section # DPDK_URL ?= git://dpdk.org/dpdk DPDK_URL ?= http://dpdk.org/git/dpdk -DPDK_TAG ?= v16.04 +DPDK_TAG ?= v16.07 + +# libpqos section (NOTE(review): tracks master; consider pinning a release tag) +LIBPQOS_URL ?= https://github.com/01org/intel-cmt-cat.git +LIBPQOS_TAG ?= master # collectd section COLLECTD_URL ?= https://github.com/collectd/collectd -- cgit 1.2.3-korg