aboutsummaryrefslogtreecommitdiffstats
path: root/yardstick/network_services/nfvi/resource.py
diff options
context:
space:
mode:
Diffstat (limited to 'yardstick/network_services/nfvi/resource.py')
-rw-r--r--yardstick/network_services/nfvi/resource.py189
1 files changed, 129 insertions, 60 deletions
diff --git a/yardstick/network_services/nfvi/resource.py b/yardstick/network_services/nfvi/resource.py
index f0ae67616..adf4d8ae6 100644
--- a/yardstick/network_services/nfvi/resource.py
+++ b/yardstick/network_services/nfvi/resource.py
@@ -15,56 +15,99 @@
from __future__ import absolute_import
from __future__ import print_function
-import tempfile
+
import logging
+from itertools import chain
+
+import errno
+import jinja2
import os
import os.path
import re
import multiprocessing
-from collections import Sequence
+import pkg_resources
from oslo_config import cfg
+from oslo_utils.encodeutils import safe_decode
from yardstick import ssh
+from yardstick.common.task_template import finalize_for_yaml
+from yardstick.common.utils import validate_non_string_sequence
from yardstick.network_services.nfvi.collectd import AmqpConsumer
-from yardstick.network_services.utils import get_nsb_option
+
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
ZMQ_OVS_PORT = 5567
ZMQ_POLLING_TIME = 12000
-LIST_PLUGINS_ENABLED = ["amqp", "cpu", "cpufreq", "intel_rdt", "memory",
- "hugepages", "dpdkstat", "virt", "ovs_stats", "intel_pmu"]
+LIST_PLUGINS_ENABLED = ["amqp", "cpu", "cpufreq", "memory",
+ "hugepages"]
class ResourceProfile(object):
"""
This profile adds a resource at the beginning of the test session
"""
+ COLLECTD_CONF = "collectd.conf"
+ AMPQ_PORT = 5672
+ DEFAULT_INTERVAL = 25
+ DEFAULT_TIMEOUT = 3600
+ OVS_SOCKET_PATH = "/usr/local/var/run/openvswitch/db.sock"
+
+ def __init__(self, mgmt, port_names=None, plugins=None, interval=None, timeout=None):
+
+ if plugins is None:
+ self.plugins = {}
+ else:
+ self.plugins = plugins
+
+ if interval is None:
+ self.interval = self.DEFAULT_INTERVAL
+ else:
+ self.interval = interval
+
+ if timeout is None:
+ self.timeout = self.DEFAULT_TIMEOUT
+ else:
+ self.timeout = timeout
- def __init__(self, mgmt, interfaces=None, cores=None):
self.enable = True
- self.connection = None
- self.cores = cores if isinstance(cores, Sequence) else []
self._queue = multiprocessing.Queue()
self.amqp_client = None
- self.interfaces = interfaces if isinstance(interfaces, Sequence) else []
+ self.port_names = validate_non_string_sequence(port_names, default=[])
- # why the host or ip?
- self.vnfip = mgmt.get("host", mgmt["ip"])
- self.connection = ssh.SSH.from_node(mgmt, overrides={"ip": self.vnfip})
+ # we need to save mgmt so we can connect to port 5672
+ self.mgmt = mgmt
+ self.connection = ssh.AutoConnectSSH.from_node(mgmt)
- self.connection.wait()
+ @classmethod
+ def make_from_node(cls, node, timeout):
+ # node dict works as mgmt dict
+ # don't need port names, there is no way we can
+ # tell what port is used on the compute node
+ collectd_options = node["collectd"]
+ plugins = collectd_options.get("plugins", {})
+ interval = collectd_options.get("interval")
+
+ return cls(node, plugins=plugins, interval=interval, timeout=timeout)
def check_if_sa_running(self, process):
""" verify if system agent is running """
- err, pid, _ = self.connection.execute("pgrep -f %s" % process)
- return [err == 0, pid]
+ try:
+ err, pid, _ = self.connection.execute("pgrep -f %s" % process)
+ # strip whitespace
+ return err, pid.strip()
+ except OSError as e:
+ if e.errno in {errno.ECONNRESET}:
+ # if we can't connect to check, then we won't be able to connect to stop it
+ LOG.exception("can't connect to host to check collectd status")
+ return 1, None
+ raise
def run_collectd_amqp(self):
""" run amqp consumer to collect the NFVi data """
- amqp_url = 'amqp://admin:admin@{}:5672/%2F'.format(self.vnfip)
+ amqp_url = 'amqp://admin:admin@{}:{}/%2F'.format(self.mgmt['ip'], self.AMPQ_PORT)
amqp = AmqpConsumer(amqp_url, self._queue)
try:
amqp.run()
@@ -111,9 +154,9 @@ class ResourceProfile(object):
@classmethod
def parse_intel_pmu_stats(cls, key, value):
- return {''.join(key): value.split(":")[1]}
+ return {''.join(str(v) for v in key): value.split(":")[1]}
- def parse_collectd_result(self, metrics, core_list):
+ def parse_collectd_result(self, metrics):
""" convert collectd data into json"""
result = {
"cpu": {},
@@ -122,21 +165,21 @@ class ResourceProfile(object):
"dpdkstat": {},
"virt": {},
"ovs_stats": {},
- "intel_pmu": {},
}
testcase = ""
- for key, value in metrics.items():
+ # unicode decode
+ decoded = ((safe_decode(k, 'utf-8'), safe_decode(v, 'utf-8')) for k, v in metrics.items())
+ for key, value in decoded:
key_split = key.split("/")
res_key_iter = (key for key in key_split if "nsb_stats" not in key)
res_key0 = next(res_key_iter)
res_key1 = next(res_key_iter)
- if "cpu" in res_key0 or "intel_rdt" in res_key0:
+ if "cpu" in res_key0 or "intel_rdt" in res_key0 or "intel_pmu" in res_key0:
cpu_key, name, metric, testcase = \
self.get_cpu_data(res_key0, res_key1, value)
- if cpu_key in core_list:
- result["cpu"].setdefault(cpu_key, {}).update({name: metric})
+ result["cpu"].setdefault(cpu_key, {}).update({name: metric})
elif "memory" in res_key0:
result["memory"].update({res_key1: value.split(":")[0]})
@@ -153,9 +196,6 @@ class ResourceProfile(object):
elif "ovs_stats" in res_key0:
result["ovs_stats"].update(self.parse_ovs_stats(key_split, value))
- elif "intel_pmu-all" in res_key0:
- result["intel_pmu"].update(self.parse_intel_pmu_stats(res_key1, value))
-
result["timestamp"] = testcase
return result
@@ -163,8 +203,9 @@ class ResourceProfile(object):
def amqp_process_for_nfvi_kpi(self):
""" amqp collect and return nfvi kpis """
if self.amqp_client is None and self.enable:
- self.amqp_client = \
- multiprocessing.Process(target=self.run_collectd_amqp)
+ self.amqp_client = multiprocessing.Process(
+ name="AmqpClient-{}-{}".format(self.mgmt['ip'], os.getpid()),
+ target=self.run_collectd_amqp)
self.amqp_client.start()
def amqp_collect_nfvi_kpi(self):
@@ -175,37 +216,48 @@ class ResourceProfile(object):
metric = {}
while not self._queue.empty():
metric.update(self._queue.get())
- msg = self.parse_collectd_result(metric, self.cores)
+ msg = self.parse_collectd_result(metric)
return msg
- def _provide_config_file(self, bin_path, nfvi_cfg, kwargs):
- with open(os.path.join(bin_path, nfvi_cfg), 'r') as cfg:
- template = cfg.read()
- cfg, cfg_content = tempfile.mkstemp()
- with os.fdopen(cfg, "w+") as cfg:
- cfg.write(template.format(**kwargs))
- cfg_file = os.path.join(bin_path, nfvi_cfg)
- self.connection.put(cfg_content, cfg_file)
-
- def _prepare_collectd_conf(self, bin_path):
+ def _provide_config_file(self, config_file_path, nfvi_cfg, template_kwargs):
+ template = pkg_resources.resource_string("yardstick.network_services.nfvi",
+ nfvi_cfg).decode('utf-8')
+ cfg_content = jinja2.Template(template, trim_blocks=True, lstrip_blocks=True,
+ finalize=finalize_for_yaml).render(
+ **template_kwargs)
+ # cfg_content = io.StringIO(template.format(**template_kwargs))
+ cfg_file = os.path.join(config_file_path, nfvi_cfg)
+ # must write as root, so use sudo
+ self.connection.execute("cat | sudo tee {}".format(cfg_file), stdin=cfg_content)
+
+ def _prepare_collectd_conf(self, config_file_path):
""" Prepare collectd conf """
- loadplugin = "\n".join("LoadPlugin {0}".format(plugin)
- for plugin in LIST_PLUGINS_ENABLED)
-
- interfaces = "\n".join("PortName '{0[name]}'".format(interface)
- for interface in self.interfaces)
kwargs = {
- "interval": '25',
- "loadplugin": loadplugin,
- "dpdk_interface": interfaces,
+ "interval": self.interval,
+ "loadplugins": set(chain(LIST_PLUGINS_ENABLED, self.plugins.keys())),
+ # Optional fields PortName is descriptive only, use whatever is present
+ "port_names": self.port_names,
+ # "ovs_bridge_interfaces": ["br-int"],
+ "plugins": self.plugins,
}
- self._provide_config_file(bin_path, 'collectd.conf', kwargs)
+ self._provide_config_file(config_file_path, self.COLLECTD_CONF, kwargs)
+
+ def _setup_ovs_stats(self, connection):
+ try:
+ socket_path = self.plugins["ovs_stats"].get("ovs_socket_path", self.OVS_SOCKET_PATH)
+ except KeyError:
+ # ovs_stats is not a dict
+ socket_path = self.OVS_SOCKET_PATH
+ status = connection.execute("test -S {}".format(socket_path))[0]
+ if status != 0:
+ LOG.error("cannot find OVS socket %s", socket_path)
def _start_collectd(self, connection, bin_path):
- connection.execute('sudo pkill -9 collectd')
- bin_path = get_nsb_option("bin_path")
- collectd_path = os.path.join(bin_path, "collectd", "collectd")
+ LOG.debug("Starting collectd to collect NFVi stats")
+ connection.execute('sudo pkill -x -9 collectd')
+ collectd_path = os.path.join(bin_path, "collectd", "sbin", "collectd")
+ config_file_path = os.path.join(bin_path, "collectd", "etc")
exit_status = connection.execute("which %s > /dev/null 2>&1" % collectd_path)[0]
if exit_status != 0:
LOG.warning("%s is not present disabling", collectd_path)
@@ -217,8 +269,13 @@ class ResourceProfile(object):
# connection.execute("sudo %s '%s' '%s'" % (
# collectd_installer, http_proxy, https_proxy))
return
+ if "ovs_stats" in self.plugins:
+ self._setup_ovs_stats(connection)
+
LOG.debug("Starting collectd to collect NFVi stats")
- self._prepare_collectd_conf(bin_path)
+ # ensure collectd.conf.d exists to avoid error/warning
+ connection.execute("sudo mkdir -p /etc/collectd/collectd.conf.d")
+ self._prepare_collectd_conf(config_file_path)
# Reset amqp queue
LOG.debug("reset and setup amqp to collect data from collectd")
@@ -229,20 +286,27 @@ class ResourceProfile(object):
connection.execute("sudo rabbitmqctl start_app")
connection.execute("sudo service rabbitmq-server restart")
- LOG.debug("Creating amdin user for rabbitmq in order to collect data from collectd")
+ LOG.debug("Creating admin user for rabbitmq in order to collect data from collectd")
connection.execute("sudo rabbitmqctl delete_user guest")
connection.execute("sudo rabbitmqctl add_user admin admin")
connection.execute("sudo rabbitmqctl authenticate_user admin admin")
- connection.execute("sudo rabbitmqctl set_permissions -p / admin \".*\" \".*\" \".*\"")
+ connection.execute("sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'")
- LOG.debug("Start collectd service.....")
- connection.execute("sudo %s" % collectd_path)
+ LOG.debug("Start collectd service..... %s second timeout", self.timeout)
+ # intel_pmu plug requires large numbers of files open, so try to set
+ # ulimit -n to a large value
+ connection.execute("sudo bash -c 'ulimit -n 1000000 ; %s'" % collectd_path,
+ timeout=self.timeout)
LOG.debug("Done")
def initiate_systemagent(self, bin_path):
""" Start system agent for NFVi collection on host """
if self.enable:
- self._start_collectd(self.connection, bin_path)
+ try:
+ self._start_collectd(self.connection, bin_path)
+ except Exception:
+ LOG.exception("Exception during collectd start")
+ raise
def start(self):
""" start nfvi collection """
@@ -258,13 +322,18 @@ class ResourceProfile(object):
LOG.debug("Stop resource monitor...")
if self.amqp_client is not None:
+        # be polite: try to join first before terminating
+ self.amqp_client.join(3)
self.amqp_client.terminate()
+ LOG.debug("Check if %s is running", agent)
status, pid = self.check_if_sa_running(agent)
- if status == 0:
+ LOG.debug("status %s pid %s", status, pid)
+ if status != 0:
return
- self.connection.execute('sudo kill -9 %s' % pid)
- self.connection.execute('sudo pkill -9 %s' % agent)
+ if pid:
+ self.connection.execute('sudo kill -9 "%s"' % pid)
+ self.connection.execute('sudo pkill -9 "%s"' % agent)
self.connection.execute('sudo service rabbitmq-server stop')
self.connection.execute("sudo rabbitmqctl stop_app")