aboutsummaryrefslogtreecommitdiffstats
path: root/yardstick/network_services/vnf_generic
diff options
context:
space:
mode:
authorRoss Brattain <ross.b.brattain@intel.com>2017-09-28 00:10:43 -0700
committerMaciej Skrocki <maciej.skrocki@intel.com>2017-10-03 21:54:33 +0000
commite228c2a3ac5b0173792fa7b11f9540ecec3a0029 (patch)
tree4d4b8c940c90bc5f6827475eaa45a131c6a3d246 /yardstick/network_services/vnf_generic
parente8cf6a76c346806b53fa0c802374327ddc4956d1 (diff)
NSB PROX test hang fixes
The PROX tests were hanging in the duration runner. These are fixes for various errors: raise error in collect_kpi if VNF is down move prox dpdk_rebind after collectd stop fix dpdk nicbind rebind to group by drivers prox: raise error in collect_kpi if the VNF is down prox: add VNF_TYPE for consistency sample_vnf: debug and fix kill_vnf pkill is not matching some executable names, add some debug process dumps and try switching back to killall until we can find the issue sample_vnf: add default timeout, so we can override default 3600 SSH timeout collect_kpi is the point at which we check the VNFs and TGs for failures or exits queues are the problem make sure we aren't silently blocking on non-empty queues by canceling join thread in subprocess fixup duration runner to close queues and other attempt to stop duration runner from hanging VnfdHelper: memoize port_num resource: fail if ssh can't connect at the end of 3600 second test our ssh connection is dead, so we can't actually stop collectd unless we reconnect fix stop() logic to ignore ssh errors Change-Id: I6c8e682a80cb9d00362e2fef4a46df080f304e55 Signed-off-by: Ross Brattain <ross.b.brattain@intel.com>
Diffstat (limited to 'yardstick/network_services/vnf_generic')
-rw-r--r--yardstick/network_services/vnf_generic/vnf/base.py8
-rw-r--r--yardstick/network_services/vnf_generic/vnf/prox_helpers.py5
-rw-r--r--yardstick/network_services/vnf_generic/vnf/prox_vnf.py41
-rw-r--r--yardstick/network_services/vnf_generic/vnf/sample_vnf.py58
-rw-r--r--yardstick/network_services/vnf_generic/vnf/udp_replay.py3
-rw-r--r--yardstick/network_services/vnf_generic/vnf/vpe_vnf.py3
6 files changed, 95 insertions, 23 deletions
diff --git a/yardstick/network_services/vnf_generic/vnf/base.py b/yardstick/network_services/vnf_generic/vnf/base.py
index 67634a79c..7391633af 100644
--- a/yardstick/network_services/vnf_generic/vnf/base.py
+++ b/yardstick/network_services/vnf_generic/vnf/base.py
@@ -64,6 +64,8 @@ class VnfdHelper(dict):
def __init__(self, *args, **kwargs):
super(VnfdHelper, self).__init__(*args, **kwargs)
self.port_pairs = PortPairs(self['vdu'][0]['external-interface'])
+ # port num is not present until binding so we have to memoize
+ self._port_num_map = {}
@property
def mgmt_interface(self):
@@ -91,12 +93,14 @@ class VnfdHelper(dict):
virtual_intf = interface["virtual-interface"]
if virtual_intf[key] == value:
return interface
+ raise KeyError()
def find_interface(self, **kwargs):
key, value = next(iter(kwargs.items()))
for interface in self.interfaces:
if interface[key] == value:
return interface
+ raise KeyError()
# hide dpdk_port_num key so we can abstract
def find_interface_by_port(self, port):
@@ -105,6 +109,7 @@ class VnfdHelper(dict):
# we have to convert to int to compare
if int(virtual_intf['dpdk_port_num']) == port:
return interface
+ raise KeyError()
def port_num(self, port):
# we need interface name -> DPDK port num (PMD ID) -> LINK ID
@@ -118,7 +123,8 @@ class VnfdHelper(dict):
intf = port
else:
intf = self.find_interface(name=port)
- return int(intf["virtual-interface"]["dpdk_port_num"])
+ return self._port_num_map.setdefault(intf["name"],
+ int(intf["virtual-interface"]["dpdk_port_num"]))
def port_nums(self, intfs):
return [self.port_num(i) for i in intfs]
diff --git a/yardstick/network_services/vnf_generic/vnf/prox_helpers.py b/yardstick/network_services/vnf_generic/vnf/prox_helpers.py
index ba4d44c41..d9fe9a948 100644
--- a/yardstick/network_services/vnf_generic/vnf/prox_helpers.py
+++ b/yardstick/network_services/vnf_generic/vnf/prox_helpers.py
@@ -364,6 +364,7 @@ class ProxSocketHelper(object):
""" send data to the remote instance """
LOG.debug("Sending data to socket: [%s]", to_send.rstrip('\n'))
try:
+ # TODO: sendall will block, we need a timeout
self._sock.sendall(to_send.encode('utf-8'))
except:
pass
@@ -593,6 +594,8 @@ _LOCAL_OBJECT = object()
class ProxDpdkVnfSetupEnvHelper(DpdkVnfSetupEnvHelper):
# the actual app is lowercase
APP_NAME = 'prox'
+ # not used for Prox but added for consistency
+ VNF_TYPE = "PROX"
LUA_PARAMETER_NAME = ""
LUA_PARAMETER_PEER = {
@@ -609,6 +612,8 @@ class ProxDpdkVnfSetupEnvHelper(DpdkVnfSetupEnvHelper):
self._prox_config_data = None
self.additional_files = {}
self.config_queue = Queue()
+ # allow_exit_without_flush
+ self.config_queue.cancel_join_thread()
self._global_section = None
@property
diff --git a/yardstick/network_services/vnf_generic/vnf/prox_vnf.py b/yardstick/network_services/vnf_generic/vnf/prox_vnf.py
index 2ac6ea412..3bfca19aa 100644
--- a/yardstick/network_services/vnf_generic/vnf/prox_vnf.py
+++ b/yardstick/network_services/vnf_generic/vnf/prox_vnf.py
@@ -16,9 +16,10 @@ import errno
import logging
+from yardstick.common.process import check_if_process_failed
from yardstick.network_services.vnf_generic.vnf.prox_helpers import ProxDpdkVnfSetupEnvHelper
from yardstick.network_services.vnf_generic.vnf.prox_helpers import ProxResourceHelper
-from yardstick.network_services.vnf_generic.vnf.sample_vnf import SampleVNF
+from yardstick.network_services.vnf_generic.vnf.sample_vnf import SampleVNF, PROCESS_JOIN_TIMEOUT
LOG = logging.getLogger(__name__)
@@ -51,10 +52,18 @@ class ProxApproxVnf(SampleVNF):
try:
return self.resource_helper.execute(cmd, *args, **kwargs)
except OSError as e:
- if not ignore_errors or e.errno not in {errno.EPIPE, errno.ESHUTDOWN}:
+ if e.errno in {errno.EPIPE, errno.ESHUTDOWN, errno.ECONNRESET}:
+ if ignore_errors:
+ LOG.debug("ignoring vnf_execute exception %s for command %s", e, cmd)
+ else:
+ raise
+ else:
raise
def collect_kpi(self):
+ # we can't get KPIs if the VNF is down
+ check_if_process_failed(self._vnf_process)
+
if self.resource_helper is None:
result = {
"packets_in": 0,
@@ -64,12 +73,13 @@ class ProxApproxVnf(SampleVNF):
}
return result
- intf_count = len(self.vnfd_helper.interfaces)
- if intf_count not in {1, 2, 4}:
+ # use all_ports so we only use ports matched in topology
+ port_count = len(self.vnfd_helper.port_pairs.all_ports)
+ if port_count not in {1, 2, 4}:
raise RuntimeError("Failed ..Invalid no of ports .. "
"1, 2 or 4 ports only supported at this time")
- port_stats = self.vnf_execute('port_stats', range(intf_count))
+ port_stats = self.vnf_execute('port_stats', range(port_count))
try:
rx_total = port_stats[6]
tx_total = port_stats[7]
@@ -94,13 +104,20 @@ class ProxApproxVnf(SampleVNF):
self.setup_helper.tear_down()
def terminate(self):
+ # stop collectd first or we get pika errors?
+ self.resource_helper.stop_collect()
# try to quit with socket commands
- self.vnf_execute("stop_all")
- self.vnf_execute("quit")
- # hopefully quit succeeds and socket closes, so ignore force_quit socket errors
- self.vnf_execute("force_quit", _ignore_errors=True)
- if self._vnf_process:
- self._vnf_process.terminate()
+ # pkill is not matching, debug with pgrep
+ self.ssh_helper.execute("sudo pgrep -lax %s" % self.setup_helper.APP_NAME)
+ self.ssh_helper.execute("sudo ps aux | grep -i %s" % self.setup_helper.APP_NAME)
+ if self._vnf_process.is_alive():
+ self.vnf_execute("stop_all")
+ self.vnf_execute("quit")
+ # hopefully quit succeeds and socket closes, so ignore force_quit socket errors
+ self.vnf_execute("force_quit", _ignore_errors=True)
self.setup_helper.kill_vnf()
self._tear_down()
- self.resource_helper.stop_collect()
+ if self._vnf_process is not None:
+ LOG.debug("joining before terminate %s", self._vnf_process.name)
+ self._vnf_process.join(PROCESS_JOIN_TIMEOUT)
+ self._vnf_process.terminate()
diff --git a/yardstick/network_services/vnf_generic/vnf/sample_vnf.py b/yardstick/network_services/vnf_generic/vnf/sample_vnf.py
index bbaae39e3..114153dab 100644
--- a/yardstick/network_services/vnf_generic/vnf/sample_vnf.py
+++ b/yardstick/network_services/vnf_generic/vnf/sample_vnf.py
@@ -29,10 +29,11 @@ from six.moves import cStringIO
from yardstick.benchmark.contexts.base import Context
from yardstick.benchmark.scenarios.networking.vnf_generic import find_relative_file
+from yardstick.common.process import check_if_process_failed
from yardstick.network_services.helpers.cpu import CpuSysCores
from yardstick.network_services.helpers.samplevnf_helper import PortPairs
from yardstick.network_services.helpers.samplevnf_helper import MultiPortConfig
-from yardstick.network_services.helpers.dpdknicbind_helper import DpdkBindHelper
+from yardstick.network_services.helpers.dpdkbindnic_helper import DpdkBindHelper
from yardstick.network_services.nfvi.resource import ResourceProfile
from yardstick.network_services.vnf_generic.vnf.base import GenericVNF
from yardstick.network_services.vnf_generic.vnf.base import QueueFileWrapper
@@ -51,6 +52,8 @@ LOG = logging.getLogger(__name__)
REMOTE_TMP = "/tmp"
+DEFAULT_VNF_TIMEOUT = 3600
+PROCESS_JOIN_TIMEOUT = 3
class VnfSshHelper(AutoConnectSSH):
@@ -290,8 +293,12 @@ class DpdkVnfSetupEnvHelper(SetupEnvHelper):
return resource
def kill_vnf(self):
+ # pkill is not matching, debug with pgrep
+ self.ssh_helper.execute("sudo pgrep -lax %s" % self.APP_NAME)
+ self.ssh_helper.execute("sudo ps aux | grep -i %s" % self.APP_NAME)
# have to use exact match
- self.ssh_helper.execute("sudo pkill -x %s" % self.APP_NAME)
+ # try using killall to match
+ self.ssh_helper.execute("sudo killall %s" % self.APP_NAME)
def _setup_dpdk(self):
""" setup dpdk environment needed for vnf to run """
@@ -330,8 +337,10 @@ class DpdkVnfSetupEnvHelper(SetupEnvHelper):
port_names = (intf["name"] for intf in ports)
collectd_options = self.get_collectd_options()
plugins = collectd_options.get("plugins", {})
+ # we must set timeout to be the same as the VNF otherwise KPIs will die before VNF
return ResourceProfile(self.vnfd_helper.mgmt_interface, port_names=port_names, cores=cores,
- plugins=plugins, interval=collectd_options.get("interval"))
+ plugins=plugins, interval=collectd_options.get("interval"),
+ timeout=self.scenario_helper.timeout)
def _detect_and_bind_drivers(self):
interfaces = self.vnfd_helper.interfaces
@@ -386,7 +395,7 @@ class ResourceHelper(object):
def _collect_resource_kpi(self):
result = {}
status = self.resource.check_if_sa_running("collectd")[0]
- if status:
+ if status == 0:
result = self.resource.amqp_collect_nfvi_kpi()
result = {"core": result}
@@ -672,12 +681,18 @@ class ScenarioHelper(object):
def topology(self):
return self.scenario_cfg['topology']
+ @property
+ def timeout(self):
+ return self.options.get('timeout', DEFAULT_VNF_TIMEOUT)
+
class SampleVNF(GenericVNF):
""" Class providing file-like API for generic VNF implementation """
VNF_PROMPT = "pipeline>"
WAIT_TIME = 1
+ APP_NAME = "SampleVNF"
+ # we run the VNF interactively, so the ssh command will timeout after this long
def __init__(self, name, vnfd, setup_env_helper_type=None, resource_helper_type=None):
super(SampleVNF, self).__init__(name, vnfd)
@@ -760,7 +775,8 @@ class SampleVNF(GenericVNF):
def _start_vnf(self):
self.queue_wrapper = QueueFileWrapper(self.q_in, self.q_out, self.VNF_PROMPT)
- self._vnf_process = Process(target=self._run)
+ name = "{}-{}-{}".format(self.name, self.APP_NAME, os.getpid())
+ self._vnf_process = Process(name=name, target=self._run)
self._vnf_process.start()
def _vnf_up_post(self):
@@ -810,6 +826,7 @@ class SampleVNF(GenericVNF):
'stdout': self.queue_wrapper,
'keep_stdin_open': True,
'pty': True,
+ 'timeout': self.scenario_helper.timeout,
}
def _build_config(self):
@@ -842,11 +859,15 @@ class SampleVNF(GenericVNF):
def terminate(self):
self.vnf_execute("quit")
- if self._vnf_process:
- self._vnf_process.terminate()
self.setup_helper.kill_vnf()
self._tear_down()
self.resource_helper.stop_collect()
+ if self._vnf_process is not None:
+ # be proper and join first before we kill
+ LOG.debug("joining before terminate %s", self._vnf_process.name)
+ self._vnf_process.join(PROCESS_JOIN_TIMEOUT)
+ self._vnf_process.terminate()
+ # no terminate children here because we share processes with tg
def get_stats(self, *args, **kwargs):
"""
@@ -860,6 +881,8 @@ class SampleVNF(GenericVNF):
return out
def collect_kpi(self):
+ # we can't get KPIs if the VNF is down
+ check_if_process_failed(self._vnf_process)
stats = self.get_stats()
m = re.search(self.COLLECT_KPI, stats, re.MULTILINE)
if m:
@@ -884,7 +907,6 @@ class SampleVNFTrafficGen(GenericTrafficGen):
def __init__(self, name, vnfd, setup_env_helper_type=None, resource_helper_type=None):
super(SampleVNFTrafficGen, self).__init__(name, vnfd)
self.bin_path = get_nsb_option('bin_path', '')
- self.name = "tgen__1" # name in topology file
self.scenario_helper = ScenarioHelper(self.name)
self.ssh_helper = VnfSshHelper(self.vnfd_helper.mgmt_interface, self.bin_path, wait=True)
@@ -916,7 +938,8 @@ class SampleVNFTrafficGen(GenericTrafficGen):
self.resource_helper.setup()
LOG.info("Starting %s server...", self.APP_NAME)
- self._tg_process = Process(target=self._start_server)
+ name = "{}-{}-{}".format(self.name, self.APP_NAME, os.getpid())
+ self._tg_process = Process(name=name, target=self._start_server)
self._tg_process.start()
def wait_for_instantiate(self):
@@ -952,7 +975,9 @@ class SampleVNFTrafficGen(GenericTrafficGen):
:param traffic_profile:
:return: True/False
"""
- self._traffic_process = Process(target=self._traffic_runner,
+ name = "{}-{}-{}-{}".format(self.name, self.APP_NAME, traffic_profile.__class__.__name__,
+ os.getpid())
+ self._traffic_process = Process(name=name, target=self._traffic_runner,
args=(traffic_profile,))
self._traffic_process.start()
# Wait for traffic process to start
@@ -983,6 +1008,9 @@ class SampleVNFTrafficGen(GenericTrafficGen):
pass
def collect_kpi(self):
+ # check if the tg processes have exited
+ for proc in (self._tg_process, self._traffic_process):
+ check_if_process_failed(proc)
result = self.resource_helper.collect_kpi()
LOG.debug("%s collect KPIs %s", self.APP_NAME, result)
return result
@@ -993,5 +1021,15 @@ class SampleVNFTrafficGen(GenericTrafficGen):
:return: True/False
"""
self.traffic_finished = True
+ # we must kill client before we kill the server, or the client will raise exception
if self._traffic_process is not None:
+ # be proper and try to join before terminating
+ LOG.debug("joining before terminate %s", self._traffic_process.name)
+ self._traffic_process.join(PROCESS_JOIN_TIMEOUT)
self._traffic_process.terminate()
+ if self._tg_process is not None:
+ # be proper and try to join before terminating
+ LOG.debug("joining before terminate %s", self._tg_process.name)
+ self._tg_process.join(PROCESS_JOIN_TIMEOUT)
+ self._tg_process.terminate()
+ # no terminate children here because we share processes with vnf
diff --git a/yardstick/network_services/vnf_generic/vnf/udp_replay.py b/yardstick/network_services/vnf_generic/vnf/udp_replay.py
index 6b7779782..a57f53bc7 100644
--- a/yardstick/network_services/vnf_generic/vnf/udp_replay.py
+++ b/yardstick/network_services/vnf_generic/vnf/udp_replay.py
@@ -15,6 +15,7 @@
from __future__ import absolute_import
import logging
+from yardstick.common.process import check_if_process_failed
from yardstick.network_services.vnf_generic.vnf.sample_vnf import SampleVNF
from yardstick.network_services.vnf_generic.vnf.sample_vnf import DpdkVnfSetupEnvHelper
from yardstick.network_services.vnf_generic.vnf.sample_vnf import ClientResourceHelper
@@ -107,6 +108,8 @@ class UdpReplayApproxVnf(SampleVNF):
def collect_kpi(self):
def get_sum(offset):
return sum(int(i) for i in split_stats[offset::5])
+ # we can't get KPIs if the VNF is down
+ check_if_process_failed(self._vnf_process)
number_of_ports = len(self.vnfd_helper.port_pairs.all_ports)
diff --git a/yardstick/network_services/vnf_generic/vnf/vpe_vnf.py b/yardstick/network_services/vnf_generic/vnf/vpe_vnf.py
index 5f1c4d4d3..c02c0eb27 100644
--- a/yardstick/network_services/vnf_generic/vnf/vpe_vnf.py
+++ b/yardstick/network_services/vnf_generic/vnf/vpe_vnf.py
@@ -24,6 +24,7 @@ import posixpath
from six.moves import configparser, zip
+from yardstick.common.process import check_if_process_failed
from yardstick.network_services.helpers.samplevnf_helper import PortPairs
from yardstick.network_services.pipeline import PipelineRules
from yardstick.network_services.vnf_generic.vnf.sample_vnf import SampleVNF, DpdkVnfSetupEnvHelper
@@ -278,6 +279,8 @@ class VpeApproxVnf(SampleVNF):
raise NotImplementedError
def collect_kpi(self):
+ # we can't get KPIs if the VNF is down
+ check_if_process_failed(self._vnf_process)
result = {
'pkt_in_up_stream': 0,
'pkt_drop_up_stream': 0,