aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Klozik <martinx.klozik@intel.com>2016-04-29 14:23:36 +0100
committerMartin Klozik <martinx.klozik@intel.com>2016-05-04 09:29:30 +0100
commit824d9c5d537916ebb1aebf1cfb6de9ab64484246 (patch)
tree5568133a2c095deed5940f3a069ca1cd983c7789
parent25969600ac9508ecc54a25d7b0f628e0713a82a2 (diff)
bugfix: Graceful shutdown of VM - improvement
Cleanup phase of PVVP scenario sometimes causes server reboot. Following updates were made to prevent reboots: * better generic process termination procedure * ovsdb is terminated after vswitchd termination * vswitchd is terminated directly instead of parent sudo process * already running VNFs are terminated in case of failure during VNF start() Change-Id: Ic09d60d7bfdea01c84a2685ede3d0316f0d09be7 JIRA: VSPERF-271 Signed-off-by: Martin Klozik <martinx.klozik@intel.com> Reviewed-by: Maryam Tahhan <maryam.tahhan@intel.com>
-rw-r--r--conf/02_vswitch.conf2
-rw-r--r--core/vnf_controller.py9
-rw-r--r--src/ovs/daemon.py17
-rw-r--r--tools/systeminfo.py8
-rw-r--r--tools/tasks.py63
-rw-r--r--vnfs/qemu/qemu.py28
-rw-r--r--vnfs/vnf/vnf.py9
-rw-r--r--vswitches/ovs.py2
8 files changed, 108 insertions, 30 deletions
diff --git a/conf/02_vswitch.conf b/conf/02_vswitch.conf
index d36d1786..67f96991 100644
--- a/conf/02_vswitch.conf
+++ b/conf/02_vswitch.conf
@@ -71,7 +71,7 @@ VHOST_USER_SOCKS = ['/tmp/dpdkvhostuser0', '/tmp/dpdkvhostuser1',
# hardware configuration, like cpu numbering and NUMA.
VSWITCHD_DPDK_ARGS = ['-c', '0x4', '-n', '4', '--socket-mem 1024,0']
-VSWITCHD_VANILLA_ARGS = ['--pidfile']
+VSWITCHD_VANILLA_ARGS = []
# use full module path to load module matching OVS version built from the source
VSWITCH_VANILLA_KERNEL_MODULES = ['libcrc32c', 'ip_tunnel', 'vxlan', 'gre', 'nf_conntrack', 'nf_defrag_ipv4', 'nf_defrag_ipv6', os.path.join(OVS_DIR_VANILLA, 'datapath/linux/openvswitch.ko')]
diff --git a/core/vnf_controller.py b/core/vnf_controller.py
index 39a63044..8800ccaf 100644
--- a/core/vnf_controller.py
+++ b/core/vnf_controller.py
@@ -15,6 +15,7 @@
"""
import logging
+import pexpect
from vnfs.vnf.vnf import IVnf
class VnfController(object):
@@ -68,8 +69,12 @@ class VnfController(object):
"""
self._logger.debug('start ' + str(len(self._vnfs)) +
' VNF[s] with ' + ' '.join(map(str, self._vnfs)))
- for vnf in self._vnfs:
- vnf.start()
+ try:
+ for vnf in self._vnfs:
+ vnf.start()
+ except pexpect.TIMEOUT:
+ self.stop()
+ raise
def stop(self):
"""Stops all VNFs set-up by __init__.
diff --git a/src/ovs/daemon.py b/src/ovs/daemon.py
index 089bc7a4..f9b037b2 100644
--- a/src/ovs/daemon.py
+++ b/src/ovs/daemon.py
@@ -38,6 +38,7 @@ class VSwitchd(tasks.Process):
_ovsdb_pid = None
_logfile = _LOG_FILE_VSWITCHD
_ovsdb_pidfile_path = os.path.join(settings.getValue('LOG_DIR'), "ovsdb_pidfile.pid")
+ _vswitchd_pidfile_path = os.path.join(settings.getValue('LOG_DIR'), "vswitchd_pidfile.pid")
_proc_name = 'ovs-vswitchd'
def __init__(self, timeout=30, vswitchd_args=None, expected_cmd=None):
@@ -55,7 +56,10 @@ class VSwitchd(tasks.Process):
vswitchd_args = vswitchd_args or []
ovs_vswitchd_bin = os.path.join(
settings.getValue('OVS_DIR'), 'vswitchd', 'ovs-vswitchd')
- self._cmd = ['sudo', '-E', ovs_vswitchd_bin] + vswitchd_args
+ sep = ['--'] if '--dpdk' in vswitchd_args else []
+ self._cmd = ['sudo', '-E', ovs_vswitchd_bin] + vswitchd_args + sep + \
+ ['--pidfile=' + self._vswitchd_pidfile_path, '--overwrite-pidfile',
+ '--log-file=' + self._logfile]
# startup/shutdown
@@ -77,15 +81,19 @@ class VSwitchd(tasks.Process):
self._kill_ovsdb()
raise exc
- def kill(self, signal='-15', sleep=2):
+ def kill(self, signal='-15', sleep=10):
"""Kill ``ovs-vswitchd`` instance if it is alive.
:returns: None
"""
self._logger.info('Killing ovs-vswitchd...')
+ with open(self._vswitchd_pidfile_path, "r") as pidfile:
+ vswitchd_pid = pidfile.read().strip()
+ tasks.terminate_task(vswitchd_pid, logger=self._logger)
- self._kill_ovsdb()
+ self._kill_ovsdb() # ovsdb must be killed after vswitchd
+ # just for case, that sudo envelope has not terminated
super(VSwitchd, self).kill(signal, sleep)
# helper functions
@@ -144,8 +152,7 @@ class VSwitchd(tasks.Process):
self._logger.info("Killing ovsdb with pid: " + ovsdb_pid)
if ovsdb_pid:
- tasks.run_task(['sudo', 'kill', '-15', str(ovsdb_pid)],
- self._logger, 'Killing ovsdb-server...')
+ tasks.terminate_task(ovsdb_pid, logger=self._logger)
@staticmethod
def get_db_sock_path():
diff --git a/tools/systeminfo.py b/tools/systeminfo.py
index ba490946..9d8eb5cb 100644
--- a/tools/systeminfo.py
+++ b/tools/systeminfo.py
@@ -168,6 +168,14 @@ def get_pid(proc_name_str):
"""
return get_pids([proc_name_str])
+def pid_isalive(pid):
+ """ Checks if given PID is alive
+
+ :param pid: PID of the process
+ :returns: True if given process is running, False otherwise
+ """
+ return os.path.isdir('/proc/' + str(pid))
+
# This function uses long switch per purpose, so let us suppress pylint warning too-many-branches
# pylint: disable=R0912
def get_version(app_name):
diff --git a/tools/tasks.py b/tools/tasks.py
index 90b7e553..dda5217d 100644
--- a/tools/tasks.py
+++ b/tools/tasks.py
@@ -26,6 +26,7 @@ import locale
import time
from conf import settings
+from tools import systeminfo
CMD_PREFIX = 'cmd : '
@@ -150,6 +151,55 @@ def run_interactive_task(cmd, logger, msg):
return child
+def terminate_task_subtree(pid, signal='-15', sleep=10, logger=None):
+ """Terminate given process and all its children
+
+ Function will sent given signal to the process. In case
+ that process will not terminate within given sleep interval
+ and signal was not SIGKILL, then process will be killed by SIGKILL.
+ After that function will check if all children of the process
+ are terminated and if not the same terminating procedure is applied
+ on any living child (only one level of children is considered).
+
+ :param pid: Process ID to terminate
+ :param signal: Signal to be sent to the process
+ :param sleep: Maximum delay in seconds after signal is sent
+ :param logger: Logger to write details to
+ """
+ try:
+ output = subprocess.check_output("pgrep -P " + str(pid), shell=True).decode().rstrip('\n')
+ except subprocess.CalledProcessError:
+ output = ""
+
+ terminate_task(pid, signal, sleep, logger)
+
+ # just for case children were kept alive
+ children = output.split('\n')
+ for child in children:
+ terminate_task(child, signal, sleep, logger)
+
+def terminate_task(pid, signal='-15', sleep=10, logger=None):
+ """Terminate process with given pid
+
+ Function will sent given signal to the process. In case
+ that process will not terminate within given sleep interval
+ and signal was not SIGKILL, then process will be killed by SIGKILL.
+
+ :param pid: Process ID to terminate
+ :param signal: Signal to be sent to the process
+ :param sleep: Maximum delay in seconds after signal is sent
+ :param logger: Logger to write details to
+ """
+ if systeminfo.pid_isalive(pid):
+ run_task(['sudo', 'kill', signal, str(pid)], logger)
+ logger.debug('Wait for process %s to terminate after signal %s', pid, signal)
+ for dummy in range(sleep):
+ time.sleep(1)
+ if not systeminfo.pid_isalive(pid):
+ break
+
+ if signal.lstrip('-').upper() not in ('9', 'KILL', 'SIGKILL') and systeminfo.pid_isalive(pid):
+ terminate_task(pid, '-9', sleep, logger)
class Process(object):
"""Control an instance of a long-running process.
@@ -242,17 +292,14 @@ class Process(object):
self.kill()
raise exc
- def kill(self, signal='-15', sleep=2):
+ def kill(self, signal='-15', sleep=10):
"""Kill process instance if it is alive.
:param signal: signal to be sent to the process
:param sleep: delay in seconds after signal is sent
"""
- if self._child and self._child.isalive():
- run_task(['sudo', 'kill', signal, str(self._child.pid)],
- self._logger)
- self._logger.debug('Wait for process to terminate')
- time.sleep(sleep)
+ if self.is_running():
+ terminate_task_subtree(self._child.pid, signal, sleep, self._logger)
if self.is_relinquished():
self._relinquish_thread.join()
@@ -275,7 +322,7 @@ class Process(object):
:returns: True if process is running, else False
"""
- return self._child is not None
+ return self._child and self._child.isalive()
def _affinitize_pid(self, core, pid):
"""Affinitize a process with ``pid`` to ``core``.
@@ -298,7 +345,7 @@ class Process(object):
"""
self._logger.info('Affinitizing process')
- if self._child and self._child.isalive():
+ if self.is_running():
self._affinitize_pid(core, self._child.pid)
class ContinueReadPrintLoop(threading.Thread):
diff --git a/vnfs/qemu/qemu.py b/vnfs/qemu/qemu.py
index c735062f..d108dc9a 100644
--- a/vnfs/qemu/qemu.py
+++ b/vnfs/qemu/qemu.py
@@ -21,6 +21,7 @@ import locale
import re
import subprocess
import time
+import pexpect
from conf import settings as S
from conf import get_test_param
@@ -133,15 +134,24 @@ class IVnfQemu(IVnf):
"""
Stops VNF instance gracefully first.
"""
- # exit testpmd if needed
- if self._guest_loopback == 'testpmd':
- self.execute_and_wait('stop', 120, "Done")
- self.execute_and_wait('quit', 120, "bye")
-
- # turn off VM
- self.execute_and_wait('poweroff', 120, "Power down")
- # VM OS is off, but wait until qemu shutdowns
- time.sleep(2)
+ try:
+ # exit testpmd if needed
+ if self._guest_loopback == 'testpmd':
+ self.execute_and_wait('stop', 120, "Done")
+ self.execute_and_wait('quit', 120, "bye")
+
+ # turn off VM
+ self.execute_and_wait('poweroff', 120, "Power down")
+
+ except pexpect.TIMEOUT:
+ self.kill()
+
+ # wait until qemu shutdowns
+ self._logger.debug('Wait for QEMU to terminate')
+ for dummy in range(30):
+ time.sleep(1)
+ if not self.is_running():
+ break
# just for case that graceful shutdown failed
super(IVnfQemu, self).stop()
diff --git a/vnfs/vnf/vnf.py b/vnfs/vnf/vnf.py
index 3dae2733..1410a0c4 100644
--- a/vnfs/vnf/vnf.py
+++ b/vnfs/vnf/vnf.py
@@ -51,11 +51,12 @@ class IVnf(tasks.Process):
"""
Stops VNF instance.
"""
- self._logger.info('Killing VNF...')
+ if self.is_running():
+ self._logger.info('Killing VNF...')
- # force termination of VNF and wait for it to terminate; It will avoid
- # sporadic reboot of host. (caused by hugepages or DPDK ports)
- super(IVnf, self).kill(signal='-9', sleep=10)
+ # force termination of VNF and wait for it to terminate; It will avoid
+ # sporadic reboot of host. (caused by hugepages or DPDK ports)
+ super(IVnf, self).kill(signal='-9', sleep=10)
def execute(self, cmd, delay=0):
"""
diff --git a/vswitches/ovs.py b/vswitches/ovs.py
index 06dc7a1a..dd49a1fc 100644
--- a/vswitches/ovs.py
+++ b/vswitches/ovs.py
@@ -21,7 +21,7 @@ from conf import settings
from vswitches.vswitch import IVSwitch
from src.ovs import OFBridge, flow_key, flow_match
-_VSWITCHD_CONST_ARGS = ['--', '--pidfile', '--log-file']
+_VSWITCHD_CONST_ARGS = []
class IVSwitchOvs(IVSwitch):
"""Open vSwitch base class implementation