From 9f07e41cc85f3dbe6e5eb151a0c59743521a6c00 Mon Sep 17 00:00:00 2001 From: Juha Kosonen Date: Wed, 4 Sep 2019 15:21:00 +0300 Subject: Prevent occasional rally hangs Set timeout on subprocess invocation instead of spawned child process. Increase the timeout value for rally_full to 2h since the execution typically takes ~90min [1]. [1] https://build.opnfv.org/ci/job/functest-opnfv-functest-benchmarking-latest-rally_full-run/22/ Change-Id: I0ca90bc2d85b4625336eb0396d8b2816a486b746 Signed-off-by: Juha Kosonen --- functest/opnfv_tests/openstack/rally/rally.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/functest/opnfv_tests/openstack/rally/rally.py b/functest/opnfv_tests/openstack/rally/rally.py index f6e563ba7..8100edaff 100644 --- a/functest/opnfv_tests/openstack/rally/rally.py +++ b/functest/opnfv_tests/openstack/rally/rally.py @@ -22,6 +22,7 @@ import shutil import subprocess import time +from threading import Timer import pkg_resources import prettytable from ruamel.yaml import YAML @@ -67,7 +68,7 @@ class RallyBase(singlevm.VmReady2): visibility = 'public' shared_network = True allow_no_fip = True - task_timeout = '3600' + task_timeout = 3600 def __init__(self, **kwargs): """Initialize RallyBase object.""" @@ -100,6 +101,7 @@ class RallyBase(singlevm.VmReady2): self.run_cmd = '' self.network_extensions = [] self.services = [] + self.task_aborted = False def build_task_args(self, test_name): """Build arguments for the Rally task.""" @@ -423,14 +425,25 @@ class RallyBase(singlevm.VmReady2): else: LOGGER.info('Test scenario: "%s" Failed.', test_name) + def kill_task(self, proc): + """ Kill a task.""" + proc.kill() + self.task_aborted = True + def run_task(self, test_name): """Run a task.""" LOGGER.info('Starting test scenario "%s" ...', test_name) LOGGER.debug('running command: %s', self.run_cmd) proc = subprocess.Popen(self.run_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + self.task_aborted = False + timer = Timer(self.task_timeout, self.kill_task, [proc]) + timer.start() output = proc.communicate()[0] - + if self.task_aborted: + LOGGER.error("Failed to complete task") + raise Exception("Failed to complete task") + timer.cancel() task_id = self.get_task_id(output) LOGGER.debug('task_id : %s', task_id) if task_id is None: @@ -525,8 +538,7 @@ class RallyBase(singlevm.VmReady2): if self.file_is_empty(file_name): LOGGER.info('No tests for scenario "%s"', test_name) return False - self.run_cmd = (["timeout", "-t", self.task_timeout, - "rally", "task", "start", "--abort-on-sla-failure", + self.run_cmd = (["rally", "task", "start", "--abort-on-sla-failure", "--task", self.task_file, "--task-args", str(self.build_task_args(test_name))]) return True @@ -718,6 +730,8 @@ class RallySanity(RallyBase): class RallyFull(RallyBase): """Rally full testcase implementation.""" + task_timeout = 7200 + def __init__(self, **kwargs): """Initialize RallyFull object.""" if "case_name" not in kwargs: @@ -731,7 +745,7 @@ class RallyJobs(RallyBase): """Rally OpenStack CI testcase implementation.""" stests = ["neutron"] - task_timeout = '7200' + task_timeout = 7200 def __init__(self, **kwargs): """Initialize RallyJobs object.""" @@ -837,8 +851,7 @@ class RallyJobs(RallyBase): os.makedirs(self.temp_dir) task_file_name = os.path.join(self.temp_dir, task_name) self.apply_blacklist(task, task_file_name) - self.run_cmd = (["timeout", "-t", self.task_timeout, - "rally", "task", "start", "--task", task_file_name, + self.run_cmd = (["rally", "task", "start", "--task", task_file_name, "--task-args", str(self.build_task_args(test_name))]) return True -- cgit 1.2.3-korg