From 3d41a65738c44c2859ec9bdba11a0c0714b62b14 Mon Sep 17 00:00:00 2001 From: mbeierl Date: Thu, 26 Jan 2017 11:30:26 -0500 Subject: Hardening FIO interaction Fixes a problem where FIO does not terminate, by scheduling a second killall when a specific message is received on FIO's stderr. Introduces a new flavor for StorPerf that has a little more memory, as larger memory maps for duplicate blocks sometimes caused the out-of-memory killer to be invoked. Change-Id: I06856561ad73fef582a81d4136a36a1bea47654a JIRA: STORPERF-99 Signed-off-by: mbeierl --- ci/create_glance_image.sh | 20 ++++++++++ ci/create_storperf_flavor.sh | 25 ++++++++++++ ci/daily.sh | 64 ++++++++++++------------------ ci/delete_stack.sh | 16 ++++++++ ci/generate-admin-rc.sh | 3 +- ci/launch_docker_container.sh | 7 +++- storperf/fio/fio_invoker.py | 41 ++++++++++--------- storperf/resources/hot/agent-group.yaml | 4 +- storperf/resources/hot/storperf-agent.yaml | 4 +- storperf/utilities/data_handler.py | 11 ++--- tests/utilities_tests/data_handler_test.py | 13 ++++-- 11 files changed, 136 insertions(+), 72 deletions(-) create mode 100755 ci/create_glance_image.sh create mode 100755 ci/create_storperf_flavor.sh create mode 100755 ci/delete_stack.sh diff --git a/ci/create_glance_image.sh b/ci/create_glance_image.sh new file mode 100755 index 0000000..8811897 --- /dev/null +++ b/ci/create_glance_image.sh @@ -0,0 +1,20 @@ +#!/bin/bash +############################################################################## +# Copyright (c) 2017 EMC and others. +# +# All rights reserved. 
This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +echo "Checking for Ubuntu 16.04 image in Glance" +IMAGE=`openstack image list | grep "Ubuntu 16.04 x86_64"` +if [ -z "$IMAGE" ] +then + wget -q https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.img + openstack image create "Ubuntu 16.04 x86_64" --disk-format qcow2 --public \ + --container-format bare --file ubuntu-16.04-server-cloudimg-amd64-disk1.img +fi + +openstack image show "Ubuntu 16.04 x86_64" diff --git a/ci/create_storperf_flavor.sh b/ci/create_storperf_flavor.sh new file mode 100755 index 0000000..f25d56d --- /dev/null +++ b/ci/create_storperf_flavor.sh @@ -0,0 +1,25 @@ +#!/bin/bash +############################################################################## +# Copyright (c) 2017 EMC and others. +# +# All rights reserved. 
This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +echo "Checking for StorPerf flavor" + +openstack flavor delete storperf + +FLAVOUR=`openstack flavor list | grep "storperf"` +if [ -z "$FLAVOUR" ] +then + openstack flavor create storperf \ + --id auto \ + --ram 8192 \ + --disk 4 \ + --vcpus 2 +fi + +openstack flavor show storperf diff --git a/ci/daily.sh b/ci/daily.sh index 1e77d67..c26e8d3 100755 --- a/ci/daily.sh +++ b/ci/daily.sh @@ -19,17 +19,17 @@ then sudo rm -rf $WORKSPACE/ci/job fi -git clone --depth 1 https://gerrit.opnfv.org/gerrit/releng ci/job/releng +git clone --depth 1 https://gerrit.opnfv.org/gerrit/releng $WORKSPACE/ci/job/releng virtualenv $WORKSPACE/ci/job/storperf_daily_venv source $WORKSPACE/ci/job/storperf_daily_venv/bin/activate -pip install --upgrade setuptools -pip install functools32 -pip install pytz -pip install osc_lib -pip install python-openstackclient -pip install python-heatclient +pip install --upgrade setuptools==33.1.1 +pip install functools32==3.2.3.post2 +pip install pytz==2016.10 +pip install osc_lib==1.3.0 +pip install python-openstackclient==3.7.0 +pip install python-heatclient==1.7.0 # This is set by Jenkins, but if we are running manually, just use the # current hostname. @@ -41,45 +41,28 @@ export POD_NAME=$NODE_NAME sudo find $WORKSPACE/ -name '*.db' -exec rm -fv {} \; -export INSTALLER=`$WORKSPACE/ci/detect_installer.sh` - $WORKSPACE/ci/generate-admin-rc.sh $WORKSPACE/ci/generate-environment.sh . $WORKSPACE/ci/job/environment.rc -for env in `cat $WORKSPACE/ci/job/admin.rc` -do - export $env -done - -echo "Checking for an existing stack" -STACK_ID=`openstack stack list | grep StorPerfAgentGroup | awk '{print $2}'` -if [ ! 
-z $STACK_ID ] -then - openstack stack delete --yes --wait StorPerfAgentGroup -fi -echo Checking for Ubuntu 16.04 image in Glance -IMAGE=`openstack image list | grep "Ubuntu 16.04 x86_64"` -if [ -z $IMAGE ] -then - wget https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.img - openstack image create "Ubuntu 16.04 x86_64" --disk-format qcow2 --public \ - --container-format bare --file ubuntu-16.04-server-cloudimg-amd64-disk1.img -fi +while read -r env +do + export "$env" +done < $WORKSPACE/ci/job/admin.rc echo "TEST_DB_URL=http://testresults.opnfv.org/test/api/v1" >> $WORKSPACE/ci/job/admin.rc -echo "INSTALLER_TYPE=${INSTALLER}" >> $WORKSPACE/ci/job/admin.rc + +$WORKSPACE/ci/delete_stack.sh +$WORKSPACE/ci/create_glance_image.sh +$WORKSPACE/ci/create_storperf_flavor.sh $WORKSPACE/ci/launch_docker_container.sh +$WORKSPACE/ci/create_stack.sh $CINDER_NODES 10 "Ubuntu 16.04 x86_64" $NETWORK -echo "Waiting for StorPerf to become active" -while [ $(curl -X GET 'http://127.0.0.1:5000/api/v1.0/configurations' > /dev/null 2>&1;echo $?) 
-ne 0 ] -do - sleep 1 -done -echo Creating 1:1 stack -$WORKSPACE/ci/create_stack.sh $CINDER_NODES 10 "Ubuntu 16.04 x86_64" $NETWORK +echo ========================================================================== +echo Starting warmup +echo ========================================================================== export QUEUE_DEPTH=8 export BLOCK_SIZE=16384 @@ -96,9 +79,14 @@ do | awk '/Status/ {print $2}' | sed 's/"//g'` done -export QUEUE_DEPTH=1,2,8 -export BLOCK_SIZE=2048,8192,16384 + +echo ========================================================================== +echo Starting full matrix run +echo ========================================================================== + export WORKLOAD=ws,wr,rs,rr,rw +export BLOCK_SIZE=2048,8192,16384 +export QUEUE_DEPTH=1,2,8 export SCENARIO_NAME="${CINDER_BACKEND}_${WORKLOAD}" JOB=`$WORKSPACE/ci/start_job.sh \ diff --git a/ci/delete_stack.sh b/ci/delete_stack.sh new file mode 100755 index 0000000..a8a3f56 --- /dev/null +++ b/ci/delete_stack.sh @@ -0,0 +1,16 @@ +#!/bin/bash +############################################################################## +# Copyright (c) 2017 EMC and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +echo "Checking for an existing stack" +STACK_ID=`openstack stack list | grep StorPerfAgentGroup | awk '{print $2}'` +if [ ! 
-z $STACK_ID ] +then + openstack stack delete --yes --wait StorPerfAgentGroup +fi diff --git a/ci/generate-admin-rc.sh b/ci/generate-admin-rc.sh index 424d69c..07a0a3e 100755 --- a/ci/generate-admin-rc.sh +++ b/ci/generate-admin-rc.sh @@ -42,4 +42,5 @@ then echo export OS_PROJECT_NAME=admin >> job/openstack.rc fi -sed "s/export //" job/openstack.rc > job/admin.rc \ No newline at end of file +sed "s/export //" job/openstack.rc > job/admin.rc +echo "INSTALLER_TYPE=${INSTALLER}" >> job/admin.rc diff --git a/ci/launch_docker_container.sh b/ci/launch_docker_container.sh index 13cfe04..b119946 100755 --- a/ci/launch_docker_container.sh +++ b/ci/launch_docker_container.sh @@ -36,5 +36,10 @@ docker run -d --env-file `pwd`/job/admin.rc \ -p 8000:8000 \ -v `pwd`/job/carbon:/opt/graphite/storage/whisper \ --name storperf opnfv/storperf +# -v `pwd`/../../storperf:/home/opnfv/repos/storperf \ - +echo "Waiting for StorPerf to become active" +while [ $(curl -X GET 'http://127.0.0.1:5000/api/v1.0/configurations' > /dev/null 2>&1;echo $?) -ne 0 ] +do + sleep 1 +done diff --git a/storperf/fio/fio_invoker.py b/storperf/fio/fio_invoker.py index 2febf25..a201802 100644 --- a/storperf/fio/fio_invoker.py +++ b/storperf/fio/fio_invoker.py @@ -9,7 +9,6 @@ import json import logging -import subprocess from threading import Thread import paramiko @@ -65,7 +64,7 @@ class FIOInvoker(object): "Event listener callback complete") except Exception, e: self.logger.error("Error parsing JSON: %s", e) - except ValueError: + except IOError: pass # We might have read from the closed socket, ignore it stdout.close() @@ -76,6 +75,14 @@ class FIOInvoker(object): for line in iter(stderr.readline, b''): self.logger.error("FIO Error: %s", line.rstrip()) + # Sometimes FIO gets stuck and will give us this message: + # fio: job 'sequential_read' hasn't exited in 60 seconds, + # it appears to be stuck. Doing forceful exit of this job. + # A second killall of fio will release the stuck process. 
+ + if 'it appears to be stuck' in line: + self.terminate() + stderr.close() self.logger.debug("Finished") @@ -121,24 +128,22 @@ class FIOInvoker(object): def terminate(self): self.logger.debug("Terminating fio on " + self.remote_host) - cmd = ['ssh', '-o', 'StrictHostKeyChecking=no', - '-o', 'UserKnownHostsFile=/dev/null', - '-o', 'LogLevel=error', - '-i', 'storperf/resources/ssh/storperf_rsa', - 'storperf@' + self.remote_host, - 'sudo', 'killall', '-9', 'fio'] - kill_process = subprocess.Popen(cmd, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + ssh.connect(self.remote_host, username='storperf', + key_filename='storperf/resources/ssh/storperf_rsa', + timeout=2) - for line in iter(kill_process.stdout.readline, b''): - self.logger.debug("FIO Termination: " + line) + command = "sudo killall fio" - kill_process.stdout.close() + self.logger.debug("Executing on %s: %s" % (self.remote_host, command)) + (_, stdout, stderr) = ssh.exec_command(command) - for line in iter(kill_process.stderr.readline, b''): - self.logger.debug("FIO Termination: " + line) + for line in stdout.readlines(): + self.logger.debug(line.strip()) + for line in stderr.readlines(): + self.logger.error(line.strip()) - kill_process.stderr.close() + stdout.close() + stderr.close() diff --git a/storperf/resources/hot/agent-group.yaml b/storperf/resources/hot/agent-group.yaml index fc98c23..a06c847 100644 --- a/storperf/resources/hot/agent-group.yaml +++ b/storperf/resources/hot/agent-group.yaml @@ -16,7 +16,7 @@ parameters: - custom_constraint: neutron.network flavor: type: string - default: "m1.small" + default: "storperf" agent_image: type: string default: 'StorPerf Ubuntu 14.04' @@ -38,7 +38,7 @@ parameters: resources: slaves: type: OS::Heat::ResourceGroup - depends_on: [storperf_subnet, storperf_network_router_interface, + depends_on: [storperf_subnet, 
storperf_network_router_interface, storperf_open_security_group, storperf_key_pair] properties: count: {get_param: agent_count} diff --git a/storperf/resources/hot/storperf-agent.yaml b/storperf/resources/hot/storperf-agent.yaml index 587b6d8..7bf8b4d 100644 --- a/storperf/resources/hot/storperf-agent.yaml +++ b/storperf/resources/hot/storperf-agent.yaml @@ -12,7 +12,7 @@ heat_template_version: 2013-05-23 parameters: flavor: type: string - default: m1.small + default: storperf image: type: string default: 'Ubuntu 16.04' @@ -96,4 +96,4 @@ resources: outputs: storperf_agent_ip: description: The floating IP address of the agent on the public network - value: { get_attr: [ storperf_floating_ip, floating_ip_address ] } \ No newline at end of file + value: { get_attr: [ storperf_floating_ip, floating_ip_address ] } diff --git a/storperf/utilities/data_handler.py b/storperf/utilities/data_handler.py index 0aae3b1..2d4194a 100644 --- a/storperf/utilities/data_handler.py +++ b/storperf/utilities/data_handler.py @@ -24,7 +24,7 @@ class DataHandler(object): def __init__(self): self.logger = logging.getLogger(__name__) - self.samples = 11 + self.samples = 10 """ """ @@ -116,12 +116,9 @@ class DataHandler(object): self.logger.debug("Data series: %s" % data_series) if len(data_series) == 0: return False - earliest_timestamp = data_series[0][0] - latest_timestamp = data_series[-1][0] - duration = latest_timestamp - earliest_timestamp - if (duration < 60 * self.samples): - self.logger.debug("Only %s minutes of samples, ignoring" % - ((duration / 60 + 1),)) + number_of_samples = len(data_series) + if (number_of_samples < self.samples): + self.logger.debug("Only %s samples, ignoring" % number_of_samples) return False return SteadyState.steady_state(data_series) diff --git a/tests/utilities_tests/data_handler_test.py b/tests/utilities_tests/data_handler_test.py index 8115c6d..3813957 100644 --- a/tests/utilities_tests/data_handler_test.py +++ 
b/tests/utilities_tests/data_handler_test.py @@ -114,6 +114,10 @@ class DataHandlerTest(unittest.TestCase): series = [[4804559100, 205.345], [4804559200, 201.59], [4804559300, 205.76], + [4804559400, 205.76], + [4804559500, 205.76], + [4804559600, 205.76], + [4804559700, 205.76], [4804560300, 219.37], [4804560400, 219.28], [4804560500, 217.75]] @@ -199,15 +203,19 @@ class DataHandlerTest(unittest.TestCase): series = [[4804559100, 205.345], [4804559200, 201.59], [4804559300, 205.76], + [4804559400, 205.76], + [4804559500, 205.76], + [4804559600, 205.76], + [4804559700, 205.76], [4804560300, 219.37], [4804560400, 219.28], [4804560500, 217.75]] mock_graphite_db.return_value = series mock_time.return_value = 4804560500 + 10 - expected_slope = 0.011830471529818998 + expected_slope = 0.01266822319352225 expected_range = 17.78 - expected_average = 211.51583333333335 + expected_average = 209.2135 self.current_workload = ("%s.%s.queue-depth.%s.block-size.%s" % ("job_id", @@ -240,4 +248,3 @@ class DataHandlerTest(unittest.TestCase): self.assertEqual(True, self._terminated) self.assertEqual(False, self.pushed) - self.assertEqual(True, self._terminated) -- cgit 1.2.3-korg