summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/profiler-poc.py6
-rwxr-xr-xtests/run.sh65
2 files changed, 45 insertions, 26 deletions
diff --git a/tests/profiler-poc.py b/tests/profiler-poc.py
index 71034781..408cb09a 100644
--- a/tests/profiler-poc.py
+++ b/tests/profiler-poc.py
@@ -21,7 +21,7 @@ See also: https://goo.gl/98Osig
import json
import os
-LOGFILE = 'performance-profile'
+OUTPUT = 'doctor_profiling_output'
PREFIX = 'DOCTOR_PROFILER'
TOTAL_CHECK_POINTS = 10
MODULE_CHECK_POINTS = ['T00', 'T01', 'T04', 'T05', 'T06', 'T09']
@@ -33,7 +33,7 @@ Total time cost: {total}(ms)
|Monitor|Inspector |Controller|Notifier|Evaluator |
|{M00} |{M01} |{M02} |{M03} |{M04} |
| | | | | | | | | |
-host down:{T00}| | | | | | | | |
+link down:{T00}| | | | | | | | |
raw failure:{T01}| | | | | | | |
found affected:{T02}| | | | | | |
set VM error:{T03}| | | | | |
@@ -78,7 +78,7 @@ def main():
profile = TEMPLATE.format(**tags)
- logfile = open('{}.json'.format(LOGFILE), 'w')
+ logfile = open('{}.json'.format(OUTPUT), 'w')
logfile.write(json.dumps(tags))
print profile
diff --git a/tests/run.sh b/tests/run.sh
index 227c6ea1..70086502 100755
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -238,7 +238,7 @@ dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $7}')
[[ -n "$dev" ]] || dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $5}')
sleep 1
sudo ip link set $dev down
-echo "doctor set host down at" $(date "+%s.%N")
+echo "doctor set link down at" $(date "+%s.%N")
sleep 180
sudo ip link set $dev up
sleep 1
@@ -247,24 +247,8 @@ END_TXT
chmod +x disable_network.sh
scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
-}
-
-profile_performance_poc() {
- triggered=$(grep "^doctor set host down at" disable_network.log |\
- sed -e "s/^.* at //")
- vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
- sed -e "s/^.* at //")
- hostdown=$(grep "doctor mark host.* down at" inspector.log |\
- sed -e "s/^.* at //")
-
- #calculate the relative interval to triggered(T00)
- export DOCTOR_PROFILER_T00=0
- export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc)
- export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc)
- export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc)
- export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc)
-
- python profiler-poc.py
+ # use host time to get rid of potential time sync deviation between nodes
+ triggered=$(date "+%s.%N")
}
calculate_notification_time() {
@@ -279,10 +263,6 @@ calculate_notification_time() {
notified=$(grep "doctor consumer notified at" consumer.log |\
sed -e "s/^.* at //")
- if [[ "$PROFILER_TYPE" == "poc" ]]; then
- profile_performance_poc
- fi
-
echo "$notified $detected" | \
awk '{
d = $1 - $2;
@@ -319,6 +299,43 @@ unset_forced_down_hosts() {
-f value -c State | grep -q down' 240 5
}
+collect_logs() {
+ unset_forced_down_hosts
+ # TODO: We need to make sure the target compute host is back to IP
+ # reachable. wait_ping() will be added by tojuvone .
+ sleep 110
+ scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" .
+
+ # TODO(yujunz) collect other logs, e.g. nova, aodh
+}
+
+run_profiler() {
+ if [[ "$PROFILER_TYPE" == "poc" ]]; then
+ linkdown=$(grep "doctor set link down at " disable_network.log |\
+ sed -e "s/^.* at //")
+ vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
+ sed -e "s/^.* at //")
+ hostdown=$(grep "doctor mark host.* down at" inspector.log |\
+ sed -e "s/^.* at //")
+
+ # TODO(yujunz) check the actual delay to verify time sync status
+ # expected ~1s delay from $trigger to $linkdown
+ relative_start=${linkdown}
+ export DOCTOR_PROFILER_T00=$(python -c \
+ "print(int(($linkdown-$relative_start)*1000))")
+ export DOCTOR_PROFILER_T01=$(python -c \
+ "print(int(($detected-$relative_start)*1000))")
+ export DOCTOR_PROFILER_T03=$(python -c \
+ "print(int(($vmdown-$relative_start)*1000))")
+ export DOCTOR_PROFILER_T04=$(python -c \
+ "print(int(($hostdown-$relative_start)*1000))")
+ export DOCTOR_PROFILER_T09=$(python -c \
+ "print(int(($notified-$relative_start)*1000))")
+
+ python profiler-poc.py >doctor_profiler.log 2>&1
+ fi
+}
+
cleanup() {
set +e
echo "cleanup..."
@@ -398,5 +415,7 @@ sleep 60
check_host_status "(DOWN|UNKNOWN)"
calculate_notification_time
+collect_logs
+run_profiler
echo "done"