1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
##############################################################################
# Copyright (c) 2016 CENGN and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
import copy
def slope(data_series):
    """
    Return the least-squares slope of a series of (x, y) data points.

    This function implements the linear least squares algorithm described
    in the following wikipedia article :
    https://en.wikipedia.org/wiki/Linear_least_squares_(mathematics)
    in the case of m equations (provided by m data points) and 2 unknowns:
    beta1 (the y-intercept) and beta2 (the slope) of the fitted line
    y = beta1 + beta2 * x, where x represents the time and y the Volume
    performance variable being tested (e.g. IOPS, latency...).

    The data_series is assumed to follow the pattern :
    [[x1, y1], [x2, y2], ..., [xm, ym]].
    Each point only needs to be indexable at positions 0 and 1, so lists
    and tuples are both accepted. The input is read, never modified.
    If this data pattern were to change, the data_treatment function
    should be adjusted to ensure compatibility with the rest of the
    Steady State Detection module.

    Returns the slope as a float, or None when the series is empty or when
    the slope is undefined because the denominator of the normal equations
    is zero (all points share the same x, which includes a single point).
    """
    m = len(data_series)
    # In the particular case of an empty data series
    if m == 0:
        return None

    # The sums are accumulated as floats so that the final division is a
    # true division whatever the numeric type of the samples. This replaces
    # the former deep copy + in-place float conversion of the first point,
    # which copied the whole series and failed on immutable (tuple) points.
    sum_xi = 0.0
    sum_xi_sq = 0.0
    sum_yi_xi = 0.0
    sum_yi = 0.0
    for point in data_series:
        xi = point[0]
        yi = point[1]
        sum_xi += xi
        sum_xi_sq += xi ** 2
        sum_yi_xi += xi * yi
        sum_yi += yi

    # Solve the normal equations system (2 equations, 2 unknowns) for
    # beta2. "over" is the denominator; it is zero when the x values carry
    # no spread, in which case no finite slope can be computed.
    over = sum_xi ** 2 - m * sum_xi_sq
    if over == 0:
        return None

    # The y-intercept, if it is ever needed, follows from the first normal
    # equation: beta1 = (sum_yi - beta2 * sum_xi) / m
    return (sum_yi * sum_xi - m * sum_yi_xi) / over
def range_value(data_series):
    """
    Return the range (maximum minus minimum) of the data_series.

    The data_series being passed is assumed to follow the following data
    pattern : [y1, y2, y3, ..., ym] where yi represents the ith
    measuring point of the y variable. The y variable represents the
    Volume performance being tested (e.g. IOPS, latency...).
    If this data pattern were to change, the data_treatment function
    should be adjusted to ensure compatibility with the rest of the
    Steady State Detection module.
    The conversion of the data series from the original pattern to the
    [y1, y2, y3, ..., ym] pattern is done outside this function
    so the original pattern can be changed without breaking this function.

    Returns max(data_series) - min(data_series), in whatever numeric type
    that subtraction yields for the elements (e.g. an int for int samples),
    or None when the series is empty.
    """
    # In the particular case of an empty data series
    if len(data_series) == 0:
        return None

    # The general case. The result is returned directly instead of being
    # stored in a local that shadows this function's own name.
    return max(data_series) - min(data_series)
def average(data_series):
    """
    Return the arithmetic mean of the data_series.

    The series is assumed to follow the pattern : [y1, y2, y3, ..., ym].
    If this data pattern were to change, the data_treatment function
    should be adjusted to ensure compatibility with the rest of the
    Steady State Detection module.

    Returns a float number corresponding to the average of the yi, or
    None when the series is empty.
    """
    m = len(data_series)
    # In the particular case of an empty data series
    if m == 0:
        return None

    # float(m) keeps the division a true division even for integer samples.
    return sum(data_series) / float(m)
|