# Metrics Analysis Notebook (local)

#### Used to analyse / visualize the metrics when uploaded via csv file

### Contributor: Aditya Srivastava 


In [None]:
from datetime import datetime
import json
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import os
import pandas as pd
from pprint import pprint
import re
import requests
import time

## Helper Functions

In [None]:
# Shared strptime/strftime layout used by both conversion helpers below.
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def convert_to_timestamp(s):
    """Turn a ``DATETIME_FORMAT`` string into epoch seconds.

    The string is interpreted in the machine's local time zone because the
    conversion goes through ``time.mktime``.

    Args:
        s: Date/time text laid out as ``DATETIME_FORMAT``.

    Returns:
        float: Seconds since the epoch.
    """
    parsed = datetime.strptime(s, DATETIME_FORMAT)
    return time.mktime(parsed.timetuple())


def convert_to_time_string(epoch):
    """Render an epoch expressed in *milliseconds* as a ``DATETIME_FORMAT`` string.

    Note the unit differs from ``convert_to_timestamp``, which yields seconds.

    Args:
        epoch: Milliseconds since the epoch (anything ``float()`` accepts).

    Returns:
        str: Local date/time formatted with ``DATETIME_FORMAT``.
    """
    seconds = float(epoch) / 1000.
    return datetime.fromtimestamp(seconds).strftime(DATETIME_FORMAT)

### Note: 
 
Path will be used as a parameter in almost every function

path / rootdir / csv : (str) Path to the folder whose direct children are metric folders

example: /path/to/folder

When : 
```sh
ls /path/to/folder

# output should be directories such as
# cpu-0 cpu-1 cpu-2 ..........................
# processes-ovs-vswitchd ........processes-ovsdb-server
```

## Analysis Function

#### CPU

In [None]:
rootdir = 'metrics_data/'


def fetch_cpu_data(rootdir):
    """Load the per-core CPU counters found under *rootdir*.

    Every folder whose name starts with ``cpu-<1-2 digits>`` is scanned. Files
    whose name contains ``user``, ``system`` or ``idle`` are read as CSV and
    their ``value`` column is stored under that metric name; ``epoch`` is
    taken from the last file read in the folder.

    Args:
        rootdir (str): Folder whose sub-folders are the metric folders.

    Returns:
        pandas.DataFrame: One row per sample with the columns ``user``,
        ``system``, ``idle``, ``epoch``, ``cpu`` (text after the last ``-``
        of the folder name) and ``user_percentage`` / ``system_percentage`` /
        ``idle_percentage``. An empty frame with those columns is returned
        when no CPU folder holds data.
    """
    metrics = ('user', 'system', 'idle')
    core_dir = re.compile(r"cpu-\d{1,2}")

    frames = []
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        dirname = os.path.basename(dirpath)
        if not core_dir.match(dirname):
            continue

        core_df = pd.DataFrame()
        for file in filenames:
            matched = [metric for metric in metrics if metric in file]
            if not matched:
                continue
            temp_df = pd.read_csv(os.path.join(dirpath, file))
            for metric in matched:
                core_df[metric] = temp_df['value']
            core_df['epoch'] = temp_df['epoch']

        if core_df.empty:
            # Nothing usable in this folder; it would add no rows anyway.
            continue
        core_df['cpu'] = dirname.split('-')[-1]
        frames.append(core_df)

    if not frames:
        return pd.DataFrame(columns=[
            'user', 'system', 'idle', 'epoch', 'cpu',
            'user_percentage', 'system_percentage', 'idle_percentage',
        ])

    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    df = pd.concat(frames, ignore_index=True)

    total = df['user'] + df['system'] + df['idle']
    for metric in metrics:
        df[metric + '_percentage'] = df[metric] * 100 / total

    return df


In [None]:
# CPU Unused Cores
def unused_cores(rootdir, verbose=False):
    """Find and plot the CPU cores that were never busy.

    A core counts as unused when none of its samples has an
    ``idle_percentage`` below 99.999. For each such core a three-panel figure
    (system / user / idle percentage over ``epoch``) is shown.

    Args:
        rootdir (str): Folder handed to ``fetch_cpu_data``.
        verbose (bool): When true, print a heading before processing.

    Returns:
        set: Identifiers (values of the ``cpu`` column) of the unused cores.
    """
    df = fetch_cpu_data(rootdir)
    # Group by the bare column name: a one-element list makes newer pandas
    # yield 1-tuples as keys, which would leak into titles and the result.
    groups = df.groupby('cpu')
    if verbose:
        print("Unused Cores :")

    idle_cores = {
        key for key, item in groups
        if not (item['idle_percentage'] < 99.999).any()
    }

    panels = (
        ("System", 'system_percentage'),
        ("User", 'user_percentage'),
        ("Idle", 'idle_percentage'),
    )
    for key, item in groups:
        if key not in idle_cores:
            continue
        fig = plt.figure(figsize=(24, 6), facecolor='oldlace', edgecolor='red')
        for position, (title, column) in enumerate(panels, start=1):
            ax = fig.add_subplot(1, 3, position)
            ax.title.set_text(title)
            ax.plot(item['epoch'], item[column])

        plt.suptitle('Unused CPU Core {}'.format(key), fontsize=14)
        plt.show()

    print("Number of unused cores: ", len(idle_cores))
    return idle_cores


# CPU fully used cores
def fully_used_cores(rootdir, verbose=False):
    """Find and plot the CPU cores that were (almost) saturated at some point.

    A core counts as fully used when at least one of its samples has an
    ``idle_percentage`` of 10 or less. For each such core a three-panel figure
    (system / user / idle percentage over ``epoch``) is shown.

    Args:
        rootdir (str): Folder handed to ``fetch_cpu_data``.
        verbose (bool): When true, print a heading before processing.

    Returns:
        set: Identifiers (values of the ``cpu`` column) of the fully used cores.
    """
    df = fetch_cpu_data(rootdir)
    # Group by the bare column name: a one-element list makes newer pandas
    # yield 1-tuples as keys, which would leak into titles and the result.
    groups = df.groupby('cpu')
    if verbose:
        print("Fully Used Cores :")

    saturated_cores = {
        key for key, item in groups
        if (item['idle_percentage'] <= 10).any()
    }

    panels = (
        ("System", 'system_percentage'),
        ("User", 'user_percentage'),
        ("Idle", 'idle_percentage'),
    )
    for key, item in groups:
        if key not in saturated_cores:
            continue
        fig = plt.figure(figsize=(24, 6), facecolor='oldlace', edgecolor='red')
        for position, (title, column) in enumerate(panels, start=1):
            ax = fig.add_subplot(1, 3, position)
            ax.title.set_text(title)
            ax.plot(item['epoch'], item[column])

        plt.suptitle('Fully Used CPU Core {}'.format(key), fontsize=14)
        plt.show()

    print("Number of fully used cores: ", len(saturated_cores))
    return saturated_cores


# CPU used cores plots
def used_cores(rootdir, verbose=False):
    """Find and plot the CPU cores that did any work.

    A core counts as used when at least one of its samples has an
    ``idle_percentage`` below 99.999. For each such core a three-panel figure
    (system / user / idle percentage over ``epoch``) is shown.

    Args:
        rootdir (str): Folder handed to ``fetch_cpu_data``.
        verbose (bool): When true, print a heading before processing.

    Returns:
        set: Identifiers (values of the ``cpu`` column) of the used cores.
    """
    df = fetch_cpu_data(rootdir)
    # Group by the bare column name: a one-element list makes newer pandas
    # yield 1-tuples as keys, which would leak into titles and the result.
    groups = df.groupby('cpu')
    if verbose:
        print("Used Cores :")

    busy_cores = {
        key for key, item in groups
        if (item['idle_percentage'] < 99.999).any()
    }

    panels = (
        ("System", 'system_percentage'),
        ("User", 'user_percentage'),
        ("Idle", 'idle_percentage'),
    )
    for key, item in groups:
        if key not in busy_cores:
            continue
        fig = plt.figure(figsize=(24, 6), facecolor='oldlace', edgecolor='red')
        for position, (title, column) in enumerate(panels, start=1):
            ax = fig.add_subplot(1, 3, position)
            ax.title.set_text(title)
            ax.plot(item['epoch'], item[column])

        plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)
        plt.show()

    print("Number of used cores: ", len(busy_cores))
    return busy_cores


#### Interface

In [None]:
rootdir = 'metrics_data/'


def fetch_interfaces_data(rootdir):
    """Load the rx/tx error and drop counters of every ``interface-*`` folder.

    Files whose name contains ``errors`` fill ``error_rx`` / ``error_tx`` and
    files whose name contains ``dropped`` fill ``dropped_rx`` / ``dropped_tx``
    from the CSV columns ``rx`` and ``tx``; ``epoch`` comes from the last file
    read in the folder.

    Args:
        rootdir (str): Folder whose sub-folders are the metric folders.

    Returns:
        pandas.DataFrame: Samples of all interfaces stacked together, with an
        ``interface`` column holding the folder name minus its first
        ``-``-separated token. Empty (but with the expected columns) when no
        interface folder holds data.
    """
    # (marker looked for in the file name, prefix of the resulting columns)
    kinds = (('errors', 'error'), ('dropped', 'dropped'))
    iface_dir = re.compile("interface-.*")

    frames = []
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        dirname = os.path.basename(dirpath)
        if not iface_dir.match(dirname):
            continue

        iface_df = pd.DataFrame()
        for file in filenames:
            for marker, prefix in kinds:
                if marker not in file:
                    continue
                temp_df = pd.read_csv(os.path.join(dirpath, file))
                iface_df[prefix + '_rx'] = temp_df['rx']
                iface_df[prefix + '_tx'] = temp_df['tx']
                iface_df['epoch'] = temp_df['epoch']

        if iface_df.empty:
            # Nothing usable in this folder; it would add no rows anyway.
            continue
        iface_df['interface'] = '-'.join(dirname.split('-')[1:])
        frames.append(iface_df)

    if not frames:
        return pd.DataFrame(columns=[
            'error_rx', 'error_tx', 'dropped_rx', 'dropped_tx',
            'epoch', 'interface',
        ])
    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    return pd.concat(frames, ignore_index=True)


In [None]:
# Interface Dropped (both type 1 and 2, i.e rx and tx)
def interface_dropped(rootdir, verbose=False):
    """Plot rx/tx drop counters per interface and report where drops occurred.

    One figure with two stacked panels (rx on top, tx below) is created per
    interface. All figures are shown once after the loop.

    Args:
        rootdir (str): Folder handed to ``fetch_interfaces_data``.
        verbose (bool): Accepted for signature compatibility; not used.

    Returns:
        dict: ``{'rx': [...], 'tx': [...]}`` where each entry is
        ``[interface, epoch]`` for the first sample of that interface whose
        counter equals 1.
    """
    df = fetch_interfaces_data(rootdir)
    # Bare column name keeps the group key a plain value on newer pandas.
    group = df.groupby('interface')
    color = ['oldlace', 'mistyrose']

    dropped = {'rx': [], 'tx': []}

    for itr, (key, item) in enumerate(group):
        for direction in ('rx', 'tx'):
            # NOTE(review): only samples equal to exactly 1 are reported,
            # as in the original code - confirm that is the intended test.
            dropped_rows = item[item['dropped_' + direction] == 1]
            if not dropped_rows.empty:
                dropped[direction].append([key, dropped_rows['epoch'].iloc[0]])

        fig = plt.figure(figsize=(24, 6), facecolor=color[itr % 2], edgecolor='red')
        ax = fig.add_subplot(211)
        ax.title.set_text("Interface: {} Dropped (rx)".format(key))
        ax.plot(item['epoch'], item['dropped_rx'])

        ax1 = fig.add_subplot(212)
        ax1.title.set_text("Interface: {} Dropped (tx)".format(key))
        ax1.plot(item['epoch'], item['dropped_tx'])

        # Title the figure that was just built.
        plt.suptitle('Interface Dropped', fontsize=14)

    plt.show()

    return dropped


# Interface Errors (both type 1 and 2, i.e rx and tx)
def interface_errors(rootdir, verbose=False):
    """Plot rx/tx error counters per interface and report where errors occurred.

    One figure with two stacked panels (rx on top, tx below) is created per
    interface. All figures are shown once after the loop.

    Args:
        rootdir (str): Folder handed to ``fetch_interfaces_data``.
        verbose (bool): Accepted for signature compatibility; not used.

    Returns:
        dict: ``{'rx': [...], 'tx': [...]}`` where each entry is
        ``[interface, epoch]`` for the first sample of that interface whose
        counter equals 1.
    """
    df = fetch_interfaces_data(rootdir)
    # Bare column name keeps the group key a plain value on newer pandas.
    group = df.groupby('interface')
    color = ['oldlace', 'mistyrose']

    errors = {'rx': [], 'tx': []}

    for itr, (key, item) in enumerate(group):
        for direction in ('rx', 'tx'):
            # NOTE(review): only samples equal to exactly 1 are reported,
            # as in the original code - confirm that is the intended test.
            err_rows = item[item['error_' + direction] == 1]
            if not err_rows.empty:
                errors[direction].append([key, err_rows['epoch'].iloc[0]])

        fig = plt.figure(figsize=(24, 6), facecolor=color[itr % 2], edgecolor='red')
        ax = fig.add_subplot(211)
        ax.title.set_text("Interface: {} Errors (rx)".format(key))
        ax.plot(item['epoch'], item['error_rx'])

        ax1 = fig.add_subplot(212)
        ax1.title.set_text("Interface: {} Errors (tx)".format(key))
        ax1.plot(item['epoch'], item['error_tx'])

        # Title the figure that was just built.
        plt.suptitle('Interface Errors', fontsize=14)

    plt.show()

    return errors


#### OVS Stats (Non DPDK)

In [None]:
rootdir = 'metrics_data/'


def fetch_ovs_stats_data(rootdir):
    """Load error/drop counters of every non-DPDK ``ovs_stats-*`` folder.

    Files whose name contains ``errors`` or ``dropped`` are read as CSV. Each
    non-``epoch`` column ``c`` is stored as ``c_<suffix>``, where ``<suffix>``
    is the file name after its first ``_`` (remaining ``_`` become ``-``).
    Folders with ``dpdk`` in their name are skipped.

    Args:
        rootdir (str): Folder whose sub-folders are the metric folders.

    Returns:
        pandas.DataFrame: Samples of all matching folders stacked together,
        with an ``interface`` column holding the folder name minus its first
        ``-``-separated token. Empty (columns ``epoch``, ``interface``) when
        nothing matched.
    """
    stats_dir = re.compile("ovs_stats-.*")

    frames = []
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        dirname = os.path.basename(dirpath)
        if not stats_dir.match(dirname) or 'dpdk' in dirname:
            continue  # not an OVS stats folder, or a DPDK one (ignored here)

        port_df = pd.DataFrame()
        for file in filenames:
            if 'errors' not in file and 'dropped' not in file:
                continue
            col_name = '-'.join(file.split('_')[1:])
            temp_df = pd.read_csv(os.path.join(dirpath, file))
            port_df['epoch'] = temp_df['epoch']
            for source in temp_df.columns.drop('epoch'):
                port_df[source + '_' + col_name] = temp_df[source]

        if port_df.empty:
            # Nothing usable in this folder; it would add no rows anyway.
            continue
        port_df['interface'] = '-'.join(dirname.split('-')[1:])
        frames.append(port_df)

    if not frames:
        return pd.DataFrame(columns=['epoch', 'interface'])
    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    return pd.concat(frames, ignore_index=True)


In [None]:
def ovs_stats_dropped(rootdir, verbose=False):
    """Plot every 'dropped' counter of each non-DPDK OVS stats interface.

    One figure is created per (interface, column) pair whose column name
    contains ``dropped`` and holds at least one non-null value. All figures
    are shown once after the loops.

    Args:
        rootdir (str): Folder handed to ``fetch_ovs_stats_data``.
        verbose (bool): Accepted for signature compatibility; not used.

    Returns:
        None
    """
    df = fetch_ovs_stats_data(rootdir)
    # Bare column name keeps the group key a plain value on newer pandas,
    # so the plot title shows the interface name rather than a 1-tuple.
    group = df.groupby('interface')
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for key, item in group:
        for col in item:
            if 'dropped' not in col or item[col].isnull().all():
                continue
            plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
            plt.plot(item['epoch'], item[col])
            plt.title("Interface: {} Dropped {}".format(key, col))
            i += 1
    plt.show()
    return


# OVS Stats Errors
def ovs_stats_errors(rootdir, verbose=False):
    """Plot every 'error' counter of each non-DPDK OVS stats interface.

    One figure is created per (interface, column) pair whose column name
    contains ``error`` and holds at least one non-null value. All figures
    are shown once after the loops.

    Args:
        rootdir (str): Folder handed to ``fetch_ovs_stats_data``.
        verbose (bool): Accepted for signature compatibility; not used.

    Returns:
        None
    """
    df = fetch_ovs_stats_data(rootdir)
    # Bare column name keeps the group key a plain value on newer pandas,
    # so the plot title shows the interface name rather than a 1-tuple.
    group = df.groupby('interface')
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for key, item in group:
        for col in item:
            if 'error' not in col or item[col].isnull().all():
                continue
            plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
            plt.plot(item['epoch'], item[col])
            plt.title("Interface: {} Errors {}".format(key, col))
            i += 1
    plt.show()

#### DPDK

In [None]:
rootdir = 'metrics_data/'


def fetch_dpdk_data(rootdir):
    """Load error/drop counters of every folder whose name contains ``dpdk``.

    Files whose name contains ``errors`` or ``dropped`` are read as CSV. Each
    non-``epoch`` column ``c`` is stored as ``c_<suffix>``, where ``<suffix>``
    is the file name after its first ``_`` (remaining ``_`` become ``-``).

    Args:
        rootdir (str): Folder whose sub-folders are the metric folders.

    Returns:
        pandas.DataFrame: Samples of all matching folders stacked together,
        with a ``dpdk`` column holding the folder name minus its first
        ``-``-separated token. Empty (columns ``epoch``, ``dpdk``) when
        nothing matched.
    """
    dpdk_dir = re.compile(".*dpdk.*")

    frames = []
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        dirname = os.path.basename(dirpath)
        if not dpdk_dir.match(dirname):
            continue

        port_df = pd.DataFrame()
        for file in filenames:
            if 'errors' not in file and 'dropped' not in file:
                continue
            col_name = '-'.join(file.split('_')[1:])
            temp_df = pd.read_csv(os.path.join(dirpath, file))
            port_df['epoch'] = temp_df['epoch']
            for source in temp_df.columns.drop('epoch'):
                port_df[source + '_' + col_name] = temp_df[source]

        if port_df.empty:
            # Nothing usable in this folder; it would add no rows anyway.
            continue
        port_df['dpdk'] = '-'.join(dirname.split('-')[1:])
        frames.append(port_df)

    if not frames:
        return pd.DataFrame(columns=['epoch', 'dpdk'])
    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    return pd.concat(frames, ignore_index=True)


In [None]:
# Notebook cell: load the DPDK counters for the module-level `rootdir`.
fetch_dpdk_data(rootdir)

In [None]:
def dpdk_dropped(rootdir, verbose=False):
    """Plot every 'dropped' counter of each DPDK folder.

    One figure is created per (dpdk, column) pair whose column name contains
    ``dropped`` and holds at least one non-null value. All figures are shown
    once after the loops.

    Args:
        rootdir (str): Folder handed to ``fetch_dpdk_data``.
        verbose (bool): Accepted for signature compatibility; not used.

    Returns:
        None
    """
    df = fetch_dpdk_data(rootdir)
    # Bare column name keeps the group key a plain value on newer pandas,
    # so the plot title shows the name rather than a 1-tuple.
    group = df.groupby('dpdk')
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for key, item in group:
        for col in item:
            if 'dropped' not in col or item[col].isnull().all():
                continue
            plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
            plt.plot(item['epoch'], item[col])
            plt.title("DpDK: {} Dropped {}".format(key, col))
            i += 1
    plt.show()
    return


# DPDK Errors
def dpdk_errors(rootdir, verbose=False):
    """Plot every 'error' counter of each DPDK folder.

    One figure is created per (dpdk, column) pair whose column name contains
    ``error`` and holds at least one non-null value. All figures are shown
    once after the loops.

    Args:
        rootdir (str): Folder handed to ``fetch_dpdk_data``.
        verbose (bool): Accepted for signature compatibility; not used.

    Returns:
        None
    """
    df = fetch_dpdk_data(rootdir)
    # Bare column name keeps the group key a plain value on newer pandas,
    # so the plot title shows the name rather than a 1-tuple.
    group = df.groupby('dpdk')
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for key, item in group:
        for col in item:
            if 'error' not in col or item[col].isnull().all():
                continue
            plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
            plt.plot(item['epoch'], item[col])
            plt.title("DpDK: {} Errors {}".format(key, col))
            i += 1
    plt.show()

In [None]:
# Notebook cell: plot the DPDK 'dropped' counters for the module-level `rootdir`.
dpdk_dropped(rootdir)

#### RDT (needs to be tested)

In [None]:
rootdir = 'metrics_data/'


def fetch_rdt_data(rootdir):
    """Load the RDT counters of every folder whose name contains ``rdt``.

    Files whose name contains ``bytes``, ``bandwidth`` or ``ipc`` are read as
    CSV. Each non-``epoch`` column ``c`` is stored as ``c_<suffix>``, where
    ``<suffix>`` is the file name after its first ``_`` (remaining ``_``
    become ``-``).

    NOTE(review): a file name without ``_`` yields an empty suffix, so
    several such files would all write to the same ``c_`` column - confirm
    against the real file naming.

    Args:
        rootdir (str): Folder whose sub-folders are the metric folders.

    Returns:
        pandas.DataFrame: Samples of all matching folders stacked together,
        with an ``intel_rdt`` column holding the folder name minus its first
        ``-``-separated token. Empty (columns ``epoch``, ``intel_rdt``) when
        nothing matched.
    """
    markers = ('bytes', 'bandwidth', 'ipc')
    rdt_dir = re.compile(".*rdt.*")

    frames = []
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        dirname = os.path.basename(dirpath)
        if not rdt_dir.match(dirname):
            continue

        rdt_df = pd.DataFrame()
        for file in filenames:
            if not any(marker in file for marker in markers):
                continue
            col_name = '-'.join(file.split('_')[1:])
            temp_df = pd.read_csv(os.path.join(dirpath, file))
            rdt_df['epoch'] = temp_df['epoch']
            for source in temp_df.columns.drop('epoch'):
                rdt_df[source + '_' + col_name] = temp_df[source]

        if rdt_df.empty:
            # Nothing usable in this folder; it would add no rows anyway.
            continue
        rdt_df['intel_rdt'] = '-'.join(dirname.split('-')[1:])
        frames.append(rdt_df)

    if not frames:
        return pd.DataFrame(columns=['epoch', 'intel_rdt'])
    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    return pd.concat(frames, ignore_index=True)


In [None]:
# L3 cache bytes
def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot every RDT column whose name contains ``bytes``.

    One figure is created per (intel_rdt, column) pair that holds at least one
    non-null value. All figures are shown once after the loops.

    Args:
        start, end, node, steps, verbose: Accepted for signature
            compatibility; not used by this CSV implementation.
        csv (str): Folder handed to ``fetch_rdt_data``. When omitted, the
            module-level ``rootdir`` is used, as before.

    Returns:
        None
    """
    # Previously the `csv` argument was ignored and the global was always read.
    source = rootdir if csv is None else csv
    df = fetch_rdt_data(source)
    # Bare column name keeps the group key a plain value on newer pandas.
    group = df.groupby('intel_rdt')
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for key, item in group:
        for col in item:
            if 'bytes' not in col or item[col].isnull().all():
                continue
            plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
            plt.plot(item['epoch'], item[col])
            plt.title("RDT BYTES, RDT: {}".format(key))
            i += 1
    plt.show()


# L3 IPC values
def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot every RDT column whose name contains ``ipc``.

    One figure is created per (intel_rdt, column) pair that holds at least one
    non-null value. All figures are shown once after the loops.

    Args:
        start, end, node, steps, verbose: Accepted for signature
            compatibility; not used by this CSV implementation.
        csv (str): Folder handed to ``fetch_rdt_data``. When omitted, the
            module-level ``rootdir`` is used, as before.

    Returns:
        None
    """
    # Previously the `csv` argument was ignored and the global was always read.
    source = rootdir if csv is None else csv
    df = fetch_rdt_data(source)
    # Bare column name keeps the group key a plain value on newer pandas.
    group = df.groupby('intel_rdt')
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for key, item in group:
        for col in item:
            if 'ipc' not in col or item[col].isnull().all():
                continue
            plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
            plt.plot(item['epoch'], item[col])
            plt.title("RDT IPC, RDT: {}".format(key))
            i += 1
    plt.show()



# memory bandwidth
def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot every RDT column whose name contains ``bandwidth``.

    One figure is created per (intel_rdt, column) pair that holds at least one
    non-null value. All figures are shown once after the loops.

    Args:
        start, end, node, steps, verbose: Accepted for signature
            compatibility; not used by this CSV implementation.
        csv (str): Folder handed to ``fetch_rdt_data``. When omitted, the
            module-level ``rootdir`` is used, as before.

    Returns:
        None
    """
    # Previously the `csv` argument was ignored and the global was always read.
    source = rootdir if csv is None else csv
    df = fetch_rdt_data(source)
    # Bare column name keeps the group key a plain value on newer pandas.
    group = df.groupby('intel_rdt')
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for key, item in group:
        for col in item:
            # The filter used to be misspelt ('bandwidht'), so it never
            # matched the columns built from the '...bandwidth...' files.
            if 'bandwidth' not in col or item[col].isnull().all():
                continue
            plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
            plt.plot(item['epoch'], item[col])
            plt.title("RDT Memory Bandwidth, RDT: {}".format(key))
            i += 1
    plt.show()


#### Memory (following functions still need to be written for csv)

In [None]:
rootdir = 'metrics_data/'


def fetch_memory_data(rootdir):
    """Load the memory counters of every folder whose name starts with ``memory``.

    Every file in such a folder is read as CSV. Its non-``epoch`` column(s)
    are stored under the second ``-``-separated token of the file name; if a
    file has several such columns the last one wins. ``epoch`` comes from the
    last file read in the folder. The name of each matching folder is printed.

    Args:
        rootdir (str): Folder whose sub-folders are the metric folders.

    Returns:
        pandas.DataFrame: Samples of all matching folders stacked together;
        an empty frame when nothing matched.

    Raises:
        IndexError: If a file name in a matching folder contains no ``-``.
    """
    memory_dir = re.compile("memory")

    frames = []
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        dirname = os.path.basename(dirpath)
        if not memory_dir.match(dirname):
            continue
        print(dirname)

        mem_df = pd.DataFrame()
        for file in filenames:
            col_name = file.split('-')[1]
            temp_df = pd.read_csv(os.path.join(dirpath, file))
            mem_df['epoch'] = temp_df['epoch']
            for source in temp_df.columns.drop('epoch'):
                mem_df[col_name] = temp_df[source]

        if not mem_df.empty:
            frames.append(mem_df)

    if not frames:
        return pd.DataFrame()
    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    return pd.concat(frames, ignore_index=True)

In [None]:
def get_memory_usage(rootdir, verbose=False):
    """Plot every memory counter over time.

    One figure is created per column of the frame returned by
    ``fetch_memory_data`` that holds at least one non-null value. All figures
    are shown once after the loop.

    Args:
        rootdir (str): Folder handed to ``fetch_memory_data``.
        verbose (bool): Accepted for signature compatibility; not used.

    Returns:
        None
    """
    df = fetch_memory_data(rootdir)
    color = ['oldlace', 'mistyrose']

    i = 0  # number of figures so far; alternates the background colour
    for col in df:
        # 'epoch' is the x axis; plotting it against itself is meaningless.
        if col == 'epoch' or df[col].isnull().all():
            continue
        plt.figure(figsize=(24, 6), facecolor=color[i % 2], edgecolor='red')
        plt.plot(df['epoch'], df[col])
        plt.title("{} Memory".format(col))
        i += 1
    plt.show()



## Usage / Examples


##### CPU 

- For finding unused CPU cores

```py
cores = unused_cores(rootdir='metrics_data')
```

- For finding fully used cores

```py
fully_used = fully_used_cores('metrics_data')
```

- Similarly for plotting used cores

```py
used_cores('metrics_data')
```


##### Interface

- Interface Dropped 

```py
# Using CSV
dropped_interfaces = interface_dropped('metrics_data')
```

- Interface Errors

```py
# Using CSV
interface_errors('metrics_data')
```

##### OVS Stats

- OVS Stats Dropped 

```py
# Using CSV
ovs_stats_dropped('metrics_data')
```

- OVS Stats Errors

```py
# Using CSV
ovs_stats_errors('metrics_data')
```

##### DPDK 

- DPDK Dropped 

```py
# Using CSV
dpdk_dropped('metrics_data')
```

- DPDK Errors

```py
# Using CSV
dpdk_errors('metrics_data')
```



##### RDT (Do not run yet)

- Plot bytes

```py
#csv
plot_rdt_bytes(csv='metrics_data')
```

- Plot ipc values

```py
#csv
plot_rdt_ipc(csv='metrics_data')
```

- Memory bandwidth

```py
#csv
get_rdt_memory_bandwidth(csv='metrics_data')
```

##### Memory

```py
#csv
get_memory_usage('metrics_data')
```