summaryrefslogtreecommitdiffstats
path: root/tools/lma
diff options
context:
space:
mode:
authorAditya Srivastava <adityasrivastava301199@gmail.com>2020-08-24 02:46:18 +0530
committerAditya Srivastava <adityasrivastava301199@gmail.com>2020-08-24 02:46:18 +0530
commitd25014e0201cf0b0a662a84984191786d7f8eb5d (patch)
treec30831d9cd9083f5a6ca64ea53cfdb35230efc14 /tools/lma
parente5eef0ffdf2d281fecf12597041fd8af23d65e42 (diff)
Tools: Add monitoring analysis jupyter notebook
This patch adds 2 monitoring jupyter notebooks and 1 notebook combining both logs and metrics causation analysis. Causation analysis: Finds anomalies in logs, then fetches and analyses metrics in a delta time range around that timestamp. Analysis-Monitoring-K8S: Fetches metrics from prometheus to analyse them. Analysis-Monitoring-Local: A data folder containing csv files of metrics is given, and analysis is performed on that data. Signed-off-by: Aditya Srivastava <adityasrivastava301199@gmail.com> Change-Id: I8833f5155b3184f697fac5270c69e0df02d2986b
Diffstat (limited to 'tools/lma')
-rw-r--r--tools/lma/jupyter-notebooks/Causation-Analysis.ipynb784
-rw-r--r--tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb644
-rw-r--r--tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb913
3 files changed, 2341 insertions, 0 deletions
diff --git a/tools/lma/jupyter-notebooks/Causation-Analysis.ipynb b/tools/lma/jupyter-notebooks/Causation-Analysis.ipynb
new file mode 100644
index 00000000..d2e7886a
--- /dev/null
+++ b/tools/lma/jupyter-notebooks/Causation-Analysis.ipynb
@@ -0,0 +1,784 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Logs and Metrics Analysis Notebook\n",
+ "\n",
+ "#### Used to capture anomalies in the logs and analyse / visualize the metrics in the vicinity of that time\n",
+ "\n",
+ "##### Contributors:\n",
+ "\n",
+ "- Adarsh Yadav <adiyadav0509@gmail.com> \n",
+ " \n",
+ " Log Analysis and Anomaly Finding\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n",
+ "- Aditya Srivastava <adityasrivastava301199@gmail.com>\n",
+ " \n",
+ " Metrics Analysis and Visualization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Metrics Analysis and Visualization"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.dates as mdates\n",
+ "import numpy as np\n",
+ "\n",
+ "import datetime\n",
+ "import time\n",
+ "import requests\n",
+ "\n",
+ "from pprint import pprint\n",
+ "import json\n",
+ "from datetime import datetime, timedelta\n",
+ "\n",
+ "from elasticsearch import Elasticsearch\n",
+ "from elasticsearch_dsl import Search\n",
+ "from elasticsearch.connection import create_ssl_context\n",
+ "import ssl\n",
+ "import urllib3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "PROMETHEUS = 'http://10.10.120.211:30902/' #do not change, unless sure"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Helper Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#function to make DF out of query json\n",
+ "\n",
+ "def convert_to_df(res_json):\n",
+ "    \"\"\"Convert an instant-query JSON response into a DataFrame.\n",
+ "\n",
+ "    One row is produced per entry of res_json['data']['result']: the entry's\n",
+ "    'metric' labels become columns and the last element of its 'value' pair\n",
+ "    is stored in the 'value' column. An empty DataFrame is returned when the\n",
+ "    result list is empty.\n",
+ "    \"\"\"\n",
+ "    data_list = res_json['data']['result']\n",
+ "    if not data_list:\n",
+ "        return pd.DataFrame()\n",
+ "\n",
+ "    # Column order: every label in first-seen order, then 'value'.\n",
+ "    columns = []\n",
+ "    for data in data_list:\n",
+ "        for label in data['metric']:\n",
+ "            if label not in columns:\n",
+ "                columns.append(label)\n",
+ "    if 'value' not in columns:\n",
+ "        columns.append('value')\n",
+ "\n",
+ "    # Build all rows first and create the frame once: DataFrame.append was\n",
+ "    # removed in pandas 2.0 and copied the whole frame on every call.\n",
+ "    # dict(...) copies the labels so the response passed in is not modified.\n",
+ "    records = [dict(data['metric'], value=data['value'][-1]) for data in data_list]\n",
+ "    return pd.DataFrame(records, columns=columns)\n",
+ "\n",
+ "def convert_to_df_range(res_json):\n",
+ "    \"\"\"Convert a range-query JSON response into a long-format DataFrame.\n",
+ "\n",
+ "    Every [timestamp, value] pair of every series in\n",
+ "    res_json['data']['result'] becomes one row holding the series' 'metric'\n",
+ "    labels plus 'timestamp' and 'value' columns. An empty DataFrame is\n",
+ "    returned when the result list is empty.\n",
+ "    \"\"\"\n",
+ "    records = []\n",
+ "    for data in res_json['data']['result']:\n",
+ "        labels = data['metric']\n",
+ "        # np.array() is kept from the original code so the timestamp and value\n",
+ "        # cells hold exactly what they held before; callers compare 'value'\n",
+ "        # against strings such as '100'.\n",
+ "        for sample_time, sample_value in np.array(data['values']):\n",
+ "            # dict(...) copies the labels so the response is not modified.\n",
+ "            records.append(dict(labels, timestamp=sample_time, value=sample_value))\n",
+ "\n",
+ "    # Create the frame once: DataFrame.append was removed in pandas 2.0 and\n",
+ "    # made this loop quadratic in the number of samples.\n",
+ "    return pd.DataFrame(records)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# functions to query\n",
+ "\n",
+ "def convert_to_timestamp(s):\n",
+ " return time.mktime(datetime.strptime(s, \"%Y-%m-%d %H:%M:%S\").timetuple())\n",
+ "\n",
+ "def query_current(params=None):\n",
+ "    \"\"\"Run an instant query against the Prometheus HTTP API.\n",
+ "\n",
+ "    params: dict of query-string parameters,\n",
+ "        e.g. {'query': 'container_cpu_user_seconds_total'}\n",
+ "    Returns the decoded JSON response as a dict.\n",
+ "    \"\"\"\n",
+ "    # PROMETHEUS ends with '/', so strip it to avoid requesting '//api/v1/query'.\n",
+ "    res = requests.get(PROMETHEUS.rstrip('/') + '/api/v1/query',\n",
+ "                       params=params)\n",
+ "    return json.loads(res.text)\n",
+ "\n",
+ "\n",
+ "def query_range(start, end, params=None, steps='30s'):\n",
+ "    \"\"\"Run a range query against the Prometheus HTTP API.\n",
+ "\n",
+ "    start, end: 'YYYY-MM-DD HH:MM:SS' strings, converted with convert_to_timestamp\n",
+ "    params: dict of query-string parameters,\n",
+ "        e.g. {'query': 'container_cpu_user_seconds_total'}\n",
+ "    steps: value sent as the 'step' parameter of the range query\n",
+ "    Returns the decoded JSON response as a dict.\n",
+ "    \"\"\"\n",
+ "    # Work on a copy: writing start/end/step into the argument would leak them\n",
+ "    # into the caller's dict and into a shared default between calls.\n",
+ "    params = dict(params or {})\n",
+ "    params[\"start\"] = convert_to_timestamp(start)\n",
+ "    params[\"end\"] = convert_to_timestamp(end)\n",
+ "    params[\"step\"] = steps\n",
+ "\n",
+ "    # print(params)\n",
+ "\n",
+ "    # PROMETHEUS ends with '/', so strip it to avoid requesting '//api/v1/query_range'.\n",
+ "    res = requests.get(PROMETHEUS.rstrip('/') + '/api/v1/query_range',\n",
+ "                       params=params)\n",
+ "\n",
+ "    return json.loads(res.text)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Analysis Function"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### CPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# CPU Unused Cores\n",
+ "def unused_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " return df\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n",
+ "\n",
+ " target_cpu_usage_range = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(target_cpu_usage_range)\n",
+ "\n",
+ " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ " groups = df.groupby(['cpu'])\n",
+ " if verbose: print(\"Unused Cores :\")\n",
+ " unused_cores = []\n",
+ " for key, item in groups:\n",
+ " curr_df = item\n",
+ " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n",
+ " if idle_row['value'].iloc[0] == '100':\n",
+ " if verbose: print(\"Core: \",key)\n",
+ " unused_cores.append(int(key))\n",
+ "\n",
+ " print(\"Number of unused cores: \", len(unused_cores))\n",
+ " return unused_cores\n",
+ "\n",
+ "\n",
+ "#CPU fully used cores\n",
+ "def fully_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " return df\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n",
+ "\n",
+ " target_cpu_usage_range = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(target_cpu_usage_range)\n",
+ "\n",
+ " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ " groups = df.groupby(['cpu'])\n",
+ " if verbose: print(\"Fully Used Cores :\")\n",
+ " fully_used_cores = []\n",
+ " for key, item in groups:\n",
+ " curr_df = item\n",
+ " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n",
+ " if idle_row['value'].iloc[0] == '0':\n",
+ " if verbose: print(\"Core: \",key)\n",
+ " fully_used_cores.append(int(key))\n",
+ " print(\"Number of fully used cores: \", len(fully_used_cores))\n",
+ " return fully_used_cores\n",
+ "\n",
+ "\n",
+ "# CPU used cores plots\n",
+ "def plot_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot system, user and wait CPU usage for every core that was in use.\n",
+ "\n",
+ "    With csv given, the file is read and its DataFrame returned unchanged.\n",
+ "    Otherwise collectd_cpu_percent is fetched from Prometheus for node between\n",
+ "    start and end; a core counts as used when any of its 'system' or 'user'\n",
+ "    samples differs from '0'. Returns the list of used core keys, or an error\n",
+ "    string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "        return df\n",
+ "\n",
+ "    if start is None or end is None or node is None:\n",
+ "        return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "    params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n",
+ "\n",
+ "    target_cpu_usage_range = query_range(start, end, params, steps)\n",
+ "    df = convert_to_df_range(target_cpu_usage_range)\n",
+ "\n",
+ "    df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "    # Group by a plain column name: a one-element list makes pandas >= 2\n",
+ "    # yield tuple keys such as ('0',) instead of '0'.\n",
+ "    groups = df.groupby('cpu')\n",
+ "    used_cores = []\n",
+ "    # Subplot position of each CPU state that gets plotted.\n",
+ "    subplot_for_type = {'system': 131, 'user': 132, 'wait': 133}\n",
+ "\n",
+ "    for key, curr_df in groups:\n",
+ "        user_row = curr_df.loc[curr_df['type'] == 'user']\n",
+ "        sys_row = curr_df.loc[curr_df['type'] == 'system']\n",
+ "\n",
+ "        # Compare only the 'value' column. Comparing the whole frame also tested\n",
+ "        # the label and timestamp columns, so every core was reported as used.\n",
+ "        if np.any(sys_row['value'] != '0') or np.any(user_row['value'] != '0'):\n",
+ "            used_cores.append(key)\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ "\n",
+ "            for type_key, new_item in curr_df.groupby('type'):\n",
+ "                if type_key in subplot_for_type:\n",
+ "                    ax = fig.add_subplot(subplot_for_type[type_key])\n",
+ "                    ax.title.set_text(type_key)\n",
+ "                    ax.plot(new_item['timestamp'], new_item['value'])\n",
+ "\n",
+ "            plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n",
+ "            plt.show()\n",
+ "\n",
+ "    print(\"Number of used cores: \", len(used_cores))\n",
+ "    return used_cores"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Interface"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Interface Dropped (both type 0 and 1, i.e. rx and tx)\n",
+ "#TODO: Change this to separate functions later\n",
+ "def interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " df_0 = df #TODO: Change this\n",
+ " df_1 = df #TODO: Change this\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_interface_if_dropped_0_total{exported_instance='\" + node + \"'}\"}\n",
+ "\n",
+ " interface_dropped_0 = query_range(start, end, params, steps)\n",
+ " df_0 = convert_to_df_range(interface_dropped_0)\n",
+ " \n",
+ " params = {'query' : \"collectd_interface_if_dropped_1_total{exported_instance='\" + node + \"'}\"}\n",
+ " interface_dropped_1 = query_range(start, end, params, steps)\n",
+ " df_1 = convert_to_df_range(interface_dropped_1)\n",
+ "\n",
+ " \n",
+ " #df_0 : interfaces_dropped_0_df\n",
+ " df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "\n",
+ " #df_1 : interfaces_dropped_1_df\n",
+ " df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "\n",
+ " groups_0 = df_0.groupby(['interface'])\n",
+ " groups_1 = df_1.groupby(['interface'])\n",
+ "\n",
+ " groups = [groups_0, groups_1]\n",
+ " dropped_interfaces= []\n",
+ " drop_type = 0\n",
+ " color = ['oldlace', 'mistyrose']\n",
+ " plot_iter = 111\n",
+ " for group in groups:\n",
+ " dropped = []\n",
+ "\n",
+ " for key, item in group:\n",
+ " curr_df = item\n",
+ " if np.any(curr_df['value'] == '1'):\n",
+ " dropped_row = curr_df.loc[curr_df['value'] == '1']\n",
+ " dropped.append([key, dropped_row['timestamp'].iloc[0]])\n",
+ " fig = plt.figure(figsize=(24,6), facecolor=color[drop_type], edgecolor='red')\n",
+ " ax = fig.add_subplot(plot_iter)\n",
+ " ax.title.set_text(\"Interface: {}\".format(key))\n",
+ " ax.plot(item['timestamp'], item['value'])\n",
+ " dropped_interfaces.append(dropped)\n",
+ " plt.suptitle('Interfaces Drop type {}'.format(drop_type), fontsize=14)\n",
+ " plt.show()\n",
+ " drop_type += 1\n",
+ " return dropped_interfaces\n",
+ "\n",
+ "\n",
+ "# Interface Errors (both type 0 and 1, i.e. rx and tx)\n",
+ "#TODO: Change this to separate functions later\n",
+ "def interface_errors(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Find and plot interfaces that reported errors.\n",
+ "\n",
+ "    With csv given, the same file is used for both error types. Otherwise\n",
+ "    collectd_interface_if_errors_0_total and ..._1_total are fetched from\n",
+ "    Prometheus for node between start and end. An interface is reported when\n",
+ "    any of its samples equals '1'.\n",
+ "\n",
+ "    Returns a list with one entry per error type (0, then 1); each entry is a\n",
+ "    list of [interface, timestamp of first matching sample]. Returns an error\n",
+ "    string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "        df_0 = df #TODO: Change this\n",
+ "        df_1 = df #TODO: Change this\n",
+ "    else:\n",
+ "        if start is None or end is None or node is None:\n",
+ "            return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "        params = {'query' : \"collectd_interface_if_errors_0_total{exported_instance='\" + node + \"'}\"}\n",
+ "        interfaces_errors_0 = query_range(start, end, params, steps)\n",
+ "        df_0 = convert_to_df_range(interfaces_errors_0)\n",
+ "\n",
+ "        params = {'query' : \"collectd_interface_if_errors_1_total{exported_instance='\" + node + \"'}\"}\n",
+ "        interface_errors_1 = query_range(start, end, params, steps)\n",
+ "        df_1 = convert_to_df_range(interface_errors_1)\n",
+ "\n",
+ "    #df_0 : interfaces_errors_0_df\n",
+ "    df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "\n",
+ "    #df_1 : interfaces_errors_1_df\n",
+ "    df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "\n",
+ "    # Group by a plain column name: a one-element list makes pandas >= 2\n",
+ "    # yield tuple keys instead of the interface name.\n",
+ "    groups = [df_0.groupby('interface'), df_1.groupby('interface')]\n",
+ "    err_interfaces = []\n",
+ "    err_type = 0\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "    for group in groups:\n",
+ "        errors = []\n",
+ "\n",
+ "        for key, item in group:\n",
+ "            if np.any(item['value'] == '1'):\n",
+ "                err_row = item.loc[item['value'] == '1']\n",
+ "                # Was 'erros.append', which raised NameError on the first hit.\n",
+ "                errors.append([key, err_row['timestamp'].iloc[0]])\n",
+ "\n",
+ "                fig = plt.figure(figsize=(24,6), facecolor=color[err_type], edgecolor='red')\n",
+ "                ax = fig.add_subplot(111)\n",
+ "                ax.title.set_text(\"Interface: {}\".format(key))\n",
+ "                ax.plot(item['timestamp'], item['value'])\n",
+ "\n",
+ "        err_interfaces.append(errors)\n",
+ "        plt.suptitle('Interfaces Error type {}'.format(err_type), fontsize=14)\n",
+ "        plt.show()\n",
+ "        # One step per error type; 'color' only has entries for types 0 and 1.\n",
+ "        err_type += 1\n",
+ "\n",
+ "    return err_interfaces"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### RDT "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# L3 cache bytes\n",
+ "def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ " params = {'query' : \"collectd_intel_rdt_bytes{exported_instance='\" + node + \"'}\"}\n",
+ " intel_rdt_bytes = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(intel_rdt_bytes)\n",
+ "\n",
+ " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ " groups = df.groupby(['intel_rdt'])\n",
+ " for key, item in groups:\n",
+ " curr_df = item\n",
+ " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ " ax1 = fig.add_subplot(111)\n",
+ " ax1.title.set_text(\"Intel RDT Number: {}\".format(key))\n",
+ " ax1.plot(item['timestamp'], item['value'])\n",
+ " plt.show()\n",
+ " return\n",
+ "\n",
+ "\n",
+ "# L3 IPC values\n",
+ "def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_intel_rdt_ipc{exported_instance='\" + node + \"'}\"}\n",
+ " intel_rdt_ipc = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(intel_rdt_ipc)\n",
+ "\n",
+ " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ " groups = df.groupby(['intel_rdt'])\n",
+ " for key, item in groups:\n",
+ " curr_df = item\n",
+ " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ " ax1 = fig.add_subplot(111)\n",
+ " ax1.title.set_text(\"Intel RDT Number: {}, IPC value\".format(key))\n",
+ " ax1.plot(item['timestamp'], item['value'])\n",
+ " plt.show()\n",
+ " return\n",
+ "\n",
+ "\n",
+ "# Memory bandwidth\n",
+ "def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " else:\n",
+ "\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_intel_rdt_memory_bandwidth_total{exported_instance='\" + node + \"'}\"}\n",
+ " intel_rdt_mem_bw = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(intel_rdt_mem_bw)\n",
+ "\n",
+ " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ " \n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Memory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "def get_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_memory{exported_instance='\" + node + \"'} / (1024*1024*1024) \"} \n",
+ " target_memory_usage_range = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(target_memory_usage_range)\n",
+ " \n",
+ " df = df.drop(['instance', 'job'], axis = 1)\n",
+ " groups = df.groupby(['memory'])\n",
+ " for key, item in groups:\n",
+ " curr_df = item\n",
+ " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ " ax1 = fig.add_subplot(111)\n",
+ " ax1.title.set_text(\"Memory Type: {}\".format(key))\n",
+ " ax1.plot(item['timestamp'], item['value'])\n",
+ " plt.show()\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Testing Zone"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_memory_usage('2020-08-03 08:00:12', '2020-08-03 08:01:12', 'pod12-node4')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def analyse(timestamp, node):\n",
+ " ts = datetime.strptime(timestamp.split(',')[0], \"%Y-%m-%d %H:%M:%S\")\n",
+ " start = ts - timedelta(seconds=10)\n",
+ " end = ts + timedelta(seconds=10)\n",
+ " \n",
+ " start = str(start)\n",
+ " end = str(end)\n",
+ " steps = '5s'\n",
+ "\n",
+ " print(\"Starting Analysis from\",start,\"to\",end,'\\n\\n')\n",
+ "\n",
+ " if \"node4\" in node:\n",
+ " node = 'pod12-node4'\n",
+ "\n",
+ " #cpu analysis\n",
+ " print(\"=====CPU ANALYSIS=====\\n\")\n",
+ " unused = unused_cores(start, end, node, steps)\n",
+ " print(\"Unused Cores:\", unused)\n",
+ " fully_used = fully_used_cores(start, end, node, steps)\n",
+ " print(\"Fully Used Cores:\", fully_used)\n",
+ " print(\"Plotting used cores:\")\n",
+ " used_cores = plot_used_cores(start, end, node, steps)\n",
+ " \n",
+ " #interface analysis\n",
+ " print(\"=====Interfaces Dropped / Errors=====\\n\")\n",
+ " dropped_interfaces = interface_dropped(start, end, node, steps)\n",
+ " err_interfaces = interface_errors(start, end, node, steps)\n",
+ " \n",
+ " #RDT Analysis\n",
+ " print(\"=====RDT Analysis=====\\n\")\n",
+ " plot_rdt_bytes(start, end, node, steps)\n",
+ " plot_rdt_ipc(start, end, node, steps)\n",
+ " mem_bandwidht = get_rdt_memory_bandwidth(start, end, node, steps)\n",
+ " \n",
+ " #Memory Analysis:\n",
+ " print(\"=====Memory Analysis=====\\n\")\n",
+ " mem = get_memory_usage(start, end, node, steps)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Usage / Examples\n",
+ "\n",
+ "\n",
+ "##### CPU \n",
+ "\n",
+ "- For finding unused CPU cores\n",
+ "\n",
+ "```py\n",
+ "# Fetching from prometheus\n",
+ "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- For finding fully used cores\n",
+ "\n",
+ "```py\n",
+ "# Fetching from prometheus\n",
+ "fully_used = fully_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- Similarly for plotting used cores\n",
+ "\n",
+ "```py\n",
+ "# Fetching\n",
+ "plot_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "#csv\n",
+ "# use Analysis-Monitoring-Local Notebook for correct analysis \n",
+ "plot_used_cores(csv='metrics_data/cpu-0/cpu-user-2020-06-02')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "\n",
+ "##### Interface\n",
+ "\n",
+ "- Interface Dropped \n",
+ "\n",
+ "```py\n",
+ "# Fetching from prom\n",
+ "dropped_interfaces = interface_dropped('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- Interface Errors\n",
+ "\n",
+ "```py\n",
+ "# Fetching from prom\n",
+ "interface_errors('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "```\n",
+ "\n",
+ "##### RDT\n",
+ "\n",
+ "- Plot bytes\n",
+ "\n",
+ "```py\n",
+ "# fetch\n",
+ "plot_rdt_bytes('2020-07-31 08:00:12', '2020-07-31 08:01:12','pod12-node4')\n",
+ "```\n",
+ "\n",
+ "- Plot ipc values\n",
+ "\n",
+ "```py\n",
+ "#fetch\n",
+ "plot_rdt_ipc('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "```\n",
+ "\n",
+ "- Memory bandwidth\n",
+ "\n",
+ "```py\n",
+ "#fetch\n",
+ "get_rdt_memory_bandwidth('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "```\n",
+ "\n",
+ "##### Memory\n",
+ "\n",
+ "- Memory usage\n",
+ "\n",
+ "```py\n",
+ "get_memory_usage('2020-08-03 08:00:12', '2020-08-03 08:01:12', 'pod12-node4')\n",
+ "```\n",
+ "\n",
+ "##### Analyse everything\n",
+ "\n",
+ "```py\n",
+ "# example alert_time: 2020-08-03 08:00:12\n",
+ "# example index: 'pod12-node4'\n",
+ "analyse(alert_time,index)\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Checking Anomaly in logs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Give file name\n",
+ "foldername = \"results_2020-08-07_03-39-57\"\n",
+ "#Give index name - \"node1*\" or \"node4*\"\n",
+ "index = \"node4*\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "# Credentials are read from the environment so that no password is stored in\n",
+ "# the notebook. Set ES_PASSWORD (and optionally ES_USER) before running.\n",
+ "ES_USER = os.environ.get('ES_USER', 'elasticsearch')\n",
+ "ES_PASSWORD = os.environ['ES_PASSWORD']\n",
+ "\n",
+ "# NOTE(review): hostname and certificate verification are switched off below,\n",
+ "# as in the original cell - confirm this is acceptable for the target cluster.\n",
+ "ssl_context = create_ssl_context()\n",
+ "ssl_context.check_hostname = False\n",
+ "ssl_context.verify_mode = ssl.CERT_NONE\n",
+ "urllib3.disable_warnings()\n",
+ "client = Elasticsearch(['https://10.10.120.211:31111'], http_auth=(ES_USER, ES_PASSWORD), verify_certs=False, ssl_context=ssl_context)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vsperf = \"vsperf-overall_\"+ foldername[8:] +\".log\"\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"alert\").query(\"match_phrase\", log_path=vsperf)\n",
+ "\n",
+ "# Start from None so that a search without hits is reported clearly instead of\n",
+ "# raising NameError below or silently reusing a value left in the kernel.\n",
+ "alert_time = None\n",
+ "for hits in s.scan():\n",
+ "    # Each hit overwrites the previous one, so the last hit scanned is kept.\n",
+ "    alert_time = hits.alert_time\n",
+ "\n",
+ "if alert_time is None:\n",
+ "    raise LookupError(\"No alert found in index '{}' for log '{}'\".format(index, vsperf))\n",
+ "\n",
+ "print(alert_time)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "analyse(alert_time,index)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb
new file mode 100644
index 00000000..10c59d84
--- /dev/null
+++ b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb
@@ -0,0 +1,644 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Metrics Analysis Notebook (k8s)\n",
+ "\n",
+ "#### Used to analyse / visualize the metrics, data fetched from prometheus (monitoring cluster)\n",
+ "\n",
+ "### Contributor: Aditya Srivastava <adityasrivastava301199@gmail.com>\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.dates as mdates\n",
+ "import numpy as np\n",
+ "\n",
+ "import datetime\n",
+ "import time\n",
+ "import requests\n",
+ "\n",
+ "from pprint import pprint\n",
+ "import json\n",
+ "from datetime import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "PROMETHEUS = 'http://10.10.120.211:30902/' #do not change, unless sure"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Helper Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#function to make DF out of query json\n",
+ "\n",
+ "def convert_to_df(res_json):\n",
+ "    \"\"\"Convert an instant-query JSON response into a DataFrame.\n",
+ "\n",
+ "    One row is produced per entry of res_json['data']['result']: the entry's\n",
+ "    'metric' labels become columns and the last element of its 'value' pair\n",
+ "    is stored in the 'value' column. An empty DataFrame is returned when the\n",
+ "    result list is empty.\n",
+ "    \"\"\"\n",
+ "    data_list = res_json['data']['result']\n",
+ "    if not data_list:\n",
+ "        return pd.DataFrame()\n",
+ "\n",
+ "    # Column order: every label in first-seen order, then 'value'.\n",
+ "    columns = []\n",
+ "    for data in data_list:\n",
+ "        for label in data['metric']:\n",
+ "            if label not in columns:\n",
+ "                columns.append(label)\n",
+ "    if 'value' not in columns:\n",
+ "        columns.append('value')\n",
+ "\n",
+ "    # Build all rows first and create the frame once: DataFrame.append was\n",
+ "    # removed in pandas 2.0 and copied the whole frame on every call.\n",
+ "    # dict(...) copies the labels so the response passed in is not modified.\n",
+ "    records = [dict(data['metric'], value=data['value'][-1]) for data in data_list]\n",
+ "    return pd.DataFrame(records, columns=columns)\n",
+ "\n",
+ "def convert_to_df_range(res_json):\n",
+ "    \"\"\"Convert a range-query JSON response into a long-format DataFrame.\n",
+ "\n",
+ "    Every [timestamp, value] pair of every series in\n",
+ "    res_json['data']['result'] becomes one row holding the series' 'metric'\n",
+ "    labels plus 'timestamp' and 'value' columns. An empty DataFrame is\n",
+ "    returned when the result list is empty.\n",
+ "    \"\"\"\n",
+ "    records = []\n",
+ "    for data in res_json['data']['result']:\n",
+ "        labels = data['metric']\n",
+ "        # np.array() is kept from the original code so the timestamp and value\n",
+ "        # cells hold exactly what they held before; callers compare 'value'\n",
+ "        # against strings such as '100'.\n",
+ "        for sample_time, sample_value in np.array(data['values']):\n",
+ "            # dict(...) copies the labels so the response is not modified.\n",
+ "            records.append(dict(labels, timestamp=sample_time, value=sample_value))\n",
+ "\n",
+ "    # Create the frame once: DataFrame.append was removed in pandas 2.0 and\n",
+ "    # made this loop quadratic in the number of samples.\n",
+ "    return pd.DataFrame(records)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# functions to query\n",
+ "\n",
+ "def convert_to_timestamp(s):\n",
+ " return time.mktime(datetime.strptime(s, \"%Y-%m-%d %H:%M:%S\").timetuple())\n",
+ "\n",
+ "def query_current(params=None):\n",
+ "    \"\"\"Run an instant query against the Prometheus HTTP API.\n",
+ "\n",
+ "    params: dict of query-string parameters,\n",
+ "        e.g. {'query': 'container_cpu_user_seconds_total'}\n",
+ "    Returns the decoded JSON response as a dict.\n",
+ "    \"\"\"\n",
+ "    # PROMETHEUS ends with '/', so strip it to avoid requesting '//api/v1/query'.\n",
+ "    res = requests.get(PROMETHEUS.rstrip('/') + '/api/v1/query',\n",
+ "                       params=params)\n",
+ "    return json.loads(res.text)\n",
+ "\n",
+ "\n",
+ "def query_range(start, end, params=None, steps='30s'):\n",
+ "    \"\"\"Run a range query against the Prometheus HTTP API.\n",
+ "\n",
+ "    start, end: 'YYYY-MM-DD HH:MM:SS' strings, converted with convert_to_timestamp\n",
+ "    params: dict of query-string parameters,\n",
+ "        e.g. {'query': 'container_cpu_user_seconds_total'}\n",
+ "    steps: value sent as the 'step' parameter of the range query\n",
+ "    Returns the decoded JSON response as a dict.\n",
+ "    \"\"\"\n",
+ "    # Work on a copy: writing start/end/step into the argument would leak them\n",
+ "    # into the caller's dict and into a shared default between calls.\n",
+ "    params = dict(params or {})\n",
+ "    params[\"start\"] = convert_to_timestamp(start)\n",
+ "    params[\"end\"] = convert_to_timestamp(end)\n",
+ "    params[\"step\"] = steps\n",
+ "\n",
+ "    print(params)\n",
+ "\n",
+ "    # PROMETHEUS ends with '/', so strip it to avoid requesting '//api/v1/query_range'.\n",
+ "    res = requests.get(PROMETHEUS.rstrip('/') + '/api/v1/query_range',\n",
+ "                       params=params)\n",
+ "\n",
+ "    return json.loads(res.text)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Analysis Function"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### CPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# CPU Unused Cores\n",
+ "def unused_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " return df\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n",
+ "\n",
+ " target_cpu_usage_range = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(target_cpu_usage_range)\n",
+ "\n",
+ " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ " groups = df.groupby(['cpu'])\n",
+ " if verbose: print(\"Unused Cores :\")\n",
+ " unused_cores = []\n",
+ " for key, item in groups:\n",
+ " curr_df = item\n",
+ " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n",
+ " if idle_row['value'].iloc[0] == '100':\n",
+ " if verbose: print(\"Core: \",key)\n",
+ " unused_cores.append(int(key))\n",
+ "\n",
+ " print(\"Number of unused cores: \", len(unused_cores))\n",
+ " return unused_cores\n",
+ "\n",
+ "\n",
+ "#CPU fully used cores\n",
+ "def fully_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ " \n",
+ " if csv is not None:\n",
+ " df = pd.read_csv(csv)\n",
+ " return df\n",
+ " else:\n",
+ " if start is None or end is None or node is None:\n",
+ " return \"Start, end and Node name required when fetching from prometheus\"\n",
+ " \n",
+ " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n",
+ "\n",
+ " target_cpu_usage_range = query_range(start, end, params, steps)\n",
+ " df = convert_to_df_range(target_cpu_usage_range)\n",
+ "\n",
+ " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ " groups = df.groupby(['cpu'])\n",
+ " if verbose: print(\"Fully Used Cores :\")\n",
+ " fully_used_cores = []\n",
+ " for key, item in groups:\n",
+ " curr_df = item\n",
+ " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n",
+ " if idle_row['value'].iloc[0] == '0':\n",
+ " if verbose: print(\"Core: \",key)\n",
+ " fully_used_cores.append(int(key))\n",
+ " print(\"Number of fully used cores: \", len(fully_used_cores))\n",
+ " return fully_used_cores\n",
+ "\n",
+ "\n",
+ "# CPU used cores plots\n",
+ "def plot_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot the cpu usage of every used core of a node.\n",
+ "\n",
+ "    csv mode: load the file, add a 'rate' column (difference between\n",
+ "    consecutive 'value' samples), plot it against 'epoch' and return the\n",
+ "    DataFrame.\n",
+ "\n",
+ "    prometheus mode: query collectd_cpu_percent for `node` and, for every\n",
+ "    core whose first 'idle' sample is not '100', plot its system, user and\n",
+ "    wait series. Returns the list of those cores, or a message string when\n",
+ "    start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "        df['rate'] = df['value'].diff()\n",
+ "\n",
+ "        fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ "        ax1 = fig.add_subplot(111)\n",
+ "        ax1.title.set_text('CPU usage')\n",
+ "        ax1.plot(df['epoch'], df['rate'])\n",
+ "        return df\n",
+ "\n",
+ "    if start is None or end is None or node is None:\n",
+ "        return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "    params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n",
+ "    df = convert_to_df_range(query_range(start, end, params, steps))\n",
+ "    df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "\n",
+ "    # subplot position of every plotted cpu state; other states are not plotted\n",
+ "    subplot_of = {'system': 131, 'user': 132, 'wait': 133}\n",
+ "    used_cores = []\n",
+ "    # a plain column name keeps the group keys scalar on every pandas version\n",
+ "    for key, item in df.groupby('cpu'):\n",
+ "        idle_row = item.loc[item['type'] == 'idle']\n",
+ "        if idle_row['value'].iloc[0] == '100':\n",
+ "            continue\n",
+ "\n",
+ "        used_cores.append(key)\n",
+ "        fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ "        for type_key, new_item in item.groupby('type'):\n",
+ "            if type_key in subplot_of:\n",
+ "                ax = fig.add_subplot(subplot_of[type_key])\n",
+ "                ax.title.set_text(type_key)\n",
+ "                ax.plot(new_item['timestamp'], new_item['value'])\n",
+ "\n",
+ "        plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n",
+ "        plt.show()\n",
+ "    print(\"Number of used cores: \", len(used_cores))\n",
+ "    return used_cores"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Interface"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Interface Dropped (both type 1 and 2, i.e rx and tx)\n",
+ "#TODO: Change this to separate functions later\n",
+ "def interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot the dropped-packet series of every interface and list the drops.\n",
+ "\n",
+ "    Queries collectd_interface_if_dropped_0_total and\n",
+ "    collectd_interface_if_dropped_1_total for `node`; in csv mode the same\n",
+ "    file is used for both drop types.\n",
+ "\n",
+ "    Returns [drops_type_0, drops_type_1]; every entry is a list of\n",
+ "    [interface, timestamp of the first sample whose value is '1'].\n",
+ "    Returns a message string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "        frames = [df, df] #TODO: Change this\n",
+ "    else:\n",
+ "        if start is None or end is None or node is None:\n",
+ "            return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "        frames = []\n",
+ "        for drop_type in (0, 1):\n",
+ "            params = {'query' : \"collectd_interface_if_dropped_\" + str(drop_type) + \"_total{exported_instance='\" + node + \"'}\"}\n",
+ "            frames.append(convert_to_df_range(query_range(start, end, params, steps)))\n",
+ "\n",
+ "    # drop the prometheus label columns of both frames before plotting anything\n",
+ "    frames = [frame.drop(['__name__', 'instance', 'job'], axis = 1) for frame in frames]\n",
+ "\n",
+ "    dropped_interfaces = []\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "    for drop_type, frame in enumerate(frames):\n",
+ "        dropped = []\n",
+ "\n",
+ "        # a plain column name keeps the group keys scalar on every pandas version\n",
+ "        for key, item in frame.groupby('interface'):\n",
+ "            dropped_row = item.loc[item['value'] == '1']\n",
+ "            if not dropped_row.empty:\n",
+ "                dropped.append([key, dropped_row['timestamp'].iloc[0]])\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor=color[drop_type], edgecolor='red')\n",
+ "            ax = fig.add_subplot(111)\n",
+ "            ax.title.set_text(\"Interface: {}\".format(key))\n",
+ "            ax.plot(item['timestamp'], item['value'])\n",
+ "        dropped_interfaces.append(dropped)\n",
+ "        plt.suptitle('Interfaces Drop type {}'.format(drop_type), fontsize=14)\n",
+ "        plt.show()\n",
+ "    return dropped_interfaces\n",
+ "\n",
+ "\n",
+ "# Interface Errors (both type 1 and 2, i.e rx and tx)\n",
+ "#TODO: Change this to separate functions later\n",
+ "def interface_errors(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot the error series of every interface and list the errors.\n",
+ "\n",
+ "    Queries collectd_interface_if_errors_0_total and\n",
+ "    collectd_interface_if_errors_1_total for `node`; in csv mode the same\n",
+ "    file is used for both error types.\n",
+ "\n",
+ "    Returns [errors_type_0, errors_type_1]; every entry is a list of\n",
+ "    [interface, timestamp of the first sample whose value is '1'].\n",
+ "    Returns a message string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "        frames = [df, df] #TODO: Change this\n",
+ "    else:\n",
+ "        if start is None or end is None or node is None:\n",
+ "            return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "        frames = []\n",
+ "        for err_type in (0, 1):\n",
+ "            params = {'query' : \"collectd_interface_if_errors_\" + str(err_type) + \"_total{exported_instance='\" + node + \"'}\"}\n",
+ "            frames.append(convert_to_df_range(query_range(start, end, params, steps)))\n",
+ "\n",
+ "    # drop the prometheus label columns of both frames before plotting anything\n",
+ "    frames = [frame.drop(['__name__', 'instance', 'job'], axis = 1) for frame in frames]\n",
+ "\n",
+ "    err_interfaces = []\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "    for err_type, frame in enumerate(frames):\n",
+ "        errors = []\n",
+ "\n",
+ "        # a plain column name keeps the group keys scalar on every pandas version\n",
+ "        for key, item in frame.groupby('interface'):\n",
+ "            err_row = item.loc[item['value'] == '1']\n",
+ "            if not err_row.empty:\n",
+ "                errors.append([key, err_row['timestamp'].iloc[0]])\n",
+ "\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor=color[err_type], edgecolor='red')\n",
+ "            ax = fig.add_subplot(111)\n",
+ "            ax.title.set_text(\"Interface: {}\".format(key))\n",
+ "            ax.plot(item['timestamp'], item['value'])\n",
+ "\n",
+ "        err_interfaces.append(errors)\n",
+ "        plt.suptitle('Interfaces Error type {}'.format(err_type), fontsize=14)\n",
+ "        plt.show()\n",
+ "\n",
+ "    return err_interfaces"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### RDT "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# L3 cache bytes\n",
+ "def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot collectd_intel_rdt_bytes over time, one figure per intel_rdt group.\n",
+ "\n",
+ "    The samples come from `csv` when it is given, otherwise from a\n",
+ "    prometheus range query for `node` between `start` and `end`.\n",
+ "    Returns None, or a message string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "    else:\n",
+ "        if start is None or end is None or node is None:\n",
+ "            return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "        params = {'query' : \"collectd_intel_rdt_bytes{exported_instance='\" + node + \"'}\"}\n",
+ "        df = convert_to_df_range(query_range(start, end, params, steps))\n",
+ "\n",
+ "    df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "    # a plain column name keeps the group keys scalar on every pandas version,\n",
+ "    # so the figure title shows the bare group name\n",
+ "    for key, item in df.groupby('intel_rdt'):\n",
+ "        fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ "        ax1 = fig.add_subplot(111)\n",
+ "        ax1.title.set_text(\"Intel RDT Number: {}\".format(key))\n",
+ "        ax1.plot(item['timestamp'], item['value'])\n",
+ "        plt.show()\n",
+ "    return\n",
+ "\n",
+ "\n",
+ "# L3 IPC values\n",
+ "def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot collectd_intel_rdt_ipc over time, one figure per intel_rdt group.\n",
+ "\n",
+ "    The samples come from `csv` when it is given, otherwise from a\n",
+ "    prometheus range query for `node` between `start` and `end`.\n",
+ "    Returns None, or a message string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "    else:\n",
+ "        if start is None or end is None or node is None:\n",
+ "            return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "        params = {'query' : \"collectd_intel_rdt_ipc{exported_instance='\" + node + \"'}\"}\n",
+ "        df = convert_to_df_range(query_range(start, end, params, steps))\n",
+ "\n",
+ "    df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n",
+ "    # a plain column name keeps the group keys scalar on every pandas version,\n",
+ "    # so the figure title shows the bare group name\n",
+ "    for key, item in df.groupby('intel_rdt'):\n",
+ "        fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ "        ax1 = fig.add_subplot(111)\n",
+ "        ax1.title.set_text(\"Intel RDT Number: {}, IPC value\".format(key))\n",
+ "        ax1.plot(item['timestamp'], item['value'])\n",
+ "        plt.show()\n",
+ "    return\n",
+ "\n",
+ "\n",
+ "# memory bandwidth\n",
+ "def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Return the intel rdt memory bandwidth samples as a DataFrame.\n",
+ "\n",
+ "    The samples come from `csv` when it is given, otherwise from a range\n",
+ "    query of collectd_intel_rdt_memory_bandwidth_total for `node`.\n",
+ "    The '__name__', 'instance' and 'job' columns are removed when present.\n",
+ "    Returns a message string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "    else:\n",
+ "        if start is None or end is None or node is None:\n",
+ "            return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "        params = {'query' : \"collectd_intel_rdt_memory_bandwidth_total{exported_instance='\" + node + \"'}\"}\n",
+ "        df = convert_to_df_range(query_range(start, end, params, steps))\n",
+ "\n",
+ "    # errors='ignore': a csv file may lack the prometheus label columns, and\n",
+ "    # a missing label must not abort the call with a KeyError\n",
+ "    df = df.drop(['__name__', 'instance', 'job'], axis = 1, errors='ignore')\n",
+ "\n",
+ "    return df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Memory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "def get_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Return the memory usage samples as a DataFrame.\n",
+ "\n",
+ "    The samples come from `csv` when it is given, otherwise from a range\n",
+ "    query of collectd_memory for `node`, divided by 1024*1024*1024 inside\n",
+ "    the query (presumably bytes to GiB -- confirm against the exporter).\n",
+ "    The 'instance' and 'job' columns are removed when present.\n",
+ "    Returns a message string when start, end or node is missing.\n",
+ "    \"\"\"\n",
+ "    if csv is not None:\n",
+ "        df = pd.read_csv(csv)\n",
+ "    else:\n",
+ "        if start is None or end is None or node is None:\n",
+ "            return \"Start, end and Node name required when fetching from prometheus\"\n",
+ "\n",
+ "        params = {'query' : \"collectd_memory{exported_instance='\" + node + \"'} / (1024*1024*1024) \"}\n",
+ "        df = convert_to_df_range(query_range(start, end, params, steps))\n",
+ "\n",
+ "    # errors='ignore': a csv file may lack the prometheus label columns, and\n",
+ "    # a missing label must not abort the call with a KeyError\n",
+ "    df = df.drop(['instance', 'job'], axis = 1, errors='ignore')\n",
+ "    return df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Testing Zone"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "# prom fetch\n",
+ "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "print(cores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Usage / Examples\n",
+ "\n",
+ "\n",
+ "##### CPU \n",
+ "\n",
+ "- For finding unused cpu cores\n",
+ "\n",
+ "```py\n",
+ "# Fetching from prometheus\n",
+ "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- For finding fully used cores\n",
+ "\n",
+ "```py\n",
+ "# Fetching from prometheus\n",
+ "fully_used = fully_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- Similarly for plotting used cores\n",
+ "\n",
+ "```py\n",
+ "# Fetching\n",
+ "plot_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "#csv\n",
+ "# use Analysis-Monitoring-Local Notebook for correct analysis \n",
+ "plot_used_cores(csv='metrics_data/cpu-0/cpu-user-2020-06-02')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "\n",
+ "##### Interface\n",
+ "\n",
+ "- Interface Dropped \n",
+ "\n",
+ "```py\n",
+ "# Fetching from prom\n",
+ "dropped_interfaces = interface_dropped('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "\n",
+ "```\n",
+ "\n",
+ "- Interface Errors\n",
+ "\n",
+ "```py\n",
+ "# Fetching from prom\n",
+ "interface_errors('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "```\n",
+ "\n",
+ "##### RDT\n",
+ "\n",
+ "- Plot bytes\n",
+ "\n",
+ "```py\n",
+ "# fetch\n",
+ "plot_rdt_bytes('2020-07-31 08:00:12', '2020-07-31 08:01:12','pod12-node4')\n",
+ "```\n",
+ "\n",
+ "- Plot ipc values\n",
+ "\n",
+ "```py\n",
+ "#fetch\n",
+ "plot_rdt_ipc('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "```\n",
+ "\n",
+ "- Memory bandwidth\n",
+ "\n",
+ "```py\n",
+ "#fetch\n",
+ "get_rdt_memory_bandwidth('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n",
+ "```"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb
new file mode 100644
index 00000000..0385b6f9
--- /dev/null
+++ b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb
@@ -0,0 +1,913 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Metrics Analysis Notebook (local)\n",
+ "\n",
+ "#### Used to analyse / visualize the metrics when uploaded via csv file\n",
+ "\n",
+ "### Contributor: Aditya Srivastava <adityasrivastava301199@gmail.com>\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datetime import datetime\n",
+ "import json\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.dates as mdates\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import pandas as pd\n",
+ "from pprint import pprint\n",
+ "import re\n",
+ "import requests\n",
+ "import time"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Helper Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "DATETIME_FORMAT = \"%Y-%m-%d %H:%M:%S\"\n",
+ "\n",
+ "def convert_to_timestamp(s):\n",
+ "    \"\"\"Parse a DATETIME_FORMAT string and return it as epoch seconds (via time.mktime).\"\"\"\n",
+ "    parsed = datetime.strptime(s, DATETIME_FORMAT)\n",
+ "    return time.mktime(parsed.timetuple())\n",
+ "\n",
+ "def convert_to_time_string(epoch):\n",
+ "    \"\"\"Format an epoch given in milliseconds as a DATETIME_FORMAT string.\"\"\"\n",
+ "    seconds = float(epoch)/1000.\n",
+ "    return datetime.fromtimestamp(seconds).strftime(DATETIME_FORMAT)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Note: \n",
+ " \n",
+ "Path will be used as a parameter in almost every function\n",
+ "\n",
+ "path / rootdir / csv : (str) Path to the folder whose direct children are metric folders\n",
+ "\n",
+ "example: /path/to/folder\n",
+ "\n",
+ "When : \n",
+ "```sh\n",
+ "ls /path/to/folder\n",
+ "\n",
+ "# output should be directories such as\n",
+ "# cpu-0 cpu-1 cpu-2 ..........................\n",
+ "# processes-ovs-vswitchd ........processes-ovsdb-server\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Analysis Function"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### CPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rootdir = 'metrics_data/'\n",
+ "\n",
+ "def fetch_cpu_data(rootdir):\n",
+ "    \"\"\"Load the per-core cpu csv files found below `rootdir` into one DataFrame.\n",
+ "\n",
+ "    Every folder whose name starts with cpu-<N> contributes the 'value'\n",
+ "    column of its user, system and idle csv files.\n",
+ "\n",
+ "    Returned columns: user, system, idle, epoch, cpu (core id taken from the\n",
+ "    folder name, as a string) and user/system/idle_percentage, each relative\n",
+ "    to user + system + idle. The frame is empty when no folder matches.\n",
+ "    \"\"\"\n",
+ "    reg_compile = re.compile(r\"cpu-\\d{1,2}\")\n",
+ "    frames = []\n",
+ "    for dirpath, dirnames, filenames in os.walk(rootdir):\n",
+ "        dirname = dirpath.split(os.sep)[-1]\n",
+ "        if not reg_compile.match(dirname):\n",
+ "            continue\n",
+ "        # read 3 files from this folder...\n",
+ "        _df = pd.DataFrame()\n",
+ "        for file in filenames:\n",
+ "            for metric in ('user', 'system', 'idle'):\n",
+ "                if metric in file:\n",
+ "                    temp_df = pd.read_csv(dirpath + os.sep + file)\n",
+ "                    _df[metric] = temp_df['value']\n",
+ "                    _df['epoch'] = temp_df['epoch']\n",
+ "\n",
+ "        _df['cpu'] = dirname.split('-')[-1]\n",
+ "        frames.append(_df)\n",
+ "\n",
+ "    # DataFrame.append no longer exists in pandas 2.0; concatenating once is\n",
+ "    # also linear instead of re-copying the frame for every folder.\n",
+ "    if frames:\n",
+ "        df = pd.concat(frames, ignore_index=True)\n",
+ "    else:\n",
+ "        df = pd.DataFrame(columns=['user', 'system', 'idle', 'epoch', 'cpu'])\n",
+ "\n",
+ "    total = df['user'] + df['system'] + df['idle']\n",
+ "\n",
+ "    df['user_percentage'] = df['user']*100 / total\n",
+ "    df['system_percentage'] = df['system']*100 / total\n",
+ "    df['idle_percentage'] = df['idle']*100 / total\n",
+ "\n",
+ "    return df\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _plot_cpu_core(core, core_df, title):\n",
+ "    \"\"\"Plot the system, user and idle percentages of one core side by side.\"\"\"\n",
+ "    fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n",
+ "    panels = ((131, \"System\", 'system_percentage'),\n",
+ "              (132, \"User\", 'user_percentage'),\n",
+ "              (133, \"Idle\", 'idle_percentage'))\n",
+ "    for position, name, column in panels:\n",
+ "        ax = fig.add_subplot(position)\n",
+ "        ax.title.set_text(name)\n",
+ "        ax.plot(core_df['epoch'], core_df[column])\n",
+ "    plt.suptitle('{} {}'.format(title, core), fontsize=14)\n",
+ "    plt.show()\n",
+ "\n",
+ "\n",
+ "def _select_cores(rootdir, is_selected, title):\n",
+ "    \"\"\"Return the set of cores whose idle percentages satisfy is_selected, plotting each one.\"\"\"\n",
+ "    df = fetch_cpu_data(rootdir)\n",
+ "    selected = set()\n",
+ "    # a plain column name keeps the group keys scalar on every pandas version\n",
+ "    for key, item in df.groupby('cpu'):\n",
+ "        if is_selected(item['idle_percentage']):\n",
+ "            selected.add(key)\n",
+ "            _plot_cpu_core(key, item, title)\n",
+ "    return selected\n",
+ "\n",
+ "\n",
+ "# CPU Unused Cores\n",
+ "def unused_cores(rootdir, verbose=False):\n",
+ "    \"\"\"Plot and return the cores without any sample below 99.999 % idle.\n",
+ "\n",
+ "    rootdir -- folder whose direct children are the metric folders\n",
+ "    verbose -- print a header line before the analysis\n",
+ "\n",
+ "    Returns the set of core ids (strings taken from the folder names).\n",
+ "    \"\"\"\n",
+ "    if verbose: print(\"Unused Cores :\")\n",
+ "    cores = _select_cores(rootdir, lambda idle: not (idle < 99.999).any(), 'Unused CPU Core')\n",
+ "    print(\"Number of unused cores: \", len(cores))\n",
+ "    return cores\n",
+ "\n",
+ "\n",
+ "#CPU fully used cores\n",
+ "def fully_used_cores(rootdir, verbose=False):\n",
+ "    \"\"\"Plot and return the cores with at least one sample at or below 10 % idle.\n",
+ "\n",
+ "    rootdir -- folder whose direct children are the metric folders\n",
+ "    verbose -- print a header line before the analysis\n",
+ "\n",
+ "    Returns the set of core ids (strings taken from the folder names).\n",
+ "    \"\"\"\n",
+ "    if verbose: print(\"Fully Used Cores :\")\n",
+ "    cores = _select_cores(rootdir, lambda idle: (idle <= 10).any(), 'Fully Used CPU Core')\n",
+ "    print(\"Number of fully used cores: \", len(cores))\n",
+ "    return cores\n",
+ "\n",
+ "\n",
+ "# CPU used cores plots\n",
+ "def used_cores(rootdir, verbose=False):\n",
+ "    \"\"\"Plot and return the cores with at least one sample below 99.999 % idle.\n",
+ "\n",
+ "    rootdir -- folder whose direct children are the metric folders\n",
+ "    verbose -- print a header line before the analysis\n",
+ "\n",
+ "    Returns the set of core ids (strings taken from the folder names).\n",
+ "    \"\"\"\n",
+ "    if verbose: print(\"Used Cores :\")\n",
+ "    cores = _select_cores(rootdir, lambda idle: (idle < 99.999).any(), 'Used CPU Core')\n",
+ "    print(\"Number of used cores: \", len(cores))\n",
+ "    return cores\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Interface"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rootdir = 'metrics_data/'\n",
+ "\n",
+ "def fetch_interfaces_data(rootdir):\n",
+ "    \"\"\"Load the errors / dropped csv files of every interface-* folder below `rootdir`.\n",
+ "\n",
+ "    Returned columns: error_rx, error_tx, dropped_rx, dropped_tx (as far as\n",
+ "    the matching files exist), epoch and interface (folder name without its\n",
+ "    'interface-' prefix). The frame is empty when no folder matches.\n",
+ "    \"\"\"\n",
+ "    reg_compile = re.compile(\"interface-.*\")\n",
+ "    # (file name keyword, prefix of the resulting rx / tx columns)\n",
+ "    file_kinds = (('errors', 'error'), ('dropped', 'dropped'))\n",
+ "    frames = []\n",
+ "    for dirpath, dirnames, filenames in os.walk(rootdir):\n",
+ "        dirname = dirpath.split(os.sep)[-1]\n",
+ "        if not reg_compile.match(dirname):\n",
+ "            continue\n",
+ "        _df = pd.DataFrame()\n",
+ "        for file in filenames:\n",
+ "            for keyword, prefix in file_kinds:\n",
+ "                if keyword in file:\n",
+ "                    temp_df = pd.read_csv(dirpath + os.sep + file)\n",
+ "                    _df[prefix + '_rx'] = temp_df['rx']\n",
+ "                    _df[prefix + '_tx'] = temp_df['tx']\n",
+ "                    _df['epoch'] = temp_df['epoch']\n",
+ "\n",
+ "        _df['interface'] = '-'.join(dirname.split('-')[1:])\n",
+ "        frames.append(_df)\n",
+ "\n",
+ "    # DataFrame.append no longer exists in pandas 2.0; concatenate once instead\n",
+ "    if not frames:\n",
+ "        return pd.DataFrame()\n",
+ "    return pd.concat(frames, ignore_index=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Interface Dropped (both type 1 and 2, i.e rx and tx)\n",
+ "def interface_dropped(rootdir, verbose=False):\n",
+ "    \"\"\"Plot the rx / tx dropped counters of every interface and list the drops.\n",
+ "\n",
+ "    rootdir -- folder whose direct children are the metric folders\n",
+ "\n",
+ "    Returns {'rx': [...], 'tx': [...]}; every entry is\n",
+ "    [interface, epoch of the first sample whose dropped value is 1].\n",
+ "    \"\"\"\n",
+ "    df = fetch_interfaces_data(rootdir)\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    dropped = {'rx':[], 'tx':[]}\n",
+ "    if df.empty:\n",
+ "        return dropped\n",
+ "\n",
+ "    # a plain column name keeps the group keys scalar on every pandas version\n",
+ "    for itr, (key, item) in enumerate(df.groupby('interface')):\n",
+ "        for direction in ('rx', 'tx'):\n",
+ "            dropped_rows = item[item['dropped_' + direction] == 1]\n",
+ "            if not dropped_rows.empty:\n",
+ "                dropped[direction].append([key, dropped_rows['epoch'].iloc[0]])\n",
+ "\n",
+ "        fig = plt.figure(figsize=(24,6), facecolor=color[itr%2], edgecolor='red')\n",
+ "        ax = fig.add_subplot(211)\n",
+ "        ax.title.set_text(\"Interface: {} Dropped (rx)\".format(key))\n",
+ "        ax.plot(item['epoch'], item['dropped_rx'])\n",
+ "\n",
+ "        ax1 = fig.add_subplot(212)\n",
+ "        ax1.title.set_text(\"Interface: {} Dropped (tx)\".format(key))\n",
+ "        ax1.plot(item['epoch'], item['dropped_tx'])\n",
+ "\n",
+ "        # title and show every figure, not only the last one\n",
+ "        plt.suptitle('Interface Dropped', fontsize=14)\n",
+ "        plt.show()\n",
+ "\n",
+ "    return dropped\n",
+ "\n",
+ "\n",
+ "# Interface Errors (both type 1 and 2, i.e rx and tx)\n",
+ "def interface_errors(rootdir, verbose=False):\n",
+ "    \"\"\"Plot the rx / tx error counters of every interface and list the errors.\n",
+ "\n",
+ "    rootdir -- folder whose direct children are the metric folders\n",
+ "\n",
+ "    Returns {'rx': [...], 'tx': [...]}; every entry is\n",
+ "    [interface, epoch of the first sample whose error value is 1].\n",
+ "    \"\"\"\n",
+ "    df = fetch_interfaces_data(rootdir)\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    errors = {'rx':[], 'tx':[]}\n",
+ "    if df.empty:\n",
+ "        return errors\n",
+ "\n",
+ "    # a plain column name keeps the group keys scalar on every pandas version\n",
+ "    for itr, (key, item) in enumerate(df.groupby('interface')):\n",
+ "        for direction in ('rx', 'tx'):\n",
+ "            err_rows = item[item['error_' + direction] == 1]\n",
+ "            if not err_rows.empty:\n",
+ "                errors[direction].append([key, err_rows['epoch'].iloc[0]])\n",
+ "\n",
+ "        fig = plt.figure(figsize=(24,6), facecolor=color[itr%2], edgecolor='red')\n",
+ "        ax = fig.add_subplot(211)\n",
+ "        ax.title.set_text(\"Interface: {} Errors (rx)\".format(key))\n",
+ "        ax.plot(item['epoch'], item['error_rx'])\n",
+ "\n",
+ "        ax1 = fig.add_subplot(212)\n",
+ "        ax1.title.set_text(\"Interface: {} Errors (tx)\".format(key))\n",
+ "        ax1.plot(item['epoch'], item['error_tx'])\n",
+ "\n",
+ "        # title and show every figure, not only the last one\n",
+ "        plt.suptitle('Interface Errors', fontsize=14)\n",
+ "        plt.show()\n",
+ "\n",
+ "    return errors\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### OVS Stats (Non DPDK)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rootdir = 'metrics_data/'\n",
+ "\n",
+ "def fetch_ovs_stats_data(rootdir):\n",
+ "    \"\"\"Load the errors / dropped csv files of every ovs_stats-* folder below `rootdir`.\n",
+ "\n",
+ "    Folders with 'dpdk' in their name are skipped. Every non-epoch column of\n",
+ "    a file is stored as <column>_<file name part after the first underscore>.\n",
+ "    The result also has 'epoch' and 'interface' (folder name without its\n",
+ "    first dash-separated part). The frame is empty when no folder matches.\n",
+ "    \"\"\"\n",
+ "    reg_compile = re.compile(\"ovs_stats-.*\")\n",
+ "    frames = []\n",
+ "    for dirpath, dirnames, filenames in os.walk(rootdir):\n",
+ "        dirname = dirpath.split(os.sep)[-1]\n",
+ "        if not reg_compile.match(dirname):\n",
+ "            continue\n",
+ "        if 'dpdk' in dirname:\n",
+ "            continue #ignoring dpdk\n",
+ "\n",
+ "        _df = pd.DataFrame()\n",
+ "        for file in filenames:\n",
+ "            if 'errors' in file or 'dropped' in file:\n",
+ "                # fall back to the whole file name when it has no underscore,\n",
+ "                # otherwise all such files would share the name '<column>_'\n",
+ "                col_name = '-'.join(file.split('_')[1:]) or file\n",
+ "                temp_df = pd.read_csv(dirpath + os.sep + file)\n",
+ "\n",
+ "                _df['epoch'] = temp_df['epoch']\n",
+ "                for col in temp_df.columns.drop('epoch'):\n",
+ "                    _df[col + '_' + col_name] = temp_df[col]\n",
+ "        _df['interface'] = '-'.join(dirname.split('-')[1:])\n",
+ "        frames.append(_df)\n",
+ "\n",
+ "    # DataFrame.append no longer exists in pandas 2.0; concatenate once instead\n",
+ "    if not frames:\n",
+ "        return pd.DataFrame()\n",
+ "    return pd.concat(frames, ignore_index=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def ovs_stats_dropped(rootdir, verbose=False):\n",
+ "    \"\"\"Plot every non-empty 'dropped' column of each OVS interface found below `rootdir`.\n",
+ "\n",
+ "    One figure is drawn per (interface, column) pair; returns None.\n",
+ "    \"\"\"\n",
+ "    df = fetch_ovs_stats_data(rootdir)\n",
+ "    if df.empty:\n",
+ "        return\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    i = 0\n",
+ "    # a plain column name keeps the group keys scalar, so titles show the bare name\n",
+ "    for key, item in df.groupby('interface'):\n",
+ "        for col in item:\n",
+ "            if 'dropped' not in col or item[col].isnull().all():\n",
+ "                continue\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "            plt.plot(item['epoch'], item[col])\n",
+ "            plt.title(\"Interface: {} Dropped {}\".format(key, col))\n",
+ "            i += 1\n",
+ "    plt.show()\n",
+ "    return\n",
+ "\n",
+ "\n",
+ "# OVS stats errors (every error column of every interface)\n",
+ "def ovs_stats_errors(rootdir, verbose=False):\n",
+ "    \"\"\"Plot every non-empty 'error' column of each OVS interface found below `rootdir`.\n",
+ "\n",
+ "    One figure is drawn per (interface, column) pair; returns None.\n",
+ "    \"\"\"\n",
+ "    df = fetch_ovs_stats_data(rootdir)\n",
+ "    if df.empty:\n",
+ "        return\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    i = 0\n",
+ "    # a plain column name keeps the group keys scalar, so titles show the bare name\n",
+ "    for key, item in df.groupby('interface'):\n",
+ "        for col in item:\n",
+ "            if 'error' not in col or item[col].isnull().all():\n",
+ "                continue\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "            plt.plot(item['epoch'], item[col])\n",
+ "            plt.title(\"Interface: {} Errors {}\".format(key, col))\n",
+ "            i += 1\n",
+ "    plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### DPDK"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rootdir = 'metrics_data/'\n",
+ "\n",
+ "def fetch_dpdk_data(rootdir):\n",
+ "    \"\"\"Load the errors / dropped csv files of every folder below `rootdir` with 'dpdk' in its name.\n",
+ "\n",
+ "    Every non-epoch column of a file is stored as\n",
+ "    <column>_<file name part after the first underscore>. The result also\n",
+ "    has 'epoch' and 'dpdk' (folder name without its first dash-separated\n",
+ "    part). The frame is empty when no folder matches.\n",
+ "    \"\"\"\n",
+ "    reg_compile = re.compile(\".*dpdk.*\")\n",
+ "    frames = []\n",
+ "    for dirpath, dirnames, filenames in os.walk(rootdir):\n",
+ "        dirname = dirpath.split(os.sep)[-1]\n",
+ "        if not reg_compile.match(dirname):\n",
+ "            continue\n",
+ "        _df = pd.DataFrame()\n",
+ "        for file in filenames:\n",
+ "            if 'errors' in file or 'dropped' in file:\n",
+ "                # fall back to the whole file name when it has no underscore,\n",
+ "                # otherwise all such files would share the name '<column>_'\n",
+ "                col_name = '-'.join(file.split('_')[1:]) or file\n",
+ "                temp_df = pd.read_csv(dirpath + os.sep + file)\n",
+ "\n",
+ "                _df['epoch'] = temp_df['epoch']\n",
+ "                for col in temp_df.columns.drop('epoch'):\n",
+ "                    _df[col + '_' + col_name] = temp_df[col]\n",
+ "        _df['dpdk'] = '-'.join(dirname.split('-')[1:])\n",
+ "        frames.append(_df)\n",
+ "\n",
+ "    # DataFrame.append no longer exists in pandas 2.0; concatenate once instead\n",
+ "    if not frames:\n",
+ "        return pd.DataFrame()\n",
+ "    return pd.concat(frames, ignore_index=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Preview only the first rows instead of rendering the whole frame\n",
+ "fetch_dpdk_data(rootdir).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def dpdk_dropped(rootdir, verbose=False):\n",
+ "    \"\"\"Plot every non-empty 'dropped' column of each DPDK group found below `rootdir`.\n",
+ "\n",
+ "    One figure is drawn per (dpdk group, column) pair; returns None.\n",
+ "    \"\"\"\n",
+ "    df = fetch_dpdk_data(rootdir)\n",
+ "    if df.empty:\n",
+ "        return\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    i = 0\n",
+ "    # a plain column name keeps the group keys scalar, so titles show the bare name\n",
+ "    for key, item in df.groupby('dpdk'):\n",
+ "        for col in item:\n",
+ "            if 'dropped' not in col or item[col].isnull().all():\n",
+ "                continue\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "            plt.plot(item['epoch'], item[col])\n",
+ "            plt.title(\"DpDK: {} Dropped {}\".format(key, col))\n",
+ "            i += 1\n",
+ "    plt.show()\n",
+ "    return\n",
+ "\n",
+ "\n",
+ "# DPDK errors (every error column of every dpdk group)\n",
+ "def dpdk_errors(rootdir, verbose=False):\n",
+ "    \"\"\"Plot every non-empty 'error' column of each DPDK group found below `rootdir`.\n",
+ "\n",
+ "    One figure is drawn per (dpdk group, column) pair; returns None.\n",
+ "    \"\"\"\n",
+ "    df = fetch_dpdk_data(rootdir)\n",
+ "    if df.empty:\n",
+ "        return\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    i = 0\n",
+ "    # a plain column name keeps the group keys scalar, so titles show the bare name\n",
+ "    for key, item in df.groupby('dpdk'):\n",
+ "        for col in item:\n",
+ "            if 'error' not in col or item[col].isnull().all():\n",
+ "                continue\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "            plt.plot(item['epoch'], item[col])\n",
+ "            plt.title(\"DpDK: {} Errors {}\".format(key, col))\n",
+ "            i += 1\n",
+ "    plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dpdk_dropped(rootdir)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### RDT (needs to be tested)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rootdir = 'metrics_data/'\n",
+ "\n",
+ "def fetch_rdt_data(rootdir):\n",
+ "    \"\"\"Load the bytes / bandwidth / ipc csv files of every folder below `rootdir` with 'rdt' in its name.\n",
+ "\n",
+ "    Every non-epoch column of a file is stored as\n",
+ "    <column>_<file name part after the first underscore>, or\n",
+ "    <column>_<file name> when the file name has no underscore. The result\n",
+ "    also has 'epoch' and 'intel_rdt' (folder name without its first\n",
+ "    dash-separated part). The frame is empty when no folder matches.\n",
+ "    \"\"\"\n",
+ "    reg_compile = re.compile(\".*rdt.*\")\n",
+ "    frames = []\n",
+ "    for dirpath, dirnames, filenames in os.walk(rootdir):\n",
+ "        dirname = dirpath.split(os.sep)[-1]\n",
+ "        if not reg_compile.match(dirname):\n",
+ "            continue\n",
+ "        _df = pd.DataFrame()\n",
+ "        for file in filenames:\n",
+ "            if 'bytes' in file or 'bandwidth' in file or 'ipc' in file:\n",
+ "                # Without the fallback a file name that has no underscore gives an\n",
+ "                # empty suffix: the metric keyword is lost from the column name and\n",
+ "                # different files overwrite each other's '<column>_' column.\n",
+ "                col_name = '-'.join(file.split('_')[1:]) or file\n",
+ "                temp_df = pd.read_csv(dirpath + os.sep + file)\n",
+ "\n",
+ "                _df['epoch'] = temp_df['epoch']\n",
+ "                for col in temp_df.columns.drop('epoch'):\n",
+ "                    _df[col + '_' + col_name] = temp_df[col]\n",
+ "        _df['intel_rdt'] = '-'.join(dirname.split('-')[1:])\n",
+ "        frames.append(_df)\n",
+ "\n",
+ "    # DataFrame.append no longer exists in pandas 2.0; concatenate once instead\n",
+ "    if not frames:\n",
+ "        return pd.DataFrame()\n",
+ "    return pd.concat(frames, ignore_index=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# L3 cache bytes\n",
+ "def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot every non-empty 'bytes' column of each intel_rdt group.\n",
+ "\n",
+ "    csv -- folder whose direct children are the metric folders; the\n",
+ "           notebook-level `rootdir` is used when it is None\n",
+ "    start, end, node, steps and verbose are accepted but not used.\n",
+ "    \"\"\"\n",
+ "    df = fetch_rdt_data(rootdir if csv is None else csv)\n",
+ "    if df.empty:\n",
+ "        return\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    i = 0\n",
+ "    # a plain column name keeps the group keys scalar, so titles show the bare name\n",
+ "    for key, item in df.groupby('intel_rdt'):\n",
+ "        for col in item:\n",
+ "            if 'bytes' not in col or item[col].isnull().all():\n",
+ "                continue\n",
+ "            fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "            plt.plot(item['epoch'], item[col])\n",
+ "            plt.title(\"RDT BYTES, RDT: {}\".format(key))\n",
+ "            i += 1\n",
+ "    plt.show()\n",
+ "\n",
+ "\n",
+ "# L3 IPC values\n",
+ "def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot every non-empty 'ipc' column of the RDT csv data, one figure per column.\n",
+ "\n",
+ "    The data is loaded with fetch_rdt_data() from the module-level `rootdir`.\n",
+ "    The parameters (start, end, node, steps, csv, verbose) are accepted but\n",
+ "    are not used by this function.\n",
+ "    \"\"\"\n",
+ "    df = fetch_rdt_data(rootdir)\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    i = 0\n",
+ "    # Group by the plain column name so that `key` is the RDT id itself\n",
+ "    # (grouping by a one-element list yields tuple keys on recent pandas).\n",
+ "    for key, item in df.groupby('intel_rdt'):\n",
+ "        for col in item:\n",
+ "            if 'ipc' not in col or item[col].isnull().all():\n",
+ "                continue\n",
+ "            plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "            plt.plot(item['epoch'], item[col])\n",
+ "            # Include the column name: several columns of one RDT group are plotted.\n",
+ "            plt.title(\"RDT IPC, RDT: {} ({})\".format(key, col))\n",
+ "            i += 1\n",
+ "    plt.show()\n",
+ "\n",
+ "\n",
+ "\n",
+ "# memory bandwidth\n",
+ "def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
+ "    \"\"\"Plot every non-empty 'bandwidth' column of the RDT csv data, one figure per column.\n",
+ "\n",
+ "    The data is loaded with fetch_rdt_data() from the module-level `rootdir`.\n",
+ "    The parameters (start, end, node, steps, csv, verbose) are accepted but\n",
+ "    are not used by this function.\n",
+ "    \"\"\"\n",
+ "    df = fetch_rdt_data(rootdir)\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "\n",
+ "    i = 0\n",
+ "    # Group by the plain column name so that `key` is the RDT id itself\n",
+ "    # (grouping by a one-element list yields tuple keys on recent pandas).\n",
+ "    for key, item in df.groupby('intel_rdt'):\n",
+ "        for col in item:\n",
+ "            # The filter used to test for the misspelling 'bandwidht', which\n",
+ "            # no column name contains, so nothing was ever plotted.\n",
+ "            if 'bandwidth' not in col or item[col].isnull().all():\n",
+ "                continue\n",
+ "            plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "            plt.plot(item['epoch'], item[col])\n",
+ "            # Include the column name: several columns of one RDT group are plotted.\n",
+ "            plt.title(\"RDT Memory Bandwidth, RDT: {} ({})\".format(key, col))\n",
+ "            i += 1\n",
+ "    plt.show()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Memory (following functions still need to be written for csv)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rootdir = 'metrics_data/'\n",
+ "\n",
+ "def fetch_memory_data(rootdir):\n",
+ "    \"\"\"Collect the memory csv files found under `rootdir` into one DataFrame.\n",
+ "\n",
+ "    Every directory whose name starts with 'memory' contributes one frame:\n",
+ "    an 'epoch' column plus the value columns of each csv file in it, named\n",
+ "    after the second '-'-separated token of the file name. The per-directory\n",
+ "    frames are stacked row-wise with a fresh index. An empty DataFrame is\n",
+ "    returned when no directory matches.\n",
+ "    \"\"\"\n",
+ "    reg_compile = re.compile(\"memory\")\n",
+ "    frames = []\n",
+ "    for dirpath, dirnames, filenames in os.walk(rootdir):\n",
+ "        dirname = dirpath.split(os.sep)[-1]\n",
+ "        if not reg_compile.match(dirname):\n",
+ "            continue\n",
+ "        _df = pd.DataFrame()\n",
+ "        for file in filenames:\n",
+ "            col_name = file.split('-')[1]\n",
+ "            temp_df = pd.read_csv(os.path.join(dirpath, file))\n",
+ "            _df['epoch'] = temp_df['epoch']\n",
+ "            temp_df = temp_df.drop(['epoch'], axis=1)\n",
+ "            new_cols = [col_name for i in temp_df.columns]\n",
+ "            _df[new_cols] = temp_df\n",
+ "        frames.append(_df)\n",
+ "    if not frames:\n",
+ "        return pd.DataFrame()\n",
+ "    # DataFrame.append() was removed in pandas 2.0 and copies the whole frame on\n",
+ "    # every call; collect the pieces and concatenate them once instead.\n",
+ "    return pd.concat(frames, ignore_index=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "def get_memory_usage(rootdir, verbose=False):\n",
+ "    \"\"\"Plot each non-empty memory metric found under `rootdir` against 'epoch'.\n",
+ "\n",
+ "    One figure is drawn per metric column of the frame returned by\n",
+ "    fetch_memory_data(rootdir). `verbose` is accepted but not used.\n",
+ "    \"\"\"\n",
+ "    df = fetch_memory_data(rootdir)\n",
+ "    color = ['oldlace', 'mistyrose']\n",
+ "    i = 0\n",
+ "    for col in df:\n",
+ "        # 'epoch' is the x axis, not a metric: do not plot it against itself.\n",
+ "        if col == 'epoch' or df[col].isnull().all():\n",
+ "            continue\n",
+ "        plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n",
+ "        plt.plot(df['epoch'], df[col])\n",
+ "        plt.title(\"{} Memory\".format(col))\n",
+ "        i += 1\n",
+ "    plt.show()\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Usage / Examples\n",
+ "\n",
+ "\n",
+ "##### CPU \n",
+ "\n",
+ "- For finding unused CPU cores\n",
+ "\n",
+ "```py\n",
+ "cores = unused_cores(rootdir='metrics_data')\n",
+ "```\n",
+ "\n",
+ "- For finding fully used cores\n",
+ "\n",
+ "```py\n",
+ "fully_used = fully_used_cores('metrics_data')\n",
+ "```\n",
+ "\n",
+ "- Similarly for plotting used cores\n",
+ "\n",
+ "```py\n",
+ "plot_used_cores(csv='metrics_data')\n",
+ "```\n",
+ "\n",
+ "\n",
+ "##### Interface\n",
+ "\n",
+ "- Interface Dropped \n",
+ "\n",
+ "```py\n",
+ "# Using CSV\n",
+ "dropped_interfaces = interface_dropped('metrics_data')\n",
+ "```\n",
+ "\n",
+ "- Interface Errors\n",
+ "\n",
+ "```py\n",
+ "# Using CSV\n",
+ "interface_errors('metrics_data')\n",
+ "```\n",
+ "\n",
+ "##### OVS Stats\n",
+ "\n",
+ "- OVS Stats Dropped \n",
+ "\n",
+ "```py\n",
+ "# Using CSV\n",
+ "ovs_stats_dropped('metrics_data')\n",
+ "```\n",
+ "\n",
+ "- OVS Stats Errors\n",
+ "\n",
+ "```py\n",
+ "# Using CSV\n",
+ "ovs_stats_errors('metrics_data')\n",
+ "```\n",
+ "\n",
+ "##### DPDK \n",
+ "\n",
+ "- DPDK Dropped \n",
+ "\n",
+ "```py\n",
+ "# Using CSV\n",
+ "dpdk_dropped('metrics_data')\n",
+ "```\n",
+ "\n",
+ "- DPDK Errors\n",
+ "\n",
+ "```py\n",
+ "# Using CSV\n",
+ "dpdk_errors('metrics_data')\n",
+ "```\n",
+ "\n",
+ "\n",
+ "\n",
+ "##### RDT (Do not run yet)\n",
+ "\n",
+ "- Plot bytes\n",
+ "\n",
+ "```py\n",
+ "#csv\n",
+ "plot_rdt_bytes('metrics_data')\n",
+ "```\n",
+ "\n",
+ "- Plot ipc values\n",
+ "\n",
+ "```py\n",
+ "#csv\n",
+ "plot_rdt_ipc('metrics_data')\n",
+ "```\n",
+ "\n",
+ "- Memory bandwidth\n",
+ "\n",
+ "```py\n",
+ "#csv\n",
+ "get_rdt_memory_bandwidth('metrics_data')\n",
+ "```\n",
+ "\n",
+ "##### Memory\n",
+ "\n",
+ "```py\n",
+ "#csv\n",
+ "get_memory_usage('metrics_data')\n",
+ "```"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}