From d25014e0201cf0b0a662a84984191786d7f8eb5d Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Mon, 24 Aug 2020 02:46:18 +0530 Subject: Tools: Add monitoring analysis jupyter notebook This patch adds 2 monitoring jupyter notebooks and 1 notebook combining both logs and metrics causation analysis. Causation analysis: Finds anomalies in logs and fetches and analyses metrics in a delta time range of that timestamp. Analysis-Monitoring-K8S: Fetches metrics from prometheus to analyse them Analysis-Monitoring-Local: Data folder containing csv files of metrics is given, analysis is performed on that data. Signed-off-by: Aditya Srivastava Change-Id: I8833f5155b3184f697fac5270c69e0df02d2986b --- .../lma/jupyter-notebooks/Causation-Analysis.ipynb | 784 ++++++++++++++++++ .../Analysis-Monitoring-K8S.ipynb | 644 +++++++++++++++ .../Analysis-Monitoring-Local.ipynb | 913 +++++++++++++++++++++ 3 files changed, 2341 insertions(+) create mode 100644 tools/lma/jupyter-notebooks/Causation-Analysis.ipynb create mode 100644 tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb create mode 100644 tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb diff --git a/tools/lma/jupyter-notebooks/Causation-Analysis.ipynb b/tools/lma/jupyter-notebooks/Causation-Analysis.ipynb new file mode 100644 index 00000000..d2e7886a --- /dev/null +++ b/tools/lma/jupyter-notebooks/Causation-Analysis.ipynb @@ -0,0 +1,784 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logs and Metrics Analysis Notebook\n", + "\n", + "#### Used to capture anomalies in the logs and analyse / visualize the metrics in the vicinity of that time\n", + "\n", + "##### Contributors:\n", + "\n", + "- Adarsh Yadav \n", + " \n", + " Log Analysis and Anomaly Finding\n", + " \n", + "\n", + "\n", + "\n", + "- Aditya Srivastava \n", + " \n", + " Metrics Analysis and Visualization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metrics 
Analysis and Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import numpy as np\n", + "\n", + "import datetime\n", + "import time\n", + "import requests\n", + "\n", + "from pprint import pprint\n", + "import json\n", + "from datetime import datetime, timedelta\n", + "\n", + "from elasticsearch import Elasticsearch\n", + "from elasticsearch_dsl import Search\n", + "from elasticsearch.connection import create_ssl_context\n", + "import ssl\n", + "import urllib3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PROMETHEUS = 'http://10.10.120.211:30902/' #do not change, unless sure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#function to make DF out of query json\n", + "\n", + "def convert_to_df(res_json):\n", + "\n", + " data_list = res_json['data']['result']\n", + " res_df = pd.DataFrame()\n", + " if not data_list:\n", + " return res_df\n", + "\n", + " # making colums\n", + " headers = data_list[0]\n", + " for data in data_list:\n", + " metrics = data['metric']\n", + " for metric in metrics.keys():\n", + " res_df[metric] = np.nan\n", + " res_df['value'] = 0\n", + " \n", + " # filling the df\n", + " for data in data_list:\n", + " metrics = data['metric']\n", + " metrics['value'] = data['value'][-1]\n", + " res_df = res_df.append(metrics, ignore_index=True) \n", + "\n", + " return res_df\n", + "\n", + "def convert_to_df_range(res_json):\n", + "\n", + " data_list = res_json['data']['result']\n", + " res_df = pd.DataFrame()\n", + " if not data_list:\n", + " return res_df\n", + "\n", + " # filling the df\n", + " for data in data_list:\n", + " metrics = 
data['metric']\n", + " values = np.array(data['values'])\n", + " for time, value in values:\n", + " metrics['timestamp'] = time\n", + " metrics['value'] = value\n", + " res_df = res_df.append(metrics, ignore_index=True) \n", + "\n", + " return res_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# functions to query\n", + "\n", + "def convert_to_timestamp(s):\n", + " return time.mktime(datetime.strptime(s, \"%Y-%m-%d %H:%M:%S\").timetuple())\n", + "\n", + "def query_current(params={}):\n", + " # input: params\n", + " # type: dict\n", + " # Example: {'query': 'container_cpu_user_seconds_total'}\n", + " \n", + " # Output: dict, loaded json response of the query\n", + "\n", + " res = requests.get(PROMETHEUS + '/api/v1/query', \n", + " params=params)\n", + " return json.loads(res.text)\n", + "\n", + "\n", + "def query_range(start, end, params={}, steps = '30s'):\n", + " # input: params\n", + " # type: dict\n", + " # Example: {'query': 'container_cpu_user_seconds_total'}\n", + " \n", + " # Output: dict, loaded json response of the query\n", + " params[\"start\"] = convert_to_timestamp(start)\n", + " params[\"end\"] = convert_to_timestamp(end)\n", + " params[\"step\"] = steps\n", + "\n", + " # print(params)\n", + "\n", + " res = requests.get(PROMETHEUS + '/api/v1/query_range', \n", + " params=params,\n", + " )\n", + "\n", + " return json.loads(res.text)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis Function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# CPU Unused Cores\n", + "def unused_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = 
pd.read_csv(csv)\n", + " return df\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", + "\n", + " target_cpu_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_cpu_usage_range)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['cpu'])\n", + " if verbose: print(\"Unused Cores :\")\n", + " unused_cores = []\n", + " for key, item in groups:\n", + " curr_df = item\n", + " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", + " if idle_row['value'].iloc[0] == '100':\n", + " if verbose: print(\"Core: \",key)\n", + " unused_cores.append(int(key))\n", + "\n", + " print(\"Number of unused cores: \", len(unused_cores))\n", + " return unused_cores\n", + "\n", + "\n", + "#CPU fully used cores\n", + "def fully_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " return df\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", + "\n", + " target_cpu_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_cpu_usage_range)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['cpu'])\n", + " if verbose: print(\"Fully Used Cores :\")\n", + " fully_used_cores = []\n", + " for key, item in groups:\n", + " curr_df = item\n", + " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", + " if idle_row['value'].iloc[0] == '0':\n", + " if verbose: print(\"Core: \",key)\n", + " 
fully_used_cores.append(int(key))\n", + " print(\"Number of fully used cores: \", len(fully_used_cores))\n", + " return fully_used_cores\n", + "\n", + "\n", + "# CPU used cores plots\n", + "def plot_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " return df\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + "\n", + " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", + "\n", + " target_cpu_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_cpu_usage_range)\n", + " \n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['cpu'])\n", + " used_cores = []\n", + "\n", + " for key, item in groups:\n", + " curr_df = item\n", + " user_row = curr_df.loc[curr_df['type'] == 'user']\n", + " sys_row = curr_df.loc[curr_df['type'] == 'system']\n", + "\n", + "\n", + " if np.any(sys_row != '0') or np.any(user_row != '0'):\n", + " used_cores.append(key)\n", + " type_grps = curr_df.groupby('type')\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + "\n", + " for type_key, new_item in type_grps:\n", + "\n", + " if type_key == 'system':\n", + " ax1 = fig.add_subplot(131)\n", + " ax1.title.set_text(type_key)\n", + " ax1.plot(new_item['timestamp'], new_item['value'])\n", + " elif type_key == 'user':\n", + " ax2 = fig.add_subplot(132)\n", + " ax2.title.set_text(type_key)\n", + " ax2.plot(new_item['timestamp'], new_item['value'])\n", + " elif type_key == 'wait':\n", + " ax3 = fig.add_subplot(133)\n", + " ax3.title.set_text(type_key)\n", + " ax3.plot(new_item['timestamp'], new_item['value'])\n", + "\n", + " plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n", + " plt.show()\n", + " print(\"Number of used cores: \", 
len(used_cores))\n", + " return used_cores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Interface Dropped (both type 1 and 2, i.e rx and tx)\n", + "#TODO: Change this to separate functions later\n", + "def interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " df_0 = df #TODO: Change this\n", + " df_1 = df #TODO: Change this\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_interface_if_dropped_0_total{exported_instance='\" + node + \"'}\"}\n", + "\n", + " interface_dropped_0 = query_range(start, end, params, steps)\n", + " df_0 = convert_to_df_range(interface_dropped_0)\n", + " \n", + " params = {'query' : \"collectd_interface_if_dropped_1_total{exported_instance='\" + node + \"'}\"}\n", + " interface_dropped_1 = query_range(start, end, params, steps)\n", + " df_1 = convert_to_df_range(interface_dropped_1)\n", + "\n", + " \n", + " #df_0 : interfaces_dropped_0_df\n", + " df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " #df_1 : interfaces_dropped_1_df\n", + " df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " groups_0 = df_0.groupby(['interface'])\n", + " groups_1 = df_1.groupby(['interface'])\n", + "\n", + " groups = [groups_0, groups_1]\n", + " dropped_interfaces= []\n", + " drop_type = 0\n", + " color = ['oldlace', 'mistyrose']\n", + " plot_iter = 111\n", + " for group in groups:\n", + " dropped = []\n", + "\n", + " for key, item in group:\n", + " curr_df = item\n", + " if np.any(curr_df['value'] == '1'):\n", + " dropped_row = curr_df.loc[curr_df['value'] == '1']\n", + " dropped.append([key, 
dropped_row['timestamp'].iloc[0]])\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[drop_type], edgecolor='red')\n", + " ax = fig.add_subplot(plot_iter)\n", + " ax.title.set_text(\"Interface: {}\".format(key))\n", + " ax.plot(item['timestamp'], item['value'])\n", + " dropped_interfaces.append(dropped)\n", + " plt.suptitle('Interfaces Drop type {}'.format(drop_type), fontsize=14)\n", + " plt.show()\n", + " drop_type += 1\n", + " return dropped_interfaces\n", + "\n", + "\n", + "# Interface Errors (both type 1 and 2, i.e rx and tx)\n", + "#TODO: Change this to separate functions later\n", + "def interface_errors(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " df_0 = df #TODO: Change this\n", + " df_1 = df #TODO: Change this\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_interface_if_errors_0_total{exported_instance='\" + node + \"'}\"}\n", + " interfaces_errors_0 = query_range(start, end, params, steps)\n", + " df_0 = convert_to_df_range(interfaces_errors_0)\n", + " \n", + " params = {'query' : \"collectd_interface_if_errors_1_total{exported_instance='\" + node + \"'}\"}\n", + " interface_errors_1 = query_range(start, end, params, steps)\n", + " df_1 = convert_to_df_range(interface_errors_1)\n", + "\n", + " \n", + " #df_0 : interfaces_errors_0_df\n", + " df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " #df_1 : interfaces_dropped_1_df\n", + " df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " groups_0 = df_0.groupby(['interface'])\n", + " groups_1 = df_1.groupby(['interface'])\n", + "\n", + " groups = [groups_0, groups_1]\n", + " err_interfaces= []\n", + " err_type = 0\n", + " color = ['oldlace', 'mistyrose']\n", + " for group in groups:\n", + " errors = []\n", 
+ "\n", + " for key, item in group:\n", + " curr_df = item\n", + "\n", + " if np.any(curr_df['value'] == '1'):\n", + " err_row = curr_df.loc[curr_df['value'] == '1']\n", + " errors.append([key, err_row['timestamp'].iloc[0]])\n", + "\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[err_type], edgecolor='red')\n", + " ax = fig.add_subplot(111)\n", + " ax.title.set_text(\"Interface: {}\".format(key))\n", + " ax.plot(item['timestamp'], item['value'])\n", + "\n", + " err_interfaces.append(errors)\n", + " plt.suptitle('Interfaces Error type {}'.format(err_type), fontsize=14)\n", + " plt.show()\n", + " err_type += 1\n", + "\n", + " return err_interfaces" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### RDT " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# L3 cache bytes\n", + "def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + "\n", + " params = {'query' : \"collectd_intel_rdt_bytes{exported_instance='\" + node + \"'}\"}\n", + " intel_rdt_bytes = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(intel_rdt_bytes)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['intel_rdt'])\n", + " for key, item in groups:\n", + " curr_df = item\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + " ax1 = fig.add_subplot(111)\n", + " ax1.title.set_text(\"Intel RDT Number: {}\".format(key))\n", + " ax1.plot(item['timestamp'], item['value'])\n", + " plt.show()\n", + " return\n", + "\n", + "\n", + "# L3 IPC values\n", + "def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", 
+ " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_intel_rdt_ipc{exported_instance='\" + node + \"'}\"}\n", + " intel_rdt_ipc = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(intel_rdt_ipc)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['intel_rdt'])\n", + " for key, item in groups:\n", + " curr_df = item\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + " ax1 = fig.add_subplot(111)\n", + " ax1.title.set_text(\"Intel RDT Number: {}, IPC value\".format(key))\n", + " ax1.plot(item['timestamp'], item['value'])\n", + " plt.show()\n", + " return\n", + "\n", + "\n", + "# memory bandwidth\n", + "def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + "\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_intel_rdt_memory_bandwidth_total{exported_instance='\" + node + \"'}\"}\n", + " intel_rdt_mem_bw = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(intel_rdt_mem_bw)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " \n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Memory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def get_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + " if start 
is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_memory{exported_instance='\" + node + \"'} / (1024*1024*1024) \"} \n", + " target_memory_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_memory_usage_range)\n", + " \n", + " df = df.drop(['instance', 'job'], axis = 1)\n", + " groups = df.groupby(['memory'])\n", + " for key, item in groups:\n", + " curr_df = item\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + " ax1 = fig.add_subplot(111)\n", + " ax1.title.set_text(\"Memory Type: {}\".format(key))\n", + " ax1.plot(item['timestamp'], item['value'])\n", + " plt.show()\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing Zone" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_memory_usage('2020-08-03 08:00:12', '2020-08-03 08:01:12', 'pod12-node4')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def analyse(timestamp, node):\n", + " ts = datetime.strptime(timestamp.split(',')[0], \"%Y-%m-%d %H:%M:%S\")\n", + " start = ts - timedelta(seconds=10)\n", + " end = ts + timedelta(seconds=10)\n", + " \n", + " start = str(start)\n", + " end = str(end)\n", + " steps = '5s'\n", + "\n", + " print(\"Starting Analysis from\",start,\"to\",end,'\\n\\n')\n", + "\n", + " if \"node4\" in node:\n", + " node = 'pod12-node4'\n", + "\n", + " #cpu analysis\n", + " print(\"=====CPU ANALYSIS=====\\n\")\n", + " unused = unused_cores(start, end, node, steps)\n", + " print(\"Unused Cores:\", unused)\n", + " fully_used = fully_used_cores(start, end, node, steps)\n", + " print(\"Fully Used Cores:\", fully_used)\n", + " print(\"Plotting used cores:\")\n", + " used_cores = plot_used_cores(start, end, node, steps)\n", 
+ " \n", + " #interface analysis\n", + " print(\"=====Interfaces Dropped / Errors=====\\n\")\n", + " dropped_interfaces = interface_dropped(start, end, node, steps)\n", + " err_interfaces = interface_errors(start, end, node, steps)\n", + " \n", + " #RDT Analysis\n", + " print(\"=====RDT Analysis=====\\n\")\n", + " plot_rdt_bytes(start, end, node, steps)\n", + " plot_rdt_ipc(start, end, node, steps)\n", + " mem_bandwidht = get_rdt_memory_bandwidth(start, end, node, steps)\n", + " \n", + " #Memory Analysis:\n", + " print(\"=====Memory Analysis=====\\n\")\n", + " mem = get_memory_usage(start, end, node, steps)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage / Examples\n", + "\n", + "\n", + "##### CPU \n", + "\n", + "- For calling cpu unused cores\n", + "\n", + "```py\n", + "# Fetching from prometheus\n", + "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "```\n", + "\n", + "- For finding fully used cores\n", + "\n", + "```py\n", + "# Fetching from prometheus\n", + "fully_used = fully_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "```\n", + "\n", + "- Similarly for plotting used cores\n", + "\n", + "```py\n", + "# Fetching\n", + "plot_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "#csv\n", + "# use Analysis-Monitoring-Local Notebook for correct analysis \n", + "plot_used_cores(csv='metrics_data/cpu-0/cpu-user-2020-06-02')\n", + "\n", + "```\n", + "\n", + "\n", + "##### Interface\n", + "\n", + "- Interface Dropped \n", + "\n", + "```py\n", + "# Fetching from prom\n", + "dropped_interfaces = interface_dropped('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "```\n", + "\n", + "- Interface Errors\n", + "\n", + "```py\n", + "# Fetching from prom\n", + "interface_errors('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "```\n", + "\n", + "##### RDT\n", + "\n", + "- Plot 
bytes\n", + "\n", + "```py\n", + "# fetch\n", + "plot_rdt_bytes('2020-07-31 08:00:12', '2020-07-31 08:01:12','pod12-node4')\n", + "```\n", + "\n", + "- Plot ipc values\n", + "\n", + "```py\n", + "#fetch\n", + "plot_rdt_ipc('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "```\n", + "\n", + "- Memory bandwidth\n", + "\n", + "```py\n", + "#fetch\n", + "get_rdt_memory_bandwidth('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "```\n", + "\n", + "##### Memory\n", + "\n", + "- Memory usage\n", + "\n", + "```py\n", + "get_memory_usage('2020-08-03 08:00:12', '2020-08-03 08:01:12', 'pod12-node4')\n", + "```\n", + "\n", + "##### Analyse everything\n", + "\n", + "```py\n", + "# example alert_time: 2020-08-03 08:00:12\n", + "# example index: 'pod12-node4'\n", + "analyse(alert_time,index)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Checking Anomaly in logs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Give file name\n", + "foldername = \"results_2020-08-07_03-39-57\"\n", + "#Give index name - \"node1*\" or \"node4*\"\n", + "index = \"node4*\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "ssl_context = create_ssl_context()\n", + "ssl_context.check_hostname = False\n", + "ssl_context.verify_mode = ssl.CERT_NONE\n", + "urllib3.disable_warnings()\n", + "client = Elasticsearch(['https://elasticsearch:password123@10.10.120.211:31111'],verify_certs=False,ssl_context=ssl_context)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vsperf = \"vsperf-overall_\"+ foldername[8:] +\".log\"\n", + "s = Search(index=index).using(client).query(\"exists\", field=\"alert\").query(\"match_phrase\", log_path=vsperf)\n", + "for hits in s.scan():\n", + " alert_time = hits.alert_time\n", + "\n", 
+ "print(alert_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "analyse(alert_time,index)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb new file mode 100644 index 00000000..10c59d84 --- /dev/null +++ b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb @@ -0,0 +1,644 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Metrics Analysis Notebook (k8s)\n", + "\n", + "#### Used to analyse / visualize the metrics, data fetched from prometheus (monitoring cluster)\n", + "\n", + "### Contributor: Aditya Srivastava \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import numpy as np\n", + "\n", + "import datetime\n", + "import time\n", + "import requests\n", + "\n", + "from pprint import pprint\n", + "import json\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PROMETHEUS = 'http://10.10.120.211:30902/' #do not change, unless sure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#function to make DF 
out of query json\n", + "\n", + "def convert_to_df(res_json):\n", + "\n", + " data_list = res_json['data']['result']\n", + " res_df = pd.DataFrame()\n", + " if not data_list:\n", + " return res_df\n", + "\n", + " # making colums\n", + " headers = data_list[0]\n", + " for data in data_list:\n", + " metrics = data['metric']\n", + " for metric in metrics.keys():\n", + " res_df[metric] = np.nan\n", + " res_df['value'] = 0\n", + " \n", + " # filling the df\n", + " for data in data_list:\n", + " metrics = data['metric']\n", + " metrics['value'] = data['value'][-1]\n", + " res_df = res_df.append(metrics, ignore_index=True) \n", + "\n", + " return res_df\n", + "\n", + "def convert_to_df_range(res_json):\n", + "\n", + " data_list = res_json['data']['result']\n", + " res_df = pd.DataFrame()\n", + " if not data_list:\n", + " return res_df\n", + "\n", + " # filling the df\n", + " for data in data_list:\n", + " metrics = data['metric']\n", + " values = np.array(data['values'])\n", + " for time, value in values:\n", + " metrics['timestamp'] = time\n", + " metrics['value'] = value\n", + " res_df = res_df.append(metrics, ignore_index=True) \n", + "\n", + " return res_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# functions to query\n", + "\n", + "def convert_to_timestamp(s):\n", + " return time.mktime(datetime.strptime(s, \"%Y-%m-%d %H:%M:%S\").timetuple())\n", + "\n", + "def query_current(params={}):\n", + " # input: params\n", + " # type: dict\n", + " # Example: {'query': 'container_cpu_user_seconds_total'}\n", + " \n", + " # Output: dict, loaded json response of the query\n", + "\n", + " res = requests.get(PROMETHEUS + '/api/v1/query', \n", + " params=params)\n", + " return json.loads(res.text)\n", + "\n", + "\n", + "def query_range(start, end, params={}, steps = '30s'):\n", + " # input: params\n", + " # type: dict\n", + " # Example: {'query': 'container_cpu_user_seconds_total'}\n", + " \n", + " # 
Output: dict, loaded json response of the query\n", + " params[\"start\"] = convert_to_timestamp(start)\n", + " params[\"end\"] = convert_to_timestamp(end)\n", + " params[\"step\"] = steps\n", + "\n", + " print(params)\n", + " \n", + " res = requests.get(PROMETHEUS + '/api/v1/query_range', \n", + " params=params,\n", + " )\n", + "\n", + " return json.loads(res.text)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis Function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# CPU Unused Cores\n", + "def unused_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " return df\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", + "\n", + " target_cpu_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_cpu_usage_range)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['cpu'])\n", + " if verbose: print(\"Unused Cores :\")\n", + " unused_cores = []\n", + " for key, item in groups:\n", + " curr_df = item\n", + " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", + " if idle_row['value'].iloc[0] == '100':\n", + " if verbose: print(\"Core: \",key)\n", + " unused_cores.append(int(key))\n", + "\n", + " print(\"Number of unused cores: \", len(unused_cores))\n", + " return unused_cores\n", + "\n", + "\n", + "#CPU fully used cores\n", + "def fully_used_cores(start=None, end=None, node=None, steps='15s', csv=None, 
verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " return df\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", + "\n", + " target_cpu_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_cpu_usage_range)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['cpu'])\n", + " if verbose: print(\"Fully Used Cores :\")\n", + " fully_used_cores = []\n", + " for key, item in groups:\n", + " curr_df = item\n", + " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", + " if idle_row['value'].iloc[0] == '0':\n", + " if verbose: print(\"Core: \",key)\n", + " fully_used_cores.append(int(key))\n", + " print(\"Number of fully used cores: \", len(fully_used_cores))\n", + " return fully_used_cores\n", + "\n", + "\n", + "# CPU used cores plots\n", + "def plot_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " \n", + " # \n", + " df['rate'] = df['value'].diff()\n", + "\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + " ax1 = fig.add_subplot(111)\n", + " ax1.title.set_text('CPU usage')\n", + " ax1.plot(df['epoch'], df['rate'])\n", + " return df\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + "\n", + " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", + "\n", + " target_cpu_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_cpu_usage_range)\n", + " \n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = 
df.groupby(['cpu'])\n", + " used_cores = []\n", + "\n", + " for key, item in groups:\n", + " curr_df = item\n", + " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", + "\n", + " if idle_row['value'].iloc[0] != '100':\n", + " used_cores.append(key)\n", + " type_grps = curr_df.groupby('type')\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + "\n", + " for type_key, new_item in type_grps:\n", + "\n", + " if type_key == 'system':\n", + " ax1 = fig.add_subplot(131)\n", + " ax1.title.set_text(type_key)\n", + " ax1.plot(new_item['timestamp'], new_item['value'])\n", + " elif type_key == 'user':\n", + " ax2 = fig.add_subplot(132)\n", + " ax2.title.set_text(type_key)\n", + " ax2.plot(new_item['timestamp'], new_item['value'])\n", + " elif type_key == 'wait':\n", + " ax3 = fig.add_subplot(133)\n", + " ax3.title.set_text(type_key)\n", + " ax3.plot(new_item['timestamp'], new_item['value'])\n", + "\n", + " plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n", + " plt.show()\n", + " print(\"Number of used cores: \", len(used_cores))\n", + " return used_cores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Interface Dropped (both type 1 and 2, i.e rx and tx)\n", + "#TODO: Change this to separate functions later\n", + "def interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " df_0 = df #TODO: Change this\n", + " df_1 = df #TODO: Change this\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_interface_if_dropped_0_total{exported_instance='\" + node + \"'}\"}\n", + "\n", + " interface_dropped_0 = query_range(start, end, 
params, steps)\n", + " df_0 = convert_to_df_range(interface_dropped_0)\n", + " \n", + " params = {'query' : \"collectd_interface_if_dropped_1_total{exported_instance='\" + node + \"'}\"}\n", + " interface_dropped_1 = query_range(start, end, params, steps)\n", + " df_1 = convert_to_df_range(interface_dropped_1)\n", + "\n", + " \n", + " #df_0 : interfaces_dropped_0_df\n", + " df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " #df_1 : interfaces_dropped_1_df\n", + " df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " groups_0 = df_0.groupby(['interface'])\n", + " groups_1 = df_1.groupby(['interface'])\n", + "\n", + " groups = [groups_0, groups_1]\n", + " dropped_interfaces= []\n", + " drop_type = 0\n", + " color = ['oldlace', 'mistyrose']\n", + " plot_iter = 111\n", + " for group in groups:\n", + " dropped = []\n", + "\n", + " for key, item in group:\n", + " curr_df = item\n", + " if np.any(curr_df['value'] == '1'):\n", + " dropped_row = curr_df.loc[curr_df['value'] == '1']\n", + " dropped.append([key, dropped_row['timestamp'].iloc[0]])\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[drop_type], edgecolor='red')\n", + " ax = fig.add_subplot(plot_iter)\n", + " ax.title.set_text(\"Interface: {}\".format(key))\n", + " ax.plot(item['timestamp'], item['value'])\n", + " dropped_interfaces.append(dropped)\n", + " plt.suptitle('Interfaces Drop type {}'.format(drop_type), fontsize=14)\n", + " plt.show()\n", + " drop_type += 1\n", + " return dropped_interfaces\n", + "\n", + "\n", + "# Interface Errors (both type 1 and 2, i.e rx and tx)\n", + "#TODO: Change this to separate functions later\n", + "def interface_errors(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " df_0 = df #TODO: Change this\n", + " df_1 = df #TODO: Change this\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, 
end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_interface_if_errors_0_total{exported_instance='\" + node + \"'}\"}\n", + " interfaces_errors_0 = query_range(start, end, params, steps)\n", + " df_0 = convert_to_df_range(interfaces_errors_0)\n", + " \n", + " params = {'query' : \"collectd_interface_if_errors_1_total{exported_instance='\" + node + \"'}\"}\n", + " interface_errors_1 = query_range(start, end, params, steps)\n", + " df_1 = convert_to_df_range(interface_errors_1)\n", + "\n", + " \n", + " #df_0 : interfaces_errors_0_df\n", + " df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " #df_1 : interfaces_dropped_1_df\n", + " df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n", + "\n", + " groups_0 = df_0.groupby(['interface'])\n", + " groups_1 = df_1.groupby(['interface'])\n", + "\n", + " groups = [groups_0, groups_1]\n", + " err_interfaces= []\n", + " err_type = 0\n", + " color = ['oldlace', 'mistyrose']\n", + " for group in groups:\n", + " errors = []\n", + "\n", + " for key, item in group:\n", + " curr_df = item\n", + "\n", + " if np.any(curr_df['value'] == '1'):\n", + " err_row = curr_df.loc[curr_df['value'] == '1']\n", + " erros.append([key, err_row['timestamp'].iloc[0]])\n", + "\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[err_type], edgecolor='red')\n", + " ax = fig.add_subplot(111)\n", + " ax.title.set_text(\"Interface: {}\".format(key))\n", + " ax.plot(item['timestamp'], item['value'])\n", + "\n", + " err_interfaces.append(errors)\n", + " plt.suptitle('Interfaces Error type {}'.format(err_type), fontsize=14)\n", + " plt.show()\n", + " err_type += 1\n", + "\n", + " return err_interfaces" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### RDT " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# L3 cache bytes\n", + "def plot_rdt_bytes(start=None, end=None, 
node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + "\n", + " params = {'query' : \"collectd_intel_rdt_bytes{exported_instance='\" + node + \"'}\"}\n", + " intel_rdt_bytes = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(intel_rdt_bytes)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['intel_rdt'])\n", + " for key, item in groups:\n", + " curr_df = item\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + " ax1 = fig.add_subplot(111)\n", + " ax1.title.set_text(\"Intel RDT Number: {}\".format(key))\n", + " ax1.plot(item['timestamp'], item['value'])\n", + " plt.show()\n", + " return\n", + "\n", + "\n", + "# L3 IPC values\n", + "def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_intel_rdt_ipc{exported_instance='\" + node + \"'}\"}\n", + " intel_rdt_ipc = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(intel_rdt_ipc)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " groups = df.groupby(['intel_rdt'])\n", + " for key, item in groups:\n", + " curr_df = item\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + " ax1 = fig.add_subplot(111)\n", + " ax1.title.set_text(\"Intel RDT Number: {}, IPC value\".format(key))\n", + " ax1.plot(item['timestamp'], item['value'])\n", + " plt.show()\n", + " return\n", + "\n", + "\n", + "# memory bandwidth\n", + "def 
get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + "\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_intel_rdt_memory_bandwidth_total{exported_instance='\" + node + \"'}\"}\n", + " intel_rdt_mem_bw = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(intel_rdt_mem_bw)\n", + "\n", + " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", + " \n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Memory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def get_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " if csv is not None:\n", + " df = pd.read_csv(csv)\n", + " else:\n", + " if start is None or end is None or node is None:\n", + " return \"Start, end and Node name required when fetching from prometheus\"\n", + " \n", + " params = {'query' : \"collectd_memory{exported_instance='\" + node + \"'} / (1024*1024*1024) \"} \n", + " target_memory_usage_range = query_range(start, end, params, steps)\n", + " df = convert_to_df_range(target_memory_usage_range)\n", + "\n", + " df = df.drop(['instance', 'job'], axis = 1)\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing Zone" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# prom fetch\n", + "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "print(cores)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage / Examples\n", + "\n", + "\n", 
+ "##### CPU \n", + "\n", + "- For calling cpu unused cores\n", + "\n", + "```py\n", + "# Fetching from prometheus\n", + "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "```\n", + "\n", + "- For finding fully used cores\n", + "\n", + "```py\n", + "# Fetching from prometheus\n", + "fully_used = fully_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "```\n", + "\n", + "- Similarly for plotting used cores\n", + "\n", + "```py\n", + "# Fetching\n", + "plot_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "#csv\n", + "# use Analysis-Monitoring-Local Notebook for correct analysis \n", + "plot_used_cores(csv='metrics_data/cpu-0/cpu-user-2020-06-02')\n", + "\n", + "```\n", + "\n", + "\n", + "##### Interface\n", + "\n", + "- Interface Dropped \n", + "\n", + "```py\n", + "# Fetching from prom\n", + "dropped_interfaces = interface_dropped('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "\n", + "```\n", + "\n", + "- Interface Errors\n", + "\n", + "```py\n", + "# Fetching from prom\n", + "interface_errors('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "```\n", + "\n", + "##### RDT\n", + "\n", + "- Plot bytes\n", + "\n", + "```py\n", + "# fetch\n", + "plot_rdt_bytes('2020-07-31 08:00:12', '2020-07-31 08:01:12','pod12-node4')\n", + "```\n", + "\n", + "- Plot ipc values\n", + "\n", + "```py\n", + "#fetch\n", + "plot_rdt_ipc('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "```\n", + "\n", + "- Memory bandwidth\n", + "\n", + "```py\n", + "#fetch\n", + "get_rdt_memory_bandwidth('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb new file mode 100644 index 00000000..0385b6f9 --- /dev/null +++ b/tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-Local.ipynb @@ -0,0 +1,913 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Metrics Analysis Notebook (local)\n", + "\n", + "#### Used to analyse / visualize the metrics when uploaded via csv file\n", + "\n", + "### Contributor: Aditya Srivastava \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "import json\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "from pprint import pprint\n", + "import re\n", + "import requests\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATETIME_FORMAT = \"%Y-%m-%d %H:%M:%S\"\n", + "\n", + "def convert_to_timestamp(s):\n", + " global DATETIME_FORMAT\n", + " return time.mktime(datetime.strptime(s, DATETIME_FORMAT).timetuple())\n", + "\n", + "def convert_to_time_string(epoch):\n", + " global DATETIME_FORMAT\n", + " t = datetime.fromtimestamp(float(epoch)/1000.)\n", + " return t.strftime(DATETIME_FORMAT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Note: \n", + " \n", + "Path will be used as a parameter in almost every function\n", + "\n", + "path / rootdir / csv : (str) Path to the folder whose direct children are metric folders\n", + 
"\n", + "example: /path/to/folder\n", + "\n", + "When : \n", + "```sh\n", + "ls /path/to/folder\n", + "\n", + "# output should be directories such as\n", + "# cpu-0 cpu-1 cpu-2 ..........................\n", + "# processes-ovs-vswitchd ........processes-ovsdb-server\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis Function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rootdir = 'metrics_data/'\n", + "\n", + "def fetch_cpu_data(rootdir):\n", + " df = pd.DataFrame()\n", + " reg_compile = re.compile(\"cpu-\\d{1,2}\")\n", + " for dirpath, dirnames, filenames in os.walk(rootdir):\n", + " dirname = dirpath.split(os.sep)[-1] \n", + " if reg_compile.match(dirname):\n", + " # read 3 files from this folder...\n", + " _df = pd.DataFrame()\n", + " for file in filenames:\n", + " if 'user' in file:\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['user'] = temp_df['value']\n", + " _df['epoch'] = temp_df['epoch']\n", + "\n", + " if 'system' in file:\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['system'] = temp_df['value']\n", + " _df['epoch'] = temp_df['epoch']\n", + "\n", + " if 'idle' in file:\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['idle'] = temp_df['value']\n", + " _df['epoch'] = temp_df['epoch']\n", + "\n", + " _df['cpu'] = dirname.split('-')[-1]\n", + "\n", + " df = df.append(_df, ignore_index=True)\n", + "\n", + " total = df['user'] + df['system'] + df['idle']\n", + "\n", + " df['user_percentage'] = df['user']*100 / total\n", + " df['system_percentage'] = df['system']*100 / total\n", + " df['idle_percentage'] = df['idle']*100 / total\n", + " \n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# CPU Unused Cores\n", + "def 
unused_cores(rootdir, verbose=False):\n", + " \n", + " df = fetch_cpu_data(rootdir)\n", + " groups = df.groupby(['cpu'])\n", + " if verbose: print(\"Unused Cores :\")\n", + "\n", + " unused_cores = []\n", + " for key, item in groups:\n", + " curr_df = item\n", + " unused_cores.append(key)\n", + " idle_values = curr_df.loc[curr_df['idle_percentage'] < 99.999]\n", + " if np.any(idle_values):\n", + " unused_cores.pop(-1)\n", + "\n", + " unused_cores = set(unused_cores)\n", + " for key, item in groups:\n", + " if key not in unused_cores:\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + "\n", + " ax1 = fig.add_subplot(131)\n", + " ax1.title.set_text(\"System\")\n", + " ax1.plot(item['epoch'], item['system_percentage'])\n", + " \n", + " ax2 = fig.add_subplot(132)\n", + " ax2.title.set_text(\"User\")\n", + " ax2.plot(item['epoch'], item['user_percentage'])\n", + " \n", + " ax3 = fig.add_subplot(133)\n", + " ax3.title.set_text(\"Idle\")\n", + " ax3.plot(item['epoch'], item['idle_percentage'])\n", + "\n", + " plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n", + " plt.show()\n", + "\n", + " print(\"Number of unused cores: \", len(unused_cores))\n", + " return unused_cores\n", + "\n", + "\n", + "#CPU fully used cores\n", + "def fully_used_cores(rootdir, verbose=False):\n", + " \n", + "\n", + " df = fetch_cpu_data(rootdir)\n", + " groups = df.groupby(['cpu'])\n", + " if verbose: print(\"Fully Used Cores :\")\n", + "\n", + " fully_used_cores = []\n", + " for key, item in groups:\n", + " curr_df = item\n", + " idle_values = curr_df.loc[curr_df['idle_percentage'] <= 10]\n", + " if np.any(idle_values):\n", + " fully_used_cores.append(key)\n", + "\n", + " fully_used_cores = set(fully_used_cores)\n", + " for key, item in groups:\n", + " if key not in fully_used_cores:\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + "\n", + " ax1 = fig.add_subplot(131)\n", + " 
ax1.title.set_text(\"System\")\n", + " ax1.plot(item['epoch'], item['system_percentage'])\n", + "\n", + " ax2 = fig.add_subplot(132)\n", + " ax2.title.set_text(\"User\")\n", + " ax2.plot(item['epoch'], item['user_percentage'])\n", + "\n", + " ax3 = fig.add_subplot(133)\n", + " ax3.title.set_text(\"Idle\")\n", + " ax3.plot(item['epoch'], item['idle_percentage'])\n", + "\n", + " plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n", + " plt.show()\n", + "\n", + " print(\"Number of fully used cores: \", len(fully_used_cores))\n", + " return fully_used_cores\n", + "\n", + "\n", + "# CPU used cores plots\n", + "def used_cores(rootdir, verbose=False):\n", + "\n", + " df = fetch_cpu_data(rootdir)\n", + " groups = df.groupby(['cpu'])\n", + " if verbose: print(\"Used Cores :\")\n", + "\n", + " used_cores = []\n", + " for key, item in groups:\n", + " curr_df = item\n", + " idle_values = curr_df.loc[curr_df['idle_percentage'] < 99.999]\n", + " if np.any(idle_values):\n", + " used_cores.append(key)\n", + "\n", + " used_cores = set(used_cores)\n", + " for key, item in groups:\n", + " if key not in used_cores:\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", + "\n", + " ax1 = fig.add_subplot(131)\n", + " ax1.title.set_text(\"System\")\n", + " ax1.plot(item['epoch'], item['system_percentage'])\n", + "\n", + " ax2 = fig.add_subplot(132)\n", + " ax2.title.set_text(\"User\")\n", + " ax2.plot(item['epoch'], item['user_percentage'])\n", + "\n", + " ax3 = fig.add_subplot(133)\n", + " ax3.title.set_text(\"Idle\")\n", + " ax3.plot(item['epoch'], item['idle_percentage'])\n", + "\n", + " plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n", + " plt.show()\n", + "\n", + " print(\"Number of used cores: \", len(used_cores))\n", + " return used_cores\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "rootdir = 'metrics_data/'\n", + "\n", + "def fetch_interfaces_data(rootdir):\n", + "\n", + " df = pd.DataFrame()\n", + " reg_compile = re.compile(\"interface-.*\")\n", + " for dirpath, dirnames, filenames in os.walk(rootdir):\n", + " dirname = dirpath.split(os.sep)[-1] \n", + " if reg_compile.match(dirname):\n", + " # read 3 files from this folder...\n", + " _df = pd.DataFrame()\n", + " for file in filenames:\n", + " if 'errors' in file:\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['error_rx'] = temp_df['rx']\n", + " _df['error_tx'] = temp_df['tx']\n", + " _df['epoch'] = temp_df['epoch']\n", + "\n", + " if 'dropped' in file:\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['dropped_rx'] = temp_df['rx']\n", + " _df['dropped_tx'] = temp_df['tx']\n", + " _df['epoch'] = temp_df['epoch']\n", + "\n", + " _df['interface'] = '-'.join(dirname.split('-')[1:])\n", + " df = df.append(_df, ignore_index=True)\n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Interface Dropped (both type 1 and 2, i.e rx and tx)\n", + "def interface_dropped(rootdir, verbose=False):\n", + " \n", + " df = fetch_interfaces_data(rootdir)\n", + " group = df.groupby(['interface'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " dropped = {'rx':[], 'tx':[]}\n", + "\n", + " itr = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + "\n", + " if np.any(curr_df['dropped_rx'] == 1):\n", + " dropped_rows = curr_df[curr_df['dropped_rx'] == 1]\n", + " dropped['rx'].append([key, dropped_row['epoch'].iloc[0]])\n", + " if np.any(curr_df['dropped_tx'] == 1):\n", + " dropped_rows = curr_df[curr_df['dropped_tx'] == 1]\n", + " dropped['tx'].append([key, dropped_row['epoch'].iloc[0]])\n", + "\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[itr%2], edgecolor='red')\n", + " ax = fig.add_subplot(211)\n", + " 
ax.title.set_text(\"Interface: {} Dropped (rx)\".format(key))\n", + " ax.plot(item['epoch'], item['dropped_rx'])\n", + "\n", + " ax1 = fig.add_subplot(212)\n", + " ax1.title.set_text(\"Interface: {} Dropped (tx)\".format(key))\n", + " ax1.plot(item['epoch'], item['dropped_tx'])\n", + "\n", + " itr += 1\n", + "\n", + " plt.suptitle('Interface Dropped', fontsize=14)\n", + " plt.show()\n", + "\n", + " return dropped\n", + "\n", + "\n", + "# Interface Errors (both type 1 and 2, i.e rx and tx)\n", + "def interface_errors(rootdir, verbose=False):\n", + " \n", + " df = fetch_interfaces_data(rootdir)\n", + " group = df.groupby(['interface'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " errors = {'rx':[], 'tx':[]}\n", + "\n", + " itr = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + "\n", + " if np.any(curr_df['error_rx'] == 1):\n", + " err_rows = curr_df[curr_df['error_rx'] == 1]\n", + " errors['rx'].append([key, err_row['epoch'].iloc[0]])\n", + " if np.any(curr_df['error_tx'] == 1):\n", + " err_rows = curr_df[curr_df['error_tx'] == 1]\n", + " errors['tx'].append([key, err_row['epoch'].iloc[0]])\n", + "\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[itr%2], edgecolor='red')\n", + " ax = fig.add_subplot(211)\n", + " ax.title.set_text(\"Interface: {} Errors (rx)\".format(key))\n", + " ax.plot(item['epoch'], item['error_rx'])\n", + "\n", + " ax1 = fig.add_subplot(212)\n", + " ax1.title.set_text(\"Interface: {} Errors (tx)\".format(key))\n", + " ax1.plot(item['epoch'], item['error_tx'])\n", + "\n", + " itr += 1\n", + "\n", + " plt.suptitle('Interface Erros', fontsize=14)\n", + " plt.show()\n", + "\n", + " return errors\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### OVS Stats (Non DPDK)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rootdir = 'metrics_data/'\n", + "\n", + "def fetch_ovs_stats_data(rootdir):\n", + " df = pd.DataFrame()\n", 
+ " reg_compile = re.compile(\"ovs_stats-.*\")\n", + " for dirpath, dirnames, filenames in os.walk(rootdir):\n", + " dirname = dirpath.split(os.sep)[-1] \n", + " if reg_compile.match(dirname):\n", + " if 'dpdk' in dirname:\n", + " continue #ignoring dpdk\n", + "\n", + " _df = pd.DataFrame()\n", + " for file in filenames:\n", + " if 'errors' in file:\n", + " col_name = '-'.join(file.split('_')[1:])\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + "\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [i + '_' + col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df\n", + "\n", + " if 'dropped' in file:\n", + " col_name = '-'.join(file.split('_')[1:])\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [i + '_' + col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df \n", + " _df['interface'] = '-'.join(dirname.split('-')[1:])\n", + " df = df.append(_df, ignore_index=True)\n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def ovs_stats_dropped(rootdir, verbose=False):\n", + " \n", + " df = fetch_ovs_stats_data(rootdir)\n", + " group = df.groupby(['interface'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " i = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + " for col in curr_df:\n", + " if 'dropped' in col:\n", + " if item[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(item['epoch'], item[col])\n", + " plt.title(\"Interface: {} Dropped {}\".format(key, col))\n", + " i += 1\n", + " plt.show()\n", + " return\n", + "\n", + "\n", + "# Interface Errors (both type 1 and 2, i.e rx and tx)\n", + "def ovs_stats_errors(rootdir, verbose=False):\n", + "\n", + "\n", + " df = 
fetch_ovs_stats_data(rootdir)\n", + " group = df.groupby(['interface'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " i = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + " for col in curr_df:\n", + " if 'error' in col:\n", + " if item[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(item['epoch'], item[col])\n", + " plt.title(\"Interface: {} Errors {}\".format(key, col))\n", + " i += 1\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### DPDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rootdir = 'metrics_data/'\n", + "\n", + "def fetch_dpdk_data(rootdir):\n", + " df = pd.DataFrame()\n", + " reg_compile = re.compile(\".*dpdk.*\")\n", + " for dirpath, dirnames, filenames in os.walk(rootdir):\n", + " dirname = dirpath.split(os.sep)[-1] \n", + " if reg_compile.match(dirname):\n", + " _df = pd.DataFrame()\n", + " for file in filenames:\n", + " if 'errors' in file:\n", + " col_name = '-'.join(file.split('_')[1:])\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + "\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [i + '_' + col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df\n", + "\n", + " if 'dropped' in file:\n", + " col_name = '-'.join(file.split('_')[1:])\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [i + '_' + col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df \n", + " _df['dpdk'] = '-'.join(dirname.split('-')[1:])\n", + " df = df.append(_df, ignore_index=True)\n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fetch_dpdk_data(rootdir)" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def dpdk_dropped(rootdir, verbose=False):\n", + " \n", + " df = fetch_dpdk_data(rootdir)\n", + " group = df.groupby(['dpdk'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " i = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + " for col in curr_df:\n", + " if 'dropped' in col:\n", + " if item[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(item['epoch'], item[col])\n", + " plt.title(\"DpDK: {} Dropped {}\".format(key, col))\n", + " i += 1\n", + " plt.show()\n", + " return\n", + "\n", + "\n", + "# Interface Errors (both type 1 and 2, i.e rx and tx)\n", + "def dpdk_errors(rootdir, verbose=False):\n", + "\n", + "\n", + " df = fetch_dpdk_data(rootdir)\n", + " group = df.groupby(['dpdk'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " i = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + " for col in curr_df:\n", + " if 'error' in col:\n", + " if item[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(item['epoch'], item[col])\n", + " plt.title(\"DpDK: {} Errors {}\".format(key, col))\n", + " i += 1\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dpdk_dropped(rootdir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### RDT (needs to be tested)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rootdir = 'metrics_data/'\n", + "\n", + "def fetch_rdt_data(rootdir):\n", + " df = pd.DataFrame()\n", + " reg_compile = re.compile(\".*rdt.*\")\n", + " for dirpath, dirnames, filenames in os.walk(rootdir):\n", + " dirname = dirpath.split(os.sep)[-1] \n", + " if 
reg_compile.match(dirname):\n", + " _df = pd.DataFrame()\n", + " for file in filenames:\n", + " if 'bytes' in file:\n", + " col_name = '-'.join(file.split('_')[1:])\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + "\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [i + '_' + col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df\n", + " \n", + " if 'bandwidth' in file:\n", + " col_name = '-'.join(file.split('_')[1:])\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + "\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [i + '_' + col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df\n", + "\n", + " if 'ipc' in file:\n", + " col_name = '-'.join(file.split('_')[1:])\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [i + '_' + col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df \n", + " _df['intel_rdt'] = '-'.join(dirname.split('-')[1:])\n", + " df = df.append(_df, ignore_index=True)\n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# L3 cache bytes\n", + "def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " df = fetch_rdt_data(rootdir)\n", + " group = df.groupby(['intel_rdt'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " i = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + " for col in curr_df:\n", + " if 'bytes' in col:\n", + " if item[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(item['epoch'], item[col])\n", + " plt.title(\"RDT BYTES, RDT: {}\".format(key, col))\n", + " i += 1\n", + " plt.show()\n", + "\n", + "\n", + "# L3 
IPC values\n", + "def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " \n", + " df = fetch_rdt_data(rootdir)\n", + " group = df.groupby(['intel_rdt'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " i = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + " for col in curr_df:\n", + " if 'ipc' in col:\n", + " if item[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(item['epoch'], item[col])\n", + " plt.title(\"RDT IPC, RDT: {}\".format(key, col))\n", + " i += 1\n", + " plt.show()\n", + "\n", + "\n", + "\n", + "# memory bandwidth\n", + "def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", + " \n", + " \n", + " df = fetch_rdt_data(rootdir)\n", + " group = df.groupby(['intel_rdt'])\n", + " color = ['oldlace', 'mistyrose']\n", + "\n", + " i = 0\n", + " for key, item in group:\n", + " curr_df = item\n", + " for col in curr_df:\n", + " if 'bandwidht' in col:\n", + " if item[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(item['epoch'], item[col])\n", + " plt.title(\"RDT Memory Bandwidht, RDT: {}\".format(key, col))\n", + " i += 1\n", + " plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Memory (the following functions still need to be written for csv)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rootdir = 'metrics_data/'\n", + "\n", + "def fetch_memory_data(rootdir):\n", + " df = pd.DataFrame()\n", + " reg_compile = re.compile(\"memory\")\n", + " for dirpath, dirnames, filenames in os.walk(rootdir):\n", + " dirname = dirpath.split(os.sep)[-1] \n", + " if reg_compile.match(dirname):\n", + " print(dirname)\n", + " _df = pd.DataFrame()\n", + " for file in filenames: \n", + " 
col_name = file.split('-')[1]\n", + " temp_df = pd.read_csv(dirpath + os.sep + file)\n", + " _df['epoch'] = temp_df['epoch']\n", + " temp_df = temp_df.drop(['epoch'], axis=1)\n", + " new_cols = [col_name for i in temp_df.columns]\n", + " _df[new_cols] = temp_df\n", + " df = df.append(_df, ignore_index=True)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def get_memory_usage(rootdir, verbose=False):\n", + " df = fetch_memory_data(rootdir)\n", + " color = ['oldlace', 'mistyrose']\n", + " i = 0\n", + " for col in df:\n", + " if df[col].isnull().all():\n", + " continue\n", + " fig = plt.figure(figsize=(24,6), facecolor=color[i%2], edgecolor='red')\n", + " plt.plot(df['epoch'], df[col])\n", + " plt.title(\"{} Memory\".format(col))\n", + " i += 1\n", + " plt.show()\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage / Examples\n", + "\n", + "\n", + "##### CPU \n", + "\n", + "- For calling cpu unused cores\n", + "\n", + "```py\n", + "cores = unused_cores(rootdir='metrics_data')\n", + "```\n", + "\n", + "- For finding fully used cores\n", + "\n", + "```py\n", + "fully_used = fully_used_cores('metrics_data')\n", + "```\n", + "\n", + "- Similarly for plotting used cores\n", + "\n", + "```py\n", + "used_cores('metrics_data')\n", + "```\n", + "\n", + "\n", + "##### Interface\n", + "\n", + "- Interface Dropped \n", + "\n", + "```py\n", + "# Using CSV\n", + "dropped_interfaces = interface_dropped('metrics_data')\n", + "```\n", + "\n", + "- Interface Errors\n", + "\n", + "```py\n", + "# Using CSV\n", + "interface_errors('metrics_data')\n", + "```\n", + "\n", + "##### OVS Stats\n", + "\n", + "- OVS Stats Dropped \n", + "\n", + "```py\n", + "# Using CSV\n", + "ovs_stats_dropped('metrics_data')\n", + "```\n", + "\n", + "- OVS Stats Errors\n", +
"\n", + "```py\n", + "# Using CSV\n", + "ovs_stats_errors('metrics_data')\n", + "```\n", + "\n", + "##### DPDK \n", + "\n", + "- DPDK Dropped \n", + "\n", + "```py\n", + "# Using CSV\n", + "dpdk_dropped('metrics_data')\n", + "```\n", + "\n", + "- DPDK Errors\n", + "\n", + "```py\n", + "# Using CSV\n", + "dpdk_errors('metrics_data')\n", + "```\n", + "\n", + "\n", + "\n", + "##### RDT (Do not run yet)\n", + "\n", + "- Plot bytes\n", + "\n", + "```py\n", + "#csv\n", + "plot_rdt_bytes('metrics_data')\n", + "```\n", + "\n", + "- Plot ipc values\n", + "\n", + "```py\n", + "#csv\n", + "plot_rdt_ipc('metrics_data')\n", + "```\n", + "\n", + "- Memory bandwidth\n", + "\n", + "```py\n", + "#csv\n", + "get_rdt_memory_bandwidth('metrics_data')\n", + "```\n", + "\n", + "##### Memory\n", + "\n", + "```py\n", + "#csv\n", + "get_memory_usage('metrics_data')\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- cgit 1.2.3-korg