diff options
author | Aditya Srivastava <adityasrivastava301199@gmail.com> | 2020-08-24 02:46:18 +0530 |
---|---|---|
committer | Aditya Srivastava <adityasrivastava301199@gmail.com> | 2020-08-24 02:46:18 +0530 |
commit | d25014e0201cf0b0a662a84984191786d7f8eb5d (patch) | |
tree | c30831d9cd9083f5a6ca64ea53cfdb35230efc14 /tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb | |
parent | e5eef0ffdf2d281fecf12597041fd8af23d65e42 (diff) |
Tools: Add monitoring analysis jupyter notebook
This patch adds 2 monitoring jupyter notebooks and 1 notebook combining
both logs and metrics causation analysis.
Causation analysis: Finds anomalies in logs, then fetches and analyses
metrics in a delta time range of that timestamp.
Analysis-Monitoring-K8S: Fetches metrics from prometheus to analyse them
Analysis-Monitoring-Local: Data folder containing csv files of metrics is given,
analysis is performed on that data.
Signed-off-by: Aditya Srivastava <adityasrivastava301199@gmail.com>
Change-Id: I8833f5155b3184f697fac5270c69e0df02d2986b
Diffstat (limited to 'tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb')
-rw-r--r-- | tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb | 644 |
1 files changed, 644 insertions, 0 deletions
# Notebook: tools/lma/metrics/jupyter-notebooks/Analysis-Monitoring-K8S.ipynb
# Cells are listed in notebook order; markdown cells are kept as comments.

# %% [markdown]
# # Metrics Analysis Notebook (k8s)
#
# #### Used to analyse / visualize the metrics, data fetched from prometheus (monitoring cluster)
#
# ### Contributor: Aditya Srivastava <adityasrivastava301199@gmail.com>

# %%
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

import datetime  # NOTE: rebound to the `datetime` class by the from-import below
import time
import requests

from pprint import pprint
import json
from datetime import datetime

# %%
PROMETHEUS = 'http://10.10.120.211:30902/' #do not change, unless sure

# %% [markdown]
# ## Helper Functions

# %%
#functions to make DF out of query json

def _result_list(res_json):
    """Return the ``data.result`` list of a Prometheus response, or ``[]``.

    An error response (``{"status": "error", ...}``) carries no ``data`` key,
    so it is treated the same way as an empty result.
    """
    return (res_json.get('data') or {}).get('result') or []


def convert_to_df(res_json):
    """Convert an instant-query (``/api/v1/query``) response to a DataFrame.

    Parameters
    ----------
    res_json : dict
        Decoded JSON body returned by ``query_current``.

    Returns
    -------
    pandas.DataFrame
        One row per returned series: one column per metric label (in order of
        first appearance) followed by ``value``, which holds the sample value
        exactly as delivered in the response (``data['value'][-1]``).
        An empty DataFrame is returned when the result list is empty.
    """
    data_list = _result_list(res_json)
    if not data_list:
        return pd.DataFrame()

    columns = []
    records = []
    for data in data_list:
        labels = data['metric']
        for label in labels:
            if label not in columns:
                columns.append(label)
        # Copy the labels so the caller's response dict is not modified.
        record = dict(labels)
        record['value'] = data['value'][-1]
        records.append(record)
    if 'value' not in columns:
        columns.append('value')

    # Build the frame once; DataFrame.append in a loop is quadratic and was
    # removed in pandas 2.0.
    return pd.DataFrame(records, columns=columns)


def convert_to_df_range(res_json):
    """Convert a range-query (``/api/v1/query_range``) response to a DataFrame.

    Parameters
    ----------
    res_json : dict
        Decoded JSON body returned by ``query_range``.

    Returns
    -------
    pandas.DataFrame
        One row per (series, sample): the series' metric labels plus
        ``timestamp`` and ``value``. ``timestamp`` keeps the numeric value
        from the response; ``value`` is kept exactly as delivered (the
        analysis functions in this notebook compare it against strings such
        as ``'100'``). An empty DataFrame is returned when there is no data.
    """
    records = []
    for data in _result_list(res_json):
        labels = data['metric']
        for timestamp, value in data['values']:
            record = dict(labels)
            record['timestamp'] = timestamp
            record['value'] = value
            records.append(record)
    return pd.DataFrame(records)

# %%
# functions to query

def convert_to_timestamp(s):
    """Convert a ``'YYYY-mm-dd HH:MM:SS'`` string to a Unix timestamp.

    The string is interpreted in the local timezone of the machine running
    the notebook (``time.mktime``).
    """
    return time.mktime(datetime.strptime(s, "%Y-%m-%d %H:%M:%S").timetuple())


def _api_url(path):
    """Join ``PROMETHEUS`` and an absolute API path without a double slash."""
    return PROMETHEUS.rstrip('/') + path


def query_current(params=None):
    """Run an instant query against Prometheus.

    Parameters
    ----------
    params : dict, optional
        Query parameters, e.g. ``{'query': 'container_cpu_user_seconds_total'}``.

    Returns
    -------
    dict
        Decoded JSON response of the query.
    """
    res = requests.get(_api_url('/api/v1/query'),
                       params=dict(params or {}))
    return res.json()


def query_range(start, end, params=None, steps = '30s'):
    """Run a range query against Prometheus.

    Parameters
    ----------
    start, end : str
        Range boundaries formatted as ``'YYYY-mm-dd HH:MM:SS'``.
    params : dict, optional
        Query parameters, e.g. ``{'query': 'container_cpu_user_seconds_total'}``.
        The dict is copied; the caller's object is left untouched.
    steps : str
        Query resolution step passed as the ``step`` parameter.

    Returns
    -------
    dict
        Decoded JSON response of the query.
    """
    # Work on a copy: the previous mutable default ({}) accumulated
    # start/end/step across calls and modified the caller's dict.
    query_params = dict(params or {})
    query_params["start"] = convert_to_timestamp(start)
    query_params["end"] = convert_to_timestamp(end)
    query_params["step"] = steps

    print(query_params)

    res = requests.get(_api_url('/api/v1/query_range'),
                       params=query_params,
                       )

    return res.json()

# %% [markdown]
#

# %% [markdown]
# ## Analysis Function

# %% [markdown]
# #### CPU

# %%
_MISSING_ARGS_MSG = "Start, end and Node name required when fetching from prometheus"
_PROM_META_COLUMNS = ['__name__', 'instance', 'job']
_CPU_TYPE_SUBPLOTS = {'system': 131, 'user': 132, 'wait': 133}


def _missing_fetch_args(start, end, node):
    """Return True when any argument needed for a prometheus fetch is None."""
    return start is None or end is None or node is None


def _fetch_node_metric(metric, start, end, node, steps, expr_suffix=''):
    """Range-query ``metric`` for one node and return the samples as a DataFrame."""
    params = {'query' : metric + "{exported_instance='" + node + "'}" + expr_suffix}
    return convert_to_df_range(query_range(start, end, params, steps))


def _drop_meta(df, columns=_PROM_META_COLUMNS):
    """Drop bookkeeping label columns; columns that are absent are ignored."""
    return df.drop(list(columns), axis = 1, errors='ignore')


def _to_numeric(df, columns=('timestamp', 'value')):
    """Return a copy of ``df`` with the given columns converted to numbers.

    Sample values arrive as strings; plotting strings gives categorical axes
    and comparing them only works on exact text matches. Unparseable entries
    become NaN.
    """
    df = df.copy()
    for col in columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    return df


def _cpu_percent_by_core(start, end, node, steps):
    """Fetch ``collectd_cpu_percent`` and split it per core.

    Returns a list of ``(core, first_idle, core_df)`` tuples where
    ``first_idle`` is the first 'idle' sample of the core in the range, or
    None when the core has no usable 'idle' sample.
    """
    df = _fetch_node_metric('collectd_cpu_percent', start, end, node, steps)
    df = _to_numeric(_drop_meta(df))
    if df.empty or 'cpu' not in df.columns or 'type' not in df.columns:
        return []

    cores = []
    # Group by the scalar label (not a one-element list) so keys are plain
    # label values on every pandas version.
    for core, core_df in df.groupby('cpu'):
        idle = core_df.loc[core_df['type'] == 'idle', 'value']
        first_idle = None
        if not idle.empty and not pd.isna(idle.iloc[0]):
            first_idle = idle.iloc[0]
        cores.append((core, first_idle, core_df))
    return cores


# CPU Unused Cores
def unused_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """List the cores whose first 'idle' sample in the range is 100 percent.

    Parameters: ``start``/``end`` are 'YYYY-mm-dd HH:MM:SS' strings, ``node``
    is the ``exported_instance`` label, ``steps`` the query step. When ``csv``
    is given the file is read and returned as a DataFrame without analysis.

    Returns a list of core numbers (int), the loaded DataFrame in csv mode,
    or an error message string when start, end or node is missing.
    """
    if csv is not None:
        return pd.read_csv(csv)
    if _missing_fetch_args(start, end, node):
        return _MISSING_ARGS_MSG

    cores = _cpu_percent_by_core(start, end, node, steps)
    if verbose: print("Unused Cores :")
    unused = []
    for core, first_idle, _ in cores:
        if first_idle == 100:
            if verbose: print("Core: ",core)
            unused.append(int(core))

    print("Number of unused cores: ", len(unused))
    return unused


#CPU fully used cores
def fully_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """List the cores whose first 'idle' sample in the range is 0 percent.

    Arguments and return conventions are the same as for ``unused_cores``.
    """
    if csv is not None:
        return pd.read_csv(csv)
    if _missing_fetch_args(start, end, node):
        return _MISSING_ARGS_MSG

    cores = _cpu_percent_by_core(start, end, node, steps)
    if verbose: print("Fully Used Cores :")
    fully_used = []
    for core, first_idle, _ in cores:
        if first_idle == 0:
            if verbose: print("Core: ",core)
            fully_used.append(int(core))
    print("Number of fully used cores: ", len(fully_used))
    return fully_used


# CPU used cores plots
def plot_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot system/user/wait usage for every core that is not fully idle.

    In csv mode the per-sample difference of ``value`` is plotted against the
    ``epoch`` column and the DataFrame (with the added ``rate`` column) is
    returned. Otherwise one figure per used core is shown and the list of
    used core labels is returned. A missing start, end or node yields an
    error message string.
    """
    if csv is not None:
        df = pd.read_csv(csv)
        df['rate'] = df['value'].diff()

        fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')
        ax1 = fig.add_subplot(111)
        ax1.title.set_text('CPU usage')
        ax1.plot(df['epoch'], df['rate'])
        return df
    if _missing_fetch_args(start, end, node):
        return _MISSING_ARGS_MSG

    used = []
    for core, first_idle, core_df in _cpu_percent_by_core(start, end, node, steps):
        # Cores without an idle series cannot be classified; skip them.
        if first_idle is None or first_idle == 100:
            continue
        used.append(core)
        fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')

        for type_key, type_df in core_df.groupby('type'):
            position = _CPU_TYPE_SUBPLOTS.get(type_key)
            if position is None:
                continue
            ax = fig.add_subplot(position)
            ax.title.set_text(type_key)
            ax.plot(type_df['timestamp'], type_df['value'])

        plt.suptitle('Used CPU Core {}'.format(core), fontsize=14)
        plt.show()
    print("Number of used cores: ", len(used))
    return used

# %% [markdown]
# #### Interface

# %%
def _interface_counter_report(metric_template, suptitle_template,
                              start, end, node, steps, csv):
    """Shared implementation of ``interface_dropped`` / ``interface_errors``.

    ``metric_template`` is formatted with 0 and 1 to obtain the two metric
    names; ``suptitle_template`` is formatted with the same number for the
    figure title. For each of the two metrics every interface is plotted and
    the interfaces having a sample equal to 1 are collected together with
    the timestamp of the first such sample.
    """
    if csv is not None:
        df = pd.read_csv(csv)
        frames = [df, df] #TODO: Change this
    else:
        if _missing_fetch_args(start, end, node):
            return _MISSING_ARGS_MSG
        frames = [
            _fetch_node_metric(metric_template.format(kind), start, end, node, steps)
            for kind in (0, 1)
        ]

    color = ['oldlace', 'mistyrose']
    report = []
    for kind, frame in enumerate(frames):
        frame = _to_numeric(_drop_meta(frame))
        hits = []

        if not frame.empty and 'interface' in frame.columns:
            for interface, iface_df in frame.groupby('interface'):
                # NOTE(review): the metric is a *_total counter; the check for
                # exactly 1 is kept from the original code - confirm intent.
                hit_rows = iface_df.loc[iface_df['value'] == 1]
                if not hit_rows.empty:
                    hits.append([interface, hit_rows['timestamp'].iloc[0]])

                fig = plt.figure(figsize=(24,6), facecolor=color[kind], edgecolor='red')
                ax = fig.add_subplot(111)
                ax.title.set_text("Interface: {}".format(interface))
                ax.plot(iface_df['timestamp'], iface_df['value'])

        report.append(hits)
        plt.suptitle(suptitle_template.format(kind), fontsize=14)
        plt.show()
    return report


# Interface Dropped (both type 1 and 2, i.e rx and tx)
#TODO: Change this to separate functions later
def interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot dropped-packet counters per interface and report drops.

    Queries ``collectd_interface_if_dropped_0_total`` and
    ``collectd_interface_if_dropped_1_total`` for ``node``.

    Returns a two-element list (one entry per metric), each a list of
    ``[interface, timestamp]`` pairs, or an error message string when start,
    end or node is missing. Timestamps are returned as numbers.
    """
    return _interface_counter_report(
        'collectd_interface_if_dropped_{}_total', 'Interfaces Drop type {}',
        start, end, node, steps, csv)


# Interface Errors (both type 1 and 2, i.e rx and tx)
#TODO: Change this to separate functions later
def interface_errors(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot error counters per interface and report errors.

    Queries ``collectd_interface_if_errors_0_total`` and
    ``collectd_interface_if_errors_1_total`` for ``node``.

    Return conventions are the same as for ``interface_dropped``.
    """
    # The original body appended to an undefined name (`erros`) and raised
    # NameError as soon as an error sample was found.
    return _interface_counter_report(
        'collectd_interface_if_errors_{}_total', 'Interfaces Error type {}',
        start, end, node, steps, csv)

# %% [markdown]
# #### RDT

# %%
def _plot_rdt_metric(metric, title_template, start, end, node, steps, csv):
    """Plot ``metric`` over time, one figure per ``intel_rdt`` label value."""
    if csv is not None:
        df = pd.read_csv(csv)
    else:
        if _missing_fetch_args(start, end, node):
            return _MISSING_ARGS_MSG
        df = _fetch_node_metric(metric, start, end, node, steps)

    df = _to_numeric(_drop_meta(df))
    if df.empty or 'intel_rdt' not in df.columns:
        return

    for rdt, rdt_df in df.groupby('intel_rdt'):
        fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')
        ax1 = fig.add_subplot(111)
        ax1.title.set_text(title_template.format(rdt))
        ax1.plot(rdt_df['timestamp'], rdt_df['value'])
        plt.show()
    return


# L3 cache bytes
def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot ``collectd_intel_rdt_bytes`` per RDT group.

    Returns None after plotting, or an error message string when start, end
    or node is missing.
    """
    return _plot_rdt_metric('collectd_intel_rdt_bytes', "Intel RDT Number: {}",
                            start, end, node, steps, csv)


# L3 IPC values
def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Plot ``collectd_intel_rdt_ipc`` per RDT group.

    Returns None after plotting, or an error message string when start, end
    or node is missing.
    """
    return _plot_rdt_metric('collectd_intel_rdt_ipc', "Intel RDT Number: {}, IPC value",
                            start, end, node, steps, csv)


# memory bandwidth
def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Return ``collectd_intel_rdt_memory_bandwidth_total`` samples as a DataFrame.

    The '__name__', 'instance' and 'job' columns are removed when present.
    Returns an error message string when start, end or node is missing.
    """
    if csv is not None:
        df = pd.read_csv(csv)
    else:
        if _missing_fetch_args(start, end, node):
            return _MISSING_ARGS_MSG
        df = _fetch_node_metric('collectd_intel_rdt_memory_bandwidth_total',
                                start, end, node, steps)

    return _drop_meta(df)

# %% [markdown]
# #### Memory

# %%
def get_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):
    """Return ``collectd_memory`` samples divided by 1024**3 as a DataFrame.

    The 'instance' and 'job' columns are removed when present.
    Returns an error message string when start, end or node is missing.
    """
    if csv is not None:
        df = pd.read_csv(csv)
    else:
        if _missing_fetch_args(start, end, node):
            return _MISSING_ARGS_MSG
        df = _fetch_node_metric('collectd_memory', start, end, node, steps,
                                expr_suffix=" / (1024*1024*1024) ")

    return _drop_meta(df, columns=['instance', 'job'])

# %% [markdown]
# ## Testing Zone

# %%
# prom fetch
cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
print(cores)

# %% [markdown]
# ## Usage / Examples
#
#
# ##### CPU
#
# - For finding unused cpu cores
#
# ```py
# # Fetching from prometheus
# cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
#
# ```
#
# - For finding fully used cores
#
# ```py
# # Fetching from prometheus
# fully_used = fully_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
#
# ```
#
# - Similarly for plotting used cores
#
# ```py
# # Fetching
# plot_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
#
# #csv
# # use Analysis-Monitoring-Local Notebook for correct analysis
# plot_used_cores(csv='metrics_data/cpu-0/cpu-user-2020-06-02')
#
# ```
#
#
# ##### Interface
#
# - Interface Dropped
#
# ```py
# # Fetching from prom
# dropped_interfaces = interface_dropped('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
#
# ```
#
# - Interface Errors
#
# ```py
# # Fetching from prom
# interface_errors('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
# ```
#
# ##### RDT
#
# - Plot bytes
#
# ```py
# # fetch
# plot_rdt_bytes('2020-07-31 08:00:12', '2020-07-31 08:01:12','pod12-node4')
# ```
#
# - Plot ipc values
#
# ```py
# #fetch
# plot_rdt_ipc('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
# ```
#
# - Memory bandwidth
#
# ```py
# #fetch
# get_rdt_memory_bandwidth('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')
# ```

# Notebook metadata: kernel "Python 3" (python3), saved with Python 3.6.8, nbformat 4.4