{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Metrics Analysis Notebook (k8s)\n", "\n", "#### Used to analyse / visualize the metrics, data fetched from prometheus (monitoring cluster)\n", "\n", "### Contributor: Aditya Srivastava \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates\n", "import numpy as np\n", "\n", "import datetime\n", "import time\n", "import requests\n", "\n", "from pprint import pprint\n", "import json\n", "from datetime import datetime" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "PROMETHEUS = 'http://10.10.120.211:30902/' #do not change, unless sure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Helper Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#function to make DF out of query json\n", "\n", "def convert_to_df(res_json):\n", "\n", " data_list = res_json['data']['result']\n", " res_df = pd.DataFrame()\n", " if not data_list:\n", " return res_df\n", "\n", " # making colums\n", " headers = data_list[0]\n", " for data in data_list:\n", " metrics = data['metric']\n", " for metric in metrics.keys():\n", " res_df[metric] = np.nan\n", " res_df['value'] = 0\n", " \n", " # filling the df\n", " for data in data_list:\n", " metrics = data['metric']\n", " metrics['value'] = data['value'][-1]\n", " res_df = res_df.append(metrics, ignore_index=True) \n", "\n", " return res_df\n", "\n", "def convert_to_df_range(res_json):\n", "\n", " data_list = res_json['data']['result']\n", " res_df = pd.DataFrame()\n", " if not data_list:\n", " return res_df\n", "\n", " # filling the df\n", " for data in data_list:\n", " metrics = data['metric']\n", " values = np.array(data['values'])\n", " for time, value in values:\n", " metrics['timestamp'] = time\n", " metrics['value'] = value\n", " res_df = res_df.append(metrics, ignore_index=True) \n", "\n", " return res_df\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# functions to query\n", "\n", "def convert_to_timestamp(s):\n", " return time.mktime(datetime.strptime(s, \"%Y-%m-%d %H:%M:%S\").timetuple())\n", "\n", "def query_current(params={}):\n", " # input: params\n", " # type: dict\n", " # Example: {'query': 'container_cpu_user_seconds_total'}\n", " \n", " # Output: dict, loaded json response of the query\n", "\n", " res = requests.get(PROMETHEUS + '/api/v1/query', \n", " params=params)\n", " return json.loads(res.text)\n", "\n", "\n", "def query_range(start, end, params={}, steps = '30s'):\n", " # input: params\n", " # type: dict\n", " # Example: {'query': 'container_cpu_user_seconds_total'}\n", " \n", " # Output: dict, loaded json response of the query\n", " params[\"start\"] = convert_to_timestamp(start)\n", " params[\"end\"] = convert_to_timestamp(end)\n", " params[\"step\"] = steps\n", "\n", " print(params)\n", " \n", " res = requests.get(PROMETHEUS + '/api/v1/query_range', \n", " params=params,\n", " )\n", "\n", " return json.loads(res.text)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Analysis Function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### CPU" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# CPU Unused Cores\n", "def unused_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " return df\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", " \n", " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", "\n", " target_cpu_usage_range = query_range(start, end, params, steps)\n", " df = convert_to_df_range(target_cpu_usage_range)\n", "\n", " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", " groups = df.groupby(['cpu'])\n", " if verbose: print(\"Unused Cores :\")\n", " unused_cores = []\n", " for key, item in groups:\n", " curr_df = item\n", " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", " if idle_row['value'].iloc[0] == '100':\n", " if verbose: print(\"Core: \",key)\n", " unused_cores.append(int(key))\n", "\n", " print(\"Number of unused cores: \", len(unused_cores))\n", " return unused_cores\n", "\n", "\n", "#CPU fully used cores\n", "def fully_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " return df\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", " \n", " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", "\n", " target_cpu_usage_range = query_range(start, end, params, steps)\n", " df = convert_to_df_range(target_cpu_usage_range)\n", "\n", " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", " groups = df.groupby(['cpu'])\n", " if verbose: print(\"Fully Used Cores :\")\n", " fully_used_cores = []\n", " for key, item in groups:\n", " curr_df = item\n", " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", " if idle_row['value'].iloc[0] == '0':\n", " if verbose: print(\"Core: \",key)\n", " fully_used_cores.append(int(key))\n", " print(\"Number of fully used cores: \", len(fully_used_cores))\n", " return fully_used_cores\n", "\n", "\n", "# CPU used cores plots\n", "def plot_used_cores(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " \n", " # \n", " df['rate'] = df['value'].diff()\n", "\n", " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", " ax1 = fig.add_subplot(111)\n", " ax1.title.set_text('CPU usage')\n", " ax1.plot(df['epoch'], df['rate'])\n", " return df\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", "\n", " params = {'query' : \"collectd_cpu_percent{exported_instance='\" + node + \"'}\"}\n", "\n", " target_cpu_usage_range = query_range(start, end, params, steps)\n", " df = convert_to_df_range(target_cpu_usage_range)\n", " \n", " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", " groups = df.groupby(['cpu'])\n", " used_cores = []\n", "\n", " for key, item in groups:\n", " curr_df = item\n", " idle_row = curr_df.loc[curr_df['type'] == 'idle']\n", "\n", " if idle_row['value'].iloc[0] != '100':\n", " used_cores.append(key)\n", " type_grps = curr_df.groupby('type')\n", " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", "\n", " for type_key, new_item in type_grps:\n", "\n", " if type_key == 'system':\n", " ax1 = fig.add_subplot(131)\n", " ax1.title.set_text(type_key)\n", " ax1.plot(new_item['timestamp'], new_item['value'])\n", " elif type_key == 'user':\n", " ax2 = fig.add_subplot(132)\n", " ax2.title.set_text(type_key)\n", " ax2.plot(new_item['timestamp'], new_item['value'])\n", " elif type_key == 'wait':\n", " ax3 = fig.add_subplot(133)\n", " ax3.title.set_text(type_key)\n", " ax3.plot(new_item['timestamp'], new_item['value'])\n", "\n", " plt.suptitle('Used CPU Core {}'.format(key), fontsize=14)\n", " plt.show()\n", " print(\"Number of used cores: \", len(used_cores))\n", " return used_cores" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Interface" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Interface Dropped (both type 1 and 2, i.e rx and tx)\n", "#TODO: Change this to separate functions later\n", "def interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " df_0 = df #TODO: Change this\n", " df_1 = df #TODO: Change this\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", " \n", " params = {'query' : \"collectd_interface_if_dropped_0_total{exported_instance='\" + node + \"'}\"}\n", "\n", " interface_dropped_0 = query_range(start, end, params, steps)\n", " df_0 = convert_to_df_range(interface_dropped_0)\n", " \n", " params = {'query' : \"collectd_interface_if_dropped_1_total{exported_instance='\" + node + \"'}\"}\n", " interface_dropped_1 = query_range(start, end, params, steps)\n", " df_1 = convert_to_df_range(interface_dropped_1)\n", "\n", " \n", " #df_0 : interfaces_dropped_0_df\n", " df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n", "\n", " #df_1 : interfaces_dropped_1_df\n", " df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n", "\n", " groups_0 = df_0.groupby(['interface'])\n", " groups_1 = df_1.groupby(['interface'])\n", "\n", " groups = [groups_0, groups_1]\n", " dropped_interfaces= []\n", " drop_type = 0\n", " color = ['oldlace', 'mistyrose']\n", " plot_iter = 111\n", " for group in groups:\n", " dropped = []\n", "\n", " for key, item in group:\n", " curr_df = item\n", " if np.any(curr_df['value'] == '1'):\n", " dropped_row = curr_df.loc[curr_df['value'] == '1']\n", " dropped.append([key, dropped_row['timestamp'].iloc[0]])\n", " fig = plt.figure(figsize=(24,6), facecolor=color[drop_type], edgecolor='red')\n", " ax = fig.add_subplot(plot_iter)\n", " ax.title.set_text(\"Interface: {}\".format(key))\n", " ax.plot(item['timestamp'], item['value'])\n", " dropped_interfaces.append(dropped)\n", " plt.suptitle('Interfaces Drop type {}'.format(drop_type), fontsize=14)\n", " plt.show()\n", " drop_type += 1\n", " return dropped_interfaces\n", "\n", "\n", "# Interface Errors (both type 1 and 2, i.e rx and tx)\n", "#TODO: Change this to separate functions later\n", "def interface_errors(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " df_0 = df #TODO: Change this\n", " df_1 = df #TODO: Change this\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", " \n", " params = {'query' : \"collectd_interface_if_errors_0_total{exported_instance='\" + node + \"'}\"}\n", " interfaces_errors_0 = query_range(start, end, params, steps)\n", " df_0 = convert_to_df_range(interfaces_errors_0)\n", " \n", " params = {'query' : \"collectd_interface_if_errors_1_total{exported_instance='\" + node + \"'}\"}\n", " interface_errors_1 = query_range(start, end, params, steps)\n", " df_1 = convert_to_df_range(interface_errors_1)\n", "\n", " \n", " #df_0 : interfaces_errors_0_df\n", " df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n", "\n", " #df_1 : interfaces_dropped_1_df\n", " df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n", "\n", " groups_0 = df_0.groupby(['interface'])\n", " groups_1 = df_1.groupby(['interface'])\n", "\n", " groups = [groups_0, groups_1]\n", " err_interfaces= []\n", " err_type = 0\n", " color = ['oldlace', 'mistyrose']\n", " for group in groups:\n", " errors = []\n", "\n", " for key, item in group:\n", " curr_df = item\n", "\n", " if np.any(curr_df['value'] == '1'):\n", " err_row = curr_df.loc[curr_df['value'] == '1']\n", " erros.append([key, err_row['timestamp'].iloc[0]])\n", "\n", " fig = plt.figure(figsize=(24,6), facecolor=color[err_type], edgecolor='red')\n", " ax = fig.add_subplot(111)\n", " ax.title.set_text(\"Interface: {}\".format(key))\n", " ax.plot(item['timestamp'], item['value'])\n", "\n", " err_interfaces.append(errors)\n", " plt.suptitle('Interfaces Error type {}'.format(err_type), fontsize=14)\n", " plt.show()\n", " err_type += 1\n", "\n", " return err_interfaces" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### RDT " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# L3 cache bytes\n", "def plot_rdt_bytes(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", "\n", " params = {'query' : \"collectd_intel_rdt_bytes{exported_instance='\" + node + \"'}\"}\n", " intel_rdt_bytes = query_range(start, end, params, steps)\n", " df = convert_to_df_range(intel_rdt_bytes)\n", "\n", " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", " groups = df.groupby(['intel_rdt'])\n", " for key, item in groups:\n", " curr_df = item\n", " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", " ax1 = fig.add_subplot(111)\n", " ax1.title.set_text(\"Intel RDT Number: {}\".format(key))\n", " ax1.plot(item['timestamp'], item['value'])\n", " plt.show()\n", " return\n", "\n", "\n", "# L3 IPC values\n", "def plot_rdt_ipc(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", " \n", " params = {'query' : \"collectd_intel_rdt_ipc{exported_instance='\" + node + \"'}\"}\n", " intel_rdt_ipc = query_range(start, end, params, steps)\n", " df = convert_to_df_range(intel_rdt_ipc)\n", "\n", " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", " groups = df.groupby(['intel_rdt'])\n", " for key, item in groups:\n", " curr_df = item\n", " fig = plt.figure(figsize=(24,6), facecolor='oldlace', edgecolor='red')\n", " ax1 = fig.add_subplot(111)\n", " ax1.title.set_text(\"Intel RDT Number: {}, IPC value\".format(key))\n", " ax1.plot(item['timestamp'], item['value'])\n", " plt.show()\n", " return\n", "\n", "\n", "# memeory bandwidtdh\n", "def get_rdt_memory_bandwidth(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " else:\n", "\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", " \n", " params = {'query' : \"collectd_intel_rdt_memory_bandwidth_total{exported_instance='\" + node + \"'}\"}\n", " intel_rdt_mem_bw = query_range(start, end, params, steps)\n", " df = convert_to_df_range(intel_rdt_mem_bw)\n", "\n", " df = df.drop(['__name__', 'instance', 'job'], axis = 1)\n", " \n", " return df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Memory" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "def get_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n", " \n", " if csv is not None:\n", " df = pd.read_csv(csv)\n", " else:\n", " if start is None or end is None or node is None:\n", " return \"Start, end and Node name required when fetching from prometheus\"\n", " \n", " params = {'query' : \"collectd_memory{exported_instance='\" + node + \"'} / (1024*1024*1024) \"} \n", " target_memory_usage_range = query_range(start, end, params, steps)\n", " df = convert_to_df_range(target_memory_usage_range)\n", "\n", " df = df.drop(['instance', 'job'], axis = 1)\n", " return df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Testing Zone" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "# prom fetch\n", "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "print(cores)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Usage / Examples\n", "\n", "\n", "##### CPU \n", "\n", "- For calling cpu unsued cores\n", "\n", "```py\n", "# Fetching from prometheus\n", "cores = unused_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "\n", "```\n", "\n", "- For finding fully used cores\n", "\n", "```py\n", "# Fetching from prometheus\n", "fully_used = fully_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "\n", "```\n", "\n", "- Similarly for plotting used cores\n", "\n", "```py\n", "# Fetching\n", "plot_used_cores('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "\n", "#csv\n", "# use Analysis-Monitoring-Local Notebook for correct analysis \n", "plot_used_cores(csv='metrics_data/cpu-0/cpu-user-2020-06-02')\n", "\n", "```\n", "\n", "\n", "##### Interface\n", "\n", "- Interface Dropped \n", "\n", "```py\n", "# Fetching from prom\n", "dropped_interfaces = interface_dropped('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "\n", "```\n", "\n", "- Interface Errors\n", "\n", "```py\n", "# Fetching from prom\n", "interface_errors('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "```\n", "\n", "##### RDT\n", "\n", "- Plot bytes\n", "\n", "```py\n", "# fetch\n", "plot_rdt_bytes('2020-07-31 08:00:12', '2020-07-31 08:01:12','pod12-node4')\n", "```\n", "\n", "- Plot ipc values\n", "\n", "```py\n", "#fetch\n", "plot_rdt_ipc('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "```\n", "\n", "- Memory bandwidth\n", "\n", "```py\n", "#fetch\n", "get_rdt_memory_bandwidth('2020-07-31 08:00:12', '2020-07-31 08:01:12', 'pod12-node4')\n", "```" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 4 }