{
"cells": [
{
"cell_type": "markdown",
"id": "ed320ed0",
"metadata": {},
"source": [
"# Data-Query\n",
"This notebook is provided as tool to extract data, based on data-model from thoth, from multiple sources."
]
},
{
"cell_type": "markdown",
"id": "63c2424e",
"metadata": {},
"source": [
"## Target Sources\n",
"Phase-1:\n",
"1. Prometheus\n",
"2. Elasticsearch"
]
},
{
"cell_type": "markdown",
"id": "a0b349a9",
"metadata": {},
"source": [
"#########################################################################\n",
"\n",
"Copyright 2021 Spirent Communications\n",
"\n",
" Licensed under the Apache License, Version 2.0 (the \"License\");\n",
" you may not use this file except in compliance with the License.\n",
" You may obtain a copy of the License at\n",
"\n",
" http://www.apache.org/licenses/LICENSE-2.0\n",
"\n",
" Unless required by applicable law or agreed to in writing, software\n",
" distributed under the License is distributed on an \"AS IS\" BASIS,\n",
" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
" See the License for the specific language governing permissions and\n",
" limitations under the License.\n",
" \n",
"#########################################################################"
]
},
{
"cell_type": "markdown",
"id": "f11300d8",
"metadata": {},
"source": [
"### Import required packages"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "d238a743",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as mdates\n",
"import numpy as np\n",
"from IPython.display import display\n",
"\n",
"import datetime\n",
"import time\n",
"import requests\n",
"\n",
"from pprint import pprint\n",
"import json\n",
"from datetime import datetime, timedelta\n",
"\n",
"#from elasticsearch import Elasticsearch\n",
"#from elasticsearch_dsl import Search\n",
"#from elasticsearch.connection import create_ssl_context\n",
"import ssl\n",
"import urllib3"
]
},
{
"cell_type": "markdown",
"id": "af05e823",
"metadata": {},
"source": [
"### Sources"
]
},
{
"cell_type": "markdown",
"id": "416a1643",
"metadata": {},
"source": [
"### Helper Functions"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "0c4690ea",
"metadata": {},
"outputs": [],
"source": [
"# Helpers to convert a Prometheus query JSON response into a DataFrame.\n",
"\n",
"def convert_to_df(res_json):\n",
"    # Instant-query response -> one row per series: metric labels + value.\n",
"    data_list = res_json['data']['result']\n",
"    if not data_list:\n",
"        return pd.DataFrame()\n",
"\n",
"    records = []\n",
"    for data in data_list:\n",
"        row = dict(data['metric'])\n",
"        # data['value'] is [timestamp, value]; keep only the sample value.\n",
"        row['value'] = data['value'][-1]\n",
"        records.append(row)\n",
"    # Building all rows first and constructing the frame once replaces the\n",
"    # DataFrame.append-in-a-loop pattern, which was quadratic and has been\n",
"    # removed entirely in pandas >= 2.0.\n",
"    return pd.DataFrame(records)\n",
"\n",
"def convert_to_df_range(res_json):\n",
"    # Range-query response -> one row per (series, sample):\n",
"    # metric labels + 'timestamp' + 'value'.\n",
"    data_list = res_json['data']['result']\n",
"    if not data_list:\n",
"        return pd.DataFrame()\n",
"\n",
"    records = []\n",
"    for data in data_list:\n",
"        metrics = data['metric']\n",
"        for timestamp, value in data['values']:\n",
"            row = dict(metrics)  # copy: one independent row per sample\n",
"            row['timestamp'] = timestamp\n",
"            row['value'] = value\n",
"            records.append(row)\n",
"    return pd.DataFrame(records)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0226e066",
"metadata": {},
"outputs": [],
"source": [
"# Functions to query Prometheus.\n",
"\n",
"def convert_to_timestamp(s):\n",
"    # Parse 'YYYY-mm-dd HH:MM:SS' (local time) into a Unix timestamp.\n",
"    return time.mktime(datetime.strptime(s, \"%Y-%m-%d %H:%M:%S\").timetuple())\n",
"\n",
"def query_current(params=None):\n",
"    # input: params\n",
"    #     type: dict\n",
"    #     Example: {'query': 'container_cpu_user_seconds_total'}\n",
"    # Output: dict, loaded json response of the query\n",
"    # NOTE: default is None (not {}) to avoid sharing one mutable dict\n",
"    # across calls.\n",
"    res = requests.get(PROMETHEUS + '/api/v1/query',\n",
"                       params=params or {})\n",
"    return json.loads(res.text)\n",
"\n",
"\n",
"def query_range(start, end, params=None, steps='30s'):\n",
"    # input: start/end as 'YYYY-mm-dd HH:MM:SS' strings, params dict\n",
"    #     Example: {'query': 'container_cpu_user_seconds_total'}\n",
"    # Output: dict, loaded json response of the query\n",
"    # Work on a copy so the caller's dict (or the shared default) is never\n",
"    # mutated with start/end/step keys.\n",
"    query_params = dict(params or {})\n",
"    query_params[\"start\"] = convert_to_timestamp(start)\n",
"    query_params[\"end\"] = convert_to_timestamp(end)\n",
"    query_params[\"step\"] = steps\n",
"\n",
"    res = requests.get(PROMETHEUS + '/api/v1/query_range',\n",
"                       params=query_params,\n",
"                       )\n",
"\n",
"    return json.loads(res.text)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a97f9a5c",
"metadata": {},
"outputs": [],
"source": [
"# Elasticsearch [WIP]\n",
"# https://github.com/elastic/elasticsearch-dsl-py \n",
"# NOTE(review): requires the commented-out elasticsearch / elasticsearch_dsl\n",
"# imports in the imports cell to be re-enabled before this cell can run.\n",
"def query_es_range(start, end, indx, cat, titl, desc):\n",
"    # TODO: start/end/indx/cat/titl/desc are currently unused; the query is\n",
"    # hard-wired to the 'commits' index / 'summary' doc type.\n",
"    es = Elasticsearch([ELASTICSEARCH])\n",
"    # Build a DSL Search object on the 'commits' index, 'summary' document type\n",
"    request = elasticsearch_dsl.Search(using=es, index='commits',\n",
"                                       doc_type='summary')\n",
"    # Run the Search, using the scan interface to get all results\n",
"    response = request.scan()  # or execute()?\n",
"    # Return the hits so callers can consume them (the result was previously\n",
"    # computed and silently dropped).\n",
"    return response"
]
},
{
"cell_type": "markdown",
"id": "21113203",
"metadata": {},
"source": [
"### Data Extractions as Dataframes"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "8fa9918d",
"metadata": {},
"outputs": [],
"source": [
"#https://github.com/Naugrimm/promql-examples\n",
"def cpu_core_utilization(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
"    # Per-node CPU utilization (%) over [start, end], sampled every `steps`.\n",
"    # NOTE(review): node/csv/verbose are accepted for signature consistency\n",
"    # with the other extractors but are not used yet -- the query aggregates\n",
"    # over every exported_instance.\n",
"    # If total is not available:\n",
"    #params = {'query' : \"100 * (1 - sum(collectd_cpu{type='idle'}) by (cpu) / sum(collectd_cpu{exported_instance='\" + node + \"'}) by (cpu))\"}\n",
"    params = {'query': \"100 * (1 - sum(collectd_cpu_total{type='idle'}) by (exported_instance) / sum(collectd_cpu_total) by (exported_instance))\"}\n",
"    target_cpu_usage_range = query_range(start, end, params, steps)\n",
"    # Dropped the extra query_current() call: its result was never used and\n",
"    # it cost one needless HTTP round-trip per extraction.\n",
"    df = convert_to_df_range(target_cpu_usage_range)\n",
"    display(df)\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "fd8f91a1",
"metadata": {},
"outputs": [],
"source": [
"# Interface Dropped (both type 0 and 1, i.e. rx and tx)\n",
"#TODO: Change this to separate functions later\n",
"def interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
"    # Returns [rx_drops_df, tx_drops_df] for the physical interfaces of `node`,\n",
"    # either from a CSV snapshot or from Prometheus over [start, end].\n",
"    drop_dfs = []\n",
"    if csv is not None:\n",
"        df = pd.read_csv(csv)\n",
"        df_0 = df #TODO: Change this\n",
"        df_1 = df #TODO: Change this\n",
"    else:\n",
"        if start is None or end is None or node is None:\n",
"            return \"Start, end and Node name required when fetching from prometheus\"\n",
"\n",
"        params = {'query' : \"collectd_interface_if_dropped_0{exported_instance='\" + node + \"'}\"}\n",
"        interface_dropped_0 = query_range(start, end, params, steps)\n",
"        df_0 = convert_to_df_range(interface_dropped_0)\n",
"\n",
"        params = {'query' : \"collectd_interface_if_dropped_1{exported_instance='\" + node + \"'}\"}\n",
"        interface_dropped_1 = query_range(start, end, params, steps)\n",
"        df_1 = convert_to_df_range(interface_dropped_1)\n",
"\n",
"    #df_0 : interfaces_dropped_0_df\n",
"    #df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n",
"    drop_dfs.append(df_0)\n",
"\n",
"    #df_1 : interfaces_dropped_1_df\n",
"    #df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n",
"    drop_dfs.append(df_1)\n",
"\n",
"    return drop_dfs\n",
"\n",
"def virt_interface_dropped(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
"    # Returns [rx_drops_df, tx_drops_df] for the libvirt (VM) interfaces of `node`.\n",
"    drop_dfs = []\n",
"    if csv is not None:\n",
"        df = pd.read_csv(csv)\n",
"        df_0 = df #TODO: Change this\n",
"        df_1 = df #TODO: Change this\n",
"    else:\n",
"        if start is None or end is None or node is None:\n",
"            return \"Start, end and Node name required when fetching from prometheus\"\n",
"\n",
"        params = {'query' : \"collectd_virt_if_dropped_0{exported_instance='\" + node + \"'}\"}\n",
"        interface_dropped_0 = query_range(start, end, params, steps)\n",
"        df_0 = convert_to_df_range(interface_dropped_0)\n",
"\n",
"        # BUGFIX: this second query previously repeated ..._dropped_0, so the\n",
"        # tx-drop frame silently duplicated the rx-drop data.\n",
"        params = {'query' : \"collectd_virt_if_dropped_1{exported_instance='\" + node + \"'}\"}\n",
"        interface_dropped_1 = query_range(start, end, params, steps)\n",
"        df_1 = convert_to_df_range(interface_dropped_1)\n",
"\n",
"    #df_0 : interfaces_dropped_0_df\n",
"    df_0 = df_0.drop(['instance', 'job'], axis = 1)\n",
"    drop_dfs.append(df_0)\n",
"\n",
"    #df_1 : interfaces_dropped_1_df\n",
"    df_1 = df_1.drop(['instance', 'job'], axis = 1)\n",
"    drop_dfs.append(df_1)\n",
"\n",
"    return drop_dfs\n",
"\n",
"\n",
"# Interface Errors (both type 0 and 1, i.e. rx and tx)\n",
"#TODO: Change this to separate functions later\n",
"def interface_errors(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
"    # Returns [rx_errors_df, tx_errors_df] for the physical interfaces of `node`.\n",
"    error_dfs = []\n",
"    if csv is not None:\n",
"        df = pd.read_csv(csv)\n",
"        df_0 = df #TODO: Change this\n",
"        df_1 = df #TODO: Change this\n",
"    else:\n",
"        if start is None or end is None or node is None:\n",
"            return \"Start, end and Node name required when fetching from prometheus\"\n",
"\n",
"        params = {'query' : \"collectd_interface_if_errors_0_total{exported_instance='\" + node + \"'}\"}\n",
"        interfaces_errors_0 = query_range(start, end, params, steps)\n",
"        df_0 = convert_to_df_range(interfaces_errors_0)\n",
"\n",
"        params = {'query' : \"collectd_interface_if_errors_1_total{exported_instance='\" + node + \"'}\"}\n",
"        interface_errors_1 = query_range(start, end, params, steps)\n",
"        df_1 = convert_to_df_range(interface_errors_1)\n",
"\n",
"    #df_0 : interfaces_errors_0_df\n",
"    #df_0 = df_0.drop(['__name__', 'instance', 'job'], axis = 1)\n",
"    error_dfs.append(df_0)\n",
"\n",
"    #df_1 : interfaces_errors_1_df\n",
"    #df_1 = df_1.drop(['__name__', 'instance', 'job'], axis = 1)\n",
"    error_dfs.append(df_1)\n",
"\n",
"    return error_dfs"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "c98a59da",
"metadata": {},
"outputs": [],
"source": [
"def get_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
"    # Host memory usage in GiB, either loaded from a CSV snapshot or pulled\n",
"    # from Prometheus over [start, end] with step `steps`.\n",
"    if csv is not None:\n",
"        return pd.read_csv(csv)\n",
"    if start is None or end is None or node is None:\n",
"        return \"Start, end and Node name required when fetching from prometheus\"\n",
"\n",
"    query = \"collectd_memory{exported_instance='\" + node + \"'} / (1024*1024*1024) \"\n",
"    response = query_range(start, end, {'query': query}, steps)\n",
"    return convert_to_df_range(response)\n",
"\n",
"def get_virt_memory_usage(start=None, end=None, node=None, steps='15s', csv=None, verbose=False):\n",
"    # Same as get_memory_usage, but for libvirt guest memory.\n",
"    # Example series: collectd_virt_memory{exported_instance=\"instance-00001a33\",\n",
"    #                 instance=\"localhost:9103\",job=\"promc\",virt=\"total\"}\n",
"    if csv is not None:\n",
"        return pd.read_csv(csv)\n",
"    if start is None or end is None or node is None:\n",
"        return \"Start, end and Node name required when fetching from prometheus\"\n",
"\n",
"    query = \"collectd_virt_memory{exported_instance='\" + node + \"'} / (1024*1024*1024) \"\n",
"    response = query_range(start, end, {'query': query}, steps)\n",
"    return convert_to_df_range(response)"
]
},
{
"cell_type": "markdown",
"id": "3733a9a0",
"metadata": {},
"source": [
"### Main Extraction"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "bd5dd0f7",
"metadata": {},
"outputs": [],
"source": [
"def extract(timestamp, node=None, node_phy=True):\n",
"    # Pull CPU / interface / memory dataframes for the 24h window ending at\n",
"    # `timestamp` ('YYYY-mm-dd HH:MM:SS[,...]'; anything after a comma is\n",
"    # ignored) and display them inline.\n",
"    # TODO: If node is provided, we may have to do some validations\n",
"    # (whether it is a valid node or not). `node_phy` is reserved for\n",
"    # physical-vs-virtual selection and is not used yet.\n",
"    end = datetime.strptime(timestamp.split(',')[0], \"%Y-%m-%d %H:%M:%S\")\n",
"    start = end - timedelta(days=1)\n",
"\n",
"    start = str(start)\n",
"    end = str(end)\n",
"    steps = '60s'\n",
"\n",
"    print(\"Starting Extraction from\",start,\"to\",end,'\\n\\n')\n",
"    #cpu\n",
"    print(\"===== CPU Data =====\\n\")\n",
"    cpu_utilization = cpu_core_utilization(start, end, steps=steps)\n",
"    display(cpu_utilization)\n",
"\n",
"    #interface analysis\n",
"    print(\"=====Interfaces (including libvirt) Dropped / Errors=====\\n\")\n",
"    dropped_interfaces = interface_dropped(start=start, end=end, node=node, steps=steps)\n",
"    for drop_if in dropped_interfaces:\n",
"        display(drop_if)\n",
"    err_interfaces = interface_errors(start=start, end=end, node=node, steps=steps)\n",
"    for err_if in err_interfaces:\n",
"        display(err_if)\n",
"    # Removed leftover debug print(err_interfaces): it duplicated the\n",
"    # display() loop above as a plain-text wall of output.\n",
"    #vdropped_interfaces = virt_interface_dropped(start,end,node,steps)\n",
"\n",
"    #Memory Analysis:\n",
"    print(\"=====Memory Data =====\\n\")\n",
"    mem = get_memory_usage(start, end, node, steps)\n",
"    display(mem)\n",
"    vmem = get_virt_memory_usage(start, end, node, steps)\n",
"    display(vmem)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "e5d3ec9a",
"metadata": {},
"outputs": [],
"source": [
"PROMETHEUS = 'http://10.10.180.20:9090/'\n",
"ELASTICSEARCH = 'http://192.168.2.233:9200/'"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "14298851",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Extraction from 2022-04-11 06:00:00 to 2022-04-12 06:00:00 \n",
"\n",
"\n",
"===== CPU Data =====\n",
"\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" exported_instance | \n",
" timestamp | \n",
" value | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" pod18-node4 | \n",
" 1649649660 | \n",
" 1.8592150264858165 | \n",
"
\n",
" \n",
" 1 | \n",
" pod18-node4 | \n",
" 1649649720 | \n",
" 1.859213509941382 | \n",
"
\n",
" \n",
" 2 | \n",
" pod18-node4 | \n",
" 1649649780 | \n",
" 1.8592120502819665 | \n",
"
\n",
" \n",
" 3 | \n",
" pod18-node4 | \n",
" 1649649840 | \n",
" 1.8592107627314003 | \n",
"
\n",
" \n",
" 4 | \n",
" pod18-node4 | \n",
" 1649649900 | \n",
" 1.8592092108782765 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2455 | \n",
" pod18-node5 | \n",
" 1649723160 | \n",
" 1.0958105540852525 | \n",
"
\n",
" \n",
" 2456 | \n",
" pod18-node5 | \n",
" 1649723220 | \n",
" 1.0958088368217433 | \n",
"
\n",
" \n",
" 2457 | \n",
" pod18-node5 | \n",
" 1649723280 | \n",
" 1.095807067448351 | \n",
"
\n",
" \n",
" 2458 | \n",
" pod18-node5 | \n",
" 1649723340 | \n",
" 1.0958053357542963 | \n",
"
\n",
" \n",
" 2459 | \n",
" pod18-node5 | \n",
" 1649723400 | \n",
" 1.0958036155334194 | \n",
"
\n",
" \n",
"
\n",
"
2460 rows × 3 columns
\n",
"
"
],
"text/plain": [
" exported_instance timestamp value\n",
"0 pod18-node4 1649649660 1.8592150264858165\n",
"1 pod18-node4 1649649720 1.859213509941382\n",
"2 pod18-node4 1649649780 1.8592120502819665\n",
"3 pod18-node4 1649649840 1.8592107627314003\n",
"4 pod18-node4 1649649900 1.8592092108782765\n",
"... ... ... ...\n",
"2455 pod18-node5 1649723160 1.0958105540852525\n",
"2456 pod18-node5 1649723220 1.0958088368217433\n",
"2457 pod18-node5 1649723280 1.095807067448351\n",
"2458 pod18-node5 1649723340 1.0958053357542963\n",
"2459 pod18-node5 1649723400 1.0958036155334194\n",
"\n",
"[2460 rows x 3 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" exported_instance | \n",
" timestamp | \n",
" value | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" pod18-node4 | \n",
" 1649649660 | \n",
" 1.8592150264858165 | \n",
"
\n",
" \n",
" 1 | \n",
" pod18-node4 | \n",
" 1649649720 | \n",
" 1.859213509941382 | \n",
"
\n",
" \n",
" 2 | \n",
" pod18-node4 | \n",
" 1649649780 | \n",
" 1.8592120502819665 | \n",
"
\n",
" \n",
" 3 | \n",
" pod18-node4 | \n",
" 1649649840 | \n",
" 1.8592107627314003 | \n",
"
\n",
" \n",
" 4 | \n",
" pod18-node4 | \n",
" 1649649900 | \n",
" 1.8592092108782765 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2455 | \n",
" pod18-node5 | \n",
" 1649723160 | \n",
" 1.0958105540852525 | \n",
"
\n",
" \n",
" 2456 | \n",
" pod18-node5 | \n",
" 1649723220 | \n",
" 1.0958088368217433 | \n",
"
\n",
" \n",
" 2457 | \n",
" pod18-node5 | \n",
" 1649723280 | \n",
" 1.095807067448351 | \n",
"
\n",
" \n",
" 2458 | \n",
" pod18-node5 | \n",
" 1649723340 | \n",
" 1.0958053357542963 | \n",
"
\n",
" \n",
" 2459 | \n",
" pod18-node5 | \n",
" 1649723400 | \n",
" 1.0958036155334194 | \n",
"
\n",
" \n",
"
\n",
"
2460 rows × 3 columns
\n",
"
"
],
"text/plain": [
" exported_instance timestamp value\n",
"0 pod18-node4 1649649660 1.8592150264858165\n",
"1 pod18-node4 1649649720 1.859213509941382\n",
"2 pod18-node4 1649649780 1.8592120502819665\n",
"3 pod18-node4 1649649840 1.8592107627314003\n",
"4 pod18-node4 1649649900 1.8592092108782765\n",
"... ... ... ...\n",
"2455 pod18-node5 1649723160 1.0958105540852525\n",
"2456 pod18-node5 1649723220 1.0958088368217433\n",
"2457 pod18-node5 1649723280 1.095807067448351\n",
"2458 pod18-node5 1649723340 1.0958053357542963\n",
"2459 pod18-node5 1649723400 1.0958036155334194\n",
"\n",
"[2460 rows x 3 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"=====Interfaces (including libvirt) Dropped / Errors=====\n",
"\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Empty DataFrame\n",
"Columns: []\n",
"Index: [], Empty DataFrame\n",
"Columns: []\n",
"Index: []]\n",
"=====Memory Data =====\n",
"\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Call to extract any Node or all nodes...\n",
"# Default start is 1 days before the timestamp provided.\n",
"# Please modify in the extract function if you want anything lesser or more.\n",
"extract('2022-04-12 06:00:00', 'pod18-node4')\n",
"#extract('2021-09-03 18:45:00', node=\"MY-NODE-1\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26b72876",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}