-rw-r--r-- docs/lma/devguide.rst 147
-rw-r--r-- docs/lma/logs/images/elasticsearch.png bin 0 -> 36046 bytes
-rw-r--r-- docs/lma/logs/images/fluentd-cs.png bin 0 -> 40226 bytes
-rw-r--r-- docs/lma/logs/images/fluentd-ss.png bin 0 -> 18331 bytes
-rw-r--r-- docs/lma/logs/images/nginx.png bin 0 -> 36737 bytes
-rw-r--r-- docs/lma/logs/images/setup.png bin 0 -> 43503 bytes
-rw-r--r-- docs/lma/logs/userguide.rst 348
-rw-r--r-- docs/lma/metrics/devguide.rst 474
-rw-r--r-- docs/lma/metrics/images/dataflow.png bin 0 -> 42443 bytes
-rw-r--r-- docs/lma/metrics/images/setup.png bin 0 -> 15019 bytes
-rw-r--r-- docs/lma/metrics/userguide.rst 230
-rw-r--r-- tools/lma/ansible-client/ansible.cfg 17
-rw-r--r-- tools/lma/ansible-client/hosts 2
-rw-r--r-- tools/lma/ansible-client/playbooks/clean.yaml 25
-rw-r--r-- tools/lma/ansible-client/playbooks/setup.yaml 28
-rw-r--r-- tools/lma/ansible-client/roles/clean-collectd/main.yml 44
-rw-r--r-- tools/lma/ansible-client/roles/clean-td-agent/tasks/main.yml 28
-rw-r--r-- tools/lma/ansible-client/roles/collectd/files/collectd.conf.j2 44
-rw-r--r-- tools/lma/ansible-client/roles/collectd/tasks/main.yml 60
-rw-r--r-- tools/lma/ansible-client/roles/td-agent/files/td-agent.conf 63
-rw-r--r-- tools/lma/ansible-client/roles/td-agent/tasks/main.yml 30
-rw-r--r-- tools/lma/ansible-server/ansible.cfg 17
-rw-r--r-- tools/lma/ansible-server/group_vars/all.yml 27
-rw-r--r-- tools/lma/ansible-server/hosts 12
-rw-r--r-- tools/lma/ansible-server/playbooks/clean.yaml 52
-rw-r--r-- tools/lma/ansible-server/playbooks/setup.yaml 44
-rw-r--r-- tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml 34
-rw-r--r-- tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml 65
-rw-r--r-- tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml 26
-rw-r--r-- tools/lma/ansible-server/roles/clean-logging/tasks/main.yml 193
-rw-r--r-- tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml 48
-rw-r--r-- tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml 44
-rw-r--r-- tools/lma/ansible-server/roles/k8s-master/tasks/main.yml 49
-rw-r--r-- tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml 72
-rw-r--r-- tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml 24
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml 48
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml 68
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml 132
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml 76
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml 231
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml 23
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml 525
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml 34
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml 65
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml 23
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/namespace.yaml 17
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml 36
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml 68
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml 28
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml 58
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml 105
-rw-r--r-- tools/lma/ansible-server/roles/logging/files/storageClass.yaml 73
-rw-r--r-- tools/lma/ansible-server/roles/logging/tasks/main.yml 165
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml 37
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml 62
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml 41
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml 62
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml 42
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml 79
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml 30
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml 51
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml 35
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml 35
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml 68
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml 31
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml 33
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml 36
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml 36
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml 26
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml 18
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml 80
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml 33
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml 35
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml 609
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml 73
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml 30
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml 33
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml 34
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml 73
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml 35
-rw-r--r-- tools/lma/ansible-server/roles/monitoring/tasks/main.yml 273
-rw-r--r-- tools/lma/ansible-server/roles/nfs/tasks/main.yml 42
-rw-r--r-- tools/lma/logs/dockerfile/elastalert/Dockerfile 23
-rw-r--r-- tools/lma/logs/dockerfile/fluentd/Dockerfile 23
-rw-r--r-- tools/lma/logs/jupyter-notebooks/Trend-Analysis.ipynb 308
-rw-r--r-- tools/lma/metrics/dashboard/cpu_usage_using.json 750
-rw-r--r-- tools/lma/metrics/dashboard/memory_using.json 337
-rw-r--r-- tools/lma/metrics/dashboard/ovs_stats_using.json 854
-rw-r--r-- tools/lma/metrics/dashboard/rdt_using.json 833
-rw-r--r-- tools/lma/yamllintrc 25
90 files changed, 9247 insertions, 0 deletions
diff --git a/docs/lma/devguide.rst b/docs/lma/devguide.rst
new file mode 100644
index 00000000..c72b8b12
--- /dev/null
+++ b/docs/lma/devguide.rst
@@ -0,0 +1,147 @@
+=================
+Table of Contents
+=================
+.. contents::
+.. section-numbering::
+
+Ansible Client-side
+====================
+
+Ansible File Organisation
+--------------------------
+Files Structure::
+
+ ansible-client
+ ├── ansible.cfg
+ ├── hosts
+ ├── playbooks
+ │ └── setup.yaml
+ └── roles
+ ├── clean-td-agent
+ │ └── tasks
+ │ └── main.yml
+ └── td-agent
+ ├── files
+ │ └── td-agent.conf
+ └── tasks
+ └── main.yml
+
+Summary of roles
+-----------------
+====================== ======================
+Roles Description
+====================== ======================
+``td-agent`` Install Td-agent & change configuration file
+``clean-td-agent``     Uninstall Td-agent
+====================== ======================
+
+Configurable Parameters
+------------------------
+====================================================== ====================== ======================
+File (ansible-client/roles/) Parameter Description
+====================================================== ====================== ======================
+``td-agent/files/td-agent.conf`` host Fluentd-server IP
+``td-agent/files/td-agent.conf`` port Fluentd-Server Port
+====================================================== ====================== ======================
+
+Ansible Server-side
+====================
+
+Ansible File Organisation
+--------------------------
+Files Structure::
+
+ ansible-server
+ ├── ansible.cfg
+ ├── group_vars
+ │ └── all.yml
+ ├── hosts
+ ├── playbooks
+ │ └── setup.yaml
+ └── roles
+ ├── clean-logging
+ │ └── tasks
+ │ └── main.yml
+ ├── k8s-master
+ │ └── tasks
+ │ └── main.yml
+ ├── k8s-pre
+ │ └── tasks
+ │ └── main.yml
+ ├── k8s-worker
+ │ └── tasks
+ │ └── main.yml
+ ├── logging
+ │ ├── files
+ │ │ ├── elastalert
+ │ │ │ ├── ealert-conf-cm.yaml
+ │ │ │ ├── ealert-key-cm.yaml
+ │ │ │ ├── ealert-rule-cm.yaml
+ │ │ │ └── elastalert.yaml
+ │ │ ├── elasticsearch
+ │ │ │ ├── elasticsearch.yaml
+ │ │ │ └── user-secret.yaml
+ │ │ ├── fluentd
+ │ │ │ ├── fluent-cm.yaml
+ │ │ │ ├── fluent-service.yaml
+ │ │ │ └── fluent.yaml
+ │ │ ├── kibana
+ │ │ │ └── kibana.yaml
+ │ │ ├── namespace.yaml
+ │ │ ├── nginx
+ │ │ │ ├── nginx-conf-cm.yaml
+ │ │ │ ├── nginx-key-cm.yaml
+ │ │ │ ├── nginx-service.yaml
+ │ │ │ └── nginx.yaml
+ │ │ ├── persistentVolume.yaml
+ │ │ └── storageClass.yaml
+ │ └── tasks
+ │ └── main.yml
+ └── nfs
+ └── tasks
+ └── main.yml
+
+Summary of roles
+-----------------
+====================== ======================
+Roles Description
+====================== ======================
+``k8s-pre``            Prerequisites for installing K8s, such as installing Docker & K8s and disabling swap
+``k8s-master``         Reset K8s & set up a master node
+``k8s-worker``         Join worker nodes with the token
+``logging``            EFK & elastalert setup in K8s
+``clean-logging``      Remove EFK & elastalert setup from K8s
+``nfs``                Start an NFS server to store Elasticsearch data
+====================== ======================
+
+Configurable Parameters
+------------------------
+========================================================================= ============================================ ======================
+File (ansible-server/roles/) Parameter name Description
+========================================================================= ============================================ ======================
+**Role: logging**
+``logging/files/persistentVolume.yaml`` storage Increase or Decrease Storage size of Persistent Volume size for each VM
+``logging/files/kibana/kibana.yaml`` version To Change the Kibana Version
+``logging/files/kibana/kibana.yaml`` count To increase or decrease the replica
+``logging/files/elasticsearch/elasticsearch.yaml`` version To Change the Elasticsearch Version
+``logging/files/elasticsearch/elasticsearch.yaml`` nodePort To Change Service Port
+``logging/files/elasticsearch/elasticsearch.yaml`` storage Increase or Decrease Storage size of Elasticsearch data for each VM
+``logging/files/elasticsearch/elasticsearch.yaml`` nodeAffinity -> values (hostname) In which VM Elasticsearch master or data pod will run (change the hostname to run the Elasticsearch master or data pod on a specific node)
+``logging/files/elasticsearch/user-secret.yaml`` stringData Add Elasticsearch User & its roles (`Elastic Docs <https://www.elastic.co/guide/en/cloud-on-k8s/master/k8s-users-and-roles.html#k8s_file_realm>`_)
+``logging/files/fluentd/fluent.yaml`` replicas To increase or decrease the replica
+``logging/files/fluentd/fluent-service.yaml`` nodePort To Change Service Port
+``logging/files/fluentd/fluent-cm.yaml`` index_template.json -> number_of_replicas To increase or decrease replica of data in Elasticsearch
+``logging/files/fluentd/fluent-cm.yaml`` fluent.conf Server port & other Fluentd Configuration
+``logging/files/nginx/nginx.yaml`` replicas To increase or decrease the replica
+``logging/files/nginx/nginx-service.yaml`` nodePort To Change Service Port
+``logging/files/nginx/nginx-key-cm.yaml`` kibana-access.key, kibana-access.pem Key file for HTTPs Connection
+``logging/files/nginx/nginx-conf-cm.yaml`` - Nginx Configuration
+``logging/files/elastalert/elastalert.yaml`` replicas To increase or decrease the replica
+``logging/files/elastalert/ealert-key-cm.yaml`` elastalert.key, elastalert.pem Key file for HTTPs Connection
+``logging/files/elastalert/ealert-conf-cm.yaml`` run_every How often ElastAlert will query Elasticsearch
+``logging/files/elastalert/ealert-conf-cm.yaml`` alert_time_limit If an alert fails for some reason, ElastAlert will retry sending the alert until this time period has elapsed
+``logging/files/elastalert/ealert-conf-cm.yaml`` es_host, es_port Elasticsearch Serivce name & port in K8s
+``logging/files/elastalert/ealert-rule-cm.yaml`` http_post_url Alert Receiver IP (`Elastalert Rule Config <https://elastalert.readthedocs.io/en/latest/ruletypes.html>`_)
+**Role: nfs**
+``nfs/tasks/main.yml`` line Path of NFS storage
+========================================================================= ============================================ ======================
diff --git a/docs/lma/logs/images/elasticsearch.png b/docs/lma/logs/images/elasticsearch.png
new file mode 100644
index 00000000..f0b876f5
--- /dev/null
+++ b/docs/lma/logs/images/elasticsearch.png
Binary files differ
diff --git a/docs/lma/logs/images/fluentd-cs.png b/docs/lma/logs/images/fluentd-cs.png
new file mode 100644
index 00000000..513bb3ef
--- /dev/null
+++ b/docs/lma/logs/images/fluentd-cs.png
Binary files differ
diff --git a/docs/lma/logs/images/fluentd-ss.png b/docs/lma/logs/images/fluentd-ss.png
new file mode 100644
index 00000000..4e9ab112
--- /dev/null
+++ b/docs/lma/logs/images/fluentd-ss.png
Binary files differ
diff --git a/docs/lma/logs/images/nginx.png b/docs/lma/logs/images/nginx.png
new file mode 100644
index 00000000..a0b00514
--- /dev/null
+++ b/docs/lma/logs/images/nginx.png
Binary files differ
diff --git a/docs/lma/logs/images/setup.png b/docs/lma/logs/images/setup.png
new file mode 100644
index 00000000..267685fa
--- /dev/null
+++ b/docs/lma/logs/images/setup.png
Binary files differ
diff --git a/docs/lma/logs/userguide.rst b/docs/lma/logs/userguide.rst
new file mode 100644
index 00000000..b410ee6c
--- /dev/null
+++ b/docs/lma/logs/userguide.rst
@@ -0,0 +1,348 @@
+=================
+Table of Contents
+=================
+.. contents::
+.. section-numbering::
+
+Setup
+======
+
+Prerequisites
+-------------------------
+- Requires 3 VMs to set up K8s
+- ``$ sudo yum install ansible``
+- ``$ pip install openshift pyyaml kubernetes`` (required for ansible K8s module)
+- Update IPs in all these files (if changed)
+ ====================================================================== ======================
+ Path Description
+ ====================================================================== ======================
+ ``ansible-server/group_vars/all.yml`` IP of K8s apiserver and VM hostname
+ ``ansible-server/hosts`` IP of VMs to install
+ ``ansible-server/roles/logging/files/persistentVolume.yaml`` IP of NFS-Server
+ ``ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml`` IP of alert-receiver
+ ====================================================================== ======================
+
+Architecture
+--------------
+.. image:: images/setup.png
+
+Installation - Clientside
+-------------------------
+
+Nodes
+`````
+- **Node1** = 10.10.120.21
+- **Node4** = 10.10.120.24
+
+How is installation done?
+`````````````````````````
+- TD-agent installation
+ ``$ curl -L https://toolbelt.treasuredata.com/sh/install-redhat-td-agent3.sh | sh``
+- Copy the TD-agent config file in **Node1**
+ ``$ cp tdagent-client-config/node1.conf /etc/td-agent/td-agent.conf``
+- Copy the TD-agent config file in **Node4**
+ ``$ cp tdagent-client-config/node4.conf /etc/td-agent/td-agent.conf``
+- Restart the service
+ ``$ sudo service td-agent restart``
+
+Installation - Serverside
+-------------------------
+
+Nodes
+`````
+Inside Jumphost - POD12
+ - **VM1** = 10.10.120.211
+ - **VM2** = 10.10.120.203
+ - **VM3** = 10.10.120.204
+
+
+How is installation done?
+`````````````````````````
+**Using Ansible:**
+ - **K8s**
+ - **Elasticsearch:** 1 Master & 1 Data node at each VM
+ - **Kibana:** 1 Replicas
+ - **Nginx:** 2 Replicas
+ - **Fluentd:** 2 Replicas
+    - **Elastalert:** 1 Replica (duplicate alerts are received if the replica count is increased)
+    - **NFS Server:** at each VM to store Elasticsearch data at the following paths
+ - ``/srv/nfs/master``
+ - ``/srv/nfs/data``
+
+How to setup?
+`````````````
+- **To set up the K8s cluster and EFK:** Run the ansible-playbook ``ansible/playbooks/setup.yaml``
+- **To clean everything:** Run the ansible-playbook ``ansible/playbooks/clean.yaml``
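+
+A minimal invocation sketch (assuming the playbooks are run from the
+``tools/lma/ansible-server`` directory so that the provided ``ansible.cfg``
+and ``hosts`` inventory are picked up)::
+
+    $ cd tools/lma/ansible-server
+    $ ansible-playbook playbooks/setup.yaml
+    # and to tear everything down again:
+    $ ansible-playbook playbooks/clean.yaml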
+
+Do we have HA?
+````````````````
+Yes
+
+Configuration
+=============
+
+K8s
+---
+Path of all yamls (Serverside)
+````````````````````````````````
+``ansible-server/roles/logging/files/``
+
+K8s namespace
+`````````````
+``logging``
+
+K8s Service details
+````````````````````
+``$ kubectl get svc -n logging``
+
+Elasticsearch Configuration
+---------------------------
+
+Elasticsearch Setup Structure
+`````````````````````````````
+.. image:: images/elasticsearch.png
+
+Elasticsearch service details
+`````````````````````````````
+| **Service Name:** ``logging-es-http``
+| **Service Port:** ``9200``
+| **Service Type:** ``ClusterIP``
+
+How to get elasticsearch default username & password?
+`````````````````````````````````````````````````````
+- User1 (custom user):
+ | **Username:** ``elasticsearch``
+ | **Password:** ``password123``
+- User2 (by default created by Elastic Operator):
+ | **Username:** ``elastic``
+ | To get default password:
+ | ``$ PASSWORD=$(kubectl get secret -n logging logging-es-elastic-user -o go-template='{{.data.elastic | base64decode}}')``
+ | ``$ echo $PASSWORD``
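+
+As a quick sanity check, these credentials can be used against the standard
+Elasticsearch cluster-health API (a sketch; the node IP and port are the ones
+used elsewhere in this guide)::
+
+    $ curl -k -u "elastic:$PASSWORD" "https://10.10.120.211:9200/_cluster/health?pretty"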
+
+How to increase replica of any index?
+````````````````````````````````````````
+.. code-block:: bash
+
+    $ curl -k -u "elasticsearch:password123" -H 'Content-Type: application/json' -XPUT "https://10.10.120.211:9200/indexname*/_settings" -d '
+    {
+        "index" : {
+            "number_of_replicas" : "2" }
+    }'
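+
+The change can be verified by reading the setting back (a sketch, using the same
+credentials and endpoint as above; ``indexname*`` is a placeholder index pattern)::
+
+    $ curl -k -u "elasticsearch:password123" "https://10.10.120.211:9200/indexname*/_settings?pretty"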
+
+Index Life
+```````````
+**30 Days**
+
+Kibana Configuration
+--------------------
+
+Kibana Service details
+````````````````````````
+| **Service Name:** ``logging-kb-http``
+| **Service Port:** ``5601``
+| **Service Type:** ``ClusterIP``
+
+Nginx Configuration
+--------------------
+IP
+````
+https://10.10.120.211:32000
+
+Nginx Setup Structure
+`````````````````````
+.. image:: images/nginx.png
+
+Nginx Service details
+`````````````````````
+| **Service Name:** ``nginx``
+| **Service Port:** ``32000``
+| **Service Type:** ``NodePort``
+
+Why is NGINX used?
+```````````````````
+`Securing ELK using Nginx <https://logz.io/blog/securing-elk-nginx/>`_
+
+Nginx Configuration
+````````````````````
+**Path:** ``ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml``
+
+Fluentd Configuration - Clientside (Td-agent)
+---------------------------------------------
+
+Fluentd Setup Structure
+````````````````````````
+.. image:: images/fluentd-cs.png
+
+Log collection paths
+`````````````````````
+- ``/tmp/result*/*.log``
+- ``/tmp/result*/*.dat``
+- ``/tmp/result*/*.csv``
+- ``/tmp/result*/stc-liveresults.dat.*``
+- ``/var/log/userspace*.log``
+- ``/var/log/sriovdp/*.log.*``
+- ``/var/log/pods/**/*.log``
+
+Logs are sent to
+`````````````````
+Another fluentd instance of K8s cluster (K8s Master: 10.10.120.211) at Jumphost.
+
+Td-agent logs
+`````````````
+Path of td-agent logs: ``/var/log/td-agent/td-agent.log``
+
+Td-agent configuration
+````````````````````````
+| Path of conf file: ``/etc/td-agent/td-agent.conf``
+| **If any change is made to td-agent.conf, restart the td-agent service:** ``$ sudo service td-agent restart``
+
+Config Description
+````````````````````
+- Get the logs from the collection paths
+- | Convert to this format
+  | {
+  | msg: "log line"
+  | log_path: "/file/path"
+  | file: "file.name"
+  | host: "pod12-node4"
+  | }
+- Send them to the server-side Fluentd
+
+Fluentd Configuration - Serverside
+----------------------------------
+
+Fluentd Setup Structure
+````````````````````````
+.. image:: images/fluentd-ss.png
+
+Fluentd Service details
+````````````````````````
+| **Service Name:** ``fluentd``
+| **Service Port:** ``32224``
+| **Service Type:** ``NodePort``
+
+Logs are sent to
+`````````````````
+Elasticsearch service (https://logging-es-http:9200)
+
+Config Description
+````````````````````
+- **Step 1**
+ - Get the logs from Node1 & Node4
+- **Step 2**
+ ======================================== ======================
+ log_path add tag (for routing)
+ ======================================== ======================
+ ``/tmp/result.*/.*errors.dat`` errordat.log
+ ``/tmp/result.*/.*counts.dat`` countdat.log
+ ``/tmp/result.*/stc-liveresults.dat.tx`` stcdattx.log
+ ``/tmp/result.*/stc-liveresults.dat.rx`` stcdatrx.log
+ ``/tmp/result.*/.*Statistics.csv`` ixia.log
+ ``/tmp/result.*/vsperf-overall*`` vsperf.log
+ ``/tmp/result.*/vswitchd*`` vswitchd.log
+ ``/var/log/userspace*`` userspace.log
+ ``/var/log/sriovdp*`` sriovdp.log
+ ``/var/log/pods*`` pods.log
+ ======================================== ======================
+
+- **Step 3**
+ Then parse each type using tags.
+ - error.conf: to find any error
+ - time-series.conf: to parse time series data
+  - time-analysis.conf: to calculate time analysis
+- **Step 4**
+ ================================ ======================
+ host add tag (for routing)
+ ================================ ======================
+ ``pod12-node4`` node4
+ ``worker`` node1
+ ================================ ======================
+- **Step 5**
+ ================================ ======================
+ Tag elasticsearch
+ ================================ ======================
+ ``node4`` index “node4*”
+ ``node1`` index “node1*”
+ ================================ ======================
+
+Elastalert
+----------
+
+Send alert if
+``````````````
+- Blacklist
+ - "Failed to run test"
+ - "Failed to execute in '30' seconds"
+ - "('Result', 'Failed')"
+ - "could not open socket: connection refused"
+ - "Input/output error"
+ - "dpdk|ERR|EAL: Error - exiting with code: 1"
+ - "Failed to execute in '30' seconds"
+ - "dpdk|ERR|EAL: Driver cannot attach the device"
+ - "dpdk|EMER|Cannot create lock on"
+ - "dpdk|ERR|VHOST_CONFIG: * device not found"
+- Time
+ - vswitch_duration > 3 sec
+
+How to configure alert?
+````````````````````````
+- Add your rule in ``ansible/roles/logging/files/elastalert/ealert-rule-cm.yaml`` (`Elastalert Rule Config <https://elastalert.readthedocs.io/en/latest/ruletypes.html>`_)::
+
+    name: anything
+    type: <check-above-link>   # The RuleType to use
+    index: node4*              # index name
+    realert:
+      minutes: 0               # to get an alert for all cases after each interval
+    alert: post                # to send the alert as an HTTP POST
+    http_post_url: "http://url"
+
+- Mount this file to elastalert pod in ``ansible/roles/logging/files/elastalert/elastalert.yaml``.
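+
+A possible way to push an edited rule into the running cluster (a sketch, assuming
+``kubectl`` access to the ``logging`` namespace; the ElastAlert deployment name is a
+placeholder, check it with ``kubectl get deploy -n logging``)::
+
+    $ kubectl apply -n logging -f ansible/roles/logging/files/elastalert/ealert-rule-cm.yaml
+    # restart ElastAlert so the new rule is loaded
+    $ kubectl -n logging rollout restart deployment <elastalert-deployment-name>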
+
+Alert Format
+````````````
+{"type": "pattern-match", "label": "failed", "index": "node4-20200815", "log": "error-log-line", "log-path": "/tmp/result/file.log", "reson": "error-message" }
+
+Data Management
+===============
+
+Elasticsearch
+-------------
+
+Where data is stored now?
+`````````````````````````
+Data is stored in the NFS server with 1 replica of each index (default). The data paths are the following:
+ - ``/srv/nfs/data (VM1)``
+ - ``/srv/nfs/data (VM2)``
+ - ``/srv/nfs/data (VM3)``
+ - ``/srv/nfs/master (VM1)``
+ - ``/srv/nfs/master (VM2)``
+ - ``/srv/nfs/master (VM3)``
+
+If the user wants to change from NFS to local storage
+``````````````````````````````````````````````````````````
+Yes, the user can do this; the persistent volume needs to be reconfigured (``ansible-server/roles/logging/files/persistentVolume.yaml``).
+
+Do we have backup of data?
+````````````````````````````
+1 replica of each index
+
+When K8s restarts, is the data still accessible?
+`````````````````````````````````````````````````````
+Yes (if the data is not deleted from ``/srv/nfs/data``)
+
+Troubleshooting
+===============
+If no logs are received in Elasticsearch
+------------------------------------------
+- Check IP & port of server-fluentd in client config.
+- Check client-fluentd logs, ``$ sudo tail -f /var/log/td-agent/td-agent.log``
+- Check server-fluentd logs, ``$ sudo kubectl logs -n logging <fluentd-pod-name>``
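+
+It can also help to check whether any indices are being created at all in
+Elasticsearch (a sketch, using the user and endpoint from the Elasticsearch
+section above)::
+
+    $ curl -k -u "elasticsearch:password123" "https://10.10.120.211:9200/_cat/indices?v"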
+
+If no notification is received
+--------------------------------
+- Search your "log" in Elasticsearch.
+- Check config of elastalert
+- Check IP of alert-receiver
+
+Reference
+=========
+- `Elastic cloud on K8s <https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-quickstart.html>`_
+- `HA Elasticsearch on K8s <https://www.elastic.co/blog/high-availability-elasticsearch-on-kubernetes-with-eck-and-gke>`_
+- `Fluentd Configuration <https://docs.fluentd.org/configuration/config-file>`_
+- `Elastalert Rule Config <https://elastalert.readthedocs.io/en/latest/ruletypes.html>`_
\ No newline at end of file
diff --git a/docs/lma/metrics/devguide.rst b/docs/lma/metrics/devguide.rst
new file mode 100644
index 00000000..93d33016
--- /dev/null
+++ b/docs/lma/metrics/devguide.rst
@@ -0,0 +1,474 @@
+====================
+Metrics Dev Guide
+====================
+Table of Contents
+=================
+.. contents::
+.. section-numbering::
+
+
+Ansible File Organization
+============================
+
+Ansible-Server
+----------------
+
+Please follow the file structure below:
+
+.. code-block:: bash
+
+ ansible-server
+ | ansible.cfg
+ | hosts
+ |
+ +---group_vars
+ | all.yml
+ |
+ +---playbooks
+ | clean.yaml
+ | setup.yaml
+ |
+ \---roles
+ +---clean-monitoring
+ | \---tasks
+ | main.yml
+ |
+ +---monitoring
+ +---files
+ | | monitoring-namespace.yaml
+ | |
+ | +---alertmanager
+ | | alertmanager-config.yaml
+ | | alertmanager-deployment.yaml
+ | | alertmanager-service.yaml
+ | | alertmanager1-deployment.yaml
+ | | alertmanager1-service.yaml
+ | |
+ | +---cadvisor
+ | | cadvisor-daemonset.yaml
+ | | cadvisor-service.yaml
+ | |
+ | +---collectd-exporter
+ | | collectd-exporter-deployment.yaml
+ | | collectd-exporter-service.yaml
+ | |
+ | +---grafana
+ | | grafana-datasource-config.yaml
+ | | grafana-deployment.yaml
+ | | grafana-pv.yaml
+ | | grafana-pvc.yaml
+ | | grafana-service.yaml
+ | |
+ | +---kube-state-metrics
+ | | kube-state-metrics-deployment.yaml
+ | | kube-state-metrics-service.yaml
+ | |
+ | +---node-exporter
+ | | nodeexporter-daemonset.yaml
+ | | nodeexporter-service.yaml
+ | |
+ | \---prometheus
+ | main-prometheus-service.yaml
+ | prometheus-config.yaml
+ | prometheus-deployment.yaml
+ | prometheus-pv.yaml
+ | prometheus-pvc.yaml
+ | prometheus-service.yaml
+ | prometheus1-deployment.yaml
+ | prometheus1-service.yaml
+ |
+ \---tasks
+ main.yml
+
+
+Ansible - Client
+------------------
+
+Please follow the file structure below:
+
+.. code-block:: bash
+
+   ansible-client
+ | ansible.cfg
+ | hosts
+ |
+ +---group_vars
+ | all.yml
+ |
+ +---playbooks
+ | clean.yaml
+ | setup.yaml
+ |
+ \---roles
+ +---clean-collectd
+ | \---tasks
+ | main.yml
+ |
+ +---collectd
+ +---files
+ | collectd.conf.j2
+ |
+ \---tasks
+ main.yml
+
+
+Summary of Roles
+==================
+
+A brief description of the Ansible playbook roles,
+which are used to deploy the monitoring cluster
+
+Ansible Server Roles
+----------------------
+
+This part consists of the roles used to deploy the
+Prometheus-Alertmanager-Grafana (PAG) stack on the server side.
+
+Role: Monitoring
+~~~~~~~~~~~~~~~~~~
+
+Deployment and configuration of PAG stack along with collectd-exporter,
+cadvisor and node-exporter.
+
+Role: Clean-Monitoring
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Removes all the components deployed by the Monitoring role.
+
+
+File-Task Mapping and Configurable Parameters
+================================================
+
+Ansible Server
+----------------
+
+Role: Monitoring
+~~~~~~~~~~~~~~~~~~~
+
+Alert Manager
+^^^^^^^^^^^^^^^
+
+File: alertmanager-config.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/alertmanager/alertmanager-config.yaml
+
+Task: Configures Receivers for alertmanager
+
+Summary: A configmap, currently configures webhook for alertmanager,
+can be used to configure any kind of receiver
+
+Configurable Parameters:
+ receiver.url: change to the webhook receiver's URL
+ route: Can be used to add receivers
+
+
+File: alertmanager-deployment.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/alertmanager/alertmanager-deployment.yaml
+
+Task: Deploys alertmanager instance
+
+Summary: A Deployment, deploys 1 replica of alertmanager
+
+
+File: alertmanager-service.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/alertmanager/alertmanager-service.yaml
+
+Task: Creates a K8s service for alertmanager
+
+Summary: A NodePort type of service, so that the user can create "silences" and
+view the status of alerts from the native Alertmanager dashboard / UI.
+
+Configurable Parameters:
+ spec.type: Options : NodePort, ClusterIP, LoadBalancer
+ spec.ports: Edit / add ports to be handled by the service
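+
+For example, the service type could also be switched on a running cluster with a
+patch instead of editing and re-applying the file (a sketch; the service name
+``alertmanager`` and the ``monitoring`` namespace are assumptions based on this
+repo's files):
+
+.. code-block:: bash
+
+    kubectl -n monitoring patch svc alertmanager -p '{"spec": {"type": "ClusterIP"}}'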
+
+**Note: alertmanager1-deployment, alertmanager1-service are the same as
+alertmanager-deployment and alertmanager-service respectively.**
+
+CAdvisor
+^^^^^^^^^^^
+
+File: cadvisor-daemonset.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/cadvisor/cadvisor-daemonset.yaml
+
+Task: To create a cadvisor daemonset
+
+Summary: A daemonset, used to scrape container-level data of the Kubernetes cluster
+itself; being a daemonset, an instance runs on every node.
+
+Configurable Parameters:
+ spec.template.spec.ports: Port of the container
+
+
+File: cadvisor-service.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/cadvisor/cadvisor-service.yaml
+
+Task: To create a cadvisor service
+
+Summary: A ClusterIP service for cadvisor to communicate with prometheus
+
+Configurable Parameters:
+ spec.ports: Add / Edit ports
+
+
+Collectd Exporter
+^^^^^^^^^^^^^^^^^^^^
+
+File: collectd-exporter-deployment.yaml
+''''''''''''''''''''''''''''''''''''''''''
+Path : monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml
+
+Task: To create a collectd-exporter replica
+
+Summary: A deployment that acts as a receiver for collectd data sent by client machines;
+Prometheus pulls data from this exporter
+
+Configurable Parameters:
+ spec.template.spec.ports: Port of the container
+
+
+File: collectd-exporter-service.yaml
+''''''''''''''''''''''''''''''''''''''''
+Path : monitoring/files/collectd-exporter/collectd-exporter-service.yaml
+
+Task: To create a collectd service
+
+Summary: A NodePort service for collectd-exporter to hold data for prometheus
+to scrape
+
+Configurable Parameters:
+ spec.ports: Add / Edit ports
+
+
+Grafana
+^^^^^^^^^
+
+File: grafana-datasource-config.yaml
+''''''''''''''''''''''''''''''''''''''''''
+Path : monitoring/files/grafana/grafana-datasource-config.yaml
+
+Task: To create config file for grafana
+
+Summary: A configmap, adds prometheus datasource in grafana
+
+
+File: grafana-deployment.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/grafana/grafana-deployment.yaml
+
+Task: To create a grafana deployment
+
+Summary: The grafana deployment creates a single replica of grafana,
+with preconfigured prometheus datasource.
+
+Configurable Parameters:
+ spec.template.spec.ports: Edit ports
+ spec.template.spec.env: Add / Edit environment variables
+
+
+File: grafana-pv.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/grafana/grafana-pv.yaml
+
+Task: To create a persistent volume for grafana
+
+Summary: A persistent volume for grafana.
+
+Configurable Parameters:
+ spec.capacity.storage: Increase / decrease size
+ spec.accessModes: To change the way PV is accessed.
+ spec.nfs.server: To change the ip address of NFS server
+ spec.nfs.path: To change the path of the server
+
+
+File: grafana-pvc.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/grafana/grafana-pvc.yaml
+
+Task: To create a persistent volume claim for grafana
+
+Summary: A persistent volume claim for grafana.
+
+Configurable Parameters:
+ spec.resources.requests.storage: Increase / decrease size
+
+
+File: grafana-service.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/grafana/grafana-service.yaml
+
+Task: To create a service for grafana
+
+Summary: A NodePort type of service that users actually connect to in order to
+view the dashboard / UI.
+
+Configurable Parameters:
+ spec.type: Options : NodePort, ClusterIP, LoadBalancer
+ spec.ports: Edit / add ports to be handled by the service
+
+
+Kube State Metrics
+^^^^^^^^^^^^^^^^^^^^
+
+File: kube-state-metrics-deployment.yaml
+''''''''''''''''''''''''''''''''''''''''''
+Path : monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml
+
+Task: To create a kube-state-metrics instance
+
+Summary: A deployment, used to collect metrics of the Kubernetes cluster itself
+
+Configurable Parameters:
+ spec.template.spec.containers.ports: Port of the container
+
+
+File: kube-state-metrics-service.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml
+
+Task: To create a collectd service
+
+Summary: A NodePort service for collectd-exporter to hold data for prometheus
+to scrape
+
+Configurable Parameters:
+ spec.ports: Add / Edit ports
+
+
+Node Exporter
+^^^^^^^^^^^^^^^
+
+File: nodeexporter-daemonset.yaml
+''''''''''''''''''''''''''''''''''''
+Path : monitoring/files/node-exporter/nodeexporter-daemonset.yaml
+
+Task: To create a node exporter daemonset
+
+Summary: A daemonset, used to scrape data of the host machines / nodes;
+being a daemonset, an instance runs on every node.
+
+Configurable Parameters:
+ spec.template.spec.ports: Port of the container
+
+
+File: nodeexporter-service.yaml
+''''''''''''''''''''''''''''''''''
+Path : monitoring/files/node-exporter/nodeexporter-service.yaml
+
+Task: To create a node exporter service
+
+Summary: A ClusterIP service for node exporter to communicate with Prometheus
+
+Configurable Parameters:
+ spec.ports: Add / Edit ports
+
+
+Prometheus
+^^^^^^^^^^^^^
+
+File: prometheus-config.yaml
+''''''''''''''''''''''''''''''''''''''''''
+Path : monitoring/files/prometheus/prometheus-config.yaml
+
+Task: To create a config file for Prometheus
+
+Summary: A configmap, adds alert rules.
+
+Configurable Parameters:
+ data.alert.rules: Add / Edit alert rules
+
+
+File: prometheus-deployment.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/prometheus/prometheus-deployment.yaml
+
+Task: To create a Prometheus deployment
+
+Summary: The Prometheus deployment creates a single replica of Prometheus,
+with preconfigured Prometheus datasource.
+
+Configurable Parameters:
+    spec.template.spec.affinity: To change the node affinity; it ensures that
+                                 only 1 instance of prometheus is running
+                                 on each node.
+
+ spec.template.spec.ports: Add / Edit container port
+
+
+File: prometheus-pv.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/prometheus/prometheus-pv.yaml
+
+Task: To create a persistent volume for Prometheus
+
+Summary: A persistent volume for Prometheus.
+
+Configurable Parameters:
+ spec.capacity.storage: Increase / decrease size
+ spec.accessModes: To change the way PV is accessed.
+ spec.hostpath.path: To change the path of the volume
+
+
+File: prometheus-pvc.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/prometheus/prometheus-pvc.yaml
+
+Task: To create a persistent volume claim for Prometheus
+
+Summary: A persistent volume claim for Prometheus.
+
+Configurable Parameters:
+ spec.resources.requests.storage: Increase / decrease size
+
+
+File: prometheus-service.yaml
+'''''''''''''''''''''''''''''''''
+Path : monitoring/files/prometheus/prometheus-service.yaml
+
+Task: To create a service for prometheus
+
+Summary: A NodePort type of service; the Prometheus native dashboard is
+available here.
+
+Configurable Parameters:
+ spec.type: Options : NodePort, ClusterIP, LoadBalancer
+ spec.ports: Edit / add ports to be handled by the service
+
+
+File: main-prometheus-service.yaml
+''''''''''''''''''''''''''''''''''''''
+Path: monitoring/files/prometheus/main-prometheus-service.yaml
+
+Task: A service that connects both prometheus instances.
+
+Summary: A NodePort service for other services to connect to the Prometheus cluster,
+as HA Prometheus needs two independent instances of Prometheus scraping the same inputs
+and having the same configuration.
+
+**Note: prometheus1-deployment and prometheus1-service are the same as
+prometheus-deployment and prometheus-service respectively.**
+
+
+Ansible Client Roles
+----------------------
+
+Role: Collectd
+~~~~~~~~~~~~~~~~~~
+
+File: main.yml
+^^^^^^^^^^^^^^^^
+Path: collectd/tasks/main.yml
+
+Task: Install collectd along with prerequisites
+
+Associated template file: collectd.conf.j2
+
+Path: collectd/files/collectd.conf.j2
+
+Summary: Edit this file to change the default configuration to
+be installed on the client's machine
diff --git a/docs/lma/metrics/images/dataflow.png b/docs/lma/metrics/images/dataflow.png
new file mode 100644
index 00000000..ca1ec908
--- /dev/null
+++ b/docs/lma/metrics/images/dataflow.png
Binary files differ
diff --git a/docs/lma/metrics/images/setup.png b/docs/lma/metrics/images/setup.png
new file mode 100644
index 00000000..ce6a1274
--- /dev/null
+++ b/docs/lma/metrics/images/setup.png
Binary files differ
diff --git a/docs/lma/metrics/userguide.rst b/docs/lma/metrics/userguide.rst
new file mode 100644
index 00000000..0ee4a238
--- /dev/null
+++ b/docs/lma/metrics/userguide.rst
@@ -0,0 +1,230 @@
+=================
+Metrics
+=================
+Table of Contents
+=================
+.. contents::
+.. section-numbering::
+
+Setup
+=======
+
+Prerequisites
+-------------------------
+- Requires 3 VMs to set up K8s
+- ``$ sudo yum install ansible``
+- ``$ pip install openshift pyyaml kubernetes`` (required for ansible K8s module)
+- Update IPs in all these files (if changed)
+ - ``ansible-server/group_vars/all.yml`` (IP of apiserver and hostname)
+ - ``ansible-server/hosts`` (IP of VMs to install)
+ - ``ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml`` (IP of NFS-Server)
+ - ``ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml`` (IP of alert-receiver)
+
+Setup Structure
+---------------
+.. image:: images/setup.png
+
+Installation - Client Side
+----------------------------
+
+Nodes
+`````
+- **Node1** = 10.10.120.21
+- **Node4** = 10.10.120.24
+
+How is installation done?
+`````````````````````````
+Ansible playbook available in ``tools/lma/ansible-client`` folder
+
+- ``cd tools/lma/ansible-client``
+- ``ansible-playbook setup.yaml``
+
+This deploys collectd and configures it to send data to the collectd exporter
+configured at 10.10.120.211 (the IP address of the current instance of collectd-exporter).
+Please make appropriate changes to the config file present in ``tools/lma/ansible-client/roles/collectd/files/``.
+
+Installation - Server Side
+----------------------------
+
+Nodes
+``````
+
+Inside Jumphost - POD12
+ - **VM1** = 10.10.120.211
+ - **VM2** = 10.10.120.203
+ - **VM3** = 10.10.120.204
+
+
+How is installation done?
+`````````````````````````
+**Using Ansible:**
+ - **K8s**
+ - **Prometheus:** 2 independent deployments
+ - **Alertmanager:** 2 independent deployments (cluster peers)
+ - **Grafana:** 1 Replica deployment
+    - **cAdvisor:** 1 daemonset, i.e. 3 replicas, one on each node
+ - **collectd-exporter:** 1 Replica
+ - **node-exporter:** 1 statefulset with 3 replicas
+ - **kube-state-metrics:** 1 deployment
+    - **NFS Server:** at each VM to store Grafana data at the following path
+ - ``/usr/share/monitoring_data/grafana``
+
+How to setup?
+`````````````
+- **To set up the K8s cluster, EFK and PAG:** Run the ansible-playbook ``ansible/playbooks/setup.yaml``
+- **To clean everything:** Run the ansible-playbook ``ansible/playbooks/clean.yaml``
+
+Do we have HA?
+````````````````
+Yes
+
+Configuration
+=============
+
+K8s
+---
+Path to all yamls (Server Side)
+````````````````````````````````
+``tools/lma/ansible-server/roles/monitoring/files/``
+
+K8s namespace
+`````````````
+``monitoring``
+
+Configuration
+---------------------------
+
+Services and Ports
+``````````````````````````
+
+Services and their ports are listed below. One can go to the IP of any node
+on the following ports; the service will correctly redirect you.
+
+
+ ====================== =======
+ Service Port
+ ====================== =======
+ Prometheus 30900
+ Prometheus1 30901
+ Main-Prometheus 30902
+ Alertmanager 30930
+ Alertmanager1 30931
+ Grafana 30000
+ Collectd-exporter 30130
+ ====================== =======
+
+How to change Configuration?
+------------------------------
+- Ports, names of the containers, pretty much every configuration can be modified by changing the required values in the respective yaml files (``/tools/lma/ansible-server/roles/monitoring/``)
+- For metrics, on the client's machine, edit the collectd's configuration (jinja2 template) file, and add required plugins (``/tools/lma/ansible-client/roles/collectd/files/collectd.conf.j2``).
+  For more details, refer to `this guide <https://collectd.org/wiki/index.php/First_steps>`_.
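+
+After changing the collectd template, the client playbook can simply be re-run to
+redeploy the collectd container with the new configuration (a sketch; the playbook
+prompts for the ``host_name`` used in the template)::
+
+    $ cd tools/lma/ansible-client
+    $ ansible-playbook playbooks/setup.yaml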
+
+Where to send metrics?
+------------------------
+
+Metrics are sent to collectd exporter.
+UDP packets are sent to port 38026
+(can be configured and checked at
+``tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml``)
+
+Data Management
+================================
+
+DataFlow:
+--------------
+.. image:: images/dataflow.png
+
+Where is the data stored now?
+----------------------------------
+ - Grafana data (including dashboards) ==> On master, at ``/usr/share/monitoring_data/grafana`` (it is accessed by a Persistent Volume via NFS)
+ - Prometheus data ==> On VM2 and VM3, at ``/usr/share/monitoring_data/prometheus``
+
+ **Note: the data of the two Prometheus instances is also independent of each other; a shared-data solution gave errors**
+
+Do we have backup of data?
+-------------------------------
+ The Prometheus instances, even though independent, scrape the same targets and
+ have the same alert rules, and therefore generate very similar data.
+
+ The Grafana data on NFS has no backup.
+ The dashboards' JSON files are available in the ``/tools/lma/metrics/dashboard`` directory.
+
+When containers are restarted, is the data still accessible?
+-----------------------------------------------------------------
+ Yes, unless the data directories are deleted ``(/usr/share/monitoring_data/*)`` from each node
+
+Alert Management
+==================
+
+Configure Alert receiver
+--------------------------
+- Go to file ``/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml``
+- Under the config.yml section, under receivers, add, update, or delete receivers
+- Currently, the IP of the unified alert receiver is used.
+- Alertmanager supports multiple types of receivers, you can get a `list here <https://prometheus.io/docs/alerting/latest/configuration/>`_
+
+Add new alerts
+--------------------------------------
+- Go to file ``/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml``
+- Under the data section, the alert.rules file is mounted in the ConfigMap.
+- In this file, alerts are divided into 4 groups, namely:
+ - targets
+ - host and hardware
+ - container
+ - kubernetes
+- Add alerts under an existing group or add a new group. Please follow the structure of the file when adding a new group
+- To add a new alert, use the following structure::
+
+    alert: alertname
+    expr: <alert rule (generally a PromQL conditional query)>
+    for: <time-range (e.g. 5m, 10s; the amount of time the condition needs to be true for the alert to be triggered)>
+    labels:
+      severity: critical    # other severity options and other labels can be added here
+      type: hardware
+    annotations:
+      summary: <summary of the alert>
+      description: <describe the alert here>
+
+- For an exhaustive alerts list you can have a look `here <https://awesome-prometheus-alerts.grep.to/>`_
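+
+One possible way to roll out an edited rule file (a sketch, assuming ``kubectl``
+access to the ``monitoring`` namespace; the deployment names ``prometheus`` and
+``prometheus1`` are assumptions based on this repo's file names)::
+
+    $ kubectl apply -n monitoring -f tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml
+    # restart both Prometheus instances so they pick up the new rules
+    $ kubectl -n monitoring rollout restart deployment prometheus prometheus1
+
+Alternatively, re-running the monitoring setup playbook should re-apply the same files.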
+
+Troubleshooting
+===============
+No metrics received in grafana plot
+---------------------------------------------
+- Check if all configurations are correctly done.
+- Go to Main-Prometheus's port on any one VM's IP and check if Prometheus is getting the metrics
+- If prometheus is getting them, read grafana's logs (``kubectl -n monitoring logs <name_of_grafana_pod>``)
+- Else, have a look at collectd exporter's metrics endpoint (eg. 10.10.120.211:30103/metrics)
+- If collectd is getting them, check prometheus's config file if collectd's ip is correct over there.
+- Else ssh to master, check which node collectd-exporter is scheduled (lets say vm2)
+- Now ssh to vm2
+- Use ``tcpdump -i ens3 > testdump`` (``ens3`` being the interface used to connect to the internet)
+- Grep your client node's IP and check if packets are reaching our monitoring cluster (``cat testdump | grep <ip of client>``)
+- Ideally you should see packets reaching the node; if so, check whether the collectd-exporter is running correctly and inspect its logs.
+- If no packets are received, the error is on the client side; check collectd's config file and make sure the correct collectd-exporter IP is used in the ``<network>`` section.
+
+If no notification is received
+--------------------------------
+- Go to Main-Prometheus's port on any one VM's IP (e.g. 10.10.120.211:30902) and check if Prometheus is getting the metrics
+- If not, read the "No metrics received in grafana plot" section above; else read ahead.
+- Check the IP of the alert receiver; you can verify this by going to <alertmanager-ip>:<port> and checking whether Alertmanager is configured correctly.
+- If yes, paste the alert rule into the Prometheus query box and see if any metric satisfies the condition.
+- You may need to change alert rules in the alert.rules section of prometheus-config.yaml if there was a bug in the alert's rule (please read the "Add new alerts" section for detailed instructions).
+
+Reference
+=========
+- `Prometheus K8S deployment <https://www.metricfire.com/blog/how-to-deploy-prometheus-on-kubernetes/>`_
+- `HA Prometheus <https://prometheus.io/docs/introduction/faq/#can-prometheus-be-made-highly-available>`_
+- `Data Flow Diagram <https://drive.google.com/file/d/1D--LXFqU_H-fqpD57H3lJFOqcqWHoF0U/view?usp=sharing>`_
+- `Collectd Configuration <https://docs.opnfv.org/en/stable-fraser/submodules/barometer/docs/release/userguide/docker.userguide.html#build-the-collectd-docker-image>`_
+- `Alertmanager Rule Config <https://awesome-prometheus-alerts.grep.to/>`_
diff --git a/tools/lma/ansible-client/ansible.cfg b/tools/lma/ansible-client/ansible.cfg
new file mode 100644
index 00000000..307ef457
--- /dev/null
+++ b/tools/lma/ansible-client/ansible.cfg
@@ -0,0 +1,17 @@
+[defaults]
+inventory = ./hosts
+host_key_checking = false
+
+# additional path to search for roles in
+roles_path = roles
+
+# enable logging
+log_path = ./ansible.log
+
+[privilege_escalation]
+become=True
+become_method=sudo
+become_user=root
+
+[ssh_connection]
+pipelining = True
diff --git a/tools/lma/ansible-client/hosts b/tools/lma/ansible-client/hosts
new file mode 100644
index 00000000..eba586ce
--- /dev/null
+++ b/tools/lma/ansible-client/hosts
@@ -0,0 +1,2 @@
+[all]
+127.0.0.1 ansible_connection=local
diff --git a/tools/lma/ansible-client/playbooks/clean.yaml b/tools/lma/ansible-client/playbooks/clean.yaml
new file mode 100644
index 00000000..4f77b062
--- /dev/null
+++ b/tools/lma/ansible-client/playbooks/clean.yaml
@@ -0,0 +1,25 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#clean td-agent
+- name: clean td-agent
+ hosts: all
+ roles:
+ - clean-td-agent
+
+#clean collectd
+- name: clean collectd
+ hosts: all
+ roles:
+ - clean-collectd
diff --git a/tools/lma/ansible-client/playbooks/setup.yaml b/tools/lma/ansible-client/playbooks/setup.yaml
new file mode 100644
index 00000000..c79ee347
--- /dev/null
+++ b/tools/lma/ansible-client/playbooks/setup.yaml
@@ -0,0 +1,28 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#setup td-agent
+- name: setup td-agent
+ hosts: all
+ roles:
+ - td-agent
+
+- name: setup collectd
+ hosts: all
+ vars_prompt:
+ - name: host_name
+ prompt: "Enter host_name for collectd configuration"
+ private: no
+ roles:
+ - collectd
diff --git a/tools/lma/ansible-client/roles/clean-collectd/main.yml b/tools/lma/ansible-client/roles/clean-collectd/main.yml
new file mode 100644
index 00000000..97100cad
--- /dev/null
+++ b/tools/lma/ansible-client/roles/clean-collectd/main.yml
@@ -0,0 +1,44 @@
+# Copyright 2020 Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+- hosts: localhost
+
+ tasks:
+ - name: Check and install dependencies
+ yum:
+ name: docker
+ state: present
+
+ - name: Install python sdk
+ yum:
+ name: python-docker-py
+ state: present
+
+ - name: Stopping collectd container
+ docker_container:
+ name: collectd
+ state: stopped
+
+ - name: Removing collectd container
+ docker_container:
+ name: collectd
+ state: absent
+
+ # Removes the image (not recommended)
+ # - name: Remove image
+ # docker_image:
+ # state: absent
+ # name: opnfv/barometer-collectd
+ # tag: latest
diff --git a/tools/lma/ansible-client/roles/clean-td-agent/tasks/main.yml b/tools/lma/ansible-client/roles/clean-td-agent/tasks/main.yml
new file mode 100644
index 00000000..7c59c698
--- /dev/null
+++ b/tools/lma/ansible-client/roles/clean-td-agent/tasks/main.yml
@@ -0,0 +1,28 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+#TD-agent uninstallation
+- name: TD-agent Uninstallation
+ yum:
+ name: td-agent
+ state: absent
+
+- name: removing folder
+ file:
+ path: "{{ item }}"
+ state: absent
+ with_items:
+ - /etc/td-agent/
+ - /var/log/td-agent/
diff --git a/tools/lma/ansible-client/roles/collectd/files/collectd.conf.j2 b/tools/lma/ansible-client/roles/collectd/files/collectd.conf.j2
new file mode 100644
index 00000000..ba953e3a
--- /dev/null
+++ b/tools/lma/ansible-client/roles/collectd/files/collectd.conf.j2
@@ -0,0 +1,44 @@
+Hostname "{{ host_name }}"
+Interval 10
+LoadPlugin intel_rdt
+LoadPlugin processes
+LoadPlugin interface
+LoadPlugin network
+LoadPlugin ovs_stats
+LoadPlugin cpu
+LoadPlugin memory
+#LoadPlugin csv
+#LoadPlugin write_http
+#LoadPlugin dpdkstat
+##############################################################################
+# Plugin configuration #
+##############################################################################
+<Plugin processes>
+ ProcessMatch "ovs-vswitchd" "ovs-vswitchd"
+ ProcessMatch "ovsdb-server" "ovsdb-server"
+ ProcessMatch "collectd" "collectd"
+</Plugin>
+
+<Plugin cpu>
+ ReportByCpu true
+ ReportByState true
+ ValuesPercentage true
+ ReportNumCpu true
+ ReportGuestState false
+ SubtractGuestState false
+</Plugin>
+
+<Plugin network>
+ Server "10.10.120.211" "30826"
+</Plugin>
+
+<Plugin ovs_stats>
+ Port "6640"
+ Address "127.0.0.1"
+ Socket "/usr/local/var/run/openvswitch/db.sock"
+ Bridges "vsperf-br0"
+</Plugin>
+
+<Plugin "intel_rdt">
+ Cores "2" "4-5" "6-7" "8" "9" "22" "23" "24" "25" "26" "27"
+</Plugin>
diff --git a/tools/lma/ansible-client/roles/collectd/tasks/main.yml b/tools/lma/ansible-client/roles/collectd/tasks/main.yml
new file mode 100644
index 00000000..0befb22b
--- /dev/null
+++ b/tools/lma/ansible-client/roles/collectd/tasks/main.yml
@@ -0,0 +1,60 @@
+# Copyright 2020 Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+
+# Dependency check
+- name: Check and install dependencies
+ yum:
+ name: ['docker', 'python-docker-py']
+ state: present
+
+- name: Install pip
+ yum:
+ name: python-pip
+ state: present
+
+- name: install docker-py
+ pip: name=docker-py
+
+- name: Cloning barometer
+ git:
+ repo: https://gerrit.opnfv.org/gerrit/barometer
+ dest: /tmp/barometer
+
+- name: Create Folder
+ file:
+ path: /tmp/barometer/docker/src/collectd_sample_configs
+ state: directory
+
+# Build collectd
+- name: Download and Build Image
+ command: chdir=/tmp/ {{ item }}
+ become: true
+ with_items:
+ - docker build -t opnfv/barometer-collectd -f barometer/docker/barometer-collectd/Dockerfile barometer/docker/barometer-collectd
+
+# Configuring collectd
+- name: Ensure collectd is configured
+ template:
+ src: ../files/collectd.conf.j2
+ dest: /tmp/barometer/docker/src/collectd_sample_configs/collectd.conf
+
+# Running Collectd container #####################
+- name: Running collectd
+  command: chdir=/tmp/ {{ item }}
+ become: true
+ with_items:
+ - docker run -tid --name collectd --net=host -v /tmp/barometer/docker/src/collectd_sample_configs:/opt/collectd/etc/collectd.conf.d -v /var/run:/var/run -v /tmp:/tmp --privileged opnfv/barometer-collectd /run_collectd.sh
+ - docker ps
diff --git a/tools/lma/ansible-client/roles/td-agent/files/td-agent.conf b/tools/lma/ansible-client/roles/td-agent/files/td-agent.conf
new file mode 100644
index 00000000..9d656e65
--- /dev/null
+++ b/tools/lma/ansible-client/roles/td-agent/files/td-agent.conf
@@ -0,0 +1,63 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+<source>
+ @type tail
+ path /tmp/result*/*.log, /tmp/result*/*.dat, /tmp/result*/*.csv, /tmp/result*/stc-liveresults.dat.*, /var/log/userspace*.log, /var/log/sriovdp/*.log.*, /var/log/pods/**/*.log
+ path_key log_path
+# read_from_head true
+
+ <parse>
+ @type regexp
+ expression ^(?<msg>.*)$
+ </parse>
+
+ tag log.test
+</source>
+
+<filter log.test>
+ @type record_transformer
+ enable_ruby
+ <record>
+ host "#{Socket.gethostname}"
+ </record>
+</filter>
+
+
+<filter log.test>
+ @type parser
+ key_name log_path
+ reserve_data true
+ <parse>
+ @type regexp
+ expression /.*\/(?<file>.*)/
+ </parse>
+</filter>
+
+<match log.test>
+ @type copy
+
+ <store>
+ @type forward
+ send_timeout 10s
+ <server>
+ host 10.10.120.211
+ port 32224
+ </server>
+ </store>
+
+ <store>
+ @type stdout
+ </store>
+</match>
\ No newline at end of file
diff --git a/tools/lma/ansible-client/roles/td-agent/tasks/main.yml b/tools/lma/ansible-client/roles/td-agent/tasks/main.yml
new file mode 100644
index 00000000..c7f50765
--- /dev/null
+++ b/tools/lma/ansible-client/roles/td-agent/tasks/main.yml
@@ -0,0 +1,30 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#TD-agent setup
+- name: TD-agent installation
+ shell: curl -L https://toolbelt.treasuredata.com/sh/install-redhat-td-agent4.sh | sh
+
+#replace the config file
+- name: Replace the default td-agent configuration file
+ copy:
+ src: ../files/td-agent.conf
+ dest: /etc/td-agent/td-agent.conf
+
+#start the service
+- name: Starting and Enabling the TD-agent services
+ service:
+ name: td-agent
+ state: started
+ enabled: yes
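Because the installer script is piped straight from curl into sh and the service is started right after the config is copied, a typo in td-agent.conf only surfaces at runtime. A hedged validation sketch, assuming the td-agent wrapper accepts fluentd's --dry-run flag, could sit between the copy and service tasks:

    - name: Validate td-agent configuration (sketch)
      command: td-agent --dry-run -c /etc/td-agent/td-agent.conf
      changed_when: false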
diff --git a/tools/lma/ansible-server/ansible.cfg b/tools/lma/ansible-server/ansible.cfg
new file mode 100644
index 00000000..307ef457
--- /dev/null
+++ b/tools/lma/ansible-server/ansible.cfg
@@ -0,0 +1,17 @@
+[defaults]
+inventory = ./hosts
+host_key_checking = false
+
+# additional path to search for roles in
+roles_path = roles
+
+# enable logging
+log_path = ./ansible.log
+
+[privilege_escalation]
+become=True
+become_method=sudo
+become_user=root
+
+[ssh_connection]
+pipelining = True
diff --git a/tools/lma/ansible-server/group_vars/all.yml b/tools/lma/ansible-server/group_vars/all.yml
new file mode 100644
index 00000000..b0725ff5
--- /dev/null
+++ b/tools/lma/ansible-server/group_vars/all.yml
@@ -0,0 +1,27 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#apiserver advertise address
+ad_addr: 10.10.120.211
+
+#pod network cidr
+pod_cidr: 192.168.0.0/16
+
+#token generated by master
+token_file: join_token
+
+#hostnames of the cluster nodes
+vm3: 'vm3'
+vm2: 'vm2'
+vm1: 'vm1'
diff --git a/tools/lma/ansible-server/hosts b/tools/lma/ansible-server/hosts
new file mode 100644
index 00000000..0a13d754
--- /dev/null
+++ b/tools/lma/ansible-server/hosts
@@ -0,0 +1,12 @@
+[all]
+10.10.120.211 ansible_connection=ssh ansible_ssh_user=root ansible_sudo_pass=P@ssw0rd ansible_ssh_pass=P@ssw0rd
+10.10.120.203 ansible_connection=ssh ansible_ssh_user=root ansible_sudo_pass=P@ssw0rd ansible_ssh_pass=P@ssw0rd
+10.10.120.204 ansible_connection=ssh ansible_ssh_user=root ansible_sudo_pass=P@ssw0rd ansible_ssh_pass=P@ssw0rd
+
+
+[master]
+10.10.120.211 ansible_connection=ssh ansible_ssh_user=root ansible_sudo_pass=P@ssw0rd ansible_ssh_pass=P@ssw0rd
+
+[worker-nodes]
+10.10.120.203 ansible_connection=ssh ansible_ssh_user=root ansible_sudo_pass=P@ssw0rd ansible_ssh_pass=P@ssw0rd
+10.10.120.204 ansible_connection=ssh ansible_ssh_user=root ansible_sudo_pass=P@ssw0rd ansible_ssh_pass=P@ssw0rd
\ No newline at end of file
diff --git a/tools/lma/ansible-server/playbooks/clean.yaml b/tools/lma/ansible-server/playbooks/clean.yaml
new file mode 100644
index 00000000..b4da66da
--- /dev/null
+++ b/tools/lma/ansible-server/playbooks/clean.yaml
@@ -0,0 +1,52 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# clean monitoring
+- name: Clean PAG setup
+ hosts: master
+ roles:
+ - clean-monitoring
+
+#clean logging
+- name: Clean EFK setup
+ hosts: master
+ roles:
+ - clean-logging
+
+#IF KUBELET IS RUNNING THEN RUN THIS
+#clean k8s cluster
+- name: Clean k8s cluster
+ hosts: master
+ roles:
+ - clean-k8s-cluster
+
+#reset worker-nodes
+- name: Reset worker-nodes
+ hosts: worker-nodes
+ roles:
+ - clean-k8s-worker-reset
+
+#uninstall pre-requisites for k8s
+- name: uninstall pre-requisites for k8s
+ hosts: all
+ roles:
+ - clean-k8s-pre
+
+#*************************************************************************************************************
+#THIS WILL DELETE DATA OF ELASTICSEARCH
+#*************************************************************************************************************
+# - name: Clean nfs server
+# hosts: all
+# roles:
+# - clean-nfs
diff --git a/tools/lma/ansible-server/playbooks/setup.yaml b/tools/lma/ansible-server/playbooks/setup.yaml
new file mode 100644
index 00000000..1f5ed1f5
--- /dev/null
+++ b/tools/lma/ansible-server/playbooks/setup.yaml
@@ -0,0 +1,44 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#Pre-requisites for k8s and NFS server
+- name: Pre-requisites for k8s in all the nodes
+ hosts: all
+ roles:
+ - k8s-pre
+ - nfs
+
+#master setup for k8s
+- name: master setup for k8s
+ hosts: master
+ roles:
+ - k8s-master
+
+#worker setup for k8s
+- name: worker setup for k8s
+ hosts: worker-nodes
+ roles:
+ - k8s-worker
+
+#EFK setup in k8s
+- name: EFK setup in k8s
+ hosts: master
+ roles:
+ - logging
+
+#PAG setup in k8s
+- name: PAG setup in k8s
+ hosts: master
+ roles:
+ - monitoring
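Usage note (an assumption, not part of the playbooks): both playbooks are expected to be launched from tools/lma/ansible-server/ so that ansible.cfg, hosts and roles_path resolve relative to that directory:

    # ansible-playbook playbooks/setup.yaml   # deploy k8s, NFS, EFK and PAG
    # ansible-playbook playbooks/clean.yaml   # tear the stack down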
diff --git a/tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml b/tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml
new file mode 100644
index 00000000..83ac086d
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml
@@ -0,0 +1,34 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#check whether kubelet is running
+- name: check for kubelet
+ shell: "systemctl status kubelet"
+ register: _svc_kubelet
+ failed_when: _svc_kubelet.rc != 0 and ("could not be found" not in _svc_kubelet.stderr)
+
+#IF KUBELET IS RUNNING, THEN
+#reset k8s
+- name: reset k8s
+ shell: |
+ kubectl drain {{vm3}} --delete-local-data --force --ignore-daemonsets
+ kubectl drain {{vm2}} --delete-local-data --force --ignore-daemonsets
+ kubectl drain {{vm1}} --delete-local-data --force --ignore-daemonsets
+ kubectl delete node {{vm3}}
+ kubectl delete node {{vm2}}
+ kubectl delete node {{vm1}}
+ sudo kubeadm reset -f
+ sudo rm $HOME/.kube/config
+ when: "_svc_kubelet.rc == 0"
+
diff --git a/tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml b/tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml
new file mode 100644
index 00000000..6d12bd5f
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml
@@ -0,0 +1,65 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+#Uninstalling K8s and Docker
+- name: Uninstalling K8s and Docker
+ yum:
+ name: ['kubeadm', 'kubectl', 'kubelet', 'docker-ce']
+ state: absent
+
+#Enabling Swap
+- name: Enabling Swap on all nodes
+ shell: swapon -a
+ ignore_errors: yes
+
+#Uncommenting Swap entries
+- name: Uncommenting Swap entries in /etc/fstab
+ replace:
+ path: /etc/fstab
+ regexp: '^# (/.*swap.*)'
+ replace: '\1'
+
+
+#Starting firewalld
+- name: 'Starting firewall'
+ service:
+ name: firewalld
+ state: started
+ enabled: yes
+
+# Enabling SELinux
+- name: Enabling SELinux on all nodes
+ shell: |
+ setenforce 1
+ sudo sed -i 's/^SELINUX=permissive$/SELINUX=enforcing/' /etc/selinux/config
+
+#disabling Docker repo
+- name: Disabling the Docker CE repository
+ command: yum-config-manager --disable docker-ce-stable
+
+#removing K8s repo
+- name: removing repository details in Kubernetes repo file.
+ blockinfile:
+ path: /etc/yum.repos.d/kubernetes.repo
+ state: absent
+ block: |
+ [kubernetes]
+ name=Kubernetes
+ baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+ enabled=1
+ gpgcheck=1
+ repo_gpgcheck=1
+ gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg
+ https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
diff --git a/tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml b/tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml
new file mode 100644
index 00000000..3ba9c9ea
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml
@@ -0,0 +1,26 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#check whether kubelet is running
+- name: check for kubelet
+ shell: "systemctl status kubelet"
+ register: _svc_kubelet
+ failed_when: _svc_kubelet.rc != 0 and ("could not be found" not in _svc_kubelet.stderr)
+
+#IF KUBELET IS RUNNING, THEN
+#reset k8s
+- name: reset k8s
+ command: kubeadm reset -f
+ when: "_svc_kubelet.rc == 0"
+
diff --git a/tools/lma/ansible-server/roles/clean-logging/tasks/main.yml b/tools/lma/ansible-server/roles/clean-logging/tasks/main.yml
new file mode 100644
index 00000000..259065ed
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-logging/tasks/main.yml
@@ -0,0 +1,193 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#Deleting EFK setup from k8s cluster
+
+#check whether kubelet is running
+- name: check for kubelet
+ shell: "systemctl status kubelet"
+ register: _svc_kubelet
+ failed_when: _svc_kubelet.rc != 0 and ("could not be found" not in _svc_kubelet.stderr)
+
+#***********************************************************************************************************
+#copy all yaml to /tmp/files/
+#***********************************************************************************************************
+- name: copy all yaml to /tmp/files/
+ copy:
+ src: ../../logging/files/
+ dest: /tmp/files/
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Stop elastalert
+#***********************************************************************************************************
+- name: Delete elastalert config configmap
+ k8s:
+ state: absent
+ src: /tmp/files/elastalert/ealert-conf-cm.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete elastalert key configmap
+ k8s:
+ state: absent
+ src: /tmp/files/elastalert/ealert-key-cm.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete elastalert rule configmap
+ k8s:
+ state: absent
+ src: /tmp/files/elastalert/ealert-rule-cm.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete elastalert pod
+ k8s:
+ state: absent
+ src: /tmp/files/elastalert/elastalert.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Stop fluentd
+#***********************************************************************************************************
+
+- name: Delete fluentd service
+ k8s:
+ state: absent
+ src: /tmp/files/fluentd/fluent-service.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete fluentd configmap
+ k8s:
+ state: absent
+ src: /tmp/files/fluentd/fluent-cm.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete fluentd pod
+ k8s:
+ state: absent
+ src: /tmp/files/fluentd/fluent.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Stop nginx
+#***********************************************************************************************************
+- name: Delete nginx service
+ k8s:
+ state: absent
+ src: /tmp/files/nginx/nginx-service.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete nginx configmap
+ k8s:
+ state: absent
+ src: /tmp/files/nginx/nginx-conf-cm.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete nginx key configmap
+ k8s:
+ state: absent
+ src: /tmp/files/nginx/nginx-key-cm.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+- name: Delete nginx pod
+ k8s:
+ state: absent
+ src: /tmp/files/nginx/nginx.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Stop Kibana
+#***********************************************************************************************************
+- name: Stopping Kibana
+ k8s:
+ state: absent
+ src: /tmp/files/kibana/kibana.yaml
+ namespace: logging
+ ignore_errors: yes
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Stop Elasticsearch
+#***********************************************************************************************************
+- name: Stopping Elasticsearch
+ k8s:
+ state: absent
+ src: /tmp/files/elasticsearch/elasticsearch.yaml
+ namespace: logging
+ ignore_errors: yes
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Stop Elasticsearch operator
+#***********************************************************************************************************
+- name: Stopping Elasticsearch operator
+ shell: kubectl delete -f https://download.elastic.co/downloads/eck/1.2.0/all-in-one.yaml
+ ignore_errors: yes
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Delete Persistent Volume
+#***********************************************************************************************************
+- name: Deleting Persistent Volume
+ k8s:
+ state: absent
+ src: /tmp/files/persistentVolume.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Delete Storage Class
+#***********************************************************************************************************
+- name: Deleting Storage Class
+ k8s:
+ state: absent
+ src: /tmp/files/storageClass.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#IF KUBELET IS RUNNING, THEN
+#Delete Namespace
+#***********************************************************************************************************
+- name: Deleting Namespace
+ k8s:
+ state: absent
+ src: /tmp/files/namespace.yaml
+ namespace: logging
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#removing /tmp/files
+#***********************************************************************************************************
+- name: Removing /tmp/files
+ file:
+ path: "/tmp/files"
+ state: absent
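The tasks above use Ansible's k8s module, which needs the openshift Python client on the host where the tasks run. A minimal dependency sketch (an assumption, not part of the original roles):

    - name: Ensure python client for the k8s module is present (sketch)
      pip:
        name: openshift
        state: present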
diff --git a/tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml b/tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml
new file mode 100644
index 00000000..49943ec0
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml
@@ -0,0 +1,48 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+#Deleting PAG setup from k8s cluster
+
+#check whether kubelet is running
+- name: check for kubelet
+ shell: "systemctl status kubelet"
+ register: _svc_kubelet
+ failed_when: _svc_kubelet.rc != 0 and ("could not be found" not in _svc_kubelet.stderr)
+
+#***********************************************************************************************************
+#copy namespace yaml to /tmp/
+#***********************************************************************************************************
+- name: copy namespace yaml to /tmp/
+ copy:
+ src: ../../monitoring/files/monitoring-namespace.yaml
+ dest: /tmp/monitoring-namespace.yaml
+
+#***********************************************************************************************************
+#Deleting Namespace
+#***********************************************************************************************************
+- name: Deleting Namespace
+ k8s:
+ state: absent
+ src: /tmp/monitoring-namespace.yaml
+ namespace: monitoring
+ when: "_svc_kubelet.rc == 0"
+
+#***********************************************************************************************************
+#removing /tmp/monitoring-namespace.yaml
+#***********************************************************************************************************
+- name: Removing /tmp/monitoring-namespace.yaml
+ file:
+ path: "/tmp/monitoring-namespace.yaml"
+ state: absent
diff --git a/tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml b/tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml
new file mode 100644
index 00000000..157db849
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml
@@ -0,0 +1,44 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#Edit /etc/exports
+- name: Edit /etc/exports file for NFS
+ lineinfile:
+ path: /etc/exports
+ line: "{{item.line}}"
+ state: absent
+ with_items:
+ - {line: "/srv/nfs/master *(rw,sync,no_root_squash,no_subtree_check)"}
+ - {line: "/srv/nfs/data *(rw,sync,no_root_squash,no_subtree_check)"}
+ - {line: "/usr/share/monitoring_data/grafana *(rw,sync,no_root_squash,no_subtree_check)"}
+
+#uninstall NFS server
+- name: Uninstalling NFS server utils
+ yum:
+ name: nfs-utils
+ state: absent
+
+#remove Elasticsearch data
+- name: Removing Directory for elasticsearch
+ file:
+ path: "/srv/nfs/{{item}}"
+ state: absent
+ with_items:
+ - ['data', 'master']
+
+#remove Grafana data
+- name: Removing Directory for grafana
+ file:
+ path: "/usr/share/monitoring_data/grafana"
+ state: absent
diff --git a/tools/lma/ansible-server/roles/k8s-master/tasks/main.yml b/tools/lma/ansible-server/roles/k8s-master/tasks/main.yml
new file mode 100644
index 00000000..edc8f10b
--- /dev/null
+++ b/tools/lma/ansible-server/roles/k8s-master/tasks/main.yml
@@ -0,0 +1,49 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#pull k8s images
+- name: Pulling images required for setting up a Kubernetes cluster
+ shell: kubeadm config images pull
+
+#reset k8s
+- name: Resetting kubeadm
+ shell: kubeadm reset -f
+
+#init k8s
+- name: Initializing Kubernetes cluster
+ shell: kubeadm init --apiserver-advertise-address {{ad_addr}} --pod-network-cidr={{pod_cidr}}
+
+#Copying required files
+- name: Copying required files
+ shell: |
+ mkdir -p $HOME/.kube
+ sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config
+ sudo chown $(id -u):$(id -g) $HOME/.kube/config
+
+#get the join command
+- name: Generating the join command for worker nodes
+ shell: kubeadm token create --print-join-command
+ register: token
+
+#save token to join worker
+- name: Storing token for worker
+ local_action: copy content={{ token.stdout }} dest={{ token_file }}
+
+#install calico
+- name: Install Network Add-on
+ command: kubectl apply -f https://docs.projectcalico.org/v3.11/manifests/calico.yaml
+
+#Remove master taint so workloads can be scheduled on the control-plane node
+- name: Untaint master
+ command: kubectl taint nodes --all node-role.kubernetes.io/master-
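A small verification sketch could follow the untaint step, assuming kubectl picks up the admin.conf copied above:

    - name: Verify cluster nodes are visible (sketch)
      command: kubectl get nodes
      register: nodes_out
      changed_when: false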
diff --git a/tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml b/tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml
new file mode 100644
index 00000000..95526a28
--- /dev/null
+++ b/tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml
@@ -0,0 +1,72 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+#Disabling Swap
+- name: Disabling Swap on all nodes
+ shell: swapoff -a
+
+#Commenting Swap entries
+- name: Commenting Swap entries in /etc/fstab
+ replace:
+ path: /etc/fstab
+ regexp: '(^/.*swap.*)'
+ replace: '# \1'
+
+#Stopping firewalld
+- name: 'Stopping firewall'
+ service:
+ name: firewalld
+ state: stopped
+ enabled: no
+
+#Disabling SELinux
+- name: Disabling SELinux on all nodes
+ shell: |
+ setenforce 0
+ sudo sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
+
+#adding Docker CE repository
+- name: Adding Docker CE repository
+ shell: yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
+
+#Adding K8s repo
+- name: Adding repository details in Kubernetes repo file.
+ blockinfile:
+ path: /etc/yum.repos.d/kubernetes.repo
+ block: |
+ [kubernetes]
+ name=Kubernetes
+ baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+ enabled=1
+ gpgcheck=1
+ repo_gpgcheck=1
+ gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg
+ https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+
+#installing K8s and Docker
+- name: Installing K8s and Docker
+ yum:
+ name: ['kubeadm', 'kubectl', 'kubelet', 'docker-ce']
+ state: present
+
+#Starting docker and kubelet services
+- name: Starting and Enabling the required services
+ service:
+ name: "{{ item }}"
+ state: started
+ enabled: yes
+ with_items:
+ - docker
+ - kubelet
diff --git a/tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml b/tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml
new file mode 100644
index 00000000..89d2b373
--- /dev/null
+++ b/tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml
@@ -0,0 +1,24 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#Worker
+
+- name: Copying token to worker nodes
+ copy: src={{ token_file }} dest=join_token
+
+- name: Joining worker nodes with kubernetes master
+ shell: |
+ kubeadm reset -f
+ cat join_token | tail -1 > out.sh
+ sh out.sh
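A hedged post-join check, confirming kubelet came up on the worker after the join script ran:

    - name: Confirm kubelet is active after joining (sketch)
      command: systemctl is-active kubelet
      register: kubelet_state
      changed_when: false
      failed_when: kubelet_state.stdout != "active"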
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml
new file mode 100644
index 00000000..a320ef75
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml
@@ -0,0 +1,48 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: elastalert-config
+data:
+ elastalert.yaml: |
+ # This is the folder that contains the rule yaml files
+ # Any .yaml file will be loaded as a rule
+ rules_folder: rules
+ # How often ElastAlert will query Elasticsearch
+ # The unit can be anything from weeks to seconds
+ run_every:
+ minutes: 1
+ # ElastAlert will buffer results from the most recent
+ # period of time, in case some log sources are not in real time
+ buffer_time:
+ minutes: 15
+
+ scan_subdirectories: false
+
+ # The Elasticsearch hostname for metadata writeback
+ # Note that every rule can have its own Elasticsearch host
+ es_host: logging-es-http
+ es_port: 9200
+ es_username: ${ES_USERNAME}
+ es_password: ${ES_PASSWORD}
+ es_conn_timeout: 120
+ verify_certs: False
+ use_ssl: True
+ client_cert: '/opt/elastalert/key/elastalert.pem'
+ client_key: '/opt/elastalert/key/elastalert.key'
+ writeback_index: elastalert_status
+ writeback_alias: elastalert_alerts
+ alert_time_limit:
+ days: 2
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml
new file mode 100644
index 00000000..0c606a9c
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml
@@ -0,0 +1,68 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: elastalert-key
+data:
+ elastalert.key: |
+ -----BEGIN PRIVATE KEY-----
+ MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQC0uQ+B0gy3VB4w
+ 5CeWOx575lqSUuYvrGW3ILpV1gmj0ZZCMZUGvt4UvaCEaNPIAqNaHPmaslQqJb5C
+ PJH9pMN7vUVp3DACzmYrS4HdROHamn5gjebXs4hq43heLaIB1Kb+4F+7sEY88irK
+ xOevadcN35y5ld7lVUGRsj6JYcweaAeh/YZ/HaBT5RfdGF+x07NDus+mFqT8j3PD
+ rs2+JtEvEoWtjcxwFgloc9GkHsWZoV1AQHgyAWjmDXZtZeV0HQSkl7hWFG9vxTni
+ DvdrdhX0g+D+u8jWnlR4Za4jd64KbTp9C9trSHyMSRIvN5obm/H8O5MQ+sZ+NQ0X
+ PdK92MjbAgMBAAECggEASbRPxrpLxVjhFz91haeGvzErLxHwHvFIam9Gj0tDkzQe
+ +9AM3ztohzzvAhFejevFgzLd+WFRQf8yoQDi6XcQ4p5GeO38Bqj2siGRTRSSp/zq
+ HabBxqbJtA4hQQeLUwPPN5N6d6lke+an3RqBAuE/e8D+whGFXjJvE2SGbLEd9if2
+ uzHj37sPsVi8kRvgZBDOozmt7YFzQVO/1V+4Lw6nz48M3t+hOHaUXY0Yd8nsk5A6
+ kgoDQ4CGUHjtWfSrccZrYNk51Zows9/sX8axfJ94wKJSImWJcuW9PXIQhzT4exnH
+ sPOwY6Noy3nXRk9gcchT60fKpp+tsJZk3ezkwSpgwQKBgQDvsaYcbnIVdFZpaNKF
+ Tmt/w60CmfGeNozRygfi84ot7edUf93cB6WSKChcAE8fbq9Ji5USPNtfbnZfFXsI
+ IyTr2KHW3RkHuDEyu+Lan9JuReEH3QOG83vvN/oYA3J3hqUTCjEGkPjqnoFtdk8L
+ f7WH1jZvXYEMo0C48SXo+yGohQKBgQDBBGkzL928j1QB9NfiNFk70EalDsF8Im2W
+ n8bQ54KYspUybKD/Hmw0jIV7kdu2vhgGC4RYkn9c5qATtulbYJUgUBelaSi0vhXT
+ gfAuO+JIIZ50P+mkkxH/KIUyu1xWUB2jtMulqLLomdoBvfp/u51qCY6fT3WMCB+R
+ ouWLr2oZ3wKBgQCAuas4AaiLFRuDKKRGq0LYLsIvb3VvPmSKFjH+FETVPbrKipEf
+ pYup3p8uKYxUmSDSIoBAdyZpLe2sSuD0Ecu2TXU86yiSGL1zPawrNUHRrv2XN365
+ bvHUGv/Y/aDvyAPHIeYKXLkRZ2ai3rK8vi1Dcitxy4mOu+36ZKezY4tD8QKBgQCd
+ hakJUj4nPd20fwqUnF5a1z5gRGuZkEtZiunp4ZaOYegrL8YwjraGKExjrYTfXcIj
+ ZNDMrDpvKfRoQnWt0mPB7DtwDiNfZmZPqBLI2Kxya6VygBqA6lncoEgcQBY6hsW5
+ rbopZ0UjWTQ3CcFe71GnkUcpMuLetl51L7kgR7dShwKBgQC+vqjhe/h081JGLTo1
+ tKeRUCaDA/V3VHjFKgM5g+S3/KzgU/EaB1rq3Qja1quGv0zHveca3zibdNQi1ENm
+ KSutWh2zQXzzvmycPmVcthhOxaKzRXDjG0mXiA0bnSgK3F2o9t4196RYhIiiSvAH
+ shVjZMTK04h8ciTLIqK/GtZr+g==
+ -----END PRIVATE KEY-----
+ elastalert.pem: |
+ -----BEGIN CERTIFICATE-----
+ MIIDVzCCAj+gAwIBAgIJAORgkR7Y0Nk9MA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNV
+ BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
+ Q29tcGFueSBMdGQwHhcNMjAwNjI4MTM1NjAwWhcNMjEwNjI4MTM1NjAwWjBCMQsw
+ CQYDVQQGEwJYWDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZh
+ dWx0IENvbXBhbnkgTHRkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA
+ tLkPgdIMt1QeMOQnljsee+ZaklLmL6xltyC6VdYJo9GWQjGVBr7eFL2ghGjTyAKj
+ Whz5mrJUKiW+QjyR/aTDe71FadwwAs5mK0uB3UTh2pp+YI3m17OIauN4Xi2iAdSm
+ /uBfu7BGPPIqysTnr2nXDd+cuZXe5VVBkbI+iWHMHmgHof2Gfx2gU+UX3RhfsdOz
+ Q7rPphak/I9zw67NvibRLxKFrY3McBYJaHPRpB7FmaFdQEB4MgFo5g12bWXldB0E
+ pJe4VhRvb8U54g73a3YV9IPg/rvI1p5UeGWuI3euCm06fQvba0h8jEkSLzeaG5vx
+ /DuTEPrGfjUNFz3SvdjI2wIDAQABo1AwTjAdBgNVHQ4EFgQUFAvjohHTavHmbRbj
+ Yq2h3cq7UMEwHwYDVR0jBBgwFoAUFAvjohHTavHmbRbjYq2h3cq7UMEwDAYDVR0T
+ BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAB9oDASl4OfF/D49i3KtVzjzge4up
+ WssBPYKVwASh3cXfLLe3NdY9ihdCXFd/8Rus0hBGaRPIyR06sZoHRDEfJ2xrRD6g
+ pr4iHRfaoEWqols7+iW0cgQehvw5efEpFL1vg9zK9kOwruS4ZUhDrak6GcO/O8Jh
+ 6lSGmidHSHrQmfqFeTotaezwylV/uHvRZHPvk2JhQfC+vFjn5/iN/0wCeQCwYvOC
+ rePq2ZFdYg/0bS9BYwKsT2w1Z/AU/wIMLmbNB1af+fTBBEQlxb4rAeDb+J9EoSQ5
+ MVP7jm3BVnHQCs6CA4LV4yRQNF2K6GkWem1oUg/H3S2SG8TAUlKpX/1XRw==
+ -----END CERTIFICATE-----
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml
new file mode 100644
index 00000000..af28b6f6
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml
@@ -0,0 +1,132 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: elastalert-rule
+data:
+ rule-node4-vswitch.yaml: |
+ name: vswitch-3-sec
+ type: any
+ index: node4*
+ filter:
+ - range:
+ time_vswitchd:
+ gt: 3 #Greater than
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: threshold
+ label: vswitchd start time > 3 sec
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ time_vswitchd: time_vswitchd
+ num_hits: num_hits
+ num_matches: num_matches
+
+ rule-node1-vswitch.yaml: |
+ name: vswitch-3-sec
+ type: any
+ index: node1*
+ filter:
+ - range:
+ time_vswitchd:
+ gt: 3 #Greater than
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: threshold
+ label: vswitchd start time > 3 sec
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ time_vswitchd: time_vswitchd
+ num_hits: num_hits
+ num_matches: num_matches
+
+ rule-node4-blacklist.yaml: |
+ name: error-finder-node4
+ type: blacklist
+ compare_key: alert
+ index: node4*
+ blacklist:
+ - "Failed to run test"
+ - "Failed to execute in '30' seconds"
+ - "('Result', 'Failed')"
+ - "could not open socket: connection refused"
+ - "Input/output error"
+ - "dpdk|ERR|EAL: Error - exiting with code: 1"
+ - "Failed to execute in '30' seconds"
+ - "dpdk|ERR|EAL: Driver cannot attach the device"
+ - "dpdk|EMER|Cannot create lock on"
+ - "device not found"
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: pattern-match
+ label: failed
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ reason: alert
+ num_hits: num_hits
+ num_matches: num_matches
+ rule-node1-blacklist.yaml: |
+ name: error-finder-node1
+ type: blacklist
+ compare_key: alert
+ index: node1*
+ blacklist:
+ - "Failed to run test"
+ - "Failed to execute in '30' seconds"
+ - "('Result', 'Failed')"
+ - "could not open socket: connection refused"
+ - "Input/output error"
+ - "dpdk|ERR|EAL: Error - exiting with code: 1"
+ - "Failed to execute in '30' seconds"
+ - "dpdk|ERR|EAL: Driver cannot attach the device"
+ - "dpdk|EMER|Cannot create lock on"
+ - "device not found"
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: pattern-match
+ label: failed
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ reason: alert
+ num_hits: num_hits
+ num_matches: num_matches
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml
new file mode 100644
index 00000000..9e32e2b7
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml
@@ -0,0 +1,76 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: elastalert
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ run: elastalert
+ template:
+ metadata:
+ labels:
+ run: elastalert
+ spec:
+ volumes:
+ - name: econfig
+ configMap:
+ name: elastalert-config
+ items:
+ - key: elastalert.yaml
+ path: elastalert.yaml
+ - name: erule
+ configMap:
+ name: elastalert-rule
+ items:
+ - key: rule-node4-vswitch.yaml
+ path: rule-node4-vswitch.yaml
+ - key: rule-node4-blacklist.yaml
+ path: rule-node4-blacklist.yaml
+ - key: rule-node1-blacklist.yaml
+ path: rule-node1-blacklist.yaml
+ - name: ekey
+ configMap:
+ name: elastalert-key
+ items:
+ - key: elastalert.key
+ path: elastalert.key
+ - key: elastalert.pem
+ path: elastalert.pem
+ initContainers:
+ - name: init-myservice
+ image: busybox:1.28
+ command: ['sh', '-c', 'until nslookup logging-es-http; do echo "waiting for myservice"; sleep 2; done;']
+ containers:
+ - name: elastalert
+ image: adi0509/elastalert:latest
+ env:
+ - name: ES_USERNAME
+ value: "elastic"
+ - name: ES_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: logging-es-elastic-user
+ key: elastic
+ command: [ "sh", "-c"]
+ args: ["elastalert-create-index --config /opt/elastalert/elastalert.yaml; python -m elastalert.elastalert --config /opt/elastalert/elastalert.yaml"]
+ volumeMounts:
+ - mountPath: /opt/elastalert/
+ name: econfig
+ - mountPath: /opt/elastalert/rules/
+ name: erule
+ - mountPath: /opt/elastalert/key
+ name: ekey
diff --git a/tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml b/tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml
new file mode 100644
index 00000000..5b0a8476
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml
@@ -0,0 +1,231 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: elasticsearch.k8s.elastic.co/v1
+kind: Elasticsearch
+metadata:
+ name: logging
+spec:
+ version: 7.8.0
+ http:
+ service:
+ spec:
+ type: NodePort
+ ports:
+ - name: https
+ nodePort: 31111
+ port: 9200
+ protocol: TCP
+ targetPort: 9200
+ auth:
+ fileRealm:
+ - secretName: custom-user
+ nodeSets:
+ - name: vm1-master
+ count: 1
+ config:
+ node.master: true
+ node.data: false
+ node.attr.zone: vm1
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm1-master
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm1
+ - name: vm1-data
+ count: 1
+ config:
+ node.master: false
+ node.data: true
+ node.attr.zone: vm1
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm1-data
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm1
+ - name: vm2-master
+ count: 1
+ config:
+ node.master: true
+ node.data: false
+ node.attr.zone: vm2
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm2-master
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm2
+ - name: vm2-data
+ count: 1
+ config:
+ node.master: false
+ node.data: true
+ node.attr.zone: vm2
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm2-data
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm2
+ - name: vm3-master
+ count: 1
+ config:
+ node.master: true
+ node.data: false
+ node.attr.zone: vm3
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm3-master
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm3
+ - name: vm3-data
+ count: 1
+ config:
+ node.master: false
+ node.data: true
+ node.attr.zone: vm3
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm3-data
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm3
diff --git a/tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml b/tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml
new file mode 100644
index 00000000..3e71fe92
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml
@@ -0,0 +1,23 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+kind: Secret
+apiVersion: v1
+metadata:
+ name: custom-user
+stringData:
+ users: |-
+ elasticsearch:$2a$10$DzOu7/.Vo2FBDYworbUZe.LNL9tCUl18kpVZ6C/mvkKcXRzYrpmJu
+ users_roles: |-
+ kibana_admin:elasticsearch
+ superuser:elasticsearch
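The users entry maps the file-realm user elasticsearch to a bcrypt password hash. One hedged way to generate such a hash, assuming htpasswd from httpd-tools is available:

    # htpasswd -bnBC 10 "" '<password>' | tr -d ':\n'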
diff --git a/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml
new file mode 100644
index 00000000..36ff80d6
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml
@@ -0,0 +1,525 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: fluentd-config
+data:
+ index_template.json: |
+ {
+ "index_patterns": [
+ "node*"
+ ],
+ "settings": {
+ "index.lifecycle.name": "delete_policy",
+ "number_of_replicas": 1
+ }
+ }
+ fluent.conf: |
+ <source>
+ @type forward
+ port 24224
+ bind 0.0.0.0
+ tag log
+ </source>
+
+ #tag the .dat file
+ <match log>
+ @type rewrite_tag_filter
+ #Trex data
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/.*counts.dat/
+ tag countdat.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/.*errors.dat/
+ tag errordat.${tag}
+ </rule>
+ #Spirent data
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/stc-liveresults.dat.tx/
+ tag stcdattx.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/stc-liveresults.dat.rx/
+ tag stcdatrx.${tag}
+ </rule>
+ #Ixia data
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/.*Statistics.csv/
+ tag ixia.${tag}
+ </rule>
+ #log files
+ <rule>
+ key log_path
+ pattern /vsperf-overall/
+ tag vsperf.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /vswitchd/
+ tag vswitchd.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/var\/log\/userspace/
+ tag userspace.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/var\/log\/sriovdp/
+ tag sriovdp.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/var\/log\/pods/
+ tag pods.${tag}
+ </rule>
+ </match>
+
+ #to find error
+ @include error.conf
+
+ #to parse time-series data
+ @include time-series.conf
+
+ #to calculate time analysis
+ @include time-analysis.conf
+
+ #give tag 'node1' if host is worker and tag 'node4' if host is pod12-node4
+ <match **.log>
+ @type rewrite_tag_filter
+ <rule>
+ key host
+ pattern /pod12-node4/
+ tag node4
+ </rule>
+ <rule>
+ key host
+ pattern /worker/
+ tag node1
+ </rule>
+ </match>
+
+
+ <filter node1>
+ @type elasticsearch_genid
+ hash_id_key _hash1
+ </filter>
+
+ #send the node1 log to node1 index in elasticsearch
+ <match node1>
+ @type copy
+ <store>
+ @type elasticsearch
+ host logging-es-http
+ port 9200
+ scheme https
+ ssl_verify false
+ user "#{ENV['FLUENT_ELASTICSEARCH_USER']}"
+ password "#{ENV['FLUENT_ELASTICSEARCH_PASSWORD']}"
+ logstash_format true
+ logstash_prefix node1
+ logstash_dateformat %Y%m%d
+ flush_interval 1s
+ id_key _hash1
+ remove_keys _hash1
+
+ enable_ilm true
+ application_name ${tag}
+ index_date_pattern ""
+ ilm_policy_id delete_policy
+ template_name delpol-test
+ template_file /fluentd/etc/index_template.json
+ ilm_policy {
+ "policy": {
+ "phases": {
+ "delete": {
+ "min_age": "3m",
+ "actions": {
+ "delete": {}
+ }
+ }
+ }
+ }
+ }
+ </store>
+ <store>
+ @type stdout
+ </store>
+ </match>
+
+ <filter node4>
+ @type elasticsearch_genid
+ hash_id_key _hash4
+ </filter>
+
+ #send the node4 log to node4 index in elasticsearch
+ <match node4>
+ @type copy
+ <store>
+ @type elasticsearch
+ host logging-es-http
+ port 9200
+ scheme https
+ ssl_verify false
+ user "#{ENV['FLUENT_ELASTICSEARCH_USER']}"
+ password "#{ENV['FLUENT_ELASTICSEARCH_PASSWORD']}"
+ logstash_format true
+ logstash_prefix node4
+ logstash_dateformat %Y%m%d
+ flush_interval 1s
+ id_key _hash4
+ remove_keys _hash4
+
+ enable_ilm true
+ application_name ${tag}
+ index_date_pattern ""
+ ilm_policy_id delete_policy
+ template_name delpol-test
+ template_file /fluentd/etc/index_template.json
+ ilm_policy {
+ "policy": {
+ "phases": {
+ "delete": {
+ "min_age": "3m",
+ "actions": {
+ "delete": {}
+ }
+ }
+ }
+ }
+ }
+ </store>
+ <store>
+ @type stdout
+ </store>
+ </match>
+ error.conf: |
+ <filter vsperf.log>
+ @type parser
+ reserve_data true
+ key_name msg
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /(?<alert_time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}).*(?<alert>Failed to run test|Failed to execute in '30' seconds|\('Result', 'Failed'\)|could not open socket: connection refused|Input\/output error)/
+ </parse>
+ </filter>
+
+ <filter vswitchd.log>
+ @type parser
+ reserve_data true
+ key_name msg
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /(?<alert_time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*(?<alert>dpdk\|ERR\|EAL: Error - exiting with code: 1|Failed to execute in '30' seconds|dpdk\|ERR\|EAL: Driver cannot attach the device|dpdk\|EMER\|Cannot create lock on)/
+ </parse>
+ </filter>
+ <filter vswitchd.log>
+ @type parser
+ reserve_data true
+ key_name msg
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /(?<alert_time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*dpdk\|ERR\|VHOST_CONFIG:.*(?<alert>device not found)/
+ </parse>
+ </filter>
+ time-series.conf: |
+ #parse *counts.dat
+ <filter countdat.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(?<rx_port>\d*),(?<tx_port>\d*),(?<rx_pkts>[\.\d]*),(?<tx_pkts>[\.\d]*),(?<rx_pps>[\.\d]*),(?<tx_pps>[\.\d]*),(?<rx_bps_num>[\.\d]*),(?<rx_bps_den>[\.\d]*),(?<tx_bps_num>[\.\d]*),(?<tx_bps_den>[\.\d]*)$/
+ types rx_port:integer,tx_port:integer,rx_pkts:float,tx_pkts:float,rx_pps:float,tx_pps:float,rx_bps_num:float,rx_bps_den:float,tx_bps_num:float,tx_bps_den:float
+ </parse>
+ </filter>
+
+ #parse *errors.dat
+ <filter errordat.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(?<dropped>[\.\d]*),(?<ooo>[\.\d]*),(?<dup>[\.\d]*),(?<seq_too_high>[\.\d]*),(?<seq_too_low>[\.\d]*)$/
+ types ts:integer,dropped:integer,ooo:integer,dup:integer,seq_too_high:integer,seq_too_low:integer
+ </parse>
+ </filter>
+
+ #parse stc-liveresults.dat.tx
+ <filter stcdattx.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(?<StrId>[\.\d]*),(?<BlkId>[\.\d]*),(?<FrCnt>[\.\d]*),(?<FrRate>[\.\d]*),(?<ERxFrCnt>[\.\d]*),(?<OctCnt>[\.\d]*),(?<OctRate>[\.\d]*),(?<bitCnt>[\.\d]*),(?<bitRate>[\.\d]*)$/
+ types ts:integer,StrId:integer,BlkId:integer,FrCnt:integer,FrRate:integer,ERxFrCnt:integer,OctCnt:integer,OctRate:integer,bitCnt:integer,bitRate:integer
+ </parse>
+ </filter>
+
+ #parse stc-liveresults.dat.rx
+ <filter stcdatrx.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(.*, |)(?<RxPrt>.*),(?<DrpFrCnt>[\.\d]*),(?<SeqRnLen>[\.\d]*),(?<AvgLat>.*),(?<DrpFrRate>[\.\d]*),(?<FrCnt>[\.\d]*),(?<FrRate>[\.\d]*),(?<MaxLat>[\.\d]*),(?<MinLat>[\.\d]*),(?<OctCnt>[\.\d]*),(?<OctRate>[\.\d]*)$/
+ types ts:integer,DrpFrCnt:integer,SeqRnLen:integer,FrCnt:integer,FrRate:integer,MaxLat:integer,MinLat:integer,OctCnt:integer,OctRate:integer
+ </parse>
+ </filter>
+ time-analysis.conf: |
+ # 1. Test Duration - duration between the first line and the last line.
+ # 2. Setup Duration - duration between "Creating result directory" and "Class found"
+ # 3. Traffic Duration - duration between "Starting traffic at 0.1 Gbps speed" and "Traffic Results"
+ # 4. Iteration Durations - e.g. duration between "Starting traffic at 10.0 Gbps" and "Starting traffic at 5.0 Gbps speed"
+ # 5. Reporting Duration - duration between "Traffic Results" and "Write results to file"
+ # 6. Vswitchd start Duration - duration between "Starting vswitchd..." and "send_traffic with"
+
+ <match vsperf.log>
+ @type rewrite_tag_filter
+ <rule>
+ key msg
+ pattern /Creating result directory:/
+ tag firstline.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Write results to file/
+ tag lastline.${tag}
+ </rule>
+
+ <rule>
+ key msg
+ pattern /Class found/
+ tag setupend.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting traffic at 0.1 Gbps speed/
+ tag trafficstart.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Traffic Results/
+ tag trafficend.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting traffic at 10.0 Gbps/
+ tag iterationstart.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting traffic at 5.0 Gbps speed/
+ tag iterationend.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting vswitchd/
+ tag vswitchstart.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /send_traffic/
+ tag vswitch.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern ^.*$
+ tag logs.${tag}
+ </rule>
+ </match>
+
+ #############################################################################################
+ #save the starting log and append that log in ending log
+ #############################################################################################
+ <filter firstline.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${$vswitch_start="";$reportstart="";$firstline="";$traffic_start="";$iteration_start="";$firstline = record["msg"];return record["msg"];}
+ </record>
+ </filter>
+ <filter lastline.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" | "+$firstline + " | "+ $reportstart}
+ </record>
+ </filter>
+
+ <filter setupend.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" "+$firstline}
+ </record>
+ </filter>
+
+ <filter trafficstart.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${if $traffic_start.eql?("");$traffic_start=record["msg"];end;return record["msg"];}
+ </record>
+ </filter>
+ <filter trafficend.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${if $reportstart.eql?("");$reportstart=record["msg"];end;return record["msg"]+" "+$traffic_start;}
+ </record>
+ </filter>
+
+ <filter iterationstart.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${if $iteration_start.eql?("");$iteration_start=record["msg"];end;return record["msg"];}
+ </record>
+ </filter>
+ <filter iterationend.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" "+$iteration_start}
+ </record>
+ </filter>
+
+ <filter vswitchstart.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${$vswitch_start=record["msg"];return record["msg"];}
+ </record>
+ </filter>
+ <filter vswitch.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" "+$vswitch_start}
+ </record>
+ </filter>
+ #############################################################################################
+ # Parse the start/end timestamps out of the combined log lines
+ #############################################################################################
+ <filter setupend.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<setupend>.*) : Class found: Trex. (?<setupstart>.*) : .*$/
+ </parse>
+ </filter>
+ <filter iterationend.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<iterationend>.*) : Starting traffic at 5.0 Gbps speed (?<iterationstart>.*) : Starting traffic at 10.0 Gbps speed$/
+ </parse>
+ </filter>
+ <filter vswitch.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<vswitch>.*) : send_traffic with <.*> (?<vswitchstart>.*) : Starting vswitchd...$/
+ </parse>
+ </filter>
+ <filter trafficend.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<trafficend>.*) : Traffic Results: (?<trafficstart>.*) : Starting traffic at 0.1 Gbps speed/
+ </parse>
+ </filter>
+ <filter lastline.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<lastline>.*) : Write results to file: .* \| (?<firstline>.*) : Creating result directory: .* \| (?<reportstart>.*) : Traffic Results:$/
+ </parse>
+ </filter>
+ #############################################################################################
+ # Calculate the durations (in seconds)
+ #############################################################################################
+ <filter setupend.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ setup_duration ${ require 'time';Time.parse(record["setupend"])-Time.parse(record["setupstart"]); }
+ </record>
+ </filter>
+ <filter iterationend.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ iteration_duration ${ require 'time';Time.parse(record["iterationend"])-Time.parse(record["iterationstart"]); }
+ </record>
+ </filter>
+ <filter vswitch.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ vswitch_duration ${ require 'time';Time.parse(record["vswitch"])-Time.parse(record["vswitchstart"]); }
+ </record>
+ </filter>
+ <filter trafficend.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ traffic_duration ${ require 'time';Time.parse(record["trafficend"])-Time.parse(record["trafficstart"]); }
+ </record>
+ </filter>
+ <filter lastline.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ test_duration ${ require 'time';Time.parse(record["lastline"])-Time.parse(record["firstline"]); }
+ </record>
+ <record>
+ report_duration ${ require 'time';Time.parse(record["lastline"])-Time.parse(record["reportstart"]); }
+ </record>
+ </filter>
+ #############################################################################################
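+ # Example (hypothetical values, for illustration only): after the filters above, a lastline record
+ # carries the duration fields in seconds, e.g.
+ #   {"msg":"... Write results to file ...","test_duration":842.3,"report_duration":12.7}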
diff --git a/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml
new file mode 100644
index 00000000..9a43b82f
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml
@@ -0,0 +1,34 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: Service
+metadata:
+ name: fluentd
+ labels:
+ run: fluentd
+spec:
+ type: NodePort
+ ports:
+ - name: tcp
+ port: 32224
+ targetPort: 24224
+ protocol: TCP
+ nodePort: 32224
+ - name: udp
+ port: 32224
+ targetPort: 24224
+ protocol: UDP
+ nodePort: 32224
+ selector:
+ run: fluentd
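+# Usage sketch (assumption, not part of this manifest): with this NodePort in place, connectivity can
+# be smoke-tested from outside the cluster over the forward protocol using the fluent-cat utility
+# bundled with fluentd/td-agent, e.g.:
+#   echo '{"msg":"hello"}' | fluent-cat -h <any-node-ip> -p 32224 test.log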
diff --git a/tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml
new file mode 100644
index 00000000..3830f682
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml
@@ -0,0 +1,65 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: fluentd
+spec:
+ replicas: 2
+ selector:
+ matchLabels:
+ run: fluentd
+ template:
+ metadata:
+ labels:
+ run: fluentd
+ spec:
+ volumes:
+ - name: fconfig
+ configMap:
+ name: fluentd-config
+ items:
+ - key: fluent.conf
+ path: fluent.conf
+ - key: error.conf
+ path: error.conf
+ - key: time-series.conf
+ path: time-series.conf
+ - key: time-analysis.conf
+ path: time-analysis.conf
+ - key: index_template.json
+ path: index_template.json
+ initContainers:
+ - name: init-myservice
+ image: busybox:1.28
+ command: ['sh', '-c', 'until nslookup logging-es-http; do echo "waiting for elasticsearch"; sleep 2; done;']
+ containers:
+ - name: fluentd
+ image: adi0509/fluentd:latest
+ env:
+ - name: FLUENT_ELASTICSEARCH_USER
+ value: "elastic"
+ - name: FLUENT_ELASTICSEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: logging-es-elastic-user
+ key: elastic
+ ports:
+ - containerPort: 24224
+ protocol: TCP
+ - containerPort: 24224
+ protocol: UDP
+ volumeMounts:
+ - name: fconfig
+ mountPath: /fluentd/etc/
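+# Verification sketch (assumption): once applied, the deployment and its mounted config can be
+# checked with, for example:
+#   kubectl -n logging get deploy fluentd
+#   kubectl -n logging logs deploy/fluentd | head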
diff --git a/tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml b/tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml
new file mode 100644
index 00000000..5ec6937e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml
@@ -0,0 +1,23 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: kibana.k8s.elastic.co/v1
+kind: Kibana
+metadata:
+ name: logging
+spec:
+ version: 7.8.0
+ count: 1
+ elasticsearchRef:
+ name: logging
+ namespace: logging
diff --git a/tools/lma/ansible-server/roles/logging/files/namespace.yaml b/tools/lma/ansible-server/roles/logging/files/namespace.yaml
new file mode 100644
index 00000000..6964af5c
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/namespace.yaml
@@ -0,0 +1,17 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: logging
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml
new file mode 100644
index 00000000..f5a11e80
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml
@@ -0,0 +1,36 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: nginx-config
+data:
+ default.conf: |
+ server {
+ listen 80 ssl;
+ ssl_certificate /etc/ssl/certs/kibana-access.pem;
+ ssl_certificate_key /etc/ssl/private/kibana-access.key;
+
+ location / {
+ proxy_pass https://logging-kb-http:5601;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection 'upgrade';
+ proxy_set_header Host $host;
+ proxy_cache_bypass $http_upgrade;
+ proxy_read_timeout 300s;
+ proxy_connect_timeout 75s;
+ }
+ }
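+ # Note (comment only): TLS is terminated on the listen port above, so the proxy must be reached
+ # over https even though it listens on port 80. Through the accompanying nginx-service manifest
+ # (assumption: nodePort 32000) that becomes, for example:
+ #   curl -k https://<any-node-ip>:32000/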
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml
new file mode 100644
index 00000000..93d7d6ec
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml
@@ -0,0 +1,68 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: nginx-key
+data:
+ kibana-access.key: |
+ -----BEGIN PRIVATE KEY-----
+ MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDI92SBrcEdzxqS
+ rt883bVcj4F4RuKtm+AjjOEhbFUd3XOp5Wa5NzyYQSTP9ZJHG0dYiTAKOJBVcgbc
+ YRMNUAzHAIskf1q2/SvwyLNRMJLkBD5HHjbnEcuKQm/+nPdzkfvo2zfRNTDPKS83
+ HqFQ779hT8ZLkSzoPuR0QD17ZNWYVMZv/r9wqnjX8U/k5AjrJOIxuaO9nGAgv2Pu
+ Qm6wuU8UBEaMRgPVHQ3ztflQr9QPr/S6HU0cl4Gu+Nwid6iC1RVYxANNq7E7wRvq
+ GMKRS5cA9Nlnu/b7IEI4LSx5yeTSDzwmZKTNnUWi2cpqk30M4G4cUokoz9bP+62I
+ YWEh3B8HAgMBAAECggEBAI1luzqepTSzBhBUp88sczGX6tFUlqLt/Ism0TPyBAVK
+ TdopBNima6T4mM0VDIGpSM6bX8ihObRU0Uz3pC8GtqbB1CSu0oXTpbn5jGlAkumJ
+ rsPdF2YHGD3ENwZfLKANA8A3lZNGKHxpjsXqcDgBJ5dxSKTclUsnDRhaJqgOL1bI
+ d9QCXdA1vbpxHDJWSo73E7omv3AyHi3HxMWU4gzyerUFSMFGqm0W5dPeeresNE3a
+ bv9/46YdykufuRuJZqsUDLCgUUcJPhbE5iOrB4iv8oaDqT0onxwzRQTSgidPxbp2
+ EmjVHpFCACltOKSqELM4+PQFCk8xUBya8HWD5UHrVDkCgYEA4y3WwmhtLUT/g3G3
+ cowvmxjgPl6xqkqTA7Xcdc3sk+6/jS1kayT5TL1qfpd1QL/K617jva9mfSMZ8ei9
+ Y7M/2QkSb0uHKulGR0+if+7sT0L8OYO/OE7c+HTZmZK4hD1CCJN2M34D9Qo2fzQ6
+ 4v+AO1wGiAtiNev0YIBKYNSco+sCgYEA4nY8m93XuC19z991sFRvE0UBeKcN2esg
+ TwY9UuYHJ56s+6UozkUgZArwYFW8LWFeIjkrrKELBNDsmJtTZ006TyUWxY/ccdjV
+ fJZTLV3niv6IQzy74aOmXV2vtNjxyBlllT9mvig6T0t0TvAtolsuSVHBL09zxcy4
+ wN4pGIfqllUCgYBYLq/hMKXIX7MK87YwqYfFHWfV7e3q2x2r4AjeVXuShKcoBsmm
+ 6Wg3yIKw9tuVsZzzthaSx6XxxxFIHH5/V9Hdzi6wstGZ74jPH3NFU5m4vpinPqOY
+ GMyfSMQ6X4BuHFUofQzxueWRVVCIGd8Nw/2jjPogDsMliRyH5OR6J61R1wKBgEa6
+ 8SEpf7fJlZL4UzS4mlylX9lEK+JVOqkT5NFggPmR6KtMIVuTYZN9iyg7fuOZlqIP
+ wyFOxzdA3bSoRrtr9ntDtUINNaflNoCMHvx7aNcTupFthazqxQpCOZ+9Zn691+lu
+ fPOFcvjTM0d4YnhkDCfgPfs90IYF8+phOOqtgMplAoGBAI+mcaUH7ADYxlONCi1E
+ gNHRvHJRBdQBaydKUfPxbe3vS5QJb8Gb5RU46vDl3w+YHUVwUi+Hj68zuKExXxhD
+ 9CGTAQIejtHWScZ1Djl3bcvNa/czHyuNVsGwvJ3fy1JzpxRmUUMPSdJ90A1n57Tk
+ LFEmZhwaj7YF869wfKngQ57d
+ -----END PRIVATE KEY-----
+ kibana-access.pem: |
+ -----BEGIN CERTIFICATE-----
+ MIIDVzCCAj+gAwIBAgIJAIQzf1mxHsvgMA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNV
+ BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
+ Q29tcGFueSBMdGQwHhcNMjAwNjI1MTY1NzQ3WhcNMjEwNjI1MTY1NzQ3WjBCMQsw
+ CQYDVQQGEwJYWDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZh
+ dWx0IENvbXBhbnkgTHRkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA
+ yPdkga3BHc8akq7fPN21XI+BeEbirZvgI4zhIWxVHd1zqeVmuTc8mEEkz/WSRxtH
+ WIkwCjiQVXIG3GETDVAMxwCLJH9atv0r8MizUTCS5AQ+Rx425xHLikJv/pz3c5H7
+ 6Ns30TUwzykvNx6hUO+/YU/GS5Es6D7kdEA9e2TVmFTGb/6/cKp41/FP5OQI6yTi
+ MbmjvZxgIL9j7kJusLlPFARGjEYD1R0N87X5UK/UD6/0uh1NHJeBrvjcIneogtUV
+ WMQDTauxO8Eb6hjCkUuXAPTZZ7v2+yBCOC0secnk0g88JmSkzZ1FotnKapN9DOBu
+ HFKJKM/Wz/utiGFhIdwfBwIDAQABo1AwTjAdBgNVHQ4EFgQUrz/R+M2XkTTfjrau
+ VVBW6+pdatgwHwYDVR0jBBgwFoAUrz/R+M2XkTTfjrauVVBW6+pdatgwDAYDVR0T
+ BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAyIhJLwg9oTil0Rb1zbYQb0Mr0UYz
+ rlS4f8QkxygkGLAZ8q9VkR+NpKfqhYDSHofGg5Yg5/p54NRJh5M4ASuM7N9AK0LH
+ KbCvS+YRNWhmo+7H7zjDNkV8FbzG41nkt9jQjaKFF7GdKr4HkWvupMX6PwsAZ0jI
+ b2Y6QzFQP9wF0QoBHrK42u3eWbfYv2IIDd6xsV90ilKRDtKkCiI4dyKGK46YDyZB
+ 3eqJ08Pm67HDbxQLydRXkNJvd33PASRgE/VOh44n3xWG+Gu4IMz7EO/4monyuv1Q
+ V2v1A9NV+ZnAq4PT7WJY7fWYavDUr+kwxMAGNQkG/Cg3X4FYrRwrq6gk7Q==
+ -----END CERTIFICATE-----
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml
new file mode 100644
index 00000000..8aea53dd
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml
@@ -0,0 +1,28 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: Service
+metadata:
+ name: nginx
+ labels:
+ run: nginx
+spec:
+ type: NodePort
+ ports:
+ - port: 8000
+ targetPort: 80
+ protocol: TCP
+ nodePort: 32000
+ selector:
+ run: nginx
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml
new file mode 100644
index 00000000..fdf5c835
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml
@@ -0,0 +1,58 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: nginx
+spec:
+ replicas: 2
+ selector:
+ matchLabels:
+ run: nginx
+ template:
+ metadata:
+ labels:
+ run: nginx
+ spec:
+ volumes:
+ - name: nconfig
+ configMap:
+ name: nginx-config
+ items:
+ - key: default.conf
+ path: default.conf
+ - name: nkey
+ configMap:
+ name: nginx-key
+ items:
+ - key: kibana-access.key
+ path: kibana-access.key
+ - key: kibana-access.pem
+ path: kibana-access.pem
+ initContainers:
+ - name: init-myservice
+ image: busybox:1.28
+ command: ['sh', '-c', 'until nslookup logging-kb-http; do echo "waiting for kibana"; sleep 2; done;']
+ containers:
+ - name: nginx
+ image: nginx
+ volumeMounts:
+ - mountPath: /etc/nginx/conf.d/
+ name: nconfig
+ - mountPath: /etc/ssl/certs/
+ name: nkey
+ - mountPath: /etc/ssl/private/
+ name: nkey
+ ports:
+ - containerPort: 80
diff --git a/tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml b/tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml
new file mode 100644
index 00000000..c1a96077
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml
@@ -0,0 +1,105 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-master-vm1
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm1-master
+ nfs:
+ server: 10.10.120.211
+ path: "/srv/nfs/master"
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-data-vm1
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm1-data
+ nfs:
+ server: 10.10.120.211
+ path: "/srv/nfs/data"
+
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-master-vm2
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm2-master
+ nfs:
+ server: 10.10.120.203
+ path: "/srv/nfs/master"
+
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-data-vm2
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm2-data
+ nfs:
+ server: 10.10.120.203
+ path: "/srv/nfs/data"
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-master-vm3
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm3-master
+ nfs:
+ server: 10.10.120.204
+ path: "/srv/nfs/master"
+
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-data-vm3
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm3-data
+ nfs:
+ server: 10.10.120.204
+ path: "/srv/nfs/data"
diff --git a/tools/lma/ansible-server/roles/logging/files/storageClass.yaml b/tools/lma/ansible-server/roles/logging/files/storageClass.yaml
new file mode 100644
index 00000000..a2f1e3aa
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/storageClass.yaml
@@ -0,0 +1,73 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#storage class for VM1 master
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm1-master
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM1 data
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm1-data
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM2 master
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm2-master
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM2 data
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm2-data
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM3 master
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm3-master
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM3 data
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm3-data
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
diff --git a/tools/lma/ansible-server/roles/logging/tasks/main.yml b/tools/lma/ansible-server/roles/logging/tasks/main.yml
new file mode 100644
index 00000000..dcbf4d4d
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/tasks/main.yml
@@ -0,0 +1,165 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#EFK setup in k8s cluster
+
+#***********************************************************************************************************
+#copy all yaml to /tmp/files/
+#***********************************************************************************************************
+- name: copy all yaml to /tmp/files/
+ copy:
+ src: ../files/
+ dest: /tmp/files/
+
+#***********************************************************************************************************
+#Creating Namespace
+#***********************************************************************************************************
+- name: Creating Namespace
+ k8s:
+ state: present
+ src: /tmp/files/namespace.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#creating Storage Class
+#***********************************************************************************************************
+- name: creating Storage Class
+ k8s:
+ state: present
+ src: /tmp/files/storageClass.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#creating Persistent Volume
+#***********************************************************************************************************
+- name: creating Persistent Volume
+ k8s:
+ state: present
+ src: /tmp/files/persistentVolume.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#add user
+#***********************************************************************************************************
+- name: add user
+ k8s:
+ state: present
+ src: /tmp/files/elasticsearch/user-secret.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#Starting Elasticsearch operator
+#***********************************************************************************************************
+- name: Starting Elasticsearch operator
+ shell: kubectl apply -f https://download.elastic.co/downloads/eck/1.2.0/all-in-one.yaml
+ ignore_errors: yes
+
+#***********************************************************************************************************
+#Starting Elasticsearch
+#***********************************************************************************************************
+- name: Starting Elasticsearch
+ k8s:
+ state: present
+ src: /tmp/files/elasticsearch/elasticsearch.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#Starting Kibana
+#***********************************************************************************************************
+- name: Starting Kibana
+ k8s:
+ state: present
+ src: /tmp/files/kibana/kibana.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#Starting nginx
+#***********************************************************************************************************
+- name: creating nginx configmap
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx-conf-cm.yaml
+ namespace: logging
+
+- name: creating nginx key configmap
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx-key-cm.yaml
+ namespace: logging
+
+- name: creating nginx pod
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx.yaml
+ namespace: logging
+
+- name: creating nginx service
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx-service.yaml
+ namespace: logging
+#***********************************************************************************************************
+#Starting fluentd
+#***********************************************************************************************************
+- name: creating fluentd configmap
+ k8s:
+ state: present
+ src: /tmp/files/fluentd/fluent-cm.yaml
+ namespace: logging
+
+- name: creating fluentd pod
+ k8s:
+ state: present
+ src: /tmp/files/fluentd/fluent.yaml
+ namespace: logging
+
+- name: creating fluentd service
+ k8s:
+ state: present
+ src: /tmp/files/fluentd/fluent-service.yaml
+ namespace: logging
+#***********************************************************************************************************
+#Starting elastalert
+#***********************************************************************************************************
+- name: creating elastalert config configmap
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/ealert-conf-cm.yaml
+ namespace: logging
+
+- name: creating elastalert key configmap
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/ealert-key-cm.yaml
+ namespace: logging
+
+- name: creating elastalert rule configmap
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/ealert-rule-cm.yaml
+ namespace: logging
+
+- name: creating elastalert pod
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/elastalert.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#removing /tmp/files
+#***********************************************************************************************************
+- name: Removing /tmp/files
+ file:
+ path: "/tmp/files"
+ state: absent
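+
+# Verification sketch (assumption, not an additional task): after this role completes, the logging
+# stack can be inspected with, for example:
+#   kubectl -n logging get pods,svc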
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml
new file mode 100644
index 00000000..7b9abc47
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml
@@ -0,0 +1,37 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: ConfigMap
+apiVersion: v1
+metadata:
+ name: alertmanager-config
+ namespace: monitoring
+data:
+ config.yml: |-
+ global:
+ route:
+ receiver: "webhook"
+ group_by: ['alertname', 'priority']
+ group_wait: 1s
+ group_interval: 5s
+ repeat_interval: 5s
+ routes:
+ - match:
+ severity: critical
+
+ receivers:
+ - name: "webhook"
+ webhook_configs:
+ - url: 'http://10.10.120.20/alertmanager'
+ send_resolved: true
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml
new file mode 100644
index 00000000..f1c3d78e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml
@@ -0,0 +1,62 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ name: alertmanager
+ namespace: monitoring
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: alertmanager
+ labels:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ spec:
+ containers:
+ - name: alertmanager
+ image: prom/alertmanager
+ args:
+ - --config.file=/etc/alertmanager/config.yml
+ - --storage.path=/alertmanager
+ - --cluster.peer=alertmanager1:6783
+ - --cluster.listen-address=0.0.0.0:6783
+ ports:
+ - containerPort: 9093
+ - containerPort: 6783
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: config-volume
+ mountPath: /etc/alertmanager
+ - name: alertmanager
+ mountPath: /alertmanager
+ restartPolicy: Always
+ volumes:
+ - name: config-volume
+ configMap:
+ name: alertmanager-config
+ - name: alertmanager
+ emptyDir: {}
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml
new file mode 100644
index 00000000..c67517d3
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml
@@ -0,0 +1,41 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: alertmanager
+ app: alertmanager
+ name: alertmanager
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/path: /
+ prometheus.io/port: '8080'
+
+spec:
+ selector:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ type: NodePort
+ ports:
+ - name: "9093"
+ port: 9093
+ targetPort: 9093
+ nodePort: 30930
+ - name: "6783"
+ port: 6783
+ targetPort: 6783
+ nodePort: 30679
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml
new file mode 100644
index 00000000..18b76456
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml
@@ -0,0 +1,62 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ name: alertmanager1
+ namespace: monitoring
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: alertmanager1
+ labels:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ spec:
+ containers:
+ - name: alertmanager1
+ image: prom/alertmanager
+ args:
+ - --config.file=/etc/alertmanager/config.yml
+ - --storage.path=/alertmanager
+ - --cluster.peer=alertmanager:6783
+ - --cluster.listen-address=0.0.0.0:6783
+ ports:
+ - containerPort: 9093
+ - containerPort: 6783
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: config-volume
+ mountPath: /etc/alertmanager
+ - name: alertmanager
+ mountPath: /alertmanager
+ restartPolicy: Always
+ volumes:
+ - name: config-volume
+ configMap:
+ name: alertmanager-config
+ - name: alertmanager
+ emptyDir: {}
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml
new file mode 100644
index 00000000..66d0d2b1
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml
@@ -0,0 +1,42 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: alertmanager1
+ app: alertmanager1
+ name: alertmanager1
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/path: /
+ prometheus.io/port: '8080'
+
+spec:
+ selector:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ type: NodePort
+ ports:
+ - name: "9093"
+ port: 9093
+ targetPort: 9093
+ nodePort: 30931
+ - name: "6783"
+ port: 6783
+ targetPort: 6783
+ nodePort: 30678
+
diff --git a/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml
new file mode 100644
index 00000000..6a62985e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml
@@ -0,0 +1,79 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: cadvisor
+ namespace: monitoring
+ labels:
+ adi10hero.monitoring: cadvisor
+ app: cadvisor
+spec:
+ selector:
+ matchLabels:
+ app: cadvisor
+ adi10hero.monitoring: cadvisor
+ template:
+ metadata:
+ name: cadvisor
+ labels:
+ adi10hero.monitoring: cadvisor
+ app: cadvisor
+ spec:
+ containers:
+ - image: gcr.io/google-containers/cadvisor
+ name: cadvisor
+ ports:
+ - containerPort: 8080
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - mountPath: /rootfs
+ name: cadvisor-hostpath0
+ readOnly: true
+ - mountPath: /var/run
+ name: cadvisor-hostpath1
+ - mountPath: /sys
+ name: cadvisor-hostpath2
+ readOnly: true
+ - mountPath: /sys/fs/cgroup
+ name: cadvisor-hostpath3
+ readOnly: true
+ - mountPath: /dev/disk
+ name: cadvisor-hostpath4
+ readOnly: true
+ - mountPath: /var/lib/docker
+ name: cadvisor-hostpath5
+ readOnly: true
+ restartPolicy: Always
+ volumes:
+ - hostPath:
+ path: /
+ name: cadvisor-hostpath0
+ - hostPath:
+ path: /var/run
+ name: cadvisor-hostpath1
+ - hostPath:
+ path: /sys
+ name: cadvisor-hostpath2
+ - hostPath:
+ path: /cgroup
+ name: cadvisor-hostpath3
+ - hostPath:
+ path: /dev/disk/
+ name: cadvisor-hostpath4
+ - hostPath:
+ path: /var/lib/docker/
+ name: cadvisor-hostpath5
diff --git a/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml
new file mode 100644
index 00000000..734240b8
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml
@@ -0,0 +1,30 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: cadvisor
+ adi10hero.monitoring: cadvisor
+ name: cadvisor
+ namespace: monitoring
+spec:
+ ports:
+ - name: "8080"
+ port: 8080
+ targetPort: 8080
+ selector:
+ app: cadvisor
+ adi10hero.monitoring: cadvisor
diff --git a/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml
new file mode 100644
index 00000000..b6bfe0b6
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml
@@ -0,0 +1,51 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: collectd-exporter
+ namespace: monitoring
+ labels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: collectd-exporter
+ labels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+ spec:
+ containers:
+ - args:
+ - --collectd.listen-address=0.0.0.0:25826
+ image: prom/collectd-exporter
+ name: collectd-exporter
+ ports:
+ - containerPort: 9103
+ - containerPort: 25826
+ protocol: UDP
+ securityContext:
+ runAsUser: 0
+ restartPolicy: Always
+ volumes: null
+
diff --git a/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml
new file mode 100644
index 00000000..5609d04a
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: collectd-exporter
+ namespace: monitoring
+ labels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+spec:
+ ports:
+ - name: "9103"
+ port: 9103
+ nodePort: 30103
+ - name: "25826"
+ port: 25826
+ protocol: UDP
+ nodePort: 30826
+ selector:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+ type: NodePort
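+# Usage sketch (assumption): collectd clients using the network plugin would send to any cluster
+# node on UDP 30826, which this service forwards to the exporter's 25826 listener; the resulting
+# metrics are then scrapeable on nodePort 30103, e.g.:
+#   curl http://<any-node-ip>:30103/metrics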
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml
new file mode 100644
index 00000000..e2b8c9fa
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-datasources
+ namespace: monitoring
+data:
+ prometheus.yaml: |-
+ {
+ "apiVersion": 1,
+ "datasources": [
+ {
+ "access":"proxy",
+ "editable": true,
+ "name": "prometheus",
+ "orgId": 1,
+ "type": "prometheus",
+ "url": "http://prometheus-main:9090",
+ "version": 1
+ }
+ ]
+ }
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml
new file mode 100644
index 00000000..afb00948
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml
@@ -0,0 +1,68 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ adi10hero.monitoring: grafana
+ app: grafana
+ name: grafana
+ namespace: monitoring
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ adi10hero.monitoring: grafana
+ app: grafana
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: grafana
+ labels:
+ adi10hero.monitoring: grafana
+ app: grafana
+ spec:
+ containers:
+ - name: grafana
+ image: grafana/grafana
+ ports:
+ - containerPort: 3000
+ env:
+ - name: GF_SECURITY_ADMIN_PASSWORD
+ value: admin
+ - name: GF_SECURITY_ADMIN_USER
+ value: admin
+ - name: GF_SERVER_DOMAIN
+ value: 10.10.120.20
+ - name: GF_SERVER_ROOT_URL
+ value: "%(protocol)s://%(domain)s:/metrics"
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - mountPath: /var/lib/grafana
+ name: grafana-storage
+ - mountPath: /etc/grafana/provisioning/datasources
+ name: grafana-datasources
+ readOnly: false
+ restartPolicy: Always
+ volumes:
+ - name: grafana-storage
+ persistentVolumeClaim:
+ claimName: grafana-pvc
+ - name: grafana-datasources
+ configMap:
+ defaultMode: 420
+ name: grafana-datasources
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml
new file mode 100644
index 00000000..06bcc31b
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml
@@ -0,0 +1,31 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: grafana-pv
+ namespace: monitoring
+ labels:
+ app: grafana-pv
+ adi10hero.monitoring: grafana-pv
+spec:
+ storageClassName: monitoring
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server: 10.10.120.211
+ path: "/usr/share/monitoring_data/grafana"
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml
new file mode 100644
index 00000000..2c2955c8
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml
@@ -0,0 +1,33 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: grafana-pvc
+ namespace: monitoring
+ labels:
+ app: grafana-pvc
+ adi10hero.monitoring: grafana-pvc
+spec:
+ accessModes:
+ - ReadWriteMany
+ storageClassName: monitoring
+ resources:
+ requests:
+ storage: 4Gi
+ selector:
+ matchLabels:
+ app: grafana-pv
+ adi10hero.monitoring: grafana-pv
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml
new file mode 100644
index 00000000..d1c9c9cc
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml
@@ -0,0 +1,36 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: grafana
+ namespace: monitoring
+ labels:
+ app: grafana
+ adi10hero.monitoring: grafana
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '3000'
+spec:
+ selector:
+ app: grafana
+ adi10hero.monitoring: grafana
+ type: NodePort
+ ports:
+ - name: "3000"
+ port: 3000
+ targetPort: 3000
+ nodePort: 30000
+
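+# Access sketch (assumption): with this NodePort, Grafana is reachable at http://<any-node-ip>:30000
+# using the admin credentials set in grafana-deployment.yaml.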
diff --git a/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml
new file mode 100644
index 00000000..af3c5469
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml
@@ -0,0 +1,36 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: kube-state-metrics
+ namespace: kube-system
+spec:
+ selector:
+ matchLabels:
+ app: kube-state-metrics
+ replicas: 1
+ template:
+ metadata:
+ labels:
+ app: kube-state-metrics
+ spec:
+ #serviceAccountName: prometheus
+ containers:
+ - name: kube-state-metrics
+ image: quay.io/coreos/kube-state-metrics:v1.2.0
+ ports:
+ - containerPort: 8080
+ name: monitoring
diff --git a/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml
new file mode 100644
index 00000000..8d294391
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml
@@ -0,0 +1,26 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: Service
+apiVersion: v1
+metadata:
+ name: kube-state-metrics
+ namespace: kube-system
+spec:
+ selector:
+ app: kube-state-metrics
+ ports:
+ - protocol: TCP
+ port: 8080
+ targetPort: 8080
diff --git a/tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml b/tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml
new file mode 100644
index 00000000..f1c9b889
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml
@@ -0,0 +1,18 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: monitoring
diff --git a/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml
new file mode 100644
index 00000000..9334b2f4
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml
@@ -0,0 +1,80 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: node-exporter-daemonset
+ namespace: monitoring
+ labels:
+ app: node-exporter
+ adi10hero.monitoring: node-exporter
+spec:
+ selector:
+ matchLabels:
+ app: node-exporter
+ adi10hero.monitoring: node-exporter
+ template:
+ metadata:
+ labels:
+ app: node-exporter
+ adi10hero.monitoring: node-exporter
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/port: "9100"
+ spec:
+ hostPID: true
+ hostIPC: true
+ hostNetwork: true
+ containers:
+ - ports:
+ - containerPort: 9100
+ protocol: TCP
+ resources:
+ requests:
+ cpu: 0.15
+ securityContext:
+ runAsUser: 0
+ privileged: true
+ image: prom/node-exporter:v0.15.2
+ args:
+ - --path.procfs
+ - /host/proc
+ - --path.sysfs
+ - /host/sys
+ - --collector.filesystem.ignored-mount-points
+ - '"^/(sys|proc|dev|host|etc)($|/)"'
+ name: node-exporter
+ volumeMounts:
+ - name: dev
+ mountPath: /host/dev
+ - name: proc
+ mountPath: /host/proc
+ - name: sys
+ mountPath: /host/sys
+ - name: rootfs
+ mountPath: /rootfs
+ volumes:
+ - name: proc
+ hostPath:
+ path: /proc
+ - name: dev
+ hostPath:
+ path: /dev
+ - name: sys
+ hostPath:
+ path: /sys
+ - name: rootfs
+ hostPath:
+ path: /
diff --git a/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml
new file mode 100644
index 00000000..dd0aea4d
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml
@@ -0,0 +1,33 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: node-exporter
+ app: node-exporter
+ name: node-exporter
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/port: "9100"
+spec:
+ ports:
+ - name: "node-exporter"
+ port: 9100
+ targetPort: 9100
+ selector:
+ adi10hero.monitoring: node-exporter
+ app: node-exporter
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml
new file mode 100644
index 00000000..58b220a8
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: prometheus-main
+ app: prometheus-main
+ name: prometheus-main
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '9090'
+spec:
+ type: NodePort
+ ports:
+ - name: prometheus-main
+ protocol: TCP
+ port: 9090
+ nodePort: 30902
+ selector:
+ adi10hero.monitoring: prometheus1
+ app: prometheus
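This service publishes Prometheus on NodePort 30902, while the per-instance services further down use 30900 and 30901. A short sketch, assuming the requests package, a reachable node address, and that the service selects a running Prometheus pod, which asks the Prometheus HTTP API whether the scrape targets configured below are healthy:

# Sketch: list scrape-target health via the Prometheus HTTP API (NODE_IP is hypothetical).
import requests

NODE_IP = "192.0.2.10"   # placeholder node address
PORT = 30902             # nodePort from main-prometheus-service.yaml

resp = requests.get(f"http://{NODE_IP}:{PORT}/api/v1/targets", timeout=5)
resp.raise_for_status()
for target in resp.json()["data"]["activeTargets"]:
    print(target["labels"].get("job"), target["health"])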
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml
new file mode 100644
index 00000000..917f978f
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml
@@ -0,0 +1,609 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: prometheus-config
+ namespace: monitoring
+data:
+ alert.rules: |-
+ groups:
+ - name: targets
+ rules:
+ - alert: MonitorServiceDown
+ expr: up == 0
+ for: 30s
+ labels:
+ severity: critical
+ annotations:
+ summary: "Monitor service non-operational"
+ description: "Service {{ $labels.instance }} is down."
+ - alert: HighCpuLoad
+ expr: node_load1 > 1.9
+ for: 15s
+ labels:
+ severity: critical
+ annotations:
+ summary: "Service under high load"
+ description: "Docker host is under high load, the avg load 1m is at {{ $value}}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
+
+ - name: host and hardware
+ rules:
+ - alert: HostHighCpuLoad
+ expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host high CPU load (instance {{ $labels.instance }})"
+ description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostSwapIsFillingUp
+ expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host swap is filling up (instance {{ $labels.instance }})"
+ description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HighMemoryLoad
+ expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100 > 85
+ for: 30s
+ labels:
+ severity: warning
+ annotations:
+ summary: "Server memory is almost full"
+ description: "Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
+
+ - alert: HighStorageLoad
+ expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
+ for: 30s
+ labels:
+ severity: warning
+ annotations:
+ summary: "Server storage is almost full"
+ description: "Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
+
+ - alert: HostNetworkTransmitErrors
+ expr: increase(node_network_transmit_errs_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host Network Transmit Errors (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last five minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostOutOfMemory
+ expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host out of memory (instance {{ $labels.instance }})"
+ description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostMemoryUnderMemoryPressure
+ expr: rate(node_vmstat_pgmajfault[1m]) > 1000
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host memory under memory pressure (instance {{ $labels.instance }})"
+ description: "The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualNetworkThroughputIn
+ expr: sum by (instance) (irate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual network throughput in (instance {{ $labels.instance }})"
+ description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualNetworkThroughputOut
+ expr: sum by (instance) (irate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual network throughput out (instance {{ $labels.instance }})"
+ description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualDiskRateRead
+ expr: sum by (instance) (irate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual disk read rate (instance {{ $labels.instance }})"
+ description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualDiskRateWrite
+ expr: sum by (instance) (irate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual disk write rate (instance {{ $labels.instance }})"
+ description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostOutOfDiskSpace
+ expr: (node_filesystem_avail_bytes{mountpoint="/rootfs"} * 100) / node_filesystem_size_bytes{mountpoint="/rootfs"} < 10
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host out of disk space (instance {{ $labels.instance }})"
+ description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostDiskWillFillIn4Hours
+ expr: predict_linear(node_filesystem_free_bytes{fstype!~"tmpfs"}[1h], 4 * 3600) < 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host disk will fill in 4 hours (instance {{ $labels.instance }})"
+ description: "Disk will fill in 4 hours at current write rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostPhysicalComponentTooHot
+ expr: node_hwmon_temp_celsius > 75
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host physical component too hot (instance {{ $labels.instance }})"
+ description: "Physical hardware component too hot\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostNodeOvertemperatureAlarm
+ expr: node_hwmon_temp_alarm == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Host node overtemperature alarm (instance {{ $labels.instance }})"
+ description: "Physical node temperature alarm triggered\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostKernelVersionDeviations
+ expr: count(sum(label_replace(node_uname_info, "kernel", "$1", "release", "([0-9]+.[0-9]+.[0-9]+).*")) by (kernel)) > 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host kernel version deviations (instance {{ $labels.instance }})"
+ description: "Different kernel versions are running\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostOomKillDetected
+ expr: increase(node_vmstat_oom_kill[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host OOM kill detected (instance {{ $labels.instance }})"
+ description: "OOM kill detected\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostEdacCorrectableErrorsDetected
+ expr: increase(node_edac_correctable_errors_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: info
+ annotations:
+ summary: "Host EDAC Correctable Errors detected (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostEdacUncorrectableErrorsDetected
+ expr: node_edac_uncorrectable_errors_total > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostNetworkReceiveErrors
+ expr: increase(node_network_receive_errs_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host Network Receive Errors (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last five minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostNetworkTransmitErrors
+ expr: increase(node_network_transmit_errs_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host Network Transmit Errors (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last five minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - name: container
+ rules:
+ - alert: ContainerKilled
+ expr: time() - container_last_seen > 60
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container killed (instance {{ $labels.instance }})"
+ description: "A container has disappeared\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerCpuUsage
+ expr: sum by(instance, name) (rate(container_cpu_usage_seconds_total[3m]) * 100 > 80)
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container CPU usage (instance {{ $labels.instance }})"
+ description: "Container CPU usage is above 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerMemoryUsage
+ expr: (sum(container_memory_usage_bytes) BY (instance, name) / sum(container_spec_memory_limit_bytes > 0) BY (instance, name) * 100) > 125
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container Memory usage (instance {{ $labels.instance }})"
+ description: "Container Memory usage is above 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerVolumeUsage
+ expr: (1 - (sum(container_fs_inodes_free) BY (instance) / sum(container_fs_inodes_total) BY (instance)) * 100) > 80
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container Volume usage (instance {{ $labels.instance }})"
+ description: "Container Volume usage is above 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerVolumeIoUsage
+ expr: (sum(container_fs_io_current) BY (instance, name) * 100) > 80
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container Volume IO usage (instance {{ $labels.instance }})"
+ description: "Container Volume IO usage is above 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerHighThrottleRate
+ expr: rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container high throttle rate (instance {{ $labels.instance }})"
+ description: "Container is being throttled\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - name: kubernetes
+ rules:
+ - alert: KubernetesNodeReady
+ expr: kube_node_status_condition{condition="Ready",status="true"} == 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Node ready (instance {{ $labels.instance }})"
+ description: "Node {{ $labels.node }} has been unready for a long time\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesMemoryPressure
+ expr: kube_node_status_condition{condition="MemoryPressure",status="true"} == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes memory pressure (instance {{ $labels.instance }})"
+ description: "{{ $labels.node }} has MemoryPressure condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDiskPressure
+ expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes disk pressure (instance {{ $labels.instance }})"
+ description: "{{ $labels.node }} has DiskPressure condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesOutOfDisk
+ expr: kube_node_status_condition{condition="OutOfDisk",status="true"} == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes out of disk (instance {{ $labels.instance }})"
+ description: "{{ $labels.node }} has OutOfDisk condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesJobFailed
+ expr: kube_job_status_failed > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes Job failed (instance {{ $labels.instance }})"
+ description: "Job {{$labels.namespace}}/{{$labels.exported_job}} failed to complete\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesCronjobSuspended
+ expr: kube_cronjob_spec_suspend != 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes CronJob suspended (instance {{ $labels.instance }})"
+ description: "CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is suspended\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPersistentvolumeclaimPending
+ expr: kube_persistentvolumeclaim_status_phase{phase="Pending"} == 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes PersistentVolumeClaim pending (instance {{ $labels.instance }})"
+ description: "PersistentVolumeClaim {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is pending\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesVolumeOutOfDiskSpace
+ expr: kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes * 100 < 10
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes Volume out of disk space (instance {{ $labels.instance }})"
+ description: "Volume is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesVolumeFullInFourDays
+ expr: predict_linear(kubelet_volume_stats_available_bytes[6h], 4 * 24 * 3600) < 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Volume full in four days (instance {{ $labels.instance }})"
+ description: "{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPersistentvolumeError
+ expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes PersistentVolume error (instance {{ $labels.instance }})"
+ description: "Persistent volume is in bad state\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetDown
+ expr: (kube_statefulset_status_replicas_ready / kube_statefulset_status_replicas_current) != 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes StatefulSet down (instance {{ $labels.instance }})"
+ description: "A StatefulSet went down\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesHpaScalingAbility
+ expr: kube_hpa_status_condition{condition="false", status="AbleToScale"} == 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes HPA scaling ability (instance {{ $labels.instance }})"
+ description: "Pod is unable to scale\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesHpaMetricAvailability
+ expr: kube_hpa_status_condition{condition="false", status="ScalingActive"} == 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes HPA metric availability (instance {{ $labels.instance }})"
+ description: "HPA is not able to colelct metrics\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesHpaScaleCapability
+ expr: kube_hpa_status_desired_replicas >= kube_hpa_spec_max_replicas
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes HPA scale capability (instance {{ $labels.instance }})"
+ description: "The maximum number of desired Pods has been hit\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPodNotHealthy
+ expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:]) > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Pod not healthy (instance {{ $labels.instance }})"
+ description: "Pod has been in a non-ready state for longer than an hour.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPodCrashLooping
+ expr: rate(kube_pod_container_status_restarts_total[15m]) * 60 * 5 > 5
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes pod crash looping (instance {{ $labels.instance }})"
+ description: "Pod {{ $labels.pod }} is crash looping\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesReplicassetMismatch
+ expr: kube_replicaset_spec_replicas != kube_replicaset_status_ready_replicas
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes ReplicasSet mismatch (instance {{ $labels.instance }})"
+ description: "Deployment Replicas mismatch\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDeploymentReplicasMismatch
+ expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes Deployment replicas mismatch (instance {{ $labels.instance }})"
+ description: "Deployment Replicas mismatch\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetReplicasMismatch
+ expr: kube_statefulset_status_replicas_ready != kube_statefulset_status_replicas
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes StatefulSet replicas mismatch (instance {{ $labels.instance }})"
+ description: "A StatefulSet has not matched the expected number of replicas for longer than 15 minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDeploymentGenerationMismatch
+ expr: kube_deployment_status_observed_generation != kube_deployment_metadata_generation
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Deployment generation mismatch (instance {{ $labels.instance }})"
+ description: "A Deployment has failed but has not been rolled back.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetGenerationMismatch
+ expr: kube_statefulset_status_observed_generation != kube_statefulset_metadata_generation
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes StatefulSet generation mismatch (instance {{ $labels.instance }})"
+ description: "A StatefulSet has failed but has not been rolled back.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetUpdateNotRolledOut
+ expr: max without (revision) (kube_statefulset_status_current_revision unless kube_statefulset_status_update_revision) * (kube_statefulset_replicas != kube_statefulset_status_replicas_updated)
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes StatefulSet update not rolled out (instance {{ $labels.instance }})"
+ description: "StatefulSet update has not been rolled out.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDaemonsetRolloutStuck
+ expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled * 100 < 100 or kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes DaemonSet rollout stuck (instance {{ $labels.instance }})"
+ description: "Some Pods of DaemonSet are not scheduled or not ready\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDaemonsetMisscheduled
+ expr: kube_daemonset_status_number_misscheduled > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes DaemonSet misscheduled (instance {{ $labels.instance }})"
+ description: "Some DaemonSet Pods are running where they are not supposed to run\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesCronjobTooLong
+ expr: time() - kube_cronjob_next_schedule_time > 3600
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes CronJob too long (instance {{ $labels.instance }})"
+ description: "CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesJobCompletion
+ expr: kube_job_spec_completions - kube_job_status_succeeded > 0 or kube_job_status_failed > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes job completion (instance {{ $labels.instance }})"
+ description: "Kubernetes Job failed to complete\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesApiServerErrors
+ expr: sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[2m])) / sum(rate(apiserver_request_count{job="apiserver"}[2m])) * 100 > 3
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes API server errors (instance {{ $labels.instance }})"
+ description: "Kubernetes API server is experiencing high error rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesApiClientErrors
+ expr: (sum(rate(rest_client_requests_total{code=~"(4|5).."}[2m])) by (instance, job) / sum(rate(rest_client_requests_total[2m])) by (instance, job)) * 100 > 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes API client errors (instance {{ $labels.instance }})"
+ description: "Kubernetes API client is experiencing high error rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesClientCertificateExpiresNextWeek
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 7*24*60*60
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes client certificate expires next week (instance {{ $labels.instance }})"
+ description: "A client certificate used to authenticate to the apiserver is expiring next week.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesClientCertificateExpiresSoon
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 24*60*60
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes client certificate expires soon (instance {{ $labels.instance }})"
+ description: "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesApiServerLatency
+ expr: histogram_quantile(0.99, sum(apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH|PROXY"}) WITHOUT (instance, resource)) / 1e+06 > 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes API server latency (instance {{ $labels.instance }})"
+ description: "Kubernetes API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+
+ prometheus.yml: |-
+ global:
+ scrape_interval: 15s
+ evaluation_interval: 15s
+
+ rule_files:
+ - "/etc/prometheus/alert.rules"
+
+ scrape_configs:
+ - job_name: 'collectd-exporter'
+ scrape_interval: 5s
+ static_configs:
+ - targets: ['collectd-exporter:9103']
+
+ - job_name: 'cadvisor'
+ scrape_interval: 5s
+ static_configs:
+ - targets: ['cadvisor:8080']
+
+ - job_name: 'node-exporter'
+ scrape_interval: 5s
+ static_configs:
+ - targets: ['node-exporter:9100']
+
+ - job_name: 'prometheus'
+ scrape_interval: 10s
+ static_configs:
+ - targets: ['localhost:9090']
+
+ - job_name: 'kube-state-metrics'
+ scrape_interval: 10s
+ static_configs:
+ - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080']
+
+ alerting:
+ alertmanagers:
+ - scheme: http
+ static_configs:
+ - targets: ['alertmanager:9093', 'alertmanager1:9093']
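The ConfigMap carries both the alert rules and the scrape configuration that the Prometheus deployments below mount under /etc/prometheus. As a quick sanity check before applying it, a minimal sketch (assuming PyYAML and a local checkout of this repository) that parses the manifest and summarises its contents:

# Sketch: parse the ConfigMap above and summarise it (assumes PyYAML is installed).
import yaml

PATH = "tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml"
with open(PATH) as f:
    configmap = yaml.safe_load(f)

rules = yaml.safe_load(configmap["data"]["alert.rules"])
for group in rules["groups"]:
    print(f"{group['name']}: {len(group['rules'])} rules")

scrape = yaml.safe_load(configmap["data"]["prometheus.yml"])
print("scrape jobs:", [job["job_name"] for job in scrape["scrape_configs"]])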
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml
new file mode 100644
index 00000000..5b98b154
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml
@@ -0,0 +1,73 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: prometheus-deployment
+ namespace: monitoring
+ labels:
+ app: prometheus
+ adi10hero.monitoring: prometheus
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ adi10hero.monitoring: prometheus
+ app: prometheus
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ labels:
+ adi10hero.monitoring: prometheus
+ app: prometheus
+ spec:
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm2
+ containers:
+ - name: prometheus
+ image: prom/prometheus
+ args:
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --storage.tsdb.path=/prometheus
+ - --storage.tsdb.retention.size=3GB
+ - --storage.tsdb.retention.time=30d
+ - --web.console.libraries=/etc/prometheus/console_libraries
+ - --web.console.templates=/etc/prometheus/consoles
+ ports:
+ - containerPort: 9090
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: prometheus-config-volume
+ mountPath: /etc/prometheus/
+ - name: prometheus-storage-volume
+ mountPath: /prometheus/
+ restartPolicy: Always
+ volumes:
+ - name: prometheus-config-volume
+ configMap:
+ defaultMode: 420
+ name: prometheus-config
+ - name: prometheus-storage-volume
+ persistentVolumeClaim:
+ claimName: prometheus-pvc
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml
new file mode 100644
index 00000000..f10cd073
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml
@@ -0,0 +1,30 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus-pv
+ namespace: monitoring
+ labels:
+ app: prometheus-pv
+ adi10hero.monitoring: prometheus-pv
+spec:
+ storageClassName: monitoring
+ capacity:
+ storage: 6Gi
+ accessModes:
+ - ReadWriteMany
+ hostPath:
+ path: "/usr/share/monitoring_data/prometheus"
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml
new file mode 100644
index 00000000..812fcc73
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml
@@ -0,0 +1,33 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: prometheus-pvc
+ namespace: monitoring
+ labels:
+ app: prometheus-pvc
+ adi10hero.monitoring: prometheus-pvc
+spec:
+ accessModes:
+ - ReadWriteMany
+ storageClassName: monitoring
+ resources:
+ requests:
+ storage: 3Gi
+ selector:
+ matchLabels:
+ app: prometheus-pv
+ adi10hero.monitoring: prometheus-pv
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml
new file mode 100644
index 00000000..5be76d3e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml
@@ -0,0 +1,34 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: prometheus
+ app: prometheus
+ name: prometheus
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '9090'
+spec:
+ type: NodePort
+ ports:
+ - name: prometheus
+ protocol: TCP
+ port: 9090
+ nodePort: 30900
+ selector:
+ adi10hero.monitoring: prometheus
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml
new file mode 100644
index 00000000..149bea84
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml
@@ -0,0 +1,73 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: prometheus1-deployment
+ namespace: monitoring
+ labels:
+ app: prometheus1
+ adi10hero.monitoring: prometheus1
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ labels:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
+ spec:
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm3
+ containers:
+ - name: prometheus
+ image: prom/prometheus
+ args:
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --storage.tsdb.path=/prometheus
+ - --storage.tsdb.retention.size=3GB
+ - --storage.tsdb.retention.time=30d
+ - --web.console.libraries=/etc/prometheus/console_libraries
+ - --web.console.templates=/etc/prometheus/consoles
+ ports:
+ - containerPort: 9090
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: prometheus-config-volume
+ mountPath: /etc/prometheus/
+ - name: prometheus-storage-volume
+ mountPath: /prometheus/
+ restartPolicy: Always
+ volumes:
+ - name: prometheus-config-volume
+ configMap:
+ defaultMode: 420
+ name: prometheus-config
+ - name: prometheus-storage-volume
+ persistentVolumeClaim:
+ claimName: prometheus-pvc
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml
new file mode 100644
index 00000000..439deec1
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
+ name: prometheus1
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '9090'
+spec:
+ type: NodePort
+ ports:
+ - name: prometheus1
+ protocol: TCP
+ port: 9090
+ nodePort: 30901
+ selector:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
diff --git a/tools/lma/ansible-server/roles/monitoring/tasks/main.yml b/tools/lma/ansible-server/roles/monitoring/tasks/main.yml
new file mode 100644
index 00000000..cd4e6aca
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/tasks/main.yml
@@ -0,0 +1,273 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+#PAG (Prometheus, Alertmanager, Grafana) setup in the k8s cluster
+
+#***********************************************************************************************************
+#copy all yaml to /tmp/files/
+#***********************************************************************************************************
+- name: copy all yaml to /tmp/files/
+ copy:
+ src: ../files/
+ dest: /tmp/files/
+
+#***********************************************************************************************************
+#Creating Namespace
+#***********************************************************************************************************
+- name: Creating Monitoring Namespace
+ k8s:
+ state: present
+ src: /tmp/files/monitoring-namespace.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume
+#***********************************************************************************************************
+- name: creating Persistent Volume for Prometheus
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-pv.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume
+#***********************************************************************************************************
+- name: creating Persistent Volume for Grafana
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-pv.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume Claim
+#***********************************************************************************************************
+- name: creating Persistent Volume Claim for Prometheus
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-pvc.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume Claim
+#***********************************************************************************************************
+- name: creating Persistent Volume Claim for Grafana
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-pvc.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the cAdvisor daemonset
+#***********************************************************************************************************
+- name: Creating cAdvisor daemonset
+ k8s:
+ state: present
+ src: /tmp/files/cadvisor/cadvisor-deamonset.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting the CAdvisor service
+#***********************************************************************************************************
+- name: Starting cAdvisor service
+ k8s:
+ state: present
+ src: /tmp/files/cadvisor/cadvisor-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Deploying and Starting the kube-state-metrics service
+#***********************************************************************************************************
+- name: Deploying kube-state-metrics
+ k8s:
+ state: present
+ src: /tmp/files/kube-state-metrics/kube-state-metrics-deployment.yaml
+ namespace: kube-system
+
+- name: Starting kube-state-metrics service
+ k8s:
+ state: present
+ src: /tmp/files/kube-state-metrics/kube-state-metrics-service.yaml
+ namespace: kube-system
+
+#***********************************************************************************************************
+#Making the NodeExporter daemonset
+#***********************************************************************************************************
+- name: Creating NodeExporter daemonset
+ k8s:
+ state: present
+ src: /tmp/files/node-exporter/nodeexporter-daemonset.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting the NodeExporter service
+#***********************************************************************************************************
+- name: Starting NodeExporter service
+ k8s:
+ state: present
+ src: /tmp/files/node-exporter/nodeexporter-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the collectd-exporter deployment
+#***********************************************************************************************************
+- name: Creating collectd-exporter deployment
+ k8s:
+ state: present
+ src: /tmp/files/collectd-exporter/collectd-exporter-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the collectd-exporter service
+#***********************************************************************************************************
+- name: Creating collectd-exporter service
+ k8s:
+ state: present
+ src: /tmp/files/collectd-exporter/collectd-exporter-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Webhook goes here
+#***********************************************************************************************************
+
+#***********************************************************************************************************
+#Making the config file for Alertmanagers
+#***********************************************************************************************************
+- name: Creating config map for Alertmanagers
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager-config.yaml
+ namespace: monitoring
+
+# - name: Creating config map for Alertmanagers
+# k8s:
+# state: present
+# src: /tmp/files/alertmanager1-config.yaml
+# namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 1st alertmanager deployment
+#***********************************************************************************************************
+- name: Creating 1st alertmanager deployment
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 1st alertmanager service
+#***********************************************************************************************************
+- name: Creating 1st alertmanager service
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 2nd alertmanager deployment
+#***********************************************************************************************************
+- name: Creating 2nd alertmanager deployment
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager1-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 2nd alertmanager service
+#***********************************************************************************************************
+- name: Creating 2nd alertmanager service
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager1-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the config file for Prometheus
+#***********************************************************************************************************
+- name: Creating 1st Prometheus Config
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-config.yaml
+ namespace: monitoring
+
+# - name: Creating 2nd Prometheus Config
+# k8s:
+# state: present
+# src: /tmp/files/prometheus1-config.yaml
+# namespace: monitoring
+
+#***********************************************************************************************************
+#Starting Prometheus
+#***********************************************************************************************************
+- name: Starting Prometheus 1
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-deployment.yaml
+ namespace: monitoring
+
+- name: Starting Prometheus 2
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus1-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting Prometheus Service
+#***********************************************************************************************************
+- name: Starting Prometheus 1 Service
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-service.yaml
+ namespace: monitoring
+
+- name: Starting Prometheus 2 Service
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus1-service.yaml
+ namespace: monitoring
+
+- name: Starting Main Prometheus Service
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/main-prometheus-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting Grafana
+#***********************************************************************************************************
+- name: Creating Grafana Datasource Config
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-datasource-config.yaml
+ namespace: monitoring
+
+- name: Starting Grafana
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-deployment.yaml
+ namespace: monitoring
+
+- name: Starting Grafana Service
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#removing /tmp/files
+#***********************************************************************************************************
+- name: Removing /tmp/files
+ file:
+ path: "/tmp/files"
+ state: absent
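The role stages every manifest under /tmp/files, applies them with the k8s module, and removes the staged copies at the end. A short sketch, assuming the official kubernetes Python client and a kubeconfig for this cluster, that confirms the pods in the monitoring namespace reached the Running phase once the playbook finishes:

# Sketch: verify the monitoring namespace after the role runs
# (assumes the `kubernetes` Python client and a working kubeconfig).
from kubernetes import client, config

config.load_kube_config()
v1 = client.CoreV1Api()
for pod in v1.list_namespaced_pod("monitoring").items:
    print(f"{pod.metadata.name}: {pod.status.phase}")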
diff --git a/tools/lma/ansible-server/roles/nfs/tasks/main.yml b/tools/lma/ansible-server/roles/nfs/tasks/main.yml
new file mode 100644
index 00000000..2380ea74
--- /dev/null
+++ b/tools/lma/ansible-server/roles/nfs/tasks/main.yml
@@ -0,0 +1,42 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#create Dir /srv/nfs
+- name: Create Directory for elasticsearch
+ file: path="/srv/nfs/{{item}}" state=directory
+ with_items:
+ - ['data', 'master']
+
+- name: Create Directory for grafana
+ file: path="/usr/share/monitoring_data/grafana" state=directory
+
+#installing NFS
+- name: Installing NFS server utils
+ yum:
+ name: nfs-utils
+ state: present
+
+#update /etc/exports file
+- name: Edit /etc/exports file for NFS
+ lineinfile: path=/etc/exports line="{{item.line}}"
+ with_items:
+ - {line: "/srv/nfs/master *(rw,sync,no_root_squash,no_subtree_check)"}
+ - {line: "/srv/nfs/data *(rw,sync,no_root_squash,no_subtree_check)"}
+ - {line: "/usr/share/monitoring_data/grafana *(rw,sync,no_root_squash,no_subtree_check)"}
+
+#starting NFS service
+- name: 'starting NFS service'
+ service:
+ name: nfs
+ state: restarted
diff --git a/tools/lma/logs/dockerfile/elastalert/Dockerfile b/tools/lma/logs/dockerfile/elastalert/Dockerfile
new file mode 100644
index 00000000..3304ad17
--- /dev/null
+++ b/tools/lma/logs/dockerfile/elastalert/Dockerfile
@@ -0,0 +1,23 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM python:alpine
+RUN apk --update upgrade && \
+ apk add gcc libffi-dev musl-dev python3-dev openssl-dev tzdata libmagic && \
+ rm -rf /var/cache/apk/*
+RUN pip install elastalert &&\
+ apk del gcc libffi-dev musl-dev python3-dev openssl-dev
+RUN mkdir -p /opt/elastalert && \
+ mkdir -p /opt/elastalert/rules
+WORKDIR /opt/elastalert
\ No newline at end of file
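The image can be built from this directory like any other Dockerfile; a minimal sketch using the Docker SDK for Python (assuming a local Docker daemon, the docker package, and a hypothetical local tag) is shown below, and the same approach applies to the fluentd image that follows.

# Sketch: build the elastalert image with the Docker SDK for Python
# (assumes a local Docker daemon and the `docker` package; the tag is hypothetical).
import docker

client = docker.from_env()
image, build_logs = client.images.build(
    path="tools/lma/logs/dockerfile/elastalert",
    tag="lma/elastalert:local",
)
print("built:", image.tags)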
diff --git a/tools/lma/logs/dockerfile/fluentd/Dockerfile b/tools/lma/logs/dockerfile/fluentd/Dockerfile
new file mode 100644
index 00000000..19dea0f8
--- /dev/null
+++ b/tools/lma/logs/dockerfile/fluentd/Dockerfile
@@ -0,0 +1,23 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM fluent/fluentd:v1.11.0-debian-1.0
+USER root
+RUN gem sources --add https://rubygems.org/
+RUN apt-get update \
+ && gem install fluent-plugin-elasticsearch \
+ && gem install elasticsearch-xpack\
+ && gem install fluent-plugin-rewrite-tag-filter\
+ && gem install fluent-plugin-dio
+USER fluent
\ No newline at end of file
diff --git a/tools/lma/logs/jupyter-notebooks/Trend-Analysis.ipynb b/tools/lma/logs/jupyter-notebooks/Trend-Analysis.ipynb
new file mode 100644
index 00000000..1bc770a1
--- /dev/null
+++ b/tools/lma/logs/jupyter-notebooks/Trend-Analysis.ipynb
@@ -0,0 +1,308 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Trend Analysis\n",
+ "##### Contributor:\n",
+ "\n",
+ "- Adarsh Yadav <adiyadav0509@gmail.com> "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "import matplotlib.dates as mdates\n",
+ "import numpy as np\n",
+ "import io \n",
+ "\n",
+ "from elasticsearch import Elasticsearch\n",
+ "from elasticsearch_dsl import Search\n",
+ "from elasticsearch.connection import create_ssl_context\n",
+ "import csv\n",
+ "import ssl\n",
+ "import urllib3\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Enter foldername and index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Give folder name\n",
+ "# foldername = \"results_2020-06-12_06-47-56\"\n",
+ "foldername = \"result-test1\"\n",
+ "#Give index name - \"node1*\" or \"node4*\"\n",
+ "index = \"node4*\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ssl_context = create_ssl_context()\n",
+ "ssl_context.check_hostname = False\n",
+ "ssl_context.verify_mode = ssl.CERT_NONE\n",
+ "urllib3.disable_warnings()\n",
+ "client = Elasticsearch(['https://elasticsearch:password123@10.10.120.211:31111'],verify_certs=False,ssl_context=ssl_context)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Trex"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filename = \"/tmp/\"+foldername+\"/trex-liveresults-counts.dat\"\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"ts\").query(\"match_phrase\", log_path=filename)\n",
+ "\n",
+ "trex = pd.DataFrame()\n",
+ "trex_data = dict()\n",
+ "for hits in s.scan():\n",
+ " trex_data['ts'] = hits.ts\n",
+ " trex_data['rx_pkts'] = hits.rx_pkts\n",
+ " trex_data['rx_port'] = hits.rx_port\n",
+ " trex_data['tx_port'] = hits.tx_port\n",
+ " trex = trex.append(trex_data, ignore_index=True)\n",
+ "if not trex.empty:\n",
+ " #convert 'ts' to datetime\n",
+ " trex['ts'] = pd.to_datetime(trex['ts'],unit='s')\n",
+ " trex_grp = trex.groupby('rx_port')\n",
+ " trex_rx_0 = trex_grp.get_group(0.0) \n",
+ " trex_rx_1 = trex_grp.get_group(1.0) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if not trex.empty:\n",
+ " fig, ax = plt.subplots(2,figsize=(16, 10))\n",
+ " ax[0].plot(trex_rx_0['ts'],\n",
+ " trex_rx_0['rx_pkts'],\n",
+ " 'tab:orange')\n",
+ " ax[0].title.set_text(\"At rx_port=0 & tx_port=1\")\n",
+ " ax[0].set(xlabel=\"timestamp\")\n",
+ " ax[0].set(ylabel=\"rx_pkts\")\n",
+ "\n",
+ " ax[1].plot(trex_rx_1['ts'],\n",
+ " trex_rx_1['rx_pkts'],\n",
+ " 'tab:green')\n",
+ " ax[1].title.set_text(\"At rx_port=1 & tx_port=0\")\n",
+ " ax[1].set(xlabel=\"timestamp\")\n",
+ " ax[1].set(ylabel=\"rx_pkts\")\n",
+ "\n",
+ " #change date format\n",
+ " myFmt = mdates.DateFormatter('%Y-%m-%d %H:%M:%S')\n",
+ " for i in range(2):\n",
+ " ax[i].xaxis.set_major_formatter(myFmt) \n",
+ " plt.show()\n",
+ "else:\n",
+ " print(\"No data Found\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Spirent"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filename = \"/tmp/\"+foldername+\"/stc-liveresults.dat.rx\"\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"ts\").query(\"match_phrase\", log_path=filename)\n",
+ "\n",
+ "spirent = pd.DataFrame()\n",
+ "spirent_data = dict()\n",
+ "for hits in s.scan():\n",
+ " spirent_data['ts'] = hits.ts\n",
+ " spirent_data['RxPrt'] = hits.RxPrt\n",
+ " spirent_data['FrCnt'] = hits.FrCnt\n",
+ " spirent = spirent.append(spirent_data, ignore_index=True)\n",
+ "if not spirent.empty:\n",
+ " #convert 'ts' to datetime\n",
+ " spirent['ts'] = pd.to_datetime(spirent['ts'],unit='s')\n",
+ " spirent_grp = spirent.groupby('RxPrt')\n",
+ " spirent_rx_1 = spirent_grp.get_group('Port //1/1') \n",
+ " spirent_rx_2 = spirent_grp.get_group('Port //1/2') "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if not spirent.empty:\n",
+ " fig, ax = plt.subplots(2,figsize=(16, 10))\n",
+ " ax[0].plot(spirent_rx_1['ts'],\n",
+ " spirent_rx_1['FrCnt'],\n",
+ " 'tab:orange')\n",
+ " ax[0].title.set_text(\"At RxPrt=//1/1\")\n",
+ " ax[0].set(xlabel=\"timestamp\")\n",
+ " ax[0].set(ylabel=\"FrCnt\")\n",
+ "\n",
+ " ax[1].plot(spirent_rx_2['ts'],\n",
+ " spirent_rx_2['FrCnt'],\n",
+ " 'tab:green')\n",
+ " ax[1].title.set_text(\"At RxPrt=//1/2\")\n",
+ " ax[1].set(xlabel=\"timestamp\")\n",
+ " ax[1].set(ylabel=\"FrCnt\")\n",
+ "\n",
+ " #change date format\n",
+ " myFmt = mdates.DateFormatter('%Y-%m-%d %H:%M:%S')\n",
+ " for i in range(2):\n",
+ " ax[i].xaxis.set_major_formatter(myFmt) \n",
+ " plt.show()\n",
+ "else:\n",
+ " print(\"No data Found\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ixia"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filename = \"/tmp/\"+foldername+\"/Traffic Item Statistics.csv\"\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"msg\").query(\"match_phrase\", log_path=filename)\n",
+ "\n",
+ "for hits in s.scan():\n",
+ " with open('./ixia-traffic.csv', 'a+') as f:\n",
+ " f.write(hits.msg+\"\\n\")\n",
+ " \n",
+ "ixia = pd.DataFrame()\n",
+ "if os.path.exists('./ixia-traffic.csv'):\n",
+ " ixia = pd.read_csv('./ixia-traffic.csv')\n",
+ " os.remove(f.name)\n",
+ " f.close()\n",
+ "if not ixia.empty:\n",
+ " ixia = ixia[['~ElapsedTime','Traffic Item 1:Frames Delta','Traffic Item 1:Loss %']].astype(float)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if not ixia.empty:\n",
+ " fig, ax = plt.subplots(2,figsize=(16, 10))\n",
+ " ax[0].plot(ixia['~ElapsedTime'],\n",
+ " ixia['Traffic Item 1:Frames Delta'],\n",
+ " 'tab:orange')\n",
+ " ax[0].set(xlabel=\"Elapsed Time\")\n",
+ " ax[0].set(ylabel=\"Frames Delta\")\n",
+ "\n",
+ " ax[1].plot(ixia['~ElapsedTime'],\n",
+ " ixia['Traffic Item 1:Loss %'],\n",
+ " 'tab:green')\n",
+ " ax[1].set(xlabel=\"Elapsed Time\")\n",
+ " ax[1].set(ylabel=\"Loss %\")\n",
+ "\n",
+ " plt.show()\n",
+ "else:\n",
+ " print(\"No data Found\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Time Analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filename = \"/tmp/\"+foldername+\"/\"\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"setup_duration\").query(\"match_phrase\", log_path=filename)\n",
+ "for hits in s.scan():\n",
+ " print(\"Setup duration: \", hits.setup_duration,\"s\")\n",
+ "\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"iteration_duration\").query(\"match_phrase\", log_path=filename)\n",
+ "for hits in s.scan():\n",
+ " print(\"Iteration duration: \", hits.iteration_duration,\"s\")\n",
+ "\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"traffic_duration\").query(\"match_phrase\", log_path=filename)\n",
+ "for hits in s.scan():\n",
+ " print(\"Traffic duration: \", hits.traffic_duration,\"s\")\n",
+ "\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"test_duration\").query(\"match_phrase\", log_path=filename)\n",
+ "for hits in s.scan():\n",
+ " print(\"Test duration: \", hits.test_duration,\"s\")\n",
+ "\n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"report_duration\").query(\"match_phrase\", log_path=filename)\n",
+ "for hits in s.scan():\n",
+ " print(\"Report duration: \", hits.report_duration,\"s\")\n",
+ " \n",
+ "s = Search(index=index).using(client).query(\"exists\", field=\"vswitch_duration\").query(\"match_phrase\", log_path=filename)\n",
+ "for hits in s.scan():\n",
+ " print(\"Vswitch starting duration: \", hits.vswitch_duration,\"s\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
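The notebook cells above all follow the same pattern: build an elasticsearch-dsl Search filtered on the result file's log_path, scan the matching hits into a pandas DataFrame, and plot or print the collected fields. A minimal standalone sketch of that pattern follows; the endpoint and credentials mirror the connection cell at the top of the notebook, while the index pattern and results folder name are placeholders for values defined in the notebook's earlier cells.

import ssl
import urllib3
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

# The logging stack uses a self-signed certificate, so verification is disabled,
# exactly as in the notebook's connection cell.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
urllib3.disable_warnings()
client = Elasticsearch(['https://elasticsearch:password123@10.10.120.211:31111'],
                       verify_certs=False, ssl_context=ssl_context)

index = 'logstash-*'                # assumption: index pattern set earlier in the notebook
foldername = 'results_2020-08-04'   # assumption: results folder set earlier in the notebook
filename = '/tmp/' + foldername + '/trex-liveresults-counts.dat'

# Keep only documents that carry a timestamp and come from the wanted log file,
# then collect the fields of interest into a DataFrame.
s = (Search(index=index).using(client)
     .query('exists', field='ts')
     .query('match_phrase', log_path=filename))
rows = [{'ts': h.ts, 'rx_pkts': h.rx_pkts, 'rx_port': h.rx_port} for h in s.scan()]
df = pd.DataFrame(rows)
if not df.empty:
    df['ts'] = pd.to_datetime(df['ts'], unit='s')
print(df.head())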
diff --git a/tools/lma/metrics/dashboard/cpu_usage_using.json b/tools/lma/metrics/dashboard/cpu_usage_using.json
new file mode 100644
index 00000000..85f7f122
--- /dev/null
+++ b/tools/lma/metrics/dashboard/cpu_usage_using.json
@@ -0,0 +1,750 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "limit": 100,
+ "name": "Monitoring",
+ "showIn": 0,
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": 4,
+ "iteration": 1596637894836,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "hiddenSeries": false,
+ "id": 3,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.1",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "collectd_cpu_percent{exported_instance='$host'}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 4,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.1",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "collectd_cpu_percent{cpu='$core', exported_instance='$host'}",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU utilization per core",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 14
+ },
+ "hiddenSeries": false,
+ "id": 5,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.1",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "collectd_cpu_percent{cpu='$core',exported_instance='$host'}",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Usage per core",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "10s",
+ "schemaVersion": 26,
+ "style": "dark",
+ "tags": [
+ "monitoring"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": true,
+ "text": "prometheus",
+ "value": "prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "queryValue": "",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "host",
+ "options": [
+ {
+ "selected": true,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ }
+ ],
+ "query": "pod12-node4,",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": true,
+ "text": "0",
+ "value": "0"
+ },
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "core",
+ "options": [
+ {
+ "selected": false,
+ "text": "All",
+ "value": "$__all"
+ },
+ {
+ "selected": true,
+ "text": "0",
+ "value": "0"
+ },
+ {
+ "selected": false,
+ "text": "1",
+ "value": "1"
+ },
+ {
+ "selected": false,
+ "text": "2",
+ "value": "2"
+ },
+ {
+ "selected": false,
+ "text": "3",
+ "value": "3"
+ },
+ {
+ "selected": false,
+ "text": "4",
+ "value": "4"
+ },
+ {
+ "selected": false,
+ "text": "5",
+ "value": "5"
+ },
+ {
+ "selected": false,
+ "text": "6",
+ "value": "6"
+ },
+ {
+ "selected": false,
+ "text": "7",
+ "value": "7"
+ },
+ {
+ "selected": false,
+ "text": "8",
+ "value": "8"
+ },
+ {
+ "selected": false,
+ "text": "9",
+ "value": "9"
+ },
+ {
+ "selected": false,
+ "text": "10",
+ "value": "10"
+ },
+ {
+ "selected": false,
+ "text": "11",
+ "value": "11"
+ },
+ {
+ "selected": false,
+ "text": "12",
+ "value": "12"
+ },
+ {
+ "selected": false,
+ "text": "13",
+ "value": "13"
+ },
+ {
+ "selected": false,
+ "text": "14",
+ "value": "14"
+ },
+ {
+ "selected": false,
+ "text": "15",
+ "value": "15"
+ },
+ {
+ "selected": false,
+ "text": "16",
+ "value": "16"
+ },
+ {
+ "selected": false,
+ "text": "17",
+ "value": "17"
+ },
+ {
+ "selected": false,
+ "text": "18",
+ "value": "18"
+ },
+ {
+ "selected": false,
+ "text": "19",
+ "value": "19"
+ },
+ {
+ "selected": false,
+ "text": "20",
+ "value": "20"
+ },
+ {
+ "selected": false,
+ "text": "21",
+ "value": "21"
+ },
+ {
+ "selected": false,
+ "text": "22",
+ "value": "22"
+ },
+ {
+ "selected": false,
+ "text": "23",
+ "value": "23"
+ },
+ {
+ "selected": false,
+ "text": "24",
+ "value": "24"
+ },
+ {
+ "selected": false,
+ "text": "25",
+ "value": "25"
+ },
+ {
+ "selected": false,
+ "text": "26",
+ "value": "26"
+ },
+ {
+ "selected": false,
+ "text": "27",
+ "value": "27"
+ },
+ {
+ "selected": false,
+ "text": "28",
+ "value": "28"
+ },
+ {
+ "selected": false,
+ "text": "29",
+ "value": "29"
+ },
+ {
+ "selected": false,
+ "text": "30",
+ "value": "30"
+ },
+ {
+ "selected": false,
+ "text": "31",
+ "value": "31"
+ },
+ {
+ "selected": false,
+ "text": "32",
+ "value": "32"
+ },
+ {
+ "selected": false,
+ "text": "33",
+ "value": "33"
+ },
+ {
+ "selected": false,
+ "text": "34",
+ "value": "34"
+ },
+ {
+ "selected": false,
+ "text": "35",
+ "value": "35"
+ },
+ {
+ "selected": false,
+ "text": "36",
+ "value": "36"
+ },
+ {
+ "selected": false,
+ "text": "37",
+ "value": "37"
+ },
+ {
+ "selected": false,
+ "text": "38",
+ "value": "38"
+ },
+ {
+ "selected": false,
+ "text": "39",
+ "value": "39"
+ },
+ {
+ "selected": false,
+ "text": "40",
+ "value": "40"
+ },
+ {
+ "selected": false,
+ "text": "41",
+ "value": "41"
+ },
+ {
+ "selected": false,
+ "text": "42",
+ "value": "42"
+ },
+ {
+ "selected": false,
+ "text": "43",
+ "value": "43"
+ },
+ {
+ "selected": false,
+ "text": "44",
+ "value": "44"
+ },
+ {
+ "selected": false,
+ "text": "45",
+ "value": "45"
+ },
+ {
+ "selected": false,
+ "text": "46",
+ "value": "46"
+ },
+ {
+ "selected": false,
+ "text": "47",
+ "value": "47"
+ },
+ {
+ "selected": false,
+ "text": "48",
+ "value": "48"
+ },
+ {
+ "selected": false,
+ "text": "49",
+ "value": "49"
+ },
+ {
+ "selected": false,
+ "text": "50",
+ "value": "50"
+ },
+ {
+ "selected": false,
+ "text": "51",
+ "value": "51"
+ },
+ {
+ "selected": false,
+ "text": "52",
+ "value": "52"
+ },
+ {
+ "selected": false,
+ "text": "53",
+ "value": "53"
+ },
+ {
+ "selected": false,
+ "text": "54",
+ "value": "54"
+ },
+ {
+ "selected": false,
+ "text": "55",
+ "value": "55"
+ },
+ {
+ "selected": false,
+ "text": "56",
+ "value": "56"
+ },
+ {
+ "selected": false,
+ "text": "57",
+ "value": "57"
+ },
+ {
+ "selected": false,
+ "text": "58",
+ "value": "58"
+ },
+ {
+ "selected": false,
+ "text": "59",
+ "value": "59"
+ },
+ {
+ "selected": false,
+ "text": "60",
+ "value": "60"
+ },
+ {
+ "selected": false,
+ "text": "61",
+ "value": "61"
+ },
+ {
+ "selected": false,
+ "text": "62",
+ "value": "62"
+ },
+ {
+ "selected": false,
+ "text": "63",
+ "value": "63"
+ },
+ {
+ "selected": false,
+ "text": "64",
+ "value": "64"
+ }
+ ],
+ "query": "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "CPU Usage",
+ "uid": "XeDwSiSGk",
+ "version": 13
+} \ No newline at end of file
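All three panels in this dashboard graph collectd_cpu_percent, filtered by the $host and $core template variables. Before importing it, it can be worth confirming that Prometheus is actually scraping that series; the sketch below queries the Prometheus HTTP API directly, with the endpoint and label values as placeholders for the ones used in a given deployment.

import requests

PROM = 'http://10.10.120.211:30900'   # placeholder: Prometheus endpoint of the monitoring cluster
query = "collectd_cpu_percent{exported_instance='pod12-node4', cpu='0'}"

# /api/v1/query returns the latest sample for every series matching the selector.
resp = requests.get(f'{PROM}/api/v1/query', params={'query': query}, timeout=10)
resp.raise_for_status()
for result in resp.json()['data']['result']:
    print(result['metric'].get('cpu'), result['value'])   # value is [timestamp, sample]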
diff --git a/tools/lma/metrics/dashboard/memory_using.json b/tools/lma/metrics/dashboard/memory_using.json
new file mode 100644
index 00000000..3b92d8f5
--- /dev/null
+++ b/tools/lma/metrics/dashboard/memory_using.json
@@ -0,0 +1,337 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "limit": 100,
+ "name": "Monitoring",
+ "showIn": 0,
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": 6,
+ "iteration": 1597616052316,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 15,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "hiddenSeries": false,
+ "id": 1,
+ "interval": "1s",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.3",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(collectd_memory{exported_instance='$host', memory='$type'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Bytes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 26,
+ "style": "dark",
+ "tags": [
+ "monitoring"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "prometheus",
+ "value": "prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "queryValue": "",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "host",
+ "options": [
+ {
+ "selected": true,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ }
+ ],
+ "query": "pod12-node4,",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ },
+ {
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ "hide": 0,
+ "label": null,
+ "name": "range",
+ "options": [
+ {
+ "selected": true,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "30s,1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "queryValue": "",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": true,
+ "text": "used",
+ "value": "used"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "type",
+ "options": [
+ {
+ "selected": false,
+ "text": "buffered",
+ "value": "buffered"
+ },
+ {
+ "selected": false,
+ "text": "cached",
+ "value": "cached"
+ },
+ {
+ "selected": false,
+ "text": "free",
+ "value": "free"
+ },
+ {
+ "selected": false,
+ "text": "slab_recl",
+ "value": "slab_recl"
+ },
+ {
+ "selected": false,
+ "text": "slab_unrecl",
+ "value": "slab_unrecl"
+ },
+ {
+ "selected": true,
+ "text": "used",
+ "value": "used"
+ }
+ ],
+ "query": "buffered,cached,free,slab_recl,slab_unrecl,used",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Memory",
+ "uid": "kuro-mem",
+ "version": 4
+} \ No newline at end of file
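The memory dashboard templates both the collectd memory type ($type) and the window fed into rate() ($range). Dashboards such as this one can also be loaded through Grafana's HTTP API instead of the UI; a sketch follows, with the Grafana URL and credentials as placeholders.

import json
import requests

GRAFANA = 'http://10.10.120.211:30000'   # placeholder: Grafana endpoint
AUTH = ('admin', 'admin')                # placeholder: Grafana credentials

with open('tools/lma/metrics/dashboard/memory_using.json') as f:
    dashboard = json.load(f)

# Let Grafana assign its own numeric id and overwrite any dashboard
# already registered under this uid/title.
dashboard['id'] = None
payload = {'dashboard': dashboard, 'overwrite': True}

resp = requests.post(f'{GRAFANA}/api/dashboards/db', json=payload, auth=AUTH, timeout=10)
resp.raise_for_status()
print(resp.json())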
diff --git a/tools/lma/metrics/dashboard/ovs_stats_using.json b/tools/lma/metrics/dashboard/ovs_stats_using.json
new file mode 100644
index 00000000..1e679fbe
--- /dev/null
+++ b/tools/lma/metrics/dashboard/ovs_stats_using.json
@@ -0,0 +1,854 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "limit": 100,
+ "name": "Monitoring",
+ "showIn": 0,
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": 6,
+ "iteration": 1596643135141,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "hiddenSeries": false,
+ "id": 1,
+ "interval": "1s",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.1",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(collectd_ovs_stats_if_rx_octets_total{exported_instance='$host'}[$__interval])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Average RX values",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "hiddenSeries": false,
+ "id": 2,
+ "interval": "1s",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.1",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(collectd_ovs_stats_if_tx_octets_total{exported_instance='$host'}[$__interval])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Average TX values",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 5,
+ "w": 24,
+ "x": 0,
+ "y": 12
+ },
+ "hiddenSeries": false,
+ "id": 3,
+ "interval": "1s",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.1",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(collectd_ovs_stats_if_collisions_total{exported_instance='$host'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(collectd_ovs_stats_if_dropped_0_total{exported_instance='$host'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(collectd_ovs_stats_if_dropped_1_total{exported_instance='$host'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "C"
+ },
+ {
+ "expr": "rate(collectd_ovs_stats_if_errors_0_total{exported_instance='$host'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "D"
+ },
+ {
+ "expr": "rate(collectd_ovs_stats_if_errors_1_total{exported_instance='$host'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "E"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Average Collisions, Drops and Error values",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 26,
+ "style": "dark",
+ "tags": [
+ "monitoring"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "prometheus",
+ "value": "prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "queryValue": "",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "host",
+ "options": [
+ {
+ "selected": true,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ }
+ ],
+ "query": "pod12-node4,",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": true,
+ "text": "0",
+ "value": "0"
+ },
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "core",
+ "options": [
+ {
+ "selected": false,
+ "text": "All",
+ "value": "$__all"
+ },
+ {
+ "selected": true,
+ "text": "0",
+ "value": "0"
+ },
+ {
+ "selected": false,
+ "text": "1",
+ "value": "1"
+ },
+ {
+ "selected": false,
+ "text": "2",
+ "value": "2"
+ },
+ {
+ "selected": false,
+ "text": "3",
+ "value": "3"
+ },
+ {
+ "selected": false,
+ "text": "4",
+ "value": "4"
+ },
+ {
+ "selected": false,
+ "text": "5",
+ "value": "5"
+ },
+ {
+ "selected": false,
+ "text": "6",
+ "value": "6"
+ },
+ {
+ "selected": false,
+ "text": "7",
+ "value": "7"
+ },
+ {
+ "selected": false,
+ "text": "8",
+ "value": "8"
+ },
+ {
+ "selected": false,
+ "text": "9",
+ "value": "9"
+ },
+ {
+ "selected": false,
+ "text": "10",
+ "value": "10"
+ },
+ {
+ "selected": false,
+ "text": "11",
+ "value": "11"
+ },
+ {
+ "selected": false,
+ "text": "12",
+ "value": "12"
+ },
+ {
+ "selected": false,
+ "text": "13",
+ "value": "13"
+ },
+ {
+ "selected": false,
+ "text": "14",
+ "value": "14"
+ },
+ {
+ "selected": false,
+ "text": "15",
+ "value": "15"
+ },
+ {
+ "selected": false,
+ "text": "16",
+ "value": "16"
+ },
+ {
+ "selected": false,
+ "text": "17",
+ "value": "17"
+ },
+ {
+ "selected": false,
+ "text": "18",
+ "value": "18"
+ },
+ {
+ "selected": false,
+ "text": "19",
+ "value": "19"
+ },
+ {
+ "selected": false,
+ "text": "20",
+ "value": "20"
+ },
+ {
+ "selected": false,
+ "text": "21",
+ "value": "21"
+ },
+ {
+ "selected": false,
+ "text": "22",
+ "value": "22"
+ },
+ {
+ "selected": false,
+ "text": "23",
+ "value": "23"
+ },
+ {
+ "selected": false,
+ "text": "24",
+ "value": "24"
+ },
+ {
+ "selected": false,
+ "text": "25",
+ "value": "25"
+ },
+ {
+ "selected": false,
+ "text": "26",
+ "value": "26"
+ },
+ {
+ "selected": false,
+ "text": "27",
+ "value": "27"
+ },
+ {
+ "selected": false,
+ "text": "28",
+ "value": "28"
+ },
+ {
+ "selected": false,
+ "text": "29",
+ "value": "29"
+ },
+ {
+ "selected": false,
+ "text": "30",
+ "value": "30"
+ },
+ {
+ "selected": false,
+ "text": "31",
+ "value": "31"
+ },
+ {
+ "selected": false,
+ "text": "32",
+ "value": "32"
+ },
+ {
+ "selected": false,
+ "text": "33",
+ "value": "33"
+ },
+ {
+ "selected": false,
+ "text": "34",
+ "value": "34"
+ },
+ {
+ "selected": false,
+ "text": "35",
+ "value": "35"
+ },
+ {
+ "selected": false,
+ "text": "36",
+ "value": "36"
+ },
+ {
+ "selected": false,
+ "text": "37",
+ "value": "37"
+ },
+ {
+ "selected": false,
+ "text": "38",
+ "value": "38"
+ },
+ {
+ "selected": false,
+ "text": "39",
+ "value": "39"
+ },
+ {
+ "selected": false,
+ "text": "40",
+ "value": "40"
+ },
+ {
+ "selected": false,
+ "text": "41",
+ "value": "41"
+ },
+ {
+ "selected": false,
+ "text": "42",
+ "value": "42"
+ },
+ {
+ "selected": false,
+ "text": "43",
+ "value": "43"
+ },
+ {
+ "selected": false,
+ "text": "44",
+ "value": "44"
+ },
+ {
+ "selected": false,
+ "text": "45",
+ "value": "45"
+ },
+ {
+ "selected": false,
+ "text": "46",
+ "value": "46"
+ },
+ {
+ "selected": false,
+ "text": "47",
+ "value": "47"
+ },
+ {
+ "selected": false,
+ "text": "48",
+ "value": "48"
+ },
+ {
+ "selected": false,
+ "text": "49",
+ "value": "49"
+ },
+ {
+ "selected": false,
+ "text": "50",
+ "value": "50"
+ },
+ {
+ "selected": false,
+ "text": "51",
+ "value": "51"
+ },
+ {
+ "selected": false,
+ "text": "52",
+ "value": "52"
+ },
+ {
+ "selected": false,
+ "text": "53",
+ "value": "53"
+ },
+ {
+ "selected": false,
+ "text": "54",
+ "value": "54"
+ },
+ {
+ "selected": false,
+ "text": "55",
+ "value": "55"
+ },
+ {
+ "selected": false,
+ "text": "56",
+ "value": "56"
+ },
+ {
+ "selected": false,
+ "text": "57",
+ "value": "57"
+ },
+ {
+ "selected": false,
+ "text": "58",
+ "value": "58"
+ },
+ {
+ "selected": false,
+ "text": "59",
+ "value": "59"
+ },
+ {
+ "selected": false,
+ "text": "60",
+ "value": "60"
+ },
+ {
+ "selected": false,
+ "text": "61",
+ "value": "61"
+ },
+ {
+ "selected": false,
+ "text": "62",
+ "value": "62"
+ },
+ {
+ "selected": false,
+ "text": "63",
+ "value": "63"
+ },
+ {
+ "selected": false,
+ "text": "64",
+ "value": "64"
+ }
+ ],
+ "query": "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ },
+ {
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ "hide": 0,
+ "label": null,
+ "name": "range",
+ "options": [
+ {
+ "selected": true,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "30s,1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "queryValue": "",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "OVS Stats",
+ "uid": "K1N5ciIGz",
+ "version": 7
+ } \ No newline at end of file
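Every panel in the OVS stats dashboard wraps a collectd OVS interface counter in rate(), so the graphs show a per-second rate over the selected window rather than the raw, ever-growing counter. The toy calculation below illustrates the idea with invented samples; real rate() additionally extrapolates to the window edges and handles counter resets.

# (timestamp, collectd_ovs_stats_if_rx_octets_total) - values invented for the example
samples = [
    (1596643100, 1_000_000),
    (1596643130, 1_750_000),
]
(t0, v0), (t1, v1) = samples
rx_octets_per_second = (v1 - v0) / (t1 - t0)
print(f"~{rx_octets_per_second:.0f} octets/s over a {t1 - t0}s window")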
diff --git a/tools/lma/metrics/dashboard/rdt_using.json b/tools/lma/metrics/dashboard/rdt_using.json
new file mode 100644
index 00000000..a0ce7987
--- /dev/null
+++ b/tools/lma/metrics/dashboard/rdt_using.json
@@ -0,0 +1,833 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "prometheus",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "limit": 100,
+ "name": "Monitoring",
+ "showIn": 0,
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": 7,
+ "iteration": 1597615840124,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "hiddenSeries": false,
+ "id": 1,
+ "interval": "1s",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.3",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(collectd_intel_rdt_bytes{exported_instance='$host', intel_rdt='$intel_rdt'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "RDT Bytes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "hiddenSeries": false,
+ "id": 2,
+ "interval": "1s",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.3",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(collectd_intel_rdt_ipc{exported_instance='$host', intel_rdt='$intel_rdt'}[$range])",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "IPC values",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 5,
+ "w": 24,
+ "x": 0,
+ "y": 12
+ },
+ "hiddenSeries": false,
+ "id": 3,
+ "interval": "1s",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.3",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(collectd_intel_rdt_memory_bandwidth_total{exported_instance='$host', type='local'}[$range])",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(collectd_intel_rdt_memory_bandwidth_total{exported_instance='$host', type='remote'}[$range])",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Bandwidth Total",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 26,
+ "style": "dark",
+ "tags": [
+ "monitoring"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "prometheus",
+ "value": "prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "queryValue": "",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "host",
+ "options": [
+ {
+ "selected": true,
+ "text": "pod12-node4",
+ "value": "pod12-node4"
+ }
+ ],
+ "query": "pod12-node4,",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ },
+ {
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ "hide": 0,
+ "label": null,
+ "name": "range",
+ "options": [
+ {
+ "selected": true,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "30s,1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "queryValue": "",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": true,
+ "text": "2",
+ "value": "2"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "intel_rdt",
+ "options": [
+ {
+ "selected": false,
+ "text": "0",
+ "value": "0"
+ },
+ {
+ "selected": false,
+ "text": "1",
+ "value": "1"
+ },
+ {
+ "selected": true,
+ "text": "2",
+ "value": "2"
+ },
+ {
+ "selected": false,
+ "text": "3",
+ "value": "3"
+ },
+ {
+ "selected": false,
+ "text": "4",
+ "value": "4"
+ },
+ {
+ "selected": false,
+ "text": "5",
+ "value": "5"
+ },
+ {
+ "selected": false,
+ "text": "6",
+ "value": "6"
+ },
+ {
+ "selected": false,
+ "text": "7",
+ "value": "7"
+ },
+ {
+ "selected": false,
+ "text": "8",
+ "value": "8"
+ },
+ {
+ "selected": false,
+ "text": "9",
+ "value": "9"
+ },
+ {
+ "selected": false,
+ "text": "10",
+ "value": "10"
+ },
+ {
+ "selected": false,
+ "text": "11",
+ "value": "11"
+ },
+ {
+ "selected": false,
+ "text": "12",
+ "value": "12"
+ },
+ {
+ "selected": false,
+ "text": "13",
+ "value": "13"
+ },
+ {
+ "selected": false,
+ "text": "14",
+ "value": "14"
+ },
+ {
+ "selected": false,
+ "text": "15",
+ "value": "15"
+ },
+ {
+ "selected": false,
+ "text": "16",
+ "value": "16"
+ },
+ {
+ "selected": false,
+ "text": "17",
+ "value": "17"
+ },
+ {
+ "selected": false,
+ "text": "18",
+ "value": "18"
+ },
+ {
+ "selected": false,
+ "text": "19",
+ "value": "19"
+ },
+ {
+ "selected": false,
+ "text": "20",
+ "value": "20"
+ },
+ {
+ "selected": false,
+ "text": "21",
+ "value": "21"
+ },
+ {
+ "selected": false,
+ "text": "22",
+ "value": "22"
+ },
+ {
+ "selected": false,
+ "text": "23",
+ "value": "23"
+ },
+ {
+ "selected": false,
+ "text": "24",
+ "value": "24"
+ },
+ {
+ "selected": false,
+ "text": "25",
+ "value": "25"
+ },
+ {
+ "selected": false,
+ "text": "26",
+ "value": "26"
+ },
+ {
+ "selected": false,
+ "text": "27",
+ "value": "27"
+ },
+ {
+ "selected": false,
+ "text": "28",
+ "value": "28"
+ },
+ {
+ "selected": false,
+ "text": "29",
+ "value": "29"
+ },
+ {
+ "selected": false,
+ "text": "30",
+ "value": "30"
+ },
+ {
+ "selected": false,
+ "text": "31",
+ "value": "31"
+ },
+ {
+ "selected": false,
+ "text": "32",
+ "value": "32"
+ },
+ {
+ "selected": false,
+ "text": "33",
+ "value": "33"
+ },
+ {
+ "selected": false,
+ "text": "34",
+ "value": "34"
+ },
+ {
+ "selected": false,
+ "text": "35",
+ "value": "35"
+ },
+ {
+ "selected": false,
+ "text": "36",
+ "value": "36"
+ },
+ {
+ "selected": false,
+ "text": "37",
+ "value": "37"
+ },
+ {
+ "selected": false,
+ "text": "38",
+ "value": "38"
+ },
+ {
+ "selected": false,
+ "text": "39",
+ "value": "39"
+ },
+ {
+ "selected": false,
+ "text": "40",
+ "value": "40"
+ },
+ {
+ "selected": false,
+ "text": "41",
+ "value": "41"
+ },
+ {
+ "selected": false,
+ "text": "42",
+ "value": "42"
+ },
+ {
+ "selected": false,
+ "text": "43",
+ "value": "43"
+ },
+ {
+ "selected": false,
+ "text": "44",
+ "value": "44"
+ },
+ {
+ "selected": false,
+ "text": "45",
+ "value": "45"
+ },
+ {
+ "selected": false,
+ "text": "46",
+ "value": "46"
+ },
+ {
+ "selected": false,
+ "text": "47",
+ "value": "47"
+ },
+ {
+ "selected": false,
+ "text": "48",
+ "value": "48"
+ },
+ {
+ "selected": false,
+ "text": "49",
+ "value": "49"
+ },
+ {
+ "selected": false,
+ "text": "50",
+ "value": "50"
+ },
+ {
+ "selected": false,
+ "text": "51",
+ "value": "51"
+ },
+ {
+ "selected": false,
+ "text": "52",
+ "value": "52"
+ },
+ {
+ "selected": false,
+ "text": "53",
+ "value": "53"
+ },
+ {
+ "selected": false,
+ "text": "54",
+ "value": "54"
+ },
+ {
+ "selected": false,
+ "text": "55",
+ "value": "55"
+ },
+ {
+ "selected": false,
+ "text": "56",
+ "value": "56"
+ },
+ {
+ "selected": false,
+ "text": "57",
+ "value": "57"
+ },
+ {
+ "selected": false,
+ "text": "58",
+ "value": "58"
+ },
+ {
+ "selected": false,
+ "text": "59",
+ "value": "59"
+ },
+ {
+ "selected": false,
+ "text": "60",
+ "value": "60"
+ },
+ {
+ "selected": false,
+ "text": "61",
+ "value": "61"
+ },
+ {
+ "selected": false,
+ "text": "62",
+ "value": "62"
+ },
+ {
+ "selected": false,
+ "text": "63",
+ "value": "63"
+ },
+ {
+ "selected": false,
+ "text": "64",
+ "value": "64"
+ }
+ ],
+ "query": "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64",
+ "queryValue": "",
+ "skipUrlSync": false,
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "RDT (L3 Cache)",
+ "uid": "kuro-rdt",
+ "version": 9
+} \ No newline at end of file
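The RDT dashboard keys its bytes and IPC panels off the intel_rdt label exported by collectd's intel_rdt plugin (one value per monitored core group), so the $intel_rdt variable is only meaningful for group numbers that are actually being exported. The sketch below lists the label values Prometheus has seen; the endpoint is a placeholder.

import requests

PROM = 'http://10.10.120.211:30900'   # placeholder: Prometheus endpoint

# /api/v1/label/<name>/values returns every value Prometheus has stored for that label.
resp = requests.get(f'{PROM}/api/v1/label/intel_rdt/values', timeout=10)
resp.raise_for_status()
print(resp.json()['data'])            # e.g. ['0', '1', '2', ...]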
diff --git a/tools/lma/yamllintrc b/tools/lma/yamllintrc
new file mode 100644
index 00000000..9714a565
--- /dev/null
+++ b/tools/lma/yamllintrc
@@ -0,0 +1,25 @@
+# Copyright 2020 Tieto
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+extends: relaxed
+
+rules:
+ empty-lines:
+ max-start: 1
+ max-end: 1
+ colons:
+ max-spaces-after: 1
+ max-spaces-before: 1
+ line-length:
+ max: 250
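The config extends yamllint's relaxed preset and only tightens the blank-line, colon-spacing and line-length rules. A minimal way to run it, assuming the repository root as the working directory and that it is meant to cover the yaml under tools/lma/, is:

import subprocess

# yamllint recurses into directories; -c points it at the config above.
subprocess.run(['yamllint', '-c', 'tools/lma/yamllintrc', 'tools/lma/'], check=True)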