aboutsummaryrefslogtreecommitdiffstats
path: root/tools/lma/ansible-server
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lma/ansible-server')
-rw-r--r--tools/lma/ansible-server/ansible.cfg17
-rw-r--r--tools/lma/ansible-server/group_vars/all.yml27
-rw-r--r--tools/lma/ansible-server/hosts12
-rw-r--r--tools/lma/ansible-server/playbooks/clean.yaml52
-rw-r--r--tools/lma/ansible-server/playbooks/setup.yaml44
-rw-r--r--tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml34
-rw-r--r--tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml65
-rw-r--r--tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml26
-rw-r--r--tools/lma/ansible-server/roles/clean-logging/tasks/main.yml193
-rw-r--r--tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml48
-rw-r--r--tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml44
-rw-r--r--tools/lma/ansible-server/roles/k8s-master/tasks/main.yml49
-rw-r--r--tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml72
-rw-r--r--tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml24
-rw-r--r--tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml48
-rw-r--r--tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml68
-rw-r--r--tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml132
-rw-r--r--tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml76
-rw-r--r--tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml231
-rw-r--r--tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml23
-rw-r--r--tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml525
-rw-r--r--tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml34
-rw-r--r--tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml65
-rw-r--r--tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml23
-rw-r--r--tools/lma/ansible-server/roles/logging/files/namespace.yaml17
-rw-r--r--tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml36
-rw-r--r--tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml68
-rw-r--r--tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml28
-rw-r--r--tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml58
-rw-r--r--tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml105
-rw-r--r--tools/lma/ansible-server/roles/logging/files/storageClass.yaml73
-rw-r--r--tools/lma/ansible-server/roles/logging/tasks/main.yml165
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml37
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml62
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml41
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml62
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml42
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml79
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml30
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml51
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml35
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml35
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml68
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml31
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml33
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml36
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml36
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml26
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml18
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml80
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml33
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml35
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml609
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml73
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml30
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml33
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml34
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml73
-rw-r--r--tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml35
-rw-r--r--tools/lma/ansible-server/roles/monitoring/tasks/main.yml273
-rw-r--r--tools/lma/ansible-server/roles/nfs/tasks/main.yml42
61 files changed, 4554 insertions, 0 deletions
diff --git a/tools/lma/ansible-server/ansible.cfg b/tools/lma/ansible-server/ansible.cfg
new file mode 100644
index 00000000..307ef457
--- /dev/null
+++ b/tools/lma/ansible-server/ansible.cfg
@@ -0,0 +1,17 @@
+[defaults]
+inventory = ./hosts
+# NOTE(review): SSH host-key verification is switched off; only appropriate on a trusted lab network
+host_key_checking = False
+
+# additional path to search for roles in
+roles_path = roles
+
+# write a log of every run to this file
+log_path = ./ansible.log
+
+# run every task as root through sudo
+[privilege_escalation]
+become = True
+become_method = sudo
+become_user = root
+
+# pipelining reduces the number of SSH operations needed to run a module
+[ssh_connection]
+pipelining = True
diff --git a/tools/lma/ansible-server/group_vars/all.yml b/tools/lma/ansible-server/group_vars/all.yml
new file mode 100644
index 00000000..b0725ff5
--- /dev/null
+++ b/tools/lma/ansible-server/group_vars/all.yml
@@ -0,0 +1,27 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# IP address the API server advertises; passed to "kubeadm init --apiserver-advertise-address" by the k8s-master role
+ad_addr: 10.10.120.211
+
+# pod network CIDR; passed to "kubeadm init --pod-network-cidr" by the k8s-master role
+pod_cidr: 192.168.0.0/16
+
+# file that receives the output of "kubeadm token create --print-join-command" and is then copied to the workers
+token_file: join_token
+
+# node names that the clean-k8s-cluster role drains and deletes with kubectl
+vm3: 'vm3'
+vm2: 'vm2'
+vm1: 'vm1'
diff --git a/tools/lma/ansible-server/hosts b/tools/lma/ansible-server/hosts
new file mode 100644
index 00000000..0a13d754
--- /dev/null
+++ b/tools/lma/ansible-server/hosts
@@ -0,0 +1,12 @@
+# Every host listed below also belongs to Ansible's implicit "all" group.
+[master]
+10.10.120.211
+
+[worker-nodes]
+10.10.120.203
+10.10.120.204
+
+# Connection settings shared by every host.
+# NOTE(review): plaintext credentials - move them to Ansible Vault or pass --ask-pass / --ask-become-pass instead.
+[all:vars]
+ansible_connection=ssh
+ansible_user=root
+ansible_password=P@ssw0rd
+ansible_become_pass=P@ssw0rd
diff --git a/tools/lma/ansible-server/playbooks/clean.yaml b/tools/lma/ansible-server/playbooks/clean.yaml
new file mode 100644
index 00000000..b4da66da
--- /dev/null
+++ b/tools/lma/ansible-server/playbooks/clean.yaml
@@ -0,0 +1,52 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Remove the monitoring stack (PAG) through the clean-monitoring role
+- name: Clean PAG setup
+ hosts: master
+ roles:
+ - clean-monitoring
+
+# Remove the logging stack (EFK) through the clean-logging role
+- name: Clean EFK setup
+ hosts: master
+ roles:
+ - clean-logging
+
+# Drain and delete the cluster nodes, then reset kubeadm on the master.
+# The role performs the reset only when "systemctl status kubelet" returns 0.
+- name: Clean k8s cluster
+ hosts: master
+ roles:
+ - clean-k8s-cluster
+
+# Run "kubeadm reset" on every worker node
+- name: Reset worker-nodes
+ hosts: worker-nodes
+ roles:
+ - clean-k8s-worker-reset
+
+# Uninstall the k8s prerequisites and restore swap, firewalld and SELinux on all hosts
+- name: unistall pre-requisites for k8s
+ hosts: all
+ roles:
+ - clean-k8s-pre
+
+#*************************************************************************************************************
+#DISABLED ON PURPOSE: THE clean-nfs ROLE DELETES THE ELASTICSEARCH AND GRAFANA DATA DIRECTORIES
+#*************************************************************************************************************
+# - name: Clean nfs server
+# hosts: all
+# roles:
+# - clean-nfs
diff --git a/tools/lma/ansible-server/playbooks/setup.yaml b/tools/lma/ansible-server/playbooks/setup.yaml
new file mode 100644
index 00000000..1f5ed1f5
--- /dev/null
+++ b/tools/lma/ansible-server/playbooks/setup.yaml
@@ -0,0 +1,44 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Apply the k8s prerequisites (k8s-pre role) and the NFS server role (nfs) on every host
+- name: Pre-requisites for k8s in all the nodes
+ hosts: all
+ roles:
+ - k8s-pre
+ - nfs
+
+# Initialise the control plane with kubeadm on the master host
+- name: master setup for k8s
+ hosts: master
+ roles:
+ - k8s-master
+
+# Join every worker to the cluster with the join command saved by the master play
+- name: worker setup for k8s
+ hosts: worker-nodes
+ roles:
+ - k8s-worker
+
+# Apply the logging role (EFK) from the master host
+- name: EFK setup in k8s
+ hosts: master
+ roles:
+ - logging
+
+# Apply the monitoring role (PAG) from the master host
+- name: PAG setup in k8s
+ hosts: master
+ roles:
+ - monitoring
diff --git a/tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml b/tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml
new file mode 100644
index 00000000..83ac086d
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-k8s-cluster/tasks/main.yml
@@ -0,0 +1,34 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Probe kubelet. systemctl exit codes: 0 = active, 3 = not running, 4 = no such unit.
+# Only rc 0 triggers the reset below; a stopped or absent kubelet is not an error,
+# so the cleanup can be re-run after a partial run has already stopped kubelet.
+- name: check for kubelet
+  command: systemctl status kubelet
+  register: _svc_kubelet
+  changed_when: false
+  failed_when: _svc_kubelet.rc not in [0, 3, 4] and ("could not be found" not in _svc_kubelet.stderr)
+
+# Only when kubelet is running: drain and delete every node, then reset kubeadm.
+# vm1..vm3 are the node names from group_vars/all.yml.
+- name: reset k8s
+  shell: |
+    kubectl drain {{vm3}} --delete-local-data --force --ignore-daemonsets
+    kubectl drain {{vm2}} --delete-local-data --force --ignore-daemonsets
+    kubectl drain {{vm1}} --delete-local-data --force --ignore-daemonsets
+    kubectl delete node {{vm3}}
+    kubectl delete node {{vm2}}
+    kubectl delete node {{vm1}}
+    sudo kubeadm reset -f
+    # -f: as the last command this decides the task result; a kubeconfig that is already gone must not fail it
+    sudo rm -f $HOME/.kube/config
+  when: "_svc_kubelet.rc == 0"
+
diff --git a/tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml b/tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml
new file mode 100644
index 00000000..6d12bd5f
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-k8s-pre/tasks/main.yml
@@ -0,0 +1,65 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Remove the Kubernetes and Docker packages
+- name: Uninstalling K8s
+  yum:
+    name: ['kubeadm', 'kubectl', 'kubelet', 'docker-ce']
+    state: absent
+
+# Restore the swap entries that the k8s-pre role commented out in /etc/fstab.
+# The capture group includes the leading "/" so the device path is restored intact.
+- name: Uncommenting Swap entries in /etc/fstab
+  replace:
+    path: /etc/fstab
+    regexp: '^# (/.*swap.*)'
+    replace: '\1'
+
+# Turn swap on only after fstab is restored: "swapon -a" activates the devices listed there.
+- name: Enabling Swap on all nodes
+  shell: swapon -a
+  ignore_errors: yes
+
+# Start firewalld and enable it at boot
+- name: 'Starting firewall'
+  service:
+    name: firewalld
+    state: started
+    enabled: yes
+
+# Switch SELinux back to enforcing, both now (setenforce) and in the config file.
+# Only the last command's exit status decides the task result.
+- name: Enabling SELinux on all nodes
+  shell: |
+    setenforce 1
+    sudo sed -i 's/^SELINUX=permissive$/SELINUX=enforcing/' /etc/selinux/config
+
+# Disable (not delete) the docker-ce-stable yum repository
+- name: removing Docker repo
+  command: yum-config-manager --disable docker-ce-stable
+
+# Remove the managed block from the Kubernetes repo file.
+# NOTE(review): blockinfile fails when the file does not exist - confirm this role only runs after setup.
+- name: removing repository details in Kubernetes repo file.
+  blockinfile:
+    path: /etc/yum.repos.d/kubernetes.repo
+    state: absent
+    block: |
+      [kubernetes]
+      name=Kubernetes
+      baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+      enabled=1
+      gpgcheck=1
+      repo_gpgcheck=1
+      gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg
+             https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
diff --git a/tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml b/tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml
new file mode 100644
index 00000000..3ba9c9ea
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-k8s-worker-reset/tasks/main.yml
@@ -0,0 +1,26 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Probe kubelet. systemctl exit codes: 0 = active, 3 = not running, 4 = no such unit.
+# Only rc 0 triggers the reset below; a stopped or absent kubelet is not an error.
+- name: check for kubelet
+  command: systemctl status kubelet
+  register: _svc_kubelet
+  changed_when: false
+  failed_when: _svc_kubelet.rc not in [0, 3, 4] and ("could not be found" not in _svc_kubelet.stderr)
+
+# Only when kubelet is running: reset the kubeadm state of this worker
+- name: reset k8s
+  command: kubeadm reset -f
+  when: "_svc_kubelet.rc == 0"
+
diff --git a/tools/lma/ansible-server/roles/clean-logging/tasks/main.yml b/tools/lma/ansible-server/roles/clean-logging/tasks/main.yml
new file mode 100644
index 00000000..259065ed
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-logging/tasks/main.yml
@@ -0,0 +1,193 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Deleting EFK setup from k8s cluster
+
+# Probe kubelet. systemctl exit codes: 0 = active, 3 = not running, 4 = no such unit.
+# Every cluster task below runs only when rc == 0; a stopped or absent kubelet is not an error.
+- name: check for kubelet
+  command: systemctl status kubelet
+  register: _svc_kubelet
+  changed_when: false
+  failed_when: _svc_kubelet.rc not in [0, 3, 4] and ("could not be found" not in _svc_kubelet.stderr)
+
+# Stage the logging role's manifests on the managed host; the k8s tasks read them from /tmp/files/.
+- name: copy all yaml to /tmp/files/
+  copy:
+    src: ../../logging/files/
+    dest: /tmp/files/
+
+# Delete the elastalert, fluentd and nginx objects, one manifest per loop item.
+# A failing item fails the task, but the remaining items are still attempted first.
+- name: Delete elastalert, fluentd and nginx resources
+  k8s:
+    state: absent
+    src: "/tmp/files/{{ item }}"
+    namespace: logging
+  with_items:
+    - elastalert/ealert-conf-cm.yaml
+    - elastalert/ealert-key-cm.yaml
+    - elastalert/ealert-rule-cm.yaml
+    - elastalert/elastalert.yaml
+    - fluentd/fluent-service.yaml
+    - fluentd/fluent-cm.yaml
+    - fluentd/fluent.yaml
+    - nginx/nginx-service.yaml
+    - nginx/nginx-conf-cm.yaml
+    - nginx/nginx-key-cm.yaml
+    - nginx/nginx.yaml
+  when: "_svc_kubelet.rc == 0"
+
+# Delete Kibana, then Elasticsearch. Errors are ignored here.
+# NOTE(review): presumably because these objects may already be gone - confirm.
+- name: Stopping Kibana and Elasticsearch
+  k8s:
+    state: absent
+    src: "/tmp/files/{{ item }}"
+    namespace: logging
+  with_items:
+    - kibana/kibana.yaml
+    - elasticsearch/elasticsearch.yaml
+  ignore_errors: yes
+  when: "_svc_kubelet.rc == 0"
+
+# Remove the ECK 1.2.0 operator using its all-in-one manifest
+- name: Stopping Elasticsearch operator
+  command: kubectl delete -f https://download.elastic.co/downloads/eck/1.2.0/all-in-one.yaml
+  ignore_errors: yes
+  when: "_svc_kubelet.rc == 0"
+
+# Delete the persistent volumes, the storage classes and finally the namespace itself
+- name: Deleting Persistent Volume, Storage Class and Namespace
+  k8s:
+    state: absent
+    src: "/tmp/files/{{ item }}"
+    namespace: logging
+  with_items:
+    - persistentVolume.yaml
+    - storageClass.yaml
+    - namespace.yaml
+  when: "_svc_kubelet.rc == 0"
+
+# Remove the staged manifests (runs whether or not kubelet was active)
+- name: Removing /tmp/files
+  file:
+    path: "/tmp/files"
+    state: absent
diff --git a/tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml b/tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml
new file mode 100644
index 00000000..49943ec0
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-monitoring/tasks/main.yml
@@ -0,0 +1,48 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+# Deleting PAG setup from k8s cluster
+
+# Probe kubelet. systemctl exit codes: 0 = active, 3 = not running, 4 = no such unit.
+# The namespace is deleted only when rc == 0; a stopped or absent kubelet is not an error.
+- name: check for kubelet
+  command: systemctl status kubelet
+  register: _svc_kubelet
+  changed_when: false
+  failed_when: _svc_kubelet.rc not in [0, 3, 4] and ("could not be found" not in _svc_kubelet.stderr)
+
+# Stage the namespace manifest on the managed host as /tmp/monitoring-namespace.yaml
+- name: copy namespace yaml to /tmp/files/
+  copy:
+    src: ../../monitoring/files/monitoring-namespace.yaml
+    dest: /tmp/monitoring-namespace.yaml
+
+# Delete the monitoring namespace described by the staged manifest
+- name: Deleting Namespace
+  k8s:
+    state: absent
+    src: /tmp/monitoring-namespace.yaml
+    namespace: monitoring
+  when: "_svc_kubelet.rc == 0"
+
+# Remove the staged manifest (runs whether or not kubelet was active)
+- name: Removing /tmp/monitoring-namespace.yaml
+  file:
+    path: "/tmp/monitoring-namespace.yaml"
+    state: absent
diff --git a/tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml b/tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml
new file mode 100644
index 00000000..157db849
--- /dev/null
+++ b/tools/lma/ansible-server/roles/clean-nfs/tasks/main.yml
@@ -0,0 +1,44 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Drop the three NFS export entries from /etc/exports
+- name: Edit /etc/export file for NFS
+  lineinfile:
+    path: /etc/exports
+    line: "{{ item }}"
+    state: absent
+  with_items:
+    - "/srv/nfs/master *(rw,sync,no_root_squash,no_subtree_check)"
+    - "/srv/nfs/data *(rw,sync,no_root_squash,no_subtree_check)"
+    - "/usr/share/monitoring_data/grafana *(rw,sync,no_root_squash,no_subtree_check)"
+
+# Remove the NFS utilities package
+- name: Uninstalling NFS server utils
+  yum:
+    name: nfs-utils
+    state: absent
+
+# Delete the Elasticsearch data directories /srv/nfs/data and /srv/nfs/master
+- name: Removing Directory for elasticsearch
+  file:
+    path: "/srv/nfs/{{ item }}"
+    state: absent
+  with_items:
+    - data
+    - master
+
+# Delete the Grafana data directory
+- name: Removing Directory for grafana
+  file:
+    path: "/usr/share/monitoring_data/grafana"
+    state: absent
diff --git a/tools/lma/ansible-server/roles/k8s-master/tasks/main.yml b/tools/lma/ansible-server/roles/k8s-master/tasks/main.yml
new file mode 100644
index 00000000..edc8f10b
--- /dev/null
+++ b/tools/lma/ansible-server/roles/k8s-master/tasks/main.yml
@@ -0,0 +1,49 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# pull the images kubeadm needs before the cluster is initialised
+- name: Pulling images required for setting up a Kubernetes cluster
+ shell: kubeadm config images pull
+
+# wipe any earlier kubeadm state so that the init below starts from a clean node
+- name: Resetting kubeadm
+ shell: kubeadm reset -f
+
+# create the control plane; ad_addr and pod_cidr come from group_vars/all.yml
+- name: Initializing Kubernetes cluster
+ shell: kubeadm init --apiserver-advertise-address {{ad_addr}} --pod-network-cidr={{pod_cidr}}
+
+# install the admin kubeconfig as $HOME/.kube/config so that kubectl can reach the new cluster
+- name: Copying required files
+ shell: |
+ mkdir -p $HOME/.kube
+ sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config
+ sudo chown $(id -u):$(id -g) $HOME/.kube/config
+
+# create a token and capture the printed "kubeadm join" command in the "token" variable
+- name: Storing token for future purpose.
+ shell: kubeadm token create --print-join-command
+ register: token
+
+# write the captured join command to token_file on the control machine (local_action); the k8s-worker role copies it out
+- name: Storing token for worker
+ local_action: copy content={{ token.stdout }} dest={{ token_file }}
+
+# install the Calico network add-on from its v3.11 manifest
+- name: Install Network Add-on
+ command: kubectl apply -f https://docs.projectcalico.org/v3.11/manifests/calico.yaml
+
+# NOTE: the trailing "-" REMOVES the node-role.kubernetes.io/master taint from all nodes, despite the task name
+- name: Taint master
+ command: kubectl taint nodes --all node-role.kubernetes.io/master-
diff --git a/tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml b/tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml
new file mode 100644
index 00000000..95526a28
--- /dev/null
+++ b/tools/lma/ansible-server/roles/k8s-pre/tasks/main.yml
@@ -0,0 +1,72 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Turn swap off for the running system
+- name: Disabling Swap on all nodes
+  shell: swapoff -a
+
+# Comment out every fstab line that starts with "/" and mentions swap.
+# The whole line is captured, so the result is "# <original line>";
+# the clean-k8s-pre role reverses this.
+- name: Commenting Swap entries in /etc/fstab
+  replace:
+    path: /etc/fstab
+    regexp: '^(/.*swap.*)$'
+    replace: '# \1'
+
+# Stop firewalld and keep it from starting at boot
+- name: 'Stopping firewall'
+  service:
+    name: firewalld
+    state: stopped
+    enabled: no
+
+# Switch SELinux to permissive, both now (setenforce) and in the config file.
+# Only the last command's exit status decides the task result.
+- name: Disabling SELinux on all nodes
+  shell: |
+    setenforce 0
+    sudo sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
+
+# Register the Docker CE yum repository (the package itself is installed further down)
+- name: Installing Docker
+  shell: yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
+
+# Write the Kubernetes yum repository definition.
+# create: yes - blockinfile otherwise fails on a fresh host where the repo file does not exist yet.
+- name: Adding repository details in Kubernetes repo file.
+  blockinfile:
+    path: /etc/yum.repos.d/kubernetes.repo
+    create: yes
+    block: |
+      [kubernetes]
+      name=Kubernetes
+      baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+      enabled=1
+      gpgcheck=1
+      repo_gpgcheck=1
+      gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg
+             https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+
+# Install the Kubernetes tools and Docker
+- name: Installing K8s
+  yum:
+    name: ['kubeadm', 'kubectl', 'kubelet', 'docker-ce']
+    state: present
+
+# Start docker and kubelet and enable them at boot
+- name: Starting and Enabling the required services
+  service:
+    name: "{{ item }}"
+    state: started
+    enabled: yes
+  with_items:
+    - docker
+    - kubelet
diff --git a/tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml b/tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml
new file mode 100644
index 00000000..89d2b373
--- /dev/null
+++ b/tools/lma/ansible-server/roles/k8s-worker/tasks/main.yml
@@ -0,0 +1,24 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Worker: copy the join command saved by the k8s-master role, reset kubeadm, then run the last line of that file to join
+
+- name: Copying token to worker nodes
+ copy: src={{ token_file }} dest=join_token
+
+- name: Joining worker nodes with kubernetes master
+ shell: |
+ kubeadm reset -f
+ cat join_token | tail -1 > out.sh
+ sh out.sh
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml
new file mode 100644
index 00000000..a320ef75
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-conf-cm.yaml
@@ -0,0 +1,48 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: elastalert-config
+data:
+ elastalert.yaml: |
+ # This is the folder that contains the rule yaml files
+ # Any .yaml file will be loaded as a rule
+ rules_folder: rules
+ # How often ElastAlert will query Elasticsearch
+ # The unit can be anything from weeks to seconds
+ run_every:
+ minutes: 1
+ # ElastAlert will buffer results from the most recent
+ # period of time, in case some log sources are not in real time
+ buffer_time:
+ minutes: 15
+
+ scan_subdirectories: false
+
+ # The Elasticsearch hostname for metadata writeback
+ # Note that every rule can have its own Elasticsearch host
+ es_host: logging-es-http
+ es_port: 9200
+ es_username: ${ES_USERNAME}
+ es_password: ${ES_PASSWORD}
+ es_conn_timeout: 120
+ verify_certs: False
+ use_ssl: True
+ client_cert: '/opt/elastalert/key/elastalert.pem'
+ client_key: '/opt/elastalert/key/elastalert.key'
+ writeback_index: elastalert_status
+ writeback_alias: elastalert_alerts
+ alert_time_limit:
+ days: 2
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml
new file mode 100644
index 00000000..0c606a9c
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-key-cm.yaml
@@ -0,0 +1,68 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: elastalert-key
+data:
+ elastalert.key: |
+ -----BEGIN PRIVATE KEY-----
+ MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQC0uQ+B0gy3VB4w
+ 5CeWOx575lqSUuYvrGW3ILpV1gmj0ZZCMZUGvt4UvaCEaNPIAqNaHPmaslQqJb5C
+ PJH9pMN7vUVp3DACzmYrS4HdROHamn5gjebXs4hq43heLaIB1Kb+4F+7sEY88irK
+ xOevadcN35y5ld7lVUGRsj6JYcweaAeh/YZ/HaBT5RfdGF+x07NDus+mFqT8j3PD
+ rs2+JtEvEoWtjcxwFgloc9GkHsWZoV1AQHgyAWjmDXZtZeV0HQSkl7hWFG9vxTni
+ DvdrdhX0g+D+u8jWnlR4Za4jd64KbTp9C9trSHyMSRIvN5obm/H8O5MQ+sZ+NQ0X
+ PdK92MjbAgMBAAECggEASbRPxrpLxVjhFz91haeGvzErLxHwHvFIam9Gj0tDkzQe
+ +9AM3ztohzzvAhFejevFgzLd+WFRQf8yoQDi6XcQ4p5GeO38Bqj2siGRTRSSp/zq
+ HabBxqbJtA4hQQeLUwPPN5N6d6lke+an3RqBAuE/e8D+whGFXjJvE2SGbLEd9if2
+ uzHj37sPsVi8kRvgZBDOozmt7YFzQVO/1V+4Lw6nz48M3t+hOHaUXY0Yd8nsk5A6
+ kgoDQ4CGUHjtWfSrccZrYNk51Zows9/sX8axfJ94wKJSImWJcuW9PXIQhzT4exnH
+ sPOwY6Noy3nXRk9gcchT60fKpp+tsJZk3ezkwSpgwQKBgQDvsaYcbnIVdFZpaNKF
+ Tmt/w60CmfGeNozRygfi84ot7edUf93cB6WSKChcAE8fbq9Ji5USPNtfbnZfFXsI
+ IyTr2KHW3RkHuDEyu+Lan9JuReEH3QOG83vvN/oYA3J3hqUTCjEGkPjqnoFtdk8L
+ f7WH1jZvXYEMo0C48SXo+yGohQKBgQDBBGkzL928j1QB9NfiNFk70EalDsF8Im2W
+ n8bQ54KYspUybKD/Hmw0jIV7kdu2vhgGC4RYkn9c5qATtulbYJUgUBelaSi0vhXT
+ gfAuO+JIIZ50P+mkkxH/KIUyu1xWUB2jtMulqLLomdoBvfp/u51qCY6fT3WMCB+R
+ ouWLr2oZ3wKBgQCAuas4AaiLFRuDKKRGq0LYLsIvb3VvPmSKFjH+FETVPbrKipEf
+ pYup3p8uKYxUmSDSIoBAdyZpLe2sSuD0Ecu2TXU86yiSGL1zPawrNUHRrv2XN365
+ bvHUGv/Y/aDvyAPHIeYKXLkRZ2ai3rK8vi1Dcitxy4mOu+36ZKezY4tD8QKBgQCd
+ hakJUj4nPd20fwqUnF5a1z5gRGuZkEtZiunp4ZaOYegrL8YwjraGKExjrYTfXcIj
+ ZNDMrDpvKfRoQnWt0mPB7DtwDiNfZmZPqBLI2Kxya6VygBqA6lncoEgcQBY6hsW5
+ rbopZ0UjWTQ3CcFe71GnkUcpMuLetl51L7kgR7dShwKBgQC+vqjhe/h081JGLTo1
+ tKeRUCaDA/V3VHjFKgM5g+S3/KzgU/EaB1rq3Qja1quGv0zHveca3zibdNQi1ENm
+ KSutWh2zQXzzvmycPmVcthhOxaKzRXDjG0mXiA0bnSgK3F2o9t4196RYhIiiSvAH
+ shVjZMTK04h8ciTLIqK/GtZr+g==
+ -----END PRIVATE KEY-----
+ elastalert.pem: |
+ -----BEGIN CERTIFICATE-----
+ MIIDVzCCAj+gAwIBAgIJAORgkR7Y0Nk9MA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNV
+ BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
+ Q29tcGFueSBMdGQwHhcNMjAwNjI4MTM1NjAwWhcNMjEwNjI4MTM1NjAwWjBCMQsw
+ CQYDVQQGEwJYWDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZh
+ dWx0IENvbXBhbnkgTHRkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA
+ tLkPgdIMt1QeMOQnljsee+ZaklLmL6xltyC6VdYJo9GWQjGVBr7eFL2ghGjTyAKj
+ Whz5mrJUKiW+QjyR/aTDe71FadwwAs5mK0uB3UTh2pp+YI3m17OIauN4Xi2iAdSm
+ /uBfu7BGPPIqysTnr2nXDd+cuZXe5VVBkbI+iWHMHmgHof2Gfx2gU+UX3RhfsdOz
+ Q7rPphak/I9zw67NvibRLxKFrY3McBYJaHPRpB7FmaFdQEB4MgFo5g12bWXldB0E
+ pJe4VhRvb8U54g73a3YV9IPg/rvI1p5UeGWuI3euCm06fQvba0h8jEkSLzeaG5vx
+ /DuTEPrGfjUNFz3SvdjI2wIDAQABo1AwTjAdBgNVHQ4EFgQUFAvjohHTavHmbRbj
+ Yq2h3cq7UMEwHwYDVR0jBBgwFoAUFAvjohHTavHmbRbjYq2h3cq7UMEwDAYDVR0T
+ BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAB9oDASl4OfF/D49i3KtVzjzge4up
+ WssBPYKVwASh3cXfLLe3NdY9ihdCXFd/8Rus0hBGaRPIyR06sZoHRDEfJ2xrRD6g
+ pr4iHRfaoEWqols7+iW0cgQehvw5efEpFL1vg9zK9kOwruS4ZUhDrak6GcO/O8Jh
+ 6lSGmidHSHrQmfqFeTotaezwylV/uHvRZHPvk2JhQfC+vFjn5/iN/0wCeQCwYvOC
+ rePq2ZFdYg/0bS9BYwKsT2w1Z/AU/wIMLmbNB1af+fTBBEQlxb4rAeDb+J9EoSQ5
+ MVP7jm3BVnHQCs6CA4LV4yRQNF2K6GkWem1oUg/H3S2SG8TAUlKpX/1XRw==
+ -----END CERTIFICATE-----
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml
new file mode 100644
index 00000000..af28b6f6
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/ealert-rule-cm.yaml
@@ -0,0 +1,132 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: elastalert-rule
+data:
+ rule-node4-vswitch.yaml: |
+ name: vswitch-3-sec
+ type: any
+ index: node4*
+ filter:
+ - range:
+ time_vswitchd:
+ gt: 3 #Greater than
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: threshold
+ label: vswitchd start time > 3 sec
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ time_vswitchd: time_vswitchd
+ num_hits: num_hits
+ num_matches: num_matches
+
+ rule-node1-vswitch.yaml: |
+ name: vswitch-3-sec
+ type: any
+ index: node1*
+ filter:
+ - range:
+ time_vswitchd:
+ gt: 3 #Greater than
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: threshold
+ label: vswitchd start time > 3 sec
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ time_vswitchd: time_vswitchd
+ num_hits: num_hits
+ num_matches: num_matches
+
+ rule-node4-blacklist.yaml: |
+ name: error-finder-node4
+ type: blacklist
+ compare_key: alert
+ index: node4*
+ blacklist:
+ - "Failed to run test"
+ - "Failed to execute in '30' seconds"
+ - "('Result', 'Failed')"
+ - "could not open socket: connection refused"
+ - "Input/output error"
+ - "dpdk|ERR|EAL: Error - exiting with code: 1"
+ - "Failed to execute in '30' seconds"
+ - "dpdk|ERR|EAL: Driver cannot attach the device"
+ - "dpdk|EMER|Cannot create lock on"
+ - "device not found"
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: pattern-match
+ label: failed
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ reason: alert
+ num_hits: num_hits
+ num_matches: num_matches
+ rule-node1-blacklist.yaml: |
+ name: error-finder-node1
+ type: blacklist
+ compare_key: alert
+ index: node1*
+ blacklist:
+ - "Failed to run test"
+ - "Failed to execute in '30' seconds"
+ - "('Result', 'Failed')"
+ - "could not open socket: connection refused"
+ - "Input/output error"
+ - "dpdk|ERR|EAL: Error - exiting with code: 1"
+ - "Failed to execute in '30' seconds"
+ - "dpdk|ERR|EAL: Driver cannot attach the device"
+ - "dpdk|EMER|Cannot create lock on"
+ - "device not found"
+
+ realert:
+ minutes: 0
+
+ alert: post
+ http_post_url: "http://10.10.120.211:31000/alerts"
+ http_post_static_payload:
+ type: pattern-match
+ label: failed
+ http_post_payload:
+ index: _index
+ log: msg
+ log_path: log_path
+ reason: alert
+ num_hits: num_hits
+ num_matches: num_matches
diff --git a/tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml b/tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml
new file mode 100644
index 00000000..9e32e2b7
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elastalert/elastalert.yaml
@@ -0,0 +1,76 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: elastalert
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ run: elastalert
+ template:
+ metadata:
+ labels:
+ run: elastalert
+ spec:
+ volumes:
+ - name: econfig
+ configMap:
+ name: elastalert-config
+ items:
+ - key: elastalert.yaml
+ path: elastalert.yaml
+ - name: erule
+ configMap:
+ name: elastalert-rule
+ items:
+ - key: rule-node4-vswitch.yaml
+ path: rule-node4-vswitch.yaml
+ - key: rule-node4-blacklist.yaml
+ path: rule-node4-blacklist.yaml
+ - key: rule-node1-blacklist.yaml
+ path: rule-node1-blacklist.yaml
+ - name: ekey
+ configMap:
+ name: elastalert-key
+ items:
+ - key: elastalert.key
+ path: elastalert.key
+ - key: elastalert.pem
+ path: elastalert.pem
+ initContainers:
+ - name: init-myservice
+ image: busybox:1.28
+ command: ['sh', '-c', 'until nslookup logging-es-http; do echo "waiting for myservice"; sleep 2; done;']
+ containers:
+ - name: elastalert
+ image: adi0509/elastalert:latest
+ env:
+ - name: ES_USERNAME
+ value: "elastic"
+ - name: ES_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: logging-es-elastic-user
+ key: elastic
+ command: [ "sh", "-c"]
+ args: ["elastalert-create-index --config /opt/elastalert/elastalert.yaml; python -m elastalert.elastalert --config /opt/elastalert/elastalert.yaml"]
+ volumeMounts:
+ - mountPath: /opt/elastalert/
+ name: econfig
+ - mountPath: /opt/elastalert/rules/
+ name: erule
+ - mountPath: /opt/elastalert/key
+ name: ekey
diff --git a/tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml b/tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml
new file mode 100644
index 00000000..5b0a8476
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elasticsearch/elasticsearch.yaml
@@ -0,0 +1,231 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: elasticsearch.k8s.elastic.co/v1
+kind: Elasticsearch
+metadata:
+ name: logging
+spec:
+ version: 7.8.0
+ http:
+ service:
+ spec:
+ type: NodePort
+ ports:
+ - name: https
+ nodePort: 31111
+ port: 9200
+ protocol: TCP
+ targetPort: 9200
+ auth:
+ fileRealm:
+ - secretName: custom-user
+ nodeSets:
+ - name: vm1-master
+ count: 1
+ config:
+ node.master: true
+ node.data: false
+ node.attr.zone: vm1
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm1-master
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm1
+ - name: vm1-data
+ count: 1
+ config:
+ node.master: false
+ node.data: true
+ node.attr.zone: vm1
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm1-data
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm1
+ - name: vm2-master
+ count: 1
+ config:
+ node.master: true
+ node.data: false
+ node.attr.zone: vm2
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm2-master
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm2
+ - name: vm2-data
+ count: 1
+ config:
+ node.master: false
+ node.data: true
+ node.attr.zone: vm2
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm2-data
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm2
+ - name: vm3-master
+ count: 1
+ config:
+ node.master: true
+ node.data: false
+ node.attr.zone: vm3
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm3-master
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm3
+ - name: vm3-data
+ count: 1
+ config:
+ node.master: false
+ node.data: true
+ node.attr.zone: vm3
+ cluster.routing.allocation.awareness.attributes: zone
+ volumeClaimTemplates:
+ - metadata:
+ name: elasticsearch-data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+ storageClassName: log-vm3-data
+ podTemplate:
+ spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm3
diff --git a/tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml b/tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml
new file mode 100644
index 00000000..3e71fe92
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/elasticsearch/user-secret.yaml
@@ -0,0 +1,23 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+kind: Secret
+apiVersion: v1
+metadata:
+ name: custom-user
+stringData:
+ users: |-
+ elasticsearch:$2a$10$DzOu7/.Vo2FBDYworbUZe.LNL9tCUl18kpVZ6C/mvkKcXRzYrpmJu
+ users_roles: |-
+ kibana_admin:elasticsearch
+ superuser:elasticsearch
diff --git a/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml
new file mode 100644
index 00000000..36ff80d6
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-cm.yaml
@@ -0,0 +1,525 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: fluentd-config
+data:
+ index_template.json: |
+ {
+ "index_patterns": [
+ "node*"
+ ],
+ "settings": {
+ "index.lifecycle.name": "delete_policy",
+ "number_of_replicas": 1
+ }
+ }
+ fluent.conf: |
+ <source>
+ @type forward
+ port 24224
+ bind 0.0.0.0
+ tag log
+ </source>
+
+ #tag the .dat file
+ <match log>
+ @type rewrite_tag_filter
+ #Trex data
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/.*counts.dat/
+ tag countdat.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/.*errors.dat/
+ tag errordat.${tag}
+ </rule>
+ #Spirent data
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/stc-liveresults.dat.tx/
+ tag stcdattx.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/stc-liveresults.dat.rx/
+ tag stcdatrx.${tag}
+ </rule>
+ #Ixia data
+ <rule>
+ key log_path
+ pattern /\/tmp\/result.*\/.*Statistics.csv/
+ tag ixia.${tag}
+ </rule>
+ #log files
+ <rule>
+ key log_path
+ pattern /vsperf-overall/
+ tag vsperf.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /vswitchd/
+ tag vswitchd.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/var\/log\/userspace/
+ tag userspace.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/var\/log\/sriovdp/
+ tag sriovdp.${tag}
+ </rule>
+ <rule>
+ key log_path
+ pattern /\/var\/log\/pods/
+ tag pods.${tag}
+ </rule>
+ </match>
+
+ #to find error
+ @include error.conf
+
+ #to parse time-series data
+ @include time-series.conf
+
+ #to calculate time analysis
+ @include time-analysis.conf
+
+ #give tag 'node1' if host is worker and tag 'node4' if host is pod12-node4
+ <match **.log>
+ @type rewrite_tag_filter
+ <rule>
+ key host
+ pattern /pod12-node4/
+ tag node4
+ </rule>
+ <rule>
+ key host
+ pattern /worker/
+ tag node1
+ </rule>
+ </match>
+
+
+ <filter node1>
+ @type elasticsearch_genid
+ hash_id_key _hash1
+ </filter>
+
+ #send the node1 log to node1 index in elasticsearch
+ <match node1>
+ @type copy
+ <store>
+ @type elasticsearch
+ host logging-es-http
+ port 9200
+ scheme https
+ ssl_verify false
+ user "#{ENV['FLUENT_ELASTICSEARCH_USER']}"
+ password "#{ENV['FLUENT_ELASTICSEARCH_PASSWORD']}"
+ logstash_format true
+ logstash_prefix node1
+ logstash_dateformat %Y%m%d
+ flush_interval 1s
+ id_key _hash1
+ remove_keys _hash1
+
+ enable_ilm true
+ application_name ${tag}
+ index_date_pattern ""
+ ilm_policy_id delete_policy
+ template_name delpol-test
+ template_file /fluentd/etc/index_template.json
+ ilm_policy {
+ "policy": {
+ "phases": {
+ "delete": {
+ "min_age": "3m",
+ "actions": {
+ "delete": {}
+ }
+ }
+ }
+ }
+ }
+ </store>
+ <store>
+ @type stdout
+ </store>
+ </match>
+
+ <filter node4>
+ @type elasticsearch_genid
+ hash_id_key _hash4
+ </filter>
+
+ #send the node4 log to node4 index in elasticsearch
+ <match node4>
+ @type copy
+ <store>
+ @type elasticsearch
+ host logging-es-http
+ port 9200
+ scheme https
+ ssl_verify false
+ user "#{ENV['FLUENT_ELASTICSEARCH_USER']}"
+ password "#{ENV['FLUENT_ELASTICSEARCH_PASSWORD']}"
+ logstash_format true
+ logstash_prefix node4
+ logstash_dateformat %Y%m%d
+ flush_interval 1s
+ id_key _hash4
+ remove_keys _hash4
+
+ enable_ilm true
+ application_name ${tag}
+ index_date_pattern ""
+ ilm_policy_id delete_policy
+ template_name delpol-test
+ template_file /fluentd/etc/index_template.json
+ ilm_policy {
+ "policy": {
+ "phases": {
+ "delete": {
+ "min_age": "3m",
+ "actions": {
+ "delete": {}
+ }
+ }
+ }
+ }
+ }
+ </store>
+ <store>
+ @type stdout
+ </store>
+ </match>
+ error.conf: |
+ <filter vsperf.log>
+ @type parser
+ reserve_data true
+ key_name msg
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /(?<alert_time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}).*(?<alert>Failed to run test|Failed to execute in '30' seconds|\('Result', 'Failed'\)|could not open socket: connection refused|Input\/output error)/
+ </parse>
+ </filter>
+
+ <filter vswitchd.log>
+ @type parser
+ reserve_data true
+ key_name msg
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /(?<alert_time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*(?<alert>dpdk\|ERR\|EAL: Error - exiting with code: 1|Failed to execute in '30' seconds|dpdk\|ERR\|EAL: Driver cannot attach the device|dpdk\|EMER\|Cannot create lock on)/
+ </parse>
+ </filter>
+ <filter vswitchd.log>
+ @type parser
+ reserve_data true
+ key_name msg
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /(?<alert_time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*dpdk\|ERR\|VHOST_CONFIG:.*(?<alert>device not found)/
+ </parse>
+ </filter>
+ time-series.conf: |
+ #parse *counts.dat
+ <filter countdat.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(?<rx_port>\d*),(?<tx_port>\d*),(?<rx_pkts>[\.\d]*),(?<tx_pkts>[\.\d]*),(?<rx_pps>[\.\d]*),(?<tx_pps>[\.\d]*),(?<rx_bps_num>[\.\d]*),(?<rx_bps_den>[\.\d]*),(?<tx_bps_num>[\.\d]*),(?<tx_bps_den>[\.\d]*)$/
+ types rx_port:integer,tx_port:integer,rx_pkts:float,tx_pkts:float,rx_pps:float,tx_pps:float,rx_bps_num:float,rx_bps_den:float,tx_bps_num:float,tx_bps_den:float
+ </parse>
+ </filter>
+
+ #parse *errors.dat
+ <filter errordat.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(?<dropped>[\.\d]*),(?<ooo>[\.\d]*),(?<dup>[\.\d]*),(?<seq_too_high>[\.\d]*),(?<seq_too_low>[\.\d]*)$/
+ types ts:integer,dropped:integer,ooo:integer,dup:integer,seq_too_high:integer,seq_too_low:integer
+ </parse>
+ </filter>
+
+ #parse stc-liveresults.dat.tx
+ <filter stcdattx.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(?<StrId>[\.\d]*),(?<BlkId>[\.\d]*),(?<FrCnt>[\.\d]*),(?<FrRate>[\.\d]*),(?<ERxFrCnt>[\.\d]*),(?<OctCnt>[\.\d]*),(?<OctRate>[\.\d]*),(?<bitCnt>[\.\d]*),(?<bitRate>[\.\d]*)$/
+ types ts:integer,StrId:integer,BlkId:integer,FrCnt:integer,FrRate:integer,ERxFrCnt:integer,OctCnt:integer,OctRate:integer,bitCnt:integer,bitRate:integer
+ </parse>
+ </filter>
+
+ #parse stc-liveresults.dat.rx
+ <filter stcdatrx.log>
+ @type parser
+ key_name msg
+ reserve_data true
+ emit_invalid_record_to_error false
+ <parse>
+ @type regexp
+ expression /^(?<ts>[\.\d]*),(.*, |)(?<RxPrt>.*),(?<DrpFrCnt>[\.\d]*),(?<SeqRnLen>[\.\d]*),(?<AvgLat>.*),(?<DrpFrRate>[\.\d]*),(?<FrCnt>[\.\d]*),(?<FrRate>[\.\d]*),(?<MaxLat>[\.\d]*),(?<MinLat>[\.\d]*),(?<OctCnt>[\.\d]*),(?<OctRate>[\.\d]*)$/
+ types ts:integer,DrpFrCnt:integer,SeqRnLen:integer,FrCnt:integer,FrRate:integer,MaxLat:integer,MinLat:integer,OctCnt:integer,OctRate:integer
+ </parse>
+ </filter>
+ time-analysis.conf: |
+ # 1. Test Duration - Duration Between: first line and last line.
+ # 2. Setup Duration - Duration Between: Creating result directory TO Class found ---
+ # 3. Traffic Duration - Duration Between: Starting traffic at 0.1 Gbps speed TO Traffic Results
+ # 4. Iteration Durations -- Example: Duration between - Starting traffic at 10.0 Gbps TO Starting traffic at 5.0 Gbps speed
+ # 5. Reporting Duration - Duration Between: Traffic Results TO Write results to file
+ # 6. Vswitchd start Duration - Duration Between: Starting vswitchd... TO send_traffic with
+
+ <match vsperf.log>
+ @type rewrite_tag_filter
+ <rule>
+ key msg
+ pattern /Creating result directory:/
+ tag firstline.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Write results to file/
+ tag lastline.${tag}
+ </rule>
+
+ <rule>
+ key msg
+ pattern /Class found/
+ tag setupend.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting traffic at 0.1 Gbps speed/
+ tag trafficstart.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Traffic Results/
+ tag trafficend.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting traffic at 10.0 Gbps/
+ tag iterationstart.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting traffic at 5.0 Gbps speed/
+ tag iterationend.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /Starting vswitchd/
+ tag vswitchstart.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern /send_traffic/
+ tag vswitch.${tag}
+ </rule>
+ <rule>
+ key msg
+ pattern ^.*$
+ tag logs.${tag}
+ </rule>
+ </match>
+
+ #############################################################################################
+ #save the starting log line and append it to the matching ending log line
+ #############################################################################################
+ <filter firstline.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${$vswitch_start="";$reportstart="";$firstline="";$traffic_start="";$iteration_start="";$firstline = record["msg"];return record["msg"];}
+ </record>
+ </filter>
+ <filter lastline.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" | "+$firstline + " | "+ $reportstart}
+ </record>
+ </filter>
+
+ <filter setupend.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" "+$firstline}
+ </record>
+ </filter>
+
+ <filter trafficstart.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${if $traffic_start.eql?("");$traffic_start=record["msg"];end;return record["msg"];}
+ </record>
+ </filter>
+ <filter trafficend.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${if $reportstart.eql?("");$reportstart=record["msg"];end;return record["msg"]+" "+$traffic_start;}
+ </record>
+ </filter>
+
+ <filter iterationstart.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${if $iteration_start.eql?("");$iteration_start=record["msg"];end;return record["msg"];}
+ </record>
+ </filter>
+ <filter iterationend.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" "+$iteration_start}
+ </record>
+ </filter>
+
+ <filter vswitchstart.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ msg ${$vswitch_start=record["msg"];return record["msg"];}
+ </record>
+ </filter>
+ <filter vswitch.**>
+ @type record_transformer
+ enable_ruby true
+ <record>
+ newmsg ${record["msg"]+" "+$vswitch_start}
+ </record>
+ </filter>
+ #############################################################################################
+ #parse time from the log
+ #############################################################################################
+ <filter setupend.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<setupend>.*) : Class found: Trex. (?<setupstart>.*) : .*$/
+ </parse>
+ </filter>
+ <filter iterationend.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<iterationend>.*) : Starting traffic at 5.0 Gbps speed (?<iterationstart>.*) : Starting traffic at 10.0 Gbps speed$/
+ </parse>
+ </filter>
+ <filter vswitch.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<vswitch>.*) : send_traffic with <.*> (?<vswitchstart>.*) : Starting vswitchd...$/
+ </parse>
+ </filter>
+ <filter trafficend.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<trafficend>.*) : Traffic Results: (?<trafficstart>.*) : Starting traffic at 0.1 Gbps speed/
+ </parse>
+ </filter>
+ <filter lastline.**>
+ @type parser
+ key_name newmsg
+ reserve_data true
+ remove_key_name_field true
+ <parse>
+ @type regexp
+ expression /^(?<lastline>.*) : Write results to file: .* \| (?<firstline>.*) : Creating result directory: .* \| (?<reportstart>.*) : Traffic Results:$/
+ </parse>
+ </filter>
+ #############################################################################################
+ #calculate time
+ #############################################################################################
+ <filter setupend.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ setup_duration ${ require 'time';Time.parse(record["setupend"])-Time.parse(record["setupstart"]); }
+ </record>
+ </filter>
+ <filter iterationend.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ iteration_duration ${ require 'time';Time.parse(record["iterationend"])-Time.parse(record["iterationstart"]); }
+ </record>
+ </filter>
+ <filter vswitch.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ vswitch_duration ${ require 'time';Time.parse(record["vswitch"])-Time.parse(record["vswitchstart"]); }
+ </record>
+ </filter>
+ <filter trafficend.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ traffic_duration ${ require 'time';Time.parse(record["trafficend"])-Time.parse(record["trafficstart"]); }
+ </record>
+ </filter>
+ <filter lastline.**>
+ @type record_transformer
+ enable_ruby
+ <record>
+ test_duration ${ require 'time';Time.parse(record["lastline"])-Time.parse(record["firstline"]); }
+ </record>
+ <record>
+ report_duration ${ require 'time';Time.parse(record["lastline"])-Time.parse(record["reportstart"]); }
+ </record>
+ </filter>
+ #############################################################################################
diff --git a/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml
new file mode 100644
index 00000000..9a43b82f
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent-service.yaml
@@ -0,0 +1,34 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: Service
+metadata:
+ name: fluentd
+ labels:
+ run: fluentd
+spec:
+ type: NodePort
+ ports:
+ - name: tcp
+ port: 32224
+ targetPort: 24224
+ protocol: TCP
+ nodePort: 32224
+ - name: udp
+ port: 32224
+ targetPort: 24224
+ protocol: UDP
+ nodePort: 32224
+ selector:
+ run: fluentd
diff --git a/tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml
new file mode 100644
index 00000000..3830f682
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/fluentd/fluent.yaml
@@ -0,0 +1,65 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: fluentd
+spec:
+ replicas: 2
+ selector:
+ matchLabels:
+ run: fluentd
+ template:
+ metadata:
+ labels:
+ run: fluentd
+ spec:
+ volumes:
+ - name: fconfig
+ configMap:
+ name: fluentd-config
+ items:
+ - key: fluent.conf
+ path: fluent.conf
+ - key: error.conf
+ path: error.conf
+ - key: time-series.conf
+ path: time-series.conf
+ - key: time-analysis.conf
+ path: time-analysis.conf
+ - key: index_template.json
+ path: index_template.json
+ initContainers:
+ - name: init-myservice
+ image: busybox:1.28
+ command: ['sh', '-c', 'until nslookup logging-es-http; do echo "waiting for myservice"; sleep 2; done;']
+ containers:
+ - name: fluentd
+ image: adi0509/fluentd:latest
+ env:
+ - name: FLUENT_ELASTICSEARCH_USER
+ value: "elastic"
+ - name: FLUENT_ELASTICSEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: logging-es-elastic-user
+ key: elastic
+ ports:
+ - containerPort: 24224
+ protocol: TCP
+ - containerPort: 24224
+ protocol: UDP
+ volumeMounts:
+ - name: fconfig
+ mountPath: /fluentd/etc/
diff --git a/tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml b/tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml
new file mode 100644
index 00000000..5ec6937e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/kibana/kibana.yaml
@@ -0,0 +1,23 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: kibana.k8s.elastic.co/v1
+kind: Kibana
+metadata:
+ name: logging
+spec:
+ version: 7.8.0
+ count: 1
+ elasticsearchRef:
+ name: logging
+ namespace: logging
diff --git a/tools/lma/ansible-server/roles/logging/files/namespace.yaml b/tools/lma/ansible-server/roles/logging/files/namespace.yaml
new file mode 100644
index 00000000..6964af5c
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/namespace.yaml
@@ -0,0 +1,17 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: logging
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml
new file mode 100644
index 00000000..f5a11e80
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-conf-cm.yaml
@@ -0,0 +1,36 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: nginx-config
+data:
+ default.conf: |
+ server {
+ # TLS is enabled by the "ssl" flag on listen; the standalone "ssl on;" directive was removed in nginx 1.25.1.
+ listen 80 ssl;
+ ssl_certificate /etc/ssl/certs/kibana-access.pem;
+ ssl_certificate_key /etc/ssl/private/kibana-access.key;
+ # Forward every request to Kibana over HTTPS, passing the WebSocket upgrade headers through.
+ location / {
+ proxy_pass https://logging-kb-http:5601;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection 'upgrade';
+ proxy_set_header Host $host;
+ proxy_cache_bypass $http_upgrade;
+ proxy_read_timeout 300s;
+ proxy_connect_timeout 75s;
+ }
+ }
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml
new file mode 100644
index 00000000..93d7d6ec
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-key-cm.yaml
@@ -0,0 +1,68 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: ConfigMap # NOTE(review): this object carries a TLS private key (kibana-access.key) in plain text; a Secret is the usual kind for key material - confirm the key below is a throwaway
+metadata:
+ name: nginx-key
+data:
+ kibana-access.key: |
+ -----BEGIN PRIVATE KEY-----
+ MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDI92SBrcEdzxqS
+ rt883bVcj4F4RuKtm+AjjOEhbFUd3XOp5Wa5NzyYQSTP9ZJHG0dYiTAKOJBVcgbc
+ YRMNUAzHAIskf1q2/SvwyLNRMJLkBD5HHjbnEcuKQm/+nPdzkfvo2zfRNTDPKS83
+ HqFQ779hT8ZLkSzoPuR0QD17ZNWYVMZv/r9wqnjX8U/k5AjrJOIxuaO9nGAgv2Pu
+ Qm6wuU8UBEaMRgPVHQ3ztflQr9QPr/S6HU0cl4Gu+Nwid6iC1RVYxANNq7E7wRvq
+ GMKRS5cA9Nlnu/b7IEI4LSx5yeTSDzwmZKTNnUWi2cpqk30M4G4cUokoz9bP+62I
+ YWEh3B8HAgMBAAECggEBAI1luzqepTSzBhBUp88sczGX6tFUlqLt/Ism0TPyBAVK
+ TdopBNima6T4mM0VDIGpSM6bX8ihObRU0Uz3pC8GtqbB1CSu0oXTpbn5jGlAkumJ
+ rsPdF2YHGD3ENwZfLKANA8A3lZNGKHxpjsXqcDgBJ5dxSKTclUsnDRhaJqgOL1bI
+ d9QCXdA1vbpxHDJWSo73E7omv3AyHi3HxMWU4gzyerUFSMFGqm0W5dPeeresNE3a
+ bv9/46YdykufuRuJZqsUDLCgUUcJPhbE5iOrB4iv8oaDqT0onxwzRQTSgidPxbp2
+ EmjVHpFCACltOKSqELM4+PQFCk8xUBya8HWD5UHrVDkCgYEA4y3WwmhtLUT/g3G3
+ cowvmxjgPl6xqkqTA7Xcdc3sk+6/jS1kayT5TL1qfpd1QL/K617jva9mfSMZ8ei9
+ Y7M/2QkSb0uHKulGR0+if+7sT0L8OYO/OE7c+HTZmZK4hD1CCJN2M34D9Qo2fzQ6
+ 4v+AO1wGiAtiNev0YIBKYNSco+sCgYEA4nY8m93XuC19z991sFRvE0UBeKcN2esg
+ TwY9UuYHJ56s+6UozkUgZArwYFW8LWFeIjkrrKELBNDsmJtTZ006TyUWxY/ccdjV
+ fJZTLV3niv6IQzy74aOmXV2vtNjxyBlllT9mvig6T0t0TvAtolsuSVHBL09zxcy4
+ wN4pGIfqllUCgYBYLq/hMKXIX7MK87YwqYfFHWfV7e3q2x2r4AjeVXuShKcoBsmm
+ 6Wg3yIKw9tuVsZzzthaSx6XxxxFIHH5/V9Hdzi6wstGZ74jPH3NFU5m4vpinPqOY
+ GMyfSMQ6X4BuHFUofQzxueWRVVCIGd8Nw/2jjPogDsMliRyH5OR6J61R1wKBgEa6
+ 8SEpf7fJlZL4UzS4mlylX9lEK+JVOqkT5NFggPmR6KtMIVuTYZN9iyg7fuOZlqIP
+ wyFOxzdA3bSoRrtr9ntDtUINNaflNoCMHvx7aNcTupFthazqxQpCOZ+9Zn691+lu
+ fPOFcvjTM0d4YnhkDCfgPfs90IYF8+phOOqtgMplAoGBAI+mcaUH7ADYxlONCi1E
+ gNHRvHJRBdQBaydKUfPxbe3vS5QJb8Gb5RU46vDl3w+YHUVwUi+Hj68zuKExXxhD
+ 9CGTAQIejtHWScZ1Djl3bcvNa/czHyuNVsGwvJ3fy1JzpxRmUUMPSdJ90A1n57Tk
+ LFEmZhwaj7YF869wfKngQ57d
+ -----END PRIVATE KEY-----
+ kibana-access.pem: |
+ -----BEGIN CERTIFICATE-----
+ MIIDVzCCAj+gAwIBAgIJAIQzf1mxHsvgMA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNV
+ BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
+ Q29tcGFueSBMdGQwHhcNMjAwNjI1MTY1NzQ3WhcNMjEwNjI1MTY1NzQ3WjBCMQsw
+ CQYDVQQGEwJYWDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZh
+ dWx0IENvbXBhbnkgTHRkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA
+ yPdkga3BHc8akq7fPN21XI+BeEbirZvgI4zhIWxVHd1zqeVmuTc8mEEkz/WSRxtH
+ WIkwCjiQVXIG3GETDVAMxwCLJH9atv0r8MizUTCS5AQ+Rx425xHLikJv/pz3c5H7
+ 6Ns30TUwzykvNx6hUO+/YU/GS5Es6D7kdEA9e2TVmFTGb/6/cKp41/FP5OQI6yTi
+ MbmjvZxgIL9j7kJusLlPFARGjEYD1R0N87X5UK/UD6/0uh1NHJeBrvjcIneogtUV
+ WMQDTauxO8Eb6hjCkUuXAPTZZ7v2+yBCOC0secnk0g88JmSkzZ1FotnKapN9DOBu
+ HFKJKM/Wz/utiGFhIdwfBwIDAQABo1AwTjAdBgNVHQ4EFgQUrz/R+M2XkTTfjrau
+ VVBW6+pdatgwHwYDVR0jBBgwFoAUrz/R+M2XkTTfjrauVVBW6+pdatgwDAYDVR0T
+ BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAyIhJLwg9oTil0Rb1zbYQb0Mr0UYz
+ rlS4f8QkxygkGLAZ8q9VkR+NpKfqhYDSHofGg5Yg5/p54NRJh5M4ASuM7N9AK0LH
+ KbCvS+YRNWhmo+7H7zjDNkV8FbzG41nkt9jQjaKFF7GdKr4HkWvupMX6PwsAZ0jI
+ b2Y6QzFQP9wF0QoBHrK42u3eWbfYv2IIDd6xsV90ilKRDtKkCiI4dyKGK46YDyZB
+ 3eqJ08Pm67HDbxQLydRXkNJvd33PASRgE/VOh44n3xWG+Gu4IMz7EO/4monyuv1Q
+ V2v1A9NV+ZnAq4PT7WJY7fWYavDUr+kwxMAGNQkG/Cg3X4FYrRwrq6gk7Q==
+ -----END CERTIFICATE-----
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml
new file mode 100644
index 00000000..8aea53dd
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx-service.yaml
@@ -0,0 +1,28 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: Service
+metadata:
+ name: nginx
+ labels:
+ run: nginx
+spec:
+ type: NodePort
+ ports:
+ - port: 8000
+ targetPort: 80
+ protocol: TCP
+ nodePort: 32000
+ selector:
+ run: nginx
diff --git a/tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml b/tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml
new file mode 100644
index 00000000..fdf5c835
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/nginx/nginx.yaml
@@ -0,0 +1,58 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: nginx
+spec:
+ replicas: 2
+ selector:
+ matchLabels:
+ run: nginx
+ template:
+ metadata:
+ labels:
+ run: nginx
+ spec:
+ volumes:
+ - name: nconfig
+ configMap:
+ name: nginx-config
+ items:
+ - key: default.conf
+ path: default.conf
+ - name: nkey
+ configMap:
+ name: nginx-key
+ items:
+ - key: kibana-access.key
+ path: kibana-access.key
+ - key: kibana-access.pem
+ path: kibana-access.pem
+ initContainers:
+ - name: init-myservice
+ image: busybox:1.28
+ command: ['sh', '-c', 'until nslookup logging-kb-http; do echo "waiting for myservice"; sleep 2; done;']
+ containers:
+ - name: nginx
+ image: nginx
+ volumeMounts:
+ - mountPath: /etc/nginx/conf.d/
+ name: nconfig
+ - mountPath: /etc/ssl/certs/
+ name: nkey
+ - mountPath: /etc/ssl/private/
+ name: nkey
+ ports:
+ - containerPort: 80
diff --git a/tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml b/tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml
new file mode 100644
index 00000000..c1a96077
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/persistentVolume.yaml
@@ -0,0 +1,105 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-master-vm1
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm1-master
+ nfs:
+ server: 10.10.120.211
+ path: "/srv/nfs/master"
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-data-vm1
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm1-data
+ nfs:
+ server: 10.10.120.211
+ path: "/srv/nfs/data"
+
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-master-vm2
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm2-master
+ nfs:
+ server: 10.10.120.203
+ path: "/srv/nfs/master"
+
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-data-vm2
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm2-data
+ nfs:
+ server: 10.10.120.203
+ path: "/srv/nfs/data"
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-master-vm3
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm3-master
+ nfs:
+ server: 10.10.120.204
+ path: "/srv/nfs/master"
+
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: pv-data-vm3
+spec:
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteOnce
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: log-vm3-data
+ nfs:
+ server: 10.10.120.204
+ path: "/srv/nfs/data"
diff --git a/tools/lma/ansible-server/roles/logging/files/storageClass.yaml b/tools/lma/ansible-server/roles/logging/files/storageClass.yaml
new file mode 100644
index 00000000..a2f1e3aa
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/files/storageClass.yaml
@@ -0,0 +1,73 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#storage class for VM1 master
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm1-master
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM1 data
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm1-data
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM2 master
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm2-master
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM2 data
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm2-data
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM3 master
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm3-master
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+---
+#storage class for VM3 data
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+ name: log-vm3-data
+reclaimPolicy: Retain
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
diff --git a/tools/lma/ansible-server/roles/logging/tasks/main.yml b/tools/lma/ansible-server/roles/logging/tasks/main.yml
new file mode 100644
index 00000000..dcbf4d4d
--- /dev/null
+++ b/tools/lma/ansible-server/roles/logging/tasks/main.yml
@@ -0,0 +1,165 @@
+# Copyright 2020 Adarsh yadav
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#EFK setup in k8s cluster
+
+#***********************************************************************************************************
+#copy all yaml to /tmp/files/
+#***********************************************************************************************************
+- name: copy all yaml to /tmp/files/
+ copy:
+ src: ../files/
+ dest: /tmp/files/
+
+#***********************************************************************************************************
+#Creating Namespace
+#***********************************************************************************************************
+- name: Creating Namespace
+ k8s:
+ state: present
+ src: /tmp/files/namespace.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#creating Storage Class
+#***********************************************************************************************************
+- name: creating Storage Class
+ k8s:
+ state: present
+ src: /tmp/files/storageClass.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#creating Persistent Volume
+#***********************************************************************************************************
+- name: creating Persistent Volume
+ k8s:
+ state: present
+ src: /tmp/files/persistentVolume.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#add user
+#***********************************************************************************************************
+- name: add user
+ k8s:
+ state: present
+ src: /tmp/files/elasticsearch/user-secret.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#Starting Elasticsearch operator
+#***********************************************************************************************************
+- name: Starting Elasticsearch operator
+ shell: kubectl apply -f https://download.elastic.co/downloads/eck/1.2.0/all-in-one.yaml
+ ignore_errors: yes # NOTE(review): a failed operator install is swallowed here; the following Elasticsearch/Kibana tasks apply *.k8s.elastic.co resources and presumably need it - confirm this is intentional
+
+#***********************************************************************************************************
+#Starting Elasticsearch
+#***********************************************************************************************************
+- name: Starting Elasticsearch
+ k8s:
+ state: present
+ src: /tmp/files/elasticsearch/elasticsearch.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#Starting Kibana
+#***********************************************************************************************************
+- name: Starting Kibana
+ k8s:
+ state: present
+ src: /tmp/files/kibana/kibana.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#Starting nginx
+#***********************************************************************************************************
+- name: creating nginx configmap
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx-conf-cm.yaml
+ namespace: logging
+
+- name: creating nginx key configmap
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx-key-cm.yaml
+ namespace: logging
+
+- name: creating nginx pod
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx.yaml
+ namespace: logging
+
+- name: creating nginx service
+ k8s:
+ state: present
+ src: /tmp/files/nginx/nginx-service.yaml
+ namespace: logging
+#***********************************************************************************************************
+#Starting fluentd
+#***********************************************************************************************************
+- name: creating fluentd configmap
+ k8s:
+ state: present
+ src: /tmp/files/fluentd/fluent-cm.yaml
+ namespace: logging
+
+- name: creating fluentd pod
+ k8s:
+ state: present
+ src: /tmp/files/fluentd/fluent.yaml
+ namespace: logging
+
+- name: creating fluentd service
+ k8s:
+ state: present
+ src: /tmp/files/fluentd/fluent-service.yaml
+ namespace: logging
+#***********************************************************************************************************
+#Starting elastalert
+#***********************************************************************************************************
+- name: creating elastalert config configmap
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/ealert-conf-cm.yaml
+ namespace: logging
+
+- name: creating elastalert key configmap
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/ealert-key-cm.yaml
+ namespace: logging
+
+- name: creating elastalert rule configmap
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/ealert-rule-cm.yaml
+ namespace: logging
+
+- name: creating elastalert pod
+ k8s:
+ state: present
+ src: /tmp/files/elastalert/elastalert.yaml
+ namespace: logging
+
+#***********************************************************************************************************
+#removing /tmp/files
+#***********************************************************************************************************
+- name: Removing /tmp/files
+ file:
+ path: "/tmp/files"
+ state: absent
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml
new file mode 100644
index 00000000..7b9abc47
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-config.yaml
@@ -0,0 +1,37 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: ConfigMap
+apiVersion: v1
+metadata:
+ name: alertmanager-config
+ namespace: monitoring
+data:
+ config.yml: |-
+ global:
+ route:
+ receiver: "webhook"
+ group_by: ['alertname', 'priority']
+ group_wait: 1s
+ group_interval: 5s
+ repeat_interval: 5s
+ routes:
+ - match:
+ severity: critical
+
+ receivers:
+ - name: "webhook"
+ webhook_configs:
+ - url: 'http://10.10.120.20/alertmanager'
+ send_resolved: true
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml
new file mode 100644
index 00000000..f1c3d78e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-deployment.yaml
@@ -0,0 +1,62 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ name: alertmanager
+ namespace: monitoring
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: alertmanager
+ labels:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ spec:
+ containers:
+ - name: alertmanager
+ image: prom/alertmanager
+ args:
+ - --config.file=/etc/alertmanager/config.yml
+ - --storage.path=/alertmanager
+ - --cluster.peer=alertmanager1:6783
+ - --cluster.listen-address=0.0.0.0:6783
+ ports:
+ - containerPort: 9093
+ - containerPort: 6783
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: config-volume
+ mountPath: /etc/alertmanager
+ - name: alertmanager
+ mountPath: /alertmanager
+ restartPolicy: Always
+ volumes:
+ - name: config-volume
+ configMap:
+ name: alertmanager-config
+ - name: alertmanager
+ emptyDir: {}
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml
new file mode 100644
index 00000000..c67517d3
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager-service.yaml
@@ -0,0 +1,41 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service # NodePort Service for the "alertmanager" Deployment (web port 9093, cluster port 6783)
+metadata:
+ labels:
+ adi10hero.monitoring: alertmanager
+ app: alertmanager
+ name: alertmanager
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/path: /metrics # Alertmanager serves its own metrics under /metrics, not /
+ prometheus.io/port: '9093' # must be a port this Service exposes; nothing listens on 8080
+
+spec:
+ selector:
+ app: alertmanager
+ adi10hero.monitoring: alertmanager
+ type: NodePort
+ ports:
+ - name: "9093"
+ port: 9093
+ targetPort: 9093
+ nodePort: 30930
+ - name: "6783"
+ port: 6783
+ targetPort: 6783
+ nodePort: 30679
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml
new file mode 100644
index 00000000..18b76456
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-deployment.yaml
@@ -0,0 +1,62 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ name: alertmanager1
+ namespace: monitoring
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: alertmanager1
+ labels:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ spec:
+ containers:
+ - name: alertmanager1
+ image: prom/alertmanager
+ args:
+ - --config.file=/etc/alertmanager/config.yml
+ - --storage.path=/alertmanager
+ - --cluster.peer=alertmanager:6783
+ - --cluster.listen-address=0.0.0.0:6783
+ ports:
+ - containerPort: 9093
+ - containerPort: 6783
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: config-volume
+ mountPath: /etc/alertmanager
+ - name: alertmanager
+ mountPath: /alertmanager
+ restartPolicy: Always
+ volumes:
+ - name: config-volume
+ configMap:
+ name: alertmanager-config
+ - name: alertmanager
+ emptyDir: {}
diff --git a/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml
new file mode 100644
index 00000000..66d0d2b1
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/alertmanager/alertmanager1-service.yaml
@@ -0,0 +1,42 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: alertmanager1
+ app: alertmanager1
+ name: alertmanager1
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/path: /metrics # Alertmanager exposes its Prometheus metrics under /metrics, not /
+ prometheus.io/port: '9093' # must match the Alertmanager web port exposed below; nothing listens on 8080
+
+spec:
+ selector:
+ app: alertmanager1
+ adi10hero.monitoring: alertmanager1
+ type: NodePort
+ ports:
+ - name: "9093"
+ port: 9093
+ targetPort: 9093
+ nodePort: 30931
+ - name: "6783"
+ port: 6783
+ targetPort: 6783
+ nodePort: 30678
+
diff --git a/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml
new file mode 100644
index 00000000..6a62985e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-deamonset.yaml
@@ -0,0 +1,79 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: cadvisor
+ namespace: monitoring
+ labels:
+ adi10hero.monitoring: cadvisor
+ app: cadvisor
+spec:
+ selector:
+ matchLabels:
+ app: cadvisor
+ adi10hero.monitoring: cadvisor
+ template:
+ metadata:
+ name: cadvisor
+ labels:
+ adi10hero.monitoring: cadvisor
+ app: cadvisor
+ spec:
+ containers:
+ - image: gcr.io/google-containers/cadvisor
+ name: cadvisor
+ ports:
+ - containerPort: 8080
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - mountPath: /rootfs
+ name: cadvisor-hostpath0
+ readOnly: true
+ - mountPath: /var/run
+ name: cadvisor-hostpath1
+ - mountPath: /sys
+ name: cadvisor-hostpath2
+ readOnly: true
+ - mountPath: /sys/fs/cgroup
+ name: cadvisor-hostpath3
+ readOnly: true
+ - mountPath: /dev/disk
+ name: cadvisor-hostpath4
+ readOnly: true
+ - mountPath: /var/lib/docker
+ name: cadvisor-hostpath5
+ readOnly: true
+ restartPolicy: Always
+ volumes:
+ - hostPath:
+ path: /
+ name: cadvisor-hostpath0
+ - hostPath:
+ path: /var/run
+ name: cadvisor-hostpath1
+ - hostPath:
+ path: /sys
+ name: cadvisor-hostpath2
+ - hostPath:
+ path: /sys/fs/cgroup # host cgroup hierarchy; a bare /cgroup is normally absent and would shadow the mount with an empty dir
+ name: cadvisor-hostpath3
+ - hostPath:
+ path: /dev/disk/
+ name: cadvisor-hostpath4
+ - hostPath:
+ path: /var/lib/docker/
+ name: cadvisor-hostpath5
diff --git a/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml
new file mode 100644
index 00000000..734240b8
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/cadvisor/cadvisor-service.yaml
@@ -0,0 +1,30 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: cadvisor
+ adi10hero.monitoring: cadvisor
+ name: cadvisor
+ namespace: monitoring
+spec:
+ ports:
+ - name: "8080"
+ port: 8080
+ targetPort: 8080
+ selector:
+ app: cadvisor
+ adi10hero.monitoring: cadvisor
diff --git a/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml
new file mode 100644
index 00000000..b6bfe0b6
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-deployment.yaml
@@ -0,0 +1,51 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: collectd-exporter
+ namespace: monitoring
+ labels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: collectd-exporter
+ labels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+ spec:
+ containers:
+ - args:
+ - --collectd.listen-address=0.0.0.0:25826
+ image: prom/collectd-exporter
+ name: collectd-exporter
+ ports:
+ - containerPort: 9103
+ - containerPort: 25826
+ protocol: UDP
+ securityContext:
+ runAsUser: 0
+ restartPolicy: Always
+ volumes: null
+
diff --git a/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml
new file mode 100644
index 00000000..5609d04a
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/collectd-exporter/collectd-exporter-service.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: collectd-exporter
+ namespace: monitoring
+ labels:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+spec:
+ ports:
+ - name: "9103"
+ port: 9103
+ nodePort: 30103
+ - name: "25826"
+ port: 25826
+ protocol: UDP
+ nodePort: 30826
+ selector:
+ app: collectd-exporter
+ adi10hero.monitoring: collectd-exporter
+ type: NodePort
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml
new file mode 100644
index 00000000..e2b8c9fa
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-datasource-config.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-datasources
+ namespace: monitoring
+data:
+ prometheus.yaml: |-
+ {
+ "apiVersion": 1,
+ "datasources": [
+ {
+ "access":"proxy",
+ "editable": true,
+ "name": "prometheus",
+ "orgId": 1,
+ "type": "prometheus",
+ "url": "http://prometheus-main:9090",
+ "version": 1
+ }
+ ]
+ }
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml
new file mode 100644
index 00000000..afb00948
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-deployment.yaml
@@ -0,0 +1,68 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ labels:
+ adi10hero.monitoring: grafana
+ app: grafana
+ name: grafana
+ namespace: monitoring
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ adi10hero.monitoring: grafana
+ app: grafana
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ name: grafana
+ labels:
+ adi10hero.monitoring: grafana
+ app: grafana
+ spec:
+ containers:
+ - name: grafana
+ image: grafana/grafana
+ ports:
+ - containerPort: 3000
+ env:
+ - name: GF_SECURITY_ADMIN_PASSWORD
+ value: admin
+ - name: GF_SECURITY_ADMIN_USER
+ value: admin
+ - name: GF_SERVER_DOMAIN
+ value: 10.10.120.20
+ - name: GF_SERVER_ROOT_URL
+ value: "%(protocol)s://%(domain)s:/metrics"
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - mountPath: /var/lib/grafana
+ name: grafana-storage
+ - mountPath: /etc/grafana/provisioning/datasources
+ name: grafana-datasources
+ readOnly: false
+ restartPolicy: Always
+ volumes:
+ - name: grafana-storage
+ persistentVolumeClaim:
+ claimName: grafana-pvc
+ - name: grafana-datasources
+ configMap:
+ defaultMode: 420
+ name: grafana-datasources
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml
new file mode 100644
index 00000000..06bcc31b
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pv.yaml
@@ -0,0 +1,31 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: grafana-pv
+ namespace: monitoring
+ labels:
+ app: grafana-pv
+ adi10hero.monitoring: grafana-pv
+spec:
+ storageClassName: monitoring
+ capacity:
+ storage: 5Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server: 10.10.120.211
+ path: "/usr/share/monitoring_data/grafana"
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml
new file mode 100644
index 00000000..2c2955c8
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-pvc.yaml
@@ -0,0 +1,33 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: grafana-pvc
+ namespace: monitoring
+ labels:
+ app: grafana-pvc
+ adi10hero.monitoring: grafana-pvc
+spec:
+ accessModes:
+ - ReadWriteMany
+ storageClassName: monitoring
+ resources:
+ requests:
+ storage: 4Gi
+ selector:
+ matchLabels:
+ app: grafana-pv
+ adi10hero.monitoring: grafana-pv
diff --git a/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml
new file mode 100644
index 00000000..d1c9c9cc
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/grafana/grafana-service.yaml
@@ -0,0 +1,36 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: grafana
+ namespace: monitoring
+ labels:
+ app: grafana
+ adi10hero.monitoring: grafana
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '3000'
+spec:
+ selector:
+ app: grafana
+ adi10hero.monitoring: grafana
+ type: NodePort
+ ports:
+ - name: "3000"
+ port: 3000
+ targetPort: 3000
+ nodePort: 30000
+
diff --git a/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml
new file mode 100644
index 00000000..af3c5469
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-deployment.yaml
@@ -0,0 +1,36 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: kube-state-metrics
+ namespace: kube-system
+spec:
+ selector:
+ matchLabels:
+ app: kube-state-metrics
+ replicas: 1
+ template:
+ metadata:
+ labels:
+ app: kube-state-metrics
+ spec:
+ #serviceAccountName: prometheus
+ containers:
+ - name: kube-state-metrics
+ image: quay.io/coreos/kube-state-metrics:v1.2.0
+ ports:
+ - containerPort: 8080
+ name: monitoring
diff --git a/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml
new file mode 100644
index 00000000..8d294391
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/kube-state-metrics/kube-state-metrics-service.yaml
@@ -0,0 +1,26 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: Service
+apiVersion: v1
+metadata:
+ name: kube-state-metrics
+ namespace: kube-system
+spec:
+ selector:
+ app: kube-state-metrics
+ ports:
+ - protocol: TCP
+ port: 8080
+ targetPort: 8080
diff --git a/tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml b/tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml
new file mode 100644
index 00000000..f1c9b889
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/monitoring-namespace.yaml
@@ -0,0 +1,18 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: monitoring
diff --git a/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml
new file mode 100644
index 00000000..9334b2f4
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-daemonset.yaml
@@ -0,0 +1,80 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: node-exporter-daemonset
+ namespace: monitoring
+ labels:
+ app: node-exporter
+ adi10hero.monitoring: node-exporter
+spec:
+ selector:
+ matchLabels:
+ app: node-exporter
+ adi10hero.monitoring: node-exporter
+ template:
+ metadata:
+ labels:
+ app: node-exporter
+ adi10hero.monitoring: node-exporter
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/port: "9100"
+ spec:
+ hostPID: true
+ hostIPC: true
+ hostNetwork: true
+ containers:
+ - ports:
+ - containerPort: 9100
+ protocol: TCP
+ resources:
+ requests:
+ cpu: 0.15
+ securityContext:
+ runAsUser: 0
+ privileged: true
+ image: prom/node-exporter:v0.15.2
+ args:
+ - --path.procfs
+ - /host/proc
+ - --path.sysfs
+ - /host/sys
+ - --collector.filesystem.ignored-mount-points
+ - '^/(sys|proc|dev|host|etc)($|/)' # no inner double quotes: args bypass a shell, so they would become part of the regex and prevent any match
+ name: node-exporter
+ volumeMounts:
+ - name: dev
+ mountPath: /host/dev
+ - name: proc
+ mountPath: /host/proc
+ - name: sys
+ mountPath: /host/sys
+ - name: rootfs
+ mountPath: /rootfs
+ volumes:
+ - name: proc
+ hostPath:
+ path: /proc
+ - name: dev
+ hostPath:
+ path: /dev
+ - name: sys
+ hostPath:
+ path: /sys
+ - name: rootfs
+ hostPath:
+ path: /
diff --git a/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml
new file mode 100644
index 00000000..dd0aea4d
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/node-exporter/nodeexporter-service.yaml
@@ -0,0 +1,33 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: node-exporter
+ app: node-exporter
+ name: node-exporter
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/port: "9100"
+spec:
+ ports:
+ - name: "node-exporter"
+ port: 9100
+ targetPort: 9100
+ selector:
+ adi10hero.monitoring: node-exporter
+ app: node-exporter
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml
new file mode 100644
index 00000000..58b220a8
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/main-prometheus-service.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: prometheus-main
+ app: prometheus-main
+ name: prometheus-main
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '9090'
+spec:
+ type: NodePort
+ ports:
+ - name: prometheus-main
+ protocol: TCP
+ port: 9090
+ nodePort: 30902
+ selector:
+ adi10hero.monitoring: prometheus1
+ app: prometheus
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml
new file mode 100644
index 00000000..917f978f
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-config.yaml
@@ -0,0 +1,609 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: prometheus-config
+ namespace: monitoring
+data:
+ alert.rules: |-
+ groups:
+ - name: targets
+ rules:
+ - alert: MonitorServiceDown
+ expr: up == 0
+ for: 30s
+ labels:
+ severity: critical
+ annotations:
+ summary: "Monitor service non-operational"
+ description: "Service {{ $labels.instance }} is down."
+ - alert: HighCpuLoad
+ expr: node_load1 > 1.9
+ for: 15s
+ labels:
+ severity: critical
+ annotations:
+ summary: "Service under high load"
+ description: "Docker host is under high load, the avg load 1m is at {{ $value}}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
+
+ - name: host and hardware
+ rules:
+ - alert: HostHighCpuLoad
+ expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host high CPU load (instance {{ $labels.instance }})"
+ description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostSwapIsFillingUp
+ expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host swap is filling up (instance {{ $labels.instance }})"
+ description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HighMemoryLoad
+ expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100 > 85
+ for: 30s
+ labels:
+ severity: warning
+ annotations:
+ summary: "Server memory is almost full"
+ description: "Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
+
+ - alert: HighStorageLoad
+ expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
+ for: 30s
+ labels:
+ severity: warning
+ annotations:
+ summary: "Server storage is almost full"
+ description: "Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
+
+ - alert: HostNetworkTransmitErrors
+ expr: increase(node_network_transmit_errs_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host Network Transmit Errors (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last five minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostOutOfMemory
+ expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host out of memory (instance {{ $labels.instance }})"
+ description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostMemoryUnderMemoryPressure
+ expr: rate(node_vmstat_pgmajfault[1m]) > 1000
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host memory under memory pressure (instance {{ $labels.instance }})"
+ description: "The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualNetworkThroughputIn
+ expr: sum by (instance) (irate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual network throughput in (instance {{ $labels.instance }})"
+ description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualNetworkThroughputOut
+ expr: sum by (instance) (irate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual network throughput out (instance {{ $labels.instance }})"
+ description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualDiskRateRead
+ expr: sum by (instance) (irate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual disk read rate (instance {{ $labels.instance }})"
+ description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostUnusualDiskRateWrite
+ expr: sum by (instance) (irate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host unusual disk write rate (instance {{ $labels.instance }})"
+ description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostOutOfDiskSpace
+ expr: (node_filesystem_avail_bytes{mountpoint="/rootfs"} * 100) / node_filesystem_size_bytes{mountpoint="/rootfs"} < 10
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host out of disk space (instance {{ $labels.instance }})"
+ description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostDiskWillFillIn4Hours
+ expr: predict_linear(node_filesystem_free_bytes{fstype!~"tmpfs"}[1h], 4 * 3600) < 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host disk will fill in 4 hours (instance {{ $labels.instance }})"
+ description: "Disk will fill in 4 hours at current write rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostPhysicalComponentTooHot
+ expr: node_hwmon_temp_celsius > 75
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host physical component too hot (instance {{ $labels.instance }})"
+ description: "Physical hardware component too hot\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostNodeOvertemperatureAlarm
+ expr: node_hwmon_temp_alarm == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Host node overtemperature alarm (instance {{ $labels.instance }})"
+ description: "Physical node temperature alarm triggered\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostKernelVersionDeviations
+ expr: count(sum(label_replace(node_uname_info, "kernel", "$1", "release", "([0-9]+.[0-9]+.[0-9]+).*")) by (kernel)) > 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host kernel version deviations (instance {{ $labels.instance }})"
+ description: "Different kernel versions are running\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostOomKillDetected
+ expr: increase(node_vmstat_oom_kill[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host OOM kill detected (instance {{ $labels.instance }})"
+ description: "OOM kill detected\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostEdacCorrectableErrorsDetected
+ expr: increase(node_edac_correctable_errors_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: info
+ annotations:
+ summary: "Host EDAC Correctable Errors detected (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostEdacUncorrectableErrorsDetected
+ expr: node_edac_uncorrectable_errors_total > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostNetworkReceiveErrors
+ expr: increase(node_network_receive_errs_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host Network Receive Errors (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last five minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: HostNetworkTransmitErrors
+ expr: increase(node_network_transmit_errs_total[5m]) > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Host Network Transmit Errors (instance {{ $labels.instance }})"
+ description: "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last five minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - name: container
+ rules:
+ - alert: ContainerKilled
+ expr: time() - container_last_seen > 60
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container killed (instance {{ $labels.instance }})"
+ description: "A container has disappeared\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerCpuUsage
+ expr: sum by(instance, name) (rate(container_cpu_usage_seconds_total[3m]) * 100 > 80)
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container CPU usage (instance {{ $labels.instance }})"
+ description: "Container CPU usage is above 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerMemoryUsage
+ expr: (sum(container_memory_usage_bytes) BY (instance, name) / sum(container_spec_memory_limit_bytes > 0) BY (instance, name) * 100) > 125 # usage as a percentage of the configured memory limit
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container Memory usage (instance {{ $labels.instance }})"
+ description: "Container Memory usage is above 125% of its memory limit\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerVolumeUsage
+ expr: (1 - (sum(container_fs_inodes_free) BY (instance) / sum(container_fs_inodes_total) BY (instance))) * 100 > 80 # used-inode fraction is computed first, then scaled to percent
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container Volume usage (instance {{ $labels.instance }})"
+ description: "Container Volume usage is above 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerVolumeIoUsage
+ expr: (sum(container_fs_io_current) BY (instance, name) * 100) > 80
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container Volume IO usage (instance {{ $labels.instance }})"
+ description: "Container Volume IO usage is above 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: ContainerHighThrottleRate
+ expr: rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Container high throttle rate (instance {{ $labels.instance }})"
+ description: "Container is being throttled\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - name: kubernetes
+ rules:
+ - alert: KubernetesNodeReady
+ expr: kube_node_status_condition{condition="Ready",status="true"} == 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Node ready (instance {{ $labels.instance }})"
+ description: "Node {{ $labels.node }} has been unready for a long time\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesMemoryPressure
+ expr: kube_node_status_condition{condition="MemoryPressure",status="true"} == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes memory pressure (instance {{ $labels.instance }})"
+ description: "{{ $labels.node }} has MemoryPressure condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDiskPressure
+ expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes disk pressure (instance {{ $labels.instance }})"
+ description: "{{ $labels.node }} has DiskPressure condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesOutOfDisk
+ expr: kube_node_status_condition{condition="OutOfDisk",status="true"} == 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes out of disk (instance {{ $labels.instance }})"
+ description: "{{ $labels.node }} has OutOfDisk condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesJobFailed
+ expr: kube_job_status_failed > 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes Job failed (instance {{ $labels.instance }})"
+ description: "Job {{$labels.namespace}}/{{$labels.exported_job}} failed to complete\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesCronjobSuspended
+ expr: kube_cronjob_spec_suspend != 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes CronJob suspended (instance {{ $labels.instance }})"
+ description: "CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is suspended\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPersistentvolumeclaimPending
+ expr: kube_persistentvolumeclaim_status_phase{phase="Pending"} == 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes PersistentVolumeClaim pending (instance {{ $labels.instance }})"
+ description: "PersistentVolumeClaim {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is pending\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesVolumeOutOfDiskSpace
+ expr: kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes * 100 < 10
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes Volume out of disk space (instance {{ $labels.instance }})"
+ description: "Volume is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesVolumeFullInFourDays
+ expr: predict_linear(kubelet_volume_stats_available_bytes[6h], 4 * 24 * 3600) < 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Volume full in four days (instance {{ $labels.instance }})"
+ description: "{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPersistentvolumeError
+ expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes PersistentVolume error (instance {{ $labels.instance }})"
+ description: "Persistent volume is in bad state\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetDown
+ expr: (kube_statefulset_status_replicas_ready / kube_statefulset_status_replicas_current) != 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes StatefulSet down (instance {{ $labels.instance }})"
+ description: "A StatefulSet went down\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesHpaScalingAbility
+ expr: kube_hpa_status_condition{status="false", condition="AbleToScale"} == 1 # condition holds the condition name, status holds true/false/unknown
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes HPA scaling ability (instance {{ $labels.instance }})"
+ description: "Pod is unable to scale\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesHpaMetricAvailability
+ expr: kube_hpa_status_condition{status="false", condition="ScalingActive"} == 1 # condition holds the condition name, status holds true/false/unknown
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes HPA metric availability (instance {{ $labels.instance }})"
+ description: "HPA is not able to collect metrics\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesHpaScaleCapability
+ expr: kube_hpa_status_desired_replicas >= kube_hpa_spec_max_replicas
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes HPA scale capability (instance {{ $labels.instance }})"
+ description: "The maximum number of desired Pods has been hit\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPodNotHealthy
+ expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:]) > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Pod not healthy (instance {{ $labels.instance }})"
+ description: "Pod has been in a non-ready state for longer than an hour.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesPodCrashLooping
+ expr: rate(kube_pod_container_status_restarts_total[15m]) * 60 * 5 > 5
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes pod crash looping (instance {{ $labels.instance }})"
+ description: "Pod {{ $labels.pod }} is crash looping\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesReplicassetMismatch
+ expr: kube_replicaset_spec_replicas != kube_replicaset_status_ready_replicas # desired vs. ready replicas per ReplicaSet
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes ReplicaSet mismatch (instance {{ $labels.instance }})"
+ description: "ReplicaSet replicas mismatch\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDeploymentReplicasMismatch
+ expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes Deployment replicas mismatch (instance {{ $labels.instance }})"
+ description: "Deployment Replicas mismatch\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetReplicasMismatch
+ expr: kube_statefulset_status_replicas_ready != kube_statefulset_status_replicas
+ for: 5m # keep in sync with the duration quoted in the description below
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes StatefulSet replicas mismatch (instance {{ $labels.instance }})"
+ description: "A StatefulSet has not matched the expected number of replicas for longer than 5 minutes.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDeploymentGenerationMismatch
+ expr: kube_deployment_status_observed_generation != kube_deployment_metadata_generation
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes Deployment generation mismatch (instance {{ $labels.instance }})"
+ description: "A Deployment has failed but has not been rolled back.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetGenerationMismatch
+ expr: kube_statefulset_status_observed_generation != kube_statefulset_metadata_generation
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes StatefulSet generation mismatch (instance {{ $labels.instance }})"
+ description: "A StatefulSet has failed but has not been rolled back.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesStatefulsetUpdateNotRolledOut
+ expr: max without (revision) (kube_statefulset_status_current_revision unless kube_statefulset_status_update_revision) * (kube_statefulset_replicas != kube_statefulset_status_replicas_updated)
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes StatefulSet update not rolled out (instance {{ $labels.instance }})"
+ description: "StatefulSet update has not been rolled out.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDaemonsetRolloutStuck
+ expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled * 100 < 100 or kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes DaemonSet rollout stuck (instance {{ $labels.instance }})"
+ description: "Some Pods of DaemonSet are not scheduled or not ready\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesDaemonsetMisscheduled
+ expr: kube_daemonset_status_number_misscheduled > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes DaemonSet misscheduled (instance {{ $labels.instance }})"
+ description: "Some DaemonSet Pods are running where they are not supposed to run\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesCronjobTooLong
+ expr: time() - kube_cronjob_next_schedule_time > 3600
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes CronJob too long (instance {{ $labels.instance }})"
+ description: "CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesJobCompletion
+ expr: kube_job_spec_completions - kube_job_status_succeeded > 0 or kube_job_status_failed > 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes job completion (instance {{ $labels.instance }})"
+ description: "Kubernetes Job failed to complete\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesApiServerErrors
+ expr: sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[2m])) / sum(rate(apiserver_request_count{job="apiserver"}[2m])) * 100 > 3
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes API server errors (instance {{ $labels.instance }})"
+ description: "Kubernetes API server is experiencing high error rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesApiClientErrors
+ expr: (sum(rate(rest_client_requests_total{code=~"(4|5).."}[2m])) by (instance, job) / sum(rate(rest_client_requests_total[2m])) by (instance, job)) * 100 > 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes API client errors (instance {{ $labels.instance }})"
+ description: "Kubernetes API client is experiencing high error rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesClientCertificateExpiresNextWeek
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 7*24*60*60
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes client certificate expires next week (instance {{ $labels.instance }})"
+ description: "A client certificate used to authenticate to the apiserver is expiring next week.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesClientCertificateExpiresSoon
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 24*60*60
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Kubernetes client certificate expires soon (instance {{ $labels.instance }})"
+ description: "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+ - alert: KubernetesApiServerLatency
+ expr: histogram_quantile(0.99, sum(apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH|PROXY"}) WITHOUT (instance, resource)) / 1e+06 > 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Kubernetes API server latency (instance {{ $labels.instance }})"
+ description: "Kubernetes API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+
+
+ prometheus.yml: |-
+ global:
+ scrape_interval: 15s
+ evaluation_interval: 15s
+
+ rule_files:
+ - "/etc/prometheus/alert.rules"
+
+ scrape_configs:
+ - job_name: 'collectd-exporter'
+ scrape_interval: 5s
+ static_configs:
+ - targets: ['collectd-exporter:9103']
+
+ - job_name: 'cadvisor'
+ scrape_interval: 5s
+ static_configs:
+ - targets: ['cadvisor:8080']
+
+ - job_name: 'node-exporter'
+ scrape_interval: 5s
+ static_configs:
+ - targets: ['node-exporter:9100']
+
+ - job_name: 'prometheus'
+ scrape_interval: 10s
+ static_configs:
+ - targets: ['localhost:9090']
+
+ - job_name: 'kube-state-metrics'
+ scrape_interval: 10s
+ static_configs:
+ - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080']
+
+ alerting:
+ alertmanagers:
+ - scheme: http
+ static_configs:
+ - targets: ['alertmanager:9093', 'alertmanager1:9093']
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml
new file mode 100644
index 00000000..5b98b154
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-deployment.yaml
@@ -0,0 +1,73 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: prometheus-deployment
+ namespace: monitoring
+ labels:
+ app: prometheus
+ adi10hero.monitoring: prometheus
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ adi10hero.monitoring: prometheus
+ app: prometheus
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ labels:
+ adi10hero.monitoring: prometheus
+ app: prometheus
+ spec:
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm2
+ containers:
+ - name: prometheus
+ image: prom/prometheus
+ args:
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --storage.tsdb.path=/prometheus
+ - --storage.tsdb.retention.size=3GB
+ - --storage.tsdb.retention.time=30d
+ - --web.console.libraries=/etc/prometheus/console_libraries
+ - --web.console.templates=/etc/prometheus/consoles
+ ports:
+ - containerPort: 9090
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: prometheus-config-volume
+ mountPath: /etc/prometheus/
+ - name: prometheus-storage-volume
+ mountPath: /prometheus/
+ restartPolicy: Always
+ volumes:
+ - name: prometheus-config-volume
+ configMap:
+ defaultMode: 420
+ name: prometheus-config
+ - name: prometheus-storage-volume
+ persistentVolumeClaim:
+ claimName: prometheus-pvc
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml
new file mode 100644
index 00000000..f10cd073
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pv.yaml
@@ -0,0 +1,30 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus-pv
+ namespace: monitoring
+ labels:
+ app: prometheus-pv
+ adi10hero.monitoring: prometheus-pv
+spec:
+ storageClassName: monitoring
+ capacity:
+ storage: 6Gi
+ accessModes:
+ - ReadWriteMany
+ hostPath:
+ path: "/usr/share/monitoring_data/prometheus"
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml
new file mode 100644
index 00000000..812fcc73
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-pvc.yaml
@@ -0,0 +1,33 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: prometheus-pvc
+ namespace: monitoring
+ labels:
+ app: prometheus-pvc
+ adi10hero.monitoring: prometheus-pvc
+spec:
+ accessModes:
+ - ReadWriteMany
+ storageClassName: monitoring
+ resources:
+ requests:
+ storage: 3Gi
+ selector:
+ matchLabels:
+ app: prometheus-pv
+ adi10hero.monitoring: prometheus-pv
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml
new file mode 100644
index 00000000..5be76d3e
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus-service.yaml
@@ -0,0 +1,34 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: prometheus
+ app: prometheus
+ name: prometheus
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '9090'
+spec:
+ type: NodePort
+ ports:
+ - name: prometheus
+ protocol: TCP
+ port: 9090
+ nodePort: 30900
+ selector:
+ adi10hero.monitoring: prometheus
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml
new file mode 100644
index 00000000..149bea84
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-deployment.yaml
@@ -0,0 +1,73 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: prometheus1-deployment
+ namespace: monitoring
+ labels:
+ app: prometheus1
+ adi10hero.monitoring: prometheus1
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
+ strategy:
+ type: Recreate
+ template:
+ metadata:
+ labels:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
+ spec:
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - vm3
+ containers:
+ - name: prometheus
+ image: prom/prometheus
+ args:
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --storage.tsdb.path=/prometheus
+ - --storage.tsdb.retention.size=3GB
+ - --storage.tsdb.retention.time=30d
+ - --web.console.libraries=/etc/prometheus/console_libraries
+ - --web.console.templates=/etc/prometheus/consoles
+ ports:
+ - containerPort: 9090
+ securityContext:
+ runAsUser: 0
+ volumeMounts:
+ - name: prometheus-config-volume
+ mountPath: /etc/prometheus/
+ - name: prometheus-storage-volume
+ mountPath: /prometheus/
+ restartPolicy: Always
+ volumes:
+ - name: prometheus-config-volume
+ configMap:
+ defaultMode: 420
+ name: prometheus-config
+ - name: prometheus-storage-volume
+ persistentVolumeClaim:
+ claimName: prometheus-pvc
diff --git a/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml
new file mode 100644
index 00000000..439deec1
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/files/prometheus/prometheus1-service.yaml
@@ -0,0 +1,35 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
+ name: prometheus1
+ namespace: monitoring
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: '9090'
+spec:
+ type: NodePort
+ ports:
+ - name: prometheus1
+ protocol: TCP
+ port: 9090
+ nodePort: 30901
+ selector:
+ adi10hero.monitoring: prometheus1
+ app: prometheus1
diff --git a/tools/lma/ansible-server/roles/monitoring/tasks/main.yml b/tools/lma/ansible-server/roles/monitoring/tasks/main.yml
new file mode 100644
index 00000000..cd4e6aca
--- /dev/null
+++ b/tools/lma/ansible-server/roles/monitoring/tasks/main.yml
@@ -0,0 +1,273 @@
+# Copyright 2020 Aditya Srivastava.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+#PAG (Prometheus, Alertmanager, Grafana) setup in k8s cluster
+
+#***********************************************************************************************************
+#copy all yaml to /tmp/files/
+#***********************************************************************************************************
+- name: copy all yaml to /tmp/files/
+ copy:
+ src: ../files/
+ dest: /tmp/files/
+
+#***********************************************************************************************************
+#Creating Namespace
+#***********************************************************************************************************
+- name: Creating Monitoring Namespace
+ k8s:
+ state: present
+ src: /tmp/files/monitoring-namespace.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume for Prometheus
+#***********************************************************************************************************
+- name: creating Persistent Volume for Prometheus
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-pv.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume for Grafana
+#***********************************************************************************************************
+- name: creating Persistent Volume for Grafana
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-pv.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume Claim for Prometheus
+#***********************************************************************************************************
+- name: creating Persistent Volume Claim for Prometheus
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-pvc.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#creating Persistent Volume Claim for Grafana
+#***********************************************************************************************************
+- name: creating Persistent Volume Claim for Grafana
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-pvc.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the cAdvisor daemonset
+#***********************************************************************************************************
+- name: Creating cAdvisor deamonset
+ k8s:
+ state: present
+ src: /tmp/files/cadvisor/cadvisor-deamonset.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting the CAdvisor service
+#***********************************************************************************************************
+- name: Starting cAdvisor service
+ k8s:
+ state: present
+ src: /tmp/files/cadvisor/cadvisor-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Deploying and Starting the kube-state-metrics service (in the kube-system namespace)
+#***********************************************************************************************************
+- name: Deploying kube-state-metrics
+ k8s:
+ state: present
+ src: /tmp/files/kube-state-metrics/kube-state-metrics-deployment.yaml
+ namespace: kube-system
+
+- name: Starting kube-state-metrics service
+ k8s:
+ state: present
+ src: /tmp/files/kube-state-metrics/kube-state-metrics-service.yaml
+ namespace: kube-system
+
+#***********************************************************************************************************
+#Making the NodeExporter daemonset
+#***********************************************************************************************************
+- name: Creating NodeExporter deamonset
+ k8s:
+ state: present
+ src: /tmp/files/node-exporter/nodeexporter-daemonset.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting the NodeExporter service
+#***********************************************************************************************************
+- name: Starting NodeExporter service
+ k8s:
+ state: present
+ src: /tmp/files/node-exporter/nodeexporter-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the collectd-exporter deployment
+#***********************************************************************************************************
+- name: Creating collectd-exporter deployment
+ k8s:
+ state: present
+ src: /tmp/files/collectd-exporter/collectd-exporter-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the collectd-exporter service
+#***********************************************************************************************************
+- name: Creating collectd-exporter service
+ k8s:
+ state: present
+ src: /tmp/files/collectd-exporter/collectd-exporter-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Webhook goes here
+#***********************************************************************************************************
+
+#***********************************************************************************************************
+#Making the config file for Alertmanagers
+#***********************************************************************************************************
+- name: Creating config map for Alertmanagers
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager-config.yaml
+ namespace: monitoring
+
+# - name: Creating config map for Alertmanagers
+# k8s:
+# state: present
+# src: /tmp/files/alertmanager1-config.yaml
+# namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 1st alertmanager deployment
+#***********************************************************************************************************
+- name: Creating 1st alertmanager deployment
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 1st alertmanager service
+#***********************************************************************************************************
+- name: Creating 1st alertmanager service
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 2nd alertmanager deployment
+#***********************************************************************************************************
+- name: Creating 2nd alertmanager deployment
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager1-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the 2nd alertmanager service
+#***********************************************************************************************************
+- name: Creating 2nd alertmanager service
+ k8s:
+ state: present
+ src: /tmp/files/alertmanager/alertmanager1-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Making the config file for Prometheus
+#***********************************************************************************************************
+- name: Creating 1st Prometheus Config
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-config.yaml
+ namespace: monitoring
+
+# - name: Creating 2nd Prometheus Config
+# k8s:
+# state: present
+# src: /tmp/files/prometheus1-config.yaml
+# namespace: monitoring
+
+#***********************************************************************************************************
+#Starting Prometheus
+#***********************************************************************************************************
+- name: Starting Prometheus 1
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-deployment.yaml
+ namespace: monitoring
+
+- name: Starting Prometheus 2
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus1-deployment.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting Prometheus Service
+#***********************************************************************************************************
+- name: Starting Prometheus 1 Service
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus-service.yaml
+ namespace: monitoring
+
+- name: Starting Prometheus 2 Service
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/prometheus1-service.yaml
+ namespace: monitoring
+
+- name: Starting Main Prometheus Service
+ k8s:
+ state: present
+ src: /tmp/files/prometheus/main-prometheus-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#Starting Grafana
+#***********************************************************************************************************
+- name: Creating Grafana Datasource Config
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-datasource-config.yaml
+ namespace: monitoring
+
+- name: Starting Grafana
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-deployment.yaml
+ namespace: monitoring
+
+- name: Starting Grafana Service
+ k8s:
+ state: present
+ src: /tmp/files/grafana/grafana-service.yaml
+ namespace: monitoring
+
+#***********************************************************************************************************
+#removing /tmp/files
+#***********************************************************************************************************
+- name: Removing /tmp/files
+ file:
+ path: "/tmp/files"
+ state: absent
diff --git a/tools/lma/ansible-server/roles/nfs/tasks/main.yml b/tools/lma/ansible-server/roles/nfs/tasks/main.yml
new file mode 100644
index 00000000..2380ea74
--- /dev/null
+++ b/tools/lma/ansible-server/roles/nfs/tasks/main.yml
@@ -0,0 +1,42 @@
+# Copyright 2020 Adarsh yadav, Aditya Srivastava
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#create Dir /srv/nfs
+- name: Create Directory for elasticserch
+ file: path="/srv/nfs/{{item}}" state=directory
+ with_items:
+ - ['data', 'master']
+
+- name: Create Directory for grafana
+ file: path="/usr/share/monitoring_data/grafana" state=directory
+
+#installing NFS
+- name: Installing NFS server utils
+ yum:
+ name: nfs-utils
+ state: present
+
+#update /etc/exports file
+- name: Edit /etc/exports file for NFS
+ lineinfile: path=/etc/exports line="{{item.line}}"
+ with_items:
+ - {line: "/srv/nfs/master *(rw,sync,no_root_squash,no_subtree_check)"}
+ - {line: "/srv/nfs/data *(rw,sync,no_root_squash,no_subtree_check)"}
+ - {line: "/usr/share/monitoring_data/grafana *(rw,sync,no_root_squash,no_subtree_check)"}
+
+#starting NFS service
+- name: 'starting NFS service'
+ service:
+ name: nfs
+ state: restarted