aboutsummaryrefslogtreecommitdiffstats
path: root/sdv/docker/sdvstate/internal/validator/kuberef/monitoring_agent_checker.py
diff options
context:
space:
mode:
Diffstat (limited to 'sdv/docker/sdvstate/internal/validator/kuberef/monitoring_agent_checker.py')
-rw-r--r--sdv/docker/sdvstate/internal/validator/kuberef/monitoring_agent_checker.py102
1 files changed, 102 insertions, 0 deletions
diff --git a/sdv/docker/sdvstate/internal/validator/kuberef/monitoring_agent_checker.py b/sdv/docker/sdvstate/internal/validator/kuberef/monitoring_agent_checker.py
new file mode 100644
index 0000000..bc94c33
--- /dev/null
+++ b/sdv/docker/sdvstate/internal/validator/kuberef/monitoring_agent_checker.py
@@ -0,0 +1,102 @@
+"""
+Monitoring agent checks
+Checks for prometheus and collectd existence and health
+"""
+
+import logging
+from tools.kube_utils import kube_api
+from internal.store_result import store_result
+from internal.checks.pod_health_check import pod_status, get_logs
+
def health_checker(pod, api_instance, logger, result):
    """
    Evaluate a single pod and report its name together with its status.

    The pod is passed to ``pod_status``. When the returned stats carry
    ``criteria == 'fail'``, the pod logs obtained through ``get_logs`` are
    attached to the stats under the ``'logs'`` key and ``result['criteria']``
    is set to ``'fail'`` (``result`` is modified in place).

    Returns a two-element list: ``[pod name, pod stats]``.
    """
    pod_stats = pod_status(logger, pod)
    failed = pod_stats['criteria'] == 'fail'

    if failed:
        # Keep the logs next to the failing stats and mark the overall
        # result of the calling check as failed.
        pod_stats['logs'] = get_logs(api_instance, pod)
        result['criteria'] = 'fail'

    return [pod.metadata.name, pod_stats]
+
def monitoring_agent_check():
    """
    Check that prometheus pods exist and report the status of each one.

    If a namespace called ``monitoring`` exists, only that namespace is
    searched; otherwise every namespace is searched. A pod is selected when
    its name contains ``'prometheus'``. The check fails when no such pod is
    found, or when ``health_checker`` marks the result as failed.

    The result is handed to ``store_result`` and also returned.
    """
    api_instance = kube_api()
    ns_names = [item.metadata.name for item in api_instance.list_namespace().items]

    result = {
        'category': 'observability',
        'case_name': 'prometheus_check',
        'criteria': 'pass',
        'details': [],
    }

    logger = logging.getLogger(__name__)

    # Narrow the search to the 'monitoring' namespace when it is present,
    # otherwise fall back to scanning all namespaces.
    if 'monitoring' in ns_names:
        search_space = ['monitoring']
    else:
        search_space = ns_names

    status = []
    for namespace in search_space:
        listing = api_instance.list_namespaced_pod(namespace, watch=False)
        for pod in listing.items:
            if 'prometheus' not in pod.metadata.name:
                continue
            status.append(health_checker(pod, api_instance, logger, result))

    # An empty status list means no prometheus pod was found at all.
    if not status:
        result['criteria'] = 'fail'

    result['details'].append(status)
    store_result(logger, result)
    return result
+
+
def collectd_check():
    """
    Check that collectd pods exist and report the status of each one.

    Pods from all namespaces are listed, and a pod is selected when its name
    contains ``'collectd'``. The check fails when no such pod is found, or
    when ``health_checker`` marks the result as failed.

    The result is handed to ``store_result`` and also returned.
    """
    api_instance = kube_api()
    all_pods = api_instance.list_pod_for_all_namespaces().items

    result = {
        'category': 'observability',
        'case_name': 'collectd_check',
        'criteria': 'pass',
        'details': [],
    }

    logger = logging.getLogger(__name__)

    status = [
        health_checker(pod, api_instance, logger, result)
        for pod in all_pods
        if 'collectd' in pod.metadata.name
    ]

    # An empty status list means no collectd pod was found at all.
    if not status:
        result['criteria'] = 'fail'

    result['details'].append(status)
    store_result(logger, result)
    return result