summaryrefslogtreecommitdiffstats
path: root/tests/lib/monitors
diff options
context:
space:
mode:
author: Umar Farooq <umar.farooq@neclab.eu> 2017-07-13 12:20:56 +0200
committer: Ryota Mibu <r-mibu@cq.jp.nec.com> 2017-08-09 04:39:54 +0000
commit: 282369b6fd58a78e6a7c91f21b331363d4ed0fb3 (patch)
tree: 3dc95a2c16ca948627df6fb93629a3652e620211 /tests/lib/monitors
parent: db6d9cdcb37fee810fdf59e4d96b9a5139b5c8b7 (diff)
Add Collectd as a Monitor Type
A plugin for collectd is added to use collectd on compute as a monitor type. Monitor files are updated accordingly. The inspector now listens on all interfaces instead of only localhost to enable it to communicate with compute node. JIRA: DOCTOR-86 JIRA: DOCTOR-101 Change-Id: Idc834d428152e4687020eff7d8db36a652b1bf86 Signed-off-by: Umar Farooq <umar.farooq@neclab.eu>
Diffstat (limited to 'tests/lib/monitors')
-rw-r--r-- tests/lib/monitors/collectd/collectd 101
-rw-r--r-- tests/lib/monitors/collectd/collectd_plugin.py 167
-rw-r--r-- tests/lib/monitors/sample/monitor.py 124
-rw-r--r-- tests/lib/monitors/sample/sample 18
4 files changed, 410 insertions, 0 deletions
diff --git a/tests/lib/monitors/collectd/collectd b/tests/lib/monitors/collectd/collectd
new file mode 100644
index 00000000..f5096658
--- /dev/null
+++ b/tests/lib/monitors/collectd/collectd
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+# Generate a collectd.conf for the compute node, install it there (saving
+# any configuration that already exists), copy the Python plugin over and
+# restart collectd on the compute node.
+#
+# Reads from the environment: COMPUTE_HOST, COMPUTE_USER, COMPUTE_IP,
+# INSPECTOR_TYPE, INTERFACE_NAME, TOP_DIR, ssh_opts_cpu and the OS_* variables
+# written into the <Module> section below.
+function start_monitor_collectd {
+ ## CONTROL_IP is the IP of primary interface of control node i.e.
+ ## eth0, eno1. It is used by collectd monitor to communicate with
+ ## sample inspector.
+ ## @TODO (umar) see if mgmt IP of control is a better option. Also
+ ## primary interface may not be the right option
+ # The pipeline drops the text 127.0.0.1, prints the address found on each
+ # remaining "inet" line of `ip a`, and keeps only the first one.
+ CONTROL_IP="$(ip a | sed -En 's/127.0.0.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p' | sed -n 1p)"
+ #CONTROL_IP=192.168.98.6
+
+ # Write the configuration to a local file first. Variables are expanded
+ # here, on the local side; the @INTERFACE_NAME@ placeholder is substituted
+ # later on the compute node.
+ echo "
+Hostname \"$COMPUTE_HOST\"
+FQDNLookup false
+Interval 1
+MaxReadInterval 2
+
+<LoadPlugin python>
+ Globals true
+</LoadPlugin>
+LoadPlugin ovs_events
+LoadPlugin logfile
+
+<Plugin logfile>
+ File \"/var/log/collectd.log\"
+ Timestamp true
+ LogLevel \"info\"
+</Plugin>
+
+<Plugin python>
+ ModulePath \"/home/$COMPUTE_USER\"
+ LogTraces true
+ Interactive false
+ Import \"collectd_plugin\"
+ <Module \"collectd_plugin\">
+ control_ip \"$CONTROL_IP\"
+ compute_ip \"$COMPUTE_IP\"
+ compute_host \"$COMPUTE_HOST\"
+ compute_user \"$COMPUTE_USER\"
+ inspector_type \"$INSPECTOR_TYPE\"
+ os_auth_url \"$OS_AUTH_URL\"
+ os_username \"$OS_USERNAME\"
+ os_password \"$OS_PASSWORD\"
+ os_project_name \"$OS_PROJECT_NAME\"
+ os_user_domain_name \"$OS_USER_DOMAIN_NAME\"
+ os_user_domain_id \"$OS_USER_DOMAIN_ID\"
+ os_project_domain_name \"$OS_PROJECT_DOMAIN_NAME\"
+ os_project_domain_id \"$OS_PROJECT_DOMAIN_ID\"
+ </Module>
+</Plugin>
+
+<Plugin ovs_events>
+ Port 6640
+ Socket \"/var/run/openvswitch/db.sock\"
+ Interfaces \"@INTERFACE_NAME@\"
+ SendNotification true
+ DispatchValues false
+</Plugin>
+
+" > $TOP_DIR/lib/monitors/collectd.conf
+
+ # Copy the generated file into the remote login directory.
+ scp $ssh_opts_cpu $TOP_DIR/lib/monitors/collectd.conf $COMPUTE_USER@$COMPUTE_IP:
+ ## @TODO (umar) Always assuming that the interface is assigned an IP if
+ ## interface name is not provided. See if there is a better approach
+ # In the remote script below, unescaped variables ($INTERFACE_NAME,
+ # $COMPUTE_IP) are expanded locally before the script is sent, while
+ # escaped ones (\$dev, \$collectd_conf) are expanded on the compute node.
+ # An existing collectd.conf is copied to "-doctor-saved"; otherwise a
+ # "-doctor-created" marker is touched so that cleanup can tell the two
+ # cases apart.
+ ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" "
+ if [ -n \"$INTERFACE_NAME\" ]; then
+ dev=$INTERFACE_NAME
+ else
+ dev=\$(sudo ip a | awk '/ $COMPUTE_IP\//{print \$NF}')
+ fi
+ sed -i -e \"s/@INTERFACE_NAME@/\$dev/\" collectd.conf
+ collectd_conf=/opt/collectd/etc/collectd.conf
+ if [ -e \$collectd_conf ]; then
+ sudo cp \$collectd_conf \${collectd_conf}-doctor-saved
+ else
+ sudo touch \${collectd_conf}-doctor-created
+ fi
+ sudo mv collectd.conf /opt/collectd/etc/collectd.conf"
+
+ # The plugin is copied into the remote login directory.
+ # NOTE(review): this presumably has to be /home/$COMPUTE_USER to match the
+ # ModulePath written above -- confirm for users with another home directory.
+ scp $ssh_opts_cpu $TOP_DIR/lib/monitors/collectd/collectd_plugin.py $COMPUTE_USER@$COMPUTE_IP:collectd_plugin.py
+ # Kill any running collectd, then start the daemon again.
+ ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" "sudo pkill collectd
+ sudo /opt/collectd/sbin/collectd"
+}
+
+# Stop the collectd daemon running on the compute node.
+stop_monitor_collectd() {
+    local compute_target="$COMPUTE_USER@$COMPUTE_IP"
+    # $ssh_opts_cpu is left unquoted on purpose so it splits into options.
+    ssh $ssh_opts_cpu "$compute_target" 'sudo pkill collectd'
+}
+
+# Undo the changes made by start_monitor_collectd.
+#
+# On the compute node: if the "-doctor-created" marker exists, the marker
+# and the installed collectd.conf are removed; otherwise, if a
+# "-doctor-saved" backup exists, it is copied back over collectd.conf and
+# then removed. Locally, the generated collectd.conf is deleted.
+function cleanup_monitor_collectd {
+    # Escaped variables (\$...) are expanded by the remote shell.
+    ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" "
+        collectd_conf=/opt/collectd/etc/collectd.conf
+        if [ -e \"\${collectd_conf}-doctor-created\" ]; then
+            sudo rm \"\${collectd_conf}-doctor-created\"
+            sudo rm \$collectd_conf
+        elif [ -e \"\${collectd_conf}-doctor-saved\" ]; then
+            sudo cp -f \"\${collectd_conf}-doctor-saved\" \$collectd_conf
+            sudo rm \"\${collectd_conf}-doctor-saved\"
+        fi"
+
+    # Quote the path so a TOP_DIR containing spaces works, and use -f so
+    # that cleanup does not fail when the file was never generated or has
+    # already been removed by an earlier cleanup run.
+    rm -f "$TOP_DIR/lib/monitors/collectd.conf"
+}
diff --git a/tests/lib/monitors/collectd/collectd_plugin.py b/tests/lib/monitors/collectd/collectd_plugin.py
new file mode 100644
index 00000000..70fcf26e
--- /dev/null
+++ b/tests/lib/monitors/collectd/collectd_plugin.py
@@ -0,0 +1,167 @@
+##############################################################################
+# Copyright (c) 2017 NEC Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+import collectd
+import sys
+from netifaces import interfaces, ifaddresses, AF_INET
+from datetime import datetime
+import json
+import requests
+import time
+from requests.exceptions import ConnectionError
+
+from keystoneauth1 import loading
+from keystoneauth1 import session
+from congressclient.v1 import client
+
+
+def write_debug(str_write, write_type, compute_user):
+    """Write a debug message to /home/<compute_user>/monitor.log.
+
+    :param str_write: message to write; it is formatted with ``%s``
+    :param write_type: mode passed to ``open()``, e.g. ``"w"`` or ``"a"``
+    :param compute_user: user whose home directory holds the log file
+    """
+    file_name = '/home/%s/monitor.log' % compute_user
+    # The context manager closes the file even when write() raises.
+    with open(file_name, write_type) as file_tmp:
+        file_tmp.write("%s" % str_write)
+
+
+class DoctorMonitorCollectd(object):
+    """collectd Python plugin that reports a host failure to an inspector.
+
+    The instance is configured from the plugin's ``<Module>`` block
+    (``config_func``), resolves the inspector URL when collectd initialises
+    the plugin (``init_collectd``) and sends one ``compute.host.down`` event
+    when a FAILURE or WARNING notification arrives (``handle_notif``).
+    """
+
+    # Maps lower-cased configuration keys to the attribute that stores them.
+    _CONFIG_ATTRS = {
+        'compute_host': 'host_name',
+        'control_ip': 'control_ip',
+        'compute_ip': 'compute_ip',
+        'compute_user': 'compute_user',
+        'inspector_type': 'inspector_type',
+        'os_auth_url': 'os_auth_url',
+        'os_username': 'os_username',
+        'os_password': 'os_password',
+        'os_project_name': 'os_project_name',
+        'os_user_domain_name': 'os_user_domain_name',
+        'os_user_domain_id': 'os_user_domain_id',
+        'os_project_domain_name': 'os_project_domain_name',
+        'os_project_domain_id': 'os_project_domain_id',
+    }
+
+    def __init__(self):
+        self.control_ip = ''
+        self.compute_user = ''
+        self.compute_ip = ''
+        self.host_name = ''
+        self.inspector_type = ''
+        self.inspector_url = ''
+        self.os_auth_url = ''
+        self.os_username = ''
+        self.os_password = ''
+        self.os_project_name = ''
+        self.os_user_domain_name = ''
+        self.os_user_domain_id = ''
+        self.os_project_domain_name = ''
+        self.os_project_domain_id = ''
+        self.sess = ''
+        self.auth = ''
+        # Set to 1 once an event has been handed to notify_inspector().
+        self.inspector_notified = 0
+        # Set to 1 at the end of init_collectd(); notifications that arrive
+        # earlier are ignored.
+        self.start_notifications = 0
+        self.monitor_type = 'sample'
+
+    def config_func(self, config):
+        """Store the values of the plugin's configuration block.
+
+        :param config: configuration node whose ``children`` each provide a
+            ``key`` and a ``values`` sequence; only the first value is used.
+            Keys are matched case-insensitively and unknown keys are logged
+            with ``collectd.info``.
+        """
+        for node in config.children:
+            key = node.key.lower()
+            attr = self._CONFIG_ATTRS.get(key)
+            if attr is None:
+                collectd.info('Unknown config key "%s"' % key)
+                continue
+            setattr(self, attr, node.values[0])
+
+    def init_collectd(self):
+        """Resolve the inspector URL and enable notification handling.
+
+        Truncates the debug log, then derives ``inspector_url`` from
+        ``inspector_type``. An unsupported type is logged and terminates
+        via ``sys.exit()``.
+        """
+        write_debug("Compute node collectd monitor start at %s\n\n"
+                    % datetime.now().isoformat(), "w", self.compute_user)
+
+        if self.inspector_type == 'sample':
+            self.inspector_url = 'http://%s:12345/events' % self.control_ip
+        elif self.inspector_type == 'congress':
+            self.inspector_url = self._congress_events_url()
+        else:
+            # Say why we are exiting instead of disappearing silently.
+            collectd.error("Unsupported inspector type '%s'"
+                           % self.inspector_type)
+            sys.exit()
+        self.start_notifications = 1
+
+    def _congress_events_url(self):
+        """Authenticate and return the doctor datasource events-rows URL."""
+        loader = loading.get_plugin_loader('password')
+        self.auth = loader.load_from_options(
+            auth_url=self.os_auth_url,
+            username=self.os_username,
+            password=self.os_password,
+            project_name=self.os_project_name,
+            user_domain_name=self.os_user_domain_name,
+            user_domain_id=self.os_user_domain_id,
+            project_domain_name=self.os_project_domain_name,
+            project_domain_id=self.os_project_domain_id)
+        self.sess = session.Session(auth=self.auth)
+        congress = client.Client(session=self.sess, service_type='policy')
+        ds = congress.list_datasources()['results']
+        doctor_ds = next((item for item in ds if item['driver'] == 'doctor'),
+                         None)
+        if doctor_ds is None:
+            # Fail with a clear message rather than a TypeError on None['id'].
+            raise RuntimeError(
+                "No Congress datasource with driver 'doctor' was found")
+
+        congress_endpoint = congress.httpclient.get_endpoint(auth=self.auth)
+        return ('%s/v1/data-sources/%s/tables/events/rows' %
+                (congress_endpoint, doctor_ds['id']))
+
+    def notify_inspector(self):
+        """Send a ``compute.host.down`` event for this host to the inspector.
+
+        The sample inspector receives a POST, Congress receives a PUT
+        carrying an auth token. Connection errors are logged through
+        collectd and not raised.
+        """
+        event_type = "compute.host.down"
+        payload = [
+            {
+                'id': ("monitor_%s_id1" % self.monitor_type),
+                'time': datetime.now().isoformat(),
+                'type': event_type,
+                'details': {
+                    'hostname': self.host_name,
+                    'status': 'down',
+                    'monitor': ("monitor_%s" % self.monitor_type),
+                    'monitor_event_id': ("monitor_%s_event1"
+                                         % self.monitor_type)
+                },
+            },
+        ]
+        data = json.dumps(payload)
+        # Flagged before the request is made, so handle_notif() will not
+        # trigger a second event even if this request fails.
+        self.inspector_notified = 1
+
+        try:
+            if self.inspector_type == 'sample':
+                headers = {'content-type': 'application/json'}
+                requests.post(self.inspector_url, data=data, headers=headers)
+            elif self.inspector_type == 'congress':
+                # TODO(umar) enhance for token expiry case
+                headers = {
+                    'Content-Type': 'application/json',
+                    'Accept': 'application/json',
+                    'X-Auth-Token': self.sess.get_token()
+                }
+                requests.put(self.inspector_url, data=data, headers=headers)
+        except ConnectionError as err:
+            # Log through collectd: a print statement is Python-2-only
+            # syntax and its output is not visible from the daemon.
+            collectd.error("Failed to notify inspector at %s: %s"
+                           % (self.inspector_url, err))
+
+    def handle_notif(self, notification, data=None):
+        """React to a collectd notification.
+
+        FAILURE and WARNING severities trigger ``notify_inspector()`` once,
+        and only after ``init_collectd()`` has completed. OKAY and unknown
+        severities are only logged.
+
+        :param notification: object exposing a ``severity`` attribute
+        :param data: unused
+        """
+        severity = notification.severity
+        if severity in (collectd.NOTIF_FAILURE, collectd.NOTIF_WARNING):
+            if self.start_notifications == 1 and self.inspector_notified == 0:
+                write_debug("Received down notification: doctor monitor "
+                            "detected at %s\n" % time.time(),
+                            "a", self.compute_user)
+                self.notify_inspector()
+        elif severity == collectd.NOTIF_OKAY:
+            collectd.info("Interface status: UP again %s\n" % time.time())
+        else:
+            collectd.info("Unknown notification severity %s\n" % severity)
+
+
+# Single plugin instance; its bound methods are the callbacks registered below.
+monitor = DoctorMonitorCollectd()
+
+# Hook the instance into collectd: one callback for the configuration block,
+# one for plugin initialisation and one for notifications.
+collectd.register_config(monitor.config_func)
+collectd.register_init(monitor.init_collectd)
+collectd.register_notification(monitor.handle_notif)
diff --git a/tests/lib/monitors/sample/monitor.py b/tests/lib/monitors/sample/monitor.py
new file mode 100644
index 00000000..7450c534
--- /dev/null
+++ b/tests/lib/monitors/sample/monitor.py
@@ -0,0 +1,124 @@
+##############################################################################
+# Copyright (c) 2016 NEC Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+import argparse
+from datetime import datetime
+import json
+import logger as doctor_log
+import requests
+import socket
+import time
+
+from keystoneauth1 import session
+from congressclient.v1 import client
+
+import identity_auth
+
+# NOTE: icmp message with all zero data (checksum = 0xf7ff)
+# see https://tools.ietf.org/html/rfc792
+# Written as a bytes literal: socket.sendto() needs bytes on Python 3, and
+# on Python 2 the b prefix leaves the value unchanged.
+ICMP_ECHO_MESSAGE = b'\x08\x00\xf7\xff\x00\x00\x00\x00'
+
+# Inspector back ends that DoctorMonitorSample accepts.
+SUPPORTED_INSPECTOR_TYPES = ['sample', 'congress']
+
+LOG = doctor_log.Logger('doctor_monitor').getLogger()
+
+
+class DoctorMonitorSample(object):
+    """Ping a host over ICMP and report it as down on the first timeout.
+
+    The failure is reported as a ``compute.host.down`` event to either the
+    sample inspector (HTTP POST) or Congress (HTTP PUT with an auth token).
+    """
+
+    interval = 0.1  # second
+    timeout = 0.1  # second
+    event_type = "compute.host.down"
+
+    def __init__(self, args):
+        """Set up the monitor from parsed command-line arguments.
+
+        :param args: object with ``hostname``, ``ip`` and ``inspector_type``
+            attributes; when ``ip`` is empty the hostname is resolved
+        :raises Exception: if ``inspector_type`` is not supported
+        """
+        if args.inspector_type not in SUPPORTED_INSPECTOR_TYPES:
+            # Interpolate the type into the message; passing it as a second
+            # exception argument left the '%s' placeholder unformatted.
+            raise Exception("Inspector type '%s' not supported"
+                            % args.inspector_type)
+
+        self.hostname = args.hostname
+        self.inspector_type = args.inspector_type
+        self.ip_addr = args.ip or socket.gethostbyname(self.hostname)
+
+        if self.inspector_type == 'sample':
+            self.inspector_url = 'http://127.0.0.1:12345/events'
+        elif self.inspector_type == 'congress':
+            auth = identity_auth.get_identity_auth()
+            self.session = session.Session(auth=auth)
+            congress = client.Client(session=self.session,
+                                     service_type='policy')
+            ds = congress.list_datasources()['results']
+            doctor_ds = next(
+                (item for item in ds if item['driver'] == 'doctor'), None)
+            if doctor_ds is None:
+                # Clear failure instead of a TypeError on None['id'].
+                raise Exception(
+                    "No Congress datasource with driver 'doctor' was found")
+
+            congress_endpoint = congress.httpclient.get_endpoint(auth=auth)
+            self.inspector_url = ('%s/v1/data-sources/%s/tables/events/rows' %
+                                  (congress_endpoint, doctor_ds['id']))
+
+    def start_loop(self):
+        """Ping the host every ``interval`` seconds until a reply times out.
+
+        On the first timeout the failure is reported through
+        ``report_error()`` and the method returns.
+        """
+        LOG.debug("start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname,
+                                                           'i': self.ip_addr})
+        sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
+                             socket.IPPROTO_ICMP)
+        try:
+            sock.settimeout(self.timeout)
+            while True:
+                try:
+                    sock.sendto(ICMP_ECHO_MESSAGE, (self.ip_addr, 0))
+                    # Only the arrival of a reply matters, not its content.
+                    sock.recv(4096)
+                except socket.timeout:
+                    LOG.info("doctor monitor detected at %s" % time.time())
+                    self.report_error()
+                    LOG.info("ping timeout, quit monitoring...")
+                    return
+                time.sleep(self.interval)
+        finally:
+            # Release the raw socket on every exit path, including errors.
+            sock.close()
+
+    def report_error(self):
+        """Send the host-down event to the configured inspector."""
+        payload = [
+            {
+                'id': 'monitor_sample_id1',
+                'time': datetime.now().isoformat(),
+                'type': self.event_type,
+                'details': {
+                    'hostname': self.hostname,
+                    'status': 'down',
+                    'monitor': 'monitor_sample',
+                    'monitor_event_id': 'monitor_sample_event1'
+                },
+            },
+        ]
+        data = json.dumps(payload)
+
+        if self.inspector_type == 'sample':
+            headers = {'content-type': 'application/json'}
+            requests.post(self.inspector_url, data=data, headers=headers)
+        elif self.inspector_type == 'congress':
+            headers = {
+                'Content-Type': 'application/json',
+                'Accept': 'application/json',
+                'X-Auth-Token': self.session.get_token(),
+            }
+            requests.put(self.inspector_url, data=data, headers=headers)
+
+
+def get_args():
+    """Parse the monitor's command-line arguments.
+
+    All three positionals (hostname, ip, inspector_type) are optional;
+    inspector_type falls back to 'sample'.
+
+    :returns: the namespace produced by ``argparse``
+    """
+    arg_parser = argparse.ArgumentParser(description='Doctor Sample Monitor')
+    positionals = [
+        ('hostname', {'metavar': 'HOSTNAME',
+                      'help': 'a hostname to monitor connectivity'}),
+        ('ip', {'metavar': 'IP',
+                'help': 'an IP address to monitor connectivity'}),
+        ('inspector_type', {'metavar': 'INSPECTOR_TYPE',
+                            'help': 'inspector to report',
+                            'default': 'sample'}),
+    ]
+    for dest, options in positionals:
+        arg_parser.add_argument(dest, type=str, nargs='?', **options)
+    return arg_parser.parse_args()
+
+
+def main():
+    """Build the monitor from the command line and run its ping loop."""
+    DoctorMonitorSample(get_args()).start_loop()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/lib/monitors/sample/sample b/tests/lib/monitors/sample/sample
new file mode 100644
index 00000000..1d310333
--- /dev/null
+++ b/tests/lib/monitors/sample/sample
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Copy monitor.py into $TOP_DIR and launch it in the background, unless a
+# "python monitor.py" process is already running.
+start_monitor_sample() {
+    cp $TOP_DIR/lib/monitors/sample/monitor.py $TOP_DIR/monitor.py
+    if pgrep -f "python monitor.py"; then
+        # A monitor is already running; nothing more to do.
+        return 0
+    fi
+    sudo -E python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" "$INSPECTOR_TYPE" \
+        > monitor.log 2>&1 &
+}
+
+# Kill every running "python monitor.py" process; succeed when none exist.
+function stop_monitor_sample {
+    local pids
+    # Look the processes up once. Running pgrep a second time for the kill
+    # could come back empty if the monitor exited in between, which would
+    # call "sudo kill" with no arguments.
+    pids=$(pgrep -f "python monitor.py") || return 0
+    # Keep printing the matched PIDs, as the original existence check did.
+    echo "$pids"
+    # Unquoted on purpose: one argument per PID.
+    sudo kill $pids
+}
+
+# Remove the monitor.py copy from the current directory and return rm's
+# exit status.
+cleanup_monitor_sample() {
+    rm monitor.py
+    return $?
+}