summaryrefslogtreecommitdiffstats
path: root/tests/monitor
diff options
context:
space:
mode:
Diffstat (limited to 'tests/monitor')
-rw-r--r--tests/monitor/__init__.py29
-rw-r--r--tests/monitor/base.py27
-rw-r--r--tests/monitor/collectd.py145
-rw-r--r--tests/monitor/sample.py115
4 files changed, 316 insertions, 0 deletions
diff --git a/tests/monitor/__init__.py b/tests/monitor/__init__.py
new file mode 100644
index 00000000..e268907f
--- /dev/null
+++ b/tests/monitor/__init__.py
@@ -0,0 +1,29 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from oslo_config import cfg
+from oslo_utils import importutils
+
+OPTS = [
+ cfg.StrOpt('type',
+ default='sample',
+ choices=['sample', 'collectd'],
+ help='the type of doctor monitor component',
+ required=True),
+]
+
+
+_monitor_name_class_mapping = {
+ 'sample': 'monitor.sample.SampleMonitor',
+ 'collectd': 'monitor.collectd.CollectdMonitor'
+}
+
+def get_monitor(conf, inspector_url, log):
+ monitor_class = _monitor_name_class_mapping.get(conf.monitor.type)
+ return importutils.import_object(monitor_class, conf,
+ inspector_url, log)
diff --git a/tests/monitor/base.py b/tests/monitor/base.py
new file mode 100644
index 00000000..ccb647cf
--- /dev/null
+++ b/tests/monitor/base.py
@@ -0,0 +1,27 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import abc
+import six
+
+
+@six.add_metaclass(abc.ABCMeta)
+class BaseMonitor(object):
+ """Monitor computer fault and report error to the inspector"""
+ def __init__(self, conf, inspector_url, log):
+ self.conf = conf
+ self.log = log
+ self.inspector_url = inspector_url
+
+ @abc.abstractmethod
+ def start(self):
+ pass
+
+ @abc.abstractmethod
+ def stop(self):
+ pass
diff --git a/tests/monitor/collectd.py b/tests/monitor/collectd.py
new file mode 100644
index 00000000..f7a4f442
--- /dev/null
+++ b/tests/monitor/collectd.py
@@ -0,0 +1,145 @@
+##############################################################################
+# Copyright (c) 2017 NEC Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+import os
+import socket
+import getpass
+import sys
+
+from identity_auth import get_session
+from os_clients import nova_client
+from monitor.base import BaseMonitor
+
+
+class CollectdMonitor(BaseMonitor):
+ def __init__(self, conf, inspector_url, log):
+ super(CollectdMonitor, self).__init__(conf, inspector_url, log)
+ self.top_dir = os.path.dirname(sys.path[0])
+ self.session = get_session()
+ self.nova = nova_client(conf.nova_version, self.session)
+ self.compute_hosts = self.nova.hypervisors.list(detailed=True)
+ for host in self.compute_hosts:
+ host_dict = host.__dict__
+ self.compute_host = host_dict['hypervisor_hostname']
+ self.compute_ip = host_dict['host_ip']
+ tmp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ tmp_sock.connect(("8.8.8.8", 80))
+
+ ## control_ip is the IP of primary interface of control node i.e.
+ ## eth0, eno1. It is used by collectd monitor to communicate with
+ ## sample inspector.
+ ## TODO (umar) see if mgmt IP of control is a better option. Also
+ ## primary interface may not be the right option
+ self.control_ip = tmp_sock.getsockname()[0]
+ self.compute_user = getpass.getuser()
+ self.interface_name = os.environ.get('INTERFACE_NAME') or ''
+ self.inspector_type = os.environ.get('INSPECTOR_TYPE', 'sample')
+ self.auth_url = os.environ.get('OS_AUTH_URL')
+ self.username = os.environ.get('OS_USERNAME')
+ self.password = os.environ.get('OS_PASSWORD')
+ self.project_name = os.environ.get('OS_PROJECT_NAME')
+ self.user_domain_name = os.environ.get('OS_USER_DOMAIN_NAME') or 'default'
+ self.user_domain_id = os.environ.get('OS_USER_DOMAIN_ID')
+ self.project_domain_name = os.environ.get('OS_PROJECT_DOMAIN_NAME') or 'default'
+ self.project_domain_id = os.environ.get('OS_PROJECT_DOMAIN_ID')
+ self.ssh_opts_cpu = '-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'
+
+ def start(self):
+ self.log.info("Collectd monitor start.........")
+ f = open("%s/tests/collectd.conf" % self.top_dir, 'w')
+ collectd_conf_file = """
+Hostname %s
+FQDNLookup false
+Interval 1
+MaxReadInterval 2
+
+<LoadPlugin python>
+Globals true
+</LoadPlugin>
+LoadPlugin ovs_events
+LoadPlugin logfile
+
+<Plugin logfile>
+ File \"/var/log/collectd.log\"
+ Timestamp true
+ LogLevel \"info\"
+</Plugin>
+
+<Plugin python>
+ ModulePath \"/home/%s\"
+ LogTraces true
+ Interactive false
+ Import \"collectd_plugin\"
+ <Module \"collectd_plugin\">
+ control_ip \"%s\"
+ compute_ip \"%s\"
+ compute_host \"%s\"
+ compute_user \"%s\"
+ inspector_type \"%s\"
+ os_auth_url \"%s\"
+ os_username \"%s\"
+ os_password \"%s\"
+ os_project_name \"%s\"
+ os_user_domain_name \"%s\"
+ os_user_domain_id \"%s\"
+ os_project_domain_name \"%s\"
+ os_project_domain_id \"%s\"
+ </Module>
+</Plugin>
+
+<Plugin ovs_events>
+ Port 6640
+ Socket \"/var/run/openvswitch/db.sock\"
+ Interfaces \"@INTERFACE_NAME@\"
+ SendNotification true
+ DispatchValues false
+</Plugin>
+ """ % (self.compute_host, self.compute_user, self.control_ip, self.compute_ip, self.compute_host, self.compute_user,
+ self.inspector_type, self.auth_url, self.username, self.password, self.project_name, self.user_domain_name,
+ self.user_domain_id, self.project_domain_name, self.project_domain_id)
+ f.write(collectd_conf_file)
+ f.close()
+
+ os.system(" scp %s %s/tests/collectd.conf %s@%s: " % (self.ssh_opts_cpu, self.top_dir, self.compute_user, self.compute_ip))
+ self.log.info("after first scp")
+ ## @TODO (umar) Always assuming that the interface is assigned an IP if
+ ## interface name is not provided. See if there is a better approach
+ os.system(""" ssh %s %s@%s \"if [ -n \"%s\" ]; then
+ dev=%s
+ else
+ dev=\$(sudo ip a | awk '/ %s\//{print \$NF}')
+ fi
+ sed -i -e \"s/@INTERFACE_NAME@/\$dev/\" collectd.conf
+ collectd_conf=/opt/collectd/etc/collectd.conf
+ if [ -e \$collectd_conf ]; then
+ sudo cp \$collectd_conf \${collectd_conf}-doctor-saved
+ else
+ sudo touch \${collectd_conf}-doctor-created
+ fi
+ sudo mv collectd.conf /opt/collectd/etc/collectd.conf\" """ % (self.ssh_opts_cpu, self.compute_user, self.compute_ip, self.interface_name, self.interface_name, self.compute_ip))
+ self.log.info("after first ssh")
+ os.system(" scp %s %s/tests/lib/monitors/collectd/collectd_plugin.py %s@%s:collectd_plugin.py " % (self.ssh_opts_cpu, self.top_dir, self.compute_user, self.compute_ip))
+ self.log.info("after sec scp")
+ os.system(" ssh %s %s@%s \"sudo pkill collectd; sudo /opt/collectd/sbin/collectd\" " % (self.ssh_opts_cpu, self.compute_user, self.compute_ip))
+ self.log.info("after sec ssh")
+
+ def stop(self):
+ os.system(" ssh %s %s@%s \"sudo pkill collectd\" " % (self.ssh_opts_cpu, self.compute_user, self.compute_ip))
+
+ def cleanup(self):
+ os.system(""" ssh %s %s@%s \"
+ collectd_conf=/opt/collectd/etc/collectd.conf
+ if [ -e \"\${collectd_conf}-doctor-created\" ]; then
+ sudo rm \"\${collectd_conf}-doctor-created\"
+ sudo rm \$collectd_conf
+ elif [ -e \"\${collectd_conf}-doctor-saved\" ]; then
+ sudo cp -f \"\${collectd_conf}-doctor-saved\" \$collectd_conf
+ sudo rm \"\${collectd_conf}-doctor-saved\"
+ fi\" """ % (self.ssh_opts_cpu, self.compute_user, self.compute_ip))
+ os.remove("%s/tests/collectd.conf" % self.top_dir)
diff --git a/tests/monitor/sample.py b/tests/monitor/sample.py
new file mode 100644
index 00000000..1333a2ec
--- /dev/null
+++ b/tests/monitor/sample.py
@@ -0,0 +1,115 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from datetime import datetime
+import json
+import requests
+import socket
+from threading import Thread
+import time
+
+from identity_auth import get_session
+from os_clients import nova_client
+from monitor.base import BaseMonitor
+
+
+class SampleMonitor(BaseMonitor):
+ event_type = "compute.host.down"
+
+ def __init__(self, conf, inspector_url, log):
+ super(SampleMonitor, self).__init__(conf, inspector_url, log)
+ self.session = get_session()
+ self.nova = nova_client(conf.nova_version, self.session)
+ self.hosts = self.nova.hypervisors.list(detailed=True)
+ self.pingers = []
+
+ def start(self):
+ self.log.info('sample monitor start......')
+ for host in self.hosts:
+ host_dict = host.__dict__
+ host_name = host_dict['hypervisor_hostname']
+ host_ip = host_dict['host_ip']
+ pinger = Pinger(host_name, host_ip, self, self.log)
+ pinger.start()
+ self.pingers.append(pinger)
+
+ def stop(self):
+ self.log.info('sample monitor stop......')
+ for pinger in self.pingers:
+ pinger.stop()
+ pinger.join()
+ del self.pingers
+
+ def report_error(self, hostname):
+ self.log.info('sample monitor report error......')
+ data = [
+ {
+ 'id': 'monitor_sample_id1',
+ 'time': datetime.now().isoformat(),
+ 'type': self.event_type,
+ 'details': {
+ 'hostname': hostname,
+ 'status': 'down',
+ 'monitor': 'monitor_sample',
+ 'monitor_event_id': 'monitor_sample_event1'
+ },
+ },
+ ]
+
+ auth_token = self.session.get_token() if \
+ self.conf.inspector.type != 'sample' else None
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ 'X-Auth-Token': auth_token,
+ }
+
+ url = '%s%s' % (self.inspector_url, 'events') \
+ if self.inspector_url.endswith('/') else \
+ '%s%s' % (self.inspector_url, '/events')
+ requests.put(url, data=json.dumps(data), headers=headers)
+
+
+class Pinger(Thread):
+ interval = 0.1 # second
+ timeout = 0.1 # second
+ ICMP_ECHO_MESSAGE = bytes([0x08, 0x00, 0xf7, 0xff, 0x00, 0x00, 0x00, 0x00])
+
+ def __init__(self, host_name, host_ip, monitor, log):
+ Thread.__init__(self)
+ self.monitor = monitor
+ self.hostname = host_name
+ self.ip_addr = host_ip or socket.gethostbyname(self.hostname)
+ self.log = log
+ self._stopped = False
+
+ def run(self):
+ self.log.info("Starting Pinger host_name(%s), host_ip(%s)"
+ % (self.hostname, self.ip_addr))
+
+ sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
+ socket.IPPROTO_ICMP)
+ sock.settimeout(self.timeout)
+ while True:
+ if self._stopped:
+ return
+ try:
+ sock.sendto(self.ICMP_ECHO_MESSAGE, (self.ip_addr, 0))
+ sock.recv(4096)
+ except socket.timeout:
+ self.log.info("doctor monitor detected at %s" % time.time())
+ self.monitor.report_error(self.hostname)
+ self.log.info("ping timeout, quit monitoring...")
+ self._stopped = True
+ return
+ time.sleep(self.interval)
+
+ def stop(self):
+ self.log.info("Stopping Pinger host_name(%s), host_ip(%s)"
+ % (self.hostname, self.ip_addr))
+ self._stopped = True