diff options
Diffstat (limited to 'doctor_tests/monitor')
-rw-r--r-- | doctor_tests/monitor/__init__.py | 29 | ||||
-rw-r--r-- | doctor_tests/monitor/base.py | 27 | ||||
-rw-r--r-- | doctor_tests/monitor/collectd.py | 137 | ||||
-rw-r--r-- | doctor_tests/monitor/sample.py | 106 |
4 files changed, 299 insertions, 0 deletions
diff --git a/doctor_tests/monitor/__init__.py b/doctor_tests/monitor/__init__.py new file mode 100644 index 00000000..7e30c9f8 --- /dev/null +++ b/doctor_tests/monitor/__init__.py @@ -0,0 +1,29 @@ +############################################################################## +# Copyright (c) 2017 ZTE Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## +from oslo_config import cfg +from oslo_utils import importutils + +OPTS = [ + cfg.StrOpt('type', + default='sample', + choices=['sample', 'collectd'], + help='the type of doctor monitor component', + required=True), +] + + +_monitor_name_class_mapping = { + 'sample': 'doctor_tests.monitor.sample.SampleMonitor', + 'collectd': 'doctor_tests.monitor.collectd.CollectdMonitor' +} + +def get_monitor(conf, inspector_url, log): + monitor_class = _monitor_name_class_mapping.get(conf.monitor.type) + return importutils.import_object(monitor_class, conf, + inspector_url, log) diff --git a/doctor_tests/monitor/base.py b/doctor_tests/monitor/base.py new file mode 100644 index 00000000..119c8a1c --- /dev/null +++ b/doctor_tests/monitor/base.py @@ -0,0 +1,27 @@ +############################################################################## +# Copyright (c) 2017 ZTE Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## +import abc +import six + + +@six.add_metaclass(abc.ABCMeta) +class BaseMonitor(object): + """Monitor computer fault and report error to the inspector""" + def __init__(self, conf, inspector_url, log): + self.conf = conf + self.log = log + self.inspector_url = inspector_url + + @abc.abstractmethod + def start(self, host): + pass + + @abc.abstractmethod + def stop(self): + pass diff --git a/doctor_tests/monitor/collectd.py b/doctor_tests/monitor/collectd.py new file mode 100644 index 00000000..a22d7edc --- /dev/null +++ b/doctor_tests/monitor/collectd.py @@ -0,0 +1,137 @@ +############################################################################## +# Copyright (c) 2017 NEC Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## +import os +import socket +import getpass +import sys + +from doctor_tests.monitor.base import BaseMonitor + + +class CollectdMonitor(BaseMonitor): + def __init__(self, conf, inspector_url, log): + super(CollectdMonitor, self).__init__(conf, inspector_url, log) + self.top_dir = os.path.dirname(sys.path[0]) + tmp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + tmp_sock.connect(("8.8.8.8", 80)) + + ## control_ip is the IP of primary interface of control node i.e. + ## eth0, eno1. It is used by collectd monitor to communicate with + ## sample inspector. + ## TODO (umar) see if mgmt IP of control is a better option. Also + ## primary interface may not be the right option + self.control_ip = tmp_sock.getsockname()[0] + self.compute_user = getpass.getuser() + self.interface_name = os.environ.get('INTERFACE_NAME') or '' + self.inspector_type = os.environ.get('INSPECTOR_TYPE', 'sample') + self.auth_url = os.environ.get('OS_AUTH_URL') + self.username = os.environ.get('OS_USERNAME') + self.password = os.environ.get('OS_PASSWORD') + self.project_name = os.environ.get('OS_PROJECT_NAME') + self.user_domain_name = os.environ.get('OS_USER_DOMAIN_NAME') or 'default' + self.user_domain_id = os.environ.get('OS_USER_DOMAIN_ID') + self.project_domain_name = os.environ.get('OS_PROJECT_DOMAIN_NAME') or 'default' + self.project_domain_id = os.environ.get('OS_PROJECT_DOMAIN_ID') + self.ssh_opts_cpu = '-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' + + def start(self, host): + self.log.info("Collectd monitor start.........") + self.compute_host = host.name + self.compute_ip = host.ip + f = open("%s/tests/collectd.conf" % self.top_dir, 'w') + collectd_conf_file = """ +Hostname %s +FQDNLookup false +Interval 1 +MaxReadInterval 2 + +<LoadPlugin python> +Globals true +</LoadPlugin> +LoadPlugin ovs_events +LoadPlugin logfile + +<Plugin logfile> + File \"/var/log/collectd.log\" + Timestamp true + LogLevel \"info\" +</Plugin> + +<Plugin python> + ModulePath \"/home/%s\" + LogTraces true + Interactive false + Import \"collectd_plugin\" + <Module \"collectd_plugin\"> + control_ip \"%s\" + compute_ip \"%s\" + compute_host \"%s\" + compute_user \"%s\" + inspector_type \"%s\" + os_auth_url \"%s\" + os_username \"%s\" + os_password \"%s\" + os_project_name \"%s\" + os_user_domain_name \"%s\" + os_user_domain_id \"%s\" + os_project_domain_name \"%s\" + os_project_domain_id \"%s\" + </Module> +</Plugin> + +<Plugin ovs_events> + Port 6640 + Socket \"/var/run/openvswitch/db.sock\" + Interfaces \"@INTERFACE_NAME@\" + SendNotification true + DispatchValues false +</Plugin> + """ % (self.compute_host, self.compute_user, self.control_ip, self.compute_ip, self.compute_host, self.compute_user, + self.inspector_type, self.auth_url, self.username, self.password, self.project_name, self.user_domain_name, + self.user_domain_id, self.project_domain_name, self.project_domain_id) + f.write(collectd_conf_file) + f.close() + + os.system(" scp %s %s/tests/collectd.conf %s@%s: " % (self.ssh_opts_cpu, self.top_dir, self.compute_user, self.compute_ip)) + self.log.info("after first scp") + ## @TODO (umar) Always assuming that the interface is assigned an IP if + ## interface name is not provided. See if there is a better approach + os.system(""" ssh %s %s@%s \"if [ -n \"%s\" ]; then + dev=%s + else + dev=\$(sudo ip a | awk '/ %s\//{print \$NF}') + fi + sed -i -e \"s/@INTERFACE_NAME@/\$dev/\" collectd.conf + collectd_conf=/opt/collectd/etc/collectd.conf + if [ -e \$collectd_conf ]; then + sudo cp \$collectd_conf \${collectd_conf}-doctor-saved + else + sudo touch \${collectd_conf}-doctor-created + fi + sudo mv collectd.conf /opt/collectd/etc/collectd.conf\" """ % (self.ssh_opts_cpu, self.compute_user, self.compute_ip, self.interface_name, self.interface_name, self.compute_ip)) + self.log.info("after first ssh") + os.system(" scp %s %s/tests/lib/monitors/collectd/collectd_plugin.py %s@%s:collectd_plugin.py " % (self.ssh_opts_cpu, self.top_dir, self.compute_user, self.compute_ip)) + self.log.info("after sec scp") + os.system(" ssh %s %s@%s \"sudo pkill collectd; sudo /opt/collectd/sbin/collectd\" " % (self.ssh_opts_cpu, self.compute_user, self.compute_ip)) + self.log.info("after sec ssh") + + def stop(self): + os.system(" ssh %s %s@%s \"sudo pkill collectd\" " % (self.ssh_opts_cpu, self.compute_user, self.compute_ip)) + + def cleanup(self): + os.system(""" ssh %s %s@%s \" + collectd_conf=/opt/collectd/etc/collectd.conf + if [ -e \"\${collectd_conf}-doctor-created\" ]; then + sudo rm \"\${collectd_conf}-doctor-created\" + sudo rm \$collectd_conf + elif [ -e \"\${collectd_conf}-doctor-saved\" ]; then + sudo cp -f \"\${collectd_conf}-doctor-saved\" \$collectd_conf + sudo rm \"\${collectd_conf}-doctor-saved\" + fi\" """ % (self.ssh_opts_cpu, self.compute_user, self.compute_ip)) + os.remove("%s/tests/collectd.conf" % self.top_dir) diff --git a/doctor_tests/monitor/sample.py b/doctor_tests/monitor/sample.py new file mode 100644 index 00000000..7a463048 --- /dev/null +++ b/doctor_tests/monitor/sample.py @@ -0,0 +1,106 @@ +############################################################################## +# Copyright (c) 2017 ZTE Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## +from datetime import datetime +import json +import requests +import socket +from threading import Thread +import time + +from doctor_tests.identity_auth import get_session +from doctor_tests.monitor.base import BaseMonitor + + +class SampleMonitor(BaseMonitor): + event_type = "compute.host.down" + + def __init__(self, conf, inspector_url, log): + super(SampleMonitor, self).__init__(conf, inspector_url, log) + self.session = get_session() + self.pinger = None + + def start(self, host): + self.log.info('sample monitor start......') + self.pinger = Pinger(host.name, host.ip, self, self.log) + self.pinger.start() + + def stop(self): + self.log.info('sample monitor stop......') + if self.pinger is not None: + self.pinger.stop() + self.pinger.join() + + def report_error(self, hostname): + self.log.info('sample monitor report error......') + data = [ + { + 'id': 'monitor_sample_id1', + 'time': datetime.now().isoformat(), + 'type': self.event_type, + 'details': { + 'hostname': hostname, + 'status': 'down', + 'monitor': 'monitor_sample', + 'monitor_event_id': 'monitor_sample_event1' + }, + }, + ] + + auth_token = self.session.get_token() if \ + self.conf.inspector.type != 'sample' else None + headers = { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'X-Auth-Token': auth_token, + } + + url = '%s%s' % (self.inspector_url, 'events') \ + if self.inspector_url.endswith('/') else \ + '%s%s' % (self.inspector_url, '/events') + requests.put(url, data=json.dumps(data), headers=headers) + + +class Pinger(Thread): + interval = 0.1 # second + timeout = 0.1 # second + ICMP_ECHO_MESSAGE = bytes([0x08, 0x00, 0xf7, 0xff, 0x00, 0x00, 0x00, 0x00]) + + def __init__(self, host_name, host_ip, monitor, log): + Thread.__init__(self) + self.monitor = monitor + self.hostname = host_name + self.ip_addr = host_ip or socket.gethostbyname(self.hostname) + self.log = log + self._stopped = False + + def run(self): + self.log.info("Starting Pinger host_name(%s), host_ip(%s)" + % (self.hostname, self.ip_addr)) + + sock = socket.socket(socket.AF_INET, socket.SOCK_RAW, + socket.IPPROTO_ICMP) + sock.settimeout(self.timeout) + while True: + if self._stopped: + return + try: + sock.sendto(self.ICMP_ECHO_MESSAGE, (self.ip_addr, 0)) + sock.recv(4096) + except socket.timeout: + self.log.info("doctor monitor detected at %s" % time.time()) + self.monitor.report_error(self.hostname) + self.log.info("ping timeout, quit monitoring...") + self._stopped = True + return + time.sleep(self.interval) + + def stop(self): + self.log.info("Stopping Pinger host_name(%s), host_ip(%s)" + % (self.hostname, self.ip_addr)) + self._stopped = True |