From dbd84d95163fb492962ebacdea7bc2a89a8b56f9 Mon Sep 17 00:00:00 2001 From: dongwenjuan Date: Wed, 12 Apr 2017 10:46:39 +0800 Subject: refactor the monitor JIRA: DOCTOR-99 Change-Id: I1831d329ffe80435532678fcb23d54f310422ce8 Signed-off-by: dongwenjuan --- tests/config.py | 4 +- tests/inspector/__init__.py | 4 +- tests/main.py | 7 +++ tests/monitor/__init__.py | 28 +++++++++++ tests/monitor/base.py | 27 ++++++++++ tests/monitor/sample.py | 119 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 tests/monitor/__init__.py create mode 100644 tests/monitor/base.py create mode 100644 tests/monitor/sample.py diff --git a/tests/config.py b/tests/config.py index 969d829f..88910f6d 100644 --- a/tests/config.py +++ b/tests/config.py @@ -16,14 +16,16 @@ import image import instance import network import inspector +import monitor import os_clients import user def list_opts(): return [ - ('consumer', consumer.OPTS), + ('monitor', monitor.OPTS), ('inspector', inspector.OPTS), + ('consumer', consumer.OPTS), ('DEFAULT', itertools.chain( os_clients.OPTS, image.OPTS, diff --git a/tests/inspector/__init__.py b/tests/inspector/__init__.py index 35bdb5b9..151cc034 100644 --- a/tests/inspector/__init__.py +++ b/tests/inspector/__init__.py @@ -11,6 +11,7 @@ import os from oslo_config import cfg from oslo_utils import importutils + OPTS = [ cfg.StrOpt('type', default=os.environ.get('INSPECTOR_TYPE', 'sample'), @@ -19,7 +20,7 @@ OPTS = [ required=True), cfg.StrOpt('ip', default='127.0.0.1', - help='the ip of default inspector', + help='the host ip of inspector', required=False), cfg.StrOpt('port', default='12345', @@ -32,6 +33,7 @@ _inspector_name_class_mapping = { 'sample': 'inspector.sample.SampleInspector', } + def get_inspector(conf, log): inspector_class = _inspector_name_class_mapping[conf.inspector.type] return importutils.import_object(inspector_class, conf, log) diff --git a/tests/main.py b/tests/main.py index 
797e28b2..6644b544 100644 --- a/tests/main.py +++ b/tests/main.py @@ -18,6 +18,8 @@ from inspector import get_inspector import logger as doctor_log from user import User from network import Network +from monitor import get_monitor + LOG = doctor_log.Logger('doctor').getLogger() @@ -32,6 +34,9 @@ class DoctorTest(object): self.instance = Instance(self.conf, LOG) self.alarm = Alarm(self.conf, LOG) self.inspector = get_inspector(self.conf, LOG) + self.monitor = get_monitor(self.conf, + self.inspector.get_inspector_url(), + LOG) def setup(self): # prepare the cloud env @@ -53,6 +58,7 @@ class DoctorTest(object): # starting doctor sample components... self.inspector.start() + self.monitor.start() def run(self): """run doctor test""" @@ -78,6 +84,7 @@ class DoctorTest(object): self.image.delete() self.user.delete() self.inspector.stop() + self.monitor.stop() def main(): diff --git a/tests/monitor/__init__.py b/tests/monitor/__init__.py new file mode 100644 index 00000000..51a6a65d --- /dev/null +++ b/tests/monitor/__init__.py @@ -0,0 +1,28 @@ +############################################################################## +# Copyright (c) 2017 ZTE Corporation and others. +# +# All rights reserved. 
from oslo_config import cfg
from oslo_utils import importutils


# Options for the doctor monitor component; get_monitor() below reads the
# selected value as ``conf.monitor.type``.
OPTS = [
    cfg.StrOpt('type',
               default='sample',
               choices=['sample'],
               help='the type of doctor monitor component',
               required=True),
]


# Maps each supported monitor type to the dotted import path of its class.
_monitor_name_class_mapping = {
    'sample': 'monitor.sample.SampleMonitor'
}


def get_monitor(conf, inspector_url, log):
    """Instantiate the monitor selected by ``conf.monitor.type``.

    :param conf: configuration object exposing ``conf.monitor.type``
    :param inspector_url: URL of the inspector, handed to the monitor
    :param log: logger handed to the monitor
    :returns: the object built by importing the mapped class and calling it
              with ``(conf, inspector_url, log)``
    :raises ValueError: if the configured type has no entry in the mapping
    """
    monitor_type = conf.monitor.type
    if monitor_type not in _monitor_name_class_mapping:
        # Fail with a clear message instead of passing None to import_object.
        raise ValueError(
            'unsupported monitor type %r, expected one of: %s'
            % (monitor_type,
               ', '.join(sorted(_monitor_name_class_mapping))))
    monitor_class = _monitor_name_class_mapping[monitor_type]
    return importutils.import_object(monitor_class, conf,
                                     inspector_url, log)
import abc
import six


@six.add_metaclass(abc.ABCMeta)
class BaseMonitor(object):
    """Abstract base class for doctor monitors.

    A monitor watches for compute faults and reports errors to the
    inspector.  Concrete monitors must implement ``start`` and ``stop``.
    """

    def __init__(self, conf, inspector_url, log):
        """Store the collaborators shared by every monitor.

        :param conf: configuration object
        :param inspector_url: URL of the inspector that receives reports
        :param log: logger used by the monitor
        """
        self.inspector_url = inspector_url
        self.log = log
        self.conf = conf

    @abc.abstractmethod
    def start(self):
        """Start monitoring; implemented by subclasses."""

    @abc.abstractmethod
    def stop(self):
        """Stop monitoring; implemented by subclasses."""
from datetime import datetime
import json
import requests
import socket
from threading import Thread
import time

from identity_auth import get_session
from os_clients import nova_client
from monitor.base import BaseMonitor


class SampleMonitor(BaseMonitor):
    """Sample monitor: pings every hypervisor and reports ping timeouts.

    One ``Pinger`` thread is started per hypervisor returned by nova.  When
    a ping times out, the pinger calls ``report_error`` which PUTs a
    ``compute.host.down`` event to the inspector.
    """

    event_type = "compute.host.down"

    def __init__(self, conf, inspector_url, log):
        """Create the nova client and fetch the hypervisor list.

        :param conf: configuration object (``nova_version`` and
                     ``inspector.type`` are read from it)
        :param inspector_url: base URL of the inspector
        :param log: logger used by the monitor and its pingers
        """
        super(SampleMonitor, self).__init__(conf, inspector_url, log)
        self.session = get_session()
        self.nova = nova_client(conf.nova_version, self.session)
        self.hosts = self.nova.hypervisors.list(detailed=True)
        self.pingers = []

    def start(self):
        """Start one pinger thread for each known hypervisor."""
        self.log.info('sample monitor start......')
        for host in self.hosts:
            host_dict = host.__dict__
            host_name = host_dict['hypervisor_hostname']
            host_ip = host_dict['host_ip']
            pinger = Pinger(host_name, host_ip, self, self.log)
            pinger.start()
            self.pingers.append(pinger)

    def stop(self):
        """Stop every pinger thread and wait for it to finish."""
        self.log.info('sample monitor stop......')
        for pinger in self.pingers:
            pinger.stop()
            pinger.join()
        # Reset instead of deleting the attribute so that a later
        # start()/stop() does not fail with AttributeError.
        self.pingers = []

    def report_error(self, hostname):
        """Send a host-down event for ``hostname`` to the inspector.

        :param hostname: name of the host whose ping timed out
        """
        self.log.info('sample monitor report error......')
        data = [
            {
                'id': 'monitor_sample_id1',
                'time': datetime.now().isoformat(),
                'type': self.event_type,
                'details': {
                    'hostname': hostname,
                    'status': 'down',
                    'monitor': 'monitor_sample',
                    'monitor_event_id': 'monitor_sample_event1'
                },
            },
        ]

        # A token is only requested when the inspector is not the sample one.
        if self.conf.inspector.type != 'sample':
            auth_token = self.session.get_token()
        else:
            auth_token = None
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'X-Auth-Token': auth_token,
        }

        # Join the base URL and 'events' with exactly one slash between them.
        if self.inspector_url.endswith('/'):
            url = '%s%s' % (self.inspector_url, 'events')
        else:
            url = '%s%s' % (self.inspector_url, '/events')
        requests.put(url, data=json.dumps(data), headers=headers)


class Pinger(Thread):
    """Thread that pings one host until a ping times out or it is stopped."""

    interval = 0.1    # second
    timeout = 0.1    # second
    # ICMP echo request: type 8, code 0, checksum 0xf7ff, id 0, sequence 0.
    # Kept as bytes so the packet is sent exactly as written; encoding a
    # text string would alter the 0xf7/0xff bytes.
    ICMP_ECHO_MESSAGE = b'\x08\x00\xf7\xff\x00\x00\x00\x00'

    def __init__(self, host_name, host_ip, monitor, log):
        """Remember the target host and the monitor to notify.

        :param host_name: name of the host to ping
        :param host_ip: IP address of the host; when empty the address is
                        resolved from ``host_name``
        :param monitor: object whose ``report_error(hostname)`` is called
                        on a ping timeout
        :param log: logger
        """
        Thread.__init__(self)
        self.monitor = monitor
        self.hostname = host_name
        self.ip_addr = host_ip or socket.gethostbyname(self.hostname)
        self.log = log
        self._stopped = False

    def run(self):
        """Thread entry point: ping until stopped or a timeout is seen."""
        if self._stopped:
            return
        self._run()

    def stop(self):
        """Ask the ping loop to exit; takes effect before the next probe."""
        self.log.info("Stopping Pinger host_name(%s), host_ip(%s)"
                      % (self.hostname, self.ip_addr))
        self._stopped = True

    def _run(self):
        """Ping the host every ``interval`` seconds.

        Returns when ``stop()`` has been called, or after the first ping
        timeout has been reported to the monitor.
        """
        self.log.info("Starting Pinger host_name(%s), host_ip(%s)"
                      % (self.hostname, self.ip_addr))

        sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
                             socket.IPPROTO_ICMP)
        try:
            sock.settimeout(self.timeout)
            # Re-check the flag on every probe so stop() + join() cannot
            # hang while the host keeps answering.
            while not self._stopped:
                try:
                    sock.sendto(self.ICMP_ECHO_MESSAGE, (self.ip_addr, 0))
                    sock.recv(4096)
                except socket.timeout:
                    self.log.info("doctor monitor detected at %s"
                                  % time.time())
                    self.monitor.report_error(self.hostname)
                    self.log.info("ping timeout, quit monitoring...")
                    self._stopped = True
                    return
                time.sleep(self.interval)
        finally:
            # Always release the raw socket, whichever way the loop ends.
            sock.close()