aboutsummaryrefslogtreecommitdiffstats
path: root/app/monitoring/checks
diff options
context:
space:
mode:
Diffstat (limited to 'app/monitoring/checks')
-rw-r--r--app/monitoring/checks/binary_converter.py17
-rwxr-xr-xapp/monitoring/checks/check_interface.py50
-rwxr-xr-xapp/monitoring/checks/check_ping.py121
-rwxr-xr-xapp/monitoring/checks/check_pnic_vpp.py53
-rwxr-xr-xapp/monitoring/checks/check_vedge_ovs.py43
-rwxr-xr-xapp/monitoring/checks/check_vedge_vpp.py50
-rwxr-xr-xapp/monitoring/checks/check_vnic_vconnector.py72
-rwxr-xr-xapp/monitoring/checks/check_vnic_vpp.py48
-rw-r--r--app/monitoring/checks/check_vservice.py82
9 files changed, 536 insertions, 0 deletions
diff --git a/app/monitoring/checks/binary_converter.py b/app/monitoring/checks/binary_converter.py
new file mode 100644
index 0000000..4da1107
--- /dev/null
+++ b/app/monitoring/checks/binary_converter.py
@@ -0,0 +1,17 @@
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+def binary2str(txt):
+    """Return ``txt`` as text.
+
+    bytes are decoded as UTF-8; any other value is passed through str().
+    If the bytes are not valid UTF-8, the str() form of the bytes object
+    (e.g. "b'\\xff'") is returned instead of raising.
+    """
+    if not isinstance(txt, bytes):
+        return str(txt)
+    try:
+        return txt.decode("utf-8")
+    except UnicodeDecodeError:
+        # bytes.decode() reports undecodable input with UnicodeDecodeError
+        # (not TypeError), so that is what the fallback has to catch
+        return str(txt)
diff --git a/app/monitoring/checks/check_interface.py b/app/monitoring/checks/check_interface.py
new file mode 100755
index 0000000..4140dfe
--- /dev/null
+++ b/app/monitoring/checks/check_interface.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+
+import re
+import sys
+import subprocess
+
+from binary_converter import binary2str
+
+
+if len(sys.argv) < 2:
+    print('name of interface must be specified')
+    sys.exit(2)
+nic_name = str(sys.argv[1])
+
+# exit status: 0 = interface reported as UP, 2 = anything else
+rc = 0
+
+try:
+    # Argument list and no shell: the interface name comes straight from the
+    # command line and must not be interpreted by a shell.
+    out = subprocess.check_output(["ifconfig", nic_name],
+                                  stderr=subprocess.STDOUT)
+    out = binary2str(out)
+    # The first output line is skipped; the state is read from the first
+    # later line that contains ' BROADCAST '.
+    flags_line = next((text for text in out.splitlines()[1:]
+                       if ' BROADCAST ' in text), None)
+    # Only parse a line that was really found, never a leftover line.
+    state_match = None
+    if flags_line is not None:
+        state_match = re.match(r'^\W+([A-Z]+)', flags_line)
+    if not state_match:
+        rc = 2
+        print('Error: failed to find status in ifconfig output: ' + out)
+    else:
+        # the first upper-case word on the flags line must be UP
+        rc = 0 if state_match.group(1) == 'UP' else 2
+        print(out)
+except subprocess.CalledProcessError as e:
+    print("Error finding NIC {}: {}\n".format(nic_name, binary2str(e.output)))
+    rc = 2
+except OSError as e:
+    # without a shell, a missing/unrunnable ifconfig surfaces as OSError
+    print("Error finding NIC {}: {}\n".format(nic_name, e))
+    rc = 2
+
+sys.exit(rc)
diff --git a/app/monitoring/checks/check_ping.py b/app/monitoring/checks/check_ping.py
new file mode 100755
index 0000000..35e7234
--- /dev/null
+++ b/app/monitoring/checks/check_ping.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+
+import argparse
+import re
+import sys
+import subprocess
+
+from binary_converter import binary2str
+
+
+if len(sys.argv) < 2:
+    # Report the problem like the sibling check scripts do: a message on
+    # stdout plus exit status 2. An uncaught ValueError would end the script
+    # with a traceback and exit status 1, which this script uses for
+    # "warning".
+    print('destination address must be specified')
+    sys.exit(2)
+
+
+def thresholds_string(string):
+    """argparse type checker for a thresholds option value.
+
+    Accepted form: "<packet-loss>%/<avg-rtt>/<max-rtt>", e.g. "1%/300/600".
+    packet-loss is an integer; each rtt value is a number with an optional
+    fractional part and no unit suffix.
+
+    Returns the string unchanged; raises argparse.ArgumentTypeError when the
+    value does not have that form.
+    """
+    # fullmatch: the whole value has to fit the pattern. A prefix-only match
+    # would let through values such as "1%/300/600.5.5", which cannot be
+    # converted to numbers afterwards.
+    matches = re.fullmatch(r'\d+%/\d+([.]\d+)?/\d+([.]\d+)?', string)
+    if not matches:
+        msg = "%r is not a valid thresholds string" % string
+        raise argparse.ArgumentTypeError(msg)
+    return string
+
+
+def get_args():
+    """Parse the command line options of the ping check.
+
+    Returns the argparse.Namespace holding: warning, critical, source,
+    target, count, interval, pattern, wait, packetsize.
+    """
+    parser = argparse.ArgumentParser()
+
+    # argparse %-formats help strings, so a literal '%' is written as '%%';
+    # the examples use the form the thresholds type checker accepts
+    parser.add_argument("-W", "--warning", nargs="?",
+                        type=thresholds_string,
+                        default='1%/300/600',
+                        help="warning thresholds: packet-loss "
+                             "(%%)/avg-rtt (ms)/max-rtt (ms) "
+                             "(example: 1%%/300/600)")
+    parser.add_argument("-C", "--critical", nargs="?",
+                        type=thresholds_string,
+                        default='10%/1000/2000',
+                        help="critical thresholds: packet-loss "
+                             "(%%)/avg-rtt (ms)/max-rtt (ms) "
+                             "(example: 10%%/1000/2000)")
+    parser.add_argument("-f", "--source", nargs="?", type=str, default='',
+                        help="source address")
+    parser.add_argument("-t", "--target", nargs="?", type=str, default='',
+                        help="target address")
+    parser.add_argument("-c", "--count", nargs="?", type=int, default=5,
+                        help="how many packets will be sent")
+    parser.add_argument("-i", "--interval", nargs="?", type=float, default=0.5,
+                        help="seconds between sending each packet")
+    # NOTE(review): iputils ping documents -p as hex digits; 'OS-DNA' is not
+    # hex - confirm this default works on the target hosts
+    parser.add_argument("-p", "--pattern", nargs="?", type=str,
+                        default='OS-DNA', help="pattern to pad packet with")
+    parser.add_argument("-w", "--wait", nargs="?", type=int, default=5,
+                        help="seconds to wait for completion of all responses")
+    parser.add_argument("-s", "--packetsize", nargs="?", type=int, default=256,
+                        help="number of data bytes in each packet")
+    return parser.parse_args()
+
+args = get_args()
+
+if not args.target:
+    # same reporting convention as the sibling checks: message + exit 2
+    print('target address must be specified')
+    sys.exit(2)
+
+# exit status: 0 = OK, 1 = warning threshold hit, 2 = critical or no data
+rc = 0
+
+# Run ping from an argument list, without a shell: source, target and
+# pattern are free-form strings taken from the command line.
+cmd = ["ping",
+       "-c", str(args.count),
+       "-i", str(args.interval),
+       "-p", str(args.pattern),
+       "-w", str(args.wait),
+       "-s", str(args.packetsize)]
+if args.source:
+    cmd += ["-I", args.source]
+cmd.append(args.target)
+
+try:
+    out = binary2str(subprocess.check_output(cmd, stderr=subprocess.STDOUT))
+except subprocess.CalledProcessError as e:
+    # ping signals problems (e.g. lost replies) with a non-zero exit status;
+    # keep what it printed so the statistics below can still be evaluated
+    out = "Error doing ping: {}\n".format(binary2str(e.output))
+except OSError as e:
+    # without a shell, a missing/unrunnable ping binary surfaces as OSError
+    out = "Error doing ping: {}\n".format(e)
+
+# find packet loss data
+packet_loss_match = re.search(r'(\d+)[%] packet loss', out, re.M)
+if not packet_loss_match:
+    out += '\npacket loss data not found'
+    rc = 2
+
+# find rtt avg/max data
+rtt_results = None
+if rc < 2:
+    regexp = r'rtt min/avg/max/mdev = [0-9.]+/([0-9.]+)/([0-9.]+)/[0-9.]+ ms'
+    rtt_results = re.search(regexp, out, re.M)
+    if not rtt_results:
+        out += '\nrtt results not found'
+        rc = 2
+if rc < 2:
+    packet_loss = int(packet_loss_match.group(1))
+    avg_rtt = float(rtt_results.group(1))
+    max_rtt = float(rtt_results.group(2))
+    # each rtt threshold is captured as "digits[.digits]" so that float()
+    # is never handed something like "600.5.5"
+    thresholds_regexp = r'(\d+)%/(\d+(?:[.]\d+)?)/(\d+(?:[.]\d+)?)'
+    warn_threshold_match = re.match(thresholds_regexp, args.warning)
+    critical_threshold_match = re.match(thresholds_regexp, args.critical)
+    packet_loss_warn = int(warn_threshold_match.group(1))
+    packet_loss_critical = int(critical_threshold_match.group(1))
+    avg_rtt_warn = float(warn_threshold_match.group(2))
+    avg_rtt_critical = float(critical_threshold_match.group(2))
+    max_rtt_warn = float(warn_threshold_match.group(3))
+    max_rtt_critical = float(critical_threshold_match.group(3))
+    # packet loss has to exceed its threshold, rtt values only reach theirs
+    if packet_loss > packet_loss_critical or avg_rtt >= avg_rtt_critical or \
+            max_rtt >= max_rtt_critical:
+        rc = 2
+    elif packet_loss > packet_loss_warn or avg_rtt >= avg_rtt_warn or \
+            max_rtt >= max_rtt_warn:
+        rc = 1
+
+print(out)
+sys.exit(rc)
diff --git a/app/monitoring/checks/check_pnic_vpp.py b/app/monitoring/checks/check_pnic_vpp.py
new file mode 100755
index 0000000..942fdc2
--- /dev/null
+++ b/app/monitoring/checks/check_pnic_vpp.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+"""
+sudo vppctl show hardware-interfaces:
+
+take the first physical interface (pNIC) line, i.e. one whose name matches
+"[a-zA-Z]*GigabitEthernet", e.g. "TenGigabitEthernet0/0/0"
+Status: "OK" if "up" is detected in the interface line, CRITICAL otherwise
+
+return full text of "vppctl show hardware-interfaces"
+"""
+
+import re
+import subprocess
+
+from binary_converter import binary2str
+
+
+NAME_RE = '^[a-zA-Z]*GigabitEthernet'
+
+# exit status: 0 = first pNIC line contains the word "up", 2 = otherwise
+rc = 0
+
+try:
+    raw_output = subprocess.check_output(
+        ["sudo vppctl show hardware-interfaces"],
+        shell=True,
+        stderr=subprocess.STDOUT)
+    out = binary2str(raw_output)
+    pnic_pattern = re.compile(NAME_RE)
+    # first output line naming a ...GigabitEthernet interface, None if absent
+    pnic_line = next((text for text in out.splitlines()
+                      if pnic_pattern.search(text)), None)
+    if not pnic_line:
+        rc = 2
+        print('Error: failed to find pNic in output of '
+              '"vppctl show hardware-interfaces": {}'
+              .format(out))
+    else:
+        # "up" has to appear as a separate word on the interface line
+        if "up" in pnic_line.split():
+            rc = 0
+        else:
+            rc = 2
+        print('output from "vppctl show hardware-interfaces":\n{}'
+              .format(out))
+except subprocess.CalledProcessError as e:
+    rc = 2
+    print("Error running 'vppctl show hardware-interfaces': {}"
+          .format(binary2str(e.output)))
+
+exit(rc)
diff --git a/app/monitoring/checks/check_vedge_ovs.py b/app/monitoring/checks/check_vedge_ovs.py
new file mode 100755
index 0000000..849af66
--- /dev/null
+++ b/app/monitoring/checks/check_vedge_ovs.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+"""
+Check OVS vEdge health
+
+Run command:
+ps -aux | grep "\(ovs-vswitchd\|ovsdb-server\)"
+
+OK if for both ovs-vswitchd AND ovsdb-server processes we see '(healthy)'
+otherwise CRITICAL
+
+return full text output of the command
+"""
+
+import subprocess
+
+from binary_converter import binary2str
+
+
+rc = 0
+# raw string: the backslashes belong to the grep pattern, not to Python
+cmd = r'ps aux | grep "\(ovs-vswitchd\|ovsdb-server\): monitoring" | ' + \
+      'grep -v grep'
+
+try:
+    out = subprocess.check_output([cmd], stderr=subprocess.STDOUT, shell=True)
+    out = binary2str(out)
+    lines = out.splitlines()
+    # keep only the monitor lines that actually report '(healthy)'
+    healthy_lines = [text for text in lines if '(healthy)' in text]
+    # OK only when each of the two daemons has a healthy monitor line
+    all_healthy = all(
+        any('{}: monitoring'.format(name) in text for text in healthy_lines)
+        for name in ('ovs-vswitchd', 'ovsdb-server'))
+    rc = 0 if all_healthy else 2
+    print(out)
+except subprocess.CalledProcessError as e:
+    # the pipeline exits non-zero when grep finds no matching process
+    print("Error finding expected output: {}".format(binary2str(e.output)))
+    rc = 2
+
+exit(rc)
diff --git a/app/monitoring/checks/check_vedge_vpp.py b/app/monitoring/checks/check_vedge_vpp.py
new file mode 100755
index 0000000..346feae
--- /dev/null
+++ b/app/monitoring/checks/check_vedge_vpp.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+"""
+sudo vppctl show runtime:
+
+test 1: was the return value not null?
+test 2: is startup-config-process = done?
+1 and 2 = vedge status ok
+1 and not 2 = vedge status warning
+not 1 = vedge status critical
+
+return full text of "vppctl show runtime"
+"""
+
+import re
+import subprocess
+
+from binary_converter import binary2str
+
+
+# exit status: 0 = OK, 1 = warning, 2 = critical
+rc = 0
+search_pattern = re.compile("^startup-config-process ")
+
+try:
+    out = subprocess.check_output(["sudo vppctl show runtime"],
+                                  stderr=subprocess.STDOUT,
+                                  shell=True)
+    out = binary2str(out)
+    lines = out.splitlines()
+    matching_lines = [text for text in lines if search_pattern.match(text)]
+    matching_line = matching_lines[0] if matching_lines else None
+    if not out.strip():
+        # test 1 of the module docstring failed: nothing returned = critical
+        rc = 2
+        print('Error: no output from "vppctl show runtime"')
+    elif matching_line and "done" in matching_line.split():
+        # tests 1 and 2 passed: startup-config-process is reported as done
+        print(out)
+    else:
+        # output present but startup-config-process not done = warning
+        rc = 1
+        print('Error: failed to find status in "vppctl show runtime" '
+              'output: ' + out)
+except subprocess.CalledProcessError as e:
+    print("Error finding 'vppctl show runtime': {}"
+          .format(binary2str(e.output)))
+    rc = 2
+
+exit(rc)
diff --git a/app/monitoring/checks/check_vnic_vconnector.py b/app/monitoring/checks/check_vnic_vconnector.py
new file mode 100755
index 0000000..b0f96cd
--- /dev/null
+++ b/app/monitoring/checks/check_vnic_vconnector.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+
+# find status of vnic-vconnector link
+# vconnector object name defines name of bridge
+# use "brctl showmacs <bridge>", then look for the MAC address
+
+import re
+import sys
+import subprocess
+
+from binary_converter import binary2str
+
+
+if len(sys.argv) < 3:
+    print('usage: ' + sys.argv[0] + ' <bridge> <mac_address>')
+    sys.exit(2)
+bridge_name = str(sys.argv[1])
+mac_address = str(sys.argv[2])
+
+# exit status: 0 = MAC found on the bridge, 2 = not found or error
+rc = 0
+
+try:
+    # Argument list and no shell: the bridge name comes straight from the
+    # command line and must not be interpreted by a shell.
+    out = subprocess.check_output(["brctl", "showmacs", bridge_name],
+                                  stderr=subprocess.STDOUT)
+    out = binary2str(out)
+    # The first output line is skipped; MAC addresses are compared without
+    # regard to letter case.
+    wanted = mac_address.lower()
+    line = next((text for text in out.splitlines()[1:]
+                 if wanted in text.lower()), None)
+    # expected fields: port, mac address, is local?, ageing timer
+    line_parts = line.split() if line is not None else []
+    if line is None:
+        rc = 2
+        print('Error: failed to find MAC {}:\n{}\n'
+              .format(mac_address, out))
+    elif len(line_parts) < 4:
+        rc = 2
+        print('Error: unexpected line format for MAC {}:\n{}\n'
+              .format(mac_address, out))
+    else:
+        # grab "is local?" and "ageing timer" values
+        is_local = line_parts[2]
+        ageing_timer = line_parts[3]
+        msg_format =\
+            'vConnector bridge name: {}\n'\
+            'vNIC MAC address: {}\n'\
+            'is local: {}\n'\
+            'ageing timer: {}\n'\
+            'command: brctl showmacs {}\n'\
+            'output:\n{}'
+        msg = msg_format.format(bridge_name, mac_address, is_local,
+                                ageing_timer, bridge_name, out)
+        print(msg)
+except subprocess.CalledProcessError as e:
+    print("Error finding MAC {}: {}\n"
+          .format(mac_address, binary2str(e.output)))
+    rc = 2
+except OSError as e:
+    # without a shell, a missing/unrunnable brctl surfaces as OSError
+    print("Error finding MAC {}: {}\n".format(mac_address, e))
+    rc = 2
+
+sys.exit(rc)
diff --git a/app/monitoring/checks/check_vnic_vpp.py b/app/monitoring/checks/check_vnic_vpp.py
new file mode 100755
index 0000000..0f77ddd
--- /dev/null
+++ b/app/monitoring/checks/check_vnic_vpp.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+"""
+sudo vppctl show hardware-interfaces:
+
+take only the virtual interfaces, e.g. "VirtualEthernet0/0/0"
+Status: "OK" if "up" is detected in the interface line, CRITICAL otherwise
+
+return full text of "vppctl show hardware-interfaces"
+"""
+
+import re
+import subprocess
+
+from binary_converter import binary2str
+
+# exit status: 0 = first "Virtual..." line contains the word "up", else 2
+rc = 0
+search_pattern = re.compile("^Virtual")
+
+try:
+    cli_bytes = subprocess.check_output(
+        ["sudo vppctl show hardware-interfaces"],
+        shell=True,
+        stderr=subprocess.STDOUT)
+    out = binary2str(cli_bytes)
+    # locate the first line that starts with "Virtual"
+    vnic_line = None
+    for candidate in out.splitlines():
+        if search_pattern.match(candidate):
+            vnic_line = candidate
+            break
+    # "up" has to appear as a separate word on that line
+    is_up = vnic_line is not None and "up" in vnic_line.split()
+    if not is_up:
+        rc = 2
+        print('Error: failed to find status in output of '
+              '"vppctl show hardware-interfaces": {}'.format(out))
+    else:
+        print('output of "vppctl show hardware-interfaces":\n{}'
+              .format(out))
+except subprocess.CalledProcessError as e:
+    rc = 2
+    print("Error finding 'vppctl show hardware-interfaces': {}"
+          .format(binary2str(e.output)))
+
+exit(rc)
diff --git a/app/monitoring/checks/check_vservice.py b/app/monitoring/checks/check_vservice.py
new file mode 100644
index 0000000..a95a46a
--- /dev/null
+++ b/app/monitoring/checks/check_vservice.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+###############################################################################
+# Copyright (c) 2017 Koren Lev (Cisco Systems), Yaron Yogev (Cisco Systems) #
+# and others #
+# #
+# All rights reserved. This program and the accompanying materials #
+# are made available under the terms of the Apache License, Version 2.0 #
+# which accompanies this distribution, and is available at #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+###############################################################################
+
+"""
+for vservice with type T and id X
+run on the corresponding host:
+ip netns pid X
+response is pid(s), for example:
+32075
+
+For DHCP there are multiple pid, we will take the dnsmasq process
+
+then run :
+ps -uf -p 32075
+
+get STAT - "S" and "R" = OK
+"""
+
+import subprocess
+import sys
+
+from binary_converter import binary2str
+
+
+# exit status: 0 = process state starts with S or R, 2 = anything else
+rc = 0
+
+args = sys.argv
+if len(args) < 3:
+    print('usage: check_vservice.py <vService type> <vService ID>')
+    sys.exit(2)
+
+vservice_type = args[1]
+vservice_id = args[2]
+# The command strings are used in messages only. The commands themselves
+# run from argument lists without a shell, because the vService ID comes
+# straight from the command line.
+netns_cmd = 'sudo ip netns pid {}'.format(vservice_id)
+pid = ''
+ps_cmd = ''
+try:
+    out = subprocess.check_output(['sudo', 'ip', 'netns', 'pid', vservice_id],
+                                  stderr=subprocess.STDOUT)
+    out = binary2str(out)
+    pids = out.split()
+    if not pids:
+        print('no matching vservice: {}\ncommand: {}\noutput: {}'
+              .format(vservice_id, netns_cmd, out))
+        sys.exit(2)
+    # DHCP: hand every PID of the namespace to ps so that the dnsmasq
+    # process can be picked from its output; otherwise use the first PID
+    pid = ','.join(pids) if vservice_type == 'dhcp' else pids[0]
+except subprocess.CalledProcessError as e:
+    print("Error running '{}': {}"
+          .format(netns_cmd, binary2str(e.output)))
+    sys.exit(2)
+except OSError as e:
+    # without a shell, a missing/unrunnable binary surfaces as OSError
+    print("Error running '{}': {}".format(netns_cmd, e))
+    sys.exit(2)
+try:
+    ps_cmd = 'ps -uf -p {}'.format(pid)
+    out = subprocess.check_output(['ps', '-uf', '-p', pid],
+                                  stderr=subprocess.STDOUT)
+    ps_out = binary2str(out)
+    lines = ps_out.splitlines()
+    if not lines:
+        print('no matching vservice: {}\noutput of {}:\n{}'
+              .format(vservice_id, ps_cmd, ps_out))
+        sys.exit(2)
+    headers = lines[0].split()
+    lines = lines[1:]
+    if vservice_type == 'dhcp' and len(lines) > 1:
+        lines = [line for line in lines if 'dnsmasq' in line]
+    if not lines:
+        print('no matching process for vservice: {}\noutput of {}:\n{}'
+              .format(vservice_id, ps_cmd, ps_out))
+        sys.exit(2)
+    values = lines[0].split()
+    stat_index = headers.index('STAT')
+    status = values[stat_index]
+    # ps may append modifier characters to the state letter (e.g. "Ss"),
+    # so only the first character is compared
+    rc = 0 if status[:1] in ('S', 'R') else 2
+    print('{}\n{}\n{}'.format(netns_cmd, ps_cmd, ps_out))
+except subprocess.CalledProcessError as e:
+    print("Error running '{}': {}".format(ps_cmd, binary2str(e.output)))
+    rc = 2
+except OSError as e:
+    print("Error running '{}': {}".format(ps_cmd, e))
+    rc = 2
+
+sys.exit(rc)