about summary refs log tree commit diff stats
path: root/anteater
diff options
context:
space:
mode:
Diffstat (limited to 'anteater')
-rw-r--r-- anteater/__init__.py 0
-rw-r--r-- anteater/anteater.py 64
-rw-r--r-- anteater/src/__init__.py 6
-rw-r--r-- anteater/src/get_lists.py 119
-rw-r--r-- anteater/src/patch_scan.py 164
-rw-r--r-- anteater/src/project_scan.py 154
-rw-r--r-- anteater/utils/__init__.py 6
-rw-r--r-- anteater/utils/anteater_logger.py 39
8 files changed, 552 insertions, 0 deletions
diff --git a/anteater/__init__.py b/anteater/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/anteater/__init__.py
diff --git a/anteater/anteater.py b/anteater/anteater.py
new file mode 100644
index 0000000..063fcbd
--- /dev/null
+++ b/anteater/anteater.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+# from __future__ import division, print_function, absolute_import
+
+"""Anteater - CI Gate Checks.
+
+Usage:
+ anteater (-p |--project) <project> [(-ps |--patchset) <patchset>]
+ anteater (-p |--project) <project> [--path <project_path>]
+ anteater (-h | --help)
+ anteater --version
+
+Options:
+ -h --help Show this screen.
+ --version Show version.
+"""
+
+import ConfigParser
+from docopt import docopt
+import os
+from src.patch_scan import prepare_patchset
+from src.project_scan import prepare_project
+import utils.anteater_logger as antlog
+
+
+config = ConfigParser.RawConfigParser()
+config.read('anteater.conf')  # relative path, so resolved against the cwd
+reports_dir = config.get('config', 'reports_dir')  # created by check_dir()
+logger = antlog.Logger(__name__).getLogger()
+__version__ = "0.1"  # passed to docopt, which prints it for --version
+
+
+def check_dir():
+    """Ensure the scan reports directory exists, creating it if needed."""
+    try:
+        os.makedirs(reports_dir)
+        logger.info('Creating reports directory: {0}'.format(reports_dir))
+    except OSError as err:
+        if not os.path.isdir(reports_dir):  # an existing dir is not an error
+            logger.error(err)
+
+
+def main():
+    """Parse the command line and run a patchset scan or a project scan."""
+    check_dir()
+    opts = docopt(__doc__, version=__version__)
+    project = opts['<project>']
+    if opts['<patchset>']:
+        prepare_patchset(project, opts['<patchset>'])
+    elif opts['<project_path>']:
+        prepare_project(project, opts['<project_path>'])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/anteater/src/__init__.py b/anteater/src/__init__.py
new file mode 100644
index 0000000..896994c
--- /dev/null
+++ b/anteater/src/__init__.py
@@ -0,0 +1,6 @@
+import pkg_resources
+
+try:
+    __version__ = pkg_resources.get_distribution(__name__).version
+except Exception:  # e.g. DistributionNotFound; bare except hid SystemExit
+    __version__ = 'unknown'
diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py
new file mode 100644
index 0000000..8941510
--- /dev/null
+++ b/anteater/src/get_lists.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+"""
+ Gathers various values from the gate check yaml file and returns them to
+ the calling instance.
+"""
+
+import anteater.utils.anteater_logger as antlog
+import ConfigParser
+import yaml
+import re
+
+config = ConfigParser.RawConfigParser()
+config.read('anteater.conf')
+logger = antlog.Logger(__name__).getLogger()
+gate_checks = config.get('config', 'gate_checks')  # path of the rules YAML
+
+with open(gate_checks, 'r') as f:
+ yl = yaml.safe_load(f)  # parsed once at import; GetLists reads it
+
+
+class GetLists(object):
+    """Serve the whitelists, blacklists and waivers used by the scanners.
+
+    Every value is looked up in ``yl``, the gate-check YAML that this
+    module parses at import time.
+    """
+
+    # A pattern that can never match.  re.compile("") matches *every*
+    # string, so compiling "" for a project without waivers waived all of
+    # its findings; the empty negative lookahead below matches nothing.
+    _NO_MATCH = "(?!)"
+
+    def __init__(self, *args):
+        # Placeholder for future args if more filters are needed
+        self.args = args
+
+    def _compile(self, entries):
+        """Return a case-insensitive regex matching any item of entries.
+
+        A missing (None) or empty list yields a regex that never matches.
+        """
+        # Sorting the de-duplicated items keeps the pattern deterministic.
+        unique = sorted(set(entries or ()))
+        pattern = "|".join(unique) if unique else self._NO_MATCH
+        return re.compile(pattern, flags=re.IGNORECASE)
+
+    def _regex_pair(self, section, key, label, project):
+        """Compile the default and the per-project regex of one list.
+
+        section and key locate the list in the YAML; label is the name
+        used in log messages.  Returns (default_re, project_re), where
+        project_re matches nothing if the project has no such list.
+        Raises KeyError when the default list itself is missing.
+        """
+        try:
+            default_list = yl[section][key]
+        except KeyError:
+            # The old code logged and then died on an unbound local.
+            logger.error('Key Error processing {0} list values'.
+                         format(label))
+            raise
+        try:
+            project_list = yl[section][project][key]
+        except KeyError:
+            project_list = None
+            logger.info('No {0} waivers found for {1}'.
+                        format(label, project))
+        else:
+            logger.info('{0} waivers found for {1}'.format(label, project))
+        return self._compile(default_list), self._compile(project_list)
+
+    def binary_list(self, project):
+        """Return (default, project) regexes of whitelisted binaries."""
+        return self._regex_pair('binaries', 'binary_ignore', 'binary',
+                                project)
+
+    def file_audit_list(self, project):
+        """Return (blacklist, project waiver) regexes for file names."""
+        return self._regex_pair('file_audits', 'file_names', 'file_names',
+                                project)
+
+    def file_content_list(self, project):
+        """Return (blacklist, project waiver) regexes for file contents."""
+        return self._regex_pair('file_audits', 'file_contents',
+                                'file_contents', project)
+
+    def licence_extensions(self):
+        """Return the file extensions subject to the licence check.
+
+        Raises KeyError (after logging) when the YAML lacks the entry.
+        """
+        try:
+            return yl['licence']['licence_ext']
+        except KeyError:
+            logger.error(
+                'Key Error processing licence_extensions list values')
+            raise
+
+    def licence_ignore(self):
+        """Return the file names exempt from the licence check.
+
+        Raises KeyError (after logging) when the YAML lacks the entry.
+        """
+        try:
+            return yl['licence']['licence_ignore']
+        except KeyError:
+            logger.error(
+                'Key Error processing licence_ignore list values')
+            raise
diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py
new file mode 100644
index 0000000..5bd1609
--- /dev/null
+++ b/anteater/src/patch_scan.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+"""
+ Accepts the --patchset argument and iterates through each line of the
+ patchset file to perform various checks such as if the file is a binary, or
+ contains a blacklisted string. If any violations are found, the script
+ exits with code 1 and logs the violation(s) found.
+"""
+
+from __future__ import division, print_function, absolute_import
+from binaryornot.check import is_binary
+import anteater.utils.anteater_logger as antlog
+import anteater.src.get_lists as get_lists
+import ConfigParser
+import sys
+import re
+
+
+logger = antlog.Logger(__name__).getLogger()
+config = ConfigParser.RawConfigParser()
+config.read('anteater.conf')
+reports_dir = config.get('config', 'reports_dir')  # prefix of report paths
+failure = False  # set True by any failed check; read by process_failure()
+
+
+def prepare_patchset(project, patchset):
+    """Scan every file named in a patchset and exit non-zero on violations.
+
+    project  -- project name, used to look up waivers and name the reports.
+    patchset -- path of a text file listing one changed file per line.
+
+    Calls process_failure() last, which exits with status 1 when any check
+    registered a failure.
+    """
+    lists = get_lists.GetLists()
+
+    # Binary whitelist, then file-name and file-content blacklists; each
+    # call returns the default regex plus the project's waiver regex.
+    binary_list, binary_project_list = lists.binary_list(project)
+    file_audit_list, file_audit_project_list = lists.file_audit_list(project)
+    file_content_list, file_content_project_list = \
+        lists.file_content_list(project)
+
+    licence_ext = lists.licence_extensions()
+    licence_ignore = lists.licence_ignore()
+
+    # The context manager closes the patchset, which was previously leaked.
+    with open(patchset, 'r') as patch_list:
+        patch_files = [line.rstrip('\r\n') for line in patch_list]
+
+    for patch_file in patch_files:
+        if not patch_file:
+            # A blank line would otherwise be opened as the file ''.
+            continue
+        scan_patch(project, patch_file, binary_list, binary_project_list,
+                   file_audit_list, file_audit_project_list,
+                   file_content_list, file_content_project_list,
+                   licence_ext, licence_ignore)
+
+    process_failure()
+
+
+def scan_patch(project, patch_file, binary_list, binary_project_list,
+               file_audit_list, file_audit_project_list, file_content_list,
+               file_content_project_list, licence_ext, licence_ignore):
+    """Run the gate checks against one committed file of a patch set.
+
+    A binary file only has to be whitelisted.  Any other file has its name
+    and each of its lines tested against the blacklists and is then passed
+    to licence_check().  Findings are logged, appended to the project's
+    report files and recorded in the module-level ``failure`` flag.
+    The *_list arguments are compiled regexes; each *_project_list one
+    holds the project's waivers.
+    """
+    global failure
+    if is_binary(patch_file):
+        if not (binary_list.search(patch_file) or
+                binary_project_list.search(patch_file)):
+            logger.error('Non Whitelisted Binary file: {0}'.
+                         format(patch_file))
+            failure = True
+            with open(reports_dir + "binaries-" + project + ".log", "a") \
+                    as gate_report:
+                gate_report.write('Non Whitelisted Binary file: {0}\n'.
+                                  format(patch_file))
+        return
+
+    # Check file names / extensions; search once and reuse the match.
+    match = file_audit_list.search(patch_file)
+    if match and not file_audit_project_list.search(patch_file):
+        logger.error('Blacklisted file: {0}'.format(patch_file))
+        logger.error('Matched String: {0}'.format(match.group()))
+        failure = True
+        with open(reports_dir + "file-names_" + project + ".log", "a") \
+                as gate_report:
+            gate_report.write('Blacklisted file: {0}\n'.format(patch_file))
+            # The newline was missing, so report entries ran together.
+            gate_report.write('Matched String: {0}\n'.
+                              format(match.group()))
+
+    # Check content line by line; `with` closes the previously leaked file.
+    with open(patch_file, 'r') as patched:
+        for line in patched:
+            match = file_content_list.search(line)
+            if not match or file_content_project_list.search(line):
+                continue
+            logger.error('File contains violation: {0}'.format(patch_file))
+            logger.error('Flagged Content: {0}'.format(line.rstrip()))
+            logger.error('Matched String: {0}'.format(match.group()))
+            failure = True
+            with open(reports_dir + "contents_" + project + ".log",
+                      "a") as gate_report:
+                gate_report.write('File contains violation: {0}\n'.
+                                  format(patch_file))
+                gate_report.write('Flagged Content: {0}'.format(line))
+                gate_report.write('Matched String: {0}\n'.
+                                  format(match.group()))
+
+    # Run license check (text files only; binaries returned above)
+    licence_check(project, licence_ext, licence_ignore, patch_file)
+
+
+def licence_check(project, licence_ext, licence_ignore, patch_file):
+    """Flag patch_file when it carries no licence string.
+
+    Only a file whose name ends with one of licence_ext and that is not
+    listed in licence_ignore is read.  A miss is logged, appended to the
+    project's licence report and recorded in the ``failure`` flag.
+    """
+    global failure
+    if not patch_file.endswith(tuple(licence_ext)) \
+            or patch_file in licence_ignore:
+        return
+    # `with` closes the handle, which was previously left open.
+    with open(patch_file, 'r') as source:
+        content = source.read()
+    # Note: Hardcoded use of 'copyright' & 'spdx' is the result
+    # of a decision made at 2017 plugfest to limit searches to
+    # just these two strings.
+    if re.search("copyright|spdx", content, re.IGNORECASE):
+        logger.info('Contains needed Licence string: {0}'.format(patch_file))
+        return
+    logger.error('Licence header missing in file: {0}'.format(patch_file))
+    failure = True
+    with open(reports_dir + "licence-" + project + ".log", "a") as gate_report:
+        gate_report.write('Licence header missing in file: {0}\n'.
+                          format(patch_file))
+
+
+def process_failure():
+    """Exit with status 1 when a scan flagged a failure, so jjb sees it."""
+    if not failure:
+        return
+    logger.error('Failures registered')
+    sys.exit(1)
diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py
new file mode 100644
index 0000000..9ab9e17
--- /dev/null
+++ b/anteater/src/project_scan.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+"""
+ Accepts the --path argument and walks the project tree using os.walk,
+ checking whether each file is a binary or contains a blacklisted string.
+ Any violations found are added to a log file.
+"""
+
+from __future__ import division, print_function, absolute_import
+import ConfigParser
+import os
+import re
+import anteater.utils.anteater_logger as antlog
+import anteater.src.get_lists as get_lists
+from binaryornot.check import is_binary
+
+logger = antlog.Logger(__name__).getLogger()
+config = ConfigParser.RawConfigParser()
+config.read('anteater.conf')
+reports_dir = config.get('config', 'reports_dir')  # prefix of report paths
+gate_checks = config.get('config', 'gate_checks')
+ignore_dirs = ['.git']  # directory names pruned from every os.walk below
+
+
+def prepare_project(project, project_dir):
+    """Collect the waiver lists for project and scan project_dir with them.
+
+    Runs scan_file() first and licence_check() afterwards; both log their
+    findings and append them to the project's report files.
+    """
+
+    waivers = get_lists.GetLists()
+
+    # Each lookup yields a (default regex, project regex) pair.
+    binaries = waivers.binary_list(project)
+    file_names = waivers.file_audit_list(project)
+    file_contents = waivers.file_content_list(project)
+
+    # Licence settings: extensions to inspect and files to leave alone.
+    extensions = waivers.licence_extensions()
+    exempt = waivers.licence_ignore()
+
+    # Blacklist / whitelist scan over the whole tree.
+    scan_file(project_dir, project,
+              binaries[0], binaries[1],
+              file_names[0], file_names[1],
+              file_contents[0], file_contents[1])
+
+    # Licence header scan over the same tree.
+    licence_check(extensions, exempt, project, project_dir)
+
+
+def scan_file(project_dir, project, binary_list, binary_project_list,
+              file_audit_list, file_audit_project_list, file_content_list,
+              project_content_list):
+    """Walk project_dir and report blacklisted names/content and binaries.
+
+    Every file name is tested against file_audit_list.  Text files are
+    then searched line by line with file_content_list, whereas binary
+    files must match binary_list or binary_project_list.  The two
+    *_project_list regexes and project_content_list hold the project's
+    waivers.  Findings are logged and appended to the report files under
+    reports_dir; nothing is returned.
+    """
+    for root, dirs, files in os.walk(project_dir):
+        # Filter out ignored directories from list.
+        dirs[:] = [d for d in dirs if d not in ignore_dirs]
+        for items in files:
+            full_path = os.path.join(root, items)
+            # Check for Blacklisted file names
+            match = file_audit_list.search(full_path)
+            if match and not file_audit_project_list.search(full_path):
+                logger.error('Blacklisted filename: {0}'.format(full_path))
+                logger.error('Matched String: {0}'.format(match.group()))
+                with open(reports_dir + "file-names_" + project + ".log",
+                          "a") as gate_report:
+                    gate_report.write('Blacklisted filename: {0}\n'.
+                                      format(full_path))
+                    # Newline added: entries used to run together.
+                    gate_report.write('Matched String: {0}\n'.
+                                      format(match.group()))
+
+            if is_binary(full_path):
+                # Check if Binary is whitelisted
+                if not binary_list.search(full_path) \
+                        and not binary_project_list.search(full_path):
+                    logger.error('Non Whitelisted Binary: {0}'.
+                                 format(full_path))
+                    with open(reports_dir + "binaries-" + project + ".log",
+                              "a") as gate_report:
+                        gate_report.write('Non Whitelisted Binary: {0}\n'.
+                                          format(full_path))
+                continue
+
+            # `with` closes each file; handles were previously leaked.
+            with open(full_path, 'r') as text_file:
+                for line in text_file:
+                    # Check for sensitive content in project files
+                    match = file_content_list.search(line)
+                    if not match or project_content_list.search(line):
+                        continue
+                    logger.error('File contains violation: {0}'.
+                                 format(full_path))
+                    logger.error('Flagged Content: {0}'.
+                                 format(line.rstrip()))
+                    logger.error('Matched String: {0}'.
+                                 format(match.group()))
+                    with open(reports_dir + "contents_" + project + ".log",
+                              "a") as gate_report:
+                        gate_report.write('File contains violation: {0}\n'.
+                                          format(full_path))
+                        gate_report.write('Flagged Content: {0}'.
+                                          format(line))
+                        gate_report.write('Matched String: {0}\n'.
+                                          format(match.group()))
+
+
+def licence_check(licence_ext, licence_ignore, project, project_dir):
+    """Report every file under project_dir that lacks a licence string.
+
+    A file is read when its name ends with one of licence_ext, is not in
+    licence_ignore and the file is not binary; misses are logged and
+    appended to the project's licence report.
+    """
+    for root, dirs, files in os.walk(project_dir):
+        dirs[:] = [d for d in dirs if d not in ignore_dirs]
+        for name in files:  # `name` avoids shadowing the builtin `file`
+            if not name.endswith(tuple(licence_ext)) or name in licence_ignore:
+                continue
+            full_path = os.path.join(root, name)
+            if is_binary(full_path):
+                continue
+            # `with` closes each handle; they were previously leaked.
+            with open(full_path, 'r') as source:
+                content = source.read()
+            # Note: 'copyright' & 'spdx' are hard-coded following a decision
+            # made at 2017 plugfest to limit searches to just these two.
+            if re.search("copyright|spdx", content, re.IGNORECASE):
+                logger.info('Licence string present: {0}'.format(full_path))
+                continue
+            logger.error('Licence header missing: {0}'.format(full_path))
+            with open(reports_dir + "licence-" + project + ".log",
+                      "a") as gate_report:
+                gate_report.write('Licence header missing: {0}\n'.
+                                  format(full_path))
diff --git a/anteater/utils/__init__.py b/anteater/utils/__init__.py
new file mode 100644
index 0000000..896994c
--- /dev/null
+++ b/anteater/utils/__init__.py
@@ -0,0 +1,6 @@
+import pkg_resources
+
+try:
+    __version__ = pkg_resources.get_distribution(__name__).version
+except Exception:  # e.g. DistributionNotFound; bare except hid SystemExit
+    __version__ = 'unknown'
diff --git a/anteater/utils/anteater_logger.py b/anteater/utils/anteater_logger.py
new file mode 100644
index 0000000..ae9f356
--- /dev/null
+++ b/anteater/utils/anteater_logger.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 jose.lausuch@ericsson.com
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+import ConfigParser
+import logging
+
+config = ConfigParser.RawConfigParser()
+config.read('anteater.conf')
+anteater_log = config.get('config', 'anteater_log')  # path of the log file
+
+
+class Logger(object):
+    """Named logger writing DEBUG and above to the console and a file."""
+
+    def __init__(self, logger_name):
+        self.logger = logging.getLogger(logger_name)
+        self.logger.propagate = 0
+        self.logger.setLevel(logging.DEBUG)
+        # One shared logger object exists per name: add handlers only once.
+        if self.logger.handlers:
+            return
+        formatter = logging.Formatter('%(asctime)s - %(name)s - '
+                                      '%(levelname)s - %(message)s')
+        for handler in (logging.StreamHandler(),
+                        logging.FileHandler(anteater_log)):
+            handler.setFormatter(formatter)
+            handler.setLevel(logging.DEBUG)
+            self.logger.addHandler(handler)
+
+    def getLogger(self):
+        return self.logger