From 0142c227fca974fb65561d0aeb9b38c8683e22e6 Mon Sep 17 00:00:00 2001 From: lhinds Date: Wed, 17 May 2017 13:31:18 +0100 Subject: Initial code push of Anteater Likely far too much to cover in a commit msg. Main bulk is the Anteater code itself, alongside packaging requirements and build tools and Dockerfile. Unit tests are planned as a follow up, so pushing this for now so that efforts can get underway to integrate the tool with jjb. Questions on how it works, please reach me on IRC. Change-Id: I2cd3cae391f8bf2cdc91b39c56dfc4833a1c4913 Signed-off-by: lhinds --- anteater/__init__.py | 0 anteater/anteater.py | 64 +++++++++++++++ anteater/src/__init__.py | 6 ++ anteater/src/get_lists.py | 119 +++++++++++++++++++++++++++ anteater/src/patch_scan.py | 164 ++++++++++++++++++++++++++++++++++++++ anteater/src/project_scan.py | 154 +++++++++++++++++++++++++++++++++++ anteater/utils/__init__.py | 6 ++ anteater/utils/anteater_logger.py | 39 +++++++++ 8 files changed, 552 insertions(+) create mode 100644 anteater/__init__.py create mode 100644 anteater/anteater.py create mode 100644 anteater/src/__init__.py create mode 100644 anteater/src/get_lists.py create mode 100644 anteater/src/patch_scan.py create mode 100644 anteater/src/project_scan.py create mode 100644 anteater/utils/__init__.py create mode 100644 anteater/utils/anteater_logger.py (limited to 'anteater') diff --git a/anteater/__init__.py b/anteater/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/anteater/anteater.py b/anteater/anteater.py new file mode 100644 index 0000000..063fcbd --- /dev/null +++ b/anteater/anteater.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +############################################################################## +# Copyright (c) 2017 Luke Hinds , Red Hat +# +# All rights reserved. 
"""Anteater - CI Gate Checks.

Usage:
  anteater (-p |--project) <project> [(-ps |--patchset) <patchset>]
  anteater (-p |--project) <project> [--path <project_path>]
  anteater (-h | --help)
  anteater --version

Options:
  -h --help     Show this screen.
  --version     Show version.
"""
# NOTE(review): the source as extracted had every ``<...>`` placeholder
# stripped (the usage lines ended in bare options and main() looked up
# ``arguments['']``).  The placeholder names above are reconstructed from the
# option names; only their consistency with main() matters to docopt, but
# confirm them against the upstream repository.


def check_dir():
    """Create the directory that scan reports are written to.

    An already existing directory is left untouched and nothing is logged.
    If the directory cannot be created, the error is logged and execution
    continues (best effort, as before).
    """
    if os.path.isdir(reports_dir):
        return
    try:
        os.makedirs(reports_dir)
    except OSError as e:
        # Tolerate a concurrent creation between the check and makedirs.
        if not os.path.isdir(reports_dir):
            logger.error(e)
    else:
        logger.info('Creating reports directory: {0}'.format(reports_dir))


def main():
    """Parse the command line and dispatch to the matching scan.

    A patchset argument selects ``prepare_patchset``; otherwise a project
    path selects ``prepare_project``.  With neither, nothing is scanned.
    """
    # Parse first so that --help / --version / usage errors exit without
    # creating the reports directory.
    arguments = docopt(__doc__, version=__version__)
    check_dir()

    project = arguments['<project>']
    if arguments['<patchset>']:
        prepare_patchset(project, arguments['<patchset>'])
    elif arguments['<project_path>']:
        prepare_project(project, arguments['<project_path>'])


if __name__ == "__main__":
    main()
class GetLists(object):
    """Turn the gate-check YAML into the regexes and lists the scanners use.

    All values are read from the module-level ``yl`` mapping.  Each
    ``*_list`` method returns a pair ``(default_re, project_re)`` of compiled
    regular expressions: the first is built from the global entries, the
    second from the entries registered for ``project``.  Callers only use
    ``.search()`` on the returned objects.
    """

    # Empty negative lookahead: a pattern that can never match.  It stands in
    # for a missing or empty list, because ``re.compile("")`` matches *every*
    # string -- which made ``not project_re.search(x)`` always False and so
    # silently suppressed every violation for projects without waivers.
    _MATCH_NOTHING = r"(?!)"

    def __init__(self, *args):
        # Placeholder for future args if more filters are needed
        self.args = args

    @staticmethod
    def _lookup(*keys):
        """Follow ``keys`` down through ``yl``; return None if any is absent.

        A TypeError is treated like a missing key so that an empty YAML
        section (parsed as None) does not crash the scan.
        """
        node = yl
        try:
            for key in keys:
                node = node[key]
        except (KeyError, TypeError):
            return None
        return node

    @classmethod
    def _compile(cls, patterns):
        """Return a case-insensitive regex matching any entry of ``patterns``.

        Entries are joined in their YAML order, so the reported match is
        deterministic.  A missing or empty list gives a never-matching regex.
        """
        if not patterns:
            return re.compile(cls._MATCH_NOTHING)
        return re.compile("|".join(patterns), flags=re.IGNORECASE)

    def binary_list(self, project):
        """Return ``(default_re, project_re)`` for whitelisted binaries."""
        default_list = self._lookup('binaries', 'binary_ignore')
        if default_list is None:
            logger.error('Key Error processing binary list values')
        project_list = self._lookup('binaries', project, 'binary_ignore')
        if project_list is None:
            logger.info('No binary waivers found for {0}'.
                        format(project))
        return self._compile(default_list), self._compile(project_list)

    def file_audit_list(self, project):
        """Return ``(default_re, project_re)`` for blacklisted file names.

        ``project_re`` holds the project's waivers.
        """
        default_list = self._lookup('file_audits', 'file_names')
        if default_list is None:
            logger.error('Key Error processing file_names list values')
        project_list = self._lookup('file_audits', project, 'file_names')
        if project_list is None:
            logger.info('No file_names waivers found for {0}'.
                        format(project))
        else:
            logger.info('file_names waivers found for {0}'.
                        format(project))
        return self._compile(default_list), self._compile(project_list)

    def file_content_list(self, project):
        """Return ``(default_re, project_re)`` for blacklisted file content.

        ``project_re`` holds the project's waivers.
        """
        default_list = self._lookup('file_audits', 'file_contents')
        if default_list is None:
            logger.error('Key Error processing file_contents list values')
        project_list = self._lookup('file_audits', project, 'file_contents')
        if project_list is None:
            logger.info('No file_contents waivers found for {0}'.
                        format(project))
        return self._compile(default_list), self._compile(project_list)

    def licence_extensions(self):
        """Return the ``licence_ext`` list, or ``[]`` if it is not configured."""
        licence_extensions = self._lookup('licence', 'licence_ext')
        if licence_extensions is None:
            logger.error('Key Error processing licence_extensions list values')
            return []
        return licence_extensions

    def licence_ignore(self):
        """Return the ``licence_ignore`` list, or ``[]`` if it is not configured."""
        licence_ignore = self._lookup('licence', 'licence_ignore')
        if licence_ignore is None:
            logger.error('Key Error processing licence_ignore list values')
            return []
        return licence_ignore
def _append_report(prefix, project, entries):
    """Append ``entries`` to the report ``prefix`` + ``project`` + '.log'.

    The path is built by plain concatenation onto the module-level
    ``reports_dir``, so that value is expected to end with a path separator.
    """
    with open(reports_dir + prefix + project + ".log", "a") as gate_report:
        gate_report.writelines(entries)


def prepare_patchset(project, patchset):
    """Scan every file listed in ``patchset`` and exit non-zero on failure.

    Args:
        project: project name, used to select waivers and name report files.
        patchset: path of a text file listing one changed file per line.
    """
    # Get Various Lists / Project Waivers
    lists = get_lists.GetLists()
    binary_list, binary_project_list = lists.binary_list(project)
    file_audit_list, file_audit_project_list = lists.file_audit_list(project)
    file_content_list, file_content_project_list = \
        lists.file_content_list(project)
    licence_ext = lists.licence_extensions()
    licence_ignore = lists.licence_ignore()

    # Read the file list up front so the patchset handle is closed before
    # the (potentially long) scan starts.
    with open(patchset, 'r') as fo:
        patch_files = [line.strip('\n') for line in fo]

    for patch_file in patch_files:
        if not patch_file:
            # A blank line names no file; scanning '' would only raise.
            continue
        scan_patch(project, patch_file, binary_list, binary_project_list,
                   file_audit_list, file_audit_project_list,
                   file_content_list, file_content_project_list, licence_ext,
                   licence_ignore)

    # Process final result
    process_failure()


def scan_patch(project, patch_file, binary_list, binary_project_list,
               file_audit_list, file_audit_project_list, file_content_list,
               file_content_project_list, licence_ext, licence_ignore):
    """Run all checks on one committed file of the patch set.

    Binary files must match the default or project binary whitelist.  Text
    files are checked against the file-name and content blacklists (minus the
    project waivers) and then passed to ``licence_check``.  Every violation
    is logged, appended to the matching report file and recorded in the
    module-level ``failure`` flag.
    """
    global failure
    if is_binary(patch_file):
        if not binary_list.search(patch_file) and \
                not binary_project_list.search(patch_file):
            logger.error('Non Whitelisted Binary file: {0}'.
                         format(patch_file))
            failure = True
            _append_report("binaries-", project,
                           ['Non Whitelisted Binary file: {0}\n'.
                            format(patch_file)])
        return

    # Check file names / extensions
    match = file_audit_list.search(patch_file)
    if match and not file_audit_project_list.search(patch_file):
        logger.error('Blacklisted file: {0}'.
                     format(patch_file))
        logger.error('Matched String: {0}'.
                     format(match.group()))
        failure = True
        # The trailing newline keeps consecutive report entries apart.
        _append_report("file-names_", project,
                       ['Blacklisted file: {0}\n'.format(patch_file),
                        'Matched String: {0}\n'.format(match.group())])

    # Check the file line by line for blacklisted content
    with open(patch_file, 'r') as fo:
        for line in fo:
            match = file_content_list.search(line)
            if not match or file_content_project_list.search(line):
                continue
            logger.error('File contains violation: {0}'.
                         format(patch_file))
            logger.error('Flagged Content: {0}'.
                         format(line.rstrip()))
            logger.error('Matched String: {0}'.
                         format(match.group()))
            failure = True
            # Normalise the line ending so a final line without a newline
            # does not run into the next entry.
            _append_report("contents_", project,
                           ['File contains violation: {0}\n'.
                            format(patch_file),
                            'Flagged Content: {0}\n'.
                            format(line.rstrip('\n')),
                            'Matched String: {0}\n'.
                            format(match.group())])

    # Run license check
    # NOTE(review): indentation was lost in the extracted source; the licence
    # check is assumed to belong to the non-binary branch -- confirm.
    licence_check(project, licence_ext, licence_ignore, patch_file)


def licence_check(project, licence_ext,
                  licence_ignore, patch_file):
    """Require a licence marker in files with a licensed extension.

    A file passes when its content contains 'copyright' or 'spdx' in any
    letter case.  Files whose name does not end with one of ``licence_ext``,
    or which are listed in ``licence_ignore``, are not checked.
    """
    global failure
    if not patch_file.endswith(tuple(licence_ext)) \
            or patch_file in licence_ignore:
        return

    with open(patch_file, 'r') as fo:
        content = fo.read()

    # Note: Hardcoded use of 'copyright' & 'spdx' is the result
    # of a decision made at 2017 plugfest to limit searches to
    # just these two strings.
    if re.search("copyright|spdx", content, re.IGNORECASE):
        logger.info('Contains needed Licence string: {0}'.
                    format(patch_file))
        return

    logger.error('Licence header missing in file: {0}'.
                 format(patch_file))
    failure = True
    _append_report("licence-", project,
                   ['Licence header missing in file: {0}\n'.
                    format(patch_file)])


def process_failure():
    """ If any scan operations register a failure, sys.exit(1) is called
        to allow jjb to register a failure"""
    if failure:
        logger.error('Failures registered')
        sys.exit(1)
def _append_report(prefix, project, entries):
    """Append ``entries`` to the report ``prefix`` + ``project`` + '.log'.

    The path is built by plain concatenation onto the module-level
    ``reports_dir``, so that value is expected to end with a path separator.
    """
    with open(reports_dir + prefix + project + ".log", "a") as gate_report:
        gate_report.writelines(entries)


def prepare_project(project, project_dir):
    """Build the black/white lists for ``project`` and scan ``project_dir``.

    Args:
        project: project name, used to select waivers and name report files.
        project_dir: root directory of the checkout to walk.
    """
    # Get Various Lists / Project Waivers
    lists = get_lists.GetLists()
    binary_list, binary_project_list = lists.binary_list(project)
    file_audit_list, file_audit_project_list = lists.file_audit_list(project)
    file_content_list, project_content_list = lists.file_content_list(project)
    licence_ext = lists.licence_extensions()
    licence_ignore = lists.licence_ignore()

    # Perform rudimentary scans
    scan_file(project_dir, project, binary_list, binary_project_list,
              file_audit_list, file_audit_project_list, file_content_list,
              project_content_list)

    # Perform licence header checks
    licence_check(licence_ext, licence_ignore, project, project_dir)


def scan_file(project_dir, project, binary_list, binary_project_list,
              file_audit_list, file_audit_project_list, file_content_list,
              project_content_list):
    """Walk ``project_dir`` and report blacklisted names, content and binaries.

    Directories named in the module-level ``ignore_dirs`` are not descended
    into.  Violations are logged and appended to the per-project report
    files; nothing is returned.
    """
    for root, dirs, files in os.walk(project_dir):
        # Filter out ignored directories from list.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for items in files:
            full_path = os.path.join(root, items)

            # Check for Blacklisted file names
            match = file_audit_list.search(full_path)
            if match and not file_audit_project_list.search(full_path):
                logger.error('Blacklisted filename: {0}'.
                             format(full_path))
                logger.error('Matched String: {0}'.
                             format(match.group()))
                # The trailing newline keeps consecutive entries apart.
                _append_report("file-names_", project,
                               ['Blacklisted filename: {0}\n'.
                                format(full_path),
                                'Matched String: {0}\n'.
                                format(match.group())])

            if not os.path.isfile(full_path):
                # os.walk also lists dangling symlinks and special files;
                # opening those would raise (or block), so only the name
                # check above applies to them.
                continue

            if is_binary(full_path):
                # Check if Binary is whitelisted
                if not binary_list.search(full_path) \
                        and not binary_project_list.search(full_path):
                    logger.error('Non Whitelisted Binary: {0}'.
                                 format(full_path))
                    _append_report("binaries-", project,
                                   ['Non Whitelisted Binary: {0}\n'.
                                    format(full_path)])
                continue

            with open(full_path, 'r') as fo:
                for line in fo:
                    # Check for sensitive content in project files
                    match = file_content_list.search(line)
                    if not match or project_content_list.search(line):
                        continue
                    logger.error('File contains violation: {0}'.
                                 format(full_path))
                    logger.error('Flagged Content: {0}'.
                                 format(line.rstrip()))
                    logger.error('Matched String: {0}'.
                                 format(match.group()))
                    # Normalise the line ending so a final line without a
                    # newline does not run into the next entry.
                    _append_report("contents_", project,
                                   ['File contains violation: {0}\n'.
                                    format(full_path),
                                    'Flagged Content: {0}\n'.
                                    format(line.rstrip('\n')),
                                    'Matched String: {0}\n'.
                                    format(match.group())])


def licence_check(licence_ext, licence_ignore, project, project_dir):
    """Report text files under ``project_dir`` that lack a licence marker.

    Only files whose name ends with one of ``licence_ext`` and is not listed
    in ``licence_ignore`` are inspected; binaries are skipped.  A file passes
    when it contains 'copyright' or 'spdx' in any letter case.
    """
    extensions = tuple(licence_ext)
    for root, dirs, files in os.walk(project_dir):
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for name in files:
            if not name.endswith(extensions) or name in licence_ignore:
                continue
            full_path = os.path.join(root, name)
            # Dangling symlinks cannot be read; binaries carry no header.
            if not os.path.isfile(full_path) or is_binary(full_path):
                continue

            with open(full_path, 'r') as fo:
                content = fo.read()

            # Note: Hardcoded use of 'copyright' & 'spdx' is the result
            # of a decision made at 2017 plugfest to limit searches to
            # just these two strings.
            if re.search("copyright|spdx", content, re.IGNORECASE):
                logger.info('Licence string present: {0}'.
                            format(full_path))
            else:
                logger.error('Licence header missing: {0}'.
                             format(full_path))
                _append_report("licence-", project,
                               ['Licence header missing: {0}\n'.
                                format(full_path)])
class Logger(object):
    """Configure a named logger that writes to the console and to a file.

    The file path is taken from the module-level ``anteater_log``.  Both
    handlers use DEBUG level and the same
    ``time - name - level - message`` format.
    """

    def __init__(self, logger_name):
        """Fetch the logger called ``logger_name`` and set it up once."""
        self.logger = logging.getLogger(logger_name)
        self.logger.propagate = 0
        self.logger.setLevel(logging.DEBUG)

        # logging.getLogger() hands back the same object for the same name,
        # so attaching handlers on every instantiation would emit each
        # message once per Logger() call.  Configure only a bare logger.
        if self.logger.handlers:
            return

        formatter = logging.Formatter('%(asctime)s - %(name)s - '
                                      '%(levelname)s - %(message)s')
        for handler in (logging.StreamHandler(),
                        logging.FileHandler(anteater_log)):
            handler.setFormatter(formatter)
            handler.setLevel(logging.DEBUG)
            self.logger.addHandler(handler)

    def getLogger(self):
        """Return the configured ``logging.Logger`` instance."""
        return self.logger