# From 0142c227fca974fb65561d0aeb9b38c8683e22e6 Mon Sep 17 00:00:00 2001
# From: lhinds
# Date: Wed, 17 May 2017 13:31:18 +0100
# Subject: Initial code push of Anteater
#
# Likely far too much to cover in a commit msg. Main bulk is the Anteater
# code itself, alongside packaging requirements and build tools and
# Dockerfile. Unit tests are planned as a follow up, so pushing this for now
# so that efforts can get underway to integrate the tool with jjb.
# Questions on how it works, please reach me in IRC.
#
# Change-Id: I2cd3cae391f8bf2cdc91b39c56dfc4833a1c4913
# Signed-off-by: lhinds
#
# Files created by this patch (limited to 'anteater/src'):
#   anteater/src/__init__.py     |   6 ++
#   anteater/src/get_lists.py    | 119 +++
#   anteater/src/patch_scan.py   | 164 +++
#   anteater/src/project_scan.py | 154 +++
#   4 files changed, 443 insertions(+)

# ==== anteater/src/__init__.py (new file) ====

# Expose the installed distribution's version as ``__version__``, falling
# back to the string 'unknown' when it cannot be determined.
try:
    # Imported inside the guard so that a missing or broken setuptools
    # install degrades to 'unknown' instead of making the whole package
    # unimportable.
    import pkg_resources

    __version__ = pkg_resources.get_distribution(__name__).version
except Exception:
    # ``except Exception`` rather than a bare ``except:`` so that
    # KeyboardInterrupt and SystemExit still propagate.
    __version__ = 'unknown'

# ==== anteater/src/get_lists.py (new file) ====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
##############################################################################
# Copyright (c) 2017 Luke Hinds , Red Hat
#
# All rights reserved.
# This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################

"""
    Gathers various values from the gate check yaml file and returns them to
    the calling instance
"""

import anteater.utils.anteater_logger as antlog
import ConfigParser
import yaml
import re

config = ConfigParser.RawConfigParser()
config.read('anteater.conf')
logger = antlog.Logger(__name__).getLogger()
gate_checks = config.get('config', 'gate_checks')

with open(gate_checks, 'r') as f:
    yl = yaml.safe_load(f)

# A pattern that can never match: an empty negative lookahead always fails.
# It stands in for "no entries".  re.compile("") must NOT be used for that
# purpose, because an empty pattern matches every string, which makes every
# ``not waiver_re.search(x)`` test in the scanners False and silently
# suppresses all findings.
_NEVER_MATCH = re.compile(r'(?!)')


def _compile_patterns(patterns):
    """ Compile an iterable of regex strings into a single case-insensitive
    alternation.

    Duplicates and empty strings are dropped (an empty alternative would
    match everything) and the original order is kept so that the reported
    match is deterministic.  When nothing is left, a never-matching pattern
    is returned.
    """
    unique = []
    for pattern in patterns or ():
        if pattern and pattern not in unique:
            unique.append(pattern)
    if not unique:
        return _NEVER_MATCH
    return re.compile("|".join(unique), flags=re.IGNORECASE)


class GetLists(object):
    """ Reads the white / black lists and licence settings from the parsed
    gate check yaml (module level ``yl``).

    The three ``*_list`` methods return a tuple
    ``(default_regex, project_regex)`` of compiled, case-insensitive
    patterns.  A missing list yields a pattern that never matches.
    """

    def __init__(self, *args):
        # Placeholder for future args if more filters are needed
        self.args = args

    def binary_list(self, project):
        """ Return (default, project) regexes of whitelisted binaries. """
        try:
            default_list = yl['binaries']['binary_ignore']
        except KeyError:
            logger.error('Key Error processing binary list values')
            # Fall back to an empty list instead of hitting an unbound
            # local a few lines further down.
            default_list = []
        try:
            project_list = yl['binaries'][project]['binary_ignore']
        except KeyError:
            logger.info('No binary waivers found for {0}'.
                        format(project))
            project_list = []

        return _compile_patterns(default_list), \
            _compile_patterns(project_list)

    def file_audit_list(self, project):
        """ Return (default, project waiver) regexes for file names. """
        try:
            default_list = yl['file_audits']['file_names']
        except KeyError:
            logger.error('Key Error processing file_names list values')
            default_list = []
        try:
            project_list = yl['file_audits'][project]['file_names']
            logger.info('file_names waivers found for {0}'.
                        format(project))
        except KeyError:
            logger.info('No file_names waivers found for {0}'.
                        format(project))
            project_list = []

        return _compile_patterns(default_list), \
            _compile_patterns(project_list)

    def file_content_list(self, project):
        """ Return (default, project waiver) regexes for file contents. """
        try:
            default_list = yl['file_audits']['file_contents']
        except KeyError:
            logger.error('Key Error processing file_contents list values')
            default_list = []
        try:
            project_list = yl['file_audits'][project]['file_contents']
        except KeyError:
            logger.info('No file_contents waivers found for {0}'.
                        format(project))
            project_list = []

        return _compile_patterns(default_list), \
            _compile_patterns(project_list)

    def licence_extensions(self):
        """ Return the ``licence_ext`` list, or an empty list if the key
        is missing. """
        try:
            return yl['licence']['licence_ext'] or []
        except KeyError:
            logger.error('Key Error processing licence_extensions list values')
            return []

    def licence_ignore(self):
        """ Return the ``licence_ignore`` list, or an empty list if the key
        is missing. """
        try:
            return yl['licence']['licence_ignore'] or []
        except KeyError:
            logger.error('Key Error processing licence_ignore list values')
            return []

# ==== anteater/src/patch_scan.py (new file) ====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
##############################################################################
# Copyright (c) 2017 Luke Hinds , Red Hat
#
# All rights reserved.
This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +""" + Accepts the --patchset argument and iterates through each line of the + patchset file to perform various checks such as if the file is a binary, or + contains a blacklisted string. If any violations are found, the script + exits with code 1 and logs the violation(s) found. +""" + +from __future__ import division, print_function, absolute_import +from binaryornot.check import is_binary +import anteater.utils.anteater_logger as antlog +import anteater.src.get_lists as get_lists +import ConfigParser +import sys +import re + + +logger = antlog.Logger(__name__).getLogger() +config = ConfigParser.RawConfigParser() +config.read('anteater.conf') +reports_dir = config.get('config', 'reports_dir') +failure = False + + +def prepare_patchset(project, patchset): + """ Create black/white lists and default / project waivers + and iterates over patchset file """ + + # Get Various Lists / Project Waivers + lists = get_lists.GetLists() + # Get binary white list + binary_list, binary_project_list = lists.binary_list(project) + + # Get file name black list and project waivers + file_audit_list, file_audit_project_list = lists.file_audit_list(project) + + # Get file content black list and project waivers + file_content_list, \ + file_content_project_list = lists.file_content_list(project) + + # Get Licence Lists + licence_ext = lists.licence_extensions() + licence_ignore = lists.licence_ignore() + + # Open patch set to get file list + fo = open(patchset, 'r') + lines = fo.readlines() + + for line in lines: + patch_file = line.strip('\n') + # Perform binary and file / content checks + scan_patch(project, patch_file, binary_list, binary_project_list, + file_audit_list, 
file_audit_project_list, + file_content_list, file_content_project_list, licence_ext, + licence_ignore) + + # Process each file in patch set using waivers generated above + # Process final result + process_failure() + + +def scan_patch(project, patch_file, binary_list, binary_project_list, + file_audit_list, file_audit_project_list, file_content_list, + file_content_project_list, licence_ext, licence_ignore): + """ Scan actions for each commited file in patch set """ + global failure + if is_binary(patch_file): + if not binary_list.search(patch_file) and not binary_project_list\ + .search(patch_file): + logger.error('Non Whitelisted Binary file: {0}'. + format(patch_file)) + failure = True + with open(reports_dir + "binaries-" + project + ".log", "a") \ + as gate_report: + gate_report.write('Non Whitelisted Binary file: {0}\n'. + format(patch_file)) + else: + # Check file names / extensions + if file_audit_list.search(patch_file) and not \ + file_audit_project_list.search(patch_file): + match = file_audit_list.search(patch_file) + logger.error('Blacklisted file: {0}'. + format(patch_file)) + logger.error('Matched String: {0}'. + format(match.group())) + failure = True + with open(reports_dir + "file-names_" + project + ".log", "a") \ + as gate_report: + gate_report.write('Blacklisted file: {0}\n'. + format(patch_file)) + gate_report.write('Matched String: {0}'. + format(match.group())) + + # Open file to check for blacklisted content + fo = open(patch_file, 'r') + lines = fo.readlines() + + for line in lines: + if file_content_list.search(line) and not \ + file_content_project_list.search(line): + match = file_content_list.search(line) + logger.error('File contains violation: {0}'. + format(patch_file)) + logger.error('Flagged Content: {0}'. + format(line.rstrip())) + logger.error('Matched String: {0}'. 
+ format(match.group())) + failure = True + with open(reports_dir + "contents_" + project + ".log", + "a") as gate_report: + gate_report.write('File contains violation: {0}\n'. + format(patch_file)) + gate_report.write('Flagged Content: {0}'. + format(line)) + gate_report.write('Matched String: {0}\n'. + format(match.group())) + + # Run license check + licence_check(project, licence_ext, licence_ignore, patch_file) + + +def licence_check(project, licence_ext, + licence_ignore, patch_file): + """ Performs licence checks """ + global failure + if patch_file.endswith(tuple(licence_ext)) \ + and patch_file not in licence_ignore: + fo = open(patch_file, 'r') + content = fo.read() + # Note: Hardcoded use of 'copyright' & 'spdx' is the result + # of a decision made at 2017 plugfest to limit searches to + # just these two strings. + if re.search("copyright", content, re.IGNORECASE): + logger.info('Contains needed Licence string: {0}'. + format(patch_file)) + elif re.search("spdx", content, re.IGNORECASE): + logger.info('Contains needed Licence string: {0}'. + format(patch_file)) + else: + logger.error('Licence header missing in file: {0}'. + format(patch_file)) + failure = True + with open(reports_dir + "licence-" + project + ".log", "a") \ + as gate_report: + gate_report.write('Licence header missing in file: {0}\n'. + format(patch_file)) + + +def process_failure(): + """ If any scan operations register a failure, sys.exit(1) is called + to allow jjb to register a failure""" + if failure: + logger.error('Failures registered') + sys.exit(1) diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py new file mode 100644 index 0000000..9ab9e17 --- /dev/null +++ b/anteater/src/project_scan.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +############################################################################## +# Copyright (c) 2017 Luke Hinds , Red Hat +# +# All rights reserved. 
This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +""" + Accepts the --path argument and iterates the root directory using os.walk + If a file is a binary, or contains a blacklisted string. If any violations + are found, the script adds the violation to a log file. +""" + +from __future__ import division, print_function, absolute_import +import ConfigParser +import os +import re +import anteater.utils.anteater_logger as antlog +import anteater.src.get_lists as get_lists +from binaryornot.check import is_binary + +logger = antlog.Logger(__name__).getLogger() +config = ConfigParser.RawConfigParser() +config.read('anteater.conf') +reports_dir = config.get('config', 'reports_dir') +gate_checks = config.get('config', 'gate_checks') +ignore_dirs = ['.git'] + + +def prepare_project(project, project_dir): + """ Generates blacklists / whitelists and calls main functions """ + + # Get Various Lists / Project Waivers + lists = get_lists.GetLists() + + # Get binary white list + binary_list, binary_project_list = lists.binary_list(project) + + # Get file name black list and project waivers + file_audit_list, file_audit_project_list = lists.file_audit_list(project) + + # Get file content black list and project waivers + file_content_list, project_content_list = lists.file_content_list(project) + + # Get Licence Lists + licence_ext = lists.licence_extensions() + licence_ignore = lists.licence_ignore() + + # Perform rudimentary scans + scan_file(project_dir, project, binary_list, binary_project_list, + file_audit_list, file_audit_project_list, file_content_list, + project_content_list) + + # Perform licence header checks + licence_check(licence_ext, licence_ignore, project, project_dir) + + +def scan_file(project_dir, project, 
binary_list, binary_project_list, + file_audit_list, file_audit_project_list, file_content_list, + project_content_list): + """Searches for banned strings and files that are listed """ + for root, dirs, files in os.walk(project_dir): + # Filter out ignored directories from list. + dirs[:] = [d for d in dirs if d not in ignore_dirs] + for items in files: + full_path = os.path.join(root, items) + # Check for Blacklisted file names + if file_audit_list.search(full_path) and not \ + file_audit_project_list.search(full_path): + match = file_audit_list.search(full_path) + logger.error('Blacklisted filename: {0}'. + format(full_path)) + logger.error('Matched String: {0}'. + format(match.group())) + with open(reports_dir + "file-names_" + project + ".log", + "a") as gate_report: + gate_report. \ + write('Blacklisted filename: {0}\n'. + format(full_path)) + gate_report. \ + write('Matched String: {0}'. + format(match.group())) + + if not is_binary(full_path): + fo = open(full_path, 'r') + lines = fo.readlines() + for line in lines: + # Check for sensitive content in project files + if file_content_list.search(line) and not \ + project_content_list.search(line): + match = file_content_list.search(line) + logger.error('File contains violation: {0}'. + format(full_path)) + logger.error('Flagged Content: {0}'. + format(line.rstrip())) + logger.error('Matched String: {0}'. + format(match.group())) + with open(reports_dir + "contents_" + project + ".log", + "a") \ + as gate_report: + gate_report. \ + write('File contains violation: {0}\n'. + format(full_path)) + gate_report. \ + write('Flagged Content: {0}'. + format(line)) + gate_report. \ + write('Matched String: {0}\n'. + format(match.group())) + else: + # Check if Binary is whitelisted + if not binary_list.search(full_path) \ + and not binary_project_list.search(full_path): + logger.error('Non Whitelisted Binary: {0}'. 
+ format(full_path)) + with open(reports_dir + "binaries-" + project + ".log", + "a") \ + as gate_report: + gate_report.write('Non Whitelisted Binary: {0}\n'. + format(full_path)) + + +def licence_check(licence_ext, licence_ignore, project, project_dir): + """ Peform basic checks for the presence of licence strings """ + for root, dirs, files in os.walk(project_dir): + dirs[:] = [d for d in dirs if d not in ignore_dirs] + for file in files: + if file.endswith(tuple(licence_ext)) \ + and file not in licence_ignore: + full_path = os.path.join(root, file) + if not is_binary(full_path): + fo = open(full_path, 'r') + content = fo.read() + # Note: Hardcoded use of 'copyright' & 'spdx' is the result + # of a decision made at 2017 plugfest to limit searches to + # just these two strings. + if re.search("copyright", content, re.IGNORECASE): + logger.info('Licence string present: {0}'. + format(full_path)) + elif re.search("spdx", content, re.IGNORECASE): + logger.info('Licence string present: {0}'. + format(full_path)) + else: + logger.error('Licence header missing: {0}'. + format(full_path)) + with open(reports_dir + "licence-" + project + ".log", + "a") \ + as gate_report: + gate_report.write('Licence header missing: {0}\n'. + format(full_path)) -- cgit 1.2.3-korg