aboutsummaryrefslogtreecommitdiffstats
path: root/anteater/src
diff options
context:
space:
mode:
authorlhinds <lhinds@redhat.com>2017-05-17 13:31:18 +0100
committerlhinds <lhinds@redhat.com>2017-05-22 14:12:27 +0100
commit0142c227fca974fb65561d0aeb9b38c8683e22e6 (patch)
treef802b60e2ceab8b033212568d3adddc754faa7da /anteater/src
parent9c00ca00dcad5624288de38e0a529f7f7b3915db (diff)
Initial code push of Anteater
Likely far too much to cover in a commit msg. Main bulk is the Anteater code itself, alongside packaging requirements and build tools and Dockerfile. Unit tests are planned as a follow up, so pushing this for now so that efforts can get underway to integrate the tool with jjb. Questions on how it works, please reach me in IRC. Change-Id: I2cd3cae391f8bf2cdc91b39c56dfc4833a1c4913 Signed-off-by: lhinds <lhinds@redhat.com>
Diffstat (limited to 'anteater/src')
-rw-r--r--anteater/src/__init__.py6
-rw-r--r--anteater/src/get_lists.py119
-rw-r--r--anteater/src/patch_scan.py164
-rw-r--r--anteater/src/project_scan.py154
4 files changed, 443 insertions, 0 deletions
diff --git a/anteater/src/__init__.py b/anteater/src/__init__.py
new file mode 100644
index 0000000..896994c
--- /dev/null
+++ b/anteater/src/__init__.py
@@ -0,0 +1,6 @@
+import pkg_resources
+
# Resolve the package version from the installed distribution metadata.
try:
    __version__ = pkg_resources.get_distribution(__name__).version
except Exception:
    # Best effort: any failure to look up the distribution falls back to a
    # placeholder. Catching Exception (instead of a bare except) lets
    # KeyboardInterrupt and SystemExit propagate normally.
    __version__ = 'unknown'
diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py
new file mode 100644
index 0000000..8941510
--- /dev/null
+++ b/anteater/src/get_lists.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+"""
+ Gathers various values from the gate check yaml file and return them to the
+ calling instance
+"""
+
+import anteater.utils.anteater_logger as antlog
+import ConfigParser
+import yaml
+import re
+
# Module-level setup, executed once at import time: read anteater.conf
# (a relative path, so it is resolved against the current working
# directory) to find the gate check YAML file.
config = ConfigParser.RawConfigParser()
config.read('anteater.conf')
logger = antlog.Logger(__name__).getLogger()
gate_checks = config.get('config', 'gate_checks')

# yl is the parsed gate check document queried by the GetLists methods.
with open(gate_checks) as f:
    yl = yaml.safe_load(f)
+
+
class GetLists(object):
    """ Builds the regular expressions and plain lists used by the scans.

    Every value is read from ``yl``, the gate check YAML document loaded
    when this module is imported. List entries are treated as regular
    expressions, joined with ``|`` and compiled case-insensitively.
    """

    # A pattern that cannot match any input. It stands in for "no entries".
    # An empty pattern ("") must NOT be used for that purpose: it matches
    # every string, so callers testing ``not waiver_re.search(x)`` would
    # treat every file as waived and never report a violation.
    _NEVER_MATCH = r'(?!)'

    def __init__(self, *args):
        # Placeholder for future args if more filters are needed
        self.args = args

    @staticmethod
    def _compile_patterns(patterns):
        """ Compile an iterable of regex strings into a single alternation.

        Duplicates are dropped while keeping first-seen order so that the
        alternative reported by ``match.group()`` is deterministic.

        Args:
            patterns: iterable of regex strings; may be empty or None.

        Returns:
            A compiled, case-insensitive regex matching any entry, or a
            regex that never matches when there are no entries.
        """
        unique = []
        for pattern in patterns or ():
            if pattern not in unique:
                unique.append(pattern)
        if not unique:
            return re.compile(GetLists._NEVER_MATCH)
        return re.compile("|".join(unique), flags=re.IGNORECASE)

    def _lookup(self, section, key, label, project):
        """ Fetch the default list and the optional project waiver list.

        Args:
            section: top-level key of the YAML document.
            key: name of the list below the section / project entry.
            label: name used for this list in log messages.
            project: project whose waivers should be looked up.

        Returns:
            Tuple ``(default_list, project_list)``; ``project_list`` is
            None when the project declares no waivers for ``key``.

        Raises:
            KeyError: the default list is missing. The error is logged and
                re-raised, because continuing without the default list
                cannot produce a meaningful scan.
        """
        try:
            default_list = yl[section][key]
        except KeyError:
            logger.error('Key Error processing {0} list values'.
                         format(label))
            raise

        try:
            project_list = yl[section][project][key]
        except (KeyError, TypeError):
            # TypeError covers a project entry that exists but is empty.
            project_list = None
            logger.info('No {0} waivers found for {1}'.
                        format(label, project))
        else:
            logger.info('{0} waivers found for {1}'.
                        format(label, project))
        return default_list, project_list

    def _licence_value(self, key, label):
        """ Return ``yl['licence'][key]``; log and re-raise if missing. """
        try:
            return yl['licence'][key]
        except KeyError:
            logger.error('Key Error processing {0} list values'.
                         format(label))
            raise

    def binary_list(self, project):
        """ Return (default, project) regexes of whitelisted binaries. """
        default_list, project_list = self._lookup(
            'binaries', 'binary_ignore', 'binary', project)
        return (self._compile_patterns(default_list),
                self._compile_patterns(project_list))

    def file_audit_list(self, project):
        """ Return (blacklist, project waiver) regexes for file names. """
        default_list, project_list = self._lookup(
            'file_audits', 'file_names', 'file_names', project)
        return (self._compile_patterns(default_list),
                self._compile_patterns(project_list))

    def file_content_list(self, project):
        """ Return (blacklist, project waiver) regexes for file contents. """
        default_list, project_list = self._lookup(
            'file_audits', 'file_contents', 'file_contents', project)
        return (self._compile_patterns(default_list),
                self._compile_patterns(project_list))

    def licence_extensions(self):
        """ Return the ``licence_ext`` value of the ``licence`` section. """
        return self._licence_value('licence_ext', 'licence_extensions')

    def licence_ignore(self):
        """ Return the ``licence_ignore`` value of the ``licence`` section. """
        return self._licence_value('licence_ignore', 'licence_ignore')
diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py
new file mode 100644
index 0000000..5bd1609
--- /dev/null
+++ b/anteater/src/patch_scan.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+"""
+ Accepts the --patchset argument and iterates through each line of the
+ patchset file to perform various checks such as if the file is a binary, or
+ contains a blacklisted string. If any violations are found, the script
+ exits with code 1 and logs the violation(s) found.
+"""
+
+from __future__ import division, print_function, absolute_import
+from binaryornot.check import is_binary
+import anteater.utils.anteater_logger as antlog
+import anteater.src.get_lists as get_lists
+import ConfigParser
+import sys
+import re
+
+
# Set to True by any check that records a violation; process_failure()
# reads it once the whole patchset has been scanned.
failure = False

# Report location comes from anteater.conf, read relative to the current
# working directory when this module is imported.
logger = antlog.Logger(__name__).getLogger()
config = ConfigParser.RawConfigParser()
config.read('anteater.conf')
reports_dir = config.get('config', 'reports_dir')
+
+
def prepare_patchset(project, patchset):
    """ Create black/white lists and default / project waivers
    and iterate over the patchset file.

    Args:
        project: project name, used to look up project waivers and to
            name the report files.
        patchset: path of a text file listing one file path per line.

    Every listed file is passed to scan_patch(); process_failure() is
    called afterwards and exits with status 1 if any violation was found.
    """

    # Get Various Lists / Project Waivers
    lists = get_lists.GetLists()

    # Get binary white list
    binary_list, binary_project_list = lists.binary_list(project)

    # Get file name black list and project waivers
    file_audit_list, file_audit_project_list = lists.file_audit_list(project)

    # Get file content black list and project waivers
    file_content_list, \
        file_content_project_list = lists.file_content_list(project)

    # Get Licence Lists
    licence_ext = lists.licence_extensions()
    licence_ignore = lists.licence_ignore()

    # Read the file list up front so the patchset handle is closed before
    # any scanning starts. Line endings (LF or CRLF) are removed and blank
    # lines skipped, since an empty string is not a usable path.
    with open(patchset, 'r') as fo:
        patch_files = [line.rstrip('\r\n') for line in fo]

    for patch_file in patch_files:
        if not patch_file:
            continue
        # Perform binary and file / content checks
        scan_patch(project, patch_file, binary_list, binary_project_list,
                   file_audit_list, file_audit_project_list,
                   file_content_list, file_content_project_list, licence_ext,
                   licence_ignore)

    # Process final result
    process_failure()
+
+
def _append_report(report_prefix, project, entries):
    """ Append text entries to reports_dir + report_prefix + project + .log """
    with open(reports_dir + report_prefix + project + ".log", "a") \
            as gate_report:
        gate_report.writelines(entries)


def scan_patch(project, patch_file, binary_list, binary_project_list,
               file_audit_list, file_audit_project_list, file_content_list,
               file_content_project_list, licence_ext, licence_ignore):
    """ Scan actions for each committed file in patch set.

    Binary files must match the default or project binary whitelist.
    Other files are checked against the file name blacklist, then line by
    line against the content blacklist, then handed to licence_check().
    Each violation is logged, appended to a per-project report file and
    recorded in the module-level ``failure`` flag.

    Args:
        project: project name, used in report file names.
        patch_file: path of the file to scan.
        binary_list, binary_project_list: compiled whitelist regexes.
        file_audit_list, file_audit_project_list: compiled file name
            blacklist regex and project waiver regex.
        file_content_list, file_content_project_list: compiled content
            blacklist regex and project waiver regex.
        licence_ext, licence_ignore: forwarded to licence_check().
    """
    global failure
    if is_binary(patch_file):
        if not binary_list.search(patch_file) and not binary_project_list\
                .search(patch_file):
            logger.error('Non Whitelisted Binary file: {0}'.
                         format(patch_file))
            failure = True
            _append_report("binaries-", project,
                           ['Non Whitelisted Binary file: {0}\n'.
                            format(patch_file)])
        return

    # Check file names / extensions (search once, reuse the match)
    match = file_audit_list.search(patch_file)
    if match and not file_audit_project_list.search(patch_file):
        logger.error('Blacklisted file: {0}'.
                     format(patch_file))
        logger.error('Matched String: {0}'.
                     format(match.group()))
        failure = True
        # Both entries end with a newline so consecutive report entries
        # do not run together on one line.
        _append_report("file-names_", project,
                       ['Blacklisted file: {0}\n'.format(patch_file),
                        'Matched String: {0}\n'.format(match.group())])

    # Check file content for blacklisted strings; the context manager
    # closes the handle even if a check raises.
    with open(patch_file, 'r') as fo:
        for line in fo:
            match = file_content_list.search(line)
            if not match or file_content_project_list.search(line):
                continue
            logger.error('File contains violation: {0}'.
                         format(patch_file))
            logger.error('Flagged Content: {0}'.
                         format(line.rstrip()))
            logger.error('Matched String: {0}'.
                         format(match.group()))
            failure = True
            # The line's own newline is replaced by an explicit one, so a
            # final line without a newline still ends its report entry.
            _append_report("contents_", project,
                           ['File contains violation: {0}\n'.
                            format(patch_file),
                            'Flagged Content: {0}\n'.
                            format(line.rstrip('\n')),
                            'Matched String: {0}\n'.
                            format(match.group())])

    # Run license check
    # NOTE(review): assumed to apply to non-binary files only, matching the
    # is_binary guard in project_scan.licence_check - confirm.
    licence_check(project, licence_ext, licence_ignore, patch_file)
+
+
def licence_check(project, licence_ext,
                  licence_ignore, patch_file):
    """ Performs licence checks.

    Only files whose name ends with one of ``licence_ext`` and that are
    not listed in ``licence_ignore`` are inspected. A file passes when its
    content contains 'copyright' or 'spdx' (case-insensitive); otherwise
    the failure is logged, appended to the licence report and recorded in
    the module-level ``failure`` flag.

    Args:
        project: project name, used in the report file name.
        licence_ext: iterable of file name suffixes to check.
        licence_ignore: collection of paths exempt from the check.
        patch_file: path of the file to inspect.
    """
    global failure
    if not patch_file.endswith(tuple(licence_ext)) \
            or patch_file in licence_ignore:
        return

    # Read inside a context manager so the handle is always released.
    with open(patch_file, 'r') as fo:
        content = fo.read()

    # Note: Hardcoded use of 'copyright' & 'spdx' is the result
    # of a decision made at 2017 plugfest to limit searches to
    # just these two strings.
    if re.search("copyright|spdx", content, re.IGNORECASE):
        logger.info('Contains needed Licence string: {0}'.
                    format(patch_file))
        return

    logger.error('Licence header missing in file: {0}'.
                 format(patch_file))
    failure = True
    with open(reports_dir + "licence-" + project + ".log", "a") \
            as gate_report:
        gate_report.write('Licence header missing in file: {0}\n'.
                          format(patch_file))
+
+
def process_failure():
    """ Exit with status 1 when any scan operation registered a failure,
    so that jjb can register the failure. Returns normally otherwise. """
    if not failure:
        return
    logger.error('Failures registered')
    sys.exit(1)
diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py
new file mode 100644
index 0000000..9ab9e17
--- /dev/null
+++ b/anteater/src/project_scan.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+"""
+ Accepts the --path argument and iterates the root directory using os.walk
+ If a file is a binary, or contains a blacklisted string. If any violations
+ are found, the script adds the violation to a log file.
+"""
+
+from __future__ import division, print_function, absolute_import
+import ConfigParser
+import os
+import re
+import anteater.utils.anteater_logger as antlog
+import anteater.src.get_lists as get_lists
+from binaryornot.check import is_binary
+
# Directory names pruned from every os.walk() traversal in this module.
ignore_dirs = ['.git']

# Settings come from anteater.conf, read relative to the current working
# directory when this module is imported.
logger = antlog.Logger(__name__).getLogger()
config = ConfigParser.RawConfigParser()
config.read('anteater.conf')
reports_dir = config.get('config', 'reports_dir')
gate_checks = config.get('config', 'gate_checks')
+
+
def prepare_project(project, project_dir):
    """ Collect the black / white lists for a project and run both scans.

    Args:
        project: project name, used for waiver lookup and report names.
        project_dir: root directory of the tree to scan.
    """
    waivers = get_lists.GetLists()

    # Whitelist of binaries (default, project specific)
    binary_re, binary_project_re = waivers.binary_list(project)

    # Blacklist of file names and the project waivers for it
    names_re, names_project_re = waivers.file_audit_list(project)

    # Blacklist of file contents and the project waivers for it
    contents_re, contents_project_re = waivers.file_content_list(project)

    # Licence settings
    extensions = waivers.licence_extensions()
    ignored = waivers.licence_ignore()

    # First pass: file names, file contents and binaries
    scan_file(project_dir, project, binary_re, binary_project_re,
              names_re, names_project_re, contents_re,
              contents_project_re)

    # Second pass: licence headers
    licence_check(extensions, ignored, project, project_dir)
+
+
def _append_report(report_prefix, project, entries):
    """ Append text entries to reports_dir + report_prefix + project + .log """
    with open(reports_dir + report_prefix + project + ".log", "a") \
            as gate_report:
        gate_report.writelines(entries)


def scan_file(project_dir, project, binary_list, binary_project_list,
              file_audit_list, file_audit_project_list, file_content_list,
              project_content_list):
    """ Searches a project tree for banned file names, banned strings and
    binaries that are not whitelisted.

    Walks ``project_dir`` (skipping directories named in ``ignore_dirs``).
    Every violation is logged and appended to a per-project report file.

    Args:
        project_dir: root directory to walk.
        project: project name, used in report file names.
        binary_list, binary_project_list: compiled whitelist regexes.
        file_audit_list, file_audit_project_list: compiled file name
            blacklist regex and project waiver regex.
        file_content_list, project_content_list: compiled content
            blacklist regex and project waiver regex.
    """
    for root, dirs, files in os.walk(project_dir):
        # Filter out ignored directories from list (in place, so os.walk
        # does not descend into them).
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for items in files:
            full_path = os.path.join(root, items)

            # Check for Blacklisted file names (search once, reuse match)
            match = file_audit_list.search(full_path)
            if match and not file_audit_project_list.search(full_path):
                logger.error('Blacklisted filename: {0}'.
                             format(full_path))
                logger.error('Matched String: {0}'.
                             format(match.group()))
                # Both entries end with a newline so consecutive report
                # entries do not run together on one line.
                _append_report("file-names_", project,
                               ['Blacklisted filename: {0}\n'.
                                format(full_path),
                                'Matched String: {0}\n'.
                                format(match.group())])

            if is_binary(full_path):
                # Check if Binary is whitelisted
                if not binary_list.search(full_path) \
                        and not binary_project_list.search(full_path):
                    logger.error('Non Whitelisted Binary: {0}'.
                                 format(full_path))
                    _append_report("binaries-", project,
                                   ['Non Whitelisted Binary: {0}\n'.
                                    format(full_path)])
                continue

            # Check for sensitive content in project files. The context
            # manager closes each file, so a large tree does not
            # accumulate open handles.
            with open(full_path, 'r') as fo:
                for line in fo:
                    match = file_content_list.search(line)
                    if not match or project_content_list.search(line):
                        continue
                    logger.error('File contains violation: {0}'.
                                 format(full_path))
                    logger.error('Flagged Content: {0}'.
                                 format(line.rstrip()))
                    logger.error('Matched String: {0}'.
                                 format(match.group()))
                    # Explicit newline: a final line without one still
                    # terminates its report entry.
                    _append_report("contents_", project,
                                   ['File contains violation: {0}\n'.
                                    format(full_path),
                                    'Flagged Content: {0}\n'.
                                    format(line.rstrip('\n')),
                                    'Matched String: {0}\n'.
                                    format(match.group())])
+
+
def licence_check(licence_ext, licence_ignore, project, project_dir):
    """ Perform basic checks for the presence of licence strings.

    Walks ``project_dir`` (skipping directories named in ``ignore_dirs``)
    and inspects every non-binary file whose name ends with one of
    ``licence_ext`` and is not listed in ``licence_ignore``. A file passes
    when its content contains 'copyright' or 'spdx' (case-insensitive);
    otherwise it is logged and appended to the licence report.

    Args:
        licence_ext: iterable of file name suffixes to check.
        licence_ignore: collection of file names exempt from the check;
            compared against the bare file name, not the full path.
        project: project name, used in the report file name.
        project_dir: root directory to walk.
    """
    suffixes = tuple(licence_ext)
    for root, dirs, files in os.walk(project_dir):
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for file_name in files:
            if not file_name.endswith(suffixes) \
                    or file_name in licence_ignore:
                continue
            full_path = os.path.join(root, file_name)
            if is_binary(full_path):
                continue

            # Read inside a context manager so each handle is released
            # before the next file is opened.
            with open(full_path, 'r') as fo:
                content = fo.read()

            # Note: Hardcoded use of 'copyright' & 'spdx' is the result
            # of a decision made at 2017 plugfest to limit searches to
            # just these two strings.
            if re.search("copyright|spdx", content, re.IGNORECASE):
                logger.info('Licence string present: {0}'.
                            format(full_path))
                continue

            logger.error('Licence header missing: {0}'.
                         format(full_path))
            with open(reports_dir + "licence-" + project + ".log",
                      "a") as gate_report:
                gate_report.write('Licence header missing: {0}\n'.
                                  format(full_path))