diff options
author | lhinds <lhinds@redhat.com> | 2017-05-17 13:31:18 +0100 |
---|---|---|
committer | lhinds <lhinds@redhat.com> | 2017-05-22 14:12:27 +0100 |
commit | 0142c227fca974fb65561d0aeb9b38c8683e22e6 (patch) | |
tree | f802b60e2ceab8b033212568d3adddc754faa7da /anteater/src/project_scan.py | |
parent | 9c00ca00dcad5624288de38e0a529f7f7b3915db (diff) |
Initial code push of Anteater
Likely far too much to cover in a commit msg.
Main bulk is the Anteater code itself, alongside
packaging requirements and build tools and Dockerfile.
Unit tests are planned as a follow up, so pushing this for now
so that efforts can get underway to integrate the tool with
jjb.
Questions on how it works, please reach me in IRC.
Change-Id: I2cd3cae391f8bf2cdc91b39c56dfc4833a1c4913
Signed-off-by: lhinds <lhinds@redhat.com>
Diffstat (limited to 'anteater/src/project_scan.py')
-rw-r--r-- | anteater/src/project_scan.py | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py new file mode 100644 index 0000000..9ab9e17 --- /dev/null +++ b/anteater/src/project_scan.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +############################################################################## +# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +""" + Accepts the --path argument and iterates the root directory using os.walk + If a file is a binary, or contains a blacklisted string. If any violations + are found, the script adds the violation to a log file. +""" + +from __future__ import division, print_function, absolute_import +import ConfigParser +import os +import re +import anteater.utils.anteater_logger as antlog +import anteater.src.get_lists as get_lists +from binaryornot.check import is_binary + +logger = antlog.Logger(__name__).getLogger() +config = ConfigParser.RawConfigParser() +config.read('anteater.conf') +reports_dir = config.get('config', 'reports_dir') +gate_checks = config.get('config', 'gate_checks') +ignore_dirs = ['.git'] + + +def prepare_project(project, project_dir): + """ Generates blacklists / whitelists and calls main functions """ + + # Get Various Lists / Project Waivers + lists = get_lists.GetLists() + + # Get binary white list + binary_list, binary_project_list = lists.binary_list(project) + + # Get file name black list and project waivers + file_audit_list, file_audit_project_list = lists.file_audit_list(project) + + # Get file content black list and project waivers + file_content_list, project_content_list = lists.file_content_list(project) + + # Get Licence Lists + licence_ext = 
lists.licence_extensions() + licence_ignore = lists.licence_ignore() + + # Perform rudimentary scans + scan_file(project_dir, project, binary_list, binary_project_list, + file_audit_list, file_audit_project_list, file_content_list, + project_content_list) + + # Perform licence header checks + licence_check(licence_ext, licence_ignore, project, project_dir) + + +def scan_file(project_dir, project, binary_list, binary_project_list, + file_audit_list, file_audit_project_list, file_content_list, + project_content_list): + """Searches for banned strings and files that are listed """ + for root, dirs, files in os.walk(project_dir): + # Filter out ignored directories from list. + dirs[:] = [d for d in dirs if d not in ignore_dirs] + for items in files: + full_path = os.path.join(root, items) + # Check for Blacklisted file names + if file_audit_list.search(full_path) and not \ + file_audit_project_list.search(full_path): + match = file_audit_list.search(full_path) + logger.error('Blacklisted filename: {0}'. + format(full_path)) + logger.error('Matched String: {0}'. + format(match.group())) + with open(reports_dir + "file-names_" + project + ".log", + "a") as gate_report: + gate_report. \ + write('Blacklisted filename: {0}\n'. + format(full_path)) + gate_report. \ + write('Matched String: {0}'. + format(match.group())) + + if not is_binary(full_path): + fo = open(full_path, 'r') + lines = fo.readlines() + for line in lines: + # Check for sensitive content in project files + if file_content_list.search(line) and not \ + project_content_list.search(line): + match = file_content_list.search(line) + logger.error('File contains violation: {0}'. + format(full_path)) + logger.error('Flagged Content: {0}'. + format(line.rstrip())) + logger.error('Matched String: {0}'. + format(match.group())) + with open(reports_dir + "contents_" + project + ".log", + "a") \ + as gate_report: + gate_report. \ + write('File contains violation: {0}\n'. + format(full_path)) + gate_report. 
\ + write('Flagged Content: {0}'. + format(line)) + gate_report. \ + write('Matched String: {0}\n'. + format(match.group())) + else: + # Check if Binary is whitelisted + if not binary_list.search(full_path) \ + and not binary_project_list.search(full_path): + logger.error('Non Whitelisted Binary: {0}'. + format(full_path)) + with open(reports_dir + "binaries-" + project + ".log", + "a") \ + as gate_report: + gate_report.write('Non Whitelisted Binary: {0}\n'. + format(full_path)) + + +def licence_check(licence_ext, licence_ignore, project, project_dir): + """ Peform basic checks for the presence of licence strings """ + for root, dirs, files in os.walk(project_dir): + dirs[:] = [d for d in dirs if d not in ignore_dirs] + for file in files: + if file.endswith(tuple(licence_ext)) \ + and file not in licence_ignore: + full_path = os.path.join(root, file) + if not is_binary(full_path): + fo = open(full_path, 'r') + content = fo.read() + # Note: Hardcoded use of 'copyright' & 'spdx' is the result + # of a decision made at 2017 plugfest to limit searches to + # just these two strings. + if re.search("copyright", content, re.IGNORECASE): + logger.info('Licence string present: {0}'. + format(full_path)) + elif re.search("spdx", content, re.IGNORECASE): + logger.info('Licence string present: {0}'. + format(full_path)) + else: + logger.error('Licence header missing: {0}'. + format(full_path)) + with open(reports_dir + "licence-" + project + ".log", + "a") \ + as gate_report: + gate_report.write('Licence header missing: {0}\n'. + format(full_path)) |