From 0142c227fca974fb65561d0aeb9b38c8683e22e6 Mon Sep 17 00:00:00 2001 From: lhinds Date: Wed, 17 May 2017 13:31:18 +0100 Subject: Initial code push of Anteater Likely far too much to cover in a commit msg. Main bulk is the Anteater code itself, alongside packaging requirements and build tools and Dockerfile. Unit tests are planned as a follow up, so pushing this for now so that efforts can get underway to integrate the tool with jjb. Questions on how it works, please reach me in IRC. Change-Id: I2cd3cae391f8bf2cdc91b39c56dfc4833a1c4913 Signed-off-by: lhinds --- anteater/src/project_scan.py | 154 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 anteater/src/project_scan.py (limited to 'anteater/src/project_scan.py') diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py new file mode 100644 index 0000000..9ab9e17 --- /dev/null +++ b/anteater/src/project_scan.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +############################################################################## +# Copyright (c) 2017 Luke Hinds , Red Hat +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +""" + Accepts the --path argument and iterates the root directory using os.walk, + checking whether each file is a binary or contains a blacklisted string. If any violations + are found, the script adds the violation to a log file. 
+""" + +from __future__ import division, print_function, absolute_import +import ConfigParser +import os +import re +import anteater.utils.anteater_logger as antlog +import anteater.src.get_lists as get_lists +from binaryornot.check import is_binary + +logger = antlog.Logger(__name__).getLogger() +config = ConfigParser.RawConfigParser() +config.read('anteater.conf') +reports_dir = config.get('config', 'reports_dir') +gate_checks = config.get('config', 'gate_checks') +ignore_dirs = ['.git'] + + +def prepare_project(project, project_dir): + """ Generates blacklists / whitelists and calls main functions """ + + # Get Various Lists / Project Waivers + lists = get_lists.GetLists() + + # Get binary white list + binary_list, binary_project_list = lists.binary_list(project) + + # Get file name black list and project waivers + file_audit_list, file_audit_project_list = lists.file_audit_list(project) + + # Get file content black list and project waivers + file_content_list, project_content_list = lists.file_content_list(project) + + # Get Licence Lists + licence_ext = lists.licence_extensions() + licence_ignore = lists.licence_ignore() + + # Perform rudimentary scans + scan_file(project_dir, project, binary_list, binary_project_list, + file_audit_list, file_audit_project_list, file_content_list, + project_content_list) + + # Perform licence header checks + licence_check(licence_ext, licence_ignore, project, project_dir) + + +def scan_file(project_dir, project, binary_list, binary_project_list, + file_audit_list, file_audit_project_list, file_content_list, + project_content_list): + """Searches for banned strings and files that are listed """ + for root, dirs, files in os.walk(project_dir): + # Filter out ignored directories from list. 
+ dirs[:] = [d for d in dirs if d not in ignore_dirs] + for items in files: + full_path = os.path.join(root, items) + # Check for Blacklisted file names + if file_audit_list.search(full_path) and not \ + file_audit_project_list.search(full_path): + match = file_audit_list.search(full_path) + logger.error('Blacklisted filename: {0}'. + format(full_path)) + logger.error('Matched String: {0}'. + format(match.group())) + with open(reports_dir + "file-names_" + project + ".log", + "a") as gate_report: + gate_report. \ + write('Blacklisted filename: {0}\n'. + format(full_path)) + gate_report. \ + write('Matched String: {0}'. + format(match.group())) + + if not is_binary(full_path): + fo = open(full_path, 'r') + lines = fo.readlines() + for line in lines: + # Check for sensitive content in project files + if file_content_list.search(line) and not \ + project_content_list.search(line): + match = file_content_list.search(line) + logger.error('File contains violation: {0}'. + format(full_path)) + logger.error('Flagged Content: {0}'. + format(line.rstrip())) + logger.error('Matched String: {0}'. + format(match.group())) + with open(reports_dir + "contents_" + project + ".log", + "a") \ + as gate_report: + gate_report. \ + write('File contains violation: {0}\n'. + format(full_path)) + gate_report. \ + write('Flagged Content: {0}'. + format(line)) + gate_report. \ + write('Matched String: {0}\n'. + format(match.group())) + else: + # Check if Binary is whitelisted + if not binary_list.search(full_path) \ + and not binary_project_list.search(full_path): + logger.error('Non Whitelisted Binary: {0}'. + format(full_path)) + with open(reports_dir + "binaries-" + project + ".log", + "a") \ + as gate_report: + gate_report.write('Non Whitelisted Binary: {0}\n'. 
+ format(full_path)) + + +def licence_check(licence_ext, licence_ignore, project, project_dir): + """ Peform basic checks for the presence of licence strings """ + for root, dirs, files in os.walk(project_dir): + dirs[:] = [d for d in dirs if d not in ignore_dirs] + for file in files: + if file.endswith(tuple(licence_ext)) \ + and file not in licence_ignore: + full_path = os.path.join(root, file) + if not is_binary(full_path): + fo = open(full_path, 'r') + content = fo.read() + # Note: Hardcoded use of 'copyright' & 'spdx' is the result + # of a decision made at 2017 plugfest to limit searches to + # just these two strings. + if re.search("copyright", content, re.IGNORECASE): + logger.info('Licence string present: {0}'. + format(full_path)) + elif re.search("spdx", content, re.IGNORECASE): + logger.info('Licence string present: {0}'. + format(full_path)) + else: + logger.error('Licence header missing: {0}'. + format(full_path)) + with open(reports_dir + "licence-" + project + ".log", + "a") \ + as gate_report: + gate_report.write('Licence header missing: {0}\n'. + format(full_path)) -- cgit 1.2.3-korg