about | summary | refs | log | tree | commit | diff | stats
path: root/anteater
diff options
context:
space:
mode:
Diffstat (limited to 'anteater')
-rw-r--r--  anteater/src/get_lists.py     17
-rw-r--r--  anteater/src/patch_scan.py    38
-rw-r--r--  anteater/src/project_scan.py  39
3 files changed, 52 insertions, 42 deletions
diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py
index 2419660..fd80a6a 100644
--- a/anteater/src/get_lists.py
+++ b/anteater/src/get_lists.py
@@ -120,24 +120,19 @@ class GetLists(object):
project_list = False
self.load_project_exception_file(yl.get('project_exceptions'), project)
try:
- default_list = set((yl['file_audits']['file_contents']))
+ master_list = (yl['file_audits']['file_contents'])
+
except KeyError:
logger.error('Key Error processing file_contents list values')
+
try:
project_list = set((yl['file_audits'][project]['file_contents']))
+ project_list_re = re.compile("|".join(project_list),
+ flags=re.IGNORECASE)
except KeyError:
logger.info('No file_contents waivers found for %s', project)
- file_contents_re = re.compile("|".join(default_list),
- flags=re.IGNORECASE)
-
- if project_list:
- file_contents_proj_re = re.compile("|".join(project_list),
- flags=re.IGNORECASE)
- return file_contents_re, file_contents_proj_re
- else:
- file_contents_proj_re = re.compile("")
- return file_contents_re, file_contents_proj_re
+ return master_list, project_list_re
def licence_extensions(self):
try:
diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py
index 873c069..9f15556 100644
--- a/anteater/src/patch_scan.py
+++ b/anteater/src/patch_scan.py
@@ -47,8 +47,7 @@ def prepare_patchset(project, patchset):
file_audit_list, file_audit_project_list = lists.file_audit_list(project)
# Get file content black list and project waivers
- file_content_list, \
- file_content_project_list = lists.file_content_list(project)
+ master_list, project_list_re = lists.file_content_list(project)
# Get Licence Lists
licence_ext = lists.licence_extensions()
@@ -67,7 +66,7 @@ def prepare_patchset(project, patchset):
# Perform binary and file / content checks
scan_patch(project, patch_file, binary_list,
file_audit_list, file_audit_project_list,
- file_content_list, file_content_project_list, licence_ext,
+ master_list, project_list_re, licence_ext,
licence_ignore)
# Process each file in patch set using waivers generated above
@@ -76,8 +75,8 @@ def prepare_patchset(project, patchset):
def scan_patch(project, patch_file, binary_list, file_audit_list,
- file_audit_project_list, file_content_list,
- file_content_project_list, licence_ext, licence_ignore):
+ file_audit_project_list, master_list,
+ project_list_re, licence_ext, licence_ignore):
""" Scan actions for each commited file in patch set """
global failure
if is_binary(patch_file):
@@ -116,16 +115,22 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
format(match.group()))
# Open file to check for blacklisted content
- fo = open(patch_file, 'r')
- lines = fo.readlines()
+ try:
+ fo = open(patch_file, 'r')
+ lines = fo.readlines()
+ except IOError:
+ logger.error('%s does not exist', patch_file)
+ sys.exit(1)
for line in lines:
- if file_content_list.search(line) and not \
- file_content_project_list.search(line):
- match = file_content_list.search(line)
- logger.error('File contains violation: %s', patch_file)
- logger.error('Flagged Content: %s', line.rstrip())
- logger.error('Matched String: %s', match.group())
+ for key, value in master_list.iteritems():
+ regex = value['regex']
+ desc = value['desc']
+ if re.search(regex, line) and not re.search(project_list_re, line):
+ logger.error('File contains violation: %s', patch_file)
+ logger.error('Flagged Content: %s', line.rstrip())
+ logger.error('Matched Regular Exp: %s', regex)
+ logger.error('Rationale: %s', desc.rstrip())
failure = True
with open(reports_dir + "contents_" + project + ".log",
"a") as gate_report:
@@ -133,9 +138,10 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
format(patch_file))
gate_report.write('Flagged Content: {0}'.
format(line))
- gate_report.write('Matched String: {0}\n'.
- format(match.group()))
-
+ gate_report.write('Matched Regular Exp: {0}'.
+ format(regex))
+ gate_report.write('Rationale: {0}'.
+ format(desc.rstrip()))
# Run license check
licence_check(project, licence_ext, licence_ignore, patch_file)
diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py
index 5ac8b10..3c37621 100644
--- a/anteater/src/project_scan.py
+++ b/anteater/src/project_scan.py
@@ -47,7 +47,7 @@ def prepare_project(project, project_dir):
file_audit_list, file_audit_project_list = lists.file_audit_list(project)
# Get file content black list and project waivers
- file_content_list, project_content_list = lists.file_content_list(project)
+ master_list, project_list = lists.file_content_list(project)
# Get Licence Lists
licence_ext = lists.licence_extensions()
@@ -55,8 +55,8 @@ def prepare_project(project, project_dir):
# Perform rudimentary scans
scan_file(project_dir, project, binary_list,file_audit_list,
- file_audit_project_list, file_content_list,
- project_content_list)
+ file_audit_project_list, master_list,
+ project_list)
# Perform licence header checks
licence_check(licence_ext, licence_ignore, project, project_dir)
@@ -64,8 +64,8 @@ def prepare_project(project, project_dir):
def scan_file(project_dir, project, binary_list, file_audit_list,
- file_audit_project_list, file_content_list,
- project_content_list):
+ file_audit_project_list, master_list,
+ project_list):
"""Searches for banned strings and files that are listed """
for root, dirs, files in os.walk(project_dir):
# Filter out ignored directories from list.
@@ -88,16 +88,22 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
format(match.group()))
if not is_binary(full_path):
- fo = open(full_path, 'r')
- lines = fo.readlines()
+ try:
+ fo = open(full_path, 'r')
+ lines = fo.readlines()
+ except IOError:
+ logger.error('%s does not exist', full_path)
+
for line in lines:
# Check for sensitive content in project files
- if file_content_list.search(line) and not \
- project_content_list.search(line):
- match = file_content_list.search(line)
- logger.error('File contains violation: %s', full_path)
- logger.error('Flagged Content: %s', line.rstrip())
- logger.error('Matched String: %s', match.group())
+ for key, value in master_list.iteritems():
+ regex = value['regex']
+ desc = value['desc']
+ if re.search(regex, line) and not re.search(project_list, line):
+ logger.error('File contains violation: %s', full_path)
+ logger.error('Flagged Content: %s', line.rstrip())
+ logger.error('Matched Regular Exp: %s', regex)
+ logger.error('Rationale: %s', desc.rstrip())
with open(reports_dir + "contents-" + project + ".log",
"a") \
as gate_report:
@@ -108,8 +114,11 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
write('Flagged Content: {0}'.
format(line))
gate_report. \
- write('Matched String: {0}\n'.
- format(match.group()))
+ write('Matched Regular Exp: {0}'.
+ format(regex))
+ gate_report. \
+ write('Rationale: {0}\n'.
+ format(desc.rstrip()))
else:
# Check if Binary is whitelisted
hashlist = get_lists.GetLists()