about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: lhinds <lhinds@redhat.com> 2017-09-14 15:39:48 +0100
committer: lhinds <lhinds@redhat.com> 2017-09-14 15:43:38 +0100
commit: 5441bf6d822ed743b756af148a9ea74024c795a0 (patch)
tree: e4f4549523101cb03621552c5b9884f44b88058b
parent: b3c29549dd5d1048604f45222981d343d0dde09f (diff)
Introduce ignore list for content scans
Anteater was reporting failures on files that are documents and therefore harmless (for example, an rst file). This patch introduces a file_ignore list in master_list.yaml.
Change-Id: I87c73c80a36114a7df9e1da47d89ca14e3bf668a
Signed-off-by: lhinds <lhinds@redhat.com>
-rw-r--r-- anteater/src/get_lists.py 8
-rw-r--r-- anteater/src/patch_scan.py 9
-rw-r--r-- anteater/src/project_scan.py 111
-rw-r--r-- master_list.yaml 4
4 files changed, 76 insertions, 56 deletions
diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py
index fd80a6a..17de7cb 100644
--- a/anteater/src/get_lists.py
+++ b/anteater/src/get_lists.py
@@ -87,7 +87,6 @@ class GetLists(object):
binary_hash = (yl['binaries'][project][file_name])
return binary_hash
except KeyError:
- logger.info('No checksum entries found for %s', file_name)
binary_hash = 'null'
return binary_hash
@@ -134,6 +133,13 @@ class GetLists(object):
return master_list, project_list_re
+ def file_ignore(self):
+ try:
+ file_ignore = (yl['file_ignore'])
+ except KeyError:
+ logger.error('Key Error processing file_ignore list values')
+ return file_ignore
+
def licence_extensions(self):
try:
licence_extensions = (yl['licence']['licence_ext'])
diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py
index 083639f..3b71f0a 100644
--- a/anteater/src/patch_scan.py
+++ b/anteater/src/patch_scan.py
@@ -49,6 +49,9 @@ def prepare_patchset(project, patchset):
# Get file content black list and project waivers
master_list, project_list_re = lists.file_content_list(project)
+ # Get File Ignore Lists
+ file_ignore = lists.file_ignore()
+
# Get Licence Lists
licence_ext = lists.licence_extensions()
licence_ignore = lists.licence_ignore()
@@ -67,7 +70,7 @@ def prepare_patchset(project, patchset):
scan_patch(project, patch_file, binary_list,
file_audit_list, file_audit_project_list,
master_list, project_list_re, licence_ext,
- licence_ignore)
+ file_ignore, licence_ignore)
# Process each file in patch set using waivers generated above
# Process final result
@@ -76,7 +79,7 @@ def prepare_patchset(project, patchset):
def scan_patch(project, patch_file, binary_list, file_audit_list,
file_audit_project_list, master_list,
- project_list_re, licence_ext, licence_ignore):
+ project_list_re, licence_ext, file_ignore, licence_ignore):
""" Scan actions for each commited file in patch set """
global failure
if is_binary(patch_file):
@@ -122,7 +125,7 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
except IOError:
file_exists = False
- if file_exists:
+ if file_exists and not patch_file.endswith(tuple(file_ignore)):
for line in lines:
for key, value in master_list.iteritems():
regex = value['regex']
diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py
index 411e47f..12e9a97 100644
--- a/anteater/src/project_scan.py
+++ b/anteater/src/project_scan.py
@@ -49,13 +49,16 @@ def prepare_project(project, project_dir):
# Get file content black list and project waivers
master_list, project_list = lists.file_content_list(project)
+ # Get File Ignore Lists
+ file_ignore = lists.file_ignore()
+
# Get Licence Lists
licence_ext = lists.licence_extensions()
licence_ignore = lists.licence_ignore()
# Perform rudimentary scans
scan_file(project_dir, project, binary_list,file_audit_list,
- file_audit_project_list, master_list,
+ file_audit_project_list, master_list, file_ignore,
project_list)
# Perform licence header checks
@@ -64,7 +67,7 @@ def prepare_project(project, project_dir):
def scan_file(project_dir, project, binary_list, file_audit_list,
- file_audit_project_list, master_list,
+ file_audit_project_list, master_list, file_ignore,
project_list):
"""Searches for banned strings and files that are listed """
for root, dirs, files in os.walk(project_dir):
@@ -87,60 +90,64 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
write('Matched String: {0}'.
format(match.group()))
- if not is_binary(full_path):
- try:
- fo = open(full_path, 'r')
- lines = fo.readlines()
- except IOError:
- logger.error('%s does not exist', full_path)
-
- for line in lines:
- # Check for sensitive content in project files
- for key, value in master_list.iteritems():
- regex = value['regex']
- desc = value['desc']
- if re.search(regex, line) and not re.search(project_list, line):
- logger.error('File contains violation: %s', full_path)
- logger.error('Flagged Content: %s', line.rstrip())
- logger.error('Matched Regular Exp: %s', regex)
- logger.error('Rationale: %s', desc.rstrip())
- with open(reports_dir + "contents-" + project + ".log",
- "a") \
- as gate_report:
- gate_report. \
- write('File contains violation: {0}\n'.
- format(full_path))
- gate_report. \
- write('Flagged Content: {0}'.
- format(line))
- gate_report. \
- write('Matched Regular Exp: {0}'.
- format(regex))
- gate_report. \
- write('Rationale: {0}\n'.
- format(desc.rstrip()))
- else:
- # Check if Binary is whitelisted
- hashlist = get_lists.GetLists()
- binary_hash = hashlist.binary_hash(project, full_path)
- if not binary_list.search(full_path):
- with open(full_path, 'rb') as afile:
- buf = afile.read()
- hasher.update(buf)
- if hasher.hexdigest() in binary_hash:
- logger.info('Found matching file hash for file: %s',
+ # Check if Binary is whitelisted
+ hashlist = get_lists.GetLists()
+ binary_hash = hashlist.binary_hash(project, full_path)
+ if is_binary(full_path) and not binary_list.search(full_path):
+ with open(full_path, 'rb') as afile:
+ buf = afile.read()
+ hasher.update(buf)
+ if hasher.hexdigest() in binary_hash:
+ logger.info('Found matching file hash for file: %s',
full_path)
- else:
- logger.error('Non Whitelisted Binary file: %s',
- full_path)
- logger.error('Please submit patch with this hash: %s',
- hasher.hexdigest())
- with open(reports_dir + "binaries-" + project + ".log",
- "a") \
- as gate_report:
+ else:
+ logger.error('Non Whitelisted Binary file: %s',
+ full_path)
+ logger.error('Please submit patch with this hash: %s',
+ hasher.hexdigest())
+ with open(reports_dir + "binaries-" + project + ".log",
+ "a") as gate_report:
gate_report.write('Non Whitelisted Binary: {0}\n'.
format(full_path))
+ else:
+ if not items.endswith(tuple(file_ignore)):
+ try:
+ fo = open(full_path, 'r')
+ lines = fo.readlines()
+ except IOError:
+ logger.error('%s does not exist', full_path)
+
+ for line in lines:
+ # Check for sensitive content in project files
+ for key, value in master_list.iteritems():
+ regex = value['regex']
+ desc = value['desc']
+ if re.search(regex, line) and not re.search(
+ project_list, line):
+ logger.error('File contains violation: %s',
+ full_path)
+ logger.error('Flagged Content: %s',
+ line.rstrip())
+ logger.error('Matched Regular Exp: %s', regex)
+ logger.error('Rationale: %s', desc.rstrip())
+ with open(reports_dir + "contents-" + project
+ + ".log", "a") \
+ as gate_report:
+ gate_report. \
+ write('File contains violation: {0}\n'.
+ format(full_path))
+ gate_report. \
+ write('Flagged Content: {0}'.
+ format(line))
+ gate_report. \
+ write('Matched Regular Exp: {0}'.
+ format(regex))
+ gate_report. \
+ write('Rationale: {0}\n'.
+ format(desc.rstrip()))
+
+
def licence_root_check(project_dir, project):
if os.path.isfile(project_dir + '/LICENSE'):
diff --git a/master_list.yaml b/master_list.yaml
index 4ee1f4c..4c33835 100644
--- a/master_list.yaml
+++ b/master_list.yaml
@@ -189,6 +189,10 @@ file_audits:
regex: 0\.0\.0\.0
desc: "Interface listening on all addresses - may break security zones"
+file_ignore:
+ - '.rst'
+ - '.md'
+
licence:
licence_ext:
- '.java'