From 5441bf6d822ed743b756af148a9ea74024c795a0 Mon Sep 17 00:00:00 2001 From: lhinds Date: Thu, 14 Sep 2017 15:39:48 +0100 Subject: Introduce ignore list for content scans Anteater was reporting fails on files which are documents and so harmless (for example an rst file) This patch introduces a file_ignore list in master_list.yaml Change-Id: I87c73c80a36114a7df9e1da47d89ca14e3bf668a Signed-off-by: lhinds --- anteater/src/get_lists.py | 8 +++- anteater/src/patch_scan.py | 9 ++-- anteater/src/project_scan.py | 111 +++++++++++++++++++++++-------------------- master_list.yaml | 4 ++ 4 files changed, 76 insertions(+), 56 deletions(-) diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py index fd80a6a..17de7cb 100644 --- a/anteater/src/get_lists.py +++ b/anteater/src/get_lists.py @@ -87,7 +87,6 @@ class GetLists(object): binary_hash = (yl['binaries'][project][file_name]) return binary_hash except KeyError: - logger.info('No checksum entries found for %s', file_name) binary_hash = 'null' return binary_hash @@ -134,6 +133,13 @@ class GetLists(object): return master_list, project_list_re + def file_ignore(self): + try: + file_ignore = (yl['file_ignore']) + except KeyError: + logger.error('Key Error processing file_ignore list values') + return file_ignore + def licence_extensions(self): try: licence_extensions = (yl['licence']['licence_ext']) diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py index 083639f..3b71f0a 100644 --- a/anteater/src/patch_scan.py +++ b/anteater/src/patch_scan.py @@ -49,6 +49,9 @@ def prepare_patchset(project, patchset): # Get file content black list and project waivers master_list, project_list_re = lists.file_content_list(project) + # Get File Ignore Lists + file_ignore = lists.file_ignore() + # Get Licence Lists licence_ext = lists.licence_extensions() licence_ignore = lists.licence_ignore() @@ -67,7 +70,7 @@ def prepare_patchset(project, patchset): scan_patch(project, patch_file, binary_list, file_audit_list, file_audit_project_list, master_list, project_list_re, licence_ext, - licence_ignore) + file_ignore, licence_ignore) # Process each file in patch set using waivers generated above # Process final result @@ -76,7 +79,7 @@ def prepare_patchset(project, patchset): def scan_patch(project, patch_file, binary_list, file_audit_list, file_audit_project_list, master_list, - project_list_re, licence_ext, licence_ignore): + project_list_re, licence_ext, file_ignore, licence_ignore): """ Scan actions for each commited file in patch set """ global failure if is_binary(patch_file): @@ -122,7 +125,7 @@ def scan_patch(project, patch_file, binary_list, file_audit_list, except IOError: file_exists = False - if file_exists: + if file_exists and not patch_file.endswith(tuple(file_ignore)): for line in lines: for key, value in master_list.iteritems(): regex = value['regex'] diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py index 411e47f..12e9a97 100644 --- a/anteater/src/project_scan.py +++ b/anteater/src/project_scan.py @@ -49,13 +49,16 @@ def prepare_project(project, project_dir): # Get file content black list and project waivers master_list, project_list = lists.file_content_list(project) + # Get File Ignore Lists + file_ignore = lists.file_ignore() + # Get Licence Lists licence_ext = lists.licence_extensions() licence_ignore = lists.licence_ignore() # Perform rudimentary scans scan_file(project_dir, project, binary_list,file_audit_list, - file_audit_project_list, master_list, + file_audit_project_list, master_list, file_ignore, project_list) # Perform licence header checks @@ -64,7 +67,7 @@ def prepare_project(project, project_dir): def scan_file(project_dir, project, binary_list, file_audit_list, - file_audit_project_list, master_list, + file_audit_project_list, master_list, file_ignore, project_list): """Searches for banned strings and files that are listed """ for root, dirs, files in os.walk(project_dir): @@ -87,60 +90,64 @@ def scan_file(project_dir, project, binary_list, file_audit_list, write('Matched String: {0}'. format(match.group())) - if not is_binary(full_path): - try: - fo = open(full_path, 'r') - lines = fo.readlines() - except IOError: - logger.error('%s does not exist', full_path) - - for line in lines: - # Check for sensitive content in project files - for key, value in master_list.iteritems(): - regex = value['regex'] - desc = value['desc'] - if re.search(regex, line) and not re.search(project_list, line): - logger.error('File contains violation: %s', full_path) - logger.error('Flagged Content: %s', line.rstrip()) - logger.error('Matched Regular Exp: %s', regex) - logger.error('Rationale: %s', desc.rstrip()) - with open(reports_dir + "contents-" + project + ".log", - "a") \ - as gate_report: - gate_report. \ - write('File contains violation: {0}\n'. - format(full_path)) - gate_report. \ - write('Flagged Content: {0}'. - format(line)) - gate_report. \ - write('Matched Regular Exp: {0}'. - format(regex)) - gate_report. \ - write('Rationale: {0}\n'. - format(desc.rstrip())) - else: - # Check if Binary is whitelisted - hashlist = get_lists.GetLists() - binary_hash = hashlist.binary_hash(project, full_path) - if not binary_list.search(full_path): - with open(full_path, 'rb') as afile: - buf = afile.read() - hasher.update(buf) - if hasher.hexdigest() in binary_hash: - logger.info('Found matching file hash for file: %s', + # Check if Binary is whitelisted + hashlist = get_lists.GetLists() + binary_hash = hashlist.binary_hash(project, full_path) + if is_binary(full_path) and not binary_list.search(full_path): + with open(full_path, 'rb') as afile: + buf = afile.read() + hasher.update(buf) + if hasher.hexdigest() in binary_hash: + logger.info('Found matching file hash for file: %s', full_path) - else: - logger.error('Non Whitelisted Binary file: %s', - full_path) - logger.error('Please submit patch with this hash: %s', - hasher.hexdigest()) - with open(reports_dir + "binaries-" + project + ".log", - "a") \ - as gate_report: + else: + logger.error('Non Whitelisted Binary file: %s', + full_path) + logger.error('Please submit patch with this hash: %s', + hasher.hexdigest()) + with open(reports_dir + "binaries-" + project + ".log", + "a") as gate_report: gate_report.write('Non Whitelisted Binary: {0}\n'. format(full_path)) + else: + if not items.endswith(tuple(file_ignore)): + try: + fo = open(full_path, 'r') + lines = fo.readlines() + except IOError: + logger.error('%s does not exist', full_path) + + for line in lines: + # Check for sensitive content in project files + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = value['desc'] + if re.search(regex, line) and not re.search( + project_list, line): + logger.error('File contains violation: %s', + full_path) + logger.error('Flagged Content: %s', + line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) + with open(reports_dir + "contents-" + project + + ".log", "a") \ + as gate_report: + gate_report. \ + write('File contains violation: {0}\n'. + format(full_path)) + gate_report. \ + write('Flagged Content: {0}'. + format(line)) + gate_report. \ + write('Matched Regular Exp: {0}'. + format(regex)) + gate_report. \ + write('Rationale: {0}\n'. + format(desc.rstrip())) + + def licence_root_check(project_dir, project): if os.path.isfile(project_dir + '/LICENSE'): diff --git a/master_list.yaml b/master_list.yaml index 4ee1f4c..4c33835 100644 --- a/master_list.yaml +++ b/master_list.yaml @@ -189,6 +189,10 @@ file_audits: regex: 0\.0\.0\.0 desc: "Interface listening on all addresses - may break security zones" +file_ignore: + - '.rst' + - '.md' + licence: licence_ext: - '.java' -- cgit 1.2.3-korg