diff options
Diffstat (limited to 'anteater')
-rw-r--r-- | anteater/src/get_lists.py | 17 | ||||
-rw-r--r-- | anteater/src/patch_scan.py | 38 | ||||
-rw-r--r-- | anteater/src/project_scan.py | 39 |
3 files changed, 52 insertions, 42 deletions
diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py index 2419660..fd80a6a 100644 --- a/anteater/src/get_lists.py +++ b/anteater/src/get_lists.py @@ -120,24 +120,19 @@ class GetLists(object): project_list = False self.load_project_exception_file(yl.get('project_exceptions'), project) try: - default_list = set((yl['file_audits']['file_contents'])) + master_list = (yl['file_audits']['file_contents']) + except KeyError: logger.error('Key Error processing file_contents list values') + try: project_list = set((yl['file_audits'][project]['file_contents'])) + project_list_re = re.compile("|".join(project_list), + flags=re.IGNORECASE) except KeyError: logger.info('No file_contents waivers found for %s', project) - file_contents_re = re.compile("|".join(default_list), - flags=re.IGNORECASE) - - if project_list: - file_contents_proj_re = re.compile("|".join(project_list), - flags=re.IGNORECASE) - return file_contents_re, file_contents_proj_re - else: - file_contents_proj_re = re.compile("") - return file_contents_re, file_contents_proj_re + return master_list, project_list_re def licence_extensions(self): try: diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py index 873c069..9f15556 100644 --- a/anteater/src/patch_scan.py +++ b/anteater/src/patch_scan.py @@ -47,8 +47,7 @@ def prepare_patchset(project, patchset): file_audit_list, file_audit_project_list = lists.file_audit_list(project) # Get file content black list and project waivers - file_content_list, \ - file_content_project_list = lists.file_content_list(project) + master_list, project_list_re = lists.file_content_list(project) # Get Licence Lists licence_ext = lists.licence_extensions() @@ -67,7 +66,7 @@ def prepare_patchset(project, patchset): # Perform binary and file / content checks scan_patch(project, patch_file, binary_list, file_audit_list, file_audit_project_list, - file_content_list, file_content_project_list, licence_ext, + master_list, project_list_re, licence_ext, licence_ignore) # Process each file in patch set using waivers generated above @@ -76,8 +75,8 @@ def prepare_patchset(project, patchset): def scan_patch(project, patch_file, binary_list, file_audit_list, - file_audit_project_list, file_content_list, - file_content_project_list, licence_ext, licence_ignore): + file_audit_project_list, master_list, + project_list_re, licence_ext, licence_ignore): """ Scan actions for each commited file in patch set """ global failure if is_binary(patch_file): @@ -116,16 +115,22 @@ def scan_patch(project, patch_file, binary_list, file_audit_list, format(match.group())) # Open file to check for blacklisted content - fo = open(patch_file, 'r') - lines = fo.readlines() + try: + fo = open(patch_file, 'r') + lines = fo.readlines() + except IOError: + logger.error('%s does not exist', patch_file) + sys.exit(1) for line in lines: - if file_content_list.search(line) and not \ - file_content_project_list.search(line): - match = file_content_list.search(line) - logger.error('File contains violation: %s', patch_file) - logger.error('Flagged Content: %s', line.rstrip()) - logger.error('Matched String: %s', match.group()) + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = value['desc'] + if re.search(regex, line) and not re.search(project_list_re, line): + logger.error('File contains violation: %s', patch_file) + logger.error('Flagged Content: %s', line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) failure = True with open(reports_dir + "contents_" + project + ".log", "a") as gate_report: @@ -133,9 +138,10 @@ def scan_patch(project, patch_file, binary_list, file_audit_list, format(patch_file)) gate_report.write('Flagged Content: {0}'. format(line)) - gate_report.write('Matched String: {0}\n'. - format(match.group())) - + gate_report.write('Matched Regular Exp: {0}'. + format(regex)) + gate_report.write('Rationale: {0}'. + format(desc.rstrip())) # Run license check licence_check(project, licence_ext, licence_ignore, patch_file) diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py index 5ac8b10..3c37621 100644 --- a/anteater/src/project_scan.py +++ b/anteater/src/project_scan.py @@ -47,7 +47,7 @@ def prepare_project(project, project_dir): file_audit_list, file_audit_project_list = lists.file_audit_list(project) # Get file content black list and project waivers - file_content_list, project_content_list = lists.file_content_list(project) + master_list, project_list = lists.file_content_list(project) # Get Licence Lists licence_ext = lists.licence_extensions() @@ -55,8 +55,8 @@ def prepare_project(project, project_dir): # Perform rudimentary scans scan_file(project_dir, project, binary_list,file_audit_list, - file_audit_project_list, file_content_list, - project_content_list) + file_audit_project_list, master_list, + project_list) # Perform licence header checks licence_check(licence_ext, licence_ignore, project, project_dir) @@ -64,8 +64,8 @@ def prepare_project(project, project_dir): def scan_file(project_dir, project, binary_list, file_audit_list, - file_audit_project_list, file_content_list, - project_content_list): + file_audit_project_list, master_list, + project_list): """Searches for banned strings and files that are listed """ for root, dirs, files in os.walk(project_dir): # Filter out ignored directories from list. @@ -88,16 +88,22 @@ def scan_file(project_dir, project, binary_list, file_audit_list, format(match.group())) if not is_binary(full_path): - fo = open(full_path, 'r') - lines = fo.readlines() + try: + fo = open(full_path, 'r') + lines = fo.readlines() + except IOError: + logger.error('%s does not exist', full_path) + for line in lines: # Check for sensitive content in project files - if file_content_list.search(line) and not \ - project_content_list.search(line): - match = file_content_list.search(line) - logger.error('File contains violation: %s', full_path) - logger.error('Flagged Content: %s', line.rstrip()) - logger.error('Matched String: %s', match.group()) + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = value['desc'] + if re.search(regex, line) and not re.search(project_list, line): + logger.error('File contains violation: %s', full_path) + logger.error('Flagged Content: %s', line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) with open(reports_dir + "contents-" + project + ".log", "a") \ as gate_report: @@ -108,8 +114,11 @@ def scan_file(project_dir, project, binary_list, file_audit_list, write('Flagged Content: {0}'. format(line)) gate_report. \ - write('Matched String: {0}\n'. - format(match.group())) + write('Matched Regular Exp: {0}'. + format(regex)) + gate_report. \ + write('Rationale: {0}\n'. + format(desc.rstrip())) else: # Check if Binary is whitelisted hashlist = get_lists.GetLists() |