diff options
-rw-r--r-- | anteater/src/get_lists.py | 17 | ||||
-rw-r--r-- | anteater/src/patch_scan.py | 61 | ||||
-rw-r--r-- | anteater/src/project_scan.py | 59 | ||||
-rw-r--r-- | master_list.yaml | 176 | ||||
-rw-r--r-- | requirements.txt | 2 | ||||
-rwxr-xr-x | utils/get-patch.sh | 85 |
6 files changed, 301 insertions, 99 deletions
diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py index 2419660..fd80a6a 100644 --- a/anteater/src/get_lists.py +++ b/anteater/src/get_lists.py @@ -120,24 +120,19 @@ class GetLists(object): project_list = False self.load_project_exception_file(yl.get('project_exceptions'), project) try: - default_list = set((yl['file_audits']['file_contents'])) + master_list = (yl['file_audits']['file_contents']) + except KeyError: logger.error('Key Error processing file_contents list values') + try: project_list = set((yl['file_audits'][project]['file_contents'])) + project_list_re = re.compile("|".join(project_list), + flags=re.IGNORECASE) except KeyError: logger.info('No file_contents waivers found for %s', project) - file_contents_re = re.compile("|".join(default_list), - flags=re.IGNORECASE) - - if project_list: - file_contents_proj_re = re.compile("|".join(project_list), - flags=re.IGNORECASE) - return file_contents_re, file_contents_proj_re - else: - file_contents_proj_re = re.compile("") - return file_contents_re, file_contents_proj_re + return master_list, project_list_re def licence_extensions(self): try: diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py index 873c069..ba0acdf 100644 --- a/anteater/src/patch_scan.py +++ b/anteater/src/patch_scan.py @@ -47,8 +47,7 @@ def prepare_patchset(project, patchset): file_audit_list, file_audit_project_list = lists.file_audit_list(project) # Get file content black list and project waivers - file_content_list, \ - file_content_project_list = lists.file_content_list(project) + master_list, project_list_re = lists.file_content_list(project) # Get Licence Lists licence_ext = lists.licence_extensions() @@ -67,7 +66,7 @@ def prepare_patchset(project, patchset): # Perform binary and file / content checks scan_patch(project, patch_file, binary_list, file_audit_list, file_audit_project_list, - file_content_list, file_content_project_list, licence_ext, + master_list, project_list_re, licence_ext, licence_ignore) # Process each file in patch set using waivers generated above @@ -76,8 +75,8 @@ def prepare_patchset(project, patchset): def scan_patch(project, patch_file, binary_list, file_audit_list, - file_audit_project_list, file_content_list, - file_content_project_list, licence_ext, licence_ignore): + file_audit_project_list, master_list, + project_list_re, licence_ext, licence_ignore): """ Scan actions for each commited file in patch set """ global failure if is_binary(patch_file): @@ -116,28 +115,36 @@ def scan_patch(project, patch_file, binary_list, file_audit_list, format(match.group())) # Open file to check for blacklisted content - fo = open(patch_file, 'r') - lines = fo.readlines() - - for line in lines: - if file_content_list.search(line) and not \ - file_content_project_list.search(line): - match = file_content_list.search(line) - logger.error('File contains violation: %s', patch_file) - logger.error('Flagged Content: %s', line.rstrip()) - logger.error('Matched String: %s', match.group()) - failure = True - with open(reports_dir + "contents_" + project + ".log", - "a") as gate_report: - gate_report.write('File contains violation: {0}\n'. - format(patch_file)) - gate_report.write('Flagged Content: {0}'. - format(line)) - gate_report.write('Matched String: {0}\n'. - format(match.group())) - - # Run license check - licence_check(project, licence_ext, licence_ignore, patch_file) + try: + fo = open(patch_file, 'r') + lines = fo.readlines() + file_exists = True + except IOError: + file_exists = False + + if file_exists: + for line in lines: + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = value['desc'] + if re.search(regex, line) and not re.search(project_list_re, line): + logger.error('File contains violation: %s', patch_file) + logger.error('Flagged Content: %s', line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) + failure = True + with open(reports_dir + "contents_" + project + ".log", + "a") as gate_report: + gate_report.write('File contains violation: {0}\n'. + format(patch_file)) + gate_report.write('Flagged Content: {0}'. + format(line)) + gate_report.write('Matched Regular Exp: {0}'. + format(regex)) + gate_report.write('Rationale: {0}'. + format(desc.rstrip())) + # Run license check + licence_check(project, licence_ext, licence_ignore, patch_file) def licence_check(project, licence_ext, diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py index 5ac8b10..411e47f 100644 --- a/anteater/src/project_scan.py +++ b/anteater/src/project_scan.py @@ -47,7 +47,7 @@ def prepare_project(project, project_dir): file_audit_list, file_audit_project_list = lists.file_audit_list(project) # Get file content black list and project waivers - file_content_list, project_content_list = lists.file_content_list(project) + master_list, project_list = lists.file_content_list(project) # Get Licence Lists licence_ext = lists.licence_extensions() @@ -55,8 +55,8 @@ def prepare_project(project, project_dir): # Perform rudimentary scans scan_file(project_dir, project, binary_list,file_audit_list, - file_audit_project_list, file_content_list, - project_content_list) + file_audit_project_list, master_list, + project_list) # Perform licence header checks licence_check(licence_ext, licence_ignore, project, project_dir) @@ -64,8 +64,8 @@ def prepare_project(project, project_dir): def scan_file(project_dir, project, binary_list, file_audit_list, - file_audit_project_list, file_content_list, - project_content_list): + file_audit_project_list, master_list, + project_list): """Searches for banned strings and files that are listed """ for root, dirs, files in os.walk(project_dir): # Filter out ignored directories from list. @@ -88,28 +88,37 @@ def scan_file(project_dir, project, binary_list, file_audit_list, format(match.group())) if not is_binary(full_path): - fo = open(full_path, 'r') - lines = fo.readlines() + try: + fo = open(full_path, 'r') + lines = fo.readlines() + except IOError: + logger.error('%s does not exist', full_path) + for line in lines: # Check for sensitive content in project files - if file_content_list.search(line) and not \ - project_content_list.search(line): - match = file_content_list.search(line) - logger.error('File contains violation: %s', full_path) - logger.error('Flagged Content: %s', line.rstrip()) - logger.error('Matched String: %s', match.group()) - with open(reports_dir + "contents-" + project + ".log", - "a") \ - as gate_report: - gate_report. \ - write('File contains violation: {0}\n'. - format(full_path)) - gate_report. \ - write('Flagged Content: {0}'. - format(line)) - gate_report. \ - write('Matched String: {0}\n'. - format(match.group())) + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = value['desc'] + if re.search(regex, line) and not re.search(project_list, line): + logger.error('File contains violation: %s', full_path) + logger.error('Flagged Content: %s', line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) + with open(reports_dir + "contents-" + project + ".log", + "a") \ + as gate_report: + gate_report. \ + write('File contains violation: {0}\n'. + format(full_path)) + gate_report. \ + write('Flagged Content: {0}'. + format(line)) + gate_report. \ + write('Matched Regular Exp: {0}'. + format(regex)) + gate_report. \ + write('Rationale: {0}\n'. + format(desc.rstrip())) else: # Check if Binary is whitelisted hashlist = get_lists.GetLists() diff --git a/master_list.yaml b/master_list.yaml index c40e138..178dde4 100644 --- a/master_list.yaml +++ b/master_list.yaml @@ -1,7 +1,4 @@ --- -# When adding projects all `arrays: []` sections must have -# a value, Use 'nullvalue' if no waivers are available. -# # This file uses standard regular expression syntax, however be mindful # of escaping YAML delimiters too (such as `:`) using double quotes "". @@ -58,38 +55,147 @@ file_audits: - aws_secret_access_key file_contents: - - -----BEGIN\sRSA\sPRIVATE\sKEY---- - - (password|passwd)(.*:|.*=.*) - - curl - - git.*clone - - dual_ec_drbg - - base64_decode - - gost - - md[245] - - panama - - private_key - - rc4 - - ripemd - - secret - - sha0 - - snefru - - ssh_key - - sslv[12] - - streebog - - tlsv1 - - wget - - run_as_root.*=.*True - - exec\s*(\"|\().+(\"|\)) - - \beval\b - - app\.run\s*\(.*debug.*=.*True.*\) - - autoescape.*=.*False - - safestring\.mark_safe.*\(.*\) - - shell.*=.*True - - \/tmp\/ - - \yaml\.load - - telnet - - ftp - - finger + private_key: + regex: -----BEGIN\sRSA\sPRIVATE\sKEY---- + desc: "This looks like it could be a private key" + + password: + regex: (password|passwd)(.*:|.*=.*) + desc: "Possible hardcoded password" + + curl: + regex: \bcurl\b + desc: "Curl can be used for retrieving objects from untrusted sources" + + clone: + regex: git.*clone + desc: "clone blocked as using an non approved external source" + + dual_ec_drbg: + regex: dual_ec_drbg + desc: "Insecure cryptographic algorithm" + + base64_decode: + regex: base64_decode + desc: "Insecure cryptographic algorithm" + + gost: + regex: gost + desc: "Insecure cryptographic algorithm" + + md245: + regex: md[245] + desc: "Insecure hashing algorithm" + + panama: + regex: panama + desc: "Insecure cryptographic algorithm" + + private_key2: + regex: private_key + desc: "This looks like it could be a private key" + + rc4: + regex: rc4 + desc: "Rivest Cipher 4 is an insecure stream cipher" + + ripemd: + regex: ripemd + desc: | + "RACE Integrity Primitives Evaluation Message Digest + is an insecure hashing algorithm" + + secret: + regex: secret + desc: "Possible leak of sensitive information" + + sha: + regex: sha[01] + desc: "Insecure hashing algorithm" + + snefru: + regex: snefru + desc: "Insecure hashing algorithm" + + ssh_key: + regex: ssh_key + desc: "Possible leak of private SSH key" + + sslv: + regex: sslv[12] + desc: "Insecure SSL Version" + + streebog: + regex: sslv[12] + desc: "Insecure cryptographic hashing algorithm" + + tlsv1: + regex: tlsv1 + desc: "Insecure TLS Version" + + wget: + regex: wget + desc: "WGET is blocked to unknown / untrusted destinations" + + run_as_root: + regex: run_as_root.*=.*True + desc: "Its better to use sudo or a rootwrapper" + + exec: + regex: \sexec\s*(\"|\().+(\"|\)) + desc: "Exec can be dangerous when used with arbitrary, untrusted code." + + eval: + regex: \beval\b + desc: "Eval can be dangerous when used with arbitrary, untrusted code." + + apprun: + regex: app\.run\s*\(.*debug.*=.*True.*\) + desc: | + "Running flask in debug mode can give away sensitive data on a + systems configuration" + + autoescape: + regex: autoescape.*=.*False + desc: | + "Without escaping HTML input an application becomes + vulnerable to Cross Site Scripting (XSS) attacks." + + safestring: + regex: safestring\.mark_safe.*\(.*\) + desc: | + "Without escaping HTML input an application becomes + vulnerable to Cross Site Scripting (XSS) attacks." + + shelltrue: + regex: shell.*=.*True + desc: | + "Shell=True can lead to dangerous shell escapes, + expecially when the input can be crafted by untrusted external input" + + tmp: + regex: \/tmp\/ + desc: | + "Use of tmp directories can be dangerous. Its world writable and + accessable, and can be easily guessed by attackers" + + yamlload: + regex: \yaml\.load + desc: | + "Avoid dangerous file parsing and object serialization libraries, + use instead `yaml.safe_load`" + + telnet: + regex: telnet + desc: "Avoid coms applications that transmit credentials in clear text" + + ftp: + regex: \bftp\b + desc: "Avoid coms applications that transmit credentials in clear text" + + finger: + regex: \bfinger\b + desc: "Avoid coms applications that transmit credentials in clear text" licence: licence_ext: diff --git a/requirements.txt b/requirements.txt index 201b07f..b904f1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ appdirs==1.4.3 -binaryornot==0.4.3 +binaryornot==0.4.4 chardet==3.0.2 docopt==0.6.2 invoke==0.18.0 diff --git a/utils/get-patch.sh b/utils/get-patch.sh new file mode 100755 index 0000000..76c5a7e --- /dev/null +++ b/utils/get-patch.sh @@ -0,0 +1,85 @@ +#!/bin/bash +############################################################################## +# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +GERRITUSER="lukehinds" +REPO_DIR="/home/luke/repos/opnfv" +FORMATED_DIR=$(echo $REPO_DIR |sed 's./.\\/.g') + +help (){ +echo "" +echo -e "A script to generate a patchset file to allow local anteater tests" +echo -e "in the same manner as OPNFV security audit gate checks.\n" +echo -e "Please insure GERRITUSER and REPO_DIR are set within the script\n" +echo -e "You will also need to pass the following arguments.\n" +echo -e "--project <project>\n" +echo -e "for example:" +echo -e "--project releng\n" +echo -e " * note that the project name has to be the same as the git" +echo -e " repository name for the project\n" +echo -e "--patch <patch_number>\n" +echo -e "for example:" +echo -e "--patch 39741\n" +echo -e " * note that the patchset can be retrieved from the URL," +echo -e " e.g https://gerrit.opnfv.org/gerrit/#/c/39741/\n" +echo -e "The above would create the patch set as /tmp/patchset_39741\n" +echo -e "You will then be able to run anteater as follows:" +echo -e "$ anteater --project sandbox --patchset /tmp/patchset_39741" +exit +} + +# GetOpts + +usage() { + echo "Usage: $0 [--project <project>] [--patch <patch_number>] [--help>]" 1>&2; exit 1; +} + +for arg in "$@"; do + shift + case "$arg" in + "--project") set -- "$@" "-p" ;; + "--patch") set -- "$@" "-n" ;; + "--help") set -- "$@" "-h" ;; + *) set -- "$@" "$arg" + esac +done + + +while getopts ":p:n:h" arg; do + case "${arg}" in + p) + p=${OPTARG} + ;; + n) + n=${OPTARG} + ;; + h) + help + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + + +if [ -z "${p}" ] || [ -z "${n}" ]; then + usage +fi + +ssh -p 29418 ${GERRITUSER}@gerrit.opnfv.org gerrit query \ + --current-patch-set ${n} \ + --files|grep file:|sed 's/file:\s\/COMMIT_MSG//;s/file://'| \ + sed '/^\s*$/d'| \ + sed -e "s/^/${FORMATED_DIR}\/${p}\//"| tr -d " \t\r" \ + > /tmp/patchset_${n} + +echo -e "Patchset created as /tmp/patchset_${n}" +echo -e "You can now run: $ anteater --project ${p} --patchset /tmp/patchset_${n}"
\ No newline at end of file |