#!/usr/bin/env bash
#
# scan-secrets.sh — Scan files for accidentally exposed secrets.
#
# Searches a directory tree for patterns that look like API keys, passwords,
# private keys, and tokens left in source code or config files. No dependencies
# beyond bash and a grep with Perl-compatible regex support (-P), e.g. GNU grep.
#
# Usage: ./scan-secrets.sh [directory]   (defaults to current directory)
#
# Exit status:
#   0  no potential secrets found
#   1  at least one potential secret found
#   2  the scan could not be run (missing target, grep without -P, no temp file)

set -euo pipefail

TARGET="${1:-.}"
FOUND=0

# Colors (disabled if not a terminal)
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  YELLOW='\033[0;33m'
  BOLD='\033[1m'
  RESET='\033[0m'
else
  RED=''
  YELLOW=''
  BOLD=''
  RESET=''
fi

# Print an error to stderr and exit with status 2, so that "the scan could not
# run" is distinguishable from "secrets were found" (status 1).
die() {
  printf 'scan-secrets: %s\n' "$*" >&2
  exit 2
}

# Report one finding and bump the global FOUND counter.
# Arguments: $1 file path, $2 line number, $3 pattern label, $4 matched line
warn() {
  local file="$1" line="$2" label="$3" match="$4"
  # The color variables hold printf escape sequences, so they belong in the
  # format string; all scanned data goes through %s.
  # shellcheck disable=SC2059
  printf "${RED}[secret]${RESET} ${BOLD}%s${RESET} (line %s): %s\n" \
    "$file" "$line" "$label"
  # shellcheck disable=SC2059
  printf "  ${YELLOW}%s${RESET}\n" "$match"
  FOUND=$((FOUND + 1))
}

# Succeeds when the basename of $1 ends in one of the SKIP_EXT extensions
# (compared case-insensitively). Only the basename is inspected, so dots in
# directory names are never mistaken for an extension.
has_skipped_extension() {
  local name="${1##*/}"
  [[ "$name" == *.* ]] || return 1
  local ext="${name##*.}"
  local re="^(${SKIP_EXT})\$"
  local rc=1
  shopt -s nocasematch
  if [[ "$ext" =~ $re ]]; then
    rc=0
  fi
  shopt -u nocasematch
  return "$rc"
}

# Succeeds when the first 512 bytes of file $1 contain a NUL byte.
# LC_ALL=C makes grep match bytewise, so byte sequences that are not valid in
# the user's locale cannot interfere with the check.
looks_binary() {
  head -c 512 -- "$1" 2>/dev/null | LC_ALL=C grep -qP '\x00'
}

# Patterns: each entry is "label:::perl-compatible-regex"
PATTERNS=(
  "AWS Access Key:::AKIA[0-9A-Z]{16}"
  "AWS Secret Key:::(?i)aws_secret_access_key\s*[=:]\s*\S+"
  "Generic API key assignment:::(?i)(api[_-]?key|apikey)\s*[=:]\s*['\"]?\S{8,}"
  "Generic secret assignment:::(?i)(secret|password|passwd|pwd)\s*[=:]\s*['\"]?\S{8,}"
  "Private key file header:::-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"
  "GitHub token:::gh[pousr]_[A-Za-z0-9_]{36,}"
  "Generic bearer token:::(?i)bearer\s+[a-z0-9_\-\.]{20,}"
  "Slack token:::xox[bpras]-[0-9a-zA-Z\-]{10,}"
  "Stripe key:::[sr]k_(live|test)_[0-9a-zA-Z]{24,}"
  "Google API key:::AIza[0-9A-Za-z\-_]{35}"
  "Heroku API key:::(?i)heroku.*[=:]\s*[0-9a-f]{8}-[0-9a-f]{4}-"
  "Base64-encoded high-entropy blob:::(?i)(key|token|secret|password)\s*[=:]\s*['\"]?[A-Za-z0-9+/]{40,}={0,2}['\"]?"
)

# Directories to prune and file extensions to skip (binaries, vendored code, .git)
PRUNE_DIRS=(.git node_modules vendor __pycache__ .venv venv dist build)
SKIP_EXT="png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot|mp3|mp4|zip|tar|gz|bz2|xz|pdf|bin|exe|dll|so|dylib|class|pyc|o|a"

# --- Preflight -------------------------------------------------------------

# Every pattern relies on PCRE syntax such as (?i). Without -P each grep call
# would fail quietly and the scan would report a clean result, so refuse to run.
if ! grep -qP 'x' <<<'x' 2>/dev/null; then
  die "this script requires a grep that supports -P (PCRE), e.g. GNU grep"
fi

[[ -e "$TARGET" ]] || die "target not found: $TARGET"

# find would parse a starting point beginning with '-' as an expression.
search_root="$TARGET"
if [[ "$search_root" == -* ]]; then
  search_root="./$search_root"
fi

# --- Build the list of files to scan (skip files of 1 MiB or more) ----------

TMPFILE=$(mktemp) || die "could not create a temporary file"
trap 'rm -f -- "$TMPFILE"' EXIT

find_args=("$search_root")
if [[ -d "$search_root" ]]; then
  # Apply the prune rules only below the starting directory, so that a target
  # which is itself named e.g. "build" or "vendor" is still scanned.
  find_args+=(-mindepth 1)
fi
find_args+=('(')
need_or=0
for dir in "${PRUNE_DIRS[@]}"; do
  if ((need_or)); then
    find_args+=(-o)
  fi
  find_args+=(-name "$dir")
  need_or=1
done
find_args+=(')' -prune -o -type f -size -1048576c -print0)

# NUL-delimited output keeps file names containing newlines intact. A non-zero
# status from find (typically an unreadable subdirectory) must not abort the
# scan: whatever could be listed is still scanned, and the user is told.
if ! find "${find_args[@]}" >"$TMPFILE" 2>/dev/null; then
  printf 'scan-secrets: warning: some paths under %s could not be read; results may be incomplete\n' \
    "$TARGET" >&2
fi

# --- Scan ------------------------------------------------------------------

SCANNED=0

while IFS= read -r -d '' filepath; do
  # Skip binary-looking extensions
  if has_skipped_extension "$filepath"; then
    continue
  fi

  # Skip files that look binary (contain null bytes in first 512 bytes)
  if looks_binary "$filepath"; then
    continue
  fi

  SCANNED=$((SCANNED + 1))

  for entry in "${PATTERNS[@]}"; do
    label="${entry%%:::*}"
    pattern="${entry#*:::}"

    # -e marks the next argument as the pattern even when it begins with '-'
    # (the private-key header pattern does); -- ends option parsing before the
    # file name. grep exits 1 when nothing matches, hence the `|| true`.
    while IFS=: read -r lineno match; do
      [[ -n "$lineno" ]] || continue
      warn "$filepath" "$lineno" "$label" "$match"
    done < <(grep -nP -e "$pattern" -- "$filepath" 2>/dev/null || true)
  done
done <"$TMPFILE"

# --- Summary ---------------------------------------------------------------

# Color variables are part of the printf format; values are passed through %s
# so backslashes or percent signs in the target path are printed literally.
printf '\n'
# shellcheck disable=SC2059
printf "${BOLD}Scan complete.${RESET} Scanned %s text files under %s.\n" \
  "$SCANNED" "$TARGET"

if [[ "$FOUND" -gt 0 ]]; then
  # shellcheck disable=SC2059
  printf "${RED}Found %s potential secret(s).${RESET} Review each match — some may be false positives.\n" \
    "$FOUND"
  printf '%s\n' "If a secret is real, rotate it immediately, then remove it from the file."
  exit 1
else
  # shellcheck disable=SC2059
  printf "No secrets detected. ${YELLOW}(This does not guarantee none exist — stay vigilant.)${RESET}\n"
  exit 0
fi