ai-conversation-impact/scan-secrets.sh
claude 0543a43816 Initial commit: AI conversation impact methodology and toolkit
CC0-licensed methodology for estimating the environmental and social
costs of AI conversations (20+ categories), plus a reusable toolkit
for automated impact tracking in Claude Code sessions.
2026-03-16 09:46:49 +00:00

101 lines
3.4 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# scan-secrets.sh — Scan files for accidentally exposed secrets.
#
# Searches a directory tree for patterns that look like API keys, passwords,
# private keys, and tokens left in source code or config files. Requires bash,
# standard Unix tools (find, head, mktemp, wc), and a grep with PCRE support
# (-P), such as GNU grep.
#
# Usage: ./scan-secrets.sh [directory] (defaults to current directory)
# Strict mode: abort on errors, on unset variables, and on failures anywhere
# in a pipeline.
set -euo pipefail

# Directory to scan (first argument, current directory when omitted) and the
# running count of reported findings.
TARGET="${1:-.}"
FOUND=0

# Colour escape sequences. They start out empty and are only filled in when
# stdout is a terminal, so redirected output stays free of escape codes.
RED=''
YELLOW=''
BOLD=''
RESET=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  YELLOW='\033[0;33m'
  BOLD='\033[1m'
  RESET='\033[0m'
fi
#######################################
# Print one finding and bump the global finding counter.
# Globals:   RED, YELLOW, BOLD, RESET (read), FOUND (incremented)
# Arguments: $1 - file path, $2 - line number, $3 - rule label,
#            $4 - the matching line of text
# Outputs:   two lines on stdout: a header line and the matching text
#######################################
warn() {
  local path="$1"
  local lineno="$2"
  local rule="$3"
  local excerpt="$4"

  # Colour variables hold backslash escapes, so they are expanded with %b;
  # everything taken from the scanned file is printed verbatim with %s.
  printf '%b[secret]%b %b%s%b (line %s): %s\n %b%s%b\n' \
    "$RED" "$RESET" "$BOLD" "$path" "$RESET" "$lineno" "$rule" \
    "$YELLOW" "$excerpt" "$RESET"

  FOUND=$((FOUND + 1))
}
# Detection rules, one per entry, written as "label:::regex". The scanner
# takes the label from before the first ":::" and the regex from after the
# last one, and feeds the regex to grep -P (several entries rely on (?i)).
# Entry order is the order in which findings are reported for a file.
declare -a PATTERNS=()

# AWS credentials
PATTERNS+=(
  'AWS Access Key:::AKIA[0-9A-Z]{16}'
  'AWS Secret Key:::(?i)aws_secret_access_key\s*[=:]\s*\S+'
)

# Generic "name = value" assignments
PATTERNS+=(
  "Generic API key assignment:::(?i)(api[_-]?key|apikey)\s*[=:]\s*['\"]?\S{8,}"
  "Generic secret assignment:::(?i)(secret|password|passwd|pwd)\s*[=:]\s*['\"]?\S{8,}"
)

# PEM private key headers
PATTERNS+=(
  '-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----'
)
PATTERNS[4]="Private key file header:::${PATTERNS[4]}"

# Service-specific tokens
PATTERNS+=(
  'GitHub token:::gh[pousr]_[A-Za-z0-9_]{36,}'
  'Generic bearer token:::(?i)bearer\s+[a-z0-9_\-\.]{20,}'
  'Slack token:::xox[bpras]-[0-9a-zA-Z\-]{10,}'
  'Stripe key:::[sr]k_(live|test)_[0-9a-zA-Z]{24,}'
  'Google API key:::AIza[0-9A-Za-z\-_]{35}'
  'Heroku API key:::(?i)heroku.*[=:]\s*[0-9a-f]{8}-[0-9a-f]{4}-'
)

# Long base64-looking values assigned to a sensitive-sounding name
PATTERNS+=(
  "Base64-encoded high-entropy blob:::(?i)(key|token|secret|password)\s*[=:]\s*['\"]?[A-Za-z0-9+/]{40,}={0,2}['\"]?"
)
# Directories and file extensions excluded from the scan (VCS metadata,
# vendored code, build output, binaries). PRUNE_DIRS is a space-separated list
# of directory names; SKIP_EXT is a regex alternation of extensions.
PRUNE_DIRS=".git node_modules vendor __pycache__ .venv venv dist build"
SKIP_EXT="png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot|mp3|mp4|zip|tar|gz|bz2|xz|pdf|bin|exe|dll|so|dylib|class|pyc|o|a"

# Fail loudly on a missing target. Without this check, find's failure (its
# stderr is discarded below) would abort the script under `set -e` with no
# message and with exit status 1, which is also the "secrets found" status.
if [[ ! -e "$TARGET" ]]; then
  printf 'scan-secrets: %s: no such file or directory\n' "$TARGET" >&2
  exit 2
fi

# Build the list of files to scan: regular files smaller than 1 MiB that are
# not inside a pruned directory. The list lives in a temp file that is removed
# on any exit path.
TMPFILE=$(mktemp)
trap 'rm -f "$TMPFILE"' EXIT

# Expand PRUNE_DIRS into "-name d1 -o -name d2 ..." so the list above is the
# single source of truth for what gets pruned.
read -r -a prune_names <<<"$PRUNE_DIRS"
prune_expr=()
for dir_name in "${prune_names[@]}"; do
  if [[ ${#prune_expr[@]} -gt 0 ]]; then
    prune_expr+=(-o)
  fi
  prune_expr+=(-name "$dir_name")
done

# find exits non-zero when it cannot read some subdirectory. That must not
# abort the whole scan, but the user should know coverage was incomplete.
if ! find "$TARGET" \
  \( "${prune_expr[@]}" \) -prune \
  -o -type f -size -1048576c -print > "$TMPFILE" 2>/dev/null; then
  printf 'scan-secrets: warning: some paths under %s could not be read; results may be incomplete.\n' \
    "$TARGET" >&2
fi

# Number of candidate files listed (before extension and binary filtering).
TOTAL_FILES=$(wc -l < "$TMPFILE")
# The patterns are Perl-compatible regexes ((?i), \s, \S), so grep must
# support -P. Check once up front: the per-file grep calls below discard their
# errors, so a grep without -P would otherwise make every scan come back
# "clean" without having matched anything.
if ! grep -qP 'x' <<<'x' 2>/dev/null; then
  printf 'scan-secrets: this script needs a grep with PCRE support (-P), e.g. GNU grep.\n' >&2
  exit 2
fi

#######################################
# Succeed if the file name carries an extension listed in SKIP_EXT.
# Globals:   SKIP_EXT (read)
# Arguments: $1 - file path
#######################################
has_skipped_ext() {
  local base="${1##*/}"
  # Only a dot inside the basename marks an extension; a dot in a parent
  # directory name (or no dot at all) must not be mistaken for one.
  [[ "$base" == *.* ]] || return 1
  grep -qiE "^($SKIP_EXT)$" <<<"${base##*.}"
}

#######################################
# Succeed if the first 512 bytes of the file contain a NUL byte.
# Arguments: $1 - file path
#######################################
looks_binary() {
  head -c 512 "$1" 2>/dev/null | grep -qP '\x00'
}

#######################################
# Run every entry of PATTERNS against one file and report each hit via warn.
# Globals:   PATTERNS (read)
# Arguments: $1 - file path
#######################################
scan_file() {
  local path="$1"
  local entry label pattern hit lineno

  for entry in "${PATTERNS[@]}"; do
    label="${entry%%:::*}"
    pattern="${entry##*:::}"
    # -e keeps a pattern that starts with "-" (the private-key header) from
    # being parsed as an option; "--" does the same for the file name.
    # grep's own errors (e.g. unreadable file) are deliberately ignored.
    while IFS= read -r hit; do
      [[ -n "$hit" ]] || continue
      # grep -n prints "N:text"; split on the first colon only so colons
      # inside the matched text are kept intact.
      lineno="${hit%%:*}"
      if [[ "$lineno" =~ ^[0-9]+$ ]]; then
        warn "$path" "$lineno" "$label" "${hit#*:}"
      else
        # No numeric prefix: this is a notice from grep rather than a matched
        # line (e.g. "Binary file ... matches"). Still report it as a hit.
        warn "$path" "?" "$label" "$hit"
      fi
    done < <(grep -nP -e "$pattern" -- "$path" 2>/dev/null || true)
  done
  return 0
}

# Walk the file list, skipping known-binary extensions and files that look
# binary, and count how many files were actually scanned.
SCANNED=0
while IFS= read -r filepath; do
  if has_skipped_ext "$filepath" || looks_binary "$filepath"; then
    continue
  fi
  SCANNED=$((SCANNED + 1))
  scan_file "$filepath"
done < "$TMPFILE"
# Final report and exit status: 1 when at least one potential secret was
# reported, 0 otherwise.
# Colour variables hold backslash escapes and are expanded with %b; the counts
# and the user-supplied target are printed with %s so that backslashes in a
# path are shown verbatim instead of being interpreted as escape sequences.
printf '\n'
printf '%bScan complete.%b Scanned %s text files under %s.\n' \
  "$BOLD" "$RESET" "$SCANNED" "$TARGET"

if (( FOUND > 0 )); then
  printf '%bFound %s potential secret(s).%b Review each match — some may be false positives.\n' \
    "$RED" "$FOUND" "$RESET"
  printf '%s\n' "If a secret is real, rotate it immediately, then remove it from the file."
  exit 1
fi

printf 'No secrets detected. %b(This does not guarantee none exist — stay vigilant.)%b\n' \
  "$YELLOW" "$RESET"
exit 0