#!/bin/bash
# Simple analytics for llm-impact.org
# Parses the nginx access log (combined log format:
#   IP - - [date tz] "request" status size "referer" "ua").
# Run as root or as a user with log read access.
#
# Usage: ./analytics.sh [days]
#   days: how many days back to analyze (default: 7)
#
# Only $LOG itself is read; rotated logs (access.log.1, *.gz) are not included.
# Requires GNU date (for `date -d`).

set -euo pipefail

DAYS="${1:-7}"
LOG="/var/log/nginx/access.log"

if [[ ! "$DAYS" =~ ^[0-9]+$ ]]; then
  echo "Error: days must be a non-negative integer (got '$DAYS')" >&2
  exit 1
fi

if [[ ! -r "$LOG" ]]; then
  echo "Error: Cannot read $LOG (run as root or add user to adm group)" >&2
  exit 1
fi

# Cutoff as a numeric yyyymmdd key. The log's dd/Mon/yyyy stamps do not sort
# chronologically as strings, so they are converted to the same key in awk.
CUTOFF=$(date -d "$DAYS days ago" +%Y%m%d)

# Print log lines dated on or after the cutoff day, excluding static assets,
# probe paths and known scanners/crawlers.
# Everything is done in one awk pass: unlike a `grep -v` chain, awk exits 0
# when nothing matches, so an empty result does not trip `set -e`/`pipefail`.
filter_recent() {
  awk -v cutoff="$CUTOFF" '
    BEGIN {
      n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
      for (i = 1; i <= n; i++) month[names[i]] = sprintf("%02d", i)
    }
    {
      # $4 looks like "[10/Oct/2023:13:55:36"; skip lines that do not parse.
      if (split(substr($4, 2), ts, "[/:]") < 3 || !(ts[2] in month)) next
      if ((ts[3] month[ts[2]] ts[1]) + 0 < cutoff + 0) next

      # Asset and probe filters look at the request path only, so a referrer
      # such as www.github.com is not mistaken for a ".git" probe.
      if ($7 ~ /\.(css|js|ico|png|jpg|svg|woff|ttf|map)/) next
      if ($7 ~ /\.(env|php|git|xml|yml|yaml|bak|sql)/) next

      # Scanner filter looks at the user agent only (6th quote-delimited
      # field), so a URL containing e.g. "chatbot" is not dropped.
      ua = (split($0, q, "\"") >= 6) ? q[6] : ""
      if (tolower(ua) ~ /bot|crawler|spider|leakix|zgrab|masscan|nmap/) next

      print
    }
  ' "$LOG"
}

# Read values on stdin and print the ten most frequent with their counts.
# The final stage is awk rather than `head` so it consumes all of its input;
# `head` exiting early can SIGPIPE `sort`, which is fatal under pipefail.
top10() {
  sort | uniq -c | sort -rn | awk 'NR <= 10'
}

echo "=== llm-impact.org analytics (last $DAYS days) ==="
echo

# Filter to recent entries, exclude assets and known scanners
recent=$(filter_recent)

if [[ -z "$recent" ]]; then
  echo "No matching requests in the last $DAYS days."
  exit 0
fi

# Unique IPs (proxy for unique visitors)
unique_ips=$(awk '!seen[$1]++ { n++ } END { print n + 0 }' <<<"$recent")
echo "Unique IPs: $unique_ips"

# Total requests (excluding assets)
total=$(wc -l <<<"$recent")
echo "Total page requests: $total"

echo
echo "=== Top pages ==="
awk '{ print $7 }' <<<"$recent" | top10

echo
echo "=== Referrers (external) ==="
# Splitting on double quotes puts the referer in field 4. Empty, "-" and
# same-site referrers are dropped inside awk so that "no external referrers"
# yields an empty list instead of a failing grep.
awk -F'"' '$4 != "" && $4 != "-" && $4 !~ /llm-impact\.org/ { print $4 }' \
  <<<"$recent" | top10

echo
echo "=== Landing page vs repo ==="
landing=$(awk '$7 == "/" || $7 == "/index.html" { n++ } END { print n + 0 }' \
  <<<"$recent")
# Count only requests whose path contains /forge/, not requests that merely
# carry a /forge/ referrer.
forge=$(awk 'index($7, "/forge/") { n++ } END { print n + 0 }' <<<"$recent")
echo "Landing page: $landing"
echo "Forge (repo): $forge"