ai-conversation-impact/www/analytics.sh

58 lines
1.7 KiB
Bash
Raw Normal View History

#!/bin/bash
# Simple analytics for llm-impact.org
# Parses nginx access logs. Run as root or a user with log read access.
#
# Usage: ./analytics.sh [days]
# days: how many days back to analyze (default: 7)
set -euo pipefail

# Succeed only when $1 is a non-negative whole number (digits only).
# Anything else (negative numbers, words, empty string) is rejected so that
# "$1 days ago" cannot be turned into a future or otherwise surprising date.
is_valid_days() {
  [[ $1 =~ ^[0-9]+$ ]]
}

# Print the calendar date $1 days ago as dd/Mon/yyyy.
# LC_ALL=C pins %b to the English abbreviations (Jan, Feb, ...) that nginx
# writes in its access log, whatever locale the caller runs under.
# Relies on GNU date's -d option.
cutoff_date() {
  LC_ALL=C date -d "$1 days ago" +%d/%b/%Y
}

# Number of days to look back; first positional argument, default 7.
DAYS="${1:-7}"
if ! is_valid_days "$DAYS"; then
  printf '%s\n' "Error: days must be a non-negative integer (got '$DAYS')" >&2
  printf '%s\n' "Usage: $0 [days]" >&2
  exit 1
fi

# Access log that is analysed.
LOG="/var/log/nginx/access.log"

# Oldest date (inclusive) that is still part of the report, as dd/Mon/yyyy.
CUTOFF=$(cutoff_date "$DAYS")
# Stop early when the access log cannot be read by the current user.
# The diagnostic goes to stderr so it never mixes with report output that a
# caller may be capturing or piping from stdout.
if [[ ! -r "$LOG" ]]; then
  printf '%s\n' "Error: Cannot read $LOG (run as root or add user to adm group)" >&2
  exit 1
fi
echo "=== llm-impact.org analytics (last $DAYS days) ==="
echo

# grep -v wrapper for use in pipelines under "set -e -o pipefail".
# grep exits 1 when it prints nothing; here that only means every line was
# filtered out, so it is mapped to success. Real grep errors (status >= 2)
# still propagate.
drop_lines() {
  grep -v "$@" || [[ $? -eq 1 ]]
}

# Print the entries of access log $2 dated on or after $1, minus static
# assets, known scanners/bots and probe requests for sensitive files.
#   $1  cutoff date as dd/Mon/yyyy (English month abbreviation), inclusive
#   $2  path of the log file; field 4 must look like "[dd/Mon/yyyy:hh:mm:ss"
# Returns non-zero when the cutoff cannot be parsed or the log cannot be read.
# An empty result is NOT an error.
filter_recent_requests() {
  local cutoff=$1 logfile=$2

  # Dates are turned into a numeric yyyymmdd key before comparing. Comparing
  # the raw dd/Mon/yyyy text orders by day-of-month first and is therefore
  # wrong whenever the window crosses a month or year boundary.
  awk -v cutoff="$cutoff" '
    BEGIN {
      n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
      for (i = 1; i <= n; i++) month[names[i]] = i
      if (split(cutoff, c, "/") != 3 || !(c[2] in month)) {
        print "Error: cannot parse cutoff date: " cutoff \
              " (expected dd/Mon/yyyy with an English month)" > "/dev/stderr"
        exit 2
      }
      cutoff_key = c[3] * 10000 + month[c[2]] * 100 + c[1]
    }
    {
      # Strip the leading "[" and split day/month/year off the timestamp.
      # Lines without a recognisable date are skipped.
      if (split(substr($4, 2), d, "[/:]") < 3 || !(d[2] in month)) next
      if (d[3] * 10000 + month[d[2]] * 100 + d[1] >= cutoff_key) print
    }
  ' "$logfile" \
    | drop_lines -E '\.(css|js|ico|png|jpg|svg|woff|ttf|map)' \
    | drop_lines -iE '(bot|crawler|spider|leakix|zgrab|masscan|nmap)' \
    | drop_lines -E '\.(env|php|git|xml|yml|yaml|bak|sql)'
}

# Filter to recent entries, exclude assets and known scanners
recent=$(filter_recent_requests "$CUTOFF" "$LOG")
# An empty window is a valid outcome, not a failure: report it and finish
# with a success status instead of printing an empty report.
[[ -n "$recent" ]] || {
  printf '%s\n' "No matching requests in the last $DAYS days."
  exit 0
}
# Print the ten most frequent lines read from stdin as "count value",
# highest count first.
# The final stage is awk rather than head: awk drains its input, so the
# upstream sort is never killed by SIGPIPE (which pipefail would turn into a
# failure of the whole pipeline).
top_ten() {
  sort | uniq -c | sort -rn | awk 'NR <= 10'
}

# Print the traffic summary for the log entries passed in $1.
#   $1  newline-separated access-log lines in combined log format:
#       IP - - [date] "request" status size "referer" "ua"
# All counting and filtering is done in awk, which exits 0 even when nothing
# matches; a zero count or an empty referrer list therefore never aborts the
# script under "set -e -o pipefail" the way a non-matching grep would.
print_report() {
  local entries=$1
  local unique_ips total landing forge

  # Unique IPs (proxy for unique visitors)
  unique_ips=$(printf '%s\n' "$entries" \
    | awk '!seen[$1]++ { n++ } END { print n + 0 }')
  printf '%s\n' "Unique IPs: $unique_ips"

  # Total requests (excluding assets)
  total=$(printf '%s\n' "$entries" | awk 'END { print NR }')
  printf '%s\n' "Total page requests: $total"
  echo

  echo "=== Top pages ==="
  # Whitespace-separated field 7 is the request path.
  printf '%s\n' "$entries" | awk '{ print $7 }' | top_ten
  echo

  echo "=== Referrers (external) ==="
  # Split on double quotes: field 4 is the referer. Skip missing ("-" or
  # empty) referers and anything that mentions the site itself.
  printf '%s\n' "$entries" \
    | awk -F'"' '$4 != "-" && $4 != "" && $4 !~ /llm-impact\.org/ { print $4 }' \
    | top_ten
  echo

  echo "=== Landing page vs repo ==="
  landing=$(printf '%s\n' "$entries" \
    | awk '$7 == "/" || $7 == "/index.html" { n++ } END { print n + 0 }')
  # Counts every line containing /forge/ anywhere, not only in the path.
  forge=$(printf '%s\n' "$entries" \
    | awk '/\/forge\// { n++ } END { print n + 0 }')
  printf '%s\n' "Landing page: $landing"
  printf '%s\n' "Forge (repo): $forge"
}

print_report "$recent"