2026-03-16 09:46:49 +00:00
|
|
|
#!/usr/bin/env bash
#
# pre-compact-snapshot.sh — Snapshot impact metrics before context compaction.
#
# Runs as a PreCompact hook. Reads the conversation transcript, extracts
# actual token counts when available (falls back to heuristic estimates),
# and appends a timestamped entry to the impact log.
#
# Input:  JSON on stdin with fields: trigger, session_id, transcript_path, cwd
# Output: nothing on stdout (hook succeeds silently). Logs to impact-log.jsonl.

set -euo pipefail

# The hook payload arrives once on stdin; keep it so several fields can be
# pulled out of it.
HOOK_INPUT=$(cat)

#######################################
# Print one top-level string field of the hook payload.
# A missing or null field prints nothing; plain `jq -r '.field'` would print
# the literal word "null", which then gets used as a path or logged as a value.
# Globals:   HOOK_INPUT (read)
# Arguments: $1 - field name
# Outputs:   field value on stdout (empty when absent)
#######################################
hook_field() {
  jq -r --arg key "$1" '.[$key] // empty' <<<"$HOOK_INPUT"
}

# Prefer the project dir exported by the host, then the payload's cwd, and
# finally the current directory so the log never lands under "/" or "null/".
PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(hook_field cwd)}"
PROJECT_DIR="${PROJECT_DIR:-$PWD}"
TRANSCRIPT_PATH=$(hook_field transcript_path)
SESSION_ID=$(hook_field session_id)
TRIGGER=$(hook_field trigger)
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

LOG_DIR="$PROJECT_DIR/.claude/impact"
LOG_FILE="$LOG_DIR/impact-log.jsonl"
mkdir -p "$LOG_DIR"
|
|
|
|
|
|
|
|
|
|
# --- Extract or estimate metrics from transcript ---

# Python scanner for the JSONL transcript, run via `python3 -c`.
# It reads the transcript path from argv[1] and prints two lines:
#   1. tab-separated: turns, input_tokens, cache_creation, cache_read,
#      output_tokens, model_id, auto_ratio_pm, user_tokens_est, unique_files,
#      total_edits, test_passes, test_failures, has_public_push
#   2. JSON object mapping edited file path -> edit count
# Kept in a quoted here-doc so the shell never interprets the Python source.
# `read -d ''` returns non-zero at end of input, hence the `|| true`.
IFS= read -r -d '' TRANSCRIPT_SCAN_PY <<'PY' || true
import json
import re
import sys

PASSED_RE = re.compile(r'(\d+)\s+(tests?\s+)?passed', re.I)
FAILED_RE = re.compile(r'(\d+)\s+(tests?\s+)?failed|FAIL(?:ED)?|ERROR', re.I)
PUSH_RE = re.compile(r'git\s+push')

input_tokens = 0
cache_creation = 0
cache_read = 0
output_tokens = 0
turns = 0
model_id = ''
user_bytes = 0
edited_files = {}  # file_path -> edit count
test_passes = 0
test_failures = 0
has_public_push = 0


def count(usage, key):
    # Missing or null counters contribute 0 instead of raising mid-record.
    value = usage.get(key)
    return value if isinstance(value, int) else 0


def utf8_len(text):
    return len(text.encode('utf-8', errors='replace')) if isinstance(text, str) else 0


# Explicit UTF-8 with replacement: a non-UTF-8 locale or a stray byte must not
# abort the whole scan (the iteration itself is outside the per-line try).
with open(sys.argv[1], encoding='utf-8', errors='replace') as f:
    for line in f:
        try:
            d = json.loads(line)
            msg = d.get('message') if isinstance(d, dict) else None
            if not isinstance(msg, dict):
                continue
            role = msg.get('role')
            content = msg.get('content', '')
            if isinstance(content, list):
                blocks = [b for b in content if isinstance(b, dict)]
            else:
                blocks = []

            # Track user message size (proxy for user contribution)
            if role == 'user':
                if isinstance(content, str):
                    user_bytes += utf8_len(content)
                for block in blocks:
                    if block.get('type') == 'text':
                        user_bytes += utf8_len(block.get('text', ''))

            # Extract usage data and model from assistant messages
            if role == 'assistant':
                if msg.get('model'):
                    model_id = str(msg['model'])
                usage = msg.get('usage')
                if isinstance(usage, dict) and 'input_tokens' in usage:
                    turns += 1
                    input_tokens += count(usage, 'input_tokens')
                    cache_creation += count(usage, 'cache_creation_input_tokens')
                    cache_read += count(usage, 'cache_read_input_tokens')
                    output_tokens += count(usage, 'output_tokens')

            # Parse tool use blocks
            for block in blocks:
                if block.get('type') != 'tool_use':
                    continue
                name = block.get('name', '')
                inp = block.get('input')
                if not isinstance(inp, dict):
                    inp = {}

                # File churn: count Edit/Write per file
                if name in ('Edit', 'Write'):
                    fp = inp.get('file_path', '')
                    if isinstance(fp, str) and fp:
                        edited_files[fp] = edited_files.get(fp, 0) + 1

                # Public push detection
                if name == 'Bash':
                    cmd = inp.get('command', '')
                    if isinstance(cmd, str) and PUSH_RE.search(cmd):
                        has_public_push = 1

            # Test results from tool_result blocks (user role, tool_result type)
            if role == 'user':
                for block in blocks:
                    if block.get('type') != 'tool_result':
                        continue
                    rc = block.get('content', '')
                    if isinstance(rc, str):
                        text = rc
                    elif isinstance(rc, list):
                        text = ' '.join(
                            str(b.get('text') or '') for b in rc if isinstance(b, dict)
                        )
                    else:
                        text = ''
                    # Detect test outcomes from common test runner output
                    if PASSED_RE.search(text):
                        test_passes += 1
                    if FAILED_RE.search(text):
                        test_failures += 1
        except Exception:
            # Deliberately best-effort: a malformed record is skipped so one
            # bad line cannot discard the metrics of the whole transcript.
            continue

user_tokens_est = user_bytes // 4  # rough byte-to-token estimate
unique_files = len(edited_files)
total_edits = sum(edited_files.values())

# automation_ratio: 0 = all human, 1 = all AI (as permille for integer arithmetic)
if output_tokens + user_tokens_est > 0:
    auto_ratio_pm = output_tokens * 1000 // (output_tokens + user_tokens_est)
else:
    auto_ratio_pm = 0

fields = [
    turns, input_tokens, cache_creation, cache_read, output_tokens,
    re.sub(r'\s+', ' ', model_id),  # keep the record on one tab-separated line
    auto_ratio_pm, user_tokens_est, unique_files, total_edits,
    test_passes, test_failures, has_public_push,
]
print('\t'.join(str(v) for v in fields))
# Second line: JSON object of edited files with counts
print(json.dumps(edited_files))
PY

#######################################
# Print tab-separated field N of USAGE_LINE1.
# cut is used instead of `IFS=$'\t' read` because read treats tab as
# whitespace and collapses adjacent tabs, which would shift every field
# whenever model_id is empty.
# Globals:   USAGE_LINE1 (read)
# Arguments: $1 - 1-based field number
#######################################
usage_field() {
  cut -f"$1" <<<"$USAGE_LINE1"
}

#######################################
# Derive energy, CO2 and financial estimates from the token totals.
# Integer arithmetic throughout, so no bc dependency.
# Globals:   CUMULATIVE_INPUT, OUTPUT_TOKENS (read)
#            INPUT_CWH, OUTPUT_CWH, ENERGY_CWH, ENERGY_WH, CO2_MG, CO2_G,
#            COST_INPUT_CENTS, COST_OUTPUT_CENTS, COST_CENTS (written)
#######################################
estimate_costs() {
  # Energy: 0.1 Wh per 1K input tokens, 0.5 Wh per 1K output tokens, PUE 1.2
  # Calibrated against Google (Patterson et al., Aug 2025) and Jegham et al. (May 2025)
  # Working unit is centiwatt-hours.
  INPUT_CWH=$(( CUMULATIVE_INPUT * 100 / 10000 ))   # 0.1 Wh/1K = 100 cWh/10K
  OUTPUT_CWH=$(( OUTPUT_TOKENS * 500 / 10000 ))     # 0.5 Wh/1K = 500 cWh/10K
  ENERGY_CWH=$(( (INPUT_CWH + OUTPUT_CWH) * 12 / 10 ))  # PUE 1.2
  ENERGY_WH=$(( ENERGY_CWH / 100 ))

  # CO2: 325g/kWh -> 0.325g/Wh -> 325 mg/Wh.
  # Computed from centiwatt-hours, not the already-truncated ENERGY_WH, so the
  # rounding loss of up to 1 Wh is not multiplied into the CO2 figure.
  CO2_MG=$(( ENERGY_CWH * 325 / 100 ))
  CO2_G=$(( CO2_MG / 1000 ))

  # Financial: $15/M input, $75/M output (in cents)
  # Use effective cumulative input (cache-weighted) for cost too
  COST_INPUT_CENTS=$(( CUMULATIVE_INPUT * 15 / 10000 ))   # $15/M = 15c/10K
  COST_OUTPUT_CENTS=$(( OUTPUT_TOKENS * 75 / 10000 ))     # $75/M = 75c/10K
  COST_CENTS=$(( COST_INPUT_CENTS + COST_OUTPUT_CENTS ))
}

#######################################
# Populate every metric variable consumed by the log writer.
# TOKEN_SOURCE records which path produced the numbers:
#   "none"      - TRANSCRIPT_PATH is not a regular file; everything stays 0
#   "actual"    - usage fields found in the transcript by the Python scanner
#   "heuristic" - scanner unavailable or found no usage; estimate from size
# Globals:   TRANSCRIPT_PATH, TRANSCRIPT_SCAN_PY (read); metric variables
#            listed below (written)
#######################################
collect_transcript_metrics() {
  local transcript="${TRANSCRIPT_PATH:-}"

  # Defaults double as the "no transcript" result.
  TOKEN_SOURCE="none"
  TRANSCRIPT_BYTES=0
  TRANSCRIPT_LINES=0
  ASSISTANT_TURNS=0
  TOOL_USES=0
  CUMULATIVE_INPUT=0
  CUMULATIVE_INPUT_RAW=0
  INPUT_TOKENS=0
  OUTPUT_TOKENS=0
  CACHE_CREATION=0
  CACHE_READ=0
  ENERGY_WH=0
  CO2_G=0
  COST_CENTS=0
  MODEL_ID=""
  AUTO_RATIO_PM=0
  USER_TOKENS_EST=0
  UNIQUE_FILES=0
  TOTAL_EDITS=0
  TEST_PASSES=0
  TEST_FAILURES=0
  HAS_PUBLIC_PUSH=0
  EDITED_FILES_JSON="{}"

  if [[ ! -f "$transcript" ]]; then
    return 0
  fi

  # Some wc implementations pad their output; the arithmetic pass strips it.
  TRANSCRIPT_BYTES=$(wc -c < "$transcript")
  TRANSCRIPT_BYTES=$(( TRANSCRIPT_BYTES ))
  TRANSCRIPT_LINES=$(wc -l < "$transcript")
  TRANSCRIPT_LINES=$(( TRANSCRIPT_LINES ))

  # Count tool uses (lines mentioning "tool_use").
  # grep -c already prints 0 when nothing matches but exits 1; `|| echo 0`
  # would append a second "0" and produce the two-line value "0\n0".
  TOOL_USES=$(grep -c '"tool_use"' "$transcript" 2>/dev/null || true)
  TOOL_USES=${TOOL_USES:-0}

  # Try to extract actual token counts from usage fields in the transcript.
  # The transcript contains .message.usage with input_tokens,
  # cache_creation_input_tokens, cache_read_input_tokens, output_tokens.
  # Scanner failure (no python3, unreadable file) is intentionally silent and
  # leaves USAGE_DATA empty, which selects the heuristic path below.
  USAGE_DATA=$(python3 -c "$TRANSCRIPT_SCAN_PY" "$transcript" 2>/dev/null || true)
  USAGE_LINE1=${USAGE_DATA%%$'\n'*}

  local turns
  turns=$(usage_field 1)

  if [[ "$turns" =~ ^[0-9]+$ ]] && (( turns > 0 )); then
    # Actual token counts available
    TOKEN_SOURCE="actual"
    ASSISTANT_TURNS=$turns
    INPUT_TOKENS=$(usage_field 2)
    CACHE_CREATION=$(usage_field 3)
    CACHE_READ=$(usage_field 4)
    OUTPUT_TOKENS=$(usage_field 5)
    MODEL_ID=$(usage_field 6)
    AUTO_RATIO_PM=$(usage_field 7)
    USER_TOKENS_EST=$(usage_field 8)
    UNIQUE_FILES=$(usage_field 9)
    TOTAL_EDITS=$(usage_field 10)
    TEST_PASSES=$(usage_field 11)
    TEST_FAILURES=$(usage_field 12)
    HAS_PUBLIC_PUSH=$(usage_field 13)

    # Everything after the first newline is the edited-files JSON object.
    if [[ "$USAGE_DATA" == *$'\n'* ]]; then
      EDITED_FILES_JSON=${USAGE_DATA#*$'\n'}
    fi
    EDITED_FILES_JSON=${EDITED_FILES_JSON:-"{}"}

    # Cumulative input = all tokens that went through the model.
    # Cache reads are cheaper (~10-20% of full compute), so we weight them.
    # Full-cost tokens: input_tokens + cache_creation_input_tokens
    # Reduced-cost tokens: cache_read_input_tokens (weight at 0.1x for energy)
    FULL_COST_INPUT=$(( INPUT_TOKENS + CACHE_CREATION ))
    CACHE_READ_EFFECTIVE=$(( CACHE_READ / 10 ))
    CUMULATIVE_INPUT=$(( FULL_COST_INPUT + CACHE_READ_EFFECTIVE ))
    # Also track raw total for the log
    CUMULATIVE_INPUT_RAW=$(( INPUT_TOKENS + CACHE_CREATION + CACHE_READ ))
  else
    # Fallback: heuristic estimation
    TOKEN_SOURCE="heuristic"
    ESTIMATED_TOKENS=$(( TRANSCRIPT_BYTES / 4 ))
    # [[:space:]] instead of \s: \s is not portable in basic grep patterns.
    ASSISTANT_TURNS=$(grep -c '"role":[[:space:]]*"assistant"' "$transcript" 2>/dev/null || true)
    ASSISTANT_TURNS=${ASSISTANT_TURNS:-0}

    if (( ASSISTANT_TURNS > 0 )); then
      # Assume each turn re-sent, on average, half of the final context.
      AVG_CONTEXT=$(( ESTIMATED_TOKENS / 2 ))
      CUMULATIVE_INPUT=$(( AVG_CONTEXT * ASSISTANT_TURNS ))
    else
      CUMULATIVE_INPUT=$ESTIMATED_TOKENS
    fi
    CUMULATIVE_INPUT_RAW=$CUMULATIVE_INPUT
    OUTPUT_TOKENS=$(( ESTIMATED_TOKENS / 20 ))
  fi

  # --- Cost estimates ---
  estimate_costs
}

collect_transcript_metrics
|
|
|
|
|
|
|
|
|
|
# --- Write log entry ---

# The entry is serialised by Python so that string values and the
# edited-files JSON are embedded with correct escaping. Values are handed over
# positionally; the order here must match FIELD_LAYOUT in the Python below.
LOG_ENTRY_ARGS=(
  "$TIMESTAMP" "$SESSION_ID" "$TRIGGER" "$TOKEN_SOURCE"
  "$TRANSCRIPT_BYTES" "$TRANSCRIPT_LINES" "$ASSISTANT_TURNS" "$TOOL_USES"
  "$CUMULATIVE_INPUT" "$CUMULATIVE_INPUT_RAW" "$CACHE_CREATION" "$CACHE_READ"
  "$OUTPUT_TOKENS" "$ENERGY_WH" "$CO2_G" "$COST_CENTS"
  "$MODEL_ID" "$AUTO_RATIO_PM" "$USER_TOKENS_EST"
  "$UNIQUE_FILES" "$TOTAL_EDITS" "$TEST_PASSES" "$TEST_FAILURES"
  "$HAS_PUBLIC_PUSH" "$EDITED_FILES_JSON"
)

# Appends exactly one compact JSON line to the log.
python3 - "${LOG_ENTRY_ARGS[@]}" >> "$LOG_FILE" <<'PY'
import json
import sys

# (key, converter) in output order; position i+1 in argv feeds entry i.
FIELD_LAYOUT = [
    ('timestamp', str),
    ('session_id', str),
    ('trigger', str),
    ('token_source', str),
    ('transcript_bytes', int),
    ('transcript_lines', int),
    ('assistant_turns', int),
    ('tool_uses', int),
    ('cumulative_input_tokens', int),
    ('cumulative_input_raw', int),
    ('cache_creation_tokens', int),
    ('cache_read_tokens', int),
    ('output_tokens', int),
    ('energy_wh', int),
    ('co2_g', int),
    ('cost_cents', int),
    ('model_id', str),
    ('automation_ratio_pm', int),
    ('user_tokens_est', int),
    ('unique_files_edited', int),
    ('total_file_edits', int),
    ('test_passes', int),
    ('test_failures', int),
    ('has_public_push', int),
    ('edited_files', json.loads),
]

entry = {}
for position, (key, convert) in enumerate(FIELD_LAYOUT, start=1):
    entry[key] = convert(sys.argv[position])

print(json.dumps(entry, separators=(',', ':')))
PY

exit 0
|