ai-conversation-impact/.claude/hooks/pre-compact-snapshot.sh

#!/usr/bin/env bash
#
# pre-compact-snapshot.sh — Snapshot impact metrics before context compaction.
#
# Runs as a PreCompact hook. Reads the conversation transcript, extracts
# actual token counts when available (falls back to heuristic estimates),
# and appends a timestamped entry to the impact log.
#
# Input: JSON on stdin with fields: trigger, session_id, transcript_path, cwd
# Output: nothing on stdout (hook succeeds silently). Logs to impact-log.jsonl.
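#
# Example stdin payload (field set as above; values illustrative):
#   {"trigger":"auto","session_id":"abc123",
#    "transcript_path":"/path/to/session.jsonl","cwd":"/path/to/project"}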
set -euo pipefail
HOOK_INPUT=$(cat)
PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(echo "$HOOK_INPUT" | jq -r '.cwd')}"
TRANSCRIPT_PATH=$(echo "$HOOK_INPUT" | jq -r '.transcript_path')
SESSION_ID=$(echo "$HOOK_INPUT" | jq -r '.session_id')
TRIGGER=$(echo "$HOOK_INPUT" | jq -r '.trigger')
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
LOG_DIR="$PROJECT_DIR/.claude/impact"
LOG_FILE="$LOG_DIR/impact-log.jsonl"
mkdir -p "$LOG_DIR"
# --- Extract or estimate metrics from transcript ---
if [ -f "$TRANSCRIPT_PATH" ]; then
TRANSCRIPT_BYTES=$(wc -c < "$TRANSCRIPT_PATH")
TRANSCRIPT_LINES=$(wc -l < "$TRANSCRIPT_PATH")
# Count tool uses
TOOL_USES=$(grep -c '"tool_use"' "$TRANSCRIPT_PATH" 2>/dev/null || echo 0)
# Try to extract actual token counts from usage fields in the transcript.
# The transcript contains .message.usage with input_tokens,
# cache_creation_input_tokens, cache_read_input_tokens, output_tokens.
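  # A transcript line carrying usage data looks roughly like this
  # (abridged; values illustrative):
  #   {"message":{"role":"assistant","model":"<model-id>","usage":{"input_tokens":12,
  #     "cache_creation_input_tokens":4096,"cache_read_input_tokens":98304,"output_tokens":512}}}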
  USAGE_DATA=$(python3 -c "
import json, sys, re

input_tokens = 0
cache_creation = 0
cache_read = 0
output_tokens = 0
turns = 0
model_id = ''
user_bytes = 0
edited_files = {}  # file_path -> edit count
test_passes = 0
test_failures = 0
has_public_push = 0

with open(sys.argv[1]) as f:
    for line in f:
        try:
            d = json.loads(line.strip())
            msg = d.get('message', {})
            role = msg.get('role')
            content = msg.get('content', '')
            # Track user message size (proxy for user contribution)
            if role == 'user':
                if isinstance(content, str):
                    user_bytes += len(content.encode('utf-8', errors='replace'))
                elif isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict) and block.get('type') == 'text':
                            user_bytes += len(block.get('text', '').encode('utf-8', errors='replace'))
            # Extract usage data and model from assistant messages
            if role == 'assistant':
                m = msg.get('model', '')
                if m:
                    model_id = m
                u = msg.get('usage')
                if u and 'input_tokens' in u:
                    turns += 1
                    input_tokens += u.get('input_tokens', 0)
                    cache_creation += u.get('cache_creation_input_tokens', 0)
                    cache_read += u.get('cache_read_input_tokens', 0)
                    output_tokens += u.get('output_tokens', 0)
                # Parse tool_use blocks
                if isinstance(content, list):
                    for block in content:
                        if not isinstance(block, dict) or block.get('type') != 'tool_use':
                            continue
                        name = block.get('name', '')
                        inp = block.get('input', {})
                        # File churn: count Edit/Write per file
                        if name in ('Edit', 'Write'):
                            fp = inp.get('file_path', '')
                            if fp:
                                edited_files[fp] = edited_files.get(fp, 0) + 1
                        # Public push detection
                        if name == 'Bash':
                            cmd = inp.get('command', '')
                            if re.search(r'git\s+push', cmd):
                                has_public_push = 1
            # Test results arrive as tool_result blocks on user-role messages
            if role == 'user' and isinstance(content, list):
                for block in content:
                    if isinstance(block, dict) and block.get('type') == 'tool_result':
                        text = ''
                        rc = block.get('content', '')
                        if isinstance(rc, str):
                            text = rc
                        elif isinstance(rc, list):
                            text = ' '.join(b.get('text', '') for b in rc if isinstance(b, dict))
                        # Detect test outcomes from common test-runner output
                        if re.search(r'(\d+)\s+(tests?\s+)?passed', text, re.I):
                            test_passes += 1
                        if re.search(r'(\d+)\s+(tests?\s+)?failed|FAIL(ED)?|ERROR', text, re.I):
                            test_failures += 1
        except Exception:
            pass

user_tokens_est = user_bytes // 4  # rough byte-to-token estimate
unique_files = len(edited_files)
total_edits = sum(edited_files.values())
churn = round(total_edits / unique_files, 2) if unique_files > 0 else 0  # computed but not yet emitted
# automation_ratio: 0 = all human, 1 = all AI (stored as permille for integer arithmetic)
if output_tokens + user_tokens_est > 0:
    auto_ratio_pm = output_tokens * 1000 // (output_tokens + user_tokens_est)
else:
    auto_ratio_pm = 0
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}')
# Second line: JSON object mapping edited file paths to edit counts
print(json.dumps(edited_files))
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
  USAGE_LINE1=$(echo "$USAGE_DATA" | head -1)
  EDITED_FILES_JSON=$(echo "$USAGE_DATA" | tail -1)
  if [ -n "$USAGE_LINE1" ] && [ "$(echo "$USAGE_LINE1" | cut -f1)" -gt 0 ] 2>/dev/null; then
    # Actual token counts available
    TOKEN_SOURCE="actual"
    ASSISTANT_TURNS=$(echo "$USAGE_LINE1" | cut -f1)
    INPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f2)
    CACHE_CREATION=$(echo "$USAGE_LINE1" | cut -f3)
    CACHE_READ=$(echo "$USAGE_LINE1" | cut -f4)
    OUTPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f5)
    MODEL_ID=$(echo "$USAGE_LINE1" | cut -f6)
    AUTO_RATIO_PM=$(echo "$USAGE_LINE1" | cut -f7)
    USER_TOKENS_EST=$(echo "$USAGE_LINE1" | cut -f8)
    UNIQUE_FILES=$(echo "$USAGE_LINE1" | cut -f9)
    TOTAL_EDITS=$(echo "$USAGE_LINE1" | cut -f10)
    TEST_PASSES=$(echo "$USAGE_LINE1" | cut -f11)
    TEST_FAILURES=$(echo "$USAGE_LINE1" | cut -f12)
    HAS_PUBLIC_PUSH=$(echo "$USAGE_LINE1" | cut -f13)
    # Cumulative input = all tokens that went through the model.
    # Cache reads are cheaper (~10-20% of full compute), so we weight them:
    #   full-cost tokens:    input_tokens + cache_creation_input_tokens
    #   reduced-cost tokens: cache_read_input_tokens (weighted 0.1x for energy)
    FULL_COST_INPUT=$(( INPUT_TOKENS + CACHE_CREATION ))
    CACHE_READ_EFFECTIVE=$(( CACHE_READ / 10 ))
    CUMULATIVE_INPUT=$(( FULL_COST_INPUT + CACHE_READ_EFFECTIVE ))
    # Also track raw total for the log
    CUMULATIVE_INPUT_RAW=$(( INPUT_TOKENS + CACHE_CREATION + CACHE_READ ))
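    # Worked example (illustrative numbers): INPUT_TOKENS=50000, CACHE_CREATION=20000,
    # CACHE_READ=400000 gives FULL_COST_INPUT=70000, CACHE_READ_EFFECTIVE=40000, and
    # CUMULATIVE_INPUT=110000 (vs. CUMULATIVE_INPUT_RAW=470000).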
  else
    # Fallback: heuristic estimation from transcript size
    TOKEN_SOURCE="heuristic"
    ESTIMATED_TOKENS=$((TRANSCRIPT_BYTES / 4))
    # Same grep -c exit-status caveat as above; [[:space:]] is used instead of
    # the GNU-only \s for portability.
    ASSISTANT_TURNS=$(grep -c '"role":[[:space:]]*"assistant"' "$TRANSCRIPT_PATH" 2>/dev/null || true)
    ASSISTANT_TURNS=${ASSISTANT_TURNS:-0}
    if [ "$ASSISTANT_TURNS" -gt 0 ]; then
      AVG_CONTEXT=$((ESTIMATED_TOKENS / 2))
      CUMULATIVE_INPUT=$((AVG_CONTEXT * ASSISTANT_TURNS))
    else
      CUMULATIVE_INPUT=$ESTIMATED_TOKENS
    fi
    CUMULATIVE_INPUT_RAW=$CUMULATIVE_INPUT
    OUTPUT_TOKENS=$((ESTIMATED_TOKENS / 20))
    CACHE_CREATION=0
    CACHE_READ=0
    INPUT_TOKENS=0
    MODEL_ID=""
    AUTO_RATIO_PM=0
    USER_TOKENS_EST=0
    UNIQUE_FILES=0
    TOTAL_EDITS=0
    TEST_PASSES=0
    TEST_FAILURES=0
    HAS_PUBLIC_PUSH=0
    EDITED_FILES_JSON="{}"
  fi
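  # Worked example of the heuristic path (illustrative): a 204800-byte transcript
  # estimates to 51200 tokens; with 40 assistant turns, AVG_CONTEXT=25600,
  # CUMULATIVE_INPUT=1024000, and OUTPUT_TOKENS=2560.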
  # --- Cost estimates ---
  # Energy: 0.1 Wh per 1K input tokens, 0.5 Wh per 1K output tokens, PUE 1.2
  # Calibrated against Google (Patterson et al., Aug 2025) and Jegham et al. (May 2025)
  # Using integer arithmetic in centiwatt-hours (cWh) to avoid a bc dependency
  INPUT_CWH=$(( CUMULATIVE_INPUT * 100 / 10000 ))       # 0.1 Wh/1K = 100 cWh/10K tokens
  OUTPUT_CWH=$(( OUTPUT_TOKENS * 500 / 10000 ))         # 0.5 Wh/1K = 500 cWh/10K tokens
  ENERGY_CWH=$(( (INPUT_CWH + OUTPUT_CWH) * 12 / 10 ))  # PUE 1.2
  ENERGY_WH=$(( ENERGY_CWH / 100 ))
  # CO2: 325 g/kWh -> 0.325 g/Wh -> 325 mg/Wh
  CO2_MG=$(( ENERGY_WH * 325 ))
  CO2_G=$(( CO2_MG / 1000 ))
  # Financial: $15/M input, $75/M output (computed in cents)
  # Use the effective cumulative input (cache-weighted) for cost too
  COST_INPUT_CENTS=$(( CUMULATIVE_INPUT * 15 / 10000 ))  # $15/M = 0.0015 cents/token
  COST_OUTPUT_CENTS=$(( OUTPUT_TOKENS * 75 / 10000 ))    # $75/M = 0.0075 cents/token
  COST_CENTS=$(( COST_INPUT_CENTS + COST_OUTPUT_CENTS ))
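  # Worked example (illustrative): CUMULATIVE_INPUT=110000, OUTPUT_TOKENS=20000 gives
  #   INPUT_CWH=1100, OUTPUT_CWH=1000, ENERGY_CWH=2520 -> ENERGY_WH=25
  #   CO2_MG=8125 -> CO2_G=8
  #   COST_CENTS = 165 + 150 = 315 (~$3.15)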
else
  TRANSCRIPT_BYTES=0
  TRANSCRIPT_LINES=0
  ASSISTANT_TURNS=0
  TOOL_USES=0
  CUMULATIVE_INPUT=0
  CUMULATIVE_INPUT_RAW=0
  OUTPUT_TOKENS=0
  CACHE_CREATION=0
  CACHE_READ=0
  ENERGY_WH=0
  CO2_G=0
  COST_CENTS=0
  TOKEN_SOURCE="none"
  MODEL_ID=""
  AUTO_RATIO_PM=0
  USER_TOKENS_EST=0
  UNIQUE_FILES=0
  TOTAL_EDITS=0
  TEST_PASSES=0
  TEST_FAILURES=0
  HAS_PUBLIC_PUSH=0
  EDITED_FILES_JSON="{}"
fi
# --- Write log entry ---
# Build log entry using Python to safely embed the edited_files JSON
python3 -c "
import json, sys
entry = {
'timestamp': sys.argv[1],
'session_id': sys.argv[2],
'trigger': sys.argv[3],
'token_source': sys.argv[4],
'transcript_bytes': int(sys.argv[5]),
'transcript_lines': int(sys.argv[6]),
'assistant_turns': int(sys.argv[7]),
'tool_uses': int(sys.argv[8]),
'cumulative_input_tokens': int(sys.argv[9]),
'cumulative_input_raw': int(sys.argv[10]),
'cache_creation_tokens': int(sys.argv[11]),
'cache_read_tokens': int(sys.argv[12]),
'output_tokens': int(sys.argv[13]),
'energy_wh': int(sys.argv[14]),
'co2_g': int(sys.argv[15]),
'cost_cents': int(sys.argv[16]),
'model_id': sys.argv[17],
'automation_ratio_pm': int(sys.argv[18]),
'user_tokens_est': int(sys.argv[19]),
'unique_files_edited': int(sys.argv[20]),
'total_file_edits': int(sys.argv[21]),
'test_passes': int(sys.argv[22]),
'test_failures': int(sys.argv[23]),
'has_public_push': int(sys.argv[24]),
'edited_files': json.loads(sys.argv[25]),
}
print(json.dumps(entry, separators=(',', ':')))
" "$TIMESTAMP" "$SESSION_ID" "$TRIGGER" "$TOKEN_SOURCE" \
"$TRANSCRIPT_BYTES" "$TRANSCRIPT_LINES" "$ASSISTANT_TURNS" "$TOOL_USES" \
"$CUMULATIVE_INPUT" "$CUMULATIVE_INPUT_RAW" "$CACHE_CREATION" "$CACHE_READ" \
"$OUTPUT_TOKENS" "$ENERGY_WH" "$CO2_G" "$COST_CENTS" \
"$MODEL_ID" "$AUTO_RATIO_PM" "$USER_TOKENS_EST" \
"$UNIQUE_FILES" "$TOTAL_EDITS" "$TEST_PASSES" "$TEST_FAILURES" \
"$HAS_PUBLIC_PUSH" "$EDITED_FILES_JSON" >> "$LOG_FILE"
exit 0