Initial commit: AI conversation impact methodology and toolkit
CC0-licensed methodology for estimating the environmental and social costs of AI conversations (20+ categories), plus a reusable toolkit for automated impact tracking in Claude Code sessions.
This commit is contained in:
commit
0543a43816
27 changed files with 2439 additions and 0 deletions
82
.claude/hooks/annotate-impact.sh
Executable file
82
.claude/hooks/annotate-impact.sh
Executable file
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env bash
#
# annotate-impact.sh — Annotate the most recent impact log entry with
# positive impact data.
#
# Usage: ./annotate-impact.sh
# Interactive: prompts for value assessment of the last logged session.
#
# This adds value-side data to complement the cost data captured
# automatically by the PreCompact hook. The annotation is appended to
# annotations.jsonl (next to the impact log) as one JSON object per line,
# linked to the snapshot by session_id and snapshot_timestamp.
#
# Requires: jq

set -euo pipefail

PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(cd "$(dirname "$0")/../.." && pwd)}"
LOG_FILE="$PROJECT_DIR/.claude/impact/impact-log.jsonl"
ANNOTATION_FILE="$PROJECT_DIR/.claude/impact/annotations.jsonl"

if [ ! -f "$LOG_FILE" ]; then
  echo "No impact log found. Run a conversation with compaction first."
  exit 1
fi

# Show the last entry
LAST=$(tail -n 1 "$LOG_FILE")
if [ -z "$LAST" ]; then
  # Without a snapshot there is nothing to link the annotation to.
  echo "Impact log is empty. Run a conversation with compaction first." >&2
  exit 1
fi

echo "Last log entry:"
jq . <<<"$LAST"
echo ""

SESSION_ID=$(jq -r '.session_id' <<<"$LAST")
TIMESTAMP=$(jq -r '.timestamp' <<<"$LAST")

echo "Annotating session $SESSION_ID (snapshot $TIMESTAMP)"
echo ""

# Gather value data
read -rp "Brief summary of value produced: " VALUE_SUMMARY

# Reach is stored as a JSON number, so insist on a non-negative integer
# instead of writing whatever was typed into the log.
while true; do
  read -rp "Estimated reach (number of people affected) [1]: " REACH
  REACH=${REACH:-1}
  if [[ "$REACH" =~ ^[0-9]+$ ]]; then
    break
  fi
  echo "Please enter a whole number (e.g. 1)." >&2
done

echo "Counterfactual (would the user have achieved this without the conversation?):"
echo " 1. Yes, same speed (no value added)"
echo " 2. Yes, but slower"
echo " 3. Yes, but lower quality"
echo " 4. No (could not have done it alone)"
read -rp "Choice [2]: " CF_CHOICE
CF_CHOICE=${CF_CHOICE:-2}
case "$CF_CHOICE" in
  1) COUNTERFACTUAL="same_speed" ;;
  2) COUNTERFACTUAL="slower" ;;
  3) COUNTERFACTUAL="lower_quality" ;;
  4) COUNTERFACTUAL="impossible" ;;
  *) COUNTERFACTUAL="unknown" ;;
esac

echo "Net assessment:"
echo " 1. Clearly net-positive"
echo " 2. Probably net-positive"
echo " 3. Uncertain"
echo " 4. Probably net-negative"
echo " 5. Clearly net-negative"
read -rp "Choice [3]: " NET_CHOICE
NET_CHOICE=${NET_CHOICE:-3}
case "$NET_CHOICE" in
  1) NET_ASSESSMENT="clearly_positive" ;;
  2) NET_ASSESSMENT="probably_positive" ;;
  3) NET_ASSESSMENT="uncertain" ;;
  4) NET_ASSESSMENT="probably_negative" ;;
  5) NET_ASSESSMENT="clearly_negative" ;;
  *) NET_ASSESSMENT="unknown" ;;
esac

# Write annotation as a separate log entry linked by session_id.
# jq builds the record so quotes, backslashes and other special characters
# in the free-text summary are escaped and the line stays valid JSON.
ANNOT_TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

jq -cn \
  --arg timestamp "$ANNOT_TIMESTAMP" \
  --arg snapshot_timestamp "$TIMESTAMP" \
  --arg session_id "$SESSION_ID" \
  --arg value_summary "$VALUE_SUMMARY" \
  --arg estimated_reach "$REACH" \
  --arg counterfactual "$COUNTERFACTUAL" \
  --arg net_assessment "$NET_ASSESSMENT" \
  '{
    timestamp: $timestamp,
    snapshot_timestamp: $snapshot_timestamp,
    session_id: $session_id,
    value_summary: $value_summary,
    estimated_reach: ($estimated_reach | tonumber),
    counterfactual: $counterfactual,
    net_assessment: $net_assessment
  }' >> "$ANNOTATION_FILE"

echo ""
echo "Annotation saved to $ANNOTATION_FILE"
|
||||
137
.claude/hooks/pre-compact-snapshot.sh
Executable file
137
.claude/hooks/pre-compact-snapshot.sh
Executable file
|
|
@ -0,0 +1,137 @@
|
|||
#!/usr/bin/env bash
#
# pre-compact-snapshot.sh — Snapshot impact metrics before context compaction.
#
# Runs as a PreCompact hook. Reads the conversation transcript, extracts
# actual token counts when available (falls back to heuristic estimates),
# and appends a timestamped entry to the impact log.
#
# Input: JSON on stdin with fields: trigger, session_id, transcript_path, cwd
# Output: nothing on stdout (hook succeeds silently). Logs to impact-log.jsonl.
#
# Requires: jq. python3 is optional; without it the heuristic estimate is used.

set -euo pipefail

# Print the number of lines in file $2 matching basic regex $1.
# `grep -c` prints "0" AND exits 1 when nothing matches, so the status is
# ignored here rather than appending a second "0" to the output.
count_matches() {
  local n
  n=$(grep -c -- "$1" "$2" 2>/dev/null || true)
  printf '%s\n' "$(( ${n:-0} ))"
}

HOOK_INPUT=$(cat)
PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(jq -r '.cwd // empty' <<<"$HOOK_INPUT")}"
# If neither the environment nor the hook input names a directory, use the
# current one instead of a literal "null" or the filesystem root.
PROJECT_DIR="${PROJECT_DIR:-$PWD}"
TRANSCRIPT_PATH=$(jq -r '.transcript_path' <<<"$HOOK_INPUT")
SESSION_ID=$(jq -r '.session_id' <<<"$HOOK_INPUT")
TRIGGER=$(jq -r '.trigger' <<<"$HOOK_INPUT")
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

LOG_DIR="$PROJECT_DIR/.claude/impact"
LOG_FILE="$LOG_DIR/impact-log.jsonl"
mkdir -p "$LOG_DIR"

# --- Extract or estimate metrics from transcript ---

if [ -f "$TRANSCRIPT_PATH" ]; then
  # Arithmetic expansion strips any padding wc emits on some platforms.
  TRANSCRIPT_BYTES=$(( $(wc -c < "$TRANSCRIPT_PATH") ))
  TRANSCRIPT_LINES=$(( $(wc -l < "$TRANSCRIPT_PATH") ))

  # Count tool uses
  TOOL_USES=$(count_matches '"tool_use"' "$TRANSCRIPT_PATH")

  # Try to extract actual token counts from usage fields in the transcript.
  # The transcript contains .message.usage with input_tokens,
  # cache_creation_input_tokens, cache_read_input_tokens, output_tokens.
  # Lines that are not valid JSON (or lack usage data) are skipped.
  # Any failure (e.g. python3 missing) leaves USAGE_DATA empty, which
  # selects the heuristic branch below.
  USAGE_DATA=$(python3 -c '
import json, sys

input_tokens = 0
cache_creation = 0
cache_read = 0
output_tokens = 0
turns = 0
with open(sys.argv[1]) as f:
    for line in f:
        try:
            d = json.loads(line.strip())
            u = d.get("message", {}).get("usage")
            if u and "input_tokens" in u:
                turns += 1
                input_tokens += u.get("input_tokens", 0)
                cache_creation += u.get("cache_creation_input_tokens", 0)
                cache_read += u.get("cache_read_input_tokens", 0)
                output_tokens += u.get("output_tokens", 0)
        except Exception:
            pass
# Print as tab-separated for easy shell parsing
print(f"{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}")
' "$TRANSCRIPT_PATH" 2>/dev/null || true)

  IFS=$'\t' read -r USAGE_TURNS USAGE_INPUT USAGE_CACHE_CREATION \
    USAGE_CACHE_READ USAGE_OUTPUT <<<"$USAGE_DATA"

  if [[ "$USAGE_TURNS" =~ ^[0-9]+$ ]] && (( USAGE_TURNS > 0 )); then
    # Actual token counts available
    TOKEN_SOURCE="actual"
    ASSISTANT_TURNS=$USAGE_TURNS
    INPUT_TOKENS=$USAGE_INPUT
    CACHE_CREATION=$USAGE_CACHE_CREATION
    CACHE_READ=$USAGE_CACHE_READ
    OUTPUT_TOKENS=$USAGE_OUTPUT

    # Cumulative input = all tokens that went through the model.
    # Cache reads are cheaper (~10-20% of full compute), so we weight them.
    # Full-cost tokens: input_tokens + cache_creation_input_tokens
    # Reduced-cost tokens: cache_read_input_tokens (weight at 0.1x for energy)
    FULL_COST_INPUT=$(( INPUT_TOKENS + CACHE_CREATION ))
    CACHE_READ_EFFECTIVE=$(( CACHE_READ / 10 ))
    CUMULATIVE_INPUT=$(( FULL_COST_INPUT + CACHE_READ_EFFECTIVE ))
    # Also track raw total for the log
    CUMULATIVE_INPUT_RAW=$(( INPUT_TOKENS + CACHE_CREATION + CACHE_READ ))
  else
    # Fallback: heuristic estimation (~4 bytes per token)
    TOKEN_SOURCE="heuristic"
    ESTIMATED_TOKENS=$(( TRANSCRIPT_BYTES / 4 ))
    # [[:space:]] instead of \s: \s is a GNU extension in basic regexes.
    ASSISTANT_TURNS=$(count_matches '"role":[[:space:]]*"assistant"' "$TRANSCRIPT_PATH")

    if (( ASSISTANT_TURNS > 0 )); then
      AVG_CONTEXT=$(( ESTIMATED_TOKENS / 2 ))
      CUMULATIVE_INPUT=$(( AVG_CONTEXT * ASSISTANT_TURNS ))
    else
      CUMULATIVE_INPUT=$ESTIMATED_TOKENS
    fi
    CUMULATIVE_INPUT_RAW=$CUMULATIVE_INPUT
    OUTPUT_TOKENS=$(( ESTIMATED_TOKENS / 20 ))
    CACHE_CREATION=0
    CACHE_READ=0
  fi

  # --- Cost estimates ---
  # Energy: 0.003 Wh per 1K input tokens, 0.015 Wh per 1K output tokens, PUE 1.2
  # Using integer arithmetic in centiwatt-hours to avoid bc dependency
  INPUT_CWH=$(( CUMULATIVE_INPUT * 3 / 10000 ))        # 0.003 Wh/1K = 3 cWh/10K
  OUTPUT_CWH=$(( OUTPUT_TOKENS * 15 / 10000 ))         # 0.015 Wh/1K = 15 cWh/10K
  ENERGY_CWH=$(( (INPUT_CWH + OUTPUT_CWH) * 12 / 10 )) # PUE 1.2
  ENERGY_WH=$(( ENERGY_CWH / 100 ))

  # CO2: 325g/kWh -> 0.325g/Wh -> 325 mg/Wh.
  # Derived from centiwatt-hours rather than the truncated whole-Wh figure
  # so the rounding loss is not compounded.
  CO2_MG=$(( ENERGY_CWH * 325 / 100 ))
  CO2_G=$(( CO2_MG / 1000 ))

  # Financial: $15/M input, $75/M output (in cents)
  # Use effective cumulative input (cache-weighted) for cost too
  COST_INPUT_CENTS=$(( CUMULATIVE_INPUT * 15 / 10000 )) # $15/M = 15c/10K
  COST_OUTPUT_CENTS=$(( OUTPUT_TOKENS * 75 / 10000 ))   # $75/M = 75c/10K
  COST_CENTS=$(( COST_INPUT_CENTS + COST_OUTPUT_CENTS ))
else
  # No readable transcript: log an all-zero snapshot.
  TRANSCRIPT_BYTES=0
  TRANSCRIPT_LINES=0
  ASSISTANT_TURNS=0
  TOOL_USES=0
  CUMULATIVE_INPUT=0
  CUMULATIVE_INPUT_RAW=0
  OUTPUT_TOKENS=0
  CACHE_CREATION=0
  CACHE_READ=0
  ENERGY_WH=0
  CO2_G=0
  COST_CENTS=0
  TOKEN_SOURCE="none"
fi

# --- Write log entry ---
# jq builds the record so string fields are escaped and every line of the
# log is guaranteed to be valid JSON.

jq -cn \
  --arg timestamp "$TIMESTAMP" \
  --arg session_id "$SESSION_ID" \
  --arg trigger "$TRIGGER" \
  --arg token_source "$TOKEN_SOURCE" \
  --argjson transcript_bytes "$TRANSCRIPT_BYTES" \
  --argjson transcript_lines "$TRANSCRIPT_LINES" \
  --argjson assistant_turns "$ASSISTANT_TURNS" \
  --argjson tool_uses "$TOOL_USES" \
  --argjson cumulative_input_tokens "$CUMULATIVE_INPUT" \
  --argjson cumulative_input_raw "$CUMULATIVE_INPUT_RAW" \
  --argjson cache_creation_tokens "$CACHE_CREATION" \
  --argjson cache_read_tokens "$CACHE_READ" \
  --argjson output_tokens "$OUTPUT_TOKENS" \
  --argjson energy_wh "$ENERGY_WH" \
  --argjson co2_g "$CO2_G" \
  --argjson cost_cents "$COST_CENTS" \
  '{
    timestamp: $timestamp,
    session_id: $session_id,
    trigger: $trigger,
    token_source: $token_source,
    transcript_bytes: $transcript_bytes,
    transcript_lines: $transcript_lines,
    assistant_turns: $assistant_turns,
    tool_uses: $tool_uses,
    cumulative_input_tokens: $cumulative_input_tokens,
    cumulative_input_raw: $cumulative_input_raw,
    cache_creation_tokens: $cache_creation_tokens,
    cache_read_tokens: $cache_read_tokens,
    output_tokens: $output_tokens,
    energy_wh: $energy_wh,
    co2_g: $co2_g,
    cost_cents: $cost_cents
  }' >> "$LOG_FILE"

exit 0
|
||||
87
.claude/hooks/show-impact.sh
Executable file
87
.claude/hooks/show-impact.sh
Executable file
|
|
@ -0,0 +1,87 @@
|
|||
#!/usr/bin/env bash
#
# show-impact.sh — Display accumulated impact metrics from the log.
#
# Usage: ./show-impact.sh [session_id]
# Without arguments: shows every snapshot and totals across all sessions.
# With session_id: shows only entries whose session id contains the given
# text (literal match, not a regex), and totals for those entries only.
#
# Requires: jq

set -euo pipefail

PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(cd "$(dirname "$0")/../.." && pwd)}"
LOG_FILE="$PROJECT_DIR/.claude/impact/impact-log.jsonl"
ANNOT_FILE="$PROJECT_DIR/.claude/impact/annotations.jsonl"

# Empty filter means "match every session".
FILTER="${1:-}"

# Field separator for rows handed from jq to the shell. A non-whitespace
# control character, so `read` keeps empty fields in place.
SEP=$'\x1f'

# jq helper shared by all queries: literal substring match on session_id.
JQ_MATCH='def matches($f): $f == "" or ((.session_id // "" | tostring) | contains($f));'

# Format an amount in cents as dollars (e.g. 1234 -> $12.34) using integer
# arithmetic only. Anything that is not a plain non-negative integer is
# printed verbatim as "<value> cents".
format_cost() {
  local cents=$1
  if [[ "$cents" =~ ^[0-9]+$ ]]; then
    printf '$%d.%02d' "$(( 10#$cents / 100 ))" "$(( 10#$cents % 100 ))"
  else
    printf '%s cents' "$cents"
  fi
}

# Print one block per matching snapshot in the impact log.
print_snapshots() {
  local rows ts trigger sid turns tools source cum_input output
  local cache_create cache_read energy co2 cost

  # One jq pass over the file; falls back to the old output field name
  # (.estimated_output_tokens) when .output_tokens is absent.
  rows=$(jq -r --arg f "$FILTER" --arg sep "$SEP" "$JQ_MATCH"'
    select(matches($f))
    | [ .timestamp, .trigger, .session_id, .assistant_turns, .tool_uses,
        (.token_source // "heuristic"),
        .cumulative_input_tokens,
        (.output_tokens // .estimated_output_tokens),
        (.cache_creation_tokens // 0),
        (.cache_read_tokens // 0),
        .energy_wh, .co2_g, .cost_cents ]
    | map(tostring) | join($sep)' "$LOG_FILE")

  [[ -n "$rows" ]] || return 0

  while IFS="$SEP" read -r ts trigger sid turns tools source cum_input output \
    cache_create cache_read energy co2 cost; do
    printf "%s [%s] session=%s\n" "$ts" "$trigger" "${sid:0:12}..."
    printf " Turns: %s Tool uses: %s Token source: %s\n" "$turns" "$tools" "$source"
    printf " Input tokens (cache-weighted): %s Output tokens: %s\n" "$cum_input" "$output"
    if [[ "$cache_create" != "0" || "$cache_read" != "0" ]]; then
      printf " Cache: %s created, %s read\n" "$cache_create" "$cache_read"
    fi
    printf " Energy: ~%s Wh CO2: ~%sg Cost: ~%s\n" "$energy" "$co2" "$(format_cost "$cost")"
    echo ""
  done <<<"$rows"
}

# Print totals over the snapshots that match the filter.
print_totals() {
  local totals count energy co2 cost

  totals=$(jq -rs --arg f "$FILTER" --arg sep "$SEP" "$JQ_MATCH"'
    [ .[] | select(matches($f)) ] as $e
    | [ ($e | length),
        ([$e[].energy_wh] | add // 0),
        ([$e[].co2_g] | add // 0),
        ([$e[].cost_cents] | add // 0) ]
    | map(tostring) | join($sep)' "$LOG_FILE")

  IFS="$SEP" read -r count energy co2 cost <<<"$totals"

  echo "=== Totals ($count snapshots) ==="
  printf " Energy: ~%s Wh CO2: ~%sg Cost: ~%s\n" "$energy" "$co2" "$(format_cost "$cost")"
}

# Print value annotations for matching sessions, if any were recorded.
print_annotations() {
  local rows ts sid summary reach cf net

  if [[ ! -f "$ANNOT_FILE" || ! -s "$ANNOT_FILE" ]]; then
    return 0
  fi

  echo ""
  echo "=== Value Annotations ==="
  echo ""

  # Line breaks inside a summary are flattened so each annotation stays on
  # one row for the shell loop below.
  rows=$(jq -r --arg f "$FILTER" --arg sep "$SEP" "$JQ_MATCH"'
    select(matches($f))
    | [ .timestamp, .session_id,
        (.value_summary | tostring | gsub("[\r\n]+"; " ")),
        .estimated_reach, .counterfactual, .net_assessment ]
    | map(tostring) | join($sep)' "$ANNOT_FILE")

  [[ -n "$rows" ]] || return 0

  while IFS="$SEP" read -r ts sid summary reach cf net; do
    printf "%s session=%s\n" "$ts" "${sid:0:12}..."
    printf " Value: %s\n" "$summary"
    printf " Reach: %s Counterfactual: %s Net: %s\n" "$reach" "$cf" "$net"
    echo ""
  done <<<"$rows"
}

main() {
  if [[ ! -f "$LOG_FILE" ]]; then
    echo "No impact log found at $LOG_FILE"
    echo "The PreCompact hook will create it on first context compaction."
    return 0
  fi

  echo "=== Impact Log ==="
  echo ""

  print_snapshots
  print_totals
  print_annotations
}

main "$@"
|
||||
Loading…
Add table
Add a link
Reference in a new issue