Add social cost proxies to impact tracking hooks
Extend pre-compact-snapshot.sh to extract 5 new per-conversation metrics from the transcript: automation ratio (deskilling proxy), model ID (monoculture tracking), test pass/fail counts (code quality proxy), file churn (edits per unique file), and public push detection (data pollution risk flag). Update show-impact.sh to display them. New plan: quantify-social-costs.md — roadmap for moving non-environmental cost categories from qualitative to proxy-measurable. Tasks 19-24 done. Task 25 (methodology update) pending. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e6e0bf4616
commit
af6062c1f9
8 changed files with 554 additions and 25 deletions
|
|
@ -35,27 +35,102 @@ if [ -f "$TRANSCRIPT_PATH" ]; then
|
||||||
# The transcript contains .message.usage with input_tokens,
|
# The transcript contains .message.usage with input_tokens,
|
||||||
# cache_creation_input_tokens, cache_read_input_tokens, output_tokens.
|
# cache_creation_input_tokens, cache_read_input_tokens, output_tokens.
|
||||||
USAGE_DATA=$(python3 -c "
|
USAGE_DATA=$(python3 -c "
|
||||||
import json, sys
|
import json, sys, re
|
||||||
|
|
||||||
input_tokens = 0
|
input_tokens = 0
|
||||||
cache_creation = 0
|
cache_creation = 0
|
||||||
cache_read = 0
|
cache_read = 0
|
||||||
output_tokens = 0
|
output_tokens = 0
|
||||||
turns = 0
|
turns = 0
|
||||||
|
model_id = ''
|
||||||
|
user_bytes = 0
|
||||||
|
edited_files = {} # file_path -> edit count
|
||||||
|
test_passes = 0
|
||||||
|
test_failures = 0
|
||||||
|
has_public_push = 0
|
||||||
|
|
||||||
with open(sys.argv[1]) as f:
|
with open(sys.argv[1]) as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
try:
|
try:
|
||||||
d = json.loads(line.strip())
|
d = json.loads(line.strip())
|
||||||
u = d.get('message', {}).get('usage')
|
msg = d.get('message', {})
|
||||||
|
role = msg.get('role')
|
||||||
|
content = msg.get('content', '')
|
||||||
|
|
||||||
|
# Track user message size (proxy for user contribution)
|
||||||
|
if role == 'user':
|
||||||
|
if isinstance(content, str):
|
||||||
|
user_bytes += len(content.encode('utf-8', errors='replace'))
|
||||||
|
elif isinstance(content, list):
|
||||||
|
for block in content:
|
||||||
|
if isinstance(block, dict) and block.get('type') == 'text':
|
||||||
|
user_bytes += len(block.get('text', '').encode('utf-8', errors='replace'))
|
||||||
|
|
||||||
|
# Extract usage data and model from assistant messages
|
||||||
|
if role == 'assistant':
|
||||||
|
m = msg.get('model', '')
|
||||||
|
if m:
|
||||||
|
model_id = m
|
||||||
|
|
||||||
|
u = msg.get('usage')
|
||||||
if u and 'input_tokens' in u:
|
if u and 'input_tokens' in u:
|
||||||
turns += 1
|
turns += 1
|
||||||
input_tokens += u.get('input_tokens', 0)
|
input_tokens += u.get('input_tokens', 0)
|
||||||
cache_creation += u.get('cache_creation_input_tokens', 0)
|
cache_creation += u.get('cache_creation_input_tokens', 0)
|
||||||
cache_read += u.get('cache_read_input_tokens', 0)
|
cache_read += u.get('cache_read_input_tokens', 0)
|
||||||
output_tokens += u.get('output_tokens', 0)
|
output_tokens += u.get('output_tokens', 0)
|
||||||
|
|
||||||
|
# Parse tool use blocks
|
||||||
|
if isinstance(content, list):
|
||||||
|
for block in content:
|
||||||
|
if not isinstance(block, dict) or block.get('type') != 'tool_use':
|
||||||
|
continue
|
||||||
|
name = block.get('name', '')
|
||||||
|
inp = block.get('input', {})
|
||||||
|
|
||||||
|
# File churn: count Edit/Write per file
|
||||||
|
if name in ('Edit', 'Write'):
|
||||||
|
fp = inp.get('file_path', '')
|
||||||
|
if fp:
|
||||||
|
edited_files[fp] = edited_files.get(fp, 0) + 1
|
||||||
|
|
||||||
|
# Public push detection
|
||||||
|
if name == 'Bash':
|
||||||
|
cmd = inp.get('command', '')
|
||||||
|
if re.search(r'git\s+push', cmd):
|
||||||
|
has_public_push = 1
|
||||||
|
|
||||||
|
# Test results from tool_result blocks (user role, tool_result type)
|
||||||
|
if role == 'user' and isinstance(content, list):
|
||||||
|
for block in content:
|
||||||
|
if isinstance(block, dict) and block.get('type') == 'tool_result':
|
||||||
|
text = ''
|
||||||
|
rc = block.get('content', '')
|
||||||
|
if isinstance(rc, str):
|
||||||
|
text = rc
|
||||||
|
elif isinstance(rc, list):
|
||||||
|
text = ' '.join(b.get('text', '') for b in rc if isinstance(b, dict))
|
||||||
|
# Detect test outcomes from common test runner output
|
||||||
|
if re.search(r'(\d+)\s+(tests?\s+)?passed', text, re.I):
|
||||||
|
test_passes += 1
|
||||||
|
if re.search(r'(\d+)\s+(tests?\s+)?failed|FAIL[ED]?|ERROR', text, re.I):
|
||||||
|
test_failures += 1
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# Print as tab-separated for easy shell parsing
|
|
||||||
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}')
|
user_tokens_est = user_bytes // 4 # rough byte-to-token estimate
|
||||||
|
unique_files = len(edited_files)
|
||||||
|
total_edits = sum(edited_files.values())
|
||||||
|
churn = round(total_edits / unique_files, 2) if unique_files > 0 else 0
|
||||||
|
|
||||||
|
# automation_ratio: 0 = all human, 1 = all AI (as permille for integer arithmetic)
|
||||||
|
if output_tokens + user_tokens_est > 0:
|
||||||
|
auto_ratio_pm = output_tokens * 1000 // (output_tokens + user_tokens_est)
|
||||||
|
else:
|
||||||
|
auto_ratio_pm = 0
|
||||||
|
|
||||||
|
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}')
|
||||||
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
|
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
|
||||||
|
|
||||||
if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
|
if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
|
||||||
|
|
@ -66,6 +141,14 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
|
||||||
CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
|
CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
|
||||||
CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
|
CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
|
||||||
OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
|
OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
|
||||||
|
MODEL_ID=$(echo "$USAGE_DATA" | cut -f6)
|
||||||
|
AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7)
|
||||||
|
USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8)
|
||||||
|
UNIQUE_FILES=$(echo "$USAGE_DATA" | cut -f9)
|
||||||
|
TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10)
|
||||||
|
TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11)
|
||||||
|
TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12)
|
||||||
|
HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13)
|
||||||
|
|
||||||
# Cumulative input = all tokens that went through the model.
|
# Cumulative input = all tokens that went through the model.
|
||||||
# Cache reads are cheaper (~10-20% of full compute), so we weight them.
|
# Cache reads are cheaper (~10-20% of full compute), so we weight them.
|
||||||
|
|
@ -93,6 +176,14 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
|
||||||
CACHE_CREATION=0
|
CACHE_CREATION=0
|
||||||
CACHE_READ=0
|
CACHE_READ=0
|
||||||
INPUT_TOKENS=0
|
INPUT_TOKENS=0
|
||||||
|
MODEL_ID=""
|
||||||
|
AUTO_RATIO_PM=0
|
||||||
|
USER_TOKENS_EST=0
|
||||||
|
UNIQUE_FILES=0
|
||||||
|
TOTAL_EDITS=0
|
||||||
|
TEST_PASSES=0
|
||||||
|
TEST_FAILURES=0
|
||||||
|
HAS_PUBLIC_PUSH=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Cost estimates ---
|
# --- Cost estimates ---
|
||||||
|
|
@ -127,12 +218,20 @@ else
|
||||||
CO2_G=0
|
CO2_G=0
|
||||||
COST_CENTS=0
|
COST_CENTS=0
|
||||||
TOKEN_SOURCE="none"
|
TOKEN_SOURCE="none"
|
||||||
|
MODEL_ID=""
|
||||||
|
AUTO_RATIO_PM=0
|
||||||
|
USER_TOKENS_EST=0
|
||||||
|
UNIQUE_FILES=0
|
||||||
|
TOTAL_EDITS=0
|
||||||
|
TEST_PASSES=0
|
||||||
|
TEST_FAILURES=0
|
||||||
|
HAS_PUBLIC_PUSH=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Write log entry ---
|
# --- Write log entry ---
|
||||||
|
|
||||||
cat >> "$LOG_FILE" <<EOF
|
cat >> "$LOG_FILE" <<EOF
|
||||||
{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS}
|
{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS,"model_id":"$MODEL_ID","automation_ratio_pm":$AUTO_RATIO_PM,"user_tokens_est":$USER_TOKENS_EST,"unique_files_edited":$UNIQUE_FILES,"total_file_edits":$TOTAL_EDITS,"test_passes":$TEST_PASSES,"test_failures":$TEST_FAILURES,"has_public_push":$HAS_PUBLIC_PUSH}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,35 @@ while IFS= read -r line; do
|
||||||
printf " Cache: %s created, %s read\n" "$cache_create" "$cache_read"
|
printf " Cache: %s created, %s read\n" "$cache_create" "$cache_read"
|
||||||
fi
|
fi
|
||||||
LC_NUMERIC=C printf " Energy: ~%s Wh CO2: ~%sg Cost: ~\$%.2f\n" "$energy" "$co2" "$(echo "$cost / 100" | bc -l 2>/dev/null || echo "$cost cents")"
|
LC_NUMERIC=C printf " Energy: ~%s Wh CO2: ~%sg Cost: ~\$%.2f\n" "$energy" "$co2" "$(echo "$cost / 100" | bc -l 2>/dev/null || echo "$cost cents")"
|
||||||
|
|
||||||
|
# Social cost proxies (if present in log entry)
|
||||||
|
model=$(echo "$line" | jq -r '.model_id // empty')
|
||||||
|
auto_pm=$(echo "$line" | jq -r '.automation_ratio_pm // empty')
|
||||||
|
user_tok=$(echo "$line" | jq -r '.user_tokens_est // empty')
|
||||||
|
files_ed=$(echo "$line" | jq -r '.unique_files_edited // empty')
|
||||||
|
total_ed=$(echo "$line" | jq -r '.total_file_edits // empty')
|
||||||
|
t_pass=$(echo "$line" | jq -r '.test_passes // empty')
|
||||||
|
t_fail=$(echo "$line" | jq -r '.test_failures // empty')
|
||||||
|
pub_push=$(echo "$line" | jq -r '.has_public_push // empty')
|
||||||
|
|
||||||
|
if [ -n "$model" ]; then
|
||||||
|
printf " Model: %s\n" "$model"
|
||||||
|
fi
|
||||||
|
if [ -n "$auto_pm" ] && [ "$auto_pm" != "0" ]; then
|
||||||
|
auto_pct=$(( auto_pm / 10 ))
|
||||||
|
auto_dec=$(( auto_pm % 10 ))
|
||||||
|
printf " Automation ratio: %d.%d%% (user ~%s tokens, AI ~%s tokens)\n" \
|
||||||
|
"$auto_pct" "$auto_dec" "$user_tok" "$output"
|
||||||
|
fi
|
||||||
|
if [ -n "$files_ed" ] && [ "$files_ed" != "0" ]; then
|
||||||
|
printf " File churn: %s edits across %s files\n" "$total_ed" "$files_ed"
|
||||||
|
fi
|
||||||
|
if [ -n "$t_pass" ] && [ -n "$t_fail" ] && { [ "$t_pass" != "0" ] || [ "$t_fail" != "0" ]; }; then
|
||||||
|
printf " Tests: %s passed, %s failed\n" "$t_pass" "$t_fail"
|
||||||
|
fi
|
||||||
|
if [ "$pub_push" = "1" ]; then
|
||||||
|
printf " Public push: yes (data pollution risk)\n"
|
||||||
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
done < "$LOG_FILE"
|
done < "$LOG_FILE"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,11 @@ A PreCompact hook that runs before each context compaction, capturing:
|
||||||
- Energy consumption estimate (Wh)
|
- Energy consumption estimate (Wh)
|
||||||
- CO2 emissions estimate (grams)
|
- CO2 emissions estimate (grams)
|
||||||
- Financial cost estimate (USD)
|
- Financial cost estimate (USD)
|
||||||
|
- Model ID
|
||||||
|
- Automation ratio (AI output vs. user input — deskilling proxy)
|
||||||
|
- File churn (edits per file — code quality proxy)
|
||||||
|
- Test pass/fail counts
|
||||||
|
- Public push detection (data pollution risk flag)
|
||||||
|
|
||||||
Data is logged to a JSONL file for analysis over time.
|
Data is logged to a JSONL file for analysis over time.
|
||||||
|
|
||||||
|
|
@ -63,8 +68,10 @@ consider these complementary tools:
|
||||||
- **[Hugging Face AI Energy Score](https://huggingface.github.io/AIEnergyScore/)** —
|
- **[Hugging Face AI Energy Score](https://huggingface.github.io/AIEnergyScore/)** —
|
||||||
Benchmarks model energy efficiency. Useful for choosing between models.
|
Benchmarks model energy efficiency. Useful for choosing between models.
|
||||||
|
|
||||||
These tools focus on environmental metrics only. This toolkit and the
|
These tools focus on environmental metrics only. This toolkit also
|
||||||
methodology also cover financial, social, epistemic, and political costs.
|
tracks financial cost and proxy metrics for social costs (automation
|
||||||
|
ratio, file churn, test outcomes, public push detection). The
|
||||||
|
accompanying methodology covers additional dimensions in depth.
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,27 +35,102 @@ if [ -f "$TRANSCRIPT_PATH" ]; then
|
||||||
# The transcript contains .message.usage with input_tokens,
|
# The transcript contains .message.usage with input_tokens,
|
||||||
# cache_creation_input_tokens, cache_read_input_tokens, output_tokens.
|
# cache_creation_input_tokens, cache_read_input_tokens, output_tokens.
|
||||||
USAGE_DATA=$(python3 -c "
|
USAGE_DATA=$(python3 -c "
|
||||||
import json, sys
|
import json, sys, re
|
||||||
|
|
||||||
input_tokens = 0
|
input_tokens = 0
|
||||||
cache_creation = 0
|
cache_creation = 0
|
||||||
cache_read = 0
|
cache_read = 0
|
||||||
output_tokens = 0
|
output_tokens = 0
|
||||||
turns = 0
|
turns = 0
|
||||||
|
model_id = ''
|
||||||
|
user_bytes = 0
|
||||||
|
edited_files = {} # file_path -> edit count
|
||||||
|
test_passes = 0
|
||||||
|
test_failures = 0
|
||||||
|
has_public_push = 0
|
||||||
|
|
||||||
with open(sys.argv[1]) as f:
|
with open(sys.argv[1]) as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
try:
|
try:
|
||||||
d = json.loads(line.strip())
|
d = json.loads(line.strip())
|
||||||
u = d.get('message', {}).get('usage')
|
msg = d.get('message', {})
|
||||||
|
role = msg.get('role')
|
||||||
|
content = msg.get('content', '')
|
||||||
|
|
||||||
|
# Track user message size (proxy for user contribution)
|
||||||
|
if role == 'user':
|
||||||
|
if isinstance(content, str):
|
||||||
|
user_bytes += len(content.encode('utf-8', errors='replace'))
|
||||||
|
elif isinstance(content, list):
|
||||||
|
for block in content:
|
||||||
|
if isinstance(block, dict) and block.get('type') == 'text':
|
||||||
|
user_bytes += len(block.get('text', '').encode('utf-8', errors='replace'))
|
||||||
|
|
||||||
|
# Extract usage data and model from assistant messages
|
||||||
|
if role == 'assistant':
|
||||||
|
m = msg.get('model', '')
|
||||||
|
if m:
|
||||||
|
model_id = m
|
||||||
|
|
||||||
|
u = msg.get('usage')
|
||||||
if u and 'input_tokens' in u:
|
if u and 'input_tokens' in u:
|
||||||
turns += 1
|
turns += 1
|
||||||
input_tokens += u.get('input_tokens', 0)
|
input_tokens += u.get('input_tokens', 0)
|
||||||
cache_creation += u.get('cache_creation_input_tokens', 0)
|
cache_creation += u.get('cache_creation_input_tokens', 0)
|
||||||
cache_read += u.get('cache_read_input_tokens', 0)
|
cache_read += u.get('cache_read_input_tokens', 0)
|
||||||
output_tokens += u.get('output_tokens', 0)
|
output_tokens += u.get('output_tokens', 0)
|
||||||
|
|
||||||
|
# Parse tool use blocks
|
||||||
|
if isinstance(content, list):
|
||||||
|
for block in content:
|
||||||
|
if not isinstance(block, dict) or block.get('type') != 'tool_use':
|
||||||
|
continue
|
||||||
|
name = block.get('name', '')
|
||||||
|
inp = block.get('input', {})
|
||||||
|
|
||||||
|
# File churn: count Edit/Write per file
|
||||||
|
if name in ('Edit', 'Write'):
|
||||||
|
fp = inp.get('file_path', '')
|
||||||
|
if fp:
|
||||||
|
edited_files[fp] = edited_files.get(fp, 0) + 1
|
||||||
|
|
||||||
|
# Public push detection
|
||||||
|
if name == 'Bash':
|
||||||
|
cmd = inp.get('command', '')
|
||||||
|
if re.search(r'git\s+push', cmd):
|
||||||
|
has_public_push = 1
|
||||||
|
|
||||||
|
# Test results from tool_result blocks (user role, tool_result type)
|
||||||
|
if role == 'user' and isinstance(content, list):
|
||||||
|
for block in content:
|
||||||
|
if isinstance(block, dict) and block.get('type') == 'tool_result':
|
||||||
|
text = ''
|
||||||
|
rc = block.get('content', '')
|
||||||
|
if isinstance(rc, str):
|
||||||
|
text = rc
|
||||||
|
elif isinstance(rc, list):
|
||||||
|
text = ' '.join(b.get('text', '') for b in rc if isinstance(b, dict))
|
||||||
|
# Detect test outcomes from common test runner output
|
||||||
|
if re.search(r'(\d+)\s+(tests?\s+)?passed', text, re.I):
|
||||||
|
test_passes += 1
|
||||||
|
if re.search(r'(\d+)\s+(tests?\s+)?failed|FAIL[ED]?|ERROR', text, re.I):
|
||||||
|
test_failures += 1
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# Print as tab-separated for easy shell parsing
|
|
||||||
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}')
|
user_tokens_est = user_bytes // 4 # rough byte-to-token estimate
|
||||||
|
unique_files = len(edited_files)
|
||||||
|
total_edits = sum(edited_files.values())
|
||||||
|
churn = round(total_edits / unique_files, 2) if unique_files > 0 else 0
|
||||||
|
|
||||||
|
# automation_ratio: 0 = all human, 1 = all AI (as permille for integer arithmetic)
|
||||||
|
if output_tokens + user_tokens_est > 0:
|
||||||
|
auto_ratio_pm = output_tokens * 1000 // (output_tokens + user_tokens_est)
|
||||||
|
else:
|
||||||
|
auto_ratio_pm = 0
|
||||||
|
|
||||||
|
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}')
|
||||||
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
|
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
|
||||||
|
|
||||||
if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
|
if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
|
||||||
|
|
@ -66,6 +141,14 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
|
||||||
CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
|
CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
|
||||||
CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
|
CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
|
||||||
OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
|
OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
|
||||||
|
MODEL_ID=$(echo "$USAGE_DATA" | cut -f6)
|
||||||
|
AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7)
|
||||||
|
USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8)
|
||||||
|
UNIQUE_FILES=$(echo "$USAGE_DATA" | cut -f9)
|
||||||
|
TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10)
|
||||||
|
TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11)
|
||||||
|
TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12)
|
||||||
|
HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13)
|
||||||
|
|
||||||
# Cumulative input = all tokens that went through the model.
|
# Cumulative input = all tokens that went through the model.
|
||||||
# Cache reads are cheaper (~10-20% of full compute), so we weight them.
|
# Cache reads are cheaper (~10-20% of full compute), so we weight them.
|
||||||
|
|
@ -93,6 +176,14 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
|
||||||
CACHE_CREATION=0
|
CACHE_CREATION=0
|
||||||
CACHE_READ=0
|
CACHE_READ=0
|
||||||
INPUT_TOKENS=0
|
INPUT_TOKENS=0
|
||||||
|
MODEL_ID=""
|
||||||
|
AUTO_RATIO_PM=0
|
||||||
|
USER_TOKENS_EST=0
|
||||||
|
UNIQUE_FILES=0
|
||||||
|
TOTAL_EDITS=0
|
||||||
|
TEST_PASSES=0
|
||||||
|
TEST_FAILURES=0
|
||||||
|
HAS_PUBLIC_PUSH=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Cost estimates ---
|
# --- Cost estimates ---
|
||||||
|
|
@ -127,12 +218,20 @@ else
|
||||||
CO2_G=0
|
CO2_G=0
|
||||||
COST_CENTS=0
|
COST_CENTS=0
|
||||||
TOKEN_SOURCE="none"
|
TOKEN_SOURCE="none"
|
||||||
|
MODEL_ID=""
|
||||||
|
AUTO_RATIO_PM=0
|
||||||
|
USER_TOKENS_EST=0
|
||||||
|
UNIQUE_FILES=0
|
||||||
|
TOTAL_EDITS=0
|
||||||
|
TEST_PASSES=0
|
||||||
|
TEST_FAILURES=0
|
||||||
|
HAS_PUBLIC_PUSH=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Write log entry ---
|
# --- Write log entry ---
|
||||||
|
|
||||||
cat >> "$LOG_FILE" <<EOF
|
cat >> "$LOG_FILE" <<EOF
|
||||||
{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS}
|
{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS,"model_id":"$MODEL_ID","automation_ratio_pm":$AUTO_RATIO_PM,"user_tokens_est":$USER_TOKENS_EST,"unique_files_edited":$UNIQUE_FILES,"total_file_edits":$TOTAL_EDITS,"test_passes":$TEST_PASSES,"test_failures":$TEST_FAILURES,"has_public_push":$HAS_PUBLIC_PUSH}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,35 @@ while IFS= read -r line; do
|
||||||
printf " Cache: %s created, %s read\n" "$cache_create" "$cache_read"
|
printf " Cache: %s created, %s read\n" "$cache_create" "$cache_read"
|
||||||
fi
|
fi
|
||||||
LC_NUMERIC=C printf " Energy: ~%s Wh CO2: ~%sg Cost: ~\$%.2f\n" "$energy" "$co2" "$(echo "$cost / 100" | bc -l 2>/dev/null || echo "$cost cents")"
|
LC_NUMERIC=C printf " Energy: ~%s Wh CO2: ~%sg Cost: ~\$%.2f\n" "$energy" "$co2" "$(echo "$cost / 100" | bc -l 2>/dev/null || echo "$cost cents")"
|
||||||
|
|
||||||
|
# Social cost proxies (if present in log entry)
|
||||||
|
model=$(echo "$line" | jq -r '.model_id // empty')
|
||||||
|
auto_pm=$(echo "$line" | jq -r '.automation_ratio_pm // empty')
|
||||||
|
user_tok=$(echo "$line" | jq -r '.user_tokens_est // empty')
|
||||||
|
files_ed=$(echo "$line" | jq -r '.unique_files_edited // empty')
|
||||||
|
total_ed=$(echo "$line" | jq -r '.total_file_edits // empty')
|
||||||
|
t_pass=$(echo "$line" | jq -r '.test_passes // empty')
|
||||||
|
t_fail=$(echo "$line" | jq -r '.test_failures // empty')
|
||||||
|
pub_push=$(echo "$line" | jq -r '.has_public_push // empty')
|
||||||
|
|
||||||
|
if [ -n "$model" ]; then
|
||||||
|
printf " Model: %s\n" "$model"
|
||||||
|
fi
|
||||||
|
if [ -n "$auto_pm" ] && [ "$auto_pm" != "0" ]; then
|
||||||
|
auto_pct=$(( auto_pm / 10 ))
|
||||||
|
auto_dec=$(( auto_pm % 10 ))
|
||||||
|
printf " Automation ratio: %d.%d%% (user ~%s tokens, AI ~%s tokens)\n" \
|
||||||
|
"$auto_pct" "$auto_dec" "$user_tok" "$output"
|
||||||
|
fi
|
||||||
|
if [ -n "$files_ed" ] && [ "$files_ed" != "0" ]; then
|
||||||
|
printf " File churn: %s edits across %s files\n" "$total_ed" "$files_ed"
|
||||||
|
fi
|
||||||
|
if [ -n "$t_pass" ] && [ -n "$t_fail" ] && { [ "$t_pass" != "0" ] || [ "$t_fail" != "0" ]; }; then
|
||||||
|
printf " Tests: %s passed, %s failed\n" "$t_pass" "$t_fail"
|
||||||
|
fi
|
||||||
|
if [ "$pub_push" = "1" ]; then
|
||||||
|
printf " Public push: yes (data pollution risk)\n"
|
||||||
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
done < "$LOG_FILE"
|
done < "$LOG_FILE"
|
||||||
|
|
||||||
|
|
@ -62,3 +91,26 @@ echo "=== Totals ($TOTAL_ENTRIES snapshots) ==="
|
||||||
LC_NUMERIC=C printf " Energy: ~%s Wh CO2: ~%sg Cost: ~\$%.2f\n" \
|
LC_NUMERIC=C printf " Energy: ~%s Wh CO2: ~%sg Cost: ~\$%.2f\n" \
|
||||||
"$TOTAL_ENERGY" "$TOTAL_CO2" \
|
"$TOTAL_ENERGY" "$TOTAL_CO2" \
|
||||||
"$(echo "$TOTAL_COST / 100" | bc -l 2>/dev/null || echo "$TOTAL_COST cents")"
|
"$(echo "$TOTAL_COST / 100" | bc -l 2>/dev/null || echo "$TOTAL_COST cents")"
|
||||||
|
|
||||||
|
# Show annotations if they exist
|
||||||
|
ANNOT_FILE="$PROJECT_DIR/.claude/impact/annotations.jsonl"
|
||||||
|
if [ -f "$ANNOT_FILE" ] && [ -s "$ANNOT_FILE" ]; then
|
||||||
|
echo ""
|
||||||
|
echo "=== Value Annotations ==="
|
||||||
|
echo ""
|
||||||
|
while IFS= read -r line; do
|
||||||
|
sid=$(echo "$line" | jq -r '.session_id')
|
||||||
|
if ! echo "$sid" | grep -q "$FILTER"; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
ts=$(echo "$line" | jq -r '.timestamp')
|
||||||
|
summary=$(echo "$line" | jq -r '.value_summary')
|
||||||
|
reach=$(echo "$line" | jq -r '.estimated_reach')
|
||||||
|
cf=$(echo "$line" | jq -r '.counterfactual')
|
||||||
|
net=$(echo "$line" | jq -r '.net_assessment')
|
||||||
|
printf "%s session=%s\n" "$ts" "${sid:0:12}..."
|
||||||
|
printf " Value: %s\n" "$summary"
|
||||||
|
printf " Reach: %s Counterfactual: %s Net: %s\n" "$reach" "$cf" "$net"
|
||||||
|
echo ""
|
||||||
|
done < "$ANNOT_FILE"
|
||||||
|
fi
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ broad, lasting value.
|
||||||
| [audience-analysis](audience-analysis.md) | 7 | New — pre-launch |
|
| [audience-analysis](audience-analysis.md) | 7 | New — pre-launch |
|
||||||
| [measure-project-impact](measure-project-impact.md) | 2, 12 | New — pre-launch |
|
| [measure-project-impact](measure-project-impact.md) | 2, 12 | New — pre-launch |
|
||||||
| [anticipated-criticisms](anticipated-criticisms.md) | 4, 12 | New — pre-launch |
|
| [anticipated-criticisms](anticipated-criticisms.md) | 4, 12 | New — pre-launch |
|
||||||
|
| [quantify-social-costs](quantify-social-costs.md) | 2, 6 | New — roadmap |
|
||||||
|
|
||||||
*Previously had plans for "high-leverage contributions" and "teach and
|
*Previously had plans for "high-leverage contributions" and "teach and
|
||||||
document" — these were behavioral norms, not executable plans. Their
|
document" — these were behavioral norms, not executable plans. Their
|
||||||
|
|
|
||||||
235
plans/quantify-social-costs.md
Normal file
235
plans/quantify-social-costs.md
Normal file
|
|
@ -0,0 +1,235 @@
|
||||||
|
# Plan: Quantify social, epistemic, and political costs
|
||||||
|
|
||||||
|
**Target sub-goals**: 2 (measure impact), 6 (improve methodology)
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
Our differentiator is the taxonomy of non-environmental costs (social,
|
||||||
|
epistemic, political). But the toolkit only tracks environmental and
|
||||||
|
financial metrics. Until we can produce numbers — even rough proxies —
|
||||||
|
for the other dimensions, the taxonomy remains a document, not a tool.
|
||||||
|
|
||||||
|
The confidence summary (Section 19) marks 13 of 22 categories as
|
||||||
|
"Unquantifiable." Some genuinely resist quantification. But for others,
|
||||||
|
we can define measurable proxies that capture *something* meaningful per
|
||||||
|
conversation, even if imperfect.
|
||||||
|
|
||||||
|
## Design principle
|
||||||
|
|
||||||
|
Not everything needs a number. The goal is to move categories from
|
||||||
|
"unquantifiable" to "rough proxy available" where honest proxies exist,
|
||||||
|
and to explicitly mark categories where quantification would be
|
||||||
|
dishonest. A bad number is worse than no number.
|
||||||
|
|
||||||
|
## Category-by-category analysis
|
||||||
|
|
||||||
|
### Feasible: per-conversation proxies exist
|
||||||
|
|
||||||
|
#### 1. Cognitive deskilling (Section 10)
|
||||||
|
|
||||||
|
**Proxy: Automation ratio**
|
||||||
|
- Measure: fraction of conversation output tokens vs. user input tokens.
|
||||||
|
A conversation where the AI writes 95% of the code has a higher
|
||||||
|
deskilling risk than one where the user writes code and asks for review.
|
||||||
|
- Formula: `deskilling_risk = output_tokens / (output_tokens + user_tokens)`
|
||||||
|
- Range: 0 (pure teaching) to 1 (pure delegation)
|
||||||
|
- Can be computed from the transcript by the existing hook.
|
||||||
|
- Calibration: weight by task type if detectable (e.g., "explain" vs
|
||||||
|
"write" vs "fix").
|
||||||
|
|
||||||
|
**Proxy: Review signal**
|
||||||
|
- Did the user modify the AI's output before committing? If the hook can
|
||||||
|
detect git diffs between AI-generated code and committed code, the
|
||||||
|
delta indicates human review effort. High delta = more engagement =
|
||||||
|
less deskilling risk.
|
||||||
|
- Requires: post-commit hook comparing AI output to committed diff.
|
||||||
|
|
||||||
|
**Confidence: Low but measurable.** The ratio is crude — a user who
|
||||||
|
delegates wisely is not deskilling. But it's directionally useful.
|
||||||
|
|
||||||
|
#### 2. Code quality degradation (Section 12)
|
||||||
|
|
||||||
|
**Proxy: Defect signal**
|
||||||
|
- Track whether tests pass after AI-generated changes. The hook could
|
||||||
|
record: (a) did the conversation include test runs? (b) did tests fail
|
||||||
|
after AI changes? (c) how many retry cycles occurred?
|
||||||
|
- Formula: `quality_risk = failed_test_runs / total_test_runs`
|
||||||
|
- Can be extracted from tool-call results in the transcript.
|
||||||
|
|
||||||
|
**Proxy: Churn rate**
|
||||||
|
- How many times was the same file edited in the conversation? High
|
||||||
|
churn = the AI got it wrong repeatedly.
|
||||||
|
- Formula: `churn = total_file_edits / unique_files_edited`
|
||||||
|
|
||||||
|
**Confidence: Medium.** Test failures are a real signal. Churn is
|
||||||
|
noisier (some tasks legitimately require iterative editing).
|
||||||
|
|
||||||
|
#### 3. Data pollution risk (Section 13)

**Proxy: Publication exposure**

- Is the output likely to enter public corpora? Detect if the
  conversation involves: git push to a public repo, writing
  documentation, creating blog posts, Stack Overflow answers.
- Formula: binary flag `public_output = true/false`, or estimate
  `pollution_tokens = output_tokens_in_public_artifacts`
- Can be detected from tool calls (git push, file writes to known
  public paths).

**Confidence: Low.** Many paths to publication are undetectable. But
flagging known public pushes is better than nothing.

#### 4. Monoculture risk (Section 15)

**Proxy: Provider concentration**

- Log which model and provider was used. Over time, the impact log
  builds a picture of single-provider dependency.
- Formula: `monoculture_index = sessions_with_dominant_provider / total_sessions`
- Per-session: just log the model ID. The aggregate metric is computed
  across sessions.

**Confidence: Medium.** Simple to measure, meaningful at portfolio level.

#### 5. Annotation labor (Section 10)

**Proxy: Token-proportional RLHF demand**

- Each conversation generates training signal (thumbs up/down, edits,
  preference data). More tokens = more potential training data = more
  annotation demand.
- Formula: `rlhf_demand_proxy = output_tokens * annotation_rate`
  where `annotation_rate` is estimated from published RLHF dataset
  sizes vs. total conversation volume.
- Very rough. But it makes the connection between "my conversation" and
  "someone rates this output" concrete.

**Confidence: Very low.** The annotation_rate is unknown. But even an
order-of-magnitude estimate names the cost.
|
||||||
|
|
||||||
|
#### 6. Creative displacement (Section 16)

**Proxy: Substitution type**

- Classify the conversation by what human role it substitutes: code
  writing, code review, documentation, research, design.
- Formula: categorical label, not a number. But the label enables
  aggregation: "60% of my AI usage substitutes for junior developer
  work."
- Can be inferred from tool calls (Write/Edit = code writing, Grep/Read
  = research, etc.).

**Confidence: Low.** Classification is fuzzy. But naming what was
displaced is better than ignoring it.

#### 7. Power concentration (Section 11)

**Proxy: Spend concentration**

- Financial cost already tracked. Aggregating by provider shows how
  much money flows to each company.
- Formula: `provider_share = spend_with_provider / total_ai_spend`
- Trivial to compute from existing data. The interpretation is what
  matters: "I sent $X to Anthropic this month."

**Confidence: High for the number, low for what it means.**

#### 8. Content filtering opacity (Section 11)

**Proxy: Block count**

- Count how many responses were blocked by content filtering during
  the conversation.
- Formula: `filter_blocks = count(blocked_responses)`
- Can be detected from error messages in the transcript.

**Confidence: High.** Easy to measure. Interpretation is subjective.
|
||||||
|
|
||||||
|
### Infeasible: honest quantification not possible per-conversation

#### 9. Linguistic homogenization (Section 10)

- Could log conversation language, but the per-conversation contribution
  to language endangerment is genuinely unattributable. A counter
  ("this conversation was in English") is factual but not a meaningful
  cost metric. **Keep qualitative.**

#### 10. Geopolitical resource competition (Section 11)

- No per-conversation proxy exists. The connection between one API call
  and semiconductor export controls is real but too diffuse to measure.
  **Keep qualitative.**

#### 11. Mental health effects (Section 18)

- Would require user self-report. No passive measurement is honest.
  **Keep qualitative unless user opts into self-assessment.**

#### 12. Scientific integrity contamination (Section 14)

- Overlaps with data pollution (proxy #3 above). The additional risk
  (AI in research methodology) is context-dependent and cannot be
  detected from the conversation alone. **Keep qualitative.**
|
||||||
|
|
||||||
|
## Implementation plan

### Phase 1: Low-hanging fruit (extend existing hook)

Modify `pre-compact-snapshot.sh` to extract from the transcript:

1. **Automation ratio**: output_tokens / (output_tokens + user_input_tokens)
2. **Model ID**: already available from API metadata
3. **Test pass/fail counts**: parse tool call results for test outcomes
4. **File churn**: count Edit/Write tool calls per unique file
5. **Public push flag**: detect `git push` in tool calls

Add these fields to the JSONL log alongside existing metrics.

Estimated effort: extend the existing Python/bash parsing, ~100 lines.

### Phase 2: Post-conversation signals

Add an optional post-commit hook:

6. **Review delta**: compare AI-generated code (from transcript) with
   actual committed code. Measures human review effort.

Estimated effort: new hook, ~50 lines. Requires git integration.

### Phase 3: Aggregate metrics

Build a dashboard script (extend `show-impact.sh`) that computes
portfolio-level metrics across sessions:

7. **Monoculture index**: provider concentration over time
8. **Spend concentration**: cumulative $ per provider
9. **Displacement profile**: % of sessions by substitution type
10. **RLHF demand estimate**: cumulative annotation labor proxy

### Phase 4: Methodology update

Update `impact-methodology.md` Section 19 confidence summary:

- Move categories with proxies from "Unquantifiable" to "Proxy available"
- Document each proxy's limitations honestly
- Update the toolkit README to reflect new capabilities

Update `impact-toolkit/README.md` to accurately describe what the
toolkit measures.
|
||||||
|
|
||||||
|
## What this does NOT do

- It does not make the unquantifiable quantifiable. Some costs remain
  qualitative by design.
- It does not produce a single "social cost score." Collapsing
  incommensurable harms into one number would be dishonest.
- It does not claim precision. Every proxy is explicitly labeled with
  its confidence and failure modes.

## Success criteria

- The toolkit reports at least 5 non-environmental metrics per session.
- Each metric has documented limitations in the methodology.
- The confidence summary has fewer "Unquantifiable" entries.
- No metric is misleading — a proxy that doesn't work is removed, not
  kept for show.

## Risks

- **Goodhart's law**: Once measured, users may optimize for the metric
  rather than the underlying cost (e.g., adding fake user tokens to
  lower automation ratio). Mitigate by documenting that proxies are
  indicators, not targets.
- **False precision**: Numbers create an illusion of understanding.
  Mitigate by always showing confidence levels alongside values.
- **Scope creep**: Trying to measure everything dilutes the toolkit's
  usability. Start with Phase 1 only, evaluate before proceeding.
|
||||||
|
|
| 16 | Set up basic analytics | measure-project-impact | DONE | `~/www/analytics.sh` + `~/www/repo-stats.sh` |
| 17 | Consider Zenodo DOI | anticipated-criticisms | TODO | Citable DOI for academic audiences |
| 18 | Automate project cost on landing page | measure-project-impact | DONE | `~/www/update-costs.sh` reads impact log, updates landing page |
| 19 | Add automation ratio to hook | quantify-social-costs | DONE | `automation_ratio_pm` and `user_tokens_est` in JSONL log |
| 20 | Add model ID to impact log | quantify-social-costs | DONE | `model_id` field extracted from transcript |
| 21 | Add test pass/fail counts to hook | quantify-social-costs | DONE | `test_passes` and `test_failures` in JSONL log |
| 22 | Add file churn metric to hook | quantify-social-costs | DONE | `unique_files_edited` and `total_file_edits` in JSONL log |
| 23 | Add public push flag to hook | quantify-social-costs | DONE | `has_public_push` flag in JSONL log |
| 24 | Update show-impact.sh for new fields | quantify-social-costs | DONE | Social cost proxies displayed in impact viewer |
| 25 | Update methodology confidence summary | quantify-social-costs | TODO | Move proxied categories from "Unquantifiable" to "Proxy available" |

## Handoffs

| # | Action | Status | Notes |
|---|--------|--------|-------|
| H1 | Publish repository | DONE | https://llm-impact.org/forge/claude/ai-conversation-impact |
| H2 | Share methodology externally | DONE | See [H2 details below](#h2-share-externally) |
| H3 | Solicit feedback | DONE | Pinned issue #1 on Forgejo |

## H2: Share externally
Loading…
Add table
Add a link
Reference in a new issue