Log edited file list in impact hook for review delta analysis

The hook now records which files were edited and how many times,
enabling future comparison with committed code to measure human
review effort (Phase 2 of quantify-social-costs plan).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
claude 2026-03-16 15:11:30 +00:00
parent 60eca18c85
commit ad06b12e50
3 changed files with 119 additions and 34 deletions

View file

@@ -131,24 +131,29 @@ else:
auto_ratio_pm = 0
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}')
# Second line: JSON object mapping edited file path to edit count
print(json.dumps(edited_files))
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
USAGE_LINE1=$(echo "$USAGE_DATA" | head -1)
EDITED_FILES_JSON=$(echo "$USAGE_DATA" | tail -1)
if [ -n "$USAGE_LINE1" ] && [ "$(echo "$USAGE_LINE1" | cut -f1)" -gt 0 ] 2>/dev/null; then
# Actual token counts available
TOKEN_SOURCE="actual"
ASSISTANT_TURNS=$(echo "$USAGE_DATA" | cut -f1)
INPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f2)
CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
MODEL_ID=$(echo "$USAGE_DATA" | cut -f6)
AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7)
USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8)
UNIQUE_FILES=$(echo "$USAGE_DATA" | cut -f9)
TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10)
TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11)
TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12)
HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13)
ASSISTANT_TURNS=$(echo "$USAGE_LINE1" | cut -f1)
INPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f2)
CACHE_CREATION=$(echo "$USAGE_LINE1" | cut -f3)
CACHE_READ=$(echo "$USAGE_LINE1" | cut -f4)
OUTPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f5)
MODEL_ID=$(echo "$USAGE_LINE1" | cut -f6)
AUTO_RATIO_PM=$(echo "$USAGE_LINE1" | cut -f7)
USER_TOKENS_EST=$(echo "$USAGE_LINE1" | cut -f8)
UNIQUE_FILES=$(echo "$USAGE_LINE1" | cut -f9)
TOTAL_EDITS=$(echo "$USAGE_LINE1" | cut -f10)
TEST_PASSES=$(echo "$USAGE_LINE1" | cut -f11)
TEST_FAILURES=$(echo "$USAGE_LINE1" | cut -f12)
HAS_PUBLIC_PUSH=$(echo "$USAGE_LINE1" | cut -f13)
# Cumulative input = all tokens that went through the model.
# Cache reads are cheaper (~10-20% of full compute), so we weight them.
@@ -184,6 +189,7 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
TEST_PASSES=0
TEST_FAILURES=0
HAS_PUBLIC_PUSH=0
EDITED_FILES_JSON="{}"
fi
# --- Cost estimates ---
@@ -226,12 +232,48 @@ else
TEST_PASSES=0
TEST_FAILURES=0
HAS_PUBLIC_PUSH=0
EDITED_FILES_JSON="{}"
fi
# --- Write log entry ---
cat >> "$LOG_FILE" <<EOF
{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS,"model_id":"$MODEL_ID","automation_ratio_pm":$AUTO_RATIO_PM,"user_tokens_est":$USER_TOKENS_EST,"unique_files_edited":$UNIQUE_FILES,"total_file_edits":$TOTAL_EDITS,"test_passes":$TEST_PASSES,"test_failures":$TEST_FAILURES,"has_public_push":$HAS_PUBLIC_PUSH}
EOF
# Build log entry using Python to safely embed the edited_files JSON
python3 -c "
import json, sys
entry = {
'timestamp': sys.argv[1],
'session_id': sys.argv[2],
'trigger': sys.argv[3],
'token_source': sys.argv[4],
'transcript_bytes': int(sys.argv[5]),
'transcript_lines': int(sys.argv[6]),
'assistant_turns': int(sys.argv[7]),
'tool_uses': int(sys.argv[8]),
'cumulative_input_tokens': int(sys.argv[9]),
'cumulative_input_raw': int(sys.argv[10]),
'cache_creation_tokens': int(sys.argv[11]),
'cache_read_tokens': int(sys.argv[12]),
'output_tokens': int(sys.argv[13]),
'energy_wh': int(sys.argv[14]),
'co2_g': int(sys.argv[15]),
'cost_cents': int(sys.argv[16]),
'model_id': sys.argv[17],
'automation_ratio_pm': int(sys.argv[18]),
'user_tokens_est': int(sys.argv[19]),
'unique_files_edited': int(sys.argv[20]),
'total_file_edits': int(sys.argv[21]),
'test_passes': int(sys.argv[22]),
'test_failures': int(sys.argv[23]),
'has_public_push': int(sys.argv[24]),
'edited_files': json.loads(sys.argv[25]),
}
print(json.dumps(entry, separators=(',', ':')))
" "$TIMESTAMP" "$SESSION_ID" "$TRIGGER" "$TOKEN_SOURCE" \
"$TRANSCRIPT_BYTES" "$TRANSCRIPT_LINES" "$ASSISTANT_TURNS" "$TOOL_USES" \
"$CUMULATIVE_INPUT" "$CUMULATIVE_INPUT_RAW" "$CACHE_CREATION" "$CACHE_READ" \
"$OUTPUT_TOKENS" "$ENERGY_WH" "$CO2_G" "$COST_CENTS" \
"$MODEL_ID" "$AUTO_RATIO_PM" "$USER_TOKENS_EST" \
"$UNIQUE_FILES" "$TOTAL_EDITS" "$TEST_PASSES" "$TEST_FAILURES" \
"$HAS_PUBLIC_PUSH" "$EDITED_FILES_JSON" >> "$LOG_FILE"
exit 0

View file

@@ -131,24 +131,29 @@ else:
auto_ratio_pm = 0
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}')
# Second line: JSON object mapping edited file path to edit count
print(json.dumps(edited_files))
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
USAGE_LINE1=$(echo "$USAGE_DATA" | head -1)
EDITED_FILES_JSON=$(echo "$USAGE_DATA" | tail -1)
if [ -n "$USAGE_LINE1" ] && [ "$(echo "$USAGE_LINE1" | cut -f1)" -gt 0 ] 2>/dev/null; then
# Actual token counts available
TOKEN_SOURCE="actual"
ASSISTANT_TURNS=$(echo "$USAGE_DATA" | cut -f1)
INPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f2)
CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
MODEL_ID=$(echo "$USAGE_DATA" | cut -f6)
AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7)
USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8)
UNIQUE_FILES=$(echo "$USAGE_DATA" | cut -f9)
TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10)
TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11)
TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12)
HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13)
ASSISTANT_TURNS=$(echo "$USAGE_LINE1" | cut -f1)
INPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f2)
CACHE_CREATION=$(echo "$USAGE_LINE1" | cut -f3)
CACHE_READ=$(echo "$USAGE_LINE1" | cut -f4)
OUTPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f5)
MODEL_ID=$(echo "$USAGE_LINE1" | cut -f6)
AUTO_RATIO_PM=$(echo "$USAGE_LINE1" | cut -f7)
USER_TOKENS_EST=$(echo "$USAGE_LINE1" | cut -f8)
UNIQUE_FILES=$(echo "$USAGE_LINE1" | cut -f9)
TOTAL_EDITS=$(echo "$USAGE_LINE1" | cut -f10)
TEST_PASSES=$(echo "$USAGE_LINE1" | cut -f11)
TEST_FAILURES=$(echo "$USAGE_LINE1" | cut -f12)
HAS_PUBLIC_PUSH=$(echo "$USAGE_LINE1" | cut -f13)
# Cumulative input = all tokens that went through the model.
# Cache reads are cheaper (~10-20% of full compute), so we weight them.
@@ -184,6 +189,7 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
TEST_PASSES=0
TEST_FAILURES=0
HAS_PUBLIC_PUSH=0
EDITED_FILES_JSON="{}"
fi
# --- Cost estimates ---
@@ -226,12 +232,48 @@ else
TEST_PASSES=0
TEST_FAILURES=0
HAS_PUBLIC_PUSH=0
EDITED_FILES_JSON="{}"
fi
# --- Write log entry ---
cat >> "$LOG_FILE" <<EOF
{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS,"model_id":"$MODEL_ID","automation_ratio_pm":$AUTO_RATIO_PM,"user_tokens_est":$USER_TOKENS_EST,"unique_files_edited":$UNIQUE_FILES,"total_file_edits":$TOTAL_EDITS,"test_passes":$TEST_PASSES,"test_failures":$TEST_FAILURES,"has_public_push":$HAS_PUBLIC_PUSH}
EOF
# Build log entry using Python to safely embed the edited_files JSON
python3 -c "
import json, sys
entry = {
'timestamp': sys.argv[1],
'session_id': sys.argv[2],
'trigger': sys.argv[3],
'token_source': sys.argv[4],
'transcript_bytes': int(sys.argv[5]),
'transcript_lines': int(sys.argv[6]),
'assistant_turns': int(sys.argv[7]),
'tool_uses': int(sys.argv[8]),
'cumulative_input_tokens': int(sys.argv[9]),
'cumulative_input_raw': int(sys.argv[10]),
'cache_creation_tokens': int(sys.argv[11]),
'cache_read_tokens': int(sys.argv[12]),
'output_tokens': int(sys.argv[13]),
'energy_wh': int(sys.argv[14]),
'co2_g': int(sys.argv[15]),
'cost_cents': int(sys.argv[16]),
'model_id': sys.argv[17],
'automation_ratio_pm': int(sys.argv[18]),
'user_tokens_est': int(sys.argv[19]),
'unique_files_edited': int(sys.argv[20]),
'total_file_edits': int(sys.argv[21]),
'test_passes': int(sys.argv[22]),
'test_failures': int(sys.argv[23]),
'has_public_push': int(sys.argv[24]),
'edited_files': json.loads(sys.argv[25]),
}
print(json.dumps(entry, separators=(',', ':')))
" "$TIMESTAMP" "$SESSION_ID" "$TRIGGER" "$TOKEN_SOURCE" \
"$TRANSCRIPT_BYTES" "$TRANSCRIPT_LINES" "$ASSISTANT_TURNS" "$TOOL_USES" \
"$CUMULATIVE_INPUT" "$CUMULATIVE_INPUT_RAW" "$CACHE_CREATION" "$CACHE_READ" \
"$OUTPUT_TOKENS" "$ENERGY_WH" "$CO2_G" "$COST_CENTS" \
"$MODEL_ID" "$AUTO_RATIO_PM" "$USER_TOKENS_EST" \
"$UNIQUE_FILES" "$TOTAL_EDITS" "$TEST_PASSES" "$TEST_FAILURES" \
"$HAS_PUBLIC_PUSH" "$EDITED_FILES_JSON" >> "$LOG_FILE"
exit 0

View file

@@ -37,6 +37,7 @@ separately as handoffs.
| 24 | Update show-impact.sh for new fields | quantify-social-costs | DONE | Social cost proxies displayed in impact viewer |
| 25 | Update methodology confidence summary | quantify-social-costs | DONE | 4 categories moved to "Proxy", explanation added |
| 26 | Build aggregate dashboard | quantify-social-costs | DONE | `show-aggregate.sh` — portfolio-level social cost metrics |
| 27 | Log edited file list in hook | quantify-social-costs | DONE | `edited_files` dict in JSONL (file path → edit count) |
## Handoffs