From ad06b12e50470b470dddecb17ff7ac1a806e6534 Mon Sep 17 00:00:00 2001 From: claude Date: Mon, 16 Mar 2026 15:11:30 +0000 Subject: [PATCH] Log edited file list in impact hook for review delta analysis The hook now records which files were edited and how many times, enabling future comparison with committed code to measure human review effort (Phase 2 of quantify-social-costs plan). Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/hooks/pre-compact-snapshot.sh | 76 +++++++++++++++----- impact-toolkit/hooks/pre-compact-snapshot.sh | 76 +++++++++++++++----- tasks/README.md | 1 + 3 files changed, 119 insertions(+), 34 deletions(-) diff --git a/.claude/hooks/pre-compact-snapshot.sh b/.claude/hooks/pre-compact-snapshot.sh index c37da0b..699049d 100755 --- a/.claude/hooks/pre-compact-snapshot.sh +++ b/.claude/hooks/pre-compact-snapshot.sh @@ -131,24 +131,29 @@ else: auto_ratio_pm = 0 print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}') +# Second line: JSON object mapping each edited file path to its edit count +print(json.dumps(edited_files)) " "$TRANSCRIPT_PATH" 2>/dev/null || echo "") - if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then + USAGE_LINE1=$(echo "$USAGE_DATA" | head -1) + EDITED_FILES_JSON=$(echo "$USAGE_DATA" | tail -1) + + if [ -n "$USAGE_LINE1" ] && [ "$(echo "$USAGE_LINE1" | cut -f1)" -gt 0 ] 2>/dev/null; then # Actual token counts available TOKEN_SOURCE="actual" - ASSISTANT_TURNS=$(echo "$USAGE_DATA" | cut -f1) - INPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f2) - CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3) - CACHE_READ=$(echo "$USAGE_DATA" | cut -f4) - OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5) - MODEL_ID=$(echo "$USAGE_DATA" | cut -f6) - AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7) - USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8) - UNIQUE_FILES=$(echo "$USAGE_DATA" | 
cut -f9) - TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10) - TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11) - TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12) - HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13) + ASSISTANT_TURNS=$(echo "$USAGE_LINE1" | cut -f1) + INPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f2) + CACHE_CREATION=$(echo "$USAGE_LINE1" | cut -f3) + CACHE_READ=$(echo "$USAGE_LINE1" | cut -f4) + OUTPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f5) + MODEL_ID=$(echo "$USAGE_LINE1" | cut -f6) + AUTO_RATIO_PM=$(echo "$USAGE_LINE1" | cut -f7) + USER_TOKENS_EST=$(echo "$USAGE_LINE1" | cut -f8) + UNIQUE_FILES=$(echo "$USAGE_LINE1" | cut -f9) + TOTAL_EDITS=$(echo "$USAGE_LINE1" | cut -f10) + TEST_PASSES=$(echo "$USAGE_LINE1" | cut -f11) + TEST_FAILURES=$(echo "$USAGE_LINE1" | cut -f12) + HAS_PUBLIC_PUSH=$(echo "$USAGE_LINE1" | cut -f13) # Cumulative input = all tokens that went through the model. # Cache reads are cheaper (~10-20% of full compute), so we weight them. @@ -184,6 +189,7 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens} TEST_PASSES=0 TEST_FAILURES=0 HAS_PUBLIC_PUSH=0 + EDITED_FILES_JSON="{}" fi # --- Cost estimates --- @@ -226,12 +232,48 @@ else TEST_PASSES=0 TEST_FAILURES=0 HAS_PUBLIC_PUSH=0 + EDITED_FILES_JSON="{}" fi # --- Write log entry --- -cat >> "$LOG_FILE" <> "$LOG_FILE" exit 0 diff --git a/impact-toolkit/hooks/pre-compact-snapshot.sh b/impact-toolkit/hooks/pre-compact-snapshot.sh index c37da0b..699049d 100755 --- a/impact-toolkit/hooks/pre-compact-snapshot.sh +++ b/impact-toolkit/hooks/pre-compact-snapshot.sh @@ -131,24 +131,29 @@ else: auto_ratio_pm = 0 print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}') +# Second line: JSON object mapping each edited file path to its edit count +print(json.dumps(edited_files)) " "$TRANSCRIPT_PATH" 2>/dev/null || echo "") - if [ -n "$USAGE_DATA" ] 
&& [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then + USAGE_LINE1=$(echo "$USAGE_DATA" | head -1) + EDITED_FILES_JSON=$(echo "$USAGE_DATA" | tail -1) + + if [ -n "$USAGE_LINE1" ] && [ "$(echo "$USAGE_LINE1" | cut -f1)" -gt 0 ] 2>/dev/null; then # Actual token counts available TOKEN_SOURCE="actual" - ASSISTANT_TURNS=$(echo "$USAGE_DATA" | cut -f1) - INPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f2) - CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3) - CACHE_READ=$(echo "$USAGE_DATA" | cut -f4) - OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5) - MODEL_ID=$(echo "$USAGE_DATA" | cut -f6) - AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7) - USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8) - UNIQUE_FILES=$(echo "$USAGE_DATA" | cut -f9) - TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10) - TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11) - TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12) - HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13) + ASSISTANT_TURNS=$(echo "$USAGE_LINE1" | cut -f1) + INPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f2) + CACHE_CREATION=$(echo "$USAGE_LINE1" | cut -f3) + CACHE_READ=$(echo "$USAGE_LINE1" | cut -f4) + OUTPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f5) + MODEL_ID=$(echo "$USAGE_LINE1" | cut -f6) + AUTO_RATIO_PM=$(echo "$USAGE_LINE1" | cut -f7) + USER_TOKENS_EST=$(echo "$USAGE_LINE1" | cut -f8) + UNIQUE_FILES=$(echo "$USAGE_LINE1" | cut -f9) + TOTAL_EDITS=$(echo "$USAGE_LINE1" | cut -f10) + TEST_PASSES=$(echo "$USAGE_LINE1" | cut -f11) + TEST_FAILURES=$(echo "$USAGE_LINE1" | cut -f12) + HAS_PUBLIC_PUSH=$(echo "$USAGE_LINE1" | cut -f13) # Cumulative input = all tokens that went through the model. # Cache reads are cheaper (~10-20% of full compute), so we weight them. 
@@ -184,6 +189,7 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens} TEST_PASSES=0 TEST_FAILURES=0 HAS_PUBLIC_PUSH=0 + EDITED_FILES_JSON="{}" fi # --- Cost estimates --- @@ -226,12 +232,48 @@ else TEST_PASSES=0 TEST_FAILURES=0 HAS_PUBLIC_PUSH=0 + EDITED_FILES_JSON="{}" fi # --- Write log entry --- -cat >> "$LOG_FILE" <> "$LOG_FILE" exit 0 diff --git a/tasks/README.md b/tasks/README.md index e1ee318..14d4289 100644 --- a/tasks/README.md +++ b/tasks/README.md @@ -37,6 +37,7 @@ separately as handoffs. | 24 | Update show-impact.sh for new fields | quantify-social-costs | DONE | Social cost proxies displayed in impact viewer | | 25 | Update methodology confidence summary | quantify-social-costs | DONE | 4 categories moved to "Proxy", explanation added | | 26 | Build aggregate dashboard | quantify-social-costs | DONE | `show-aggregate.sh` — portfolio-level social cost metrics | +| 27 | Log edited file list in hook | quantify-social-costs | DONE | `edited_files` dict in JSONL (file path → edit count) | ## Handoffs