Log edited file list in impact hook for review delta analysis
The hook now records which files were edited and how many times, enabling future comparison with committed code to measure human review effort (Phase 2 of the quantify-social-costs plan).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 60eca18c85
commit ad06b12e50
3 changed files with 119 additions and 34 deletions
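Before the diff itself, a minimal sketch of the Phase 2 comparison the commit message describes: set-comparing the hook's `edited_files` map against the files that actually landed in a commit. The log path, the "last entry" selection, and the git invocation here are illustrative assumptions; this commit only records the per-file edit counts.

```python
import json
import subprocess

LOG_FILE = "impact.jsonl"  # hypothetical path; the hook's real log path is not shown in this diff

def files_in_commit(rev: str) -> set[str]:
    """Names of files touched by a commit, from git's porcelain output."""
    out = subprocess.run(
        ["git", "show", "--name-only", "--pretty=format:", rev],
        capture_output=True, text=True, check=True,
    ).stdout
    return {line for line in out.splitlines() if line}

def review_delta(entry: dict, rev: str) -> dict:
    """Compare files the hook saw edited against files actually committed."""
    edited = set(entry.get("edited_files", {}))
    committed = files_in_commit(rev)
    return {
        "edited_but_not_committed": sorted(edited - committed),
        "committed_but_not_edited": sorted(committed - edited),
        "overlap": sorted(edited & committed),
    }

# Compare the most recent session entry against HEAD.
with open(LOG_FILE) as f:
    last_entry = json.loads(f.readlines()[-1])
print(json.dumps(review_delta(last_entry, "HEAD"), indent=2))
```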
@@ -131,24 +131,29 @@ else:
     auto_ratio_pm = 0

 print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}')
+# Second line: JSON array of edited files with counts
+print(json.dumps(edited_files))
 " "$TRANSCRIPT_PATH" 2>/dev/null || echo "")

-if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
+USAGE_LINE1=$(echo "$USAGE_DATA" | head -1)
+EDITED_FILES_JSON=$(echo "$USAGE_DATA" | tail -1)
+
+if [ -n "$USAGE_LINE1" ] && [ "$(echo "$USAGE_LINE1" | cut -f1)" -gt 0 ] 2>/dev/null; then
     # Actual token counts available
     TOKEN_SOURCE="actual"
-    ASSISTANT_TURNS=$(echo "$USAGE_DATA" | cut -f1)
-    INPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f2)
-    CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
-    CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
-    OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
-    MODEL_ID=$(echo "$USAGE_DATA" | cut -f6)
-    AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7)
-    USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8)
-    UNIQUE_FILES=$(echo "$USAGE_DATA" | cut -f9)
-    TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10)
-    TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11)
-    TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12)
-    HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13)
+    ASSISTANT_TURNS=$(echo "$USAGE_LINE1" | cut -f1)
+    INPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f2)
+    CACHE_CREATION=$(echo "$USAGE_LINE1" | cut -f3)
+    CACHE_READ=$(echo "$USAGE_LINE1" | cut -f4)
+    OUTPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f5)
+    MODEL_ID=$(echo "$USAGE_LINE1" | cut -f6)
+    AUTO_RATIO_PM=$(echo "$USAGE_LINE1" | cut -f7)
+    USER_TOKENS_EST=$(echo "$USAGE_LINE1" | cut -f8)
+    UNIQUE_FILES=$(echo "$USAGE_LINE1" | cut -f9)
+    TOTAL_EDITS=$(echo "$USAGE_LINE1" | cut -f10)
+    TEST_PASSES=$(echo "$USAGE_LINE1" | cut -f11)
+    TEST_FAILURES=$(echo "$USAGE_LINE1" | cut -f12)
+    HAS_PUBLIC_PUSH=$(echo "$USAGE_LINE1" | cut -f13)

     # Cumulative input = all tokens that went through the model.
     # Cache reads are cheaper (~10-20% of full compute), so we weight them.
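The comment closing this hunk describes a compute-weighted input total. The script's actual constant and formula fall outside the hunk shown; a minimal sketch assuming a weighted sum with a mid-range 0.15 factor for cache reads:

```python
# Assumption: cache reads count at 15% of full compute (the comment says 10-20%);
# the script's real constant is not visible in this diff.
CACHE_READ_WEIGHT = 0.15

def cumulative_input(input_tokens: int, cache_creation: int, cache_read: int) -> int:
    """Compute-weighted input: cache reads contribute at a fraction of full cost."""
    return int(input_tokens + cache_creation + CACHE_READ_WEIGHT * cache_read)

# Example: 50k fresh input, 200k cache writes, 3M cache reads
print(cumulative_input(50_000, 200_000, 3_000_000))  # -> 700000
```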
@@ -184,6 +189,7 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
     TEST_PASSES=0
     TEST_FAILURES=0
     HAS_PUBLIC_PUSH=0
+    EDITED_FILES_JSON="{}"
 fi

 # --- Cost estimates ---
@@ -226,12 +232,48 @@ else
     TEST_PASSES=0
     TEST_FAILURES=0
     HAS_PUBLIC_PUSH=0
+    EDITED_FILES_JSON="{}"
 fi

 # --- Write log entry ---

-cat >> "$LOG_FILE" <<EOF
-{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS,"model_id":"$MODEL_ID","automation_ratio_pm":$AUTO_RATIO_PM,"user_tokens_est":$USER_TOKENS_EST,"unique_files_edited":$UNIQUE_FILES,"total_file_edits":$TOTAL_EDITS,"test_passes":$TEST_PASSES,"test_failures":$TEST_FAILURES,"has_public_push":$HAS_PUBLIC_PUSH}
-EOF
+# Build log entry using Python to safely embed the edited_files JSON
+python3 -c "
+import json, sys
+entry = {
+    'timestamp': sys.argv[1],
+    'session_id': sys.argv[2],
+    'trigger': sys.argv[3],
+    'token_source': sys.argv[4],
+    'transcript_bytes': int(sys.argv[5]),
+    'transcript_lines': int(sys.argv[6]),
+    'assistant_turns': int(sys.argv[7]),
+    'tool_uses': int(sys.argv[8]),
+    'cumulative_input_tokens': int(sys.argv[9]),
+    'cumulative_input_raw': int(sys.argv[10]),
+    'cache_creation_tokens': int(sys.argv[11]),
+    'cache_read_tokens': int(sys.argv[12]),
+    'output_tokens': int(sys.argv[13]),
+    'energy_wh': int(sys.argv[14]),
+    'co2_g': int(sys.argv[15]),
+    'cost_cents': int(sys.argv[16]),
+    'model_id': sys.argv[17],
+    'automation_ratio_pm': int(sys.argv[18]),
+    'user_tokens_est': int(sys.argv[19]),
+    'unique_files_edited': int(sys.argv[20]),
+    'total_file_edits': int(sys.argv[21]),
+    'test_passes': int(sys.argv[22]),
+    'test_failures': int(sys.argv[23]),
+    'has_public_push': int(sys.argv[24]),
+    'edited_files': json.loads(sys.argv[25]),
+}
+print(json.dumps(entry, separators=(',', ':')))
+" "$TIMESTAMP" "$SESSION_ID" "$TRIGGER" "$TOKEN_SOURCE" \
+    "$TRANSCRIPT_BYTES" "$TRANSCRIPT_LINES" "$ASSISTANT_TURNS" "$TOOL_USES" \
+    "$CUMULATIVE_INPUT" "$CUMULATIVE_INPUT_RAW" "$CACHE_CREATION" "$CACHE_READ" \
+    "$OUTPUT_TOKENS" "$ENERGY_WH" "$CO2_G" "$COST_CENTS" \
+    "$MODEL_ID" "$AUTO_RATIO_PM" "$USER_TOKENS_EST" \
+    "$UNIQUE_FILES" "$TOTAL_EDITS" "$TEST_PASSES" "$TEST_FAILURES" \
+    "$HAS_PUBLIC_PUSH" "$EDITED_FILES_JSON" >> "$LOG_FILE"

 exit 0
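Why the heredoc had to go: it interpolated shell variables straight into a JSON template, which is fine for numbers but cannot safely carry the new `edited_files` payload, since file paths may contain quotes or backslashes. A small sketch of the failure mode and of the argv round trip the new builder uses (the example paths are invented):

```python
import json

# Invented example paths: one with quotes, one with a backslash.
edited_files = {'notes "draft".md': 3, 'a\\b.sh': 1}

# Naive string interpolation, as the old heredoc effectively did:
naive = '{"edited_files": %s}' % str(edited_files)
try:
    json.loads(naive)
except json.JSONDecodeError as err:
    print("naive interpolation broke:", err)

# The argv round trip used by the new python3 -c builder:
argv_payload = json.dumps(edited_files)              # producer serializes once
entry = {"edited_files": json.loads(argv_payload)}   # builder re-parses safely
print(json.dumps(entry, separators=(',', ':')))      # well-formed JSONL line
```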
@@ -37,6 +37,7 @@ separately as handoffs.
 | 24 | Update show-impact.sh for new fields | quantify-social-costs | DONE | Social cost proxies displayed in impact viewer |
 | 25 | Update methodology confidence summary | quantify-social-costs | DONE | 4 categories moved to "Proxy", explanation added |
 | 26 | Build aggregate dashboard | quantify-social-costs | DONE | `show-aggregate.sh` — portfolio-level social cost metrics |
+| 27 | Log edited file list in hook | quantify-social-costs | DONE | `edited_files` dict in JSONL (file path → edit count) |

 ## Handoffs
