Log edited file list in impact hook for review delta analysis
The hook now records which files were edited and how many times, enabling future comparison with committed code to measure human review effort (Phase 2 of the quantify-social-costs plan).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
60eca18c85
commit
ad06b12e50
3 changed files with 119 additions and 34 deletions
|
|
@ -131,24 +131,29 @@ else:
|
|||
auto_ratio_pm = 0
|
||||
|
||||
print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}\t{model_id}\t{auto_ratio_pm}\t{user_tokens_est}\t{unique_files}\t{total_edits}\t{test_passes}\t{test_failures}\t{has_public_push}')
|
||||
# Second line: JSON array of edited files with counts
|
||||
print(json.dumps(edited_files))
|
||||
" "$TRANSCRIPT_PATH" 2>/dev/null || echo "")
|
||||
|
||||
if [ -n "$USAGE_DATA" ] && [ "$(echo "$USAGE_DATA" | cut -f1)" -gt 0 ] 2>/dev/null; then
|
||||
USAGE_LINE1=$(echo "$USAGE_DATA" | head -1)
|
||||
EDITED_FILES_JSON=$(echo "$USAGE_DATA" | tail -1)
|
||||
|
||||
if [ -n "$USAGE_LINE1" ] && [ "$(echo "$USAGE_LINE1" | cut -f1)" -gt 0 ] 2>/dev/null; then
|
||||
# Actual token counts available
|
||||
TOKEN_SOURCE="actual"
|
||||
ASSISTANT_TURNS=$(echo "$USAGE_DATA" | cut -f1)
|
||||
INPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f2)
|
||||
CACHE_CREATION=$(echo "$USAGE_DATA" | cut -f3)
|
||||
CACHE_READ=$(echo "$USAGE_DATA" | cut -f4)
|
||||
OUTPUT_TOKENS=$(echo "$USAGE_DATA" | cut -f5)
|
||||
MODEL_ID=$(echo "$USAGE_DATA" | cut -f6)
|
||||
AUTO_RATIO_PM=$(echo "$USAGE_DATA" | cut -f7)
|
||||
USER_TOKENS_EST=$(echo "$USAGE_DATA" | cut -f8)
|
||||
UNIQUE_FILES=$(echo "$USAGE_DATA" | cut -f9)
|
||||
TOTAL_EDITS=$(echo "$USAGE_DATA" | cut -f10)
|
||||
TEST_PASSES=$(echo "$USAGE_DATA" | cut -f11)
|
||||
TEST_FAILURES=$(echo "$USAGE_DATA" | cut -f12)
|
||||
HAS_PUBLIC_PUSH=$(echo "$USAGE_DATA" | cut -f13)
|
||||
ASSISTANT_TURNS=$(echo "$USAGE_LINE1" | cut -f1)
|
||||
INPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f2)
|
||||
CACHE_CREATION=$(echo "$USAGE_LINE1" | cut -f3)
|
||||
CACHE_READ=$(echo "$USAGE_LINE1" | cut -f4)
|
||||
OUTPUT_TOKENS=$(echo "$USAGE_LINE1" | cut -f5)
|
||||
MODEL_ID=$(echo "$USAGE_LINE1" | cut -f6)
|
||||
AUTO_RATIO_PM=$(echo "$USAGE_LINE1" | cut -f7)
|
||||
USER_TOKENS_EST=$(echo "$USAGE_LINE1" | cut -f8)
|
||||
UNIQUE_FILES=$(echo "$USAGE_LINE1" | cut -f9)
|
||||
TOTAL_EDITS=$(echo "$USAGE_LINE1" | cut -f10)
|
||||
TEST_PASSES=$(echo "$USAGE_LINE1" | cut -f11)
|
||||
TEST_FAILURES=$(echo "$USAGE_LINE1" | cut -f12)
|
||||
HAS_PUBLIC_PUSH=$(echo "$USAGE_LINE1" | cut -f13)
|
||||
|
||||
# Cumulative input = all tokens that went through the model.
|
||||
# Cache reads are cheaper (~10-20% of full compute), so we weight them.
|
||||
|
|
@ -184,6 +189,7 @@ print(f'{turns}\t{input_tokens}\t{cache_creation}\t{cache_read}\t{output_tokens}
|
|||
TEST_PASSES=0
|
||||
TEST_FAILURES=0
|
||||
HAS_PUBLIC_PUSH=0
|
||||
EDITED_FILES_JSON="{}"
|
||||
fi
|
||||
|
||||
# --- Cost estimates ---
|
||||
|
|
@ -226,12 +232,48 @@ else
|
|||
TEST_PASSES=0
|
||||
TEST_FAILURES=0
|
||||
HAS_PUBLIC_PUSH=0
|
||||
EDITED_FILES_JSON="{}"
|
||||
fi
|
||||
|
||||
# --- Write log entry ---
|
||||
|
||||
cat >> "$LOG_FILE" <<EOF
|
||||
{"timestamp":"$TIMESTAMP","session_id":"$SESSION_ID","trigger":"$TRIGGER","token_source":"$TOKEN_SOURCE","transcript_bytes":$TRANSCRIPT_BYTES,"transcript_lines":$TRANSCRIPT_LINES,"assistant_turns":$ASSISTANT_TURNS,"tool_uses":$TOOL_USES,"cumulative_input_tokens":$CUMULATIVE_INPUT,"cumulative_input_raw":$CUMULATIVE_INPUT_RAW,"cache_creation_tokens":$CACHE_CREATION,"cache_read_tokens":$CACHE_READ,"output_tokens":$OUTPUT_TOKENS,"energy_wh":$ENERGY_WH,"co2_g":$CO2_G,"cost_cents":$COST_CENTS,"model_id":"$MODEL_ID","automation_ratio_pm":$AUTO_RATIO_PM,"user_tokens_est":$USER_TOKENS_EST,"unique_files_edited":$UNIQUE_FILES,"total_file_edits":$TOTAL_EDITS,"test_passes":$TEST_PASSES,"test_failures":$TEST_FAILURES,"has_public_push":$HAS_PUBLIC_PUSH}
|
||||
EOF
|
||||
# Build log entry using Python to safely embed the edited_files JSON
|
||||
python3 -c "
|
||||
import json, sys
|
||||
entry = {
|
||||
'timestamp': sys.argv[1],
|
||||
'session_id': sys.argv[2],
|
||||
'trigger': sys.argv[3],
|
||||
'token_source': sys.argv[4],
|
||||
'transcript_bytes': int(sys.argv[5]),
|
||||
'transcript_lines': int(sys.argv[6]),
|
||||
'assistant_turns': int(sys.argv[7]),
|
||||
'tool_uses': int(sys.argv[8]),
|
||||
'cumulative_input_tokens': int(sys.argv[9]),
|
||||
'cumulative_input_raw': int(sys.argv[10]),
|
||||
'cache_creation_tokens': int(sys.argv[11]),
|
||||
'cache_read_tokens': int(sys.argv[12]),
|
||||
'output_tokens': int(sys.argv[13]),
|
||||
'energy_wh': int(sys.argv[14]),
|
||||
'co2_g': int(sys.argv[15]),
|
||||
'cost_cents': int(sys.argv[16]),
|
||||
'model_id': sys.argv[17],
|
||||
'automation_ratio_pm': int(sys.argv[18]),
|
||||
'user_tokens_est': int(sys.argv[19]),
|
||||
'unique_files_edited': int(sys.argv[20]),
|
||||
'total_file_edits': int(sys.argv[21]),
|
||||
'test_passes': int(sys.argv[22]),
|
||||
'test_failures': int(sys.argv[23]),
|
||||
'has_public_push': int(sys.argv[24]),
|
||||
'edited_files': json.loads(sys.argv[25]),
|
||||
}
|
||||
print(json.dumps(entry, separators=(',', ':')))
|
||||
" "$TIMESTAMP" "$SESSION_ID" "$TRIGGER" "$TOKEN_SOURCE" \
|
||||
"$TRANSCRIPT_BYTES" "$TRANSCRIPT_LINES" "$ASSISTANT_TURNS" "$TOOL_USES" \
|
||||
"$CUMULATIVE_INPUT" "$CUMULATIVE_INPUT_RAW" "$CACHE_CREATION" "$CACHE_READ" \
|
||||
"$OUTPUT_TOKENS" "$ENERGY_WH" "$CO2_G" "$COST_CENTS" \
|
||||
"$MODEL_ID" "$AUTO_RATIO_PM" "$USER_TOKENS_EST" \
|
||||
"$UNIQUE_FILES" "$TOTAL_EDITS" "$TEST_PASSES" "$TEST_FAILURES" \
|
||||
"$HAS_PUBLIC_PUSH" "$EDITED_FILES_JSON" >> "$LOG_FILE"
|
||||
|
||||
exit 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue