From 4f26eea1ba5758fc5d8f7ee41bee681db0cdc464 Mon Sep 17 00:00:00 2001
From: Brendan O'Leary
Date: Mon, 9 Mar 2026 10:09:12 -0400
Subject: [PATCH] Include score summary and submission ID in Slack notifications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parses the new benchmark.py output to extract:
- Final score (e.g., 8.50/10 (85.0%))
- Submission ID (UUID)
- Leaderboard URL

Results section now shows:
• model-name: 8.50/10 (85.0%) — https://pinchbench.com/... (submission: uuid)
---
 bench_runner.sh | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/bench_runner.sh b/bench_runner.sh
index dfa110a..7b2b311 100755
--- a/bench_runner.sh
+++ b/bench_runner.sh
@@ -201,11 +201,23 @@ for model in "${MODELS[@]}"; do
     MODEL_OUTPUT=$(cat "$MODEL_TMPFILE")
     rm -f "$MODEL_TMPFILE"
 
-    # Extract "View at: https://..." leaderboard URL from model run output
+    # Extract score, submission ID, and leaderboard URL from model run output
+    MODEL_SCORE=$(echo "$MODEL_OUTPUT" | grep -oP "Final score: \K[\d.]+/[\d.]+ \([\d.]+%\)" | head -1 || true)
+    MODEL_SUBMISSION=$(echo "$MODEL_OUTPUT" | grep -i "Submission ID" | grep -oE '[a-f0-9-]{36}' | head -1 || true)
     MODEL_URL=$(echo "$MODEL_OUTPUT" | grep -i "View at" | grep -oE 'https?://[^ ]+' | head -1 || true)
+
+    # Build result entry with score and URL
+    RESULT_ENTRY="$model"
+    if [ -n "$MODEL_SCORE" ]; then
+        RESULT_ENTRY="$RESULT_ENTRY: $MODEL_SCORE"
+    fi
     if [ -n "$MODEL_URL" ]; then
-        RESULT_URLS+=("$model: $MODEL_URL")
+        RESULT_ENTRY="$RESULT_ENTRY — $MODEL_URL"
+    fi
+    if [ -n "$MODEL_SUBMISSION" ]; then
+        RESULT_ENTRY="$RESULT_ENTRY (submission: $MODEL_SUBMISSION)"
     fi
+    RESULT_URLS+=("$RESULT_ENTRY")
 
     if [ "$MODEL_EXIT" -eq 0 ]; then
         echo "✓ $model complete at $(date -u)"