From 4f26eea1ba5758fc5d8f7ee41bee681db0cdc464 Mon Sep 17 00:00:00 2001
From: Brendan O'Leary <brendan@olearycrew.com>
Date: Mon, 9 Mar 2026 10:09:12 -0400
Subject: [PATCH] Include score summary and submission ID in Slack
 notifications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parses the new benchmark.py output to extract:
- Final score (e.g., 8.50/10 (85.0%))
- Submission ID (UUID)
- Leaderboard URL

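Results section now shows:
 • model-name: 8.50/10 (85.0%) — https://pinchbench.com/... (submission: uuid)

A model is now listed even when no leaderboard URL is found in its
output; any field that cannot be parsed is omitted from its entry.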
---
 bench_runner.sh | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/bench_runner.sh b/bench_runner.sh
index dfa110a..7b2b311 100755
--- a/bench_runner.sh
+++ b/bench_runner.sh
@@ -201,11 +201,23 @@ for model in "${MODELS[@]}"; do
     MODEL_OUTPUT=$(cat "$MODEL_TMPFILE")
     rm -f "$MODEL_TMPFILE"
 
-    # Extract "View at: https://..." leaderboard URL from model run output
+    # Extract score, submission ID, and leaderboard URL from the model run output (each stays empty if not found)
+    MODEL_SCORE=$(echo "$MODEL_OUTPUT" | grep -oE 'Final score: [0-9.]+/[0-9.]+ \([0-9.]+%\)' | head -1 | sed 's/^Final score: //' || true)
+    MODEL_SUBMISSION=$(echo "$MODEL_OUTPUT" | grep -i "Submission ID" | grep -oiE '[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}' | head -1 || true)
     MODEL_URL=$(echo "$MODEL_OUTPUT" | grep -i "View at" | grep -oE 'https?://[^ ]+' | head -1 || true)
+
+    # Build the result entry: model name plus whichever of score, URL, and submission ID were found
+    RESULT_ENTRY="$model"
+    if [ -n "$MODEL_SCORE" ]; then
+        RESULT_ENTRY="$RESULT_ENTRY: $MODEL_SCORE"
+    fi
     if [ -n "$MODEL_URL" ]; then
-        RESULT_URLS+=("$model: $MODEL_URL")
+        RESULT_ENTRY="$RESULT_ENTRY — $MODEL_URL"
+    fi
+    if [ -n "$MODEL_SUBMISSION" ]; then
+        RESULT_ENTRY="$RESULT_ENTRY (submission: $MODEL_SUBMISSION)"
     fi
+    RESULT_URLS+=("$RESULT_ENTRY")
 
     if [ "$MODEL_EXIT" -eq 0 ]; then
         echo "✓ $model complete at $(date -u)"