diff --git a/tests/device_tests/a2a3/tensormap_and_ringbuffer/alternating_matmul_add/golden.py b/tests/device_tests/a2a3/tensormap_and_ringbuffer/alternating_matmul_add/golden.py index 1cbadde7..0134977e 100644 --- a/tests/device_tests/a2a3/tensormap_and_ringbuffer/alternating_matmul_add/golden.py +++ b/tests/device_tests/a2a3/tensormap_and_ringbuffer/alternating_matmul_add/golden.py @@ -18,7 +18,7 @@ ATOL = 1e-3 ALL_CASES = { - "case1": { + "Case1": { "batch": 500, "M": 4, "N": 4, @@ -26,7 +26,7 @@ "matmul_batch": 4, "add_batch": 4, }, - "case2": { + "Case2": { "batch": 512, "M": 2, # Number of matmul tasks per batch "N": 5, # Number of add tasks per batch @@ -34,10 +34,10 @@ "matmul_batch": 4, # Number of matmul tiles per task "add_batch": 5, # Number of add tiles per task }, - + } -DEFAULT_CASE = "case1" +DEFAULT_CASE = "Case1" def generate_inputs(params: dict) -> list: diff --git a/tools/benchmark_rounds.sh b/tools/benchmark_rounds.sh index 0d630ae8..536b9e15 100755 --- a/tools/benchmark_rounds.sh +++ b/tools/benchmark_rounds.sh @@ -5,7 +5,7 @@ # Usage: # ./tools/benchmark_rounds.sh [-p ] [-d ] [-n ] # -# Runs all examples listed in EXAMPLES array and prints timing for each. +# Edit the EXAMPLE_CASES map below to control which examples and cases to run. set -euo pipefail @@ -14,10 +14,27 @@ PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" RUN_EXAMPLE="$PROJECT_ROOT/examples/scripts/run_example.py" # --------------------------------------------------------------------------- -# Examples to benchmark (paths relative to tests/device_tests//tensormap_and_ringbuffer/) -# Each entry is just the directory name; kernels/ and golden.py are implied. +# Examples to benchmark and their case lists. 
+# Key = directory name under tests/device_tests//tensormap_and_ringbuffer/ +# Value = comma-separated case names to run (empty string = run DEFAULT_CASE) +# +# Available cases per example (from golden.py ALL_CASES): +# alternating_matmul_add : Case1, Case2 +# benchmark_bgemm : Case0, Case1, Case2, Case3, Case4 +# paged_attention_unroll : Case1, Case2, Case3 +# batch_paged_attention : Case1, Case2, Case3 +# paged_attention : Case1, Case2, Case3, Case4, Case5, Case6 # --------------------------------------------------------------------------- -EXAMPLES=( +declare -A EXAMPLE_CASES=( + [alternating_matmul_add]="" + [benchmark_bgemm]="" + [paged_attention_unroll]="Case1,Case2" + [batch_paged_attention]="" + [paged_attention]="" +) + +# Ordered list to control benchmark execution order +EXAMPLE_ORDER=( alternating_matmul_add benchmark_bgemm paged_attention_unroll @@ -62,6 +79,9 @@ Options: All other options are passed through to run_example.py (e.g. --case). +Edit the EXAMPLE_CASES map at the top of this script to control which +examples and cases to benchmark. + Output: Average elapsed time in microseconds for each example. USAGE @@ -142,11 +162,19 @@ parse_timing() { printf " %-8s %12s\n", "Round", "Elapsed (us)" printf " %-8s %12s\n", "-----", "------------" sum_v = 0 + min_v = results[0] + max_v = results[0] for (i = 0; i < count; i++) { printf " %-8d %12.1f\n", i, results[i] sum_v += results[i] + if (results[i] < min_v) min_v = results[i] + if (results[i] > max_v) max_v = results[i] } printf "\n Avg: %.1f us (%d rounds)\n", sum_v / count, count + if (count > 2) { + trimmed = (sum_v - min_v - max_v) / (count - 2) + printf " Trimmed Avg: %.1f us (excluding min=%.1f, max=%.1f)\n", trimmed, min_v, max_v + } }' } @@ -181,13 +209,70 @@ wait_for_new_log() { return 1 } +# --------------------------------------------------------------------------- +# run_bench [case_name] +# Run one benchmark invocation and parse timing from the resulting log. 
+# Increments the global PASS / FAIL counters.
+# ---------------------------------------------------------------------------
+run_bench() {
+    local example="$1" kernels_dir="$2" golden="$3" case_name="${4:-}"
+
+    if [[ -n "$case_name" ]]; then
+        echo " ---- $case_name ----"
+    fi
+
+    # Snapshot existing logs so the log written by this run can be told apart
+    local pre_log_file
+    pre_log_file=$(mktemp)
+    trap 'rm -f -- "$pre_log_file"; trap - RETURN' RETURN  # one-shot: RETURN traps are not function-local
+    ls -1 "$DEVICE_LOG_DIR"/*.log 2>/dev/null | sort > "$pre_log_file" || true
+
+    # Build run command (--case is added only when a case name was given)
+    local run_cmd=(
+        python3 "$RUN_EXAMPLE"
+        -k "$kernels_dir" -g "$golden"
+        -p "$PLATFORM" -d "$DEVICE_ID"
+        -n "$ROUNDS"
+    )
+    if [[ -n "$case_name" ]]; then
+        run_cmd+=(--case "$case_name")
+    fi
+    run_cmd+=(${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"})  # empty array is an error under set -u on bash < 4.4
+
+    # Run example; its output is discarded, only the exit status is checked
+    if ! "${run_cmd[@]}" > /dev/null 2>&1; then
+        echo " FAILED: run_example.py returned non-zero"
+        ((FAIL++)) || true
+        return
+    fi
+
+    # Find new device log; '|| true' stops set -e aborting the script when the helper returns non-zero
+    local new_log
+    new_log=$(wait_for_new_log "$pre_log_file") || true
+
+    if [[ -z "$new_log" ]]; then
+        echo " FAILED: no device log found in $DEVICE_LOG_DIR"
+        ((FAIL++)) || true
+        return
+    fi
+
+    echo " Log: $new_log"
+    if parse_timing "$new_log"; then
+        ((PASS++)) || true
+    else
+        ((FAIL++)) || true
+    fi
+}
+
 # ---------------------------------------------------------------------------
 # Main loop
 # ---------------------------------------------------------------------------
 PASS=0
 FAIL=0
 
-for example in "${EXAMPLES[@]}"; do
+for example in "${EXAMPLE_ORDER[@]}"; do
+    case_list="${EXAMPLE_CASES[$example]:-}"
+
     EXAMPLE_DIR="$EXAMPLES_DIR/$example"
     KERNELS_DIR="$EXAMPLE_DIR/kernels"
     GOLDEN="$EXAMPLE_DIR/golden.py"
@@ -203,46 +288,23 @@ for example in "${EXAMPLES[@]}"; do
         continue
     fi
 
-    # Snapshot existing logs
-    PRE_LOG_FILE=$(mktemp)
-    ls -1 "$DEVICE_LOG_DIR"/*.log 2>/dev/null | sort > "$PRE_LOG_FILE" || true
-
-    # Run example
-    if !
python3 "$RUN_EXAMPLE" \ - -k "$KERNELS_DIR" -g "$GOLDEN" \ - -p "$PLATFORM" -d "$DEVICE_ID" \ - -n "$ROUNDS" \ - "${EXTRA_ARGS[@]}" > /dev/null 2>&1; then - echo " FAILED: run_example.py returned non-zero" - rm -f "$PRE_LOG_FILE" - ((FAIL++)) || true - continue - fi - - # Find new device log - NEW_LOG=$(wait_for_new_log "$PRE_LOG_FILE") - rm -f "$PRE_LOG_FILE" - - if [[ -z "$NEW_LOG" ]]; then - echo " FAILED: no device log found in $DEVICE_LOG_DIR" - ((FAIL++)) || true - continue - fi - - echo " Log: $NEW_LOG" - if parse_timing "$NEW_LOG"; then - ((PASS++)) || true + if [[ -z "${case_list:-}" ]]; then + run_bench "$example" "$KERNELS_DIR" "$GOLDEN" else - ((FAIL++)) || true + IFS=',' read -ra cases <<< "$case_list" + for c in "${cases[@]}"; do + run_bench "$example" "$KERNELS_DIR" "$GOLDEN" "$c" + done fi done # --------------------------------------------------------------------------- # Summary # --------------------------------------------------------------------------- +TOTAL=$((PASS + FAIL)) echo "" echo "================================================================" -echo " Benchmark complete: $PASS passed, $FAIL failed (${#EXAMPLES[@]} total)" +echo " Benchmark complete: $PASS passed, $FAIL failed ($TOTAL total)" echo "================================================================" [[ $FAIL -eq 0 ]]