Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 92 additions & 25 deletions tools/benchmark_rounds.sh
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ parse_timing() {
local log_file="$1"

local timing
timing=$(grep -E 'Thread=[0-9]+ (orch_start|end)=' "$log_file" || true)
timing=$(grep -E 'Thread=[0-9]+ (orch_start|end)=|BENCHMARK: thread=[0-9]+ end=|Thread [0-9]+: Scheduler summary: total_time=' "$log_file" || true)

if [[ -z "$timing" ]]; then
echo " (no benchmark timing data — was PTO2_PROFILING enabled?)"
Expand All @@ -129,51 +129,118 @@ parse_timing() {

echo "$timing" | awk '
function flush_round() {
if (round >= 0 && max_end > 0 && min_start > 0) {
results[round] = (max_end - min_start) / 50.0
if (round >= 0 && max_end > 0 && min_orch_start > 0) {
r_elapsed[round] = (max_end - min_orch_start) / 50.0

# Orch wall-clock: BENCHMARK end (= last orch finish) - earliest orch start
if (benchmark_end > 0)
r_orch[round] = (benchmark_end - min_orch_start) / 50.0

# Sched wall-clock: max(sched_end) - min(sched_start)
# Derive sched_start per thread: end_cycle - total_time_us * 50
min_ss = 0; max_se = 0
for (tid in sched_total_us) {
if (tid in thread_end) {
ss = thread_end[tid] - sched_total_us[tid] * 50
if (min_ss == 0 || ss < min_ss) min_ss = ss
if (thread_end[tid] > max_se) max_se = thread_end[tid]
}
}
if (max_se > 0 && min_ss > 0)
r_sched[round] = (max_se - min_ss) / 50.0

count++
}
}
BEGIN { round = 0; min_start = 0; max_end = 0; count = 0 }
BEGIN {
round = 0; min_orch_start = 0; max_end = 0; benchmark_end = 0; count = 0
}
/orch_start=/ {
match($0, /Thread=([0-9]+)/, tm)
tid = tm[1] + 0
if (tid in seen) {
if (tid in orch_seen) {
flush_round()
round++
min_start = 0
max_end = 0
delete seen
min_orch_start = 0; max_end = 0; benchmark_end = 0
delete orch_seen; delete thread_end; delete sched_total_us
}
seen[tid] = 1
orch_seen[tid] = 1
match($0, /orch_start=([0-9]+)/, m)
val = m[1] + 0
if (min_start == 0 || val < min_start) min_start = val
if (min_orch_start == 0 || val < min_orch_start) min_orch_start = val
}
/end=/ {
match($0, /end=([0-9]+)/, m)
val = m[1] + 0
# Scheduler/orch thread end cycle (uppercase Thread=)
/Thread=[0-9]+ end=/ {
match($0, /Thread=([0-9]+)/, tm); tid = tm[1] + 0
match($0, /end=([0-9]+)/, m); val = m[1] + 0
thread_end[tid] = val
if (val > max_end) max_end = val
}
# Orchestrator benchmark end cycle (lowercase thread=, last orch to finish)
/BENCHMARK:.*end=/ {
match($0, /end=([0-9]+)/, m)
benchmark_end = m[1] + 0
}
# Scheduler summary: extract thread id and total_time for sched_start derivation
/Scheduler summary: total_time=/ {
match($0, /Thread ([0-9]+):/, tm); tid = tm[1] + 0
match($0, /total_time=([0-9.]+)us/, m)
sched_total_us[tid] = m[1] + 0.0
}
END {
flush_round()
if (count == 0) { print " (no rounds parsed)"; exit 1 }

printf " %-8s %12s\n", "Round", "Elapsed (us)"
printf " %-8s %12s\n", "-----", "------------"
sum_v = 0
min_v = results[0]
max_v = results[0]
# Detect whether orch/sched data is available
has_detail = 0
for (i = 0; i < count; i++) {
if (r_orch[i] > 0 || r_sched[i] > 0) { has_detail = 1; break }
}

if (has_detail) {
printf " %-8s %12s %12s %12s\n", "Round", "Elapsed (us)", "Orch (us)", "Sched (us)"
printf " %-8s %12s %12s %12s\n", "-----", "------------", "---------", "----------"
} else {
printf " %-8s %12s\n", "Round", "Elapsed (us)"
printf " %-8s %12s\n", "-----", "------------"
}
sum_e = 0; min_e = r_elapsed[0]; max_e = r_elapsed[0]
sum_o = 0; min_o = r_orch[0]; max_o = r_orch[0]
sum_s = 0; min_s = r_sched[0]; max_s = r_sched[0]
for (i = 0; i < count; i++) {
printf " %-8d %12.1f\n", i, results[i]
sum_v += results[i]
if (results[i] < min_v) min_v = results[i]
if (results[i] > max_v) max_v = results[i]
if (has_detail)
printf " %-8d %12.1f %12.1f %12.1f\n", i, r_elapsed[i], r_orch[i], r_sched[i]
else
printf " %-8d %12.1f\n", i, r_elapsed[i]
sum_e += r_elapsed[i]
if (r_elapsed[i] < min_e) min_e = r_elapsed[i]
if (r_elapsed[i] > max_e) max_e = r_elapsed[i]
sum_o += r_orch[i]
if (r_orch[i] < min_o) min_o = r_orch[i]
if (r_orch[i] > max_o) max_o = r_orch[i]
sum_s += r_sched[i]
if (r_sched[i] < min_s) min_s = r_sched[i]
if (r_sched[i] > max_s) max_s = r_sched[i]
}
printf "\n Avg: %.1f us (%d rounds)\n", sum_v / count, count
printf "\n Avg: %.1f us (%d rounds)\n", sum_e / count, count
if (count > 2) {
trimmed = (sum_v - min_v - max_v) / (count - 2)
printf " Trimmed Avg: %.1f us (excluding min=%.1f, max=%.1f)\n", trimmed, min_v, max_v
trimmed = (sum_e - min_e - max_e) / (count - 2)
printf " Trimmed Avg: %.1f us (excluding min=%.1f, max=%.1f)\n", trimmed, min_e, max_e
}
if (has_detail) {
printf "\n Orch Avg: %.1f us", sum_o / count
if (count > 2) {
trimmed_o = (sum_o - min_o - max_o) / (count - 2)
printf " Trimmed: %.1f us", trimmed_o
}
printf " Total: %.1f us\n", sum_o

printf " Sched Avg: %.1f us", sum_s / count
if (count > 2) {
trimmed_s = (sum_s - min_s - max_s) / (count - 2)
printf " Trimmed: %.1f us", trimmed_s
}
printf " Total: %.1f us\n", sum_s
}
Comment on lines +207 to 244

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The calculation of statistics (average, min, max, trimmed average) for orchestration and scheduling times is incorrect when some rounds are missing this data. The current implementation initializes min_o and min_s with r_orch[0] and r_sched[0], which can be 0 if data for the first round is missing. This leads to an incorrect minimum value of 0. Additionally, the average is calculated by dividing by the total number of rounds (count) instead of the number of rounds that actually have orchestration or scheduling data, skewing the average. The trimmed average is also affected by these issues.

The logic should be updated to only consider rounds with valid data (> 0) for these statistics. This involves:

  1. Initializing min_o and min_s to a value indicating they are not set yet.
  2. Keeping separate counters (count_o, count_s) for rounds with orchestration and scheduling data.
  3. Updating sums, min, max, and counts only for rounds with data.
  4. Using these separate counters for calculating averages and trimmed averages.
        sum_e = 0; min_e = r_elapsed[0]; max_e = r_elapsed[0]
        sum_o = 0; min_o = -1; max_o = 0; count_o = 0
        sum_s = 0; min_s = -1; max_s = 0; count_s = 0
        for (i = 0; i < count; i++) {
            if (has_detail)
                printf "  %-8d  %12.1f  %12.1f  %12.1f\n", i, r_elapsed[i], r_orch[i], r_sched[i]
            else
                printf "  %-8d  %12.1f\n", i, r_elapsed[i]

            sum_e += r_elapsed[i]
            if (r_elapsed[i] < min_e) min_e = r_elapsed[i]
            if (r_elapsed[i] > max_e) max_e = r_elapsed[i]

            if (r_orch[i] > 0) {
                sum_o += r_orch[i]
                if (min_o < 0 || r_orch[i] < min_o) min_o = r_orch[i]
                if (r_orch[i] > max_o) max_o = r_orch[i]
                count_o++
            }
            if (r_sched[i] > 0) {
                sum_s += r_sched[i]
                if (min_s < 0 || r_sched[i] < min_s) min_s = r_sched[i]
                if (r_sched[i] > max_s) max_s = r_sched[i]
                count_s++
            }
        }
        printf "\n  Avg: %.1f us  (%d rounds)\n", sum_e / count, count
        if (count > 2) {
            trimmed = (sum_e - min_e - max_e) / (count - 2)
            printf "  Trimmed Avg: %.1f us  (excluding min=%.1f, max=%.1f)\n", trimmed, min_e, max_e
        }
        if (has_detail) {
            if (count_o > 0) {
                printf "\n  Orch  Avg: %.1f us", sum_o / count_o
                if (count_o > 2) {
                    trimmed_o = (sum_o - min_o - max_o) / (count_o - 2)
                    printf "  Trimmed: %.1f us", trimmed_o
                }
                printf "  Total: %.1f us\n", sum_o
            }
            if (count_s > 0) {
                printf "  Sched Avg: %.1f us", sum_s / count_s
                if (count_s > 2) {
                    trimmed_s = (sum_s - min_s - max_s) / (count_s - 2)
                    printf "  Trimmed: %.1f us", trimmed_s
                }
                printf "  Total: %.1f us\n", sum_s
            }
        }

}'
}
Expand Down
Loading