From 55a8cf04111ebc24016bdea732fb6dd26887219d Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 10 Jun 2026 14:56:00 -0700 Subject: [PATCH 1/3] Add virtual-cycles to callgrind measure --- crates/recorder/src/measure/callgrind.rs | 27 ++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/crates/recorder/src/measure/callgrind.rs b/crates/recorder/src/measure/callgrind.rs index 3eb450f6..f27ae4bd 100644 --- a/crates/recorder/src/measure/callgrind.rs +++ b/crates/recorder/src/measure/callgrind.rs @@ -139,7 +139,8 @@ impl Measure for CallgrindMeasure { .unwrap_or_else(|error| panic!("failed to read callgrind dump: {error:#}")); self.next_dump_part += 1; - measurements.reserve(dump.counts.len()); + measurements.reserve(dump.counts.len() + 1); + measurements.add(phase, "virtual-cycles".into(), dump.virtual_cycles()); for event in dump.counts { measurements.add(phase, event.name.into(), event.count); } @@ -154,7 +155,29 @@ struct ParsedCallgrindDump { counts: Vec<CallgrindEventCount>, } -#[derive(Debug, Clone, PartialEq, Eq)] +impl ParsedCallgrindDump { + /// Compute "virtual cycles" based on the number of instructions retired, + /// cache misses, and branch mispredicts. + /// + /// This metric should roughly correspond with real cycles, but should be + /// independent of our machine's exact microarchitectural details. 
+ fn virtual_cycles(&self) -> u64 { + let count = |name| self.counts.iter().find(|e| e.name == name).unwrap().count; + let cost = |factor, event| factor * count(event); + + cost(1, "instructions-retired") + + cost(10, "l1-icache-misses") + + cost(10, "l1-dcache-read-misses") + + cost(10, "l1-dcache-write-misses") + + cost(100, "ll-icache-misses") + + cost(100, "ll-dcache-read-misses") + + cost(100, "ll-dcache-write-misses") + + cost(10, "conditional-branch-misses") + + cost(10, "indirect-branch-misses") + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] struct CallgrindEventCount { name: &'static str, count: u64, From 4e00bb7ba51370ddc30b8106f19dd558b5ec8e08 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 11 Jun 2026 12:29:29 -0700 Subject: [PATCH 2/3] Adjust virtual cycle cost factors --- crates/recorder/src/measure/callgrind.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/recorder/src/measure/callgrind.rs b/crates/recorder/src/measure/callgrind.rs index f27ae4bd..53250c65 100644 --- a/crates/recorder/src/measure/callgrind.rs +++ b/crates/recorder/src/measure/callgrind.rs @@ -169,11 +169,11 @@ impl ParsedCallgrindDump { + cost(10, "l1-icache-misses") + cost(10, "l1-dcache-read-misses") + cost(10, "l1-dcache-write-misses") - + cost(100, "ll-icache-misses") - + cost(100, "ll-dcache-read-misses") - + cost(100, "ll-dcache-write-misses") - + cost(10, "conditional-branch-misses") - + cost(10, "indirect-branch-misses") + + cost(300, "ll-icache-misses") + + cost(300, "ll-dcache-read-misses") + + cost(300, "ll-dcache-write-misses") + + cost(15, "conditional-branch-misses") + + cost(15, "indirect-branch-misses") } } From 311101d2301a0b1e9ced1706958ae70afc993524 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 11 Jun 2026 12:43:26 -0700 Subject: [PATCH 3/3] Double L1 instruction and data caches to 64KiB in callgrind --- crates/cli/src/benchmark.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff 
--git a/crates/cli/src/benchmark.rs b/crates/cli/src/benchmark.rs index c42c7550..92d5fcf7 100644 --- a/crates/cli/src/benchmark.rs +++ b/crates/cli/src/benchmark.rs @@ -26,8 +26,12 @@ mod callgrind { const DEFAULT_CALLGRIND_PROCESSES: usize = 3; const DEFAULT_CALLGRIND_ITERATIONS_PER_PROCESS: usize = 1; - const CACHE_MODEL_I1: &str = "32768,8,64"; - const CACHE_MODEL_D1: &str = "32768,8,64"; + + // 64KiB, 8-way associative, 64B line size. + const CACHE_MODEL_I1: &str = "65536,8,64"; + // 64KiB, 8-way associative, 64B line size. + const CACHE_MODEL_D1: &str = "65536,8,64"; + // 8MiB, 16-way associative, 64B line size. const CACHE_MODEL_LL: &str = "8388608,16,64"; impl PreparedCommand {