From aea8a943c2b32c398882b22d8e6e0b4bc6e43ff3 Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Thu, 30 Apr 2026 20:32:56 +0000 Subject: [PATCH 1/8] fix: update benchmark runner behavior --- src/sandbox/benchmark.ts | 4 +--- src/sandbox/concurrent.ts | 4 +--- src/sandbox/staggered.ts | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index fcb73bf..c38c2a1 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -17,7 +17,6 @@ export async function runBenchmark(config: ProviderConfig): Promise - runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs) + runIteration(config.createCompute(), timeout, sandboxOptions, destroyTimeoutMs) .then(result => { console.log(` Sandbox ${i + 1}/${concurrency}: TTI ${(result.ttiMs / 1000).toFixed(2)}s`); return result; diff --git a/src/sandbox/staggered.ts b/src/sandbox/staggered.ts index 78e63af..86017f7 100644 --- a/src/sandbox/staggered.ts +++ b/src/sandbox/staggered.ts @@ -28,8 +28,6 @@ export async function runStaggeredBenchmark(config: StaggeredConfig): Promise { const readyAt = performance.now() - wallStart; rampProfile.push({ launchedAt, readyAt, ttiMs: result.ttiMs }); From a5992d247fd4444c79819aec6f04d3823e087742 Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Thu, 30 Apr 2026 21:04:49 +0000 Subject: [PATCH 2/8] fix: detect sandbox reuse across benchmark runs --- src/sandbox/benchmark.ts | 50 +++++++++++++++++++++++++++++++++++++-- src/sandbox/concurrent.ts | 3 ++- src/sandbox/staggered.ts | 3 ++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index c38c2a1..cc1f2d9 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -18,6 +18,7 @@ export async function runBenchmark(config: ProviderConfig): Promise(); console.log(`\n--- Benchmarking: ${name} (${iterations} iterations) ---`); @@ -25,7 +26,13 @@ export async function runBenchmark(config: ProviderConfig): Promise, destroyTimeoutMs: number = 15_000): Promise { +function getSandboxFingerprint(sandbox: any): string | null { + const candidateKeys = ['id', 'sandboxId', 'containerId', 'instanceId']; + for (const key of candidateKeys) { + const value = sandbox?.[key]; + if (typeof value === 'string' && value.trim()) { + return `sandbox:${value}`; + } + } + + return null; +} + +export async function runIteration( + compute: any, + timeout: number, + sandboxOptions?: Record, + destroyTimeoutMs: number = 15_000, + seenSandboxFingerprints?: Set, +): Promise { let sandbox: any = null; try { @@ -56,6 +81,27 @@ export async function runIteration(compute: any, timeout: number, sandboxOptions sandbox = await withTimeout(compute.sandbox.create(sandboxOptions), timeout, 'Sandbox creation timed out'); + const identityResult = await withTimeout( + sandbox.runCommand("sh -lc 'echo -n $(hostname)'"), + 30_000, + 'Sandbox identity check timed out' + ) as { exitCode: number; stdout?: string; stderr?: string }; + + if (identityResult.exitCode !== 0) { + throw new Error(`Sandbox identity check failed with exit code ${identityResult.exitCode}: ${identityResult.stderr || 'Unknown error'}`); + } + + const runtimeIdentity = (identityResult.stdout || '').trim(); + const sandboxFingerprint = getSandboxFingerprint(sandbox); + const fingerprint = sandboxFingerprint || (runtimeIdentity ? `runtime:${runtimeIdentity}` : null); + + if (seenSandboxFingerprints && fingerprint) { + if (seenSandboxFingerprints.has(fingerprint)) { + throw new Error('Sandbox/container reuse detected across benchmark iterations'); + } + seenSandboxFingerprints.add(fingerprint); + } + const result = await withTimeout( sandbox.runCommand('node -v'), 30_000, diff --git a/src/sandbox/concurrent.ts b/src/sandbox/concurrent.ts index 92609a7..7bb5885 100644 --- a/src/sandbox/concurrent.ts +++ b/src/sandbox/concurrent.ts @@ -28,10 +28,11 @@ export async function runConcurrentBenchmark(config: ConcurrentConfig): Promise< console.log(`\n--- Concurrent Benchmark: ${name} (${concurrency} sandboxes) ---`); const wallStart = performance.now(); + const seenSandboxFingerprints = new Set(); // Fire all sandbox creations simultaneously — no awaiting between launches const promises = Array.from({ length: concurrency }, (_, i) => - runIteration(config.createCompute(), timeout, sandboxOptions, destroyTimeoutMs) + runIteration(config.createCompute(), timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) .then(result => { console.log(` Sandbox ${i + 1}/${concurrency}: TTI ${(result.ttiMs / 1000).toFixed(2)}s`); return result; diff --git a/src/sandbox/staggered.ts b/src/sandbox/staggered.ts index 86017f7..287bef2 100644 --- a/src/sandbox/staggered.ts +++ b/src/sandbox/staggered.ts @@ -31,13 +31,14 @@ export async function runStaggeredBenchmark(config: StaggeredConfig): Promise(); const promises: Promise[] = []; const rampProfile: { launchedAt: number; readyAt: number; ttiMs: number }[] = []; for (let i = 0; i < concurrency; i++) { const launchedAt = performance.now() - wallStart; - const p = runIteration(config.createCompute(), timeout, sandboxOptions, destroyTimeoutMs) + const p = runIteration(config.createCompute(), timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) .then(result => { const readyAt = performance.now() - wallStart; rampProfile.push({ launchedAt, readyAt, ttiMs: result.ttiMs }); From a0b516c13ab14ac9bdb94824e94c89a7dfc140a4 Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Thu, 30 Apr 2026 21:06:28 +0000 Subject: [PATCH 3/8] fix: keep sandbox fingerprint reuse checks only --- src/sandbox/benchmark.ts | 3 ++- src/sandbox/concurrent.ts | 3 ++- src/sandbox/staggered.ts | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index cc1f2d9..dcb3999 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -17,6 +17,7 @@ export async function runBenchmark(config: ProviderConfig): Promise(); @@ -27,7 +28,7 @@ export async function runBenchmark(config: ProviderConfig): Promise - runIteration(config.createCompute(), timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) + runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) .then(result => { console.log(` Sandbox ${i + 1}/${concurrency}: TTI ${(result.ttiMs / 1000).toFixed(2)}s`); return result; diff --git a/src/sandbox/staggered.ts b/src/sandbox/staggered.ts index 287bef2..26ee054 100644 --- a/src/sandbox/staggered.ts +++ b/src/sandbox/staggered.ts @@ -28,6 +28,7 @@ export async function runStaggeredBenchmark(config: StaggeredConfig): Promise { const readyAt = performance.now() - wallStart; rampProfile.push({ launchedAt, readyAt, ttiMs: result.ttiMs }); From ae65b27cced19b2c240ae88ef722c39cc1a09026 Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Thu, 30 Apr 2026 21:28:57 +0000 Subject: [PATCH 4/8] fix: strengthen sandbox reuse detection signals --- src/sandbox/benchmark.ts | 109 +++++++++++++++++++++++++++++++------- src/sandbox/concurrent.ts | 8 ++- src/sandbox/staggered.ts | 8 ++- 3 files changed, 102 insertions(+), 23 deletions(-) diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index dcb3999..6af7691 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -1,6 +1,7 @@ import type { ProviderConfig, BenchmarkResult, TimingResult } from './types.js'; import { computeStats } from '../util/stats.js'; import { withTimeout } from '../util/timeout.js'; +import { randomUUID } from 'node:crypto'; export async function runBenchmark(config: ProviderConfig): Promise { const { name, iterations = 100, timeout = 120_000, requiredEnvVars, sandboxOptions, destroyTimeoutMs } = config; @@ -19,7 +20,11 @@ export async function runBenchmark(config: ProviderConfig): Promise(); + const runNonce = randomUUID(); + const reuseDetector = { + runNonce, + seenSignals: new Map>(), + }; console.log(`\n--- Benchmarking: ${name} (${iterations} iterations) ---`); @@ -32,7 +37,7 @@ export async function runBenchmark(config: ProviderConfig): Promise>; +}; + +function parseKeyValueOutput(stdout: string): Record { + const parsed: Record = {}; + for (const line of stdout.split('\n')) { + const index = line.indexOf('='); + if (index <= 0) continue; + const key = line.slice(0, index).trim(); + const value = line.slice(index + 1).trim(); + if (!key) continue; + parsed[key] = value; } + return parsed; +} + +function countStrongSignalMatches(identity: Record, detector: ReuseDetector): number { + const strongKeys = ['ns_mnt', 'ns_pid', 'ns_uts', 'cgroup_hash', 'boot_id', 'pid1']; + let matches = 0; - return null; + for (const key of strongKeys) { + const value = identity[key]; + if (!value || value === 'unknown') continue; + const seen = detector.seenSignals.get(key); + if (seen?.has(value)) matches++; + } + + return matches; +} + +function rememberSignals(identity: Record, detector: ReuseDetector): void { + for (const [key, value] of Object.entries(identity)) { + if (!value || value === 'unknown') continue; + if (!detector.seenSignals.has(key)) detector.seenSignals.set(key, new Set()); + detector.seenSignals.get(key)!.add(value); + } } export async function runIteration( @@ -73,7 +106,7 @@ export async function runIteration( timeout: number, sandboxOptions?: Record, destroyTimeoutMs: number = 15_000, - seenSandboxFingerprints?: Set, + reuseDetector?: ReuseDetector, ): Promise { let sandbox: any = null; @@ -82,8 +115,42 @@ export async function runIteration( sandbox = await withTimeout(compute.sandbox.create(sandboxOptions), timeout, 'Sandbox creation timed out'); + const markerA = '/tmp/.bench_ephemeral_check'; + const markerB = '/var/tmp/.bench_ephemeral_check'; + const probeToken = reuseDetector + ? `${reuseDetector.runNonce}:${Date.now().toString(36)}:${Math.random().toString(36).slice(2, 10)}` + : `${Date.now().toString(36)}:${Math.random().toString(36).slice(2, 10)}`; + + const identityProbeCommand = [ + "marker_a='/tmp/.bench_ephemeral_check'", + "marker_b='/var/tmp/.bench_ephemeral_check'", + "marker_path=''", + "for p in \"$marker_a\" \"$marker_b\"; do if [ -f \"$p\" ]; then marker_path=$p; break; fi; done", + "marker_value='unknown'", + "if [ -n \"$marker_path\" ]; then marker_value=$(tr -d '\\n' < \"$marker_path\" 2>/dev/null || true); fi", + "ns_mnt=$(readlink /proc/self/ns/mnt 2>/dev/null || printf unknown)", + "ns_pid=$(readlink /proc/self/ns/pid 2>/dev/null || printf unknown)", + "ns_uts=$(readlink /proc/self/ns/uts 2>/dev/null || printf unknown)", + "cgroup_hash=$(cat /proc/self/cgroup 2>/dev/null | sha256sum 2>/dev/null | cut -d\" \" -f1)", + "if [ -z \"$cgroup_hash\" ]; then cgroup_hash=unknown; fi", + "boot_id=$(cat /proc/sys/kernel/random/boot_id 2>/dev/null || printf unknown)", + "pid1=$(tr -d '\\0' < /proc/1/cmdline 2>/dev/null || printf unknown)", + "uptime=$(cut -d' ' -f1 /proc/uptime 2>/dev/null || printf unknown)", + "printf 'marker_path=%s\\n' \"$marker_path\"", + "printf 'marker_value=%s\\n' \"$marker_value\"", + "printf 'ns_mnt=%s\\n' \"$ns_mnt\"", + "printf 'ns_pid=%s\\n' \"$ns_pid\"", + "printf 'ns_uts=%s\\n' \"$ns_uts\"", + "printf 'cgroup_hash=%s\\n' \"$cgroup_hash\"", + "printf 'boot_id=%s\\n' \"$boot_id\"", + "printf 'pid1=%s\\n' \"$pid1\"", + "printf 'uptime=%s\\n' \"$uptime\"", + `printf '%s' '${probeToken}' > ${markerA}`, + `printf '%s' '${probeToken}' > ${markerB}`, + ].join('; '); + const identityResult = await withTimeout( - sandbox.runCommand("sh -lc 'echo -n $(hostname)'"), + sandbox.runCommand(`sh -lc "${identityProbeCommand}"`), 30_000, 'Sandbox identity check timed out' ) as { exitCode: number; stdout?: string; stderr?: string }; @@ -92,15 +159,19 @@ export async function runIteration( throw new Error(`Sandbox identity check failed with exit code ${identityResult.exitCode}: ${identityResult.stderr || 'Unknown error'}`); } - const runtimeIdentity = (identityResult.stdout || '').trim(); - const sandboxFingerprint = getSandboxFingerprint(sandbox); - const fingerprint = sandboxFingerprint || (runtimeIdentity ? `runtime:${runtimeIdentity}` : null); + const identity = parseKeyValueOutput(identityResult.stdout || ''); - if (seenSandboxFingerprints && fingerprint) { - if (seenSandboxFingerprints.has(fingerprint)) { - throw new Error('Sandbox/container reuse detected across benchmark iterations'); + if (reuseDetector) { + if (identity.marker_path) { + throw new Error(`Sandbox/container reuse detected: persistent marker at ${identity.marker_path}`); } - seenSandboxFingerprints.add(fingerprint); + + const strongMatches = countStrongSignalMatches(identity, reuseDetector); + if (strongMatches >= 3) { + throw new Error(`Sandbox/container reuse suspected: ${strongMatches} strong runtime signals repeated`); + } + + rememberSignals(identity, reuseDetector); } const result = await withTimeout( diff --git a/src/sandbox/concurrent.ts b/src/sandbox/concurrent.ts index 0b19d34..03ee9ce 100644 --- a/src/sandbox/concurrent.ts +++ b/src/sandbox/concurrent.ts @@ -1,6 +1,7 @@ import type { ProviderConfig, TimingResult, ConcurrentBenchmarkResult } from './types.js'; import { runIteration } from './benchmark.js'; import { computeStats } from '../util/stats.js'; +import { randomUUID } from 'node:crypto'; interface ConcurrentConfig extends ProviderConfig { concurrency: number; @@ -29,11 +30,14 @@ export async function runConcurrentBenchmark(config: ConcurrentConfig): Promise< console.log(`\n--- Concurrent Benchmark: ${name} (${concurrency} sandboxes) ---`); const wallStart = performance.now(); - const seenSandboxFingerprints = new Set(); + const reuseDetector = { + runNonce: randomUUID(), + seenSignals: new Map>(), + }; // Fire all sandbox creations simultaneously — no awaiting between launches const promises = Array.from({ length: concurrency }, (_, i) => - runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) + runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs, reuseDetector) .then(result => { console.log(` Sandbox ${i + 1}/${concurrency}: TTI ${(result.ttiMs / 1000).toFixed(2)}s`); return result; diff --git a/src/sandbox/staggered.ts b/src/sandbox/staggered.ts index 26ee054..c958472 100644 --- a/src/sandbox/staggered.ts +++ b/src/sandbox/staggered.ts @@ -1,6 +1,7 @@ import type { ProviderConfig, TimingResult, StaggeredBenchmarkResult } from './types.js'; import { runIteration } from './benchmark.js'; import { computeStats } from '../util/stats.js'; +import { randomUUID } from 'node:crypto'; interface StaggeredConfig extends ProviderConfig { concurrency: number; @@ -32,14 +33,17 @@ export async function runStaggeredBenchmark(config: StaggeredConfig): Promise(); + const reuseDetector = { + runNonce: randomUUID(), + seenSignals: new Map>(), + }; const promises: Promise[] = []; const rampProfile: { launchedAt: number; readyAt: number; ttiMs: number }[] = []; for (let i = 0; i < concurrency; i++) { const launchedAt = performance.now() - wallStart; - const p = runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) + const p = runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs, reuseDetector) .then(result => { const readyAt = performance.now() - wallStart; rampProfile.push({ launchedAt, readyAt, ttiMs: result.ttiMs }); From 2a4c75afa8d0e0bf6245366c155f0a0e0da010e5 Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Fri, 1 May 2026 15:37:59 +0000 Subject: [PATCH 5/8] fix: address review feedback on reuse probe command --- src/sandbox/benchmark.ts | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index 6af7691..7028c0e 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -66,6 +66,8 @@ type ReuseDetector = { seenSignals: Map>; }; +const STRONG_SIGNAL_KEYS = ['ns_mnt', 'ns_pid', 'ns_uts', 'cgroup_hash', 'boot_id', 'pid1'] as const; + function parseKeyValueOutput(stdout: string): Record { const parsed: Record = {}; for (const line of stdout.split('\n')) { @@ -80,10 +82,9 @@ function parseKeyValueOutput(stdout: string): Record { } function countStrongSignalMatches(identity: Record, detector: ReuseDetector): number { - const strongKeys = ['ns_mnt', 'ns_pid', 'ns_uts', 'cgroup_hash', 'boot_id', 'pid1']; let matches = 0; - for (const key of strongKeys) { + for (const key of STRONG_SIGNAL_KEYS) { const value = identity[key]; if (!value || value === 'unknown') continue; const seen = detector.seenSignals.get(key); @@ -94,7 +95,8 @@ function countStrongSignalMatches(identity: Record, detector: Re } function rememberSignals(identity: Record, detector: ReuseDetector): void { - for (const [key, value] of Object.entries(identity)) { + for (const key of STRONG_SIGNAL_KEYS) { + const value = identity[key]; if (!value || value === 'unknown') continue; if (!detector.seenSignals.has(key)) detector.seenSignals.set(key, new Set()); detector.seenSignals.get(key)!.add(value); @@ -122,8 +124,8 @@ export async function runIteration( : `${Date.now().toString(36)}:${Math.random().toString(36).slice(2, 10)}`; const identityProbeCommand = [ - "marker_a='/tmp/.bench_ephemeral_check'", - "marker_b='/var/tmp/.bench_ephemeral_check'", + `marker_a='${markerA}'`, + `marker_b='${markerB}'`, "marker_path=''", "for p in \"$marker_a\" \"$marker_b\"; do if [ -f \"$p\" ]; then marker_path=$p; break; fi; done", "marker_value='unknown'", @@ -150,7 +152,7 @@ export async function runIteration( ].join('; '); const identityResult = await withTimeout( - sandbox.runCommand(`sh -lc "${identityProbeCommand}"`), + sandbox.runCommand(identityProbeCommand), 30_000, 'Sandbox identity check timed out' ) as { exitCode: number; stdout?: string; stderr?: string }; From 4a8594fee479755796ef2f2b242c007e388c53ef Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Fri, 1 May 2026 15:44:02 +0000 Subject: [PATCH 6/8] fix: make runtime reuse signals warn by default --- src/sandbox/benchmark.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index 7028c0e..079b5a0 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -170,7 +170,11 @@ export async function runIteration( const strongMatches = countStrongSignalMatches(identity, reuseDetector); if (strongMatches >= 3) { - throw new Error(`Sandbox/container reuse suspected: ${strongMatches} strong runtime signals repeated`); + const message = `Sandbox/container reuse suspected: ${strongMatches} strong runtime signals repeated`; + if (process.env.BENCH_STRICT_REUSE_SIGNALS === '1') { + throw new Error(message); + } + console.warn(` [reuse-check] ${message}`); } rememberSignals(identity, reuseDetector); From c12846a2a7154985eba5e15b644dcba331409a6b Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Fri, 1 May 2026 17:08:01 +0000 Subject: [PATCH 7/8] fix: ignore non-sandbox artifacts in sandbox merge --- src/merge-results.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/merge-results.ts b/src/merge-results.ts index a75d651..eac75c2 100644 --- a/src/merge-results.ts +++ b/src/merge-results.ts @@ -101,9 +101,15 @@ async function main() { for (const file of jsonFiles) { const raw: ResultFile = JSON.parse(fs.readFileSync(file, 'utf-8')); const fromSingleProvider = raw.results.length === 1; + const dirName = path.basename(path.dirname(file)); + const isSandboxDir = dirName === 'sequential_tti' || dirName === 'staggered_tti' || dirName === 'burst_tti'; + + if (!isSandboxDir) { + continue; + } + for (const result of raw.results) { // Determine mode from the directory name (e.g. sequential_tti, burst_tti) - const dirName = path.basename(path.dirname(file)); let mode = normalizeMode(result.mode || 'sequential'); // Infer from directory name if available if (dirName.includes('sequential')) mode = 'sequential'; From 84eda54b883f38c4889d41db61f6d97ccde3e334 Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Fri, 1 May 2026 21:18:05 +0000 Subject: [PATCH 8/8] fix: only fail reuse check on persistent markers --- src/sandbox/benchmark.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index 079b5a0..7cfcdce 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -170,11 +170,9 @@ export async function runIteration( const strongMatches = countStrongSignalMatches(identity, reuseDetector); if (strongMatches >= 3) { - const message = `Sandbox/container reuse suspected: ${strongMatches} strong runtime signals repeated`; - if (process.env.BENCH_STRICT_REUSE_SIGNALS === '1') { - throw new Error(message); + if (process.env.BENCH_REUSE_DEBUG === '1') { + console.warn(` [reuse-check] Sandbox/container reuse suspected: ${strongMatches} strong runtime signals repeated`); } - console.warn(` [reuse-check] ${message}`); } rememberSignals(identity, reuseDetector);