diff --git a/.github/workflows/fs-benchmarks.yml b/.github/workflows/fs-benchmarks.yml
new file mode 100644
index 0000000..b4db6a4
--- /dev/null
+++ b/.github/workflows/fs-benchmarks.yml
@@ -0,0 +1,234 @@
+name: Filesystem Benchmark
+
+# Runs the in-sandbox filesystem benchmark once per provider, merges the
+# per-provider artifacts, then either comments on the pull request or, for
+# scheduled and manual runs, commits the merged results.
+
+on:
+  pull_request:
+    paths:
+      - 'src/fs/**'
+      - 'src/sandbox/**'
+      - 'src/util/**'
+      - 'src/run.ts'
+      - 'src/merge-results.ts'
+      - 'package.json'
+  schedule:
+    - cron: '0 3 * * *' # Daily at 03:00 UTC
+  workflow_dispatch:
+    inputs:
+      iterations:
+        description: 'Iterations per provider'
+        required: false
+        default: '100'
+      file_size_mb:
+        description: 'Large file size in MB'
+        required: false
+        default: '64'
+      small_files:
+        description: 'Number of small files in workload'
+        required: false
+        default: '1000'
+
+# One group per pull request (or per ref for scheduled/manual runs), so a PR
+# push only supersedes earlier runs of that same PR. Runs that commit results
+# are queued rather than cancelled, so a PR can never abort the nightly run.
+concurrency:
+  group: fs-benchmarks-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+# Least privilege by default; only the collect job is granted write access.
+permissions:
+  contents: read
+
+jobs:
+  bench:
+    name: Bench ${{ matrix.provider }}
+    runs-on: namespace-profile-default
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false
+      matrix:
+        provider:
+          - archil
+          - blaxel
+          - cloudflare
+          - codesandbox
+          - daytona
+          - declaw
+          - e2b
+          - hopx
+          - modal
+          - namespace
+          - runloop
+          - upstash
+          - vercel
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+          cache: 'npm'
+      - name: Install dependencies
+        run: |
+          if [ "${{ github.event_name }}" = "schedule" ]; then
+            npm update
+          else
+            npm ci
+          fi
+      - name: Clear stale results from checkout
+        run: rm -rf results/fs/
+      - name: Run filesystem benchmark
+        env:
+          COMPUTESDK_API_KEY: ${{ secrets.COMPUTESDK_API_KEY }}
+          ARCHIL_API_KEY: ${{ secrets.ARCHIL_API_KEY }}
+          ARCHIL_REGION: ${{ secrets.ARCHIL_REGION }}
+          ARCHIL_DISK_ID: ${{ secrets.ARCHIL_DISK_ID }}
+          BL_API_KEY: ${{ secrets.BL_API_KEY }}
+          BL_WORKSPACE: ${{ secrets.BL_WORKSPACE }}
+          CLOUDFLARE_SANDBOX_URL: ${{ secrets.CLOUDFLARE_SANDBOX_URL }}
+          CLOUDFLARE_SANDBOX_SECRET: ${{ secrets.CLOUDFLARE_SANDBOX_SECRET }}
+          CSB_API_KEY: ${{ secrets.CSB_API_KEY }}
+          DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }}
+          DECLAW_API_KEY: ${{ secrets.DECLAW_API_KEY }}
+          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
+          HOPX_API_KEY: ${{ secrets.HOPX_API_KEY }}
+          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+          NSC_TOKEN: ${{ secrets.NSC_TOKEN }}
+          RUNLOOP_API_KEY: ${{ secrets.RUNLOOP_API_KEY }}
+          UPSTASH_BOX_API_KEY: ${{ secrets.UPSTASH_BOX_API_KEY }}
+          VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }}
+          VERCEL_TEAM_ID: ${{ secrets.VERCEL_TEAM_ID }}
+          VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}
+          # Benchmark parameters travel through the environment rather than being
+          # interpolated into the script, so input text is never parsed as shell.
+          BENCH_PROVIDER: ${{ matrix.provider }}
+          FS_FILE_SIZE_MB: ${{ github.event.inputs.file_size_mb || '64' }}
+          FS_SMALL_FILES: ${{ github.event.inputs.small_files || '1000' }}
+          FS_ITERATIONS: ${{ github.event_name == 'pull_request' && '5' || github.event.inputs.iterations || '100' }}
+        run: |
+          npm run bench -- \
+            --mode fs \
+            --provider "$BENCH_PROVIDER" \
+            --fs-file-size-mb "$FS_FILE_SIZE_MB" \
+            --fs-small-files "$FS_SMALL_FILES" \
+            --iterations "$FS_ITERATIONS"
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: fs-results-${{ matrix.provider }}
+          path: results/fs/
+          if-no-files-found: ignore
+          retention-days: 7
+
+  collect:
+    name: Collect Results
+    runs-on: namespace-profile-default
+    needs: bench
+    if: always()
+    # Needs to push merged results and to create or update the PR comment.
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+          cache: 'npm'
+      - name: Install dependencies
+        run: |
+          if [ "${{ github.event_name }}" = "schedule" ]; then
+            npm update
+          else
+            npm ci
+          fi
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: artifacts/
+          pattern: fs-results-*
+      - name: Merge results
+        run: npx tsx src/merge-results.ts --input artifacts --mode fs
+      - name: Post results to PR
+        if: github.event_name == 'pull_request'
+        continue-on-error: true
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+            const latestPath = path.join('results', 'fs', 'latest.json');
+
+            let body = '## Filesystem Benchmark Results\n\n';
+
+            if (!fs.existsSync(latestPath)) {
+              body += '> No filesystem benchmark results were generated.\n\n';
+            } else {
+              const data = JSON.parse(fs.readFileSync(latestPath, 'utf-8'));
+              const results = data.results
+                .filter(r => !r.skipped)
+                .sort((a, b) => (b.compositeScore || 0) - (a.compositeScore || 0));
+
+              if (results.length === 0) {
+                body += '> No filesystem benchmark results were generated.\n\n';
+              } else {
+                body += '| # | Provider | Score | Read | Write | Small Files | Metadata | Status |\n';
+                body += '|---|----------|-------|------|-------|-------------|----------|--------|\n';
+
+                results.forEach((r, i) => {
+                  const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--';
+                  const read = (r.summary.readMs.median / 1000).toFixed(2) + 's';
+                  const write = (r.summary.writeMs.median / 1000).toFixed(2) + 's';
+                  const small = (r.summary.smallFileOpsMs.median / 1000).toFixed(2) + 's';
+                  const meta = (r.summary.metadataOpsMs.median / 1000).toFixed(2) + 's';
+                  const ok = r.iterations.filter(it => !it.error).length;
+                  const total = r.iterations.length;
+                  body += `| ${i + 1} | ${r.provider} | ${score} | ${read} | ${write} | ${small} | ${meta} | ${ok}/${total} |\n`;
+                });
+
+                body += '\n';
+              }
+            }
+
+            body += `---\n*[View full run](${runUrl})*`;
+
+            // Walk every page of comments: on a busy PR the marker comment may not
+            // be on the first page, and missing it would post a duplicate.
+            const marker = '## Filesystem Benchmark Results';
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+
+            const existing = comments.find(c => c.body?.startsWith(marker));
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body,
+              });
+            }
+      - name: Commit and push
+        if: github.event_name != 'pull_request'
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add package.json package-lock.json results/fs/
+          git diff --cached --quiet && echo "No changes to commit" && exit 0
+          git commit -m "chore: update fs benchmark results [skip ci]"
+          git push
diff --git a/METHODOLOGY.md b/METHODOLOGY.md
index 9601bdc..17826dc 100644
--- a/METHODOLOGY.md
+++ b/METHODOLOGY.md
@@ -133,9 +133,39 @@ Each sandbox still measures its own individual TTI. We also capture:
 
 **Why burst matters:** AI agents and orchestration tools often spin up many sandboxes at once. Burst testing reveals how providers handle sudden spikes — provisioning queue depth, rate limiting, and failure rates under peak demand.
 
+### Filesystem (FS)
+
+FS benchmarks run inside a freshly created sandbox to measure local workspace disk performance after startup. This mode is separate from TTI and object storage tests.
+ +```bash +npm run bench:fs +``` + +| Parameter | Default | +|-----------|---------| +| Iterations per provider | 100 | +| Large file size | 64MB | +| Small files count | 1000 | +| Timeout per iteration | 120 seconds | + +Each successful iteration runs four workload blocks in sequence: + +| Workload | Description | +|----------|-------------| +| **Large-file write** | Write a fixed-size buffer to disk and measure elapsed time | +| **Large-file read** | Read the same file back and verify byte length | +| **Small-file ops** | Create, read, and delete many small files | +| **Metadata ops** | Repeated `stat` + `rename` operations to stress metadata paths | + +From these timings we derive: +- Read and write latency stats (median, p95, p99) +- Small-file and metadata latency stats (median, p95, p99) +- Read and write throughput (Mbps) +- Success rate and a reliability-weighted composite score + ### Running All Tests -By default, `npm run bench` runs all three tests in sequence: +By default, `npm run bench` runs the three TTI tests in sequence: ```bash npm run bench # Runs sequential → staggered → burst @@ -143,6 +173,7 @@ npm run bench -- --provider e2b # All 3 tests, single provider npm run bench:sequential # Sequential only npm run bench:staggered # Staggered only npm run bench:burst # Burst only +npm run bench:fs # Filesystem only ``` ## Test Configuration diff --git a/README.md b/README.md index 25350b7..f5a8330 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Each benchmark creates a fresh sandbox, runs `node -v`, and records wall-clock t ## Methodology -Each benchmark creates a fresh sandbox, runs `node -v`, and records wall-clock time. We run three test modes daily: +Each benchmark creates a fresh sandbox, runs `node -v`, and records wall-clock time. We run three TTI test modes daily: **Sequential** — Sandboxes are created one at a time. Each is created, tested, and destroyed before the next begins. 100 iterations per provider. 
This is the baseline — isolated cold-start performance with no contention. @@ -56,7 +56,9 @@ Each benchmark creates a fresh sandbox, runs `node -v`, and records wall-clock t **Burst** — 100 sandboxes are created simultaneously with no delay between launches. Tests how providers handle sudden spikes — provisioning queue depth, rate limiting, and failure rates under peak demand. -For each provider we report min, max, median, P95, P99, and average TTI, plus a **composite score** (0–100) that combines weighted timing metrics with success rate. Providers must be both fast *and* reliable to score well. +**Filesystem (FS)** — In-sandbox disk benchmarks that measure large-file read/write latency and throughput, plus many small-file and metadata-heavy operations. This captures local workspace IO performance after the sandbox is interactive. + +For each provider we report min, max, median, P95, P99, and average TTI, plus a **composite score** (0–100) that combines weighted timing metrics with success rate. Providers must be both fast *and* reliable to score well. FS mode uses the same reliability-weighted score approach across read/write and file-op metrics. 
### Composite Score diff --git a/package.json b/package.json index 5fcd889..d48681e 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,9 @@ "bench:storage:4mb": "tsx src/run.ts --mode storage --file-size 4MB", "bench:storage:10mb": "tsx src/run.ts --mode storage --file-size 10MB", "bench:storage:16mb": "tsx src/run.ts --mode storage --file-size 16MB", + "bench:fs": "tsx src/run.ts --mode fs", + "bench:fs:64mb": "tsx src/run.ts --mode fs --fs-file-size-mb 64", + "bench:fs:e2b": "tsx src/run.ts --mode fs --provider e2b", "update-readme": "tsx src/update-readme.ts", "generate-svg": "tsx src/sandbox/generate-svg.ts", "generate-svg:sequential": "tsx src/sandbox/generate-svg.ts --mode sequential", diff --git a/src/fs/benchmark.ts b/src/fs/benchmark.ts new file mode 100644 index 0000000..b10e741 --- /dev/null +++ b/src/fs/benchmark.ts @@ -0,0 +1,282 @@ +import fs from 'fs'; +import type { FsProviderConfig, FsBenchmarkResult, FsTimingResult } from './types.js'; +import { withTimeout } from '../util/timeout.js'; + +function round(n: number): number { + return Math.round(n * 100) / 100; +} + +function percentile(sorted: number[], p: number): number { + const idx = Math.ceil((p / 100) * sorted.length) - 1; + return sorted[Math.min(idx, sorted.length - 1)]; +} + +function computeStats(values: number[]): { median: number; p95: number; p99: number } { + if (values.length === 0) return { median: 0, p95: 0, p99: 0 }; + + const sorted = [...values].sort((a, b) => a - b); + const trimCount = Math.floor(sorted.length * 0.05); + const trimmed = trimCount > 0 && sorted.length - 2 * trimCount > 0 + ? sorted.slice(trimCount, sorted.length - trimCount) + : sorted; + + const mid = Math.floor(trimmed.length / 2); + const median = trimmed.length % 2 === 0 + ? 
(trimmed[mid - 1] + trimmed[mid]) / 2 + : trimmed[mid]; + + return { + median, + p95: percentile(trimmed, 95), + p99: percentile(trimmed, 99), + }; +} + +function parseMetrics(stdout: string): Omit { + const lastLine = stdout + .trim() + .split('\n') + .map((line) => line.trim()) + .filter(Boolean) + .pop(); + + if (!lastLine) { + throw new Error('FS benchmark command produced empty output'); + } + + let parsed: any; + try { + parsed = JSON.parse(lastLine); + } catch { + throw new Error(`Unable to parse FS benchmark output: ${lastLine.slice(0, 200)}`); + } + + return { + writeMs: Number(parsed.writeMs || 0), + readMs: Number(parsed.readMs || 0), + smallFileOpsMs: Number(parsed.smallFileOpsMs || 0), + metadataOpsMs: Number(parsed.metadataOpsMs || 0), + writeMbps: Number(parsed.writeMbps || 0), + readMbps: Number(parsed.readMbps || 0), + fileSizeBytes: Number(parsed.fileSizeBytes || 0), + smallFilesCount: Number(parsed.smallFilesCount || 0), + }; +} + +function buildFsWorkloadCommand(fileSizeBytes: number, smallFilesCount: number): string { + const nodeScript = [ + 'const fsp=require("fs/promises");', + 'const path=require("path");', + 'const os=require("os");', + `const size=${fileSizeBytes};`, + `const count=${smallFilesCount};`, + 'const root=path.join(os.tmpdir(),`fs-bench-${Date.now()}-${Math.random().toString(36).slice(2)}`);', + 'const bigFile=path.join(root,"payload.bin");', + 'const smallDir=path.join(root,"small");', + 'const metaDir=path.join(root,"meta");', + 'const nowMs=()=>Number(process.hrtime.bigint())/1e6;', + '(async()=>{', + 'await fsp.mkdir(root,{recursive:true});', + 'const payload=Buffer.alloc(size,120);', + 'const writeStart=nowMs();', + 'await fsp.writeFile(bigFile,payload);', + 'const writeMs=nowMs()-writeStart;', + 'const readStart=nowMs();', + 'const buf=await fsp.readFile(bigFile);', + 'const readMs=nowMs()-readStart;', + 'if(buf.length!==size) throw new Error(`read size mismatch: ${buf.length} != ${size}`);', + 'await 
fsp.mkdir(smallDir,{recursive:true});', + 'const smallStart=nowMs();', + 'for(let i=0;i{try{await fsp.rm(root,{recursive:true,force:true});}catch{} console.error(err?.message||String(err)); process.exit(1);});', + ].join(''); + + return `node -e '${nodeScript}'`; +} + +async function runFsIteration( + compute: any, + timeout: number, + fileSizeBytes: number, + smallFilesCount: number, + sandboxOptions?: Record, + destroyTimeoutMs: number = 15000, +): Promise { + let sandbox: any = null; + + try { + sandbox = await withTimeout(compute.sandbox.create(sandboxOptions), timeout, 'Sandbox creation timed out'); + const command = buildFsWorkloadCommand(fileSizeBytes, smallFilesCount); + const result = await withTimeout( + sandbox.runCommand(command), + timeout, + 'FS benchmark workload timed out', + ) as { exitCode: number; stdout?: string; stderr?: string }; + + if (result.exitCode !== 0) { + throw new Error(result.stderr || `Command failed with exit code ${result.exitCode}`); + } + + return parseMetrics(result.stdout || ''); + } finally { + if (sandbox) { + let timer: ReturnType | undefined; + try { + await Promise.race([ + sandbox.destroy(), + new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error('Destroy timeout')), destroyTimeoutMs); + }), + ]); + } catch (err) { + console.warn(` [cleanup] destroy failed: ${err instanceof Error ? 
err.message : String(err)}`); + } finally { + if (timer) clearTimeout(timer); + } + } + } +} + +export async function runFsBenchmark( + config: FsProviderConfig, + fileSizeBytes: number, + smallFilesCount: number, +): Promise { + const { name, iterations = 50, timeout = 120000, requiredEnvVars, createCompute, sandboxOptions, destroyTimeoutMs } = config; + const missingVars = requiredEnvVars.filter((v) => !process.env[v]); + if (missingVars.length > 0) { + return { + provider: name, + mode: 'fs', + fileSizeBytes, + smallFilesCount, + iterations: [], + summary: { + writeMs: { median: 0, p95: 0, p99: 0 }, + readMs: { median: 0, p95: 0, p99: 0 }, + smallFileOpsMs: { median: 0, p95: 0, p99: 0 }, + metadataOpsMs: { median: 0, p95: 0, p99: 0 }, + writeMbps: { median: 0, p95: 0, p99: 0 }, + readMbps: { median: 0, p95: 0, p99: 0 }, + }, + skipped: true, + skipReason: `Missing: ${missingVars.join(', ')}`, + }; + } + + const compute = createCompute(); + const results: FsTimingResult[] = []; + + console.log(`\n--- FS Benchmarking: ${name} (${iterations} iterations) ---`); + for (let i = 0; i < iterations; i++) { + console.log(` Iteration ${i + 1}/${iterations}...`); + try { + const iteration = await runFsIteration(compute, timeout, fileSizeBytes, smallFilesCount, sandboxOptions, destroyTimeoutMs); + results.push(iteration); + console.log(` Read: ${(iteration.readMs / 1000).toFixed(2)}s, Write: ${(iteration.writeMs / 1000).toFixed(2)}s, Read Mbps: ${iteration.readMbps.toFixed(2)}`); + } catch (err) { + const error = err instanceof Error ? 
err.message : String(err); + console.log(` FAILED: ${error}`); + results.push({ + writeMs: 0, + readMs: 0, + smallFileOpsMs: 0, + metadataOpsMs: 0, + writeMbps: 0, + readMbps: 0, + fileSizeBytes, + smallFilesCount, + error, + }); + } + } + + const successful = results.filter((r) => !r.error); + return { + provider: name, + mode: 'fs', + fileSizeBytes, + smallFilesCount, + iterations: results, + summary: { + writeMs: computeStats(successful.map((r) => r.writeMs)), + readMs: computeStats(successful.map((r) => r.readMs)), + smallFileOpsMs: computeStats(successful.map((r) => r.smallFileOpsMs)), + metadataOpsMs: computeStats(successful.map((r) => r.metadataOpsMs)), + writeMbps: computeStats(successful.map((r) => r.writeMbps)), + readMbps: computeStats(successful.map((r) => r.readMbps)), + }, + }; +} + +function roundStats(s: { median: number; p95: number; p99: number }) { + return { median: round(s.median), p95: round(s.p95), p99: round(s.p99) }; +} + +export async function writeFsResultsJson(results: FsBenchmarkResult[], outPath: string): Promise { + const os = await import('os'); + + const cleanResults = results.map((r) => ({ + provider: r.provider, + mode: r.mode, + fileSizeBytes: r.fileSizeBytes, + smallFilesCount: r.smallFilesCount, + iterations: r.iterations.map((i) => ({ + writeMs: round(i.writeMs), + readMs: round(i.readMs), + smallFileOpsMs: round(i.smallFileOpsMs), + metadataOpsMs: round(i.metadataOpsMs), + writeMbps: round(i.writeMbps), + readMbps: round(i.readMbps), + fileSizeBytes: i.fileSizeBytes, + smallFilesCount: i.smallFilesCount, + ...(i.error ? { error: i.error } : {}), + })), + summary: { + writeMs: roundStats(r.summary.writeMs), + readMs: roundStats(r.summary.readMs), + smallFileOpsMs: roundStats(r.summary.smallFileOpsMs), + metadataOpsMs: roundStats(r.summary.metadataOpsMs), + writeMbps: roundStats(r.summary.writeMbps), + readMbps: roundStats(r.summary.readMbps), + }, + ...(r.compositeScore !== undefined ? 
{ compositeScore: round(r.compositeScore) } : {}), + ...(r.successRate !== undefined ? { successRate: round(r.successRate) } : {}), + ...(r.skipped ? { skipped: r.skipped, skipReason: r.skipReason } : {}), + })); + + const output = { + version: '1.0', + timestamp: new Date().toISOString(), + environment: { + node: process.version, + platform: os.platform(), + arch: os.arch(), + }, + config: { + iterations: results[0]?.iterations.length || 0, + timeoutMs: 120000, + fileSizeBytes: results[0]?.fileSizeBytes || 0, + smallFilesCount: results[0]?.smallFilesCount || 0, + }, + results: cleanResults, + }; + + fs.writeFileSync(outPath, JSON.stringify(output, null, 2)); + console.log(`Results written to ${outPath}`); +} diff --git a/src/fs/providers.ts b/src/fs/providers.ts new file mode 100644 index 0000000..fc0bc68 --- /dev/null +++ b/src/fs/providers.ts @@ -0,0 +1,10 @@ +import { providers } from '../sandbox/providers.js'; +import type { FsProviderConfig } from './types.js'; + +export const fsProviders: FsProviderConfig[] = providers.map((p) => ({ + name: p.name, + requiredEnvVars: p.requiredEnvVars, + createCompute: p.createCompute, + sandboxOptions: p.sandboxOptions, + destroyTimeoutMs: p.destroyTimeoutMs, +})); diff --git a/src/fs/scoring.ts b/src/fs/scoring.ts new file mode 100644 index 0000000..f96452f --- /dev/null +++ b/src/fs/scoring.ts @@ -0,0 +1,60 @@ +import type { FsBenchmarkResult } from './types.js'; + +export interface FsScoringWeights { + readMedian: number; + readP95: number; + writeMedian: number; + writeP95: number; + smallFileOpsMedian: number; + metadataOpsMedian: number; +} + +export const DEFAULT_FS_WEIGHTS: FsScoringWeights = { + readMedian: 0.25, + readP95: 0.10, + writeMedian: 0.25, + writeP95: 0.10, + smallFileOpsMedian: 0.20, + metadataOpsMedian: 0.10, +}; + +const LATENCY_CEILING_MS = 30000; + +function scoreLatency(valueMs: number): number { + return Math.max(0, 100 * (1 - valueMs / LATENCY_CEILING_MS)); +} + +export function 
computeFsSuccessRate(result: FsBenchmarkResult): number { + if (result.skipped || result.iterations.length === 0) return 0; + const successful = result.iterations.filter((i) => !i.error).length; + return successful / result.iterations.length; +} + +function computeFsScore(result: FsBenchmarkResult, weights: FsScoringWeights = DEFAULT_FS_WEIGHTS): number { + return ( + weights.readMedian * scoreLatency(result.summary.readMs.median) + + weights.readP95 * scoreLatency(result.summary.readMs.p95) + + weights.writeMedian * scoreLatency(result.summary.writeMs.median) + + weights.writeP95 * scoreLatency(result.summary.writeMs.p95) + + weights.smallFileOpsMedian * scoreLatency(result.summary.smallFileOpsMs.median) + + weights.metadataOpsMedian * scoreLatency(result.summary.metadataOpsMs.median) + ); +} + +export function computeFsCompositeScores( + results: FsBenchmarkResult[], + weights: FsScoringWeights = DEFAULT_FS_WEIGHTS, +): void { + for (const result of results) { + const successRate = computeFsSuccessRate(result); + result.successRate = successRate; + + if (result.skipped || successRate === 0) { + result.compositeScore = 0; + continue; + } + + const fsScore = computeFsScore(result, weights); + result.compositeScore = Math.round(fsScore * successRate * 100) / 100; + } +} diff --git a/src/fs/types.ts b/src/fs/types.ts new file mode 100644 index 0000000..eb09ce7 --- /dev/null +++ b/src/fs/types.ts @@ -0,0 +1,50 @@ +export interface FsProviderConfig { + /** Provider name */ + name: string; + /** Number of iterations (default: 50) */ + iterations?: number; + /** Timeout per iteration in ms (default: 120000) */ + timeout?: number; + /** Environment variables that must all be set to run this benchmark */ + requiredEnvVars: string[]; + /** Creates a compute instance */ + createCompute: () => any; + /** Options passed to sandbox.create() */ + sandboxOptions?: Record; + /** Timeout for sandbox.destroy() in ms */ + destroyTimeoutMs?: number; +} + +export interface 
FsTimingResult { + writeMs: number; + readMs: number; + smallFileOpsMs: number; + metadataOpsMs: number; + writeMbps: number; + readMbps: number; + fileSizeBytes: number; + smallFilesCount: number; + error?: string; +} + +export interface FsStats { + writeMs: { median: number; p95: number; p99: number }; + readMs: { median: number; p95: number; p99: number }; + smallFileOpsMs: { median: number; p95: number; p99: number }; + metadataOpsMs: { median: number; p95: number; p99: number }; + writeMbps: { median: number; p95: number; p99: number }; + readMbps: { median: number; p95: number; p99: number }; +} + +export interface FsBenchmarkResult { + provider: string; + mode: 'fs'; + fileSizeBytes: number; + smallFilesCount: number; + iterations: FsTimingResult[]; + summary: FsStats; + compositeScore?: number; + successRate?: number; + skipped?: boolean; + skipReason?: string; +} diff --git a/src/merge-results.ts b/src/merge-results.ts index a75d651..d2b0598 100644 --- a/src/merge-results.ts +++ b/src/merge-results.ts @@ -1,7 +1,7 @@ /** * Merge per-provider benchmark results into combined result files. * - * Usage: tsx src/merge-results.ts --input [--mode storage|browser] + * Usage: tsx src/merge-results.ts --input [--mode storage|browser|fs] * * By default, merges sandbox benchmark results: reads latest.json files from * the input directory, groups by mode (sequential/staggered/burst), computes @@ -14,6 +14,10 @@ * With --mode browser, merges browser benchmark results: deduplicates by * provider, computes browser-specific composite scores, and writes combined * files to results/browser/latest.json. + * + * With --mode fs, merges filesystem benchmark results: deduplicates by + * provider, computes fs-specific composite scores, and writes combined files + * to results/fs/latest.json. 
*/ import fs from 'fs'; import path from 'path'; @@ -21,10 +25,12 @@ import { fileURLToPath } from 'url'; import { computeCompositeScores } from './sandbox/scoring.js'; import { computeStorageCompositeScores, sortStorageByCompositeScore } from './storage/scoring.js'; import { computeBrowserCompositeScores, sortBrowserByCompositeScore } from './browser/scoring.js'; +import { computeFsCompositeScores } from './fs/scoring.js'; import { printResultsTable, writeResultsJson } from './sandbox/table.js'; import type { BenchmarkResult } from './sandbox/types.js'; import type { StorageBenchmarkResult } from './storage/types.js'; import type { BrowserBenchmarkResult } from './browser/types.js'; +import type { FsBenchmarkResult } from './fs/types.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT = path.resolve(__dirname, '..'); @@ -38,7 +44,7 @@ function getArgValue(flag: string): string | undefined { const inputDir = getArgValue('--input'); const mergeMode = getArgValue('--mode'); if (!inputDir) { - console.error('Usage: tsx src/merge-results.ts --input [--mode storage|browser]'); + console.error('Usage: tsx src/merge-results.ts --input [--mode storage|browser|fs]'); process.exit(1); } @@ -366,7 +372,96 @@ async function mainBrowser() { console.log(`Copied latest: ${latestPath}`); } -const runner = mergeMode === 'storage' ? mainStorage : mergeMode === 'browser' ? 
mainBrowser : main; +async function mainFs() { + const jsonFiles: string[] = []; + function walk(dir: string) { + if (!fs.existsSync(dir)) return; + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) walk(full); + else if (entry.name === 'latest.json') jsonFiles.push(full); + } + } + walk(inputDir!); + + if (jsonFiles.length === 0) { + console.error(`No latest.json files found in ${inputDir}`); + process.exit(1); + } + + console.log(`Found ${jsonFiles.length} result files`); + + const seen = new Map(); + + for (const file of jsonFiles) { + const raw = JSON.parse(fs.readFileSync(file, 'utf-8')) as { results: FsBenchmarkResult[] }; + const fromSingleProvider = raw.results.length === 1; + for (const result of raw.results) { + const existing = seen.get(result.provider); + if (!existing || (fromSingleProvider && !existing.fromSingleProvider)) { + seen.set(result.provider, { result, fromSingleProvider }); + } + } + } + + const deduped = Array.from(seen.values()).map((e) => e.result); + console.log(`\nMerging ${deduped.length} provider results for mode: fs`); + + computeFsCompositeScores(deduped); + + console.log(`\n${'='.repeat(110)}`); + console.log(' FILESYSTEM BENCHMARK RESULTS'); + console.log('='.repeat(110)); + console.log( + ['Provider', 'Score', 'Read', 'Write', 'Small Files', 'Metadata', 'Status'] + .map((h, i) => h.padEnd([14, 8, 12, 12, 14, 12, 10][i])) + .join(' | ') + ); + console.log([14, 8, 12, 12, 14, 12, 10].map((w) => '-'.repeat(w)).join('-+-')); + + const sorted = [...deduped].sort((a, b) => { + if (a.skipped && !b.skipped) return 1; + if (!a.skipped && b.skipped) return -1; + if (a.skipped && b.skipped) return 0; + return (b.compositeScore ?? 0) - (a.compositeScore ?? 
0); + }); + + for (const r of sorted) { + if (r.skipped) { + console.log([r.provider.padEnd(14), '--'.padEnd(8), '--'.padEnd(12), '--'.padEnd(12), '--'.padEnd(14), '--'.padEnd(12), 'SKIPPED'.padEnd(10)].join(' | ')); + continue; + } + const ok = r.iterations.filter((i) => !i.error).length; + const total = r.iterations.length; + const score = r.compositeScore !== undefined ? r.compositeScore.toFixed(1) : '--'; + const read = (r.summary.readMs.median / 1000).toFixed(2) + 's'; + const write = (r.summary.writeMs.median / 1000).toFixed(2) + 's'; + const small = (r.summary.smallFileOpsMs.median / 1000).toFixed(2) + 's'; + const meta = (r.summary.metadataOpsMs.median / 1000).toFixed(2) + 's'; + console.log([r.provider.padEnd(14), score.padEnd(8), read.padEnd(12), write.padEnd(12), small.padEnd(14), meta.padEnd(12), `${ok}/${total} OK`.padEnd(10)].join(' | ')); + } + console.log('='.repeat(110)); + + const { writeFsResultsJson } = await import('./fs/benchmark.js'); + const timestamp = new Date().toISOString().slice(0, 10); + const resultsDir = path.resolve(ROOT, 'results/fs'); + fs.mkdirSync(resultsDir, { recursive: true }); + + const outPath = path.join(resultsDir, `${timestamp}.json`); + await writeFsResultsJson(deduped, outPath); + + const latestPath = path.join(resultsDir, 'latest.json'); + fs.copyFileSync(outPath, latestPath); + console.log(`Copied latest: ${latestPath}`); +} + +const runner = mergeMode === 'storage' + ? mainStorage + : mergeMode === 'browser' + ? mainBrowser + : mergeMode === 'fs' + ? 
mainFs + : main; runner().catch(err => { console.error('Merge failed:', err); process.exit(1); diff --git a/src/run.ts b/src/run.ts index fdd5962..e13db62 100644 --- a/src/run.ts +++ b/src/run.ts @@ -9,16 +9,20 @@ import { runConcurrentBenchmark } from './sandbox/concurrent.js'; import { runStaggeredBenchmark } from './sandbox/staggered.js'; import { runStorageBenchmark, writeStorageResultsJson } from './storage/benchmark.js'; import { runBrowserBenchmark, writeBrowserResultsJson } from './browser/benchmark.js'; +import { runFsBenchmark, writeFsResultsJson } from './fs/benchmark.js'; import { printResultsTable, writeResultsJson } from './sandbox/table.js'; import { providers } from './sandbox/providers.js'; import { storageProviders } from './storage/providers.js'; import { browserProviders } from './browser/providers.js'; +import { fsProviders } from './fs/providers.js'; import { computeCompositeScores } from './sandbox/scoring.js'; import { computeStorageCompositeScores } from './storage/scoring.js'; import { computeBrowserCompositeScores } from './browser/scoring.js'; +import { computeFsCompositeScores } from './fs/scoring.js'; import type { BenchmarkResult, BenchmarkMode } from './sandbox/types.js'; import type { StorageBenchmarkResult } from './storage/types.js'; import type { BrowserBenchmarkResult } from './browser/types.js'; +import type { FsBenchmarkResult } from './fs/types.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -31,6 +35,8 @@ const concurrency = parseInt(getArgValue(args, '--concurrency') || '100', 10); const storageConcurrency = parseInt(getArgValue(args, '--storage-concurrency') || '1', 10); const staggerDelay = parseInt(getArgValue(args, '--stagger-delay') || '200', 10); const fileSizeArg = getArgValue(args, '--file-size') || '10MB'; +const fsFileSizeMb = parseInt(getArgValue(args, '--fs-file-size-mb') || '64', 10); +const fsSmallFiles = parseInt(getArgValue(args, '--fs-small-files') || '1000', 10); function 
getArgValue(args: string[], flag: string): string | undefined { const idx = args.indexOf(flag); @@ -38,26 +44,69 @@ function getArgValue(args: string[], flag: string): string | undefined { } /** Resolve which modes to run */ -function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] { +function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] | ['fs'] { if (!rawMode) return ['sequential', 'staggered', 'burst']; if (rawMode === 'storage') return ['storage']; if (rawMode === 'browser') return ['browser']; + if (rawMode === 'fs') return ['fs']; const m = rawMode === 'concurrent' ? 'burst' : rawMode as BenchmarkMode; return [m]; } /** Map mode to results subdirectory name */ -function modeToDir(m: BenchmarkMode | 'storage'): string { +function modeToDir(m: BenchmarkMode | 'storage' | 'fs'): string { switch (m) { case 'sequential': return 'sequential_tti'; case 'staggered': return 'staggered_tti'; case 'burst': case 'concurrent': return 'burst_tti'; case 'storage': return 'storage'; + case 'fs': return 'fs'; default: return `${m}_tti`; } } +async function runFs(toRun: typeof fsProviders, fileSizeBytes: number, smallFilesCount: number): Promise { + console.log('\n' + '='.repeat(70)); + console.log(' MODE: FS'); + console.log(` Iterations per provider: ${iterations}`); + console.log(` Large file size: ${Math.floor(fileSizeBytes / 1024 / 1024)}MB`); + console.log(` Small files count: ${smallFilesCount}`); + console.log('='.repeat(70)); + + const results: FsBenchmarkResult[] = []; + for (const providerConfig of toRun) { + const result = await runFsBenchmark({ ...providerConfig, iterations }, fileSizeBytes, smallFilesCount); + results.push(result); + } + + computeFsCompositeScores(results); + + console.log('\n--- FS Benchmark Results ---'); + for (const r of results) { + if (r.skipped) { + console.log(`${r.provider}: SKIPPED (${r.skipReason})`); + continue; + } + const ok = r.iterations.filter(i => !i.error).length; + const total = 
r.iterations.length; + console.log(`${r.provider}:`); + console.log(` Read: ${(r.summary.readMs.median / 1000).toFixed(2)}s, Write: ${(r.summary.writeMs.median / 1000).toFixed(2)}s, Small-files: ${(r.summary.smallFileOpsMs.median / 1000).toFixed(2)}s`); + console.log(` Score: ${r.compositeScore?.toFixed(1) || '--'} (${ok}/${total} OK)`); + } + + const timestamp = new Date().toISOString().slice(0, 10); + const resultsDir = path.resolve(__dirname, `../results/${modeToDir('fs')}`); + fs.mkdirSync(resultsDir, { recursive: true }); + + const outPath = path.join(resultsDir, `${timestamp}.json`); + await writeFsResultsJson(results, outPath); + + const latestPath = path.join(resultsDir, 'latest.json'); + fs.copyFileSync(outPath, latestPath); + console.log(`Copied latest: ${latestPath}`); +} + async function runMode(mode: BenchmarkMode, toRun: typeof providers): Promise { console.log('\n' + '='.repeat(70)); console.log(` MODE: ${mode.toUpperCase()}`); @@ -246,6 +295,27 @@ async function main() { return; } + if (modes[0] === 'fs') { + console.log('ComputeSDK Filesystem Benchmarks'); + console.log(`Date: ${new Date().toISOString()}`); + console.log(`File size: ${fsFileSizeMb}MB`); + console.log(`Small files: ${fsSmallFiles}\n`); + + const toRun = providerFilter + ? 
fsProviders.filter(p => p.name === providerFilter) + : fsProviders; + + if (toRun.length === 0) { + console.error(`Unknown fs provider: ${providerFilter}`); + console.error(`Available: ${fsProviders.map(p => p.name).join(', ')}`); + process.exit(1); + } + + await runFs(toRun, fsFileSizeMb * 1024 * 1024, fsSmallFiles); + console.log('\nAll fs tests complete.'); + return; + } + // Handle storage mode separately if (modes[0] === 'storage') { console.log('ComputeSDK Storage Provider Benchmarks'); diff --git a/src/sandbox/benchmark.ts b/src/sandbox/benchmark.ts index fcb73bf..dcb3999 100644 --- a/src/sandbox/benchmark.ts +++ b/src/sandbox/benchmark.ts @@ -19,6 +19,7 @@ export async function runBenchmark(config: ProviderConfig): Promise(); console.log(`\n--- Benchmarking: ${name} (${iterations} iterations) ---`); @@ -26,7 +27,13 @@ export async function runBenchmark(config: ProviderConfig): Promise, destroyTimeoutMs: number = 15_000): Promise { +function getSandboxFingerprint(sandbox: any): string | null { + const candidateKeys = ['id', 'sandboxId', 'containerId', 'instanceId']; + for (const key of candidateKeys) { + const value = sandbox?.[key]; + if (typeof value === 'string' && value.trim()) { + return `sandbox:${value}`; + } + } + + return null; +} + +export async function runIteration( + compute: any, + timeout: number, + sandboxOptions?: Record, + destroyTimeoutMs: number = 15_000, + seenSandboxFingerprints?: Set, +): Promise { let sandbox: any = null; try { @@ -57,6 +82,27 @@ export async function runIteration(compute: any, timeout: number, sandboxOptions sandbox = await withTimeout(compute.sandbox.create(sandboxOptions), timeout, 'Sandbox creation timed out'); + const identityResult = await withTimeout( + sandbox.runCommand("sh -lc 'echo -n $(hostname)'"), + 30_000, + 'Sandbox identity check timed out' + ) as { exitCode: number; stdout?: string; stderr?: string }; + + if (identityResult.exitCode !== 0) { + throw new Error(`Sandbox identity check failed with exit 
code ${identityResult.exitCode}: ${identityResult.stderr || 'Unknown error'}`); + } + + const runtimeIdentity = (identityResult.stdout || '').trim(); + const sandboxFingerprint = getSandboxFingerprint(sandbox); + const fingerprint = sandboxFingerprint || (runtimeIdentity ? `runtime:${runtimeIdentity}` : null); + + if (seenSandboxFingerprints && fingerprint) { + if (seenSandboxFingerprints.has(fingerprint)) { + throw new Error('Sandbox/container reuse detected across benchmark iterations'); + } + seenSandboxFingerprints.add(fingerprint); + } + const result = await withTimeout( sandbox.runCommand('node -v'), 30_000, @@ -88,4 +134,3 @@ export async function runIteration(compute: any, timeout: number, sandboxOptions } } } - diff --git a/src/sandbox/concurrent.ts b/src/sandbox/concurrent.ts index 16b2650..0b19d34 100644 --- a/src/sandbox/concurrent.ts +++ b/src/sandbox/concurrent.ts @@ -26,14 +26,14 @@ export async function runConcurrentBenchmark(config: ConcurrentConfig): Promise< } const compute = config.createCompute(); - console.log(`\n--- Concurrent Benchmark: ${name} (${concurrency} sandboxes) ---`); const wallStart = performance.now(); + const seenSandboxFingerprints = new Set(); // Fire all sandbox creations simultaneously — no awaiting between launches const promises = Array.from({ length: concurrency }, (_, i) => - runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs) + runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) .then(result => { console.log(` Sandbox ${i + 1}/${concurrency}: TTI ${(result.ttiMs / 1000).toFixed(2)}s`); return result; diff --git a/src/sandbox/staggered.ts b/src/sandbox/staggered.ts index 78e63af..26ee054 100644 --- a/src/sandbox/staggered.ts +++ b/src/sandbox/staggered.ts @@ -29,17 +29,17 @@ export async function runStaggeredBenchmark(config: StaggeredConfig): Promise(); const promises: Promise[] = []; const rampProfile: { launchedAt: number; readyAt: number; ttiMs: number }[] = []; for 
(let i = 0; i < concurrency; i++) { const launchedAt = performance.now() - wallStart; - const p = runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs) + const p = runIteration(compute, timeout, sandboxOptions, destroyTimeoutMs, seenSandboxFingerprints) .then(result => { const readyAt = performance.now() - wallStart; rampProfile.push({ launchedAt, readyAt, ttiMs: result.ttiMs });