From 4ec33ebaecdbe64ca218c38e938dc4cbf1060f92 Mon Sep 17 00:00:00 2001 From: pedrofrxncx Date: Fri, 24 Apr 2026 03:07:21 -0300 Subject: [PATCH] refactor(sandbox): move freestyle runner and docker helpers into mesh-plugin-user-sandbox Freestyle runner (daemon-script, runner, test) migrated from apps/mesh/src/sandbox/ to packages/mesh-plugin-user-sandbox/server/runner/freestyle/ so the plugin owns all sandbox runner implementations; apps/mesh imports it via the new ./runner/freestyle subpath export. Docker runner reorganized into its own directory: docker.ts -> docker/runner.ts, plus local-ingress and sweep moved alongside. Shared helpers (dev-server, handle, inflight, lock, preview-url) extracted into a shared/ directory so freestyle and docker runners stop duplicating them. RunnerKind type moved from runner/index.ts to runner/types.ts (single source of truth); plugin package.json gains ./runner/freestyle subpath export and migrates freestyle SDKs to optionalDependencies since docker-only deploys don't need them. No behavior change. --- apps/mesh/src/sandbox/lifecycle.ts | 4 +- .../mesh-plugin-user-sandbox/package.json | 9 +- .../server/runner/docker.ts | 677 ------------------ .../server/runner/docker/index.ts | 12 + .../runner/{ => docker}/local-ingress.test.ts | 2 +- .../runner/{ => docker}/local-ingress.ts | 2 +- .../{docker.test.ts => docker/runner.test.ts} | 8 +- .../server/runner/docker/runner.ts | 597 +++++++++++++++ .../server/runner/{ => docker}/sweep.ts | 2 +- .../server/runner/freestyle/daemon-script.ts | 0 .../server/runner/freestyle/index.ts | 2 + .../server/runner/freestyle/runner.test.ts | 2 +- .../server/runner/freestyle/runner.ts | 411 ++++++----- .../server/runner/index.ts | 20 +- .../server/runner/shared/dev-server.ts | 57 ++ .../server/runner/shared/handle.ts | 10 + .../server/runner/shared/index.ts | 5 + .../server/runner/shared/inflight.ts | 20 + .../server/runner/shared/lock.ts | 22 + .../server/runner/shared/preview-url.ts | 17 + .../server/runner/types.ts | 12 +- 21 files changed, 979 insertions(+), 912 deletions(-) delete mode 100644 packages/mesh-plugin-user-sandbox/server/runner/docker.ts create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/docker/index.ts rename packages/mesh-plugin-user-sandbox/server/runner/{ => docker}/local-ingress.test.ts (99%) rename packages/mesh-plugin-user-sandbox/server/runner/{ => docker}/local-ingress.ts (99%) rename packages/mesh-plugin-user-sandbox/server/runner/{docker.test.ts => docker/runner.test.ts} (98%) create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/docker/runner.ts rename packages/mesh-plugin-user-sandbox/server/runner/{ => docker}/sweep.ts (99%) rename apps/mesh/src/sandbox/freestyle-daemon-script.ts => packages/mesh-plugin-user-sandbox/server/runner/freestyle/daemon-script.ts (100%) create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/freestyle/index.ts rename apps/mesh/src/sandbox/freestyle-runner.test.ts => packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.test.ts (97%) rename apps/mesh/src/sandbox/freestyle-runner.ts => packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.ts (71%) create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/shared/dev-server.ts create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/shared/handle.ts create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/shared/index.ts create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/shared/inflight.ts create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/shared/lock.ts create mode 100644 packages/mesh-plugin-user-sandbox/server/runner/shared/preview-url.ts diff --git a/apps/mesh/src/sandbox/lifecycle.ts b/apps/mesh/src/sandbox/lifecycle.ts index 2e70103f16..95ebaf65d2 100644 --- a/apps/mesh/src/sandbox/lifecycle.ts +++ b/apps/mesh/src/sandbox/lifecycle.ts @@ -29,7 +29,9 @@ async function instantiate( case "freestyle": { // Dynamic import — freestyle SDK is an optionalDependency so // docker-only deploys don't need it installed. - const { FreestyleSandboxRunner } = await import("./freestyle-runner"); + const { FreestyleSandboxRunner } = await import( + "mesh-plugin-user-sandbox/runner/freestyle" + ); return new FreestyleSandboxRunner({ stateStore }); } default: { diff --git a/packages/mesh-plugin-user-sandbox/package.json b/packages/mesh-plugin-user-sandbox/package.json index 4d18a6c113..81d40c249e 100644 --- a/packages/mesh-plugin-user-sandbox/package.json +++ b/packages/mesh-plugin-user-sandbox/package.json @@ -9,7 +9,14 @@ }, "exports": { "./shared": "./shared.ts", - "./runner": "./server/runner/index.ts" + "./runner": "./server/runner/index.ts", + "./runner/freestyle": "./server/runner/freestyle/index.ts" + }, + "optionalDependencies": { + "@freestyle-sh/with-bun": "^0.2.12", + "@freestyle-sh/with-deno": "^0.0.4", + "@freestyle-sh/with-nodejs": "^0.2.9", + "freestyle-sandboxes": "^0.1.46" }, "devDependencies": { "@types/bun": "latest", diff --git a/packages/mesh-plugin-user-sandbox/server/runner/docker.ts b/packages/mesh-plugin-user-sandbox/server/runner/docker.ts deleted file mode 100644 index 8c060cbb5e..0000000000 --- a/packages/mesh-plugin-user-sandbox/server/runner/docker.ts +++ /dev/null @@ -1,677 +0,0 @@ -import { createHash, randomBytes } from "node:crypto"; -import { DAEMON_PORT, DEFAULT_IMAGE, sleep } from "../../shared"; -import { - bootstrapRepo, - daemonBash, - probeDaemonHealth, - proxyDaemonRequest as proxyDaemonRequestClient, - waitForDaemonReady, -} from "../daemon-client"; -import { - DEFAULT_WORKDIR, - dockerExec, - startContainer, - type DockerResult, -} from "../docker-cli"; -import { ensureSandboxImage } from "../image-build"; -import type { RunnerStateStore, RunnerStateStoreOps } from "./state-store"; -import type { - EnsureOptions, - ExecInput, - ExecOutput, - ProxyRequestInit, - Sandbox, - SandboxId, - SandboxRunner, - Workload, -} from "./types"; -import { sandboxIdKey } from "./types"; - -const RUNNER_KIND = "docker" as const; -const LABEL_ROOT = "mesh-sandbox"; -const LABEL_ID = "mesh-sandbox.id"; -const PORT_READBACK_ATTEMPTS = 15; -const PORT_READBACK_INTERVAL_MS = 200; - -export type ExecResult = DockerResult; - -export interface DockerExec { - (args: string[]): Promise; -} - -export interface DockerRunnerOptions { - image?: string; - exec?: DockerExec; - /** Ownership label; override per mesh instance when multiple share one host. */ - labelPrefix?: string; - /** Persistent store consulted before docker discovery; its PK resolves cross-pod races. */ - stateStore?: RunnerStateStore; - /** - * Preview URL template. Resolution order: this → `SANDBOX_ROOT_URL` - * (substitutes `{handle}` or hostname-prefixes) → `http://{handle}.sandboxes.localhost:/`. - */ - previewUrlPattern?: string; -} - -// 32 hex (128 bits) keeps it within DNS's 63-char label cap (RFC 1035) while -// still cryptographically secret; Docker accepts any prefix ≥12 chars. -const HANDLE_LEN = 32; -const toHandle = (rawId: string): string => rawId.slice(0, HANDLE_LEN); - -const DEFAULT_DEV_PORT = 3000; - -/** Private per-handle record. Never escapes the runner. */ -interface DockerRecord { - handle: string; - daemonUrl: string; - token: string; - workdir: string; - id: SandboxId; - /** Host-side port → container dev port. */ - devPort: number; - /** Container-internal dev port (default 3000). */ - devContainerPort: number; - /** Host-side port → container daemon port. */ - daemonPort: number; - /** True once bootstrap has been attempted (success or skipped); prevents retry. */ - repoAttached: boolean; - /** Last-started workload; persisted so mesh restart resumes the same config. */ - workload: Workload | null; -} - -interface PersistedDockerState { - token: string; - workdir: string; - daemonUrl: string; - devPort?: number; - devContainerPort?: number; - daemonPort?: number; - repoAttached?: boolean; - workload?: Workload | null; - [k: string]: unknown; -} - -export class DockerSandboxRunner implements SandboxRunner { - readonly kind = RUNNER_KIND; - private readonly defaultImage: string; - private readonly exec_: DockerExec; - private readonly labelPrefix: string; - private readonly stateStore: RunnerStateStore | null; - private readonly previewUrlPattern: string | null; - private readonly byHandle = new Map(); - private readonly inflight = new Map>(); - - constructor(opts: DockerRunnerOptions = {}) { - this.defaultImage = - opts.image ?? process.env.MESH_SANDBOX_IMAGE ?? DEFAULT_IMAGE; - this.exec_ = opts.exec ?? dockerExec; - this.labelPrefix = opts.labelPrefix ?? LABEL_ROOT; - this.stateStore = opts.stateStore ?? null; - this.previewUrlPattern = opts.previewUrlPattern ?? null; - } - - async ensure(id: SandboxId, opts: EnsureOptions = {}): Promise { - const labelId = hashId(id); - const pending = this.inflight.get(labelId); - if (pending) return pending; - // In-process dedupe + state-store `withLock` (cross-pod). Without withLock - // we're single-pod-safe only — prod MUST ship a store that implements it. - // The scoped store passed to the callback reuses the lock's connection; - // without that, nested stateStore calls race the main pool and can - // deadlock at `databasePoolMax` concurrent provisionings. - const p = - this.stateStore && this.stateStore.withLock - ? this.stateStore.withLock(id, RUNNER_KIND, (scoped) => - this.ensureInner(id, labelId, opts, scoped), - ) - : this.ensureInner(id, labelId, opts, this.stateStore); - this.inflight.set(labelId, p); - try { - return await p; - } finally { - this.inflight.delete(labelId); - } - } - - async exec(handle: string, input: ExecInput): Promise { - const rec = await this.requireRecord(handle); - return daemonBash(rec.daemonUrl, rec.token, input); - } - - async delete(handle: string): Promise { - const rec = await this.lookupRecord(handle); - this.byHandle.delete(handle); - // Best-effort graceful dev-stop before the forcible container teardown; - // log (don't swallow) so daemon outages surface in ops. - if (rec) { - await proxyDaemonRequestClient( - rec.daemonUrl, - rec.token, - "/_daemon/dev/stop", - { method: "POST", headers: new Headers(), body: null }, - ).catch((err) => - console.warn( - `[DockerSandboxRunner] graceful dev-stop failed for ${handle}:`, - err instanceof Error ? err.message : String(err), - ), - ); - } - await this.stopContainer(handle); - if (this.stateStore) { - if (rec) await this.stateStore.delete(rec.id, RUNNER_KIND); - else await this.stateStore.deleteByHandle(RUNNER_KIND, handle); - } - } - - async alive(handle: string): Promise { - const r = await this.exec_([ - "inspect", - "--format", - "{{.State.Running}}", - handle, - ]); - return r.code === 0 && r.stdout.trim() === "true"; - } - - async sweepOrphans(): Promise { - const r = await this.exec_([ - "ps", - "-aq", - "--filter", - `label=${this.labelPrefix}=1`, - ]); - if (r.code !== 0) return 0; - const ids = r.stdout.trim().split("\n").filter(Boolean); - await Promise.all( - ids.map(async (id) => { - await this.stopContainer(id).catch((err) => - console.warn( - `[DockerSandboxRunner] sweep: stopContainer(${id}) failed:`, - err instanceof Error ? err.message : String(err), - ), - ); - if (this.stateStore) { - await this.stateStore - .deleteByHandle(RUNNER_KIND, id) - .catch((err) => - console.warn( - `[DockerSandboxRunner] sweep: state-store deleteByHandle(${id}) failed:`, - err instanceof Error ? err.message : String(err), - ), - ); - } - }), - ); - return ids.length; - } - - async getPreviewUrl(handle: string): Promise { - const rec = await this.lookupRecord(handle); - if (!rec) return null; - return this.composePreviewUrl(handle); - } - - /** Passthrough to `/_daemon/*`; bearer token stays inside the class, body streams. */ - async proxyDaemonRequest( - handle: string, - path: string, - init: ProxyRequestInit, - ): Promise { - const rec = await this.lookupRecord(handle); - if (!rec) { - return new Response(JSON.stringify({ error: "sandbox not found" }), { - status: 404, - headers: { "content-type": "application/json" }, - }); - } - return proxyDaemonRequestClient(rec.daemonUrl, rec.token, path, init); - } - - /** Docker-only: host port → dev server. Used by local ingress; not on `SandboxRunner`. */ - async resolveDevPort(handle: string): Promise { - const rec = await this.lookupRecord(handle); - return rec?.devPort ?? null; - } - - /** Docker-only: host port → daemon. Used by local ingress; not on `SandboxRunner`. */ - async resolveDaemonPort(handle: string): Promise { - const rec = await this.lookupRecord(handle); - return rec?.daemonPort ?? null; - } - - private async ensureInner( - id: SandboxId, - labelId: string, - opts: EnsureOptions, - store: RunnerStateStoreOps | null, - ): Promise { - // 1. State store → survives mesh restart + cross-process race. - if (store) { - const persisted = await store.get(id, RUNNER_KIND); - if (persisted) { - const probed = await this.probePersisted(id, persisted); - if (probed) { - this.byHandle.set(probed.handle, probed); - await this.attachRepoIfNeeded(id, probed, opts, store); - await this.startDevServer(probed, opts); - return this.toSandbox(probed); - } - // Stale row — purge, fall through to discovery. - await store.delete(id, RUNNER_KIND); - } - } - - // 2. Docker discovery → recovers when state store is empty but container lives. - const existingHandle = await this.findExisting(labelId); - if (existingHandle) { - const tracked = this.byHandle.get(existingHandle); - if (tracked) { - await this.attachRepoIfNeeded(id, tracked, opts, store); - await this.startDevServer(tracked, opts); - return this.toSandbox(tracked); - } - const recovered = await this.recoverSandbox(id, existingHandle, opts); - if (recovered) { - this.byHandle.set(existingHandle, recovered); - await this.persist(id, recovered, store); - await this.attachRepoIfNeeded(id, recovered, opts, store); - await this.startDevServer(recovered, opts); - return this.toSandbox(recovered); - } - await this.stopContainer(existingHandle); - } - - // 3. Fresh provision; persist AFTER clone so pollers only see a populated workdir. - const rec = await this.provision(id, labelId, opts); - this.byHandle.set(rec.handle, rec); - try { - await this.attachRepoIfNeeded(id, rec, opts, store); - } catch (err) { - this.byHandle.delete(rec.handle); - await this.stopContainer(rec.handle).catch((stopErr) => { - const attachMsg = err instanceof Error ? err.message : String(err); - const stopMsg = - stopErr instanceof Error ? stopErr.message : String(stopErr); - console.warn( - `[DockerSandboxRunner] orphaned container after attach failure handle=${rec.handle} attachErr="${attachMsg}" stopErr="${stopMsg}"`, - ); - }); - throw err; - } - await this.persist(id, rec, store); - await this.startDevServer(rec, opts); - return this.toSandbox(rec); - } - - private toSandbox(rec: DockerRecord): Sandbox { - return { - handle: rec.handle, - workdir: rec.workdir, - // Docker's preview URL is derived purely from the handle via local - // ingress — the dev server may boot from workload hint OR from the - // daemon auto-sniffing package.json/deno.json, so gating on - // `rec.workload` (which is only set when the caller passed metadata - // hints) would nullify the URL for repos where detection happens on - // the daemon side. Matches Freestyle's unconditional URL. - previewUrl: this.composePreviewUrl(rec.handle), - }; - } - - /** No-op if no repo or already attached. Mutates `rec.repoAttached` and persists. */ - private async attachRepoIfNeeded( - id: SandboxId, - rec: DockerRecord, - opts: EnsureOptions, - store: RunnerStateStoreOps | null, - ): Promise { - if (!opts.repo || rec.repoAttached) return; - await bootstrapRepo(rec.daemonUrl, rec.token, rec.workdir, opts.repo); - rec.repoAttached = true; - await this.persist(id, rec, store); - } - - /** - * Fire-and-forget `/dev/start` (idempotent on daemon); VM_START returns fast. - * Fires unconditionally — when no workload hint is available the daemon - * sniffs runtime/script from the workdir (package.json / deno.json) and - * picks `dev` or `start`. "No script found" surfaces as phase=crashed on - * the daemon rather than a silent no-op here. - */ - private async startDevServer( - rec: DockerRecord, - opts: EnsureOptions, - ): Promise { - const workload = opts.workload ?? rec.workload; - const body = workload - ? JSON.stringify({ runtime: workload.runtime }) - : "{}"; - proxyDaemonRequestClient(rec.daemonUrl, rec.token, "/_daemon/dev/start", { - method: "POST", - headers: new Headers({ "content-type": "application/json" }), - body, - signal: AbortSignal.timeout(30_000), - }).catch((err) => { - const isAbort = err instanceof Error && err.name === "AbortError"; - const detail = isAbort - ? "timed out after 30s" - : err instanceof Error - ? err.message - : String(err); - console.error( - `[DockerSandboxRunner] /dev/start failed for ${rec.handle}: ${detail}`, - ); - }); - } - - private async provision( - id: SandboxId, - labelId: string, - opts: EnsureOptions, - ): Promise { - const token = randomBytes(24).toString("hex"); - const workdir = DEFAULT_WORKDIR; - const image = opts.image ?? this.defaultImage; - const devContainerPort = opts.workload?.devPort ?? DEFAULT_DEV_PORT; - - const portPublishArgs = [ - "-p", - `127.0.0.1:0:${DAEMON_PORT}`, - "-p", - `127.0.0.1:0:${devContainerPort}`, - ]; - - const env: Record = { - DAEMON_TOKEN: token, - DAEMON_PORT: String(DAEMON_PORT), - WORKDIR: workdir, - ...(opts.env ?? {}), - }; - - // Shared singleton: if the CLI already kicked off a background build, - // this awaits that same promise instead of starting a second one. - await ensureSandboxImage({ image, exec: this.exec_ }); - - // Hardening: drop all caps (daemon + dev server don't need any), block - // privilege escalation, cap processes/memory/cpu so a runaway user - // script can't DoS the host. Read-only root FS removes most write-based - // pivots; /tmp is a bounded tmpfs; /app and /home/sandbox are anonymous - // volumes (disk-backed, not RAM) so package-manager caches and install - // artefacts don't blow the 2g memory cap. --rm cleans up the anonymous - // volumes when the container exits. - const { id: rawId } = await startContainer(image, { - label: "sandbox", - exec: this.exec_, - args: [ - "--rm", - "--init", - "--read-only", - "--tmpfs=/tmp:rw,nosuid,nodev,size=256m", - "-v", - "/app", - "-v", - "/home/sandbox", - "--cap-drop=ALL", - "--security-opt=no-new-privileges", - "--pids-limit=512", - "--memory=2g", - "--memory-swap=2g", - "--cpus=1", - "--label", - `${this.labelPrefix}=1`, - "--label", - `${LABEL_ID}=${labelId}`, - ...portPublishArgs, - ...Object.entries(env).flatMap(([k, v]) => ["-e", `${k}=${v}`]), - ], - }); - const handle = toHandle(rawId); - - const daemonPort = await this.readPort(handle, DAEMON_PORT); - const daemonUrl = `http://127.0.0.1:${daemonPort}`; - const devPort = await this.readPort(handle, devContainerPort); - await this.waitForReady(daemonUrl, handle); - return { - handle, - daemonUrl, - token, - workdir, - id, - devPort, - devContainerPort, - daemonPort, - repoAttached: false, - workload: opts.workload ?? null, - }; - } - - /** - * Resolution: (1) `previewUrlPattern` option; (2) `SANDBOX_ROOT_URL` env - * (substitute `{handle}` or hostname-prefix); (3) local-ingress default. - * Env read at call time — deploys rewrite without a rebuild. - */ - private composePreviewUrl(handle: string): string { - const explicit = this.previewUrlPattern; - if (explicit) return this.applyPattern(explicit, handle); - const envRoot = process.env.SANDBOX_ROOT_URL; - if (envRoot) return this.applyPattern(envRoot, handle); - const ingressPort = Number(process.env.SANDBOX_INGRESS_PORT ?? 7070); - return `http://${handle}.sandboxes.localhost:${ingressPort}/`; - } - - private applyPattern(pattern: string, handle: string): string { - const base = pattern.replace(/\/+$/, ""); - if (base.includes("{handle}")) - return `${base.replace("{handle}", handle)}/`; - try { - const u = new URL(base); - u.hostname = `${handle}.${u.hostname}`; - return `${u.toString()}/`; - } catch { - // Invalid URL — fall back to local-ingress shape. - const ingressPort = Number(process.env.SANDBOX_INGRESS_PORT ?? 7070); - return `http://${handle}.sandboxes.localhost:${ingressPort}/`; - } - } - - /** Memory cache → state store. Fallback matters after mesh restart (empty byHandle). */ - private async lookupRecord(handle: string): Promise { - const cached = this.byHandle.get(handle); - if (cached) return cached; - if (!this.stateStore) return null; - const persisted = await this.stateStore.getByHandle(RUNNER_KIND, handle); - if (!persisted) return null; - const rec = await this.hydratePersisted(persisted.id, persisted); - if (rec) this.byHandle.set(handle, rec); - return rec; - } - - private async requireRecord(handle: string): Promise { - const rec = await this.lookupRecord(handle); - if (!rec) throw new Error(`unknown sandbox handle ${handle}`); - return rec; - } - - private async probePersisted( - id: SandboxId, - record: { handle: string; state: Record }, - ): Promise { - const rec = await this.hydratePersisted(id, record); - if (!rec) return null; - return (await probeDaemonHealth(rec.daemonUrl)) ? rec : null; - } - - /** Rehydrate from state-store; re-reads ephemeral ports since mesh memory may be stale. */ - private async hydratePersisted( - id: SandboxId, - record: { handle: string; state: Record }, - ): Promise { - const state = record.state as Partial; - if (!state.token || !state.daemonUrl) return null; - const handle = toHandle(record.handle); - const devContainerPort = state.devContainerPort ?? DEFAULT_DEV_PORT; - try { - const daemonPort = await this.readPort(handle, DAEMON_PORT); - const daemonUrl = `http://127.0.0.1:${daemonPort}`; - const devPort = await this.readPort(handle, devContainerPort); - return { - handle, - daemonUrl, - token: state.token, - workdir: state.workdir ?? DEFAULT_WORKDIR, - id, - devPort, - devContainerPort, - daemonPort, - repoAttached: state.repoAttached ?? false, - workload: state.workload ?? null, - }; - } catch { - return null; - } - } - - private async waitForReady(daemonUrl: string, handle: string): Promise { - try { - await waitForDaemonReady(daemonUrl); - } catch (err) { - await this.stopContainer(handle).catch((stopErr) => - console.warn( - `[DockerSandboxRunner] cleanup stop after waitForDaemonReady failure (${handle}) itself failed:`, - stopErr instanceof Error ? stopErr.message : String(stopErr), - ), - ); - throw err; - } - } - - private async recoverSandbox( - id: SandboxId, - handle: string, - opts: EnsureOptions, - ): Promise { - const r = await this.exec_([ - "inspect", - "--format", - "{{range .Config.Env}}{{println .}}{{end}}", - handle, - ]); - if (r.code !== 0) return null; - let token: string | null = null; - let workdir = DEFAULT_WORKDIR; - for (const line of r.stdout.split("\n")) { - if (line.startsWith("DAEMON_TOKEN=")) { - token = line.slice("DAEMON_TOKEN=".length); - } else if (line.startsWith("WORKDIR=")) { - workdir = line.slice("WORKDIR=".length); - } - } - if (!token) return null; - const daemonPort = await this.readPort(handle, DAEMON_PORT); - const daemonUrl = `http://127.0.0.1:${daemonPort}`; - if (!(await probeDaemonHealth(daemonUrl))) return null; - const devContainerPort = opts.workload?.devPort ?? DEFAULT_DEV_PORT; - const devPort = await this.readPort(handle, devContainerPort); - // Recovered via inspect; repoAttached unknown → leave false (next ensure re-stamps). - return { - handle, - daemonUrl, - token, - workdir, - id, - devPort, - devContainerPort, - daemonPort, - repoAttached: false, - workload: opts.workload ?? null, - }; - } - - private async findExisting(labelId: string): Promise { - const r = await this.exec_([ - "ps", - "--no-trunc", - "-q", - "--filter", - `label=${LABEL_ID}=${labelId}`, - ]); - if (r.code !== 0) return null; - const rawId = r.stdout.trim().split("\n").filter(Boolean)[0]; - return rawId ? toHandle(rawId) : null; - } - - private async stopContainer(handle: string): Promise { - await this.exec_(["stop", "--time", "2", handle]); - } - - private async readPort( - handle: string, - containerPort: number, - ): Promise { - for (let i = 0; i < PORT_READBACK_ATTEMPTS; i++) { - const r = await this.exec_(["port", handle, `${containerPort}/tcp`]); - if (r.code === 0) { - for (const line of r.stdout.split("\n")) { - const match = line.trim().match(/:(\d+)$/); - if (match) return Number(match[1]); - } - } else if (/no such container/i.test(r.stderr)) { - // Container exited before daemon bound the port — fail fast with diagnostics. - const diag = await this.containerExitDiagnostics(handle); - throw new Error( - `sandbox container ${handle} exited before daemon started${diag}`, - ); - } - await sleep(PORT_READBACK_INTERVAL_MS); - } - throw new Error( - `timed out waiting for docker port mapping on container ${handle}`, - ); - } - - private async containerExitDiagnostics(handle: string): Promise { - const parts: string[] = []; - const inspect = await this.exec_([ - "inspect", - "--format", - "{{.State.ExitCode}}", - handle, - ]); - if (inspect.code === 0 && inspect.stdout.trim()) { - parts.push(`exit=${inspect.stdout.trim()}`); - } - const logs = await this.exec_(["logs", "--tail", "20", handle]); - const tail = [logs.stdout, logs.stderr] - .map((s) => s.trim()) - .filter(Boolean) - .join("\n") - .trim(); - if (tail) parts.push(`logs:\n${tail}`); - return parts.length ? ` (${parts.join(" ")})` : ""; - } - - private async persist( - id: SandboxId, - rec: DockerRecord, - store: RunnerStateStoreOps | null, - ): Promise { - if (!store) return; - const state: PersistedDockerState = { - token: rec.token, - workdir: rec.workdir, - daemonUrl: rec.daemonUrl, - devPort: rec.devPort, - devContainerPort: rec.devContainerPort, - daemonPort: rec.daemonPort, - repoAttached: rec.repoAttached, - workload: rec.workload, - }; - await store.put(id, RUNNER_KIND, { handle: rec.handle, state }); - } -} - -function hashId(id: SandboxId): string { - return createHash("sha256") - .update(sandboxIdKey(id)) - .digest("hex") - .slice(0, 16); -} diff --git a/packages/mesh-plugin-user-sandbox/server/runner/docker/index.ts b/packages/mesh-plugin-user-sandbox/server/runner/docker/index.ts new file mode 100644 index 0000000000..02025e8727 --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/docker/index.ts @@ -0,0 +1,12 @@ +export { DockerSandboxRunner } from "./runner"; +export type { + DockerExec, + DockerRunnerOptions, + ExecResult, +} from "./runner"; +export { startLocalSandboxIngress } from "./local-ingress"; +export { + sweepDockerOrphansOnBoot, + sweepDockerOrphansOnShutdown, +} from "./sweep"; +export type { SweepDockerOrphansOnBootOptions } from "./sweep"; diff --git a/packages/mesh-plugin-user-sandbox/server/runner/local-ingress.test.ts b/packages/mesh-plugin-user-sandbox/server/runner/docker/local-ingress.test.ts similarity index 99% rename from packages/mesh-plugin-user-sandbox/server/runner/local-ingress.test.ts rename to packages/mesh-plugin-user-sandbox/server/runner/docker/local-ingress.test.ts index 60a0c63907..0e4bb592cf 100644 --- a/packages/mesh-plugin-user-sandbox/server/runner/local-ingress.test.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/docker/local-ingress.test.ts @@ -1,7 +1,7 @@ import { afterEach, describe, expect, it } from "bun:test"; import * as net from "node:net"; import type { AddressInfo } from "node:net"; -import type { DockerSandboxRunner } from "./docker"; +import type { DockerSandboxRunner } from "./runner"; import { startLocalSandboxIngress } from "./local-ingress"; // local-ingress is a raw TCP proxy (not fetch-based). Testing it end-to-end diff --git a/packages/mesh-plugin-user-sandbox/server/runner/local-ingress.ts b/packages/mesh-plugin-user-sandbox/server/runner/docker/local-ingress.ts similarity index 99% rename from packages/mesh-plugin-user-sandbox/server/runner/local-ingress.ts rename to packages/mesh-plugin-user-sandbox/server/runner/docker/local-ingress.ts index f79bb81394..3190ed6301 100644 --- a/packages/mesh-plugin-user-sandbox/server/runner/local-ingress.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/docker/local-ingress.ts @@ -7,7 +7,7 @@ */ import * as net from "node:net"; -import type { DockerSandboxRunner } from "./docker"; +import type { DockerSandboxRunner } from "./runner"; const HOST_RE = /^([^.]+)\.sandboxes\.localhost(?::\d+)?$/i; const MAX_HEADER_BYTES = 16 * 1024; diff --git a/packages/mesh-plugin-user-sandbox/server/runner/docker.test.ts b/packages/mesh-plugin-user-sandbox/server/runner/docker/runner.test.ts similarity index 98% rename from packages/mesh-plugin-user-sandbox/server/runner/docker.test.ts rename to packages/mesh-plugin-user-sandbox/server/runner/docker/runner.test.ts index 5b60a4b1ee..eee7ee0f28 100644 --- a/packages/mesh-plugin-user-sandbox/server/runner/docker.test.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/docker/runner.test.ts @@ -1,13 +1,13 @@ import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; -import type { DockerExecFn, DockerResult } from "../docker-cli"; -import { DockerSandboxRunner } from "./docker"; +import type { DockerExecFn, DockerResult } from "../../docker-cli"; +import { DockerSandboxRunner } from "./runner"; import type { RunnerStateRecord, RunnerStateRecordWithId, RunnerStatePut, RunnerStateStore, -} from "./state-store"; -import type { SandboxId } from "./types"; +} from "../state-store"; +import type { SandboxId } from "../types"; // ----------------------------------------------------------------------------- // Exec mock: matches on args[0] + sub-arg patterns and returns canned results. diff --git a/packages/mesh-plugin-user-sandbox/server/runner/docker/runner.ts b/packages/mesh-plugin-user-sandbox/server/runner/docker/runner.ts new file mode 100644 index 0000000000..de7d2b0e9c --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/docker/runner.ts @@ -0,0 +1,597 @@ +/** + * Docker sandbox runner — local dev. + * + * One hardened container per (user, projectRef). Daemon + dev ports are + * published to ephemeral host ports; browser traffic routes through + * `startLocalSandboxIngress` (`*.sandboxes.localhost`). Mesh owns teardown + * and sweeps orphans on boot/shutdown. + */ + +import { randomBytes } from "node:crypto"; +import { DAEMON_PORT, DEFAULT_IMAGE, sleep } from "../../../shared"; +import { + bootstrapRepo, + daemonBash, + probeDaemonHealth, + proxyDaemonRequest, + waitForDaemonReady, +} from "../../daemon-client"; +import { + DEFAULT_WORKDIR, + dockerExec, + startContainer, + type DockerExecFn, + type DockerResult, +} from "../../docker-cli"; +import { ensureSandboxImage } from "../../image-build"; +import { + Inflight, + applyPreviewPattern, + hashSandboxId, + startDevServer, + stopDevServer, + withSandboxLock, +} from "../shared"; +import type { RunnerStateStore, RunnerStateStoreOps } from "../state-store"; +import type { + EnsureOptions, + ExecInput, + ExecOutput, + ProxyRequestInit, + Sandbox, + SandboxId, + SandboxRunner, + Workload, +} from "../types"; + +const RUNNER_KIND = "docker" as const; +const LABEL_ROOT = "mesh-sandbox"; +const LABEL_ID = "mesh-sandbox.id"; +const DEFAULT_DEV_PORT = 3000; +const HANDLE_LEN = 32; // 128-bit prefix, within RFC 1035's 63-char DNS cap. +const PORT_READBACK_ATTEMPTS = 15; +const PORT_READBACK_INTERVAL_MS = 200; +const LOG_LABEL = "DockerSandboxRunner"; + +export type ExecResult = DockerResult; +export type DockerExec = DockerExecFn; + +export interface DockerRunnerOptions { + image?: string; + exec?: DockerExecFn; + stateStore?: RunnerStateStore; + previewUrlPattern?: string; + /** Ownership label; override per mesh instance when multiple share one host. */ + labelPrefix?: string; +} + +interface DockerRecord { + id: SandboxId; + handle: string; + token: string; + workdir: string; + daemonUrl: string; + daemonPort: number; + devPort: number; + devContainerPort: number; + repoAttached: boolean; + workload: Workload | null; +} + +interface PersistedDockerState { + token: string; + workdir: string; + daemonUrl: string; + devPort?: number; + devContainerPort?: number; + daemonPort?: number; + repoAttached?: boolean; + workload?: Workload | null; + [k: string]: unknown; +} + +const toHandle = (rawId: string): string => rawId.slice(0, HANDLE_LEN); + +export class DockerSandboxRunner implements SandboxRunner { + readonly kind = RUNNER_KIND; + + private readonly records = new Map(); + private readonly inflight = new Inflight(); + private readonly defaultImage: string; + private readonly exec_: DockerExecFn; + private readonly labelPrefix: string; + private readonly stateStore: RunnerStateStore | null; + private readonly previewUrlPattern: string | null; + + constructor(opts: DockerRunnerOptions = {}) { + this.defaultImage = + opts.image ?? process.env.MESH_SANDBOX_IMAGE ?? DEFAULT_IMAGE; + this.exec_ = opts.exec ?? dockerExec; + this.labelPrefix = opts.labelPrefix ?? LABEL_ROOT; + this.stateStore = opts.stateStore ?? null; + this.previewUrlPattern = opts.previewUrlPattern ?? null; + } + + // ---- SandboxRunner surface ------------------------------------------------ + + async ensure(id: SandboxId, opts: EnsureOptions = {}): Promise { + const labelId = hashSandboxId(id, 16); + return this.inflight.run(labelId, () => + withSandboxLock(this.stateStore, id, RUNNER_KIND, (ops) => + this.ensureLocked(id, labelId, opts, ops), + ), + ); + } + + async exec(handle: string, input: ExecInput): Promise { + const rec = await this.requireRecord(handle); + return daemonBash(rec.daemonUrl, rec.token, input); + } + + async delete(handle: string): Promise { + const rec = await this.getRecord(handle); + this.records.delete(handle); + if (rec) await stopDevServer(rec.daemonUrl, rec.token, LOG_LABEL); + await this.stopContainer(handle); + if (this.stateStore) { + if (rec) await this.stateStore.delete(rec.id, RUNNER_KIND); + else await this.stateStore.deleteByHandle(RUNNER_KIND, handle); + } + } + + async alive(handle: string): Promise { + const r = await this.exec_([ + "inspect", + "--format", + "{{.State.Running}}", + handle, + ]); + return r.code === 0 && r.stdout.trim() === "true"; + } + + async getPreviewUrl(handle: string): Promise { + const rec = await this.getRecord(handle); + return rec ? this.composePreviewUrl(rec) : null; + } + + async proxyDaemonRequest( + handle: string, + path: string, + init: ProxyRequestInit, + ): Promise { + const rec = await this.getRecord(handle); + if (!rec) { + return new Response(JSON.stringify({ error: "sandbox not found" }), { + status: 404, + headers: { "content-type": "application/json" }, + }); + } + return proxyDaemonRequest(rec.daemonUrl, rec.token, path, init); + } + + // ---- Docker-only surface -------------------------------------------------- + + async sweepOrphans(): Promise { + const r = await this.exec_([ + "ps", + "-aq", + "--filter", + `label=${this.labelPrefix}=1`, + ]); + if (r.code !== 0) return 0; + const ids = r.stdout.trim().split("\n").filter(Boolean); + await Promise.all( + ids.map(async (id) => { + await this.stopContainer(id).catch((err) => + console.warn( + `[${LOG_LABEL}] sweep: stopContainer(${id}) failed:`, + err instanceof Error ? err.message : String(err), + ), + ); + if (this.stateStore) { + await this.stateStore + .deleteByHandle(RUNNER_KIND, id) + .catch((err) => + console.warn( + `[${LOG_LABEL}] sweep: state-store deleteByHandle(${id}) failed:`, + err instanceof Error ? err.message : String(err), + ), + ); + } + }), + ); + return ids.length; + } + + /** Docker-only: host port → dev server. Used by local ingress. */ + async resolveDevPort(handle: string): Promise { + const rec = await this.getRecord(handle); + return rec?.devPort ?? null; + } + + /** Docker-only: host port → daemon. Used by local ingress. */ + async resolveDaemonPort(handle: string): Promise { + const rec = await this.getRecord(handle); + return rec?.daemonPort ?? null; + } + + // ---- Ensure flow ---------------------------------------------------------- + + private async ensureLocked( + id: SandboxId, + labelId: string, + opts: EnsureOptions, + ops: RunnerStateStoreOps | null, + ): Promise { + // 1. State-store resume. + if (ops) { + const persisted = await ops.get(id, RUNNER_KIND); + if (persisted) { + const rec = await this.rehydrate(id, persisted); + if (rec) return this.finish(rec, opts, ops, /* persistNow */ false); + await ops.delete(id, RUNNER_KIND); + } + } + // 2. Side-channel adopt: container with our label still running. + const adopted = await this.adoptByLabel(id, labelId, opts); + if (adopted) return this.finish(adopted, opts, ops, /* persistNow */ true); + // 3. Fresh provision. + const fresh = await this.provision(id, labelId, opts); + return this.finish(fresh, opts, ops, /* persistNow */ true); + } + + private async finish( + rec: DockerRecord, + opts: EnsureOptions, + ops: RunnerStateStoreOps | null, + persistNow: boolean, + ): Promise { + this.records.set(rec.handle, rec); + let bootstrapped = false; + if (opts.repo && !rec.repoAttached) { + try { + await bootstrapRepo(rec.daemonUrl, rec.token, rec.workdir, opts.repo); + rec.repoAttached = true; + bootstrapped = true; + } catch (err) { + this.records.delete(rec.handle); + await this.stopContainer(rec.handle).catch((teardownErr) => + console.warn( + `[${LOG_LABEL}] orphan teardown after attach failure handle=${rec.handle} attachErr="${ + err instanceof Error ? err.message : String(err) + }" teardownErr="${ + teardownErr instanceof Error + ? teardownErr.message + : String(teardownErr) + }"`, + ), + ); + throw err; + } + } + // Persist on fresh provision/adopt, OR after we just flipped repoAttached + // on a resumed record (so subsequent ensure() skips bootstrap). + if (persistNow || bootstrapped) await this.persist(ops, rec); + startDevServer( + rec.daemonUrl, + rec.token, + opts.workload ?? rec.workload, + LOG_LABEL, + ); + return this.toSandbox(rec); + } + + private async provision( + id: SandboxId, + labelId: string, + opts: EnsureOptions, + ): Promise { + const token = randomBytes(24).toString("hex"); + const workdir = DEFAULT_WORKDIR; + const image = opts.image ?? this.defaultImage; + const devContainerPort = opts.workload?.devPort ?? DEFAULT_DEV_PORT; + + const env: Record = { + DAEMON_TOKEN: token, + DAEMON_PORT: String(DAEMON_PORT), + WORKDIR: workdir, + ...(opts.env ?? {}), + }; + + // Shared singleton; awaits any background build kicked off by the CLI. + await ensureSandboxImage({ image, exec: this.exec_ }); + + // Hardening: drop caps + block privilege escalation; cap processes/memory/ + // cpu against runaway user scripts. Read-only root removes most write-based + // pivots; /tmp is a bounded tmpfs; /app and /home/sandbox are anonymous + // volumes (disk-backed) so package-manager caches don't blow the mem cap. + const { id: rawId } = await startContainer(image, { + label: "sandbox", + exec: this.exec_, + args: [ + "--rm", + "--init", + "--read-only", + "--tmpfs=/tmp:rw,nosuid,nodev,size=256m", + "-v", + "/app", + "-v", + "/home/sandbox", + "--cap-drop=ALL", + "--security-opt=no-new-privileges", + "--pids-limit=512", + "--memory=2g", + "--memory-swap=2g", + "--cpus=1", + "--label", + `${this.labelPrefix}=1`, + "--label", + `${LABEL_ID}=${labelId}`, + "-p", + `127.0.0.1:0:${DAEMON_PORT}`, + "-p", + `127.0.0.1:0:${devContainerPort}`, + ...Object.entries(env).flatMap(([k, v]) => ["-e", `${k}=${v}`]), + ], + }); + const handle = toHandle(rawId); + + const daemonPort = await this.readPort(handle, DAEMON_PORT); + const daemonUrl = `http://127.0.0.1:${daemonPort}`; + const devPort = await this.readPort(handle, devContainerPort); + try { + await waitForDaemonReady(daemonUrl); + } catch (err) { + await this.stopContainer(handle).catch((stopErr) => + console.warn( + `[${LOG_LABEL}] cleanup stop after waitForDaemonReady failure (${handle}) itself failed:`, + stopErr instanceof Error ? stopErr.message : String(stopErr), + ), + ); + throw err; + } + return { + id, + handle, + token, + workdir, + daemonUrl, + daemonPort, + devPort, + devContainerPort, + repoAttached: false, + workload: opts.workload ?? null, + }; + } + + /** + * Reconstruct a record from persisted state, probing that the container is + * still healthy. Returns null on any mismatch — caller purges and falls + * through to `adoptByLabel`/`provision`. + */ + private async rehydrate( + id: SandboxId, + persisted: { handle: string; state: Record }, + ): Promise { + const state = persisted.state as Partial; + if (!state.token || !state.daemonUrl) return null; + const handle = toHandle(persisted.handle); + const devContainerPort = state.devContainerPort ?? DEFAULT_DEV_PORT; + let daemonPort: number; + let devPort: number; + try { + daemonPort = await this.readPort(handle, DAEMON_PORT); + devPort = await this.readPort(handle, devContainerPort); + } catch { + return null; + } + const daemonUrl = `http://127.0.0.1:${daemonPort}`; + if (!(await probeDaemonHealth(daemonUrl))) return null; + return { + id, + handle, + token: state.token, + workdir: state.workdir ?? DEFAULT_WORKDIR, + daemonUrl, + daemonPort, + devPort, + devContainerPort, + repoAttached: state.repoAttached ?? false, + workload: state.workload ?? null, + }; + } + + /** + * State store empty but a container with our label still runs. Reconstruct + * from `docker inspect` env vars; tear down anything we can't reuse so + * `provision` below doesn't collide on the next ensure. + */ + private async adoptByLabel( + id: SandboxId, + labelId: string, + opts: EnsureOptions, + ): Promise { + const existing = await this.findExisting(labelId); + if (!existing) return null; + + const cached = this.records.get(existing); + if (cached) return cached; + + const recovered = await this.reconstructFromContainer(id, existing, opts); + if (recovered) return recovered; + + await this.stopContainer(existing); + return null; + } + + private async reconstructFromContainer( + id: SandboxId, + handle: string, + opts: EnsureOptions, + ): Promise { + const r = await this.exec_([ + "inspect", + "--format", + "{{range .Config.Env}}{{println .}}{{end}}", + handle, + ]); + if (r.code !== 0) return null; + let token: string | null = null; + let workdir = DEFAULT_WORKDIR; + for (const line of r.stdout.split("\n")) { + if (line.startsWith("DAEMON_TOKEN=")) token = line.slice(13); + else if (line.startsWith("WORKDIR=")) workdir = line.slice(8); + } + if (!token) return null; + const daemonPort = await this.readPort(handle, DAEMON_PORT); + const daemonUrl = `http://127.0.0.1:${daemonPort}`; + if (!(await probeDaemonHealth(daemonUrl))) return null; + const devContainerPort = opts.workload?.devPort ?? DEFAULT_DEV_PORT; + const devPort = await this.readPort(handle, devContainerPort); + return { + id, + handle, + token, + workdir, + daemonUrl, + daemonPort, + devPort, + devContainerPort, + repoAttached: false, + workload: opts.workload ?? null, + }; + } + + // ---- Handle resolution (post-restart) ------------------------------------- + + /** + * Look up a record by handle, rehydrating from persisted state on cache + * miss. The returned record is fully usable for any of the six methods — + * after a mesh restart this is the entry point that reconstructs state. + */ + private async getRecord(handle: string): Promise { + const cached = this.records.get(handle); + if (cached) return cached; + if (!this.stateStore) return null; + const persisted = await this.stateStore.getByHandle(RUNNER_KIND, handle); + if (!persisted) return null; + const rec = await this.rehydrate(persisted.id, persisted); + if (rec) this.records.set(handle, rec); + return rec; + } + + private async requireRecord(handle: string): Promise { + const rec = await this.getRecord(handle); + if (!rec) throw new Error(`unknown sandbox handle ${handle}`); + return rec; + } + + // ---- Preview URL ---------------------------------------------------------- + + /** + * Local-ingress preview URL. Docker's URL is derived purely from the handle, + * not gated on workload — the dev server may boot from a caller workload + * hint OR the daemon auto-sniffing package.json / deno.json. + */ + private composePreviewUrl(rec: DockerRecord): string { + if (this.previewUrlPattern) { + return applyPreviewPattern(this.previewUrlPattern, rec.handle); + } + const envRoot = process.env.SANDBOX_ROOT_URL; + if (envRoot) return applyPreviewPattern(envRoot, rec.handle); + const ingressPort = Number(process.env.SANDBOX_INGRESS_PORT ?? 7070); + return `http://${rec.handle}.sandboxes.localhost:${ingressPort}/`; + } + + private toSandbox(rec: DockerRecord): Sandbox { + return { + handle: rec.handle, + workdir: rec.workdir, + previewUrl: this.composePreviewUrl(rec), + }; + } + + // ---- Persistence ---------------------------------------------------------- + + private async persist( + ops: RunnerStateStoreOps | null, + rec: DockerRecord, + ): Promise { + if (!ops) return; + const state: PersistedDockerState = { + token: rec.token, + workdir: rec.workdir, + daemonUrl: rec.daemonUrl, + daemonPort: rec.daemonPort, + devPort: rec.devPort, + devContainerPort: rec.devContainerPort, + repoAttached: rec.repoAttached, + workload: rec.workload, + }; + await ops.put(rec.id, RUNNER_KIND, { handle: rec.handle, state }); + } + + // ---- Docker CLI helpers --------------------------------------------------- + + private async stopContainer(handle: string): Promise { + await this.exec_(["stop", "--time", "2", handle]); + } + + private async findExisting(labelId: string): Promise { + const r = await this.exec_([ + "ps", + "--no-trunc", + "-q", + "--filter", + `label=${LABEL_ID}=${labelId}`, + ]); + if (r.code !== 0) return null; + const rawId = r.stdout.trim().split("\n").filter(Boolean)[0]; + return rawId ? toHandle(rawId) : null; + } + + private async readPort( + handle: string, + containerPort: number, + ): Promise { + for (let i = 0; i < PORT_READBACK_ATTEMPTS; i++) { + const r = await this.exec_(["port", handle, `${containerPort}/tcp`]); + if (r.code === 0) { + for (const line of r.stdout.split("\n")) { + const match = line.trim().match(/:(\d+)$/); + if (match) return Number(match[1]); + } + } else if (/no such container/i.test(r.stderr)) { + const diag = await this.exitDiagnostics(handle); + throw new Error( + `sandbox container ${handle} exited before daemon started${diag}`, + ); + } + await sleep(PORT_READBACK_INTERVAL_MS); + } + throw new Error( + `timed out waiting for docker port mapping on container ${handle}`, + ); + } + + private async exitDiagnostics(handle: string): Promise { + const parts: string[] = []; + const inspect = await this.exec_([ + "inspect", + "--format", + "{{.State.ExitCode}}", + handle, + ]); + if (inspect.code === 0 && inspect.stdout.trim()) { + parts.push(`exit=${inspect.stdout.trim()}`); + } + const logs = await this.exec_(["logs", "--tail", "20", handle]); + const tail = [logs.stdout, logs.stderr] + .map((s) => s.trim()) + .filter(Boolean) + .join("\n") + .trim(); + if (tail) parts.push(`logs:\n${tail}`); + return parts.length ? ` (${parts.join(" ")})` : ""; + } +} diff --git a/packages/mesh-plugin-user-sandbox/server/runner/sweep.ts b/packages/mesh-plugin-user-sandbox/server/runner/docker/sweep.ts similarity index 99% rename from packages/mesh-plugin-user-sandbox/server/runner/sweep.ts rename to packages/mesh-plugin-user-sandbox/server/runner/docker/sweep.ts index 061a778bbf..16c9393b32 100644 --- a/packages/mesh-plugin-user-sandbox/server/runner/sweep.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/docker/sweep.ts @@ -4,7 +4,7 @@ * lives on `DockerSandboxRunner`, not on the `SandboxRunner` interface. */ -import { DockerSandboxRunner, type DockerRunnerOptions } from "./docker"; +import { DockerSandboxRunner, type DockerRunnerOptions } from "./runner"; const BOOT_SWEEP_KEY = Symbol.for("mesh.sandbox.bootSweepDone"); diff --git a/apps/mesh/src/sandbox/freestyle-daemon-script.ts b/packages/mesh-plugin-user-sandbox/server/runner/freestyle/daemon-script.ts similarity index 100% rename from apps/mesh/src/sandbox/freestyle-daemon-script.ts rename to packages/mesh-plugin-user-sandbox/server/runner/freestyle/daemon-script.ts diff --git a/packages/mesh-plugin-user-sandbox/server/runner/freestyle/index.ts b/packages/mesh-plugin-user-sandbox/server/runner/freestyle/index.ts new file mode 100644 index 0000000000..47ae90cb17 --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/freestyle/index.ts @@ -0,0 +1,2 @@ +export { FreestyleSandboxRunner, translateDaemonPath } from "./runner"; +export type { FreestyleRunnerOptions } from "./runner"; diff --git a/apps/mesh/src/sandbox/freestyle-runner.test.ts b/packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.test.ts similarity index 97% rename from apps/mesh/src/sandbox/freestyle-runner.test.ts rename to packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.test.ts index fa30aa3a95..0edc59f2d2 100644 --- a/apps/mesh/src/sandbox/freestyle-runner.test.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.test.ts @@ -14,7 +14,7 @@ mock.module("@freestyle-sh/with-deno", () => ({ VmDeno: class {} })); mock.module("@freestyle-sh/with-bun", () => ({ VmBun: class {} })); mock.module("@freestyle-sh/with-nodejs", () => ({ VmNodeJs: class {} })); -const { translateDaemonPath } = await import("./freestyle-runner"); +const { translateDaemonPath } = await import("./runner"); describe("translateDaemonPath", () => { it("maps /_daemon/fs/ to /_decopilot_vm/", () => { diff --git a/apps/mesh/src/sandbox/freestyle-runner.ts b/packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.ts similarity index 71% rename from apps/mesh/src/sandbox/freestyle-runner.ts rename to packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.ts index 6eff5ccbae..fd1aa0ce9d 100644 --- a/apps/mesh/src/sandbox/freestyle-runner.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/freestyle/runner.ts @@ -1,40 +1,49 @@ /** - * Freestyle sandbox runner. One VM per (user, projectRef). Persistent - * state in `sandbox_runner_state` lets a mesh restart resume via - * `freestyle.vms.ref({ vmId, spec }).start()` — lose the ref and every - * restart would churn a new VM with a new public URL. - * Payloads to `/_decopilot_vm/*` are base64-encoded to dodge the - * Cloudflare WAF in front of freestyle domains. + * Freestyle sandbox runner — hosted. + * + * One VM per (user, projectRef). Freestyle owns the runtime; mesh calls + * `freestyle.vms.{create, ref({vmId, spec}).start, stop, delete}`. The VM + * bakes in a clone + daemon systemd service (built via `daemon-script.ts`), + * so there's no in-package bootstrap path and no port-forward — the + * preview URL is a Freestyle-provided HTTPS domain. + * + * Daemon traffic at `/_decopilot_vm/*` is base64-encoded body-wise to dodge + * Freestyle's Cloudflare WAF. */ import { createHash, randomBytes } from "node:crypto"; -import { freestyle, VmSpec } from "freestyle-sandboxes"; -import { VmDeno } from "@freestyle-sh/with-deno"; import { VmBun } from "@freestyle-sh/with-bun"; +import { VmDeno } from "@freestyle-sh/with-deno"; import { VmNodeJs } from "@freestyle-sh/with-nodejs"; -import { IFRAME_BOOTSTRAP_SCRIPT } from "mesh-plugin-user-sandbox/shared"; +import { freestyle, VmSpec } from "freestyle-sandboxes"; +import { IFRAME_BOOTSTRAP_SCRIPT } from "../../../shared"; +import { Inflight, withSandboxLock } from "../shared"; +import type { RunnerStateStore, RunnerStateStoreOps } from "../state-store"; import { sandboxIdKey, type EnsureOptions, type ExecInput, type ExecOutput, type ProxyRequestInit, - type RunnerStateStore, - type RunnerStateStoreOps, type Sandbox, type SandboxId, type SandboxRunner, type Workload, -} from "mesh-plugin-user-sandbox/runner"; -import { buildDaemonScript } from "./freestyle-daemon-script"; +} from "../types"; +import { buildDaemonScript } from "./daemon-script"; const RUNNER_KIND = "freestyle" as const; +const LOG_LABEL = "FreestyleSandboxRunner"; const PROXY_PORT = 9000; const APP_WORKDIR = "/app"; +const DAEMON_TOKEN_BYTES = 32; +const ALIVE_PROBE_TIMEOUT_MS = 2_000; +const EXEC_DEFAULT_TIMEOUT_MS = 30_000; +const DISPOSE_TIMEOUT_MS = 10_000; /** Stop running VMs after this much idle time. Freestyle bills per active second. */ const DEFAULT_IDLE_TIMEOUT_SECONDS = 1800; -interface FreestyleRunnerOptions { +export interface FreestyleRunnerOptions { stateStore?: RunnerStateStore; /** Override when the freestyle account uses a custom apex. Default: `deco.studio`. */ previewRootDomain?: string; @@ -43,11 +52,11 @@ interface FreestyleRunnerOptions { } interface FreestyleRecord { + id: SandboxId; handle: string; vmId: string; previewDomain: string; workdir: string; - id: SandboxId; workload: Workload | null; /** Persisted so VmSpec can be rebuilt deterministically on resume. */ repo: NonNullable; @@ -68,11 +77,12 @@ interface PersistedFreestyleState { export class FreestyleSandboxRunner implements SandboxRunner { readonly kind = RUNNER_KIND; + + private readonly records = new Map(); + private readonly inflight = new Inflight(); private readonly stateStore: RunnerStateStore | null; private readonly previewRootDomain: string; private readonly idleTimeoutSeconds: number; - private readonly byHandle = new Map(); - private readonly inflight = new Map>(); constructor(opts: FreestyleRunnerOptions = {}) { this.stateStore = opts.stateStore ?? null; @@ -81,28 +91,25 @@ export class FreestyleSandboxRunner implements SandboxRunner { opts.idleTimeoutSeconds ?? DEFAULT_IDLE_TIMEOUT_SECONDS; } + // ---- SandboxRunner surface ------------------------------------------------ + async ensure(id: SandboxId, opts: EnsureOptions = {}): Promise { - const key = sandboxIdKey(id); - const pending = this.inflight.get(key); - if (pending) { - return pending; + if (!opts.repo) { + throw new Error( + `[${LOG_LABEL}] requires opts.repo — bake-in clone is part of the VmSpec; blank sandboxes aren't supported.`, + ); } - // See DockerSandboxRunner.ensure — state-store lock serializes across - // pods; in-memory inflight dedupes within this process. The scoped - // store reuses the lock connection so nested reads/writes don't starve - // the main pg pool during long provisioning. - const p = - this.stateStore && this.stateStore.withLock - ? this.stateStore.withLock(id, RUNNER_KIND, (scoped) => { - return this.ensureInner(id, opts, scoped); - }) - : this.ensureInner(id, opts, this.stateStore); - this.inflight.set(key, p); - try { - return await p; - } finally { - this.inflight.delete(key); + if (!opts.repo.branch) { + throw new Error( + `[${LOG_LABEL}] requires opts.repo.branch — the daemon clones with -b and the branch is part of the spec.`, + ); } + const key = sandboxIdKey(id); + return this.inflight.run(key, () => + withSandboxLock(this.stateStore, id, RUNNER_KIND, (ops) => + this.ensureLocked(id, opts, ops), + ), + ); } /** Routes through the daemon transport so CORS/bearer match file-ops. */ @@ -110,16 +117,14 @@ export class FreestyleSandboxRunner implements SandboxRunner { const rec = await this.requireRecord(handle); const res = await this.postDaemon(rec, "/_decopilot_vm/bash", { command: input.command, - timeout: input.timeoutMs ?? 30_000, + timeout: input.timeoutMs ?? EXEC_DEFAULT_TIMEOUT_MS, cwd: input.cwd, env: input.env, }); if (!res.ok) { const body = await res.text().catch(() => ""); throw new Error( - `freestyle daemon /_decopilot_vm/bash returned ${res.status}${ - body ? `: ${body}` : "" - }`, + `freestyle daemon /_decopilot_vm/bash returned ${res.status}${body ? `: ${body}` : ""}`, ); } const json = (await res.json()) as { @@ -137,50 +142,26 @@ export class FreestyleSandboxRunner implements SandboxRunner { } async delete(handle: string): Promise { - const rec = await this.lookupRecord(handle); - this.byHandle.delete(handle); + const rec = await this.getRecord(handle); + this.records.delete(handle); if (rec) { - await this.disposeVm(rec.vmId, "delete"); - if (this.stateStore) { - await this.stateStore.delete(rec.id, RUNNER_KIND); - } + await disposeVm(rec.vmId, "delete"); + if (this.stateStore) await this.stateStore.delete(rec.id, RUNNER_KIND); } else if (this.stateStore) { await this.stateStore.deleteByHandle(RUNNER_KIND, handle); } } - /** stop() + delete() a VM; timebound + errors are logged, not thrown. */ - private async disposeVm(vmId: string, reason: string): Promise { - try { - const vm = freestyle.vms.ref({ vmId }); - await Promise.race([ - vm.stop().then(() => vm.delete()), - new Promise((_, reject) => - setTimeout( - () => reject(new Error("freestyle vm.delete() timed out")), - 10_000, - ), - ), - ]); - } catch (err) { - console.error( - `[FreestyleSandboxRunner] dispose vm ${vmId} (${reason}) failed: ${ - err instanceof Error ? err.message : String(err) - }`, - ); - } - } - /** Freestyle SDK has no cheap status check; small GET is our best signal. */ async alive(handle: string): Promise { - const rec = await this.lookupRecord(handle); + const rec = await this.getRecord(handle); if (!rec) return false; try { const res = await fetch( `https://${rec.previewDomain}/_decopilot_vm/scripts`, { headers: { authorization: `Bearer ${rec.daemonToken}` }, - signal: AbortSignal.timeout(2_000), + signal: AbortSignal.timeout(ALIVE_PROBE_TIMEOUT_MS), }, ); return res.ok; @@ -190,9 +171,8 @@ export class FreestyleSandboxRunner implements SandboxRunner { } async getPreviewUrl(handle: string): Promise { - const rec = await this.lookupRecord(handle); - if (!rec) return null; - return `https://${rec.previewDomain}`; + const rec = await this.getRecord(handle); + return rec ? `https://${rec.previewDomain}` : null; } /** @@ -208,7 +188,7 @@ export class FreestyleSandboxRunner implements SandboxRunner { path: string, init: ProxyRequestInit, ): Promise { - const rec = await this.lookupRecord(handle); + const rec = await this.getRecord(handle); if (!rec) { return new Response(JSON.stringify({ error: "sandbox not found" }), { status: 404, @@ -222,9 +202,7 @@ export class FreestyleSandboxRunner implements SandboxRunner { } const target = `https://${rec.previewDomain}${translated}`; const headers = new Headers(init.headers); - // Strip cookies + hop-by-hop, then set our own bearer. Any Authorization - // that arrived from the browser (there shouldn't be one — mesh session - // auth ran upstream) is overwritten with the VM's per-sandbox token. + // Strip cookies + hop-by-hop, then set our own bearer. for (const h of [ "cookie", "host", @@ -266,48 +244,73 @@ export class FreestyleSandboxRunner implements SandboxRunner { }); } - private async ensureInner( + // ---- Ensure flow ---------------------------------------------------------- + + private async ensureLocked( id: SandboxId, opts: EnsureOptions, - store: RunnerStateStoreOps | null, + ops: RunnerStateStoreOps | null, ): Promise { - if (!opts.repo) { - throw new Error( - "FreestyleSandboxRunner requires `opts.repo` — bake-in clone is part of the VmSpec; blank/freestyle sandboxes aren't supported.", - ); - } - if (!opts.repo.branch) { - throw new Error( - "FreestyleSandboxRunner requires `opts.repo.branch` — the daemon clones with -b and the branch is part of the spec.", - ); - } // 1. State-store resume. - if (store) { - const persisted = await store.get(id, RUNNER_KIND); + if (ops) { + const persisted = await ops.get(id, RUNNER_KIND); if (persisted) { - const probed = await this.resume(id, persisted, opts); - if (probed) { - this.byHandle.set(probed.handle, probed); - return this.toSandbox(probed); + const rec = await this.resume(id, persisted, opts); + if (rec) { + this.records.set(rec.handle, rec); + return this.toSandbox(rec); } - await store.delete(id, RUNNER_KIND); + await ops.delete(id, RUNNER_KIND); } } - // 2. Fresh provision. + // 2. Fresh provision. No adopt path: freestyle has no tag-side lookup. const rec = await this.provision(id, opts); - this.byHandle.set(rec.handle, rec); - await this.persist(id, rec, store); + this.records.set(rec.handle, rec); + await this.persist(ops, rec); return this.toSandbox(rec); } - private toSandbox(rec: FreestyleRecord): Sandbox { + private async provision( + id: SandboxId, + opts: EnsureOptions, + ): Promise { + const repo = opts.repo!; + const workload = opts.workload ?? null; + const previewDomain = `${this.computeDomainKey(id)}.${this.previewRootDomain}`; + const daemonToken = randomBytes(DAEMON_TOKEN_BYTES).toString("hex"); + const spec = this.buildSpec({ repo, workload, daemonToken }); + let result: { vmId: string }; + try { + result = await freestyle.vms.create({ + spec, + domains: [{ domain: previewDomain, vmPort: PROXY_PORT }], + recreate: true, + idleTimeoutSeconds: this.idleTimeoutSeconds, + }); + } catch (err) { + throw new Error( + `[${LOG_LABEL}] vms.create failed for domain=${previewDomain} user=${id.userId} projectRef=${id.projectRef}: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + } return { - handle: rec.handle, - workdir: rec.workdir, - previewUrl: `https://${rec.previewDomain}`, + id, + handle: result.vmId, + vmId: result.vmId, + previewDomain, + workdir: APP_WORKDIR, + workload, + repo, + daemonToken, }; } + /** + * Resume a persisted record: validate the blob, bail on spec divergence, + * then boot the VM via `freestyle.vms.ref({vmId, spec}).start()`. Returns + * null to trigger purge-and-reprovision in the caller. + */ private async resume( id: SandboxId, persisted: { handle: string; state: Record }, @@ -316,28 +319,24 @@ export class FreestyleSandboxRunner implements SandboxRunner { const state = persisted.state as Partial; if (!state.vmId || !state.previewDomain || !state.repo) return null; // Rows persisted before bearer auth landed have no daemonToken. The - // running VM's daemon script also predates auth, so issuing a new token - // wouldn't match. Dispose the old VM explicitly — relying on idle-timeout - // orphans one VM per stale row, which stacks up and is billed. + // running VM's daemon script also predates auth, so a new token wouldn't + // match. Dispose the old VM explicitly — idle-timeout would orphan one + // VM per stale row, which stacks up and is billed. if (!state.daemonToken) { - await this.disposeVm(state.vmId, "resume:no-daemon-token"); + await disposeVm(state.vmId, "resume:no-daemon-token"); return null; } - // Workload (runtime / packageManager / devPort) is baked into the - // daemon script at VM create time — see buildSpec's additionalFiles. - // `freestyle.vms.ref({ vmId, spec }).start()` boots the existing VM - // with the already-written /opt/daemon.js; the rebuilt spec is - // effectively ignored. When the caller's workload has diverged from - // what was baked, resume would silently keep running the old PM. Bail - // so ensureInner deletes the stale state row and provisions fresh. + // Workload (runtime / packageManager / devPort) is baked into the daemon + // script at VM create time — see buildSpec.additionalFiles. When the + // caller's workload has diverged, resume would silently keep running the + // old PM. Bail so ensure deletes the stale state row and provisions fresh. if (!workloadEquals(opts.workload ?? null, state.workload ?? null)) { console.warn( - `[FreestyleSandboxRunner] resume vm ${state.vmId} skipped: workload changed (persisted=${JSON.stringify(state.workload)} current=${JSON.stringify(opts.workload ?? null)}); will recreate`, + `[${LOG_LABEL}] resume vm ${state.vmId} skipped: workload changed (persisted=${JSON.stringify(state.workload)} current=${JSON.stringify(opts.workload ?? null)}); will recreate`, ); return null; } const workload = opts.workload ?? state.workload ?? null; - // VmSpec is a pure builder — deterministic rebuild matches create-time spec. const spec = this.buildSpec({ repo: state.repo, workload, @@ -350,60 +349,81 @@ export class FreestyleSandboxRunner implements SandboxRunner { return null; } return { + id, handle: state.vmId, vmId: state.vmId, previewDomain: state.previewDomain, workdir: state.workdir ?? APP_WORKDIR, - id, workload, repo: state.repo, daemonToken: state.daemonToken, }; } - private async provision( - id: SandboxId, - opts: EnsureOptions, - ): Promise { - const repo = opts.repo!; - const workload = opts.workload ?? null; - const previewDomain = `${this.computeDomainKey(id)}.${this.previewRootDomain}`; - // 32 bytes (256 bits) of entropy; daemon requires ≥ 32 chars. - const daemonToken = randomBytes(32).toString("hex"); - const spec = this.buildSpec({ repo, workload, daemonToken }); - let result: { vmId: string }; - try { - result = await freestyle.vms.create({ - spec, - domains: [{ domain: previewDomain, vmPort: PROXY_PORT }], - recreate: true, - idleTimeoutSeconds: this.idleTimeoutSeconds, - }); - } catch (err) { - // Freestyle wraps failures as InternalErrorError — name the call site. - throw new Error( - `[FreestyleSandboxRunner] vms.create failed for domain=${previewDomain} user=${id.userId} projectRef=${id.projectRef}: ${ - err instanceof Error ? err.message : String(err) - }`, - ); - } - const handle = result.vmId; + // ---- Handle resolution (post-restart) ------------------------------------- + + private async getRecord(handle: string): Promise { + const cached = this.records.get(handle); + if (cached) return cached; + if (!this.stateStore) return null; + const persisted = await this.stateStore.getByHandle(RUNNER_KIND, handle); + if (!persisted) return null; + const state = persisted.state as Partial; + if (!state.vmId || !state.previewDomain || !state.repo) return null; + // Pre-auth row (no token) — caller can't talk to the daemon. + if (!state.daemonToken) return null; + const rec: FreestyleRecord = { + id: persisted.id, + handle: persisted.handle, + vmId: state.vmId, + previewDomain: state.previewDomain, + workdir: state.workdir ?? APP_WORKDIR, + workload: state.workload ?? null, + repo: state.repo, + daemonToken: state.daemonToken, + }; + this.records.set(handle, rec); + return rec; + } + + private async requireRecord(handle: string): Promise { + const rec = await this.getRecord(handle); + if (!rec) throw new Error(`unknown freestyle sandbox handle ${handle}`); + return rec; + } + + // ---- Persistence ---------------------------------------------------------- + + private async persist( + ops: RunnerStateStoreOps | null, + rec: FreestyleRecord, + ): Promise { + if (!ops) return; + const state: PersistedFreestyleState = { + vmId: rec.vmId, + previewDomain: rec.previewDomain, + workdir: rec.workdir, + workload: rec.workload, + repo: rec.repo, + daemonToken: rec.daemonToken, + }; + await ops.put(rec.id, RUNNER_KIND, { handle: rec.handle, state }); + } + + // ---- Helpers -------------------------------------------------------------- + + private toSandbox(rec: FreestyleRecord): Sandbox { return { - handle, - vmId: result.vmId, - previewDomain, - workdir: APP_WORKDIR, - id, - workload, - repo, - daemonToken, + handle: rec.handle, + workdir: rec.workdir, + previewUrl: `https://${rec.previewDomain}`, }; } /** - * Stable per-(userId, projectRef) domain key. 16 hex (64 bits) is - * enough collision resistance for a per-user routing key; old VMs idle - * out via `idleTimeoutSeconds`. + * Stable per-(userId, projectRef) domain key. 16 hex (64 bits) is enough + * collision resistance for a per-user routing key; old VMs idle out via + * `idleTimeoutSeconds`. */ private computeDomainKey(id: SandboxId): string { return createHash("sha256") @@ -503,55 +523,6 @@ export class FreestyleSandboxRunner implements SandboxRunner { : baseSpec; } - private async lookupRecord(handle: string): Promise { - const cached = this.byHandle.get(handle); - if (cached) return cached; - if (!this.stateStore) return null; - const persisted = await this.stateStore.getByHandle(RUNNER_KIND, handle); - if (!persisted) return null; - const state = persisted.state as Partial; - if (!state.vmId || !state.previewDomain || !state.repo) return null; - // Pre-auth row (no token) — caller can't talk to the daemon. Resume will - // return null on a fresh ensure; here we surface null too so proxy paths - // 404 instead of calling with a missing token. - if (!state.daemonToken) return null; - const rec: FreestyleRecord = { - handle: persisted.handle, - vmId: state.vmId, - previewDomain: state.previewDomain, - workdir: state.workdir ?? APP_WORKDIR, - id: persisted.id, - workload: state.workload ?? null, - repo: state.repo, - daemonToken: state.daemonToken, - }; - this.byHandle.set(handle, rec); - return rec; - } - - private async requireRecord(handle: string): Promise { - const rec = await this.lookupRecord(handle); - if (!rec) throw new Error(`unknown freestyle sandbox handle ${handle}`); - return rec; - } - - private async persist( - id: SandboxId, - rec: FreestyleRecord, - store: RunnerStateStoreOps | null, - ): Promise { - if (!store) return; - const state: PersistedFreestyleState = { - vmId: rec.vmId, - previewDomain: rec.previewDomain, - workdir: rec.workdir, - workload: rec.workload, - repo: rec.repo, - daemonToken: rec.daemonToken, - }; - await store.put(id, RUNNER_KIND, { handle: rec.handle, state }); - } - /** Same base64 scheme as `proxyDaemonRequest` — parity with exec path. */ private async postDaemon( rec: FreestyleRecord, @@ -571,10 +542,34 @@ export class FreestyleSandboxRunner implements SandboxRunner { } } +// ---- Helpers ---------------------------------------------------------------- + +/** stop() + delete() a VM; timebound + errors logged, not thrown. */ +async function disposeVm(vmId: string, reason: string): Promise { + try { + const vm = freestyle.vms.ref({ vmId }); + await Promise.race([ + vm.stop().then(() => vm.delete()), + new Promise((_, reject) => + setTimeout( + () => reject(new Error("freestyle vm.delete() timed out")), + DISPOSE_TIMEOUT_MS, + ), + ), + ]); + } catch (err) { + console.error( + `[${LOG_LABEL}] dispose vm ${vmId} (${reason}) failed: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + } +} + /** - * Docker `/_daemon/*` → freestyle `/_decopilot_vm/*`. Returns null for - * paths with no freestyle analogue (caller 204s). Exported for tests — - * easiest Docker-vs-Freestyle surface to break silently. + * Docker `/_daemon/*` → freestyle `/_decopilot_vm/*`. Returns null for paths + * with no freestyle analogue (caller 204s). Exported for tests — easiest + * Docker-vs-Freestyle surface to break silently. */ export function translateDaemonPath(path: string): string | null { const stripped = path.replace(/^\/_daemon(?=\/|$)/, "") || "/"; @@ -609,7 +604,7 @@ function deriveRepoLabel(cloneUrl: string): string { } } -/** Mirrors the decode path in `freestyle-daemon-script.ts`'s `parseJsonBody`. */ +/** Mirrors the decode path in `daemon-script.ts`'s `parseJsonBody`. */ function encodeBase64Utf8(text: string): string { return btoa( encodeURIComponent(text).replace(/%([0-9A-F]{2})/g, (_, p1) => diff --git a/packages/mesh-plugin-user-sandbox/server/runner/index.ts b/packages/mesh-plugin-user-sandbox/server/runner/index.ts index 95f5f9d48c..e9b1281d8a 100644 --- a/packages/mesh-plugin-user-sandbox/server/runner/index.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/index.ts @@ -1,18 +1,20 @@ /** - * Public surface. Ships `DockerSandboxRunner` only — runners with heavy SDKs - * (Freestyle, K8s) live in apps/mesh and slot into `getRunnerByKind`. + * Public surface. Ships `DockerSandboxRunner` only via the default entry; + * Freestyle sits behind its own subpath export (./runner/freestyle) because + * its SDK is heavy and not every deploy needs it. */ import { spawnSync } from "node:child_process"; import { DockerSandboxRunner, type DockerRunnerOptions } from "./docker"; import type { RunnerStateStore } from "./state-store"; -import type { SandboxRunner } from "./types"; +import type { RunnerKind, SandboxRunner } from "./types"; export type { EnsureOptions, ExecInput, ExecOutput, ProxyRequestInit, + RunnerKind, Sandbox, SandboxId, SandboxRunner, @@ -23,12 +25,12 @@ export { DockerSandboxRunner } from "./docker"; export type { DockerExec, DockerRunnerOptions, ExecResult } from "./docker"; export { ensureSandboxImage } from "../image-build"; export type { EnsureImageOptions } from "../image-build"; -export { startLocalSandboxIngress } from "./local-ingress"; +export { startLocalSandboxIngress } from "./docker"; export { sweepDockerOrphansOnBoot, sweepDockerOrphansOnShutdown, -} from "./sweep"; -export type { SweepDockerOrphansOnBootOptions } from "./sweep"; +} from "./docker"; +export type { SweepDockerOrphansOnBootOptions } from "./docker"; export type { RunnerStateRecord, RunnerStateRecordWithId, @@ -43,12 +45,6 @@ export { type ThreadSandboxRefInput, } from "./sandbox-ref"; -/** - * Discriminator used on `SandboxRunner.kind`, `sandbox_runner_state.runner_kind`, - * and `vmMap` entries. Keep in sync with each runner's `readonly kind`. - */ -export type RunnerKind = "docker" | "freestyle"; - export interface CreateDockerRunnerOptions { stateStore?: RunnerStateStore; docker?: Omit; diff --git a/packages/mesh-plugin-user-sandbox/server/runner/shared/dev-server.ts b/packages/mesh-plugin-user-sandbox/server/runner/shared/dev-server.ts new file mode 100644 index 0000000000..66581c15d6 --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/shared/dev-server.ts @@ -0,0 +1,57 @@ +/** + * Dev-server control on the daemon's /_daemon/dev/* endpoints. Fire-and- + * forget start (the daemon is idempotent); best-effort stop before teardown. + * + * Freestyle runs its dev server under systemd; these are not called there. + */ + +import { proxyDaemonRequest } from "../../daemon-client"; +import type { Workload } from "../types"; + +const DEV_START_TIMEOUT_MS = 30_000; + +/** + * Kick the daemon's dev server. When no workload hint is available the daemon + * sniffs runtime/script from the workdir (package.json / deno.json) and picks + * `dev` or `start`. + */ +export function startDevServer( + daemonUrl: string, + token: string, + workload: Workload | null, + logLabel: string, +): void { + const body = workload ? JSON.stringify({ runtime: workload.runtime }) : "{}"; + proxyDaemonRequest(daemonUrl, token, "/_daemon/dev/start", { + method: "POST", + headers: new Headers({ "content-type": "application/json" }), + body, + signal: AbortSignal.timeout(DEV_START_TIMEOUT_MS), + }).catch((err) => { + const isAbort = err instanceof Error && err.name === "AbortError"; + const detail = isAbort + ? `timed out after ${DEV_START_TIMEOUT_MS}ms` + : err instanceof Error + ? err.message + : String(err); + console.error(`[${logLabel}] /dev/start failed: ${detail}`); + }); +} + +export async function stopDevServer( + daemonUrl: string, + token: string, + logLabel: string, +): Promise { + await proxyDaemonRequest(daemonUrl, token, "/_daemon/dev/stop", { + method: "POST", + headers: new Headers(), + body: null, + }).catch((err) => + console.warn( + `[${logLabel}] graceful dev-stop failed: ${ + err instanceof Error ? err.message : String(err) + }`, + ), + ); +} diff --git a/packages/mesh-plugin-user-sandbox/server/runner/shared/handle.ts b/packages/mesh-plugin-user-sandbox/server/runner/shared/handle.ts new file mode 100644 index 0000000000..74a7af82c3 --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/shared/handle.ts @@ -0,0 +1,10 @@ +import { createHash } from "node:crypto"; +import { sandboxIdKey, type SandboxId } from "../types"; + +/** Stable short hash of a SandboxId. Length in hex chars (default 16). */ +export function hashSandboxId(id: SandboxId, length = 16): string { + return createHash("sha256") + .update(sandboxIdKey(id)) + .digest("hex") + .slice(0, length); +} diff --git a/packages/mesh-plugin-user-sandbox/server/runner/shared/index.ts b/packages/mesh-plugin-user-sandbox/server/runner/shared/index.ts new file mode 100644 index 0000000000..2c4b524ce0 --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/shared/index.ts @@ -0,0 +1,5 @@ +export { Inflight } from "./inflight"; +export { withSandboxLock } from "./lock"; +export { hashSandboxId } from "./handle"; +export { applyPreviewPattern } from "./preview-url"; +export { startDevServer, stopDevServer } from "./dev-server"; diff --git a/packages/mesh-plugin-user-sandbox/server/runner/shared/inflight.ts b/packages/mesh-plugin-user-sandbox/server/runner/shared/inflight.ts new file mode 100644 index 0000000000..3b68f3bce0 --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/shared/inflight.ts @@ -0,0 +1,20 @@ +/** + * In-process dedupe: concurrent calls with the same key share one promise. + * Paired with the state store's advisory lock for cross-pod serialization; + * this map only covers intra-process races. + */ +export class Inflight { + private readonly map = new Map>(); + + async run(key: K, fn: () => Promise): Promise { + const pending = this.map.get(key); + if (pending) return pending; + const p = fn(); + this.map.set(key, p); + try { + return await p; + } finally { + this.map.delete(key); + } + } +} diff --git a/packages/mesh-plugin-user-sandbox/server/runner/shared/lock.ts b/packages/mesh-plugin-user-sandbox/server/runner/shared/lock.ts new file mode 100644 index 0000000000..9f3ad39ffe --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/shared/lock.ts @@ -0,0 +1,22 @@ +/** + * Uniform wrapper over the three state-store shapes: + * - no store → pass null ops + * - store without lock → pass the store itself (tests; single-pod dev) + * - store with lock → serialize on (id, kind); pass the lock-scoped ops + * + * The scoped ops reuse the lock's connection so nested reads/writes don't + * starve the main pool during long provisioning. + */ +import type { RunnerStateStore, RunnerStateStoreOps } from "../state-store"; +import type { RunnerKind, SandboxId } from "../types"; + +export function withSandboxLock( + store: RunnerStateStore | null, + id: SandboxId, + kind: RunnerKind, + fn: (ops: RunnerStateStoreOps | null) => Promise, +): Promise { + if (!store) return fn(null); + if (!store.withLock) return fn(store); + return store.withLock(id, kind, fn); +} diff --git a/packages/mesh-plugin-user-sandbox/server/runner/shared/preview-url.ts b/packages/mesh-plugin-user-sandbox/server/runner/shared/preview-url.ts new file mode 100644 index 0000000000..3ac071d2af --- /dev/null +++ b/packages/mesh-plugin-user-sandbox/server/runner/shared/preview-url.ts @@ -0,0 +1,17 @@ +/** + * `{handle}` placeholder substitutes; otherwise hostname-prefix. Trailing + * slash normalized. Invalid URLs fall back to `${base}/${handle}/`. + */ +export function applyPreviewPattern(pattern: string, handle: string): string { + const base = pattern.replace(/\/+$/, ""); + if (base.includes("{handle}")) { + return `${base.replace("{handle}", handle)}/`; + } + try { + const u = new URL(base); + u.hostname = `${handle}.${u.hostname}`; + return `${u.toString()}/`; + } catch { + return `${base}/${handle}/`; + } +} diff --git a/packages/mesh-plugin-user-sandbox/server/runner/types.ts b/packages/mesh-plugin-user-sandbox/server/runner/types.ts index ebeaf1776b..7e6463fd74 100644 --- a/packages/mesh-plugin-user-sandbox/server/runner/types.ts +++ b/packages/mesh-plugin-user-sandbox/server/runner/types.ts @@ -71,12 +71,14 @@ export interface ProxyRequestInit { signal?: AbortSignal; } +/** + * Persisted on `vmMap` and `sandbox_runner_state.runner_kind`. When widening, + * keep `VmMapEntry.runnerKind` in sync. + */ +export type RunnerKind = "docker" | "freestyle"; + export interface SandboxRunner { - /** - * Persisted on `vmMap` and `sandbox_runner_state.runner_kind`. When - * widening, keep `VmMapEntry.runnerKind` + `RunnerKind` (runner/index.ts) in sync. - */ - readonly kind: "docker" | "freestyle"; + readonly kind: RunnerKind; ensure(id: SandboxId, opts?: EnsureOptions): Promise; exec(handle: string, input: ExecInput): Promise;