From 4758a038774baf6389da5c1bfabd72b5e115aa13 Mon Sep 17 00:00:00 2001 From: David Cramer Date: Fri, 5 Jun 2026 05:17:56 +0200 Subject: [PATCH] feat(http): add HTTP interception packages Add engine-neutral HTTP fixture and replay helpers that share the core replay cassette behavior. Add a Vercel Sandbox adapter for forwarded egress requests with direct fixtures, replay fallback, and live fetch routing. Co-Authored-By: OpenAI Codex --- README.md | 12 + docs/architecture.md | 34 + docs/development-guide.md | 13 + docs/testing.md | 9 + packages/docs/astro.config.mjs | 1 + packages/docs/src/content/docs/docs.mdx | 4 + .../content/docs/docs/http-interceptors.mdx | 199 +++++ packages/http-vercel-sandbox/README.md | 46 ++ packages/http-vercel-sandbox/package.json | 41 + .../http-vercel-sandbox/src/index.test.ts | 182 +++++ packages/http-vercel-sandbox/src/index.ts | 404 ++++++++++ packages/http-vercel-sandbox/tsconfig.json | 4 + packages/http-vercel-sandbox/tsup.config.ts | 11 + packages/http/README.md | 59 ++ packages/http/package.json | 41 + packages/http/src/index.test.ts | 311 ++++++++ packages/http/src/index.ts | 706 ++++++++++++++++++ packages/http/tsconfig.json | 4 + packages/http/tsup.config.ts | 11 + packages/vitest-evals/README.md | 7 + packages/vitest-evals/src/replay.ts | 4 + pnpm-lock.yaml | 12 + tsconfig.base.json | 4 + 23 files changed, 2119 insertions(+) create mode 100644 packages/docs/src/content/docs/docs/http-interceptors.mdx create mode 100644 packages/http-vercel-sandbox/README.md create mode 100644 packages/http-vercel-sandbox/package.json create mode 100644 packages/http-vercel-sandbox/src/index.test.ts create mode 100644 packages/http-vercel-sandbox/src/index.ts create mode 100644 packages/http-vercel-sandbox/tsconfig.json create mode 100644 packages/http-vercel-sandbox/tsup.config.ts create mode 100644 packages/http/README.md create mode 100644 packages/http/package.json create mode 100644 packages/http/src/index.test.ts create mode 100644 
packages/http/src/index.ts create mode 100644 packages/http/tsconfig.json create mode 100644 packages/http/tsup.config.ts diff --git a/README.md b/README.md index aeae38c..5039de8 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ Monorepo for the explicit-run `vitest-evals` shape: - `packages/vitest-evals`: core suite API, judges, normalized harness/session types, and reporter +- `packages/http`: engine-neutral HTTP interception and replay helpers +- `packages/http-vercel-sandbox`: Vercel Sandbox forwarded-request adapter for + `@vitest-evals/http` - `packages/harness-ai-sdk`: `ai-sdk`-focused harness adapter - `packages/harness-openai-agents`: `@openai/agents`-focused harness adapter - `packages/harness-pi-ai`: `pi-ai`-focused harness adapter with tool replay @@ -33,6 +36,8 @@ Monorepo for the explicit-run `vitest-evals` shape: ```text packages/ vitest-evals/ + http/ + http-vercel-sandbox/ harness-ai-sdk/ harness-openai-agents/ harness-pi-ai/ @@ -251,6 +256,13 @@ otherwise. `record` always calls live and overwrites recordings — use it to refresh fixtures intentionally. Recordings are stored under `.vitest-evals/recordings//`. +For applications that make outbound service calls outside local tool wrappers, +`@vitest-evals/http` exposes an engine-neutral HTTP interceptor primitive. +Vercel Sandbox support lives in `@vitest-evals/http-vercel-sandbox`, while +Docker proxies, MSW servers, Playwright routes, or fetch shims can adapt +traffic into `{ request, upstreamUrl, provider, engine }` and share fixture +chaining plus replay-aware recording. + `pnpm evals` fans out to each workspace package or app that exposes an `evals` script. 
The shared eval CLI defaults replay to `auto` and writes recordings under `.vitest-evals/recordings`, unless those environment variables are diff --git a/docs/architecture.md b/docs/architecture.md index a820c8c..04d85f7 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -25,8 +25,11 @@ packages/ harness.ts index.ts reporter.ts + replay.ts judges/ legacy/ + http/ + http-vercel-sandbox/ harness-ai-sdk/ harness-openai-agents/ harness-pi-ai/ @@ -67,6 +70,37 @@ tool counts, retries, provider, and model. Provider-specific cost estimates are not normalized because pricing semantics vary by runtime and can be stale; if a harness needs to retain them, store them under `usage.metadata`. +### `packages/http` + +Defines the engine-neutral HTTP interceptor package: + +- `HttpInterceptRequest` +- `HttpInterceptor` +- `createHttpInterceptor(...)` +- `executeHttpWithReplay(...)` +- `createHttpReplayInterceptor(...)` + +Engines such as Docker egress proxies, MSW, Playwright routing, or fetch shims +own the transport-specific work of constructing a Fetch `Request` for the +intended upstream URL. The package owns the fixture chain, deterministic +unhandled responses, and replay-backed request/response cassette behavior. + +HTTP replay uses the same `VITEST_EVALS_REPLAY_MODE` and +`VITEST_EVALS_REPLAY_DIR` settings as tool replay, but it records serialized +HTTP request/response pairs instead of local tool inputs and outputs. + +### `packages/http-vercel-sandbox` + +Adapts Vercel Sandbox forwarded HTTP requests into `@vitest-evals/http`: + +- validates forwarded host, scheme, port, and path headers +- strips Vercel proxy-only and hop-by-hop headers from the upstream request +- applies app-owned credential/header transforms +- routes traffic through interceptors before optional live fetch fallback + +It intentionally does not own Vercel OIDC verification, requester +authorization, credential issuance, or sandbox network policy. 
+ ### `packages/vitest-evals/src/index.ts` Defines the harness-first public API: diff --git a/docs/development-guide.md b/docs/development-guide.md index 8b962e3..9a1f683 100644 --- a/docs/development-guide.md +++ b/docs/development-guide.md @@ -32,6 +32,7 @@ The repository is now harness-first: When changing behavior, decide first which surface you are touching: - root harness/judge API +- HTTP interception/replay packages - reporter output - GitHub JSON post-processing output - a first-party harness package @@ -59,6 +60,18 @@ Owns: - reporter integration - legacy compatibility exports +### `packages/http` + +Owns engine-neutral HTTP interception and request/response replay. It depends +on the core replay primitive but stays outside the root package so engine +adapters can evolve independently. + +### `packages/http-vercel-sandbox` + +Owns Vercel Sandbox forwarded-request adaptation for `@vitest-evals/http`. +Keep Vercel-specific forwarded header parsing here, not in core and not in the +engine-neutral HTTP package. 
+ ### `packages/harness-ai-sdk` Owns: diff --git a/docs/testing.md b/docs/testing.md index 66d4c7c..e2b3f9e 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -53,6 +53,15 @@ Cover: - matcher behavior for `toSatisfyJudge(...)` - any task metadata the reporter depends on +### HTTP Interceptor Package Changes + +Cover: + +- interceptor chaining and pass-through behavior +- request/response serialization and redaction +- replay modes, cache keys, and recording metadata +- engine adapter assumptions such as cloned request bodies + ### Reporter Changes Cover: diff --git a/packages/docs/astro.config.mjs b/packages/docs/astro.config.mjs index 29175c0..7e396f0 100644 --- a/packages/docs/astro.config.mjs +++ b/packages/docs/astro.config.mjs @@ -53,6 +53,7 @@ export default defineConfig({ ], }, { label: "Tool Replay", link: "/docs/tool-replay" }, + { label: "HTTP Interceptors", link: "/docs/http-interceptors" }, { label: "GitHub Reporting", link: "/docs/github" }, ], }, diff --git a/packages/docs/src/content/docs/docs.mdx b/packages/docs/src/content/docs/docs.mdx index a13bb34..19e1ca2 100644 --- a/packages/docs/src/content/docs/docs.mdx +++ b/packages/docs/src/content/docs/docs.mdx @@ -89,6 +89,10 @@ keeps the documentation focused on the adapter boundary: Tool Replay Record deterministic tool calls without hiding model behavior. + + HTTP Interceptors + Mock, record, or replay outbound HTTP through sandbox, proxy, or browser engines. + GitHub Reporting Publish eval summaries and checks from workflow JSON output. diff --git a/packages/docs/src/content/docs/docs/http-interceptors.mdx b/packages/docs/src/content/docs/docs/http-interceptors.mdx new file mode 100644 index 0000000..daffd9b --- /dev/null +++ b/packages/docs/src/content/docs/docs/http-interceptors.mdx @@ -0,0 +1,199 @@ +--- +title: HTTP Interceptors +description: Intercept, mock, record, or replay outbound HTTP through any engine that can expose Fetch requests. 
+editUrl: false +--- + +HTTP interceptors are the request-level sibling of tool replay. Use them when +an eval exercises a full application or sandbox where outbound service calls do +not pass through a local tool wrapper. + +The primitive is engine-neutral: Vercel Sandbox forwarding, a Docker egress +proxy, MSW, Playwright routing, or a fetch shim can all adapt outbound traffic +into the same `HttpInterceptRequest` shape. + +## Interceptor Shape + +An interceptor receives the upstream `Request`, the original `URL`, and optional +provider/engine labels. Return a `Response` to handle the request, or +`undefined` to let the next interceptor or live transport decide. + +```ts title="evals/interceptHttp.ts" +import { + createHttpFixtureInterceptor, + createHttpInterceptor, + httpFixture, + unhandledHttpResponse, + type HttpInterceptRequest, +} from "@vitest-evals/http"; + +async function githubFixture( + input: HttpInterceptRequest, +): Promise { + if ( + input.provider !== "github" || + input.upstreamUrl.hostname !== "api.github.com" + ) { + return undefined; + } + + if (input.request.method === "GET" && input.upstreamUrl.pathname === "/user") { + return Response.json({ login: "eval-user" }); + } + + return new Response("missing GitHub fixture\n", { status: 501 }); +} + +export const interceptHttp = createHttpInterceptor([githubFixture], { + unhandled: unhandledHttpResponse, +}); +``` + +The composer clones the request before each interceptor so one fixture can +inspect a body without consuming it for the next fixture. 
+ +Use `createHttpFixtureInterceptor()` when direct fixture injection should be +declarative: + +```ts title="evals/interceptHttp.ts" +const staticFixtures = createHttpFixtureInterceptor([ + httpFixture.get("/health", Response.json({ ok: true })), + httpFixture.post( + { + hostname: "api.github.com", + pathname: "/graphql", + }, + async (input) => + Response.json({ + data: { + request: await input.request.json(), + viewer: { login: "eval-user" }, + }, + }), + ), +]); +``` + +## Replay HTTP + +Use `createHttpReplayInterceptor()` when the request should be recorded on a +cache miss and replayed on later runs. It uses the same +`VITEST_EVALS_REPLAY_MODE` and `VITEST_EVALS_REPLAY_DIR` settings as tool +replay. + +```ts title="evals/interceptHttp.ts" +import { + createHttpInterceptor, + createHttpReplayInterceptor, + httpFixture, +} from "@vitest-evals/http"; + +export const interceptHttp = createHttpInterceptor([ + staticFixtures, + githubFixture, + createHttpReplayInterceptor({ + name: "sandbox-egress", + replay: { + version: "v1", + key: (request) => ({ + method: request.method, + url: request.url, + body: request.body ?? null, + }), + }, + }), +]); +``` + +Direct fixtures should usually come before replay so hand-authored responses +win and the remaining traffic records or replays. By default, HTTP replay keys +on method, URL, and body. Recordings include +request and response headers and bodies, with common sensitive headers such as +`authorization`, `cookie`, and `set-cookie` redacted. Use `sanitize` for body +redaction or fixture-specific response trimming. Use `redactHeaders` when a +contract test needs a narrower header redaction list, or `redactHeaders: false` +when the cassette is already fully sanitized elsewhere. + +## Vercel Sandbox + +Use `@vitest-evals/http-vercel-sandbox` when Vercel Sandbox forwards egress +traffic back to your app. 
The adapter parses Vercel's forwarded headers, +reconstructs the intended upstream `Request`, strips proxy-only headers, and +then calls the generic HTTP interceptor chain. + +```ts title="api/internal/sandbox-egress.ts" +import { + createHttpInterceptor, + createHttpReplayInterceptor, + httpFixture, +} from "@vitest-evals/http"; +import { proxyVercelSandboxHttp } from "@vitest-evals/http-vercel-sandbox"; + +const interceptHttp = createHttpInterceptor([ + createHttpReplayInterceptor({ + name: "sandbox-egress", + replay: { + version: "v1", + }, + }), +]); + +export async function ALL(request: Request): Promise<Response> { + await verifyVercelOidc(request); + + return await proxyVercelSandboxHttp(request, { + fixtures: [ + httpFixture.get("/health", Response.json({ ok: true })), + ], + interceptHttp, + provider: ({ upstreamUrl }) => providerForHost(upstreamUrl.hostname), + headers: ({ provider, upstreamUrl }) => + credentialHeadersFor(provider, upstreamUrl), + }); +} +``` + +Your app still owns OIDC verification, requester authorization, credential +issuance, and sandbox network policy. The package only adapts Vercel's +forwarded request format into `@vitest-evals/http`. + +## Other Engine Adapters + +Engine adapters should keep their own transport responsibilities. They should +only call the interceptor after they have reconstructed the upstream request +the app intended to make. + +```ts title="sandbox-egress.ts" +import { interceptHttp } from "./evals/interceptHttp"; + +export async function handleSandboxEgress(proxyRequest: Request) { + const upstreamUrl = forwardedUrlFromEngine(proxyRequest); + const provider = providerForHost(upstreamUrl.hostname); + const body = + proxyRequest.body && + proxyRequest.method !== "GET" && + proxyRequest.method !== "HEAD" + ? await proxyRequest.arrayBuffer() + : undefined; + const request = new Request(upstreamUrl, { + method: proxyRequest.method, + headers: credentialHeadersFor(provider, upstreamUrl), + ...(body ?
{ body } : {}), + }); + + const intercepted = await interceptHttp({ + engine: "vercel-sandbox", + provider, + request, + upstreamUrl, + }); + if (intercepted) { + return intercepted; + } + + return fetch(request); +} +``` + +That keeps Vercel-specific forwarded headers, Docker proxy routing, browser +route APIs, credential injection, and live network policy outside +`vitest-evals`, while still sharing fixture and replay behavior across engines. diff --git a/packages/http-vercel-sandbox/README.md b/packages/http-vercel-sandbox/README.md new file mode 100644 index 0000000..0218b34 --- /dev/null +++ b/packages/http-vercel-sandbox/README.md @@ -0,0 +1,46 @@ +# @vitest-evals/http-vercel-sandbox + +Vercel Sandbox HTTP adapter for `@vitest-evals/http`. + +## Install + +```sh +npm install -D vitest-evals @vitest-evals/http @vitest-evals/http-vercel-sandbox +``` + +## Usage + +```ts +import { + createHttpInterceptor, + createHttpReplayInterceptor, + httpFixture, +} from "@vitest-evals/http"; +import { proxyVercelSandboxHttp } from "@vitest-evals/http-vercel-sandbox"; + +const interceptHttp = createHttpInterceptor([ + createHttpReplayInterceptor({ + name: "sandbox-egress", + replay: true, + }), +]); + +export async function ALL(request: Request): Promise { + return await proxyVercelSandboxHttp(request, { + fixtures: [ + httpFixture.get("/health", Response.json({ ok: true })), + ], + interceptHttp, + provider: ({ upstreamUrl }) => providerForHost(upstreamUrl.hostname), + headers: ({ provider, upstreamUrl }) => + credentialHeadersFor(provider, upstreamUrl), + }); +} +``` + +This package reconstructs Vercel Sandbox forwarded requests into the generic +`HttpInterceptRequest` shape. Your app remains responsible for OIDC +verification, requester authorization, and credential policy. + +Direct `fixtures` run before `interceptHttp`, so Vercel Sandbox evals can mix +hand-authored responses with record/replay fallback. 
diff --git a/packages/http-vercel-sandbox/package.json b/packages/http-vercel-sandbox/package.json new file mode 100644 index 0000000..c5ef2bb --- /dev/null +++ b/packages/http-vercel-sandbox/package.json @@ -0,0 +1,41 @@ +{ + "name": "@vitest-evals/http-vercel-sandbox", + "version": "0.11.0", + "description": "Vercel Sandbox HTTP adapter for vitest-evals.", + "repository": { + "type": "git", + "url": "git+https://github.com/getsentry/vitest-evals.git", + "directory": "packages/http-vercel-sandbox" + }, + "author": "David Cramer", + "license": "Apache-2.0", + "bugs": { + "url": "https://github.com/getsentry/vitest-evals/issues" + }, + "homepage": "https://github.com/getsentry/vitest-evals/tree/main/packages/http-vercel-sandbox#readme", + "sideEffects": false, + "types": "./dist/index.d.ts", + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "files": ["dist"], + "publishConfig": { + "access": "public" + }, + "exports": { + ".": { + "source": "./src/index.ts", + "types": "./dist/index.d.ts", + "require": "./dist/index.js", + "import": "./dist/index.mjs" + } + }, + "scripts": { + "build": "tsup --config ./tsup.config.ts" + }, + "peerDependencies": { + "@vitest-evals/http": "*" + }, + "devDependencies": { + "@vitest-evals/http": "workspace:*" + } +} diff --git a/packages/http-vercel-sandbox/src/index.test.ts b/packages/http-vercel-sandbox/src/index.test.ts new file mode 100644 index 0000000..e554b4e --- /dev/null +++ b/packages/http-vercel-sandbox/src/index.test.ts @@ -0,0 +1,182 @@ +import { createHttpInterceptor } from "@vitest-evals/http"; +import { expect, test, vi } from "vitest"; +import { + createVercelSandboxHttpInterceptRequest, + createVercelSandboxUpstreamUrl, + interceptVercelSandboxHttp, + isVercelSandboxForwardedRequest, + proxyVercelSandboxHttp, + VERCEL_FORWARDED_HOST_HEADER, + VERCEL_FORWARDED_PATH_HEADER, + VERCEL_FORWARDED_PORT_HEADER, + VERCEL_FORWARDED_SCHEME_HEADER, + VERCEL_SANDBOX_OIDC_TOKEN_HEADER, + 
VercelSandboxForwardedRequestError, +} from "./index"; + +function forwardedRequest( + options: { + body?: BodyInit; + headers?: Record; + method?: string; + path?: string; + } = {}, +) { + return new Request("https://app.example.test/api/internal/sandbox-egress", { + method: options.method ?? (options.body ? "POST" : "GET"), + ...(options.body ? { body: options.body } : {}), + headers: { + [VERCEL_FORWARDED_HOST_HEADER]: "API.Example.Test", + [VERCEL_FORWARDED_SCHEME_HEADER]: "https", + [VERCEL_FORWARDED_PATH_HEADER]: options.path ?? "/v1/search?q=first", + [VERCEL_SANDBOX_OIDC_TOKEN_HEADER]: "signed-vercel-token", + "content-type": "application/json", + connection: "keep-alive", + host: "app.example.test", + ...(options.headers ?? {}), + }, + }); +} + +test("detects Vercel Sandbox forwarded requests", () => { + expect(isVercelSandboxForwardedRequest(forwardedRequest())).toBe(true); + expect( + isVercelSandboxForwardedRequest(new Request("https://example.test")), + ).toBe(false); +}); + +test("creates upstream URLs from Vercel forwarded headers", () => { + const request = forwardedRequest({ + headers: { + [VERCEL_FORWARDED_PORT_HEADER]: "8443", + }, + path: "/v1/repos?query=first", + }); + + expect(createVercelSandboxUpstreamUrl(request).toString()).toBe( + "https://api.example.test:8443/v1/repos?query=first", + ); +}); + +test("rejects unsafe forwarded paths", () => { + expect(() => + createVercelSandboxUpstreamUrl(forwardedRequest({ path: "//evil.test" })), + ).toThrow(VercelSandboxForwardedRequestError); +}); + +test("normalizes Vercel Sandbox requests into generic HTTP intercept requests", async () => { + const input = await createVercelSandboxHttpInterceptRequest( + forwardedRequest({ + body: JSON.stringify({ query: "first" }), + }), + { + provider: ({ upstreamUrl }) => + upstreamUrl.hostname === "api.example.test" ? 
"example" : undefined, + headers: ({ headers, provider }) => { + expect(headers.get("content-type")).toBe("application/json"); + expect(headers.get(VERCEL_SANDBOX_OIDC_TOKEN_HEADER)).toBeNull(); + return { + authorization: `Bearer ${provider}-token`, + }; + }, + metadata: { + scenario: "search", + }, + }, + ); + + expect(input.engine).toBe("vercel-sandbox"); + expect(input.provider).toBe("example"); + expect(input.upstreamUrl.toString()).toBe( + "https://api.example.test/v1/search?q=first", + ); + expect(input.metadata).toMatchObject({ + "vercel.forwarded_host": "api.example.test", + "vercel.forwarded_path": "/v1/search?q=first", + scenario: "search", + }); + expect(input.request.headers.get("authorization")).toBe( + "Bearer example-token", + ); + expect( + input.request.headers.get(VERCEL_SANDBOX_OIDC_TOKEN_HEADER), + ).toBeNull(); + expect(input.request.headers.get("connection")).toBeNull(); + await expect(input.request.json()).resolves.toEqual({ query: "first" }); +}); + +test("routes Vercel Sandbox requests through HTTP interceptors", async () => { + const interceptHttp = createHttpInterceptor([ + async (input) => { + expect(input.engine).toBe("vercel-sandbox"); + return Response.json({ ok: true }); + }, + ]); + + const response = await interceptVercelSandboxHttp(forwardedRequest(), { + interceptHttp, + provider: "example", + }); + + expect(response?.status).toBe(200); + await expect(response?.json()).resolves.toEqual({ ok: true }); +}); + +test("routes Vercel Sandbox requests through direct fixtures", async () => { + const response = await interceptVercelSandboxHttp(forwardedRequest(), { + provider: "example", + fixtures: [ + { + name: "search", + match: (input) => + input.provider === "example" && + input.upstreamUrl.pathname === "/v1/search", + response: Response.json({ fixture: true }), + }, + ], + }); + + expect(response?.status).toBe(200); + await expect(response?.json()).resolves.toEqual({ fixture: true }); +}); + +test("checks direct fixtures before 
fallback interceptors", async () => { + const interceptHttp = vi.fn(async () => Response.json({ replay: true })); + + const response = await interceptVercelSandboxHttp(forwardedRequest(), { + interceptHttp, + fixtures: [ + { + match: () => true, + response: Response.json({ fixture: true }), + }, + ], + }); + + await expect(response?.json()).resolves.toEqual({ fixture: true }); + expect(interceptHttp).not.toHaveBeenCalled(); +}); + +test("proxies Vercel Sandbox requests to live fetch when no interceptor handles them", async () => { + const fetchHttp = vi.fn(async (request: Request) => { + expect(request.url).toBe("https://api.example.test/v1/search?q=first"); + return new Response("live", { + status: 203, + headers: { + "content-length": "4", + "x-request-id": "req_123", + }, + }); + }); + + const response = await proxyVercelSandboxHttp(forwardedRequest(), { + interceptHttp: async () => undefined, + fetch: fetchHttp as typeof fetch, + }); + + expect(response.status).toBe(203); + expect(response.headers.get("content-length")).toBeNull(); + expect(response.headers.get("x-request-id")).toBe("req_123"); + await expect(response.text()).resolves.toBe("live"); + expect(fetchHttp).toHaveBeenCalledTimes(1); +}); diff --git a/packages/http-vercel-sandbox/src/index.ts b/packages/http-vercel-sandbox/src/index.ts new file mode 100644 index 0000000..ff888b8 --- /dev/null +++ b/packages/http-vercel-sandbox/src/index.ts @@ -0,0 +1,404 @@ +import { + createHttpFixtureInterceptor, + type HttpFixture, + type HttpInterceptRequest, + type HttpInterceptor, +} from "@vitest-evals/http"; + +type MaybePromise = T | Promise; + +const HOP_BY_HOP_HEADERS = new Set([ + "connection", + "host", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailer", + "transfer-encoding", + "upgrade", +]); + +const DECODED_RESPONSE_HEADERS = new Set([ + "content-encoding", + "content-length", +]); + +/** Header Vercel Sandbox sends with the upstream target host. 
*/ +export const VERCEL_FORWARDED_HOST_HEADER = "vercel-forwarded-host"; + +/** Header Vercel Sandbox sends with the upstream target scheme. */ +export const VERCEL_FORWARDED_SCHEME_HEADER = "vercel-forwarded-scheme"; + +/** Header Vercel Sandbox sends with the upstream target port when present. */ +export const VERCEL_FORWARDED_PORT_HEADER = "vercel-forwarded-port"; + +/** Header Vercel Sandbox sends with the upstream target path and query. */ +export const VERCEL_FORWARDED_PATH_HEADER = "vercel-forwarded-path"; + +/** Header Vercel Sandbox sends with the sandbox OIDC token. */ +export const VERCEL_SANDBOX_OIDC_TOKEN_HEADER = "vercel-sandbox-oidc-token"; + +/** Headers used only by Vercel's forwarding layer, not the upstream request. */ +export const VERCEL_SANDBOX_PROXY_HEADERS = [ + VERCEL_FORWARDED_HOST_HEADER, + VERCEL_FORWARDED_SCHEME_HEADER, + VERCEL_FORWARDED_PORT_HEADER, + VERCEL_FORWARDED_PATH_HEADER, + VERCEL_SANDBOX_OIDC_TOKEN_HEADER, +] as const; + +type AdapterContext = { + request: Request; + upstreamUrl: URL; + provider?: string; + headers: Headers; +}; + +/** Provider label or resolver for a Vercel Sandbox upstream request. */ +export type VercelSandboxHttpProvider = + | string + | ((input: { + request: Request; + upstreamUrl: URL; + }) => MaybePromise); + +/** Header overrides or resolver for a Vercel Sandbox upstream request. */ +export type VercelSandboxHttpHeaders = + | HeadersInit + | ((input: AdapterContext) => MaybePromise); + +/** Metadata or resolver attached to the generic HTTP intercept request. */ +export type VercelSandboxHttpMetadata = + | Record + | (( + input: AdapterContext, + ) => MaybePromise< + Record | undefined + >); + +/** Options for adapting Vercel Sandbox forwarded requests to HTTP intercepts. */ +export interface CreateVercelSandboxHttpInterceptRequestOptions { + /** Provider label used by fixtures and recordings. */ + provider?: VercelSandboxHttpProvider; + /** Headers to set after copied proxy headers are stripped. 
*/ + headers?: VercelSandboxHttpHeaders; + /** Extra metadata attached to HTTP recordings. */ + metadata?: VercelSandboxHttpMetadata; +} + +/** Shared options for routing a Vercel Sandbox request through HTTP fixtures/interceptors. */ +export interface VercelSandboxHttpRoutingOptions + extends CreateVercelSandboxHttpInterceptRequestOptions { + /** Direct request/response fixtures checked before `interceptHttp`. */ + fixtures?: readonly HttpFixture[]; + /** HTTP interceptor chain created by `@vitest-evals/http`. */ + interceptHttp?: HttpInterceptor; +} + +/** Options for routing a Vercel Sandbox forwarded request through interceptors. */ +export interface InterceptVercelSandboxHttpOptions + extends VercelSandboxHttpRoutingOptions {} + +/** Options for proxying a Vercel Sandbox request with live-fetch fallback. */ +export interface ProxyVercelSandboxHttpOptions + extends VercelSandboxHttpRoutingOptions { + /** Live fetch implementation used when no interceptor handles the request. */ + fetch?: typeof fetch; +} + +/** Error thrown when Vercel forwarded headers cannot form a safe upstream URL. */ +export class VercelSandboxForwardedRequestError extends Error { + /** HTTP status code suitable for a proxy handler response. */ + status: number; + + constructor(message: string, status = 400) { + super(message); + this.name = "VercelSandboxForwardedRequestError"; + this.status = status; + } +} + +/** Return whether a request carries Vercel Sandbox forwarded HTTP headers. */ +export function isVercelSandboxForwardedRequest(request: Request): boolean { + return Boolean( + request.headers.get(VERCEL_SANDBOX_OIDC_TOKEN_HEADER)?.trim() && + request.headers.get(VERCEL_FORWARDED_HOST_HEADER)?.trim() && + request.headers.get(VERCEL_FORWARDED_SCHEME_HEADER)?.trim() && + request.headers.get(VERCEL_FORWARDED_PATH_HEADER)?.trim(), + ); +} + +/** Convert a Vercel Sandbox forwarded request into a generic HTTP intercept request. 
*/ +export async function createVercelSandboxHttpInterceptRequest( + request: Request, + options: CreateVercelSandboxHttpInterceptRequestOptions = {}, +): Promise { + const upstreamUrl = createVercelSandboxUpstreamUrl(request); + const provider = await resolveProvider(options.provider, { + request, + upstreamUrl, + }); + const baseHeaders = copyRequestHeaders(request); + const adapterContext = { + request, + upstreamUrl, + provider, + headers: new Headers(baseHeaders), + }; + const overrides = await resolveHeaders(options.headers, adapterContext); + if (overrides) { + new Headers(overrides).forEach((value, key) => { + baseHeaders.set(key, value); + }); + } + const body = await requestBodyBytes(request); + const metadata = await resolveMetadata(options.metadata, adapterContext); + + return { + request: new Request(upstreamUrl, { + method: request.method, + headers: baseHeaders, + ...(body ? { body } : {}), + }), + upstreamUrl, + engine: "vercel-sandbox", + ...(provider ? { provider } : {}), + metadata: { + "vercel.forwarded_host": upstreamUrl.hostname, + "vercel.forwarded_path": `${upstreamUrl.pathname}${upstreamUrl.search}`, + ...(metadata ?? {}), + }, + }; +} + +/** Route a Vercel Sandbox forwarded request through an HTTP interceptor chain. */ +export async function interceptVercelSandboxHttp( + request: Request, + options: InterceptVercelSandboxHttpOptions, +): Promise { + return await interceptVercelSandboxInput( + await createVercelSandboxHttpInterceptRequest(request, options), + options, + ); +} + +/** Proxy a Vercel Sandbox forwarded request through interceptors before live fetch. */ +export async function proxyVercelSandboxHttp( + request: Request, + options: ProxyVercelSandboxHttpOptions = {}, +): Promise { + const input = await createVercelSandboxHttpInterceptRequest(request, options); + const intercepted = await interceptVercelSandboxInput(input, options); + if (intercepted) { + return intercepted; + } + + const upstream = await (options.fetch ?? 
fetch)(input.request); + return new Response(upstream.body, { + status: upstream.status, + statusText: upstream.statusText, + headers: responseHeaders(upstream), + }); +} + +async function interceptVercelSandboxInput( + input: HttpInterceptRequest, + options: VercelSandboxHttpRoutingOptions, +): Promise { + if (options.fixtures) { + const response = await createHttpFixtureInterceptor(options.fixtures)( + cloneHttpInterceptRequest(input), + ); + if (response) { + return response; + } + } + + return await options.interceptHttp?.(cloneHttpInterceptRequest(input)); +} + +function cloneHttpInterceptRequest( + input: HttpInterceptRequest, +): HttpInterceptRequest { + return { + request: input.request.clone(), + upstreamUrl: new URL(input.upstreamUrl.toString()), + ...(input.provider ? { provider: input.provider } : {}), + ...(input.engine ? { engine: input.engine } : {}), + ...(input.metadata ? { metadata: { ...input.metadata } } : {}), + }; +} + +/** Create the upstream URL carried by Vercel Sandbox forwarding headers. */ +export function createVercelSandboxUpstreamUrl(request: Request): URL { + const forwardedHost = requiredHeader(request, VERCEL_FORWARDED_HOST_HEADER); + const host = normalizeHost(forwardedHost); + if (!host) { + throw new VercelSandboxForwardedRequestError("Invalid forwarded host"); + } + + const forwardedScheme = requiredHeader( + request, + VERCEL_FORWARDED_SCHEME_HEADER, + ); + const scheme = normalizeScheme(forwardedScheme); + if (!scheme) { + throw new VercelSandboxForwardedRequestError( + "Forwarded scheme must be https", + ); + } + + const forwardedPort = request.headers.get(VERCEL_FORWARDED_PORT_HEADER); + const port = normalizePort(forwardedPort); + if (forwardedPort && !port) { + throw new VercelSandboxForwardedRequestError("Invalid forwarded port"); + } + + const path = normalizeForwardedPath( + requiredHeader(request, VERCEL_FORWARDED_PATH_HEADER), + ); + + return new URL(`${scheme}://${host}${port ? 
`:${port}` : ""}${path}`); +} + +/** Read a required forwarded header (trimmed); throws `VercelSandboxForwardedRequestError` when it is missing or blank. */ +function requiredHeader(request: Request, name: string): string { + const value = request.headers.get(name)?.trim(); + if (!value) { + throw new VercelSandboxForwardedRequestError(`Missing ${name}`); + } + return value; +} + +/** + * Lower-case and validate a forwarded host name, dropping one trailing dot. + * + * Returns `undefined` when the value is empty or contains a character that + * would change how the upstream URL parses once the host is interpolated into + * `scheme://host:port/path`: path separators, `:` (ports, IPv6 literals), + * `@` (userinfo), `?` or `#` (would swallow the forwarded path), brackets, or + * whitespace. + */ +function normalizeHost(value: string): string | undefined { + const trimmed = value.trim().toLowerCase(); + if ( + !trimmed || + trimmed.includes("/") || + trimmed.includes("\\") || + /[\s:@?#[\]]/.test(trimmed) + ) { + return undefined; + } + return trimmed.replace(/\.$/, ""); +} + +/** Accept only `https` (case-insensitive, trimmed); any other scheme yields `undefined`. */ +function normalizeScheme(value: string): "https" | undefined { + return value.trim().toLowerCase() === "https" ? "https" : undefined; +} + +/** Validate a forwarded port: 1 to 5 digits in the range 1..65535. Returns the trimmed digits, or `undefined` when absent or invalid. */ +function normalizePort(value: string | null): string | undefined { + if (!value) { + return undefined; + } + + const trimmed = value.trim(); + if (!/^\d{1,5}$/.test(trimmed)) { + return undefined; + } + + const port = Number.parseInt(trimmed, 10); + return port >= 1 && port <= 65_535 ? trimmed : undefined; +} + +/** + * Validate a forwarded path-and-query and return it in URL-normalized form. + * + * The value must start with a single `/`. A leading `//` or `/\` is rejected + * because the URL parser treats both as the start of an authority for https + * URLs, which would silently drop the first path segment. Tabs and line + * breaks are rejected because the URL parser strips them, which could + * re-create such a prefix. Fragments (`#`) are rejected outright. + * + * Throws `VercelSandboxForwardedRequestError` for any rejected value. + */ +function normalizeForwardedPath(value: string): string { + const trimmed = value.trim(); + if ( + !trimmed.startsWith("/") || + /^\/[\/\\]/.test(trimmed) || + trimmed.includes("#") || + /[\t\r\n]/.test(trimmed) + ) { + throw new VercelSandboxForwardedRequestError("Invalid forwarded path"); + } + + try { + const url = new URL(trimmed, "https://vitest-evals-forwarded.local"); + return `${url.pathname}${url.search}`; + } catch { + throw new VercelSandboxForwardedRequestError("Invalid forwarded path"); + } +} + +/** Copy the incoming request headers, omitting hop-by-hop headers and Vercel proxy-only headers. */ +function copyRequestHeaders(request: Request): Headers { + const headers = new Headers(); + request.headers.forEach((value, key) => { + const normalized = key.toLowerCase(); + if ( + HOP_BY_HOP_HEADERS.has(normalized) || + VERCEL_SANDBOX_PROXY_HEADERS.includes( + normalized as (typeof VERCEL_SANDBOX_PROXY_HEADERS)[number], + ) + ) { + return; + } + headers.append(key, value); + }); + return headers; +} + +async function requestBodyBytes( + request: Request, +): Promise { + if ( +
request.method === "GET" || + request.method === "HEAD" || + request.body === null + ) { + return undefined; + } + return await request.clone().arrayBuffer(); +} + +function responseHeaders(upstream: Response): Headers { + const headers = new Headers(); + upstream.headers.forEach((value, key) => { + const normalized = key.toLowerCase(); + if ( + !HOP_BY_HOP_HEADERS.has(normalized) && + !DECODED_RESPONSE_HEADERS.has(normalized) + ) { + headers.append(key, value); + } + }); + return headers; +} + +async function resolveProvider( + provider: VercelSandboxHttpProvider | undefined, + input: { request: Request; upstreamUrl: URL }, +): Promise { + if (typeof provider === "function") { + return await provider(input); + } + return provider; +} + +async function resolveHeaders( + headers: VercelSandboxHttpHeaders | undefined, + input: AdapterContext, +): Promise { + if (typeof headers === "function") { + return await headers(input); + } + return headers; +} + +async function resolveMetadata( + metadata: VercelSandboxHttpMetadata | undefined, + input: AdapterContext, +): Promise | undefined> { + const resolved = + typeof metadata === "function" ? 
await metadata(input) : metadata; + if (!resolved) { + return undefined; + } + + const normalized: Record = {}; + for (const [key, value] of Object.entries(resolved)) { + if (value !== undefined) { + normalized[key] = value; + } + } + return normalized; +} diff --git a/packages/http-vercel-sandbox/tsconfig.json b/packages/http-vercel-sandbox/tsconfig.json new file mode 100644 index 0000000..9e25e6e --- /dev/null +++ b/packages/http-vercel-sandbox/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.base.json", + "include": ["src/**/*.ts"] +} diff --git a/packages/http-vercel-sandbox/tsup.config.ts b/packages/http-vercel-sandbox/tsup.config.ts new file mode 100644 index 0000000..8995989 --- /dev/null +++ b/packages/http-vercel-sandbox/tsup.config.ts @@ -0,0 +1,11 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: ["src/**/*.ts", "!src/**/*.test.ts", "!src/**/*.test.*.ts"], + format: ["cjs", "esm"], + dts: true, + splitting: false, + sourcemap: true, + clean: true, + external: ["@vitest-evals/http"], +}); diff --git a/packages/http/README.md b/packages/http/README.md new file mode 100644 index 0000000..7787400 --- /dev/null +++ b/packages/http/README.md @@ -0,0 +1,59 @@ +# @vitest-evals/http + +HTTP interception and replay helpers for `vitest-evals`. 
+ +## Install + +```sh +npm install -D vitest-evals @vitest-evals/http +``` + +## Usage + +```ts +import { + createHttpFixtureInterceptor, + createHttpInterceptor, + createHttpReplayInterceptor, + httpFixture, + unhandledHttpResponse, +} from "@vitest-evals/http"; + +export const interceptHttp = createHttpInterceptor( + [ + createHttpFixtureInterceptor([ + httpFixture.get("/health", Response.json({ ok: true })), + httpFixture.post( + { + hostname: "api.github.com", + pathname: "/graphql", + }, + async (input) => + Response.json({ + data: { + request: await input.request.json(), + viewer: { login: "eval-user" }, + }, + }), + ), + ]), + createHttpReplayInterceptor({ + name: "sandbox-egress", + replay: true, + }), + ], + { + unhandled: unhandledHttpResponse, + }, +); +``` + +Use this package when eval traffic leaves the process through HTTP instead of a +local tool wrapper. It shares the same replay modes, cache keys, versions, and +sanitization model as tool replay. + +Compose direct fixtures before replay when some endpoints should always use +hand-authored responses and the rest should record/replay. + +For Vercel Sandbox forwarded egress traffic, use +`@vitest-evals/http-vercel-sandbox` with this package. 
diff --git a/packages/http/package.json b/packages/http/package.json new file mode 100644 index 0000000..4716a2b --- /dev/null +++ b/packages/http/package.json @@ -0,0 +1,41 @@ +{ + "name": "@vitest-evals/http", + "version": "0.11.0", + "description": "HTTP interception and replay helpers for vitest-evals.", + "repository": { + "type": "git", + "url": "git+https://github.com/getsentry/vitest-evals.git", + "directory": "packages/http" + }, + "author": "David Cramer", + "license": "Apache-2.0", + "bugs": { + "url": "https://github.com/getsentry/vitest-evals/issues" + }, + "homepage": "https://github.com/getsentry/vitest-evals/tree/main/packages/http#readme", + "sideEffects": false, + "types": "./dist/index.d.ts", + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "files": ["dist"], + "publishConfig": { + "access": "public" + }, + "exports": { + ".": { + "source": "./src/index.ts", + "types": "./dist/index.d.ts", + "require": "./dist/index.js", + "import": "./dist/index.mjs" + } + }, + "scripts": { + "build": "tsup --config ./tsup.config.ts" + }, + "peerDependencies": { + "vitest-evals": "*" + }, + "devDependencies": { + "vitest-evals": "workspace:*" + } +} diff --git a/packages/http/src/index.test.ts b/packages/http/src/index.test.ts new file mode 100644 index 0000000..9413265 --- /dev/null +++ b/packages/http/src/index.test.ts @@ -0,0 +1,311 @@ +import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { afterEach, expect, test, vi } from "vitest"; +import { + createHttpFixtureInterceptor, + createHttpInterceptor, + createHttpReplayInterceptor, + executeHttpWithReplay, + getReplayMetadataFromHttpResponse, + httpFixture, + unhandledHttpResponse, + type HttpInterceptRequest, + type HttpRecording, +} from "./index"; + +let replayDir: string | undefined; + +afterEach(() => { + vi.unstubAllEnvs(); + if (replayDir) { + rmSync(replayDir, { recursive: true, force: true }); + replayDir = undefined; + } 
+}); + +function httpInput() { + const upstreamUrl = new URL("https://api.example.test/v1/search?q=first"); + return { + provider: "example", + engine: "unit-test", + upstreamUrl, + request: new Request(upstreamUrl, { + method: "POST", + headers: { + authorization: "Bearer secret", + "content-type": "application/json", + }, + body: JSON.stringify({ query: "first" }), + }), + metadata: { + scenario: "search", + }, + }; +} + +test("composes HTTP interceptors and gives each one a fresh request clone", async () => { + const first = vi.fn(async (input: HttpInterceptRequest) => { + await input.request.text(); + return undefined; + }); + const second = vi.fn(async (input: HttpInterceptRequest) => { + return Response.json({ + provider: input.provider, + body: await input.request.json(), + }); + }); + const interceptHttp = createHttpInterceptor([first, second], { + unhandled: unhandledHttpResponse, + }); + + const response = await interceptHttp(httpInput()); + + expect(response?.status).toBe(200); + await expect(response?.json()).resolves.toEqual({ + provider: "example", + body: { query: "first" }, + }); + expect(first).toHaveBeenCalledTimes(1); + expect(second).toHaveBeenCalledTimes(1); +}); + +test("returns a deterministic response for unhandled HTTP traffic", async () => { + const response = unhandledHttpResponse(httpInput()); + + expect(response.status).toBe(599); + await expect(response.text()).resolves.toContain( + "POST https://api.example.test/v1/search?q=first", + ); +}); + +test("creates interceptors from direct HTTP fixtures", async () => { + const fixture = createHttpFixtureInterceptor([ + { + name: "user", + match: (input) => + input.request.method === "POST" && + input.upstreamUrl.hostname === "api.example.test", + response: async (input) => + Response.json({ + provider: input.provider, + body: await input.request.json(), + }), + }, + ]); + + const response = await fixture(httpInput()); + + expect(response?.status).toBe(200); + await 
expect(response?.json()).resolves.toEqual({ + provider: "example", + body: { query: "first" }, + }); +}); + +test("supports route-style direct HTTP fixtures", async () => { + const fixture = createHttpFixtureInterceptor([ + httpFixture.get("/v1/search", Response.json({ method: "get" })), + httpFixture.post( + { + hostname: "api.example.test", + pathname: "/v1/search", + provider: "example", + }, + async (input) => + Response.json({ + method: "post", + body: await input.request.json(), + }), + ), + ]); + + await expect( + ( + await fixture({ + ...httpInput(), + request: new Request("https://api.example.test/v1/search?q=first"), + }) + )?.json(), + ).resolves.toEqual({ method: "get" }); + await expect((await fixture(httpInput()))?.json()).resolves.toEqual({ + method: "post", + body: { query: "first" }, + }); +}); + +test("supports host and regex direct HTTP fixture routes", async () => { + const fixture = createHttpFixtureInterceptor([ + httpFixture.get("api.example.test", Response.json({ host: true })), + httpFixture.all(/other\.example\.test\/v1/, Response.json({ regex: true })), + ]); + + await expect( + ( + await fixture({ + ...httpInput(), + request: new Request("https://api.example.test/anything"), + upstreamUrl: new URL("https://api.example.test/anything"), + }) + )?.json(), + ).resolves.toEqual({ host: true }); + await expect( + ( + await fixture({ + ...httpInput(), + request: new Request("https://other.example.test/v1/user"), + upstreamUrl: new URL("https://other.example.test/v1/user"), + }) + )?.json(), + ).resolves.toEqual({ regex: true }); +}); + +test("clones static direct fixture responses", async () => { + const fixture = createHttpFixtureInterceptor([ + { + match: () => true, + response: Response.json({ ok: true }), + }, + ]); + + await expect((await fixture(httpInput()))?.json()).resolves.toEqual({ + ok: true, + }); + await expect((await fixture(httpInput()))?.json()).resolves.toEqual({ + ok: true, + }); +}); + +test("records and replays HTTP 
requests with redacted headers", async () => { + replayDir = mkdtempSync(join(process.cwd(), ".tmp-http-replay-")); + vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir); + + const fetchHttp = vi.fn(async (request: Request) => { + await expect(request.json()).resolves.toEqual({ query: "first" }); + return Response.json( + { ok: true }, + { + status: 202, + headers: { + "set-cookie": "session=secret", + "x-request-id": "req_123", + }, + }, + ); + }); + + const first = await executeHttpWithReplay({ + name: "http.example", + input: httpInput(), + fetch: fetchHttp, + replay: true, + }); + + expect(first.response.status).toBe(202); + await expect(first.response.json()).resolves.toEqual({ ok: true }); + expect(first.replay).toMatchObject({ status: "recorded" }); + expect(getReplayMetadataFromHttpResponse(first.response)).toMatchObject({ + status: "recorded", + }); + + const recordingPath = first.replay?.recordingPath; + expect(recordingPath).toBeTruthy(); + expect(existsSync(join(process.cwd(), recordingPath ?? ""))).toBe(true); + const recording = JSON.parse( + readFileSync(join(process.cwd(), recordingPath ?? 
""), "utf8"), + ) as HttpRecording; + + expect(recording.input).toMatchObject({ + method: "POST", + url: "https://api.example.test/v1/search?q=first", + headers: { + authorization: "[redacted]", + "content-type": "application/json", + }, + body: { + encoding: "utf8", + value: JSON.stringify({ query: "first" }), + }, + }); + expect(recording.output?.headers).toMatchObject({ + "set-cookie": "[redacted]", + "x-request-id": "req_123", + }); + expect(recording.metadata).toMatchObject({ + kind: "http", + provider: "example", + engine: "unit-test", + scenario: "search", + }); + + fetchHttp.mockImplementationOnce(async () => { + throw new Error("HTTP should replay from cassette"); + }); + + const second = await executeHttpWithReplay({ + name: "http.example", + input: httpInput(), + fetch: fetchHttp, + replay: true, + }); + + expect(second.replay).toMatchObject({ status: "replayed" }); + expect(second.response.status).toBe(202); + await expect(second.response.json()).resolves.toEqual({ ok: true }); + expect(fetchHttp).toHaveBeenCalledTimes(1); +}); + +test("lets HTTP replay customize header redaction", async () => { + replayDir = mkdtempSync(join(process.cwd(), ".tmp-http-redaction-")); + vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir); + + const first = await executeHttpWithReplay({ + name: "http.headers", + input: httpInput(), + fetch: async () => + new Response("ok", { + headers: { + "set-cookie": "session=test", + "x-request-id": "req_456", + }, + }), + replay: { + redactHeaders: ["authorization"], + }, + }); + + const recording = JSON.parse( + readFileSync( + join(process.cwd(), first.replay?.recordingPath ?? 
""), + "utf8", + ), + ) as HttpRecording; + + expect(recording.input.headers.authorization).toBe("[redacted]"); + expect(recording.output?.headers["set-cookie"]).toBe("session=test"); +}); + +test("creates replay-backed HTTP interceptors", async () => { + replayDir = mkdtempSync(join(process.cwd(), ".tmp-http-interceptor-")); + vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir); + + const fetchHttp = vi.fn(async () => { + return Response.json({ ok: true }); + }); + const interceptHttp = createHttpReplayInterceptor({ + name: "http.interceptor", + fetch: fetchHttp, + }); + + const first = await interceptHttp(httpInput()); + const second = await interceptHttp(httpInput()); + + expect(first?.status).toBe(200); + expect(second?.status).toBe(200); + expect(getReplayMetadataFromHttpResponse(first as Response)).toMatchObject({ + status: "recorded", + }); + expect(getReplayMetadataFromHttpResponse(second as Response)).toMatchObject({ + status: "replayed", + }); + expect(fetchHttp).toHaveBeenCalledTimes(1); +}); diff --git a/packages/http/src/index.ts b/packages/http/src/index.ts new file mode 100644 index 0000000..6247669 --- /dev/null +++ b/packages/http/src/index.ts @@ -0,0 +1,706 @@ +import { Buffer } from "node:buffer"; +import type { JsonValue } from "vitest-evals"; +import { + executeWithReplay, + getReplayMetadataFromError, + type ReplayMetadata, + type ToolRecording, + type ToolReplayConfig, +} from "vitest-evals/replay"; + +type MaybePromise = T | Promise; + +const HTTP_REPLAY_RESPONSE_METADATA = "vitestEvalsReplay"; + +/** Header names redacted from HTTP replay recordings by default. */ +export const DEFAULT_HTTP_REPLAY_REDACTED_HEADERS = [ + "authorization", + "cookie", + "proxy-authorization", + "set-cookie", + "x-api-key", + "x-auth-token", + "x-csrf-token", + "x-xsrf-token", +] as const; + +/** HTTP request observed by a proxy, browser route, fetch shim, or sandbox engine. 
*/ +export interface HttpInterceptRequest { + /** Engine-specific request normalized into the Fetch API shape. */ + request: Request; + /** Original upstream URL the system under test attempted to call. */ + upstreamUrl: URL; + /** Optional provider label such as `github`, `sentry`, or `slack`. */ + provider?: string; + /** Optional engine label such as `vercel-sandbox`, `docker`, or `msw`. */ + engine?: string; + /** Extra JSON-safe metadata recorded with HTTP replay cassettes. */ + metadata?: Record; +} + +/** HTTP interceptor that may return a mocked/replayed response or pass through. */ +export type HttpInterceptor = ( + input: HttpInterceptRequest, +) => MaybePromise; + +/** Options for composing several HTTP interceptors into one handler. */ +export interface CreateHttpInterceptorOptions { + /** Optional handler invoked when no interceptor returned a response. */ + unhandled?: HttpInterceptor; +} + +/** Options for creating a deterministic unhandled HTTP response. */ +export interface UnhandledHttpResponseOptions { + /** HTTP status used for the unhandled response. Defaults to `599`. */ + status?: number; +} + +/** Static or dynamic response returned by a direct HTTP fixture. */ +export type HttpFixtureResponse = + | Response + | ((input: HttpInterceptRequest) => MaybePromise); + +/** Direct HTTP fixture used for deterministic request-specific injection. */ +export interface HttpFixture { + /** Optional human-readable fixture name for test diagnostics. */ + name?: string; + /** Return whether this fixture should handle the intercepted request. */ + match: (input: HttpInterceptRequest) => MaybePromise; + /** Static response or callback response returned when `match` succeeds. */ + response: HttpFixtureResponse; +} + +/** String, URL, regex, callback, or object matcher for direct HTTP fixtures. */ +export type HttpFixtureRoute = + | string + | URL + | RegExp + | ((input: HttpInterceptRequest) => MaybePromise) + | { + /** Full URL matcher. 
*/ + url?: string | URL | RegExp; + /** Hostname matcher such as `api.github.com`. */ + hostname?: string | RegExp; + /** Pathname matcher without query string. */ + pathname?: string | RegExp; + /** Path plus query-string matcher. */ + path?: string | RegExp; + /** Provider label matcher. */ + provider?: string | RegExp; + /** Engine label matcher. */ + engine?: string | RegExp; + }; + +/** Options for route-style direct HTTP fixtures. */ +export interface HttpFixtureRouteOptions { + /** Optional human-readable fixture name for test diagnostics. */ + name?: string; +} + +/** Recorded HTTP body representation stored in JSON replay cassettes. */ +export type SerializedHttpBody = + | { + /** Body is stored as UTF-8 text for reviewable fixtures. */ + encoding: "utf8"; + /** Encoded body content. */ + value: string; + } + | { + /** Body is stored as base64 when it does not look text-like. */ + encoding: "base64"; + /** Encoded body content. */ + value: string; + }; + +/** Recorded HTTP header map stored in JSON replay cassettes. */ +export type SerializedHttpHeaders = Record; + +/** JSON-serializable HTTP request stored as replay input. */ +export type SerializedHttpRequest = { + /** HTTP method used for the upstream request. */ + method: string; + /** Absolute upstream URL called by the system under test. */ + url: string; + /** Request headers after engine normalization and default redaction. */ + headers: SerializedHttpHeaders; + /** Optional buffered request body. */ + body?: SerializedHttpBody; +}; + +/** JSON-serializable HTTP response stored as replay output. */ +export type SerializedHttpResponse = { + /** HTTP status code returned by the upstream service or fixture. */ + status: number; + /** HTTP status text returned by the upstream service when available. */ + statusText?: string; + /** Response headers after default redaction. */ + headers: SerializedHttpHeaders; + /** Optional buffered response body. 
*/ + body?: SerializedHttpBody; +}; + +/** Replay context passed to HTTP cache-key and live-fetch callbacks. */ +export interface HttpReplayContext { + /** Original upstream URL the system under test attempted to call. */ + upstreamUrl: URL; + /** Optional provider label such as `github`, `sentry`, or `slack`. */ + provider?: string; + /** Optional engine label such as `vercel-sandbox`, `docker`, or `msw`. */ + engine?: string; + /** Extra JSON-safe metadata supplied by the engine adapter. */ + metadata?: Record; + /** Caller-provided context for adapter-specific state. */ + context: TContext; +} + +/** HTTP replay recording shape stored by `executeHttpWithReplay(...)`. */ +export type HttpRecording = ToolRecording< + SerializedHttpRequest, + SerializedHttpResponse +>; + +/** HTTP replay configuration for keying and sanitizing request/response cassettes. */ +export interface HttpReplayConfig + extends ToolReplayConfig< + SerializedHttpRequest, + SerializedHttpResponse, + HttpReplayContext + > { + /** Header names redacted after `sanitize`; use `false` to preserve headers exactly. */ + redactHeaders?: readonly string[] | false; +} + +/** HTTP replay policy accepted by replay-enabled HTTP primitives. */ +export type HttpReplayPolicy = + | boolean + | HttpReplayConfig; + +/** Live HTTP fetch function used when replay records a cache miss. */ +export type HttpFetch = ( + request: Request, + context: HttpReplayContext, +) => MaybePromise; + +/** Options for executing one HTTP request through replay-aware fetching. */ +export interface ExecuteHttpWithReplayOptions { + /** Stable cassette namespace. Defaults to `http`. */ + name?: string; + /** Intercepted HTTP request details from the engine adapter. */ + input: HttpInterceptRequest; + /** Live fetch implementation used when replay does not return a recording. */ + fetch?: HttpFetch; + /** Replay policy. `true` uses the default request key and redaction rules. 
*/ + replay?: HttpReplayPolicy; + /** Caller-provided context forwarded to key, sanitize, and fetch callbacks. */ + context?: TContext; +} + +/** Result of executing one HTTP request through replay-aware fetching. */ +export interface HttpReplayExecution { + /** HTTP response returned from a recording or live fetch. */ + response: Response; + /** Replay metadata when a cassette was recorded or replayed. */ + replay?: ReplayMetadata; +} + +/** Options for creating an interceptor that records or replays HTTP traffic. */ +export interface CreateHttpReplayInterceptorOptions { + /** Stable cassette namespace. Defaults to `http`. */ + name?: string; + /** Live fetch implementation used when replay does not return a recording. */ + fetch?: HttpFetch; + /** Replay policy. Defaults to `true` because creating this interceptor opts in. */ + replay?: HttpReplayPolicy; + /** Caller-provided context forwarded to replay and fetch callbacks. */ + context?: TContext; +} + +/** Compose HTTP interceptors, returning the first response produced. */ +export function createHttpInterceptor( + interceptors: readonly HttpInterceptor[], + options: CreateHttpInterceptorOptions = {}, +): HttpInterceptor { + return async (input) => { + for (const interceptor of interceptors) { + const response = await interceptor(cloneHttpInterceptRequest(input)); + if (response) { + return response; + } + } + + return await options.unhandled?.(cloneHttpInterceptRequest(input)); + }; +} + +/** Create a direct HTTP fixture with route-style matching. 
*/ +export function createHttpFixture( + method: string | undefined, + route: HttpFixtureRoute, + response: HttpFixtureResponse, + options: HttpFixtureRouteOptions = {}, +): HttpFixture { + return { + name: options.name, + match: async (input) => { + if ( + method && + input.request.method.toUpperCase() !== method.toUpperCase() + ) { + return false; + } + return await matchesHttpFixtureRoute(route, input); + }, + response, + }; +} + +/** Route-style helpers for direct HTTP fixtures. */ +export const httpFixture = { + /** Match any HTTP method for a route. */ + all: ( + route: HttpFixtureRoute, + response: HttpFixtureResponse, + options?: HttpFixtureRouteOptions, + ) => createHttpFixture(undefined, route, response, options), + /** Match `DELETE` requests for a route. */ + delete: ( + route: HttpFixtureRoute, + response: HttpFixtureResponse, + options?: HttpFixtureRouteOptions, + ) => createHttpFixture("DELETE", route, response, options), + /** Match `GET` requests for a route. */ + get: ( + route: HttpFixtureRoute, + response: HttpFixtureResponse, + options?: HttpFixtureRouteOptions, + ) => createHttpFixture("GET", route, response, options), + /** Match `PATCH` requests for a route. */ + patch: ( + route: HttpFixtureRoute, + response: HttpFixtureResponse, + options?: HttpFixtureRouteOptions, + ) => createHttpFixture("PATCH", route, response, options), + /** Match `POST` requests for a route. */ + post: ( + route: HttpFixtureRoute, + response: HttpFixtureResponse, + options?: HttpFixtureRouteOptions, + ) => createHttpFixture("POST", route, response, options), + /** Match `PUT` requests for a route. */ + put: ( + route: HttpFixtureRoute, + response: HttpFixtureResponse, + options?: HttpFixtureRouteOptions, + ) => createHttpFixture("PUT", route, response, options), +} as const; + +/** Create an interceptor from direct request/response fixtures. 
*/ +export function createHttpFixtureInterceptor( + fixtures: readonly HttpFixture[], +): HttpInterceptor { + return async (input) => { + for (const fixture of fixtures) { + const fixtureInput = cloneHttpInterceptRequest(input); + if (!(await fixture.match(fixtureInput))) { + continue; + } + + const responseInput = cloneHttpInterceptRequest(input); + return typeof fixture.response === "function" + ? await fixture.response(responseInput) + : fixture.response.clone(); + } + + return undefined; + }; +} + +async function matchesHttpFixtureRoute( + route: HttpFixtureRoute, + input: HttpInterceptRequest, +): Promise { + if (typeof route === "function") { + return await route(input); + } + + if (typeof route === "string") { + return matchRouteString(route, input.upstreamUrl); + } + + if (route instanceof URL) { + return input.upstreamUrl.toString() === route.toString(); + } + + if (route instanceof RegExp) { + return route.test(input.upstreamUrl.toString()); + } + + return ( + matchOptionalRoutePart(route.url, input.upstreamUrl.toString()) && + matchOptionalRoutePart(route.hostname, input.upstreamUrl.hostname) && + matchOptionalRoutePart(route.pathname, input.upstreamUrl.pathname) && + matchOptionalRoutePart( + route.path, + `${input.upstreamUrl.pathname}${input.upstreamUrl.search}`, + ) && + matchOptionalRoutePart(route.provider, input.provider ?? "") && + matchOptionalRoutePart(route.engine, input.engine ?? "") + ); +} + +function matchRouteString(route: string, upstreamUrl: URL): boolean { + if (route.startsWith("http://") || route.startsWith("https://")) { + return upstreamUrl.toString() === route; + } + + if (route.startsWith("/")) { + const path = `${upstreamUrl.pathname}${upstreamUrl.search}`; + return route.includes("?") + ? 
path === route + : upstreamUrl.pathname === route; + } + + return upstreamUrl.hostname === route; +} + +function matchOptionalRoutePart( + matcher: string | URL | RegExp | undefined, + value: string, +): boolean { + if (matcher === undefined) { + return true; + } + + if (matcher instanceof URL) { + return value === matcher.toString(); + } + + if (matcher instanceof RegExp) { + return matcher.test(value); + } + + return value === matcher; +} + +/** Create a deterministic error response for unhandled intercepted HTTP traffic. */ +export function unhandledHttpResponse( + input: HttpInterceptRequest, + options: UnhandledHttpResponseOptions = {}, +): Response { + return new Response( + `[HTTP INTERCEPT] Unhandled external request: ${input.request.method} ${input.upstreamUrl.toString()}\n`, + { + status: options.status ?? 599, + headers: { "content-type": "text/plain; charset=utf-8" }, + }, + ); +} + +/** Execute an HTTP request using existing replay modes and cassette storage. */ +export async function executeHttpWithReplay({ + name = "http", + input, + fetch, + replay, + context, +}: ExecuteHttpWithReplayOptions): Promise { + const serializedRequest = await serializeHttpRequest(input); + const replayContext = createHttpReplayContext(input, context as TContext); + const execution = await executeWithReplay< + SerializedHttpRequest, + SerializedHttpResponse, + HttpReplayContext + >({ + toolName: name, + args: serializedRequest, + context: replayContext, + execute: async (request, replayContext) => { + const response = await (fetch ?? 
defaultHttpFetch)( + deserializeHttpRequest(request), + replayContext, + ); + return await serializeHttpResponse(response); + }, + replay: normalizeHttpReplayPolicy(replay), + metadata: httpRecordingMetadata(input), + }); + const response = deserializeHttpResponse(execution.result); + + if (execution.replay) { + attachHttpReplayMetadata(response, execution.replay); + } + + return { + response, + replay: execution.replay, + }; +} + +/** Create an HTTP interceptor that records misses and replays existing cassettes. */ +export function createHttpReplayInterceptor( + options: CreateHttpReplayInterceptorOptions = {}, +): HttpInterceptor { + return async (input) => { + const execution = await executeHttpWithReplay({ + name: options.name, + input, + fetch: options.fetch, + replay: options.replay ?? true, + context: options.context, + }); + return execution.response; + }; +} + +/** Read replay metadata attached to a response returned by HTTP replay helpers. */ +export function getReplayMetadataFromHttpResponse( + response: Response, +): ReplayMetadata | undefined { + return getReplayMetadataFromError(response); +} + +/** Redact sensitive request and response headers from an HTTP replay recording. */ +export function redactHttpRecordingHeaders( + recording: HttpRecording, + headers: readonly string[] = DEFAULT_HTTP_REPLAY_REDACTED_HEADERS, +): HttpRecording { + return { + ...recording, + input: { + ...recording.input, + headers: redactHeaders(recording.input.headers, headers), + }, + ...(recording.output + ? { + output: { + ...recording.output, + headers: redactHeaders(recording.output.headers, headers), + }, + } + : {}), + }; +} + +function cloneHttpInterceptRequest( + input: HttpInterceptRequest, +): HttpInterceptRequest { + return { + request: input.request.clone(), + upstreamUrl: new URL(input.upstreamUrl.toString()), + ...(input.provider ? { provider: input.provider } : {}), + ...(input.engine ? { engine: input.engine } : {}), + ...(input.metadata ? 
{ metadata: { ...input.metadata } } : {}),
  };
}

/** Live fetch used when the caller supplies none; the replay context is unused. */
async function defaultHttpFetch(
  request: Request,
  _context: HttpReplayContext<unknown>,
) {
  return await fetch(request);
}

/**
 * Build the context handed to replay callbacks.
 *
 * The URL and metadata are copied so callbacks cannot mutate the intercepted
 * input; `provider`, `engine` and `metadata` are only present when set.
 */
function createHttpReplayContext<TContext>(
  input: HttpInterceptRequest,
  context: TContext,
): HttpReplayContext<TContext> {
  return {
    upstreamUrl: new URL(input.upstreamUrl.toString()),
    ...(input.provider ? { provider: input.provider } : {}),
    ...(input.engine ? { engine: input.engine } : {}),
    ...(input.metadata ? { metadata: { ...input.metadata } } : {}),
    context,
  };
}

/**
 * Metadata stored alongside an HTTP recording.
 *
 * `provider` and `engine` are left out when unset so the record never holds
 * `undefined` values. Caller metadata is spread last and therefore wins over
 * the built-in keys, including `kind`.
 */
function httpRecordingMetadata(
  input: HttpInterceptRequest,
): Record<string, JsonValue> {
  return {
    kind: "http",
    ...(input.provider !== undefined ? { provider: input.provider } : {}),
    ...(input.engine !== undefined ? { engine: input.engine } : {}),
    ...(input.metadata ?? {}),
  };
}

/**
 * Expand a replay policy into a full config with HTTP defaults.
 *
 * Falsy policies are returned unchanged. Otherwise the default request key
 * is used when none is configured, and header redaction runs after the
 * caller's own `sanitize` unless `redactHeaders` is `false`.
 */
function normalizeHttpReplayPolicy<TContext>(
  replay: HttpReplayPolicy<TContext> | undefined,
): HttpReplayPolicy<TContext> | undefined {
  if (!replay) {
    return replay;
  }

  const config = replay === true ? {} : replay;

  return {
    ...config,
    key: config.key ?? defaultHttpReplayKey,
    sanitize: async (recording) => {
      const sanitized = config.sanitize
        ? await config.sanitize(recording)
        : recording;
      return config.redactHeaders === false
        ? sanitized
        : redactHttpRecordingHeaders(
            sanitized,
            config.redactHeaders ?? DEFAULT_HTTP_REPLAY_REDACTED_HEADERS,
          );
    },
  };
}

/** Default cache key: method, URL and body. Headers are not part of the key. */
function defaultHttpReplayKey(request: SerializedHttpRequest): JsonValue {
  return {
    method: request.method,
    url: request.url,
    body: request.body ?? null,
  };
}

/** Convert an intercepted request into its JSON-serializable form. */
async function serializeHttpRequest(
  input: HttpInterceptRequest,
): Promise<SerializedHttpRequest> {
  return {
    method: input.request.method.toUpperCase(),
    // The upstream URL, not `request.url`, identifies the real destination.
    url: input.upstreamUrl.toString(),
    headers: serializeHeaders(input.request.headers),
    ...(await serializedRequestBody(input.request)),
  };
}

/** Convert a response into its JSON-serializable form. */
async function serializeHttpResponse(
  response: Response,
): Promise<SerializedHttpResponse> {
  return {
    status: response.status,
    ...(response.statusText ? { statusText: response.statusText } : {}),
    headers: serializeHeaders(response.headers),
    ...(await serializedResponseBody(response)),
  };
}

/** Flatten `Headers` into a record keyed by lower-cased header name. */
function serializeHeaders(headers: Headers): SerializedHttpHeaders {
  const record: SerializedHttpHeaders = {};
  // NOTE(review): if iteration yields several entries for one name (possibly
  // `set-cookie`), only the last value is kept here; confirm that is acceptable.
  headers.forEach((value, key) => {
    record[key.toLowerCase()] = value;
  });
  return record;
}

/** Buffer the request body, skipped for GET/HEAD and body-less requests. */
async function serializedRequestBody(
  request: Request,
): Promise<Pick<SerializedHttpRequest, "body">> {
  if (
    request.body === null ||
    request.method.toUpperCase() === "GET" ||
    request.method.toUpperCase() === "HEAD"
  ) {
    return {};
  }

  return {
    body: serializeBody(
      // Read from a clone so the caller's request body stays unread.
      await request.clone().arrayBuffer(),
      request.headers.get("content-type"),
    ),
  };
}

/** Buffer the response body, skipped when the response has none. */
async function serializedResponseBody(
  response: Response,
): Promise<Pick<SerializedHttpResponse, "body">> {
  if (response.body === null) {
    return {};
  }

  return {
    body: serializeBody(
      // Read from a clone so the caller's response body stays unread.
      await response.clone().arrayBuffer(),
      response.headers.get("content-type"),
    ),
  };
}

/**
 * Encode body bytes for a JSON cassette.
 *
 * Text-like content is stored as readable UTF-8, but only when the bytes
 * decode strictly: anything that is not valid UTF-8 (for example a body in
 * another charset) is stored as base64 instead, so replay reproduces the
 * exact bytes. `ignoreBOM` keeps a leading byte-order mark in the text for
 * the same reason.
 */
function serializeBody(
  body: ArrayBuffer,
  contentType: string | null,
): SerializedHttpBody {
  const bytes = new Uint8Array(body);
  if (isTextLikeContentType(contentType)) {
    try {
      const decoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });
      return {
        encoding: "utf8",
        value: decoder.decode(bytes),
      };
    } catch {
      // Not valid UTF-8 despite the content type: fall through to base64.
    }
  }

  return {
    encoding: "base64",
    value: Buffer.from(bytes).toString("base64"),
  };
}

/** Decide from the media type (parameters ignored) whether a body is text. */
function isTextLikeContentType(contentType: string | null): boolean {
  const normalized = contentType?.split(";")[0]?.trim().toLowerCase() ?? "";
  return (
    normalized.startsWith("text/") ||
    normalized === "application/json" ||
    normalized === "application/graphql" ||
    normalized === "application/javascript" ||
    normalized === "application/x-www-form-urlencoded" ||
    normalized.endsWith("+json") ||
    normalized.endsWith("+xml")
  );
}

/** Rebuild a `Request` from its serialized form for the live fetch. */
function deserializeHttpRequest(request: SerializedHttpRequest): Request {
  return new Request(request.url, {
    method: request.method,
    headers: request.headers,
    // GET/HEAD never get a body, even if one was recorded.
    ...(request.body && request.method !== "GET" && request.method !== "HEAD"
      ? { body: deserializeBody(request.body) }
      : {}),
  });
}

/** Rebuild a `Response` from its serialized form. */
function deserializeHttpResponse(response: SerializedHttpResponse): Response {
  return new Response(
    response.body ? deserializeBody(response.body) : undefined,
    {
      status: response.status,
      statusText: response.statusText,
      headers: response.headers,
    },
  );
}

/** Decode a serialized body back into bytes. */
function deserializeBody(body: SerializedHttpBody): Uint8Array {
  if (body.encoding === "base64") {
    return Buffer.from(body.value, "base64");
  }

  return new TextEncoder().encode(body.value);
}

/**
 * Return a copy of `headers` in which every listed header name is redacted.
 * Names are compared case-insensitively; all other headers are kept as-is.
 */
function redactHeaders(
  headers: SerializedHttpHeaders,
  redactedHeaders: readonly string[],
): SerializedHttpHeaders {
  const redacted = new Set(
    redactedHeaders.map((header) => header.toLowerCase()),
  );
  const next: SerializedHttpHeaders = {};
  for (const [key, value] of Object.entries(headers)) {
    next[key] = redacted.has(key.toLowerCase()) ?
"[redacted]" : value; + } + return next; +} + +function attachHttpReplayMetadata( + response: Response, + replay: ReplayMetadata, +): void { + Object.assign(response, { + [HTTP_REPLAY_RESPONSE_METADATA]: replay, + }); +} diff --git a/packages/http/tsconfig.json b/packages/http/tsconfig.json new file mode 100644 index 0000000..9e25e6e --- /dev/null +++ b/packages/http/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.base.json", + "include": ["src/**/*.ts"] +} diff --git a/packages/http/tsup.config.ts b/packages/http/tsup.config.ts new file mode 100644 index 0000000..f8f15bd --- /dev/null +++ b/packages/http/tsup.config.ts @@ -0,0 +1,11 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: ["src/**/*.ts", "!src/**/*.test.ts", "!src/**/*.test.*.ts"], + format: ["cjs", "esm"], + dts: true, + splitting: false, + sourcemap: true, + clean: true, + external: ["vitest-evals"], +}); diff --git a/packages/vitest-evals/README.md b/packages/vitest-evals/README.md index 9101992..6176f81 100644 --- a/packages/vitest-evals/README.md +++ b/packages/vitest-evals/README.md @@ -184,6 +184,13 @@ Replay opt-in belongs on the harness, via `toolReplay`, while replay mode and recording directory can live in Vitest environment config. Tool definitions should stay free of VCR policy. +For full application or sandbox flows where outbound HTTP does not pass through +local tool wrappers, use `@vitest-evals/http`. Vercel Sandbox egress has a +separate adapter package, `@vitest-evals/http-vercel-sandbox`, while Docker +proxies, MSW, Playwright routes, or fetch shims can use the same request +fixture and replay behavior without putting engine-specific proxy code in the +core package. + For the Pi-specific harness, output/session/usage normalization should usually be inferred automatically. Treat low-level normalization callbacks as an escape hatch, not part of the primary authoring path. 
diff --git a/packages/vitest-evals/src/replay.ts b/packages/vitest-evals/src/replay.ts index 6949dce..92ee3b9 100644 --- a/packages/vitest-evals/src/replay.ts +++ b/packages/vitest-evals/src/replay.ts @@ -58,12 +58,14 @@ export async function executeWithReplay< context, execute, replay, + metadata, }: { toolName: string; args: TArgs; context: TContext; execute: (args: TArgs, context: TContext) => MaybePromise; replay: boolean | ToolReplayConfig | undefined; + metadata?: Record; }) { const replayConfig = normalizeReplayConfig(replay); const replayMode = resolveReplayMode(); @@ -138,6 +140,7 @@ export async function executeWithReplay< cacheKey, version: replayConfig.version, mode: replayMode, + ...(metadata ?? {}), }, }); @@ -161,6 +164,7 @@ export async function executeWithReplay< cacheKey, version: replayConfig.version, mode: replayMode, + ...(metadata ?? {}), }, }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0151d9b..4ea4fb8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -172,6 +172,18 @@ importers: specifier: workspace:* version: link:../vitest-evals + packages/http: + devDependencies: + vitest-evals: + specifier: workspace:* + version: link:../vitest-evals + + packages/http-vercel-sandbox: + devDependencies: + '@vitest-evals/http': + specifier: workspace:* + version: link:../http + packages/vitest-evals: dependencies: ai: diff --git a/tsconfig.base.json b/tsconfig.base.json index d90a6d7..b1f9ca2 100644 --- a/tsconfig.base.json +++ b/tsconfig.base.json @@ -14,6 +14,10 @@ "@vitest-evals/harness-openai-agents": [ "packages/harness-openai-agents/src/index.ts" ], + "@vitest-evals/http": ["packages/http/src/index.ts"], + "@vitest-evals/http-vercel-sandbox": [ + "packages/http-vercel-sandbox/src/index.ts" + ], "@vitest-evals/harness-pi-ai": ["packages/harness-pi-ai/src/index.ts"], "@vitest-evals/github-reporter": [ "packages/github-reporter/src/index.ts"