CopilotKit · tombeckenham · Jun 20, 2026 · Jun 20, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Added
+
+- fal queue `status`/`result` responses now emit `x-fal-request-id`, and a `billableUnits` fixture field (e.g. `onFalQueue(model, json, { billableUnits })`) emits `x-fal-billable-units` on the completed result so adapters like `@tanstack/ai-fal` can surface `usage.unitsBilled` on replay. Record mode captures the upstream `x-fal-billable-units` header automatically, so recorded fixtures round-trip billing with no hand-editing (#269)
+
 ## [1.31.0] - 2026-06-10
 
 ### Added

diff --git a/docs/fal-ai/index.html b/docs/fal-ai/index.html
@@ -145,6 +145,27 @@ <h2>Typed Helpers: <code>onFalImage</code> / <code>onFalVideo</code></h2>
           <code>file_size: 0</code>, <code>seed: 0</code>.
         </p>
 
+        <h2>Billing Headers: <code>x-fal-request-id</code> / <code>x-fal-billable-units</code></h2>
+        <p>
+          aimock always sets <code>x-fal-request-id</code> on queue <code>status</code> and
+          <code>result</code> responses, matching real fal. To exercise a consumer's cost accounting
+          (e.g. <code>@tanstack/ai-fal</code>'s <code>usage.unitsBilled</code>), pass
+          <code>billableUnits</code> — it rides the completed <code>result</code> response as
+          <code>x-fal-billable-units</code>. Omit it to keep the header-less default. In record mode
+          the upstream <code>x-fal-billable-units</code> header is captured automatically and
+          written into the fixture's <code>response.billableUnits</code>, so recorded fixtures
+          round-trip billing with no hand-editing.
+        </p>
+
+        <div class="code-block">
+          <div class="code-block-header">billing.test.ts <span class="lang-tag">ts</span></div>
+          <pre><code><span class="op">mock</span>.<span class="fn">onFalImage</span>(<span class="str">/flux/</span>, {
+  <span class="prop">images</span>: [{ <span class="prop">url</span>: <span class="str">"https://mock.fal.media/x.png"</span> }],
+}, { <span class="prop">billableUnits</span>: <span class="num">42</span> });
+
+<span class="cm">// completed queue-result now carries: x-fal-billable-units: 42</span></code></pre>
+        </div>
+
         <h2>Client Configuration</h2>
         <p>
           Point the <code>@fal-ai/client</code> at aimock using <code>requestMiddleware</code> to

diff --git a/src/__tests__/fal.test.ts b/src/__tests__/fal.test.ts
@@ -47,6 +47,116 @@ describe("fal.ai general handler — fixture lookup", () => {
     expect(resultBody).toEqual({ images: [{ url: "https://example.com/cat.png" }] });
   });
 
+  test("queue status/result responses carry x-fal-request-id", async () => {
+    mock = new LLMock({ port: 0 });
+    mock.onFalQueue(/flux/, { images: [{ url: "https://example.com/cat.png" }] });
+    await mock.start();
+
+    const submit = await fetch(`${mock.url}/fal/fal-ai/flux/dev`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json", "x-fal-target-host": "queue.fal.run" },
+      body: JSON.stringify({ input: { prompt: "a cat" } }),
+    });
+    const envelope = await submit.json();
+
+    const status = await fetch(
+      `${mock.url}/fal/fal-ai/flux/dev/requests/${envelope.request_id}/status`,
+      { headers: { "x-fal-target-host": "queue.fal.run" } },
+    );
+    expect(status.headers.get("x-fal-request-id")).toBe(envelope.request_id);
+
+    const result = await fetch(`${mock.url}/fal/fal-ai/flux/dev/requests/${envelope.request_id}`, {
+      headers: { "x-fal-target-host": "queue.fal.run" },
+    });
+    expect(result.headers.get("x-fal-request-id")).toBe(envelope.request_id);
+    // No billableUnits configured → no x-fal-billable-units header.
+    expect(result.headers.get("x-fal-billable-units")).toBeNull();
+  });
+
+  test("billableUnits opt-in emits x-fal-billable-units on completed result only", async () => {
+    mock = new LLMock({ port: 0 });
+    mock.onFalQueue(
+      /flux/,
+      { images: [{ url: "https://example.com/cat.png" }] },
+      {
+        billableUnits: 42,
+      },
+    );
+    await mock.start();
+
+    const submit = await fetch(`${mock.url}/fal/fal-ai/flux/dev`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json", "x-fal-target-host": "queue.fal.run" },
+      body: JSON.stringify({ input: { prompt: "a cat" } }),
+    });
+    const envelope = await submit.json();
+
+    const result = await fetch(`${mock.url}/fal/fal-ai/flux/dev/requests/${envelope.request_id}`, {
+      headers: { "x-fal-target-host": "queue.fal.run" },
+    });
+    expect(result.status).toBe(200);
+    expect(result.headers.get("x-fal-request-id")).toBe(envelope.request_id);
+    expect(result.headers.get("x-fal-billable-units")).toBe("42");
+  });
+
+  test("billableUnits: 0 still emits x-fal-billable-units (zero is a real billed count)", async () => {
+    // Guards the deliberate `!= null` / `Number.isFinite` checks: a truthy
+    // guard (`if (job.billableUnits)`) would drop the header for a zero-cost
+    // call, and every other billableUnits test would stay green.
+    mock = new LLMock({ port: 0 });
+    mock.onFalQueue(
+      /flux/,
+      { images: [{ url: "https://example.com/cat.png" }] },
+      { billableUnits: 0 },
+    );
+    await mock.start();
+
+    const submit = await fetch(`${mock.url}/fal/fal-ai/flux/dev`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json", "x-fal-target-host": "queue.fal.run" },
+      body: JSON.stringify({ input: { prompt: "a cat" } }),
+    });
+    const envelope = await submit.json();
+
+    const result = await fetch(`${mock.url}/fal/fal-ai/flux/dev/requests/${envelope.request_id}`, {
+      headers: { "x-fal-target-host": "queue.fal.run" },
+    });
+    expect(result.status).toBe(200);
+    expect(result.headers.get("x-fal-billable-units")).toBe("0");
+  });
+
+  test("billableUnits header is withheld until the result completes", async () => {
+    // Progression keeps the job IN_QUEUE on the first result poll, so the
+    // billable-units header must not ride the 202 — only the completed 200.
+    mock = new LLMock({ port: 0, falQueue: { pollsBeforeCompleted: 2 } });
+    mock.onFalQueue(
+      /flux/,
+      { images: [{ url: "https://example.com/cat.png" }] },
+      {
+        billableUnits: 7,
+      },
+    );
+    await mock.start();
+
+    const submit = await fetch(`${mock.url}/fal/fal-ai/flux/dev`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json", "x-fal-target-host": "queue.fal.run" },
+      body: JSON.stringify({ input: { prompt: "a cat" } }),
+    });
+    const envelope = await submit.json();
+    const url = `${mock.url}/fal/fal-ai/flux/dev/requests/${envelope.request_id}`;
+    const headers = { "x-fal-target-host": "queue.fal.run" };
+
+    const pending = await fetch(url, { headers });
+    expect(pending.status).toBe(202);
+    expect(pending.headers.get("x-fal-request-id")).toBe(envelope.request_id);
+    expect(pending.headers.get("x-fal-billable-units")).toBeNull();
+
+    const done = await fetch(url, { headers });
+    expect(done.status).toBe(200);
+    expect(done.headers.get("x-fal-billable-units")).toBe("7");
+  });
+
   test("body extraction handles input.prompt nesting (fal-client default shape)", async () => {
     mock = new LLMock({ port: 0 });
     mock.onFalQueue(/flux/, { images: [{ url: "https://example.com/x.png" }] });
@@ -211,6 +321,8 @@ function startFalQueueUpstream(opts: {
   finalBody: unknown;
   pollsBeforeCompleted?: number;
   upstreamRequestId?: string;
+  /** When set, the GET result response carries this x-fal-billable-units header. */
+  billableUnits?: string;
 }): Promise<{
   url: string;
   close: () => Promise<void>;
@@ -255,7 +367,12 @@ function startFalQueueUpstream(opts: {
         }
         if (req.method === "GET" && resultMatch && !statusMatch) {
           counts.result++;
-          send(200, opts.finalBody);
+          const resultHeaders: Record<string, string> = { "Content-Type": "application/json" };
+          if (opts.billableUnits !== undefined) {
+            resultHeaders["x-fal-billable-units"] = opts.billableUnits;
+          }
+          res.writeHead(200, resultHeaders);
+          res.end(JSON.stringify(opts.finalBody));
           return;
         }
         if (req.method === "POST") {
@@ -378,6 +495,95 @@ describe("fal.ai general handler — record and replay", () => {
     expect(recorded.fixtures[0].response.json).toEqual(FINAL_BODY);
   });
 
+  test("captures upstream x-fal-billable-units during recording → persists + replays it", async () => {
+    const FINAL_BODY = { images: [{ url: "https://mock.fal.media/files/billed.png" }] };
+    queueUpstream = await startFalQueueUpstream({
+      finalBody: FINAL_BODY,
+      pollsBeforeCompleted: 2,
+      billableUnits: "13",
+    });
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-fal-queue-billed-"));
+
+    mock = new LLMock({
+      port: 0,
+      record: {
+        providers: { fal: queueUpstream.url },
+        fixturePath: tmpDir,
+        fal: { pollIntervalMs: 5, timeoutMs: 5000 },
+      },
+    });
+    await mock.start();
+
+    const submit = await fetch(`${mock.url}/fal/fal-ai/flux/dev`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json", "x-fal-target-host": "queue.fal.run" },
+      body: JSON.stringify({ input: { prompt: "a cat" } }),
+    });
+    const envelope = await submit.json();
+
+    // Same-session replay surfaces the captured units without reloading the fixture.
+    const result = await fetch(`${mock.url}/fal/fal-ai/flux/dev/requests/${envelope.request_id}`, {
+      headers: { "x-fal-target-host": "queue.fal.run" },
+    });
+    expect(result.status).toBe(200);
+    expect(result.headers.get("x-fal-billable-units")).toBe("13");
+
+    // Persisted fixture carries response.billableUnits so a fresh load also replays it.
+    const files = fs.readdirSync(tmpDir).filter((f) => f.startsWith("fal-") && f.endsWith(".json"));
+    expect(files.length).toBe(1);
+    const recorded = JSON.parse(fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"));
+    expect(recorded.fixtures[0].response.billableUnits).toBe(13);
+  });
+
+  test.each([
+    ["absent", undefined],
+    ["non-numeric", "not-a-number"],
+  ])(
+    "recording omits billableUnits when the upstream header is %s",
+    async (_label, headerValue) => {
+      const FINAL_BODY = { images: [{ url: "https://mock.fal.media/files/unbilled.png" }] };
+      queueUpstream = await startFalQueueUpstream({
+        finalBody: FINAL_BODY,
+        pollsBeforeCompleted: 2,
+        billableUnits: headerValue,
+      });
+      tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-fal-queue-unbilled-"));
+
+      mock = new LLMock({
+        port: 0,
+        record: {
+          providers: { fal: queueUpstream.url },
+          fixturePath: tmpDir,
+          fal: { pollIntervalMs: 5, timeoutMs: 5000 },
+        },
+      });
+      await mock.start();
+
+      const submit = await fetch(`${mock.url}/fal/fal-ai/flux/dev`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json", "x-fal-target-host": "queue.fal.run" },
+        body: JSON.stringify({ input: { prompt: "a cat" } }),
+      });
+      const envelope = await submit.json();
+
+      // No usable upstream units → no header on the same-session replay.
+      const result = await fetch(
+        `${mock.url}/fal/fal-ai/flux/dev/requests/${envelope.request_id}`,
+        { headers: { "x-fal-target-host": "queue.fal.run" } },
+      );
+      expect(result.status).toBe(200);
+      expect(result.headers.get("x-fal-billable-units")).toBeNull();
+
+      // …and the persisted fixture stays clean: no billableUnits key at all.
+      const files = fs
+        .readdirSync(tmpDir)
+        .filter((f) => f.startsWith("fal-") && f.endsWith(".json"));
+      expect(files.length).toBe(1);
+      const recorded = JSON.parse(fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"));
+      expect("billableUnits" in recorded.fixtures[0].response).toBe(false);
+    },
+  );
+
   test("mock-internal headers never reach the upstream on the recorded queue walk", async () => {
     // CHANGELOG/docs claim x-test-id / x-aimock-strict / x-aimock-context /
     // x-aimock-chaos-* are stripped "on every provider proxy path" — pin the

diff --git a/src/fal-audio.ts b/src/fal-audio.ts
@@ -569,7 +569,10 @@ async function tryRecordAudioQueueWalk(args: {
 
   let finalBody: unknown;
   try {
-    finalBody = await walkFalQueue({
+    // The legacy audio queue stores an AudioResponse and serves its own
+    // headers, so the walk's captured billableUnits doesn't apply here — take
+    // the body only.
+    ({ body: finalBody } = await walkFalQueue({
       upstreamBase,
       submitPath: pathname,
       body,
@@ -581,7 +584,7 @@ async function tryRecordAudioQueueWalk(args: {
       fallbackStatusPath: (id) => `/fal/queue/requests/${id}/status`,
       fallbackResultPath: (id) => `/fal/queue/requests/${id}`,
       logger: defaults.logger,
-    });
+    }));
   } catch (err) {
     const msg = err instanceof Error ? err.message : "Unknown queue-walk error";
     defaults.logger.error(`fal-audio queue-walk proxy failed: ${msg}`);