From 9b3fe2cc1615c78781d513b8494fab4890f63509 Mon Sep 17 00:00:00 2001
From: Fsocietyhhh <1211904451@qq.com>
Date: Wed, 3 Jun 2026 01:07:28 -0700
Subject: [PATCH] fix(agent): restore GPT-5 / Grok families and surface real
 errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GPT-5 (mini/nano/5.4/5.5/codex) silently failed on the headless `-p`
path; Grok 3 / 4-0709 / 4-1-fast-reasoning the same way once the Surf
tools (#68) landed. Both were 400s from the gateway that the
headless caller never printed.

1. Drop the GPT-5/Codex `system → role: developer` rewrite (llm.ts).
   We were generating { role: "developer" } messages for any GPT-5
   request, but the gateway speaks Anthropic Messages — only
   user|assistant are accepted in messages[]. Verified live:
     curl -d '{"model":"openai/gpt-5-mini","messages":[{"role":"developer",...}]}'
     → HTTP 400 messages.0.role: Invalid option: expected one of "user"|"assistant"
   Keeping `system` as a top-level field lets the gateway translate to
   whatever the OpenAI upstream actually needs (it already does).

2. Strip slash-bearing enum constraints from tool schemas before
   sending to xAI (llm.ts). xAI's validator hard-rejects any enum
   string containing '/', e.g. Surf's endpoint enum
   ["market/ranking","market/futures",...] → 26 enum violations in
   one request. The path list is still in the tool description, so the
   model sees the legal values; only the schema constraint is dropped.

3. Wrap fetch() so undici's opaque "TypeError: fetch failed" carries
   its .cause.code (llm.ts). Without this, every transient gateway
   blip surfaces as `Network: fetch failed` with no way to triage
   whether it's the user's network, Cloudflare, or upstream. Now the
   message is e.g. `fetch failed (UND_ERR_SOCKET)`.

4. Forward turn-done errors to stderr in headless one-shot mode
   (commands/start.ts). Without this, `franklin -p ...` exits 1 with
   zero stderr — impossible to triage. This is how the 400s above
   were hidden in the first place.

Repro (after fix): all of gpt-5-mini / -nano / 5.4 / 5.5 / grok-3 /
grok-4-0709 / grok-4-1-fast-reasoning return their expected reply
under `franklin start -p "say PONG" -m <model>`.

Out of scope (gateway side):
- gpt-5.3-codex returns 503 "Streaming not supported for Responses
  API models" — now visible thanks to (4), but the fix belongs in
  the gateway's Responses-API path.
- claude-sonnet-4.6 / zai/glm-5.1 intermittent fetch failed —
  cheetah and I both reproduced. Hypothesis: long first-token
  latency on reasoning models with Cloud Run / Cloudflare idle-out
  before any byte hits the response stream. Needs a keepalive
  SSE comment in the gateway's stream entry points (Bedrock + GLM +
  Anthropic transpile). (3) above will pinpoint the exact undici
  cause next time it fires.
---
 src/agent/llm.ts      | 107 +++++++++++++++++++++++++++++++++++-------
 src/commands/start.ts |   6 +++
 2 files changed, 95 insertions(+), 18 deletions(-)
diff --git a/src/agent/llm.ts b/src/agent/llm.ts
index 1caa5c0..fc5ca5f 100644
--- a/src/agent/llm.ts
+++ b/src/agent/llm.ts
@@ -192,6 +192,62 @@ function createModelTimeoutError(stage: 'request' | 'stream', model: string, tim
   return new Error(`Model ${stage} timed out after ${timeoutMs}ms on ${model}`);
 }
 
+/**
+ * Walk a tool-schema object and drop any `enum` whose entries are strings
+ * containing "/". Grok's request validator rejects such enums outright (see
+ * the call site for the verbatim upstream error). The model still sees the
+ * intended values via the tool's description text, so dropping the schema-
+ * level constraint is purely a compatibility shim — no behavioral loss.
+ */
+function stripSlashEnumsForGrok(node: unknown): unknown {
+  if (Array.isArray(node)) return node.map(stripSlashEnumsForGrok);
+  if (!node || typeof node !== 'object') return node;
+  const out: Record<string, unknown> = {};
+  for (const [k, v] of Object.entries(node as Record<string, unknown>)) {
+    if (
+      k === 'enum' &&
+      Array.isArray(v) &&
+      v.some((x) => typeof x === 'string' && x.includes('/'))
+    ) {
+      continue; // drop the constraint entirely
+    }
+    out[k] = stripSlashEnumsForGrok(v);
+  }
+  return out;
+}
+
+/**
+ * Wrap `fetch()` so that undici's opaque `TypeError: fetch failed` is
+ * replaced with the underlying network reason (ECONNRESET, UND_ERR_*,
+ * certificate, DNS, etc.). Without this, every transient connection blip
+ * surfaces to the user as "Network: fetch failed" with no way to tell
+ * whether it's their network, the gateway, or the upstream provider.
+ *
+ * Verified 2026-06-03: stress-testing claude-sonnet-4.6 reproduces
+ * intermittent "fetch failed" (cheetah's report on 3.25.0). With this
+ * helper the message becomes e.g. "fetch failed (UND_ERR_SOCKET: other
+ * side closed)" which is actionable.
+ */
+async function fetchWithUnwrappedCause(
+  url: string,
+  init: RequestInit,
+): Promise<Response> {
+  try {
+    return await fetch(url, init);
+  } catch (err) {
+    if (err instanceof Error && err.message === 'fetch failed' && err.cause) {
+      const cause = err.cause as { code?: string; message?: string; errno?: string };
+      const detail = cause.code || cause.errno || cause.message;
+      if (detail) {
+        const enriched = new Error(`fetch failed (${detail})`);
+        (enriched as Error & { cause?: unknown }).cause = err.cause;
+        throw enriched;
+      }
+    }
+    throw err;
+  }
+}
+
 async function withAbortableTimeout<T>(
   work: () => Promise<T>,
   controller: AbortController,
@@ -543,6 +599,21 @@ export class ModelClient {
       delete requestPayload['tool_choice'];
     }
 
+    // ── Grok: strip enum constraints containing "/" from tool schemas ────────
+    // Verified 2026-06-03 via Franklin repro: xAI's request validator hard-
+    // rejects any tool-schema enum string containing "/", e.g.
+    //   "[engine_imposed] /properties/endpoint/enum/0: '/' in 'enum' string
+    //    value is currently not supported"
+    // The Surf tools (and a few others) use endpoint paths like
+    // "market/ranking" as enum values to constrain the model's choice. The
+    // path list is also enumerated in each tool's description text, so the
+    // model still sees the legal values — only the schema-level constraint
+    // gets dropped. Other providers keep the enum unchanged.
+    if (request.model.startsWith('xai/') && Array.isArray(requestPayload['tools'])) {
+      const tools = requestPayload['tools'] as Record<string, unknown>[];
+      requestPayload['tools'] = tools.map((tool) => stripSlashEnumsForGrok(tool));
+    }
+
     // ── GLM-specific optimizations ───────────────────────────────────────────
     // GLM models work best with temperature=0.8 per official zai spec.
     // Enable thinking mode only for explicit reasoning variants (-thinking-).
@@ -611,20 +682,20 @@ export class ModelClient {
       requestPayload = applyAnthropicPromptCaching(requestPayload, request);
     }
 
-    // ── GPT-5 / Codex: use "developer" role for system prompt ──────────────
-    // OpenAI GPT models give stronger instruction-following weight to the
-    // "developer" role. Move the top-level system prompt into messages[0]
-    // with role "developer" instead of the default "system".
-    const isGPT5OrCodex = request.model.includes('gpt-5') || request.model.includes('codex');
-    if (isGPT5OrCodex && typeof request.system === 'string' && request.system.length > 0) {
-      const systemRole = 'developer';
-      const existingMessages = (requestPayload['messages'] as unknown[]) || [];
-      requestPayload['messages'] = [
-        { role: systemRole, content: request.system },
-        ...existingMessages,
-      ];
-      delete requestPayload['system'];
-    }
+    // ── No client-side system → developer role rewrite for GPT-5/Codex ─────
+    // We used to move the top-level `system` field into `messages[0]` with
+    // role "developer" for GPT-5/Codex (OpenAI docs say that role gets
+    // stronger instruction-following weight). But the BlockRun gateway
+    // speaks Anthropic Messages, which only accepts user|assistant in
+    // messages[] — the developer-role payload returns HTTP 400 from the
+    // gateway's protocol validator BEFORE it ever reaches OpenAI:
+    //   {"error":{"message":"messages.0.role: Invalid option: expected
+    //    one of \"user\"|\"assistant\""}}
+    // Verified 2026-06-03 via direct curl + Franklin repro: all GPT-5
+    // family models (mini/nano/5.4/5.5) were silently failing under
+    // headless -p mode. Keep `system` as a top-level field and let the
+    // gateway translate to whatever the upstream needs (it already knows
+    // gpt-5 expects developer role internally).
 
     const body = JSON.stringify(requestPayload);
 
@@ -652,7 +723,7 @@ export class ModelClient {
 
     try {
       let response = await withAbortableTimeout(
-        () => fetch(endpoint, {
+        () => fetchWithUnwrappedCause(endpoint, {
           method: 'POST',
           headers,
           body,
@@ -673,7 +744,7 @@ export class ModelClient {
         }
 
         response = await withAbortableTimeout(
-          () => fetch(endpoint, {
+          () => fetchWithUnwrappedCause(endpoint, {
             method: 'POST',
             headers: { ...headers, ...paymentHeader },
             body,
@@ -733,7 +804,7 @@ export class ModelClient {
             console.error(`[franklin] tool_choice rejected by upstream; retrying without it (model=${request.model})`);
           }
           response = await withAbortableTimeout(
-            () => fetch(endpoint, {
+            () => fetchWithUnwrappedCause(endpoint, {
               method: 'POST',
               headers,
               body: retryBody,
@@ -750,7 +821,7 @@ export class ModelClient {
               return;
             }
             response = await withAbortableTimeout(
-              () => fetch(endpoint, {
+              () => fetchWithUnwrappedCause(endpoint, {
                 method: 'POST',
                 headers: { ...headers, ...paymentHeader },
                 body: retryBody,
diff --git a/src/commands/start.ts b/src/commands/start.ts
index 5a4d2c8..e5f2638 100644
--- a/src/commands/start.ts
+++ b/src/commands/start.ts
@@ -456,6 +456,12 @@ async function runOneShot(agentConfig: AgentConfig, prompt: string): Promise<num
       process.stdout.write(event.text);
     } else if (event.kind === 'turn_done') {
       exitCode = oneShotExitCodeForTurnReason(event.reason);
+      // Without this, headless callers see exit 1 + zero stderr — impossible
+      // to triage. Verified 2026-06-03: GPT-5 family failing with HTTP 400
+      // from the gateway looked identical to a network timeout in `-p` mode.
+      if (event.reason !== 'completed' && event.error) {
+        process.stderr.write(`\n${event.error}\n`);
+      }
       process.stdout.write('\n');
     }
   });