From 9b3fe2cc1615c78781d513b8494fab4890f63509 Mon Sep 17 00:00:00 2001 From: Fsocietyhhh <1211904451@qq.com> Date: Wed, 3 Jun 2026 01:07:28 -0700 Subject: [PATCH] fix(agent): restore GPT-5 / Grok families and surface real errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPT-5 (mini/nano/5.4/5.5/codex) silently failed on the headless `-p` path; Grok 3 / 4-0709 / 4-1-fast-reasoning the same way once the Surf tools (#68) landed. Both were 400s from the gateway that the headless caller never printed. 1. Drop the GPT-5/Codex `system → role: developer` rewrite (llm.ts). We were generating { role: "developer" } messages for any GPT-5 request, but the gateway speaks Anthropic Messages — only user|assistant are accepted in messages[]. Verified live: curl -d '{"model":"openai/gpt-5-mini","messages":[{"role":"developer",...}]}' → HTTP 400 messages.0.role: Invalid option: expected one of "user"|"assistant" Keeping `system` as a top-level field lets the gateway translate to whatever the OpenAI upstream actually needs (it already does). 2. Strip slash-bearing enum constraints from tool schemas before sending to xAI (llm.ts). xAI's validator hard-rejects any enum string containing '/', e.g. Surf's endpoint enum ["market/ranking","market/futures",...] → 26 enum violations in one request. The path list is still in the tool description, so the model sees the legal values; only the schema constraint is dropped. 3. Wrap fetch() so undici's opaque "TypeError: fetch failed" carries its .cause.code (llm.ts). Without this, every transient gateway blip surfaces as `Network: fetch failed` with no way to triage whether it's the user's network, Cloudflare, or upstream. Now the message is e.g. `fetch failed (UND_ERR_SOCKET)`. 4. Forward turn-done errors to stderr in headless one-shot mode (commands/start.ts). Without this, `franklin -p ...` exits 1 with zero stderr — impossible to triage. This is how the 400s above were hidden in the first place. Repro (after fix): all of gpt-5-mini / -nano / 5.4 / 5.5 / grok-3 / grok-4-0709 / grok-4-1-fast-reasoning return their expected reply under `franklin start -p "say PONG" -m `. Out of scope (gateway side): - gpt-5.3-codex returns 503 "Streaming not supported for Responses API models" — now visible thanks to (4), but the fix belongs in the gateway's Responses-API path. - claude-sonnet-4.6 / zai/glm-5.1 intermittent fetch failed — cheetah and I both reproduced. Hypothesis: long first-token latency on reasoning models with Cloud Run / Cloudflare idle-out before any byte hits the response stream. Needs a keepalive SSE comment in the gateway's stream entry points (Bedrock + GLM + Anthropic transpile). (3) above will pinpoint the exact undici cause next time it fires. --- src/agent/llm.ts | 107 +++++++++++++++++++++++++++++++++++------- src/commands/start.ts | 6 +++ 2 files changed, 95 insertions(+), 18 deletions(-) diff --git a/src/agent/llm.ts b/src/agent/llm.ts index 1caa5c0..fc5ca5f 100644 --- a/src/agent/llm.ts +++ b/src/agent/llm.ts @@ -192,6 +192,62 @@ function createModelTimeoutError(stage: 'request' | 'stream', model: string, tim return new Error(`Model ${stage} timed out after ${timeoutMs}ms on ${model}`); } +/** + * Walk a tool-schema object and drop any `enum` whose entries are strings + * containing "/". Grok's request validator rejects such enums outright (see + * the call site for the verbatim upstream error). The model still sees the + * intended values via the tool's description text, so dropping the schema- + * level constraint is purely a compatibility shim — no behavioral loss. + */ +function stripSlashEnumsForGrok(node: unknown): unknown { + if (Array.isArray(node)) return node.map(stripSlashEnumsForGrok); + if (!node || typeof node !== 'object') return node; + const out: Record = {}; + for (const [k, v] of Object.entries(node as Record)) { + if ( + k === 'enum' && + Array.isArray(v) && + v.some((x) => typeof x === 'string' && x.includes('/')) + ) { + continue; // drop the constraint entirely + } + out[k] = stripSlashEnumsForGrok(v); + } + return out; +} + +/** + * Wrap `fetch()` so that undici's opaque `TypeError: fetch failed` is + * replaced with the underlying network reason (ECONNRESET, UND_ERR_*, + * certificate, DNS, etc.). Without this, every transient connection blip + * surfaces to the user as "Network: fetch failed" with no way to tell + * whether it's their network, the gateway, or the upstream provider. + * + * Verified 2026-06-03: stress-testing claude-sonnet-4.6 reproduces + * intermittent "fetch failed" (cheetah's report on 3.25.0). With this + * helper the message becomes e.g. "fetch failed (UND_ERR_SOCKET: other + * side closed)" which is actionable. + */ +async function fetchWithUnwrappedCause( + url: string, + init: RequestInit, +): Promise { + try { + return await fetch(url, init); + } catch (err) { + if (err instanceof Error && err.message === 'fetch failed' && err.cause) { + const cause = err.cause as { code?: string; message?: string; errno?: string }; + const detail = cause.code || cause.errno || cause.message; + if (detail) { + const enriched = new Error(`fetch failed (${detail})`); + (enriched as Error & { cause?: unknown }).cause = err.cause; + throw enriched; + } + } + throw err; + } +} + async function withAbortableTimeout( work: () => Promise, controller: AbortController, @@ -543,6 +599,21 @@ export class ModelClient { delete requestPayload['tool_choice']; } + // ── Grok: strip enum constraints containing "/" from tool schemas ──────── + // Verified 2026-06-03 via Franklin repro: xAI's request validator hard- + // rejects any tool-schema enum string containing "/", e.g. + // "[engine_imposed] /properties/endpoint/enum/0: '/' in 'enum' string + // value is currently not supported" + // The Surf tools (and a few others) use endpoint paths like + // "market/ranking" as enum values to constrain the model's choice. The + // path list is also enumerated in each tool's description text, so the + // model still sees the legal values — only the schema-level constraint + // gets dropped. Other providers keep the enum unchanged. + if (request.model.startsWith('xai/') && Array.isArray(requestPayload['tools'])) { + const tools = requestPayload['tools'] as Record[]; + requestPayload['tools'] = tools.map((tool) => stripSlashEnumsForGrok(tool)); + } + // ── GLM-specific optimizations ─────────────────────────────────────────── // GLM models work best with temperature=0.8 per official zai spec. // Enable thinking mode only for explicit reasoning variants (-thinking-). @@ -611,20 +682,20 @@ export class ModelClient { requestPayload = applyAnthropicPromptCaching(requestPayload, request); } - // ── GPT-5 / Codex: use "developer" role for system prompt ────────────── - // OpenAI GPT models give stronger instruction-following weight to the - // "developer" role. Move the top-level system prompt into messages[0] - // with role "developer" instead of the default "system". - const isGPT5OrCodex = request.model.includes('gpt-5') || request.model.includes('codex'); - if (isGPT5OrCodex && typeof request.system === 'string' && request.system.length > 0) { - const systemRole = 'developer'; - const existingMessages = (requestPayload['messages'] as unknown[]) || []; - requestPayload['messages'] = [ - { role: systemRole, content: request.system }, - ...existingMessages, - ]; - delete requestPayload['system']; - } + // ── No client-side system → developer role rewrite for GPT-5/Codex ───── + // We used to move the top-level `system` field into `messages[0]` with + // role "developer" for GPT-5/Codex (OpenAI docs say that role gets + // stronger instruction-following weight). But the BlockRun gateway + // speaks Anthropic Messages, which only accepts user|assistant in + // messages[] — the developer-role payload returns HTTP 400 from the + // gateway's protocol validator BEFORE it ever reaches OpenAI: + // {"error":{"message":"messages.0.role: Invalid option: expected + // one of \"user\"|\"assistant\""}} + // Verified 2026-06-03 via direct curl + Franklin repro: all GPT-5 + // family models (mini/nano/5.4/5.5) were silently failing under + // headless -p mode. Keep `system` as a top-level field and let the + // gateway translate to whatever the upstream needs (it already knows + // gpt-5 expects developer role internally). const body = JSON.stringify(requestPayload); @@ -652,7 +723,7 @@ export class ModelClient { try { let response = await withAbortableTimeout( - () => fetch(endpoint, { + () => fetchWithUnwrappedCause(endpoint, { method: 'POST', headers, body, @@ -673,7 +744,7 @@ export class ModelClient { } response = await withAbortableTimeout( - () => fetch(endpoint, { + () => fetchWithUnwrappedCause(endpoint, { method: 'POST', headers: { ...headers, ...paymentHeader }, body, @@ -733,7 +804,7 @@ export class ModelClient { console.error(`[franklin] tool_choice rejected by upstream; retrying without it (model=${request.model})`); } response = await withAbortableTimeout( - () => fetch(endpoint, { + () => fetchWithUnwrappedCause(endpoint, { method: 'POST', headers, body: retryBody, @@ -750,7 +821,7 @@ export class ModelClient { return; } response = await withAbortableTimeout( - () => fetch(endpoint, { + () => fetchWithUnwrappedCause(endpoint, { method: 'POST', headers: { ...headers, ...paymentHeader }, body: retryBody, diff --git a/src/commands/start.ts b/src/commands/start.ts index 5a4d2c8..e5f2638 100644 --- a/src/commands/start.ts +++ b/src/commands/start.ts @@ -456,6 +456,12 @@ async function runOneShot(agentConfig: AgentConfig, prompt: string): Promise