From 45878f47f03aa8231f24f1facb56346326028a45 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 18:06:59 -0400
Subject: [PATCH 01/27] =?UTF-8?q?spec:=20session=20pooling=20TRD=20rev=202?=
 =?UTF-8?q?=20=E2=80=94=20resolve=20all=2014=20sub-agent=20findings?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses 3 critical (request serialization, orphan reclamation, context
accumulation), 5 major (process death, flex zone, disconnect, auth, unknown
model), and 6 minor (DST, stats, testing, commits, logging, caps) findings
from Opus architectural review.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 specs/session-pooling.spec.md | 457 ++++++++++++++++++++++++++++++++++
 1 file changed, 457 insertions(+)
 create mode 100644 specs/session-pooling.spec.md

diff --git a/specs/session-pooling.spec.md b/specs/session-pooling.spec.md
new file mode 100644
index 0000000..49685b4
--- /dev/null
+++ b/specs/session-pooling.spec.md
@@ -0,0 +1,457 @@
+# Session Pooling — Spec (Tier 1) — Rev 2
+
+**Rev 2 (2026-03-21):** Addresses 14 findings from Opus sub-agent architectural review. 3 critical, 5 major, 6 minor. All resolved.
+
+## Purpose
+
+Replace the subprocess-per-request architecture in the Claude Max API proxy with a session-aware process pool that locks warm CLI processes to OpenClaw session keys, eliminating 3–10s spawn overhead per request while preventing cross-agent context contamination.
+
+## Context
+
+The proxy currently spawns a new `claude --print` subprocess for every API request. Each process pays the full startup cost (CLI initialization, auth handshake), then dies after one response. With 20 agent sessions and ~5 concurrent, this overhead is the dominant latency contributor.
+
+The prototype (`src/subprocess/pool.ts`, `src/server/standalone-pool.ts`) proved the concept: 33% faster single requests, clean concurrent handling, queue draining. But it uses a shared stateless pool — any process serves any agent. Testing confirmed that CLI processes in `--input-format stream-json` mode accumulate context across messages (the CLI treats sequential stdin messages as one continuous conversation). This means a shared pool causes cross-agent context contamination and unbounded context growth.
+
+The production design locks each CLI process to a specific OpenClaw session key (which encodes agent + channel/thread). No cross-contamination. Context accumulation is bounded to one session's traffic. A nightly sweep recycles idle processes.
+
+## Architecture
+
+### Components
+
+```
+┌──────────────────────────────────────────────────┐
+│  routes.ts (existing)                            │
+│  handleChatCompletions()                         │
+│    ├─ reads x-openclaw-session-key header        │
+│    ├─ reads model from request body              │
+│    └─ calls SessionPoolRouter.execute()          │
+└──────────────┬───────────────────────────────────┘
+               │
+┌──────────────▼───────────────────────────────────┐
+│  SessionPoolRouter (NEW — src/subprocess/router.ts) │
+│    ├─ lockedSessions: Map<sessionKey, PooledProcess> │
+│    ├─ warmPool: Map<model, PooledProcess[]>          │
+│    │   ├─ opus pool (default 6)                      │
+│    │   └─ sonnet pool (default 4)                    │
+│    ├─ execute(prompt, model, sessionKey) → Emitter   │
+│    │   1. Check lockedSessions for sessionKey        │
+│    │   2. If found + idle → route to locked process  │
+│    │   3. If found + busy → enqueue on per-process   │
+│    │      request queue (max depth: 3)               │
+│    │   4. If not found → claim from warmPool[model]  │
+│    │   5. If warmPool empty → spawn cold process     │
+│    │   6. Lock claimed/spawned process to sessionKey  │
+│    ├─ sweep() — 3 AM ET nightly                      │
+│    │   Recycle locked processes idle > 2 hours        │
+│    │   Recycle locked processes with requestCount > 50│
+│    │   Respawn recycled into warmPool                 │
+│    │   Refill warmPool to configured size             │
+│    │   Enforce MAX_TOTAL_PROCESSES cap                │
+│    └─ shutdown() — graceful teardown                  │
+└──────────────────────────────────────────────────┘
+               │
+┌──────────────▼───────────────────────────────────┐
+│  PooledProcess (enhanced from prototype pool.ts) │
+│    ├─ CLI process (--input-format stream-json)   │
+│    ├─ model: "opus" | "sonnet"                   │
+│    ├─ lockedTo: sessionKey | null                │
+│    ├─ agentChannel: string | null (lineage key)  │
+│    ├─ lastRequestAt: timestamp                   │
+│    ├─ spawnedAt: timestamp                       │
+│    ├─ requestCount: number                       │
+│    ├─ state: "idle" | "busy" | "recycling"       │
+│    └─ requestQueue: Array<PendingRequest>         │
+└──────────────────────────────────────────────────┘
+```
+
+### Request Flow
+
+```
+1. OpenClaw gateway → POST /v1/chat/completions
+   Headers: x-openclaw-session-key: "agent:scope:discord:channel:1475832162648461316"
+   Body: { model: "claude-opus-4", messages: [...], stream: true }
+
+2. routes.ts extracts sessionKey from header, model from body
+
+3. SessionPoolRouter.execute(prompt, "opus", sessionKey):
+   a) lockedSessions.has(sessionKey)?
+      YES + idle → write to stdin, mark busy
+      YES + busy → enqueue on process's requestQueue (max 3, reject with 429 if full)
+      NO  → warmPool["opus"].pop() → lock to sessionKey
+            if warmPool empty → spawn new process (3-10s cold start, one-time)
+            if MAX_TOTAL_PROCESSES reached → fall back to ClaudeSubprocess
+
+4. Write prompt to process stdin as stream-json message
+5. Read response from process stdout, emit events to caller
+6. Mark process idle, drain requestQueue if non-empty (next queued request starts)
+7. Process stays locked to sessionKey for future requests
+```
+
+### Per-Process Request Serialization (Critical — Finding #1)
+
+Each `PooledProcess` has a `requestQueue` (FIFO, max depth 3). The CLI's stdin pipe can only handle one message at a time — interleaving would corrupt both responses.
+
+**When a request arrives for a locked-busy process:**
+1. If `requestQueue.length < 3` → enqueue, return a pending emitter
+2. If `requestQueue.length >= 3` → reject with HTTP 429 "Too Many Requests" + `Retry-After: 5`
+3. When the active request completes → dequeue next, write to stdin, emit to its caller
+
+**Why max depth 3:** A queue deeper than 3 means the session is being hammered faster than the model can respond. Backpressure via 429 is safer than unbounded queueing. The gateway will retry.
+
+### Orphan Reclamation via Lineage Tracking (Critical — Finding #2)
+
+Session resets generate a new session key. The old process is orphaned. Without lineage tracking, orphans accumulate until the 3 AM sweep — 20+ manual resets in a day = 20+ leaked processes.
+
+**Solution:** Each `PooledProcess` stores `agentChannel` — extracted from the session key (e.g., `agent:scope:discord:channel:1475832162648461316` → `scope:discord:channel:1475832162648461316`). When a new session key arrives:
+
+1. Parse the `agentChannel` from the new key
+2. Scan `lockedSessions` for any process with the same `agentChannel` but a *different* session key
+3. If found → that process is orphaned. If idle: kill immediately, respawn into warm pool. If busy: mark for reclamation after current request completes.
+
+This catches resets instantly without waiting for the nightly sweep.
+
+### Context Accumulation Threshold (Critical — Finding #3)
+
+Each request adds its full prompt to the CLI's accumulated context. After 50 requests, the CLI could have 500K+ tokens of accumulated noise, risking context window overflow and degraded responses.
+
+**Rule:** When a process completes a request and `requestCount > 50`:
+- If `requestQueue` is empty → recycle immediately (kill, respawn fresh into warm pool, clear the session lock — next request from this key claims a new process)
+- If `requestQueue` is non-empty → drain the queue first, then recycle when empty
+
+The threshold is configurable via `POOL_MAX_REQUESTS_PER_PROCESS` (default 50).
+
+### Nightly Sweep (3:00 AM ET)
+
+```
+For each locked process:
+  If lastRequestAt < (now - 2 hours) OR requestCount > 50:
+    If state == "busy" → skip (don't interrupt active work)
+    Kill process
+    Remove from lockedSessions
+    Spawn fresh process into warmPool (if below configured size)
+
+Refill warmPool to configured sizes:
+  opus:   max(0, POOL_OPUS_SIZE - currentOpusWarm)  new processes
+  sonnet: max(0, POOL_SONNET_SIZE - currentSonnetWarm) new processes
+
+Enforce MAX_TOTAL_PROCESSES:
+  If total(locked + warm) >= MAX_TOTAL_PROCESSES:
+    Do NOT spawn new warm processes — let the pool recover naturally
+    Log a warning with current counts
+```
+
+**DST Handling (Finding #9):** Use `Intl.DateTimeFormat` with `timeZone: 'America/New_York'` to resolve the current ET hour, not a fixed UTC offset. This handles EST/EDT transitions correctly. Alternatively, use `node-cron` with timezone support (`{ timezone: 'America/New_York' }`).
+
+### Session Reset Handling
+
+When an agent's session is reset in OpenClaw, the gateway generates a new `sessionId`, which changes the `x-openclaw-session-key`. The proxy sees a new key → lineage tracking detects the orphan → orphan is reclaimed immediately (if idle) or after its current request (if busy) → new key claims a fresh process from the warm pool.
+
+### Process Death Recovery (Major — Finding #4)
+
+When a CLI process exits unexpectedly:
+
+1. **If idle:** Remove from `lockedSessions` (if locked) or from `warmPool`. Spawn replacement into warm pool.
+2. **If busy (mid-request):**
+   - The active request's emitter receives an error event
+   - Remove the dead process from `lockedSessions`
+   - **Do NOT auto-retry** — the gateway handles retries at the HTTP level
+   - If the process had queued requests, reject them with 503 + `Retry-After: 3`
+   - Spawn a replacement into the warm pool
+
+The key invariant: a dead process is never left in `lockedSessions`. The `exit` handler atomically removes the mapping and spawns a replacement.
+
+### Client Disconnect Handling (Major — Finding #6)
+
+When a client disconnects mid-stream:
+- Detach the response emitter (stop sending to the dead connection)
+- Let the CLI process finish generating its response (the flat-rate subscription means no cost for wasted inference)
+- The process returns to `idle` state, still locked to its session key
+- **Context divergence:** The CLI now has a response in its accumulated context that the gateway never received. This is harmless because OpenClaw sends the full messages array with every request — the gateway's context is authoritative, and the CLI's accumulated context is noise that gets recycled away eventually.
+
+### Auth Token Expiration (Major — Finding #7)
+
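+Long-lived CLI processes (24 hours or longer — the nightly sweep skips processes that were active within the last 2 hours and are under the request-count threshold) may outlive their auth token.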
+
+**Detection:** If a CLI process returns an auth error (non-zero exit or error message containing "auth", "unauthorized", "token expired"):
+1. Mark the process as dead — trigger the process death recovery flow
+2. Log the auth failure for monitoring
+3. The replacement process will authenticate fresh on spawn
+
+**No preemptive refresh needed.** The Claude CLI handles token refresh internally for most cases. This catch handles the edge case where it doesn't.
+
+### Unknown Model Routing (Major — Finding #8)
+
+If a request has a valid `x-openclaw-session-key` but requests a model not in any pool (e.g., `haiku`):
+- **Do not attempt pooled routing** — there's no pool for that model
+- Fall back to `ClaudeSubprocess` (subprocess-per-request)
+- Log the model name for tracking — if it appears frequently, consider adding a pool
+
+### Model Pool Flex Zone (Major — Finding #5)
+
+Static 6/4 opus/sonnet split can starve one model if usage patterns shift. The warm pool sizing is a *target*, not a hard partition:
+
+- On startup, spawn `POOL_OPUS_SIZE` opus + `POOL_SONNET_SIZE` sonnet processes
+- If the opus warm pool is empty but the sonnet pool has excess (> `POOL_SONNET_SIZE`): **do not cross-assign** (different model). Spawn a cold opus process.
+- If total processes < `MAX_TOTAL_PROCESSES`, cold spawns are always allowed regardless of per-model targets
+- The 3 AM sweep refills to configured targets, naturally rebalancing
+
+This is simpler than a dynamic flex zone and maintains the invariant that model pools are pure. The `MAX_TOTAL_PROCESSES` cap (default 30) prevents runaway spawning.
+
+## Invariants (non-negotiable)
+
+- A locked process serves ONLY the session key it is locked to. No exceptions. No "borrowing" idle locked processes.
+- A process serves ONLY one request at a time on its stdin pipe. Concurrent requests queue on the per-process request queue.
+- The warm pool is partitioned by model. An opus request never gets a sonnet process or vice versa.
+- If the warm pool is empty and no process is available, the proxy spawns a cold subprocess (same as current behavior). It NEVER fails a request due to pool exhaustion — it degrades to cold-start or subprocess fallback.
+- Total process count (locked + warm) must not exceed `MAX_TOTAL_PROCESSES` (default 30). Beyond this, new requests fall back to `ClaudeSubprocess`.
+- The production port (3456) and health endpoint format remain unchanged. Existing OpenClaw provider config works without modification.
+- `ClaudeSubprocess` (subprocess-per-request) is retained in the codebase as fallback for unknown models and headerless requests.
+- The nightly sweep runs at 3:00 AM America/New_York (DST-aware), not UTC.
+
+## Forbidden Patterns
+
+- **No cross-session routing:** A process locked to session A must never serve session B, even if A is idle and B is queued. Claim a new process instead.
+- **No model mixing within a process:** A process spawned with `--model opus` must never receive a sonnet request. Model is baked at spawn time.
+- **No eager recycling of active sessions:** The sweep recycles only processes idle > 2 hours OR exceeding the request count threshold. A process that served a request 30 minutes ago stays locked, even during the 3 AM sweep (unless it's over the request count limit).
+- **No shared mutable state between pool and routes:** The router exposes `execute()`, `stats()`, and `sweep()` only. Routes do not directly manipulate pool internals.
+- **No removal of the existing subprocess manager:** `ClaudeSubprocess` stays for fallback and backward compatibility.
+- **No concurrent writes to a process's stdin:** The per-process request queue serializes all writes. Any code path that bypasses the queue is a critical bug.
+
+## Modules
+
+### Module 1: SessionPoolRouter (`src/subprocess/router.ts`)
+
+**Status:** Planning
+**Owner:** Max
+**Dependencies:** Existing pool.ts (to be refactored), existing openai-to-cli.ts adapter
+
+**Deliverables:**
+- `SessionPoolRouter` class with `execute()`, `stats()`, `sweep()`, `shutdown()`
+- Per-model warm pools (opus, sonnet) with configurable sizes
+- Session-key locking with `lockedSessions` map
+- Per-process FIFO request queue (max depth 3) with 429 backpressure
+- Lineage tracking (`agentChannel`) for orphan detection on session reset
+- Context accumulation guard: recycle process when `requestCount > POOL_MAX_REQUESTS_PER_PROCESS`
+- Cold-start fallback when warm pool is empty (subject to `MAX_TOTAL_PROCESSES` cap)
+- `ClaudeSubprocess` fallback when `MAX_TOTAL_PROCESSES` is reached or model is unknown
+- Nightly sweep logic (idle > 2 hours OR requestCount > 50 → recycle → refill)
+- Process health monitoring: auto-respawn on unexpected death with atomic `lockedSessions` cleanup
+- Auth error detection: treat auth failures as process death, trigger recovery flow
+
+**Acceptance Criteria:**
+- [ ] Requests with the same `x-openclaw-session-key` always route to the same process
+- [ ] Requests with different session keys never share a process
+- [ ] Two rapid requests on the same session key are serialized — second waits for first to complete, no stdin interleaving
+- [ ] Fourth request on a busy process (queue depth 3) returns HTTP 429 with `Retry-After: 5`
+- [ ] When warm pool is empty, a cold process is spawned and the request succeeds (no failure)
+- [ ] When `MAX_TOTAL_PROCESSES` is reached, new session requests fall back to `ClaudeSubprocess`
+- [ ] Opus requests only go to opus processes; sonnet to sonnet
+- [ ] Unknown model (e.g., haiku) with session key header falls back to `ClaudeSubprocess`
+- [ ] Session reset (new key, same agent+channel) immediately orphan-reclaims the old process
+- [ ] Process with `requestCount > 50` is recycled on next idle transition (queue drained first)
+- [ ] `stats()` returns: `{ total, locked, warm: { opus, sonnet }, busy, queued, orphansReclaimed, totalRequests, processRecycles }`
+- [ ] Sweep correctly identifies and recycles processes idle > 2 hours OR requestCount > 50
+- [ ] Sweep skips busy processes (never interrupts active work)
+- [ ] Sweep refills warm pool to configured size after recycling (respecting `MAX_TOTAL_PROCESSES`)
+- [ ] Process death triggers atomic cleanup: remove from `lockedSessions`, reject queued requests with 503, spawn replacement
+- [ ] Auth errors (exit code + error message matching) trigger process death recovery
+- [ ] Graceful shutdown kills all processes and drains/rejects queued requests
+
+### Module 2: Route Integration (`src/server/routes.ts`)
+
+**Status:** Planning
+**Owner:** Max
+**Dependencies:** Module 1
+
+**Deliverables:**
+- Extract `x-openclaw-session-key` and `x-openclaw-agent-id` from request headers
+- Route through `SessionPoolRouter.execute()` instead of `new ClaudeSubprocess()`
+- Fallback to `ClaudeSubprocess` if session key header is missing (backward compat)
+- Client disconnect handling: detach emitter, let process finish, return to locked-idle state
+- Structured logging: log session key, model, process PID, latency, queue depth per request
+
+**Acceptance Criteria:**
+- [ ] Requests with `x-openclaw-session-key` header use pooled routing
+- [ ] Requests without the header fall back to subprocess-per-request (existing behavior)
+- [ ] Streaming and non-streaming both work through the pool
+- [ ] Client disconnect does not kill the pooled process — it completes and returns to locked-idle
+- [ ] Health endpoint includes pool stats from `stats()`
+- [ ] No changes to the response format (OpenAI-compatible output unchanged)
+- [ ] Each request logs: `{ sessionKey, model, pid, latencyMs, queueDepth, routeType: "locked"|"warm"|"cold"|"fallback" }` (field names match the structured logging schema in the Observability section)
+
+### Module 3: Server Startup & Sweep Scheduling (`src/server/standalone.ts`)
+
+**Status:** Planning
+**Owner:** Max
+**Dependencies:** Module 1
+
+**Deliverables:**
+- Initialize `SessionPoolRouter` at server startup with configured pool sizes
+- Schedule nightly sweep at 3:00 AM ET using `node-cron` with `timezone: 'America/New_York'` (DST-aware)
+- Environment variable configuration:
+  - `POOL_OPUS_SIZE` (default 6) — warm opus processes
+  - `POOL_SONNET_SIZE` (default 4) — warm sonnet processes
+  - `POOL_MAX_REQUESTS_PER_PROCESS` (default 50) — context accumulation threshold
+  - `MAX_TOTAL_PROCESSES` (default 30) — hard cap on all pool processes (locked + warm)
+  - `SWEEP_HOUR` (default 3) — hour in ET for nightly sweep
+  - `SWEEP_IDLE_THRESHOLD_MS` (default 7200000) — idle time before sweep recycles
+  - `POOL_REQUEST_QUEUE_DEPTH` (default 3) — per-process queue depth
+- Graceful shutdown integration (SIGTERM/SIGINT)
+
+**Acceptance Criteria:**
+- [ ] Pool initializes with configured sizes on server start
+- [ ] Sweep runs at 3:00 AM ET daily, correctly handling EST↔EDT transitions
+- [ ] Pool sizes configurable via env vars without code changes
+- [ ] Server startup logs pool configuration and initial stats
+- [ ] Graceful shutdown waits for in-flight requests (30s timeout) before killing processes
+- [ ] Queued requests are rejected with 503 during shutdown
+
+### Module 4: Prototype Cleanup
+
+**Status:** Planning
+**Owner:** Max
+**Dependencies:** Modules 1-3 merged and verified
+
+**Deliverables:**
+- Remove `src/server/standalone-pool.ts` (prototype — superseded by production integration)
+- Refactor `src/subprocess/pool.ts` into the router or remove if fully superseded
+- Update CLAUDE.md with new architecture documentation
+- **Separate commit from Modules 1-3** (allows clean revert if prototype removal causes issues)
+
+**Acceptance Criteria:**
+- [ ] No orphaned prototype files in the codebase
+- [ ] CLAUDE.md reflects the pooled architecture
+- [ ] Build succeeds with no unused imports or dead code
+- [ ] Committed separately from the main pooling implementation
+
+## Validation Criteria
+
+What "done" looks like — these must be verified by someone other than the builder:
+
+1. **Functional:** Send 5 requests from different session keys → each gets a dedicated process. Send a 6th from a key that already has a process → routed to the existing one (verify via logs or health endpoint).
+2. **Serialization:** Send 2 rapid requests on the same session key → both succeed, responses are correct and not interleaved. Verify via log timestamps that the second waited for the first.
+3. **Backpressure:** Send 5 rapid requests on the same session key (queue depth 3) → first executes, 3 queue, 5th returns 429.
+4. **Concurrency:** Send 3 simultaneous requests from different keys → all served in parallel with no queueing.
+5. **Cold start:** Send a request after all warm processes are claimed → new process spawned, request succeeds, latency logged.
+6. **Overflow cap:** Fill the pool to `MAX_TOTAL_PROCESSES`, send another request with a new session key → falls back to `ClaudeSubprocess`, still succeeds.
+7. **Sweep:** Manually trigger sweep → processes idle > 2 hours recycled, processes with requestCount > 50 recycled, active processes retained. Warm pool refilled.
+8. **Session reset:** Send request with key A, then send with key A' (different session ID, same agent+channel) → gets a fresh process. Key A's process reclaimed immediately.
+9. **Orphan reclamation:** Simulate 5 rapid session resets from the same agent → verify no orphan accumulation (all old processes reclaimed, not leaked until 3 AM).
+10. **Process death:** Kill a locked process's PID externally → verify: removed from lockedSessions, replacement spawned, next request on that key gets a new process.
+11. **Client disconnect:** Start a streaming request, disconnect mid-stream → process not killed, returns to locked-idle state, next request on same key works.
+12. **Context degradation:** Send 60 requests through the same locked process → verify the process is recycled once `requestCount` exceeds 50 (i.e., on the first idle transition after request 51 completes). Verify subsequent request gets a fresh process.
+13. **Backward compat:** Send a request without `x-openclaw-session-key` header → falls back to subprocess-per-request. No error.
+14. **Unknown model:** Send a request with session key + model "haiku" → falls back to `ClaudeSubprocess`. No error.
+15. **Health:** `/health` endpoint returns pool stats including locked/warm/busy/queued/orphansReclaimed counts.
+16. **Build:** `npm run build` succeeds with no errors or warnings.
+
+## Observability (Finding #13)
+
+### Logging
+
+Every request logs a structured JSON line:
+```json
+{
+  "ts": "2026-03-21T17:30:00Z",
+  "event": "request",
+  "sessionKey": "agent:scope:discord:channel:...",
+  "model": "opus",
+  "pid": 12345,
+  "latencyMs": 1770,
+  "queueDepth": 0,
+  "routeType": "locked|warm|cold|fallback",
+  "requestCount": 12
+}
+```
+
+Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also logged as structured JSON.
+
+### Health Endpoint
+
+`GET /health` returns:
+```json
+{
+  "status": "ok",
+  "provider": "claude-code-cli",
+  "pool": {
+    "total": 12,
+    "locked": 7,
+    "warm": { "opus": 3, "sonnet": 2 },
+    "busy": 2,
+    "queued": 0,
+    "maxTotal": 30,
+    "orphansReclaimed": 3,
+    "totalRequests": 142,
+    "processRecycles": 5,
+    "uptime": 43200
+  }
+}
+```
+
+## Decision Log
+
+| Date | Decision | Rationale | Alternatives Considered |
+|------|----------|-----------|------------------------|
+| 2026-03-21 | Per-session-key locking over shared stateless pool | CLI accumulates context in stream-json mode (empirically verified). Shared pool causes cross-agent contamination. Locking prevents this. | Shared pool + aggressive recycling — rejected because it doesn't prevent cross-contamination within the recycling window |
+| 2026-03-21 | Per-model pools over single mixed pool | CLI `--model` flag is set at spawn time, cannot be changed per-request in stream-json mode | Single pool with model flag per message — not supported by CLI |
+| 2026-03-21 | 3 AM ET sweep over idle timeout | Simpler rules (one daily check vs. continuous timeout tracking). Session resets handle intra-day cleanup. User controls context freshness via manual reset. | Rolling idle timeout (30 min) — adds complexity, may recycle processes the user wants kept warm |
+| 2026-03-21 | 6 opus + 4 sonnet pool sizing | Matches observed usage pattern: ~70% opus, ~30% sonnet. 10 total covers expected 5-concurrent peak with buffer. Configurable via env vars. | 5+5 — doesn't match actual model distribution |
+| 2026-03-21 | Retain ClaudeSubprocess as fallback | Backward compatibility for requests without session key header. Also serves unknown models (haiku, future models) not in the pool. | Remove entirely — breaks headerless requests, limits future model support |
+| 2026-03-21 | Per-process request queue (max 3) + 429 backpressure | **Rev 2 — Finding #1.** CLI stdin pipe cannot handle concurrent writes. Serialization via FIFO queue prevents data corruption. Max depth 3 with 429 prevents unbounded memory growth. | Reject immediately if busy (too aggressive — normal gateway retry creates brief bursts). Unbounded queue (memory risk). |
+| 2026-03-21 | Lineage-based orphan reclamation | **Rev 2 — Finding #2.** Session resets orphan processes. Without immediate reclamation, 20+ resets/day = 20+ leaked processes until 3 AM. Lineage key (agent+channel) detects orphans on the spot. | Wait for 3 AM sweep (unacceptable leak rate). Timer-based idle reclamation (adds complexity, doesn't catch rapid resets). |
+| 2026-03-21 | Request count threshold (50) for recycling | **Rev 2 — Finding #3.** CLI accumulates context from every request. 50 requests ≈ 100-200K tokens of noise. Recycling at this threshold prevents context window overflow while staying well within safe bounds for a day's active session. | No threshold, rely on manual resets only (risky for agents that never reset). Lower threshold like 20 (too aggressive, causes unnecessary cold starts). |
+| 2026-03-21 | MAX_TOTAL_PROCESSES cap (30) | **Rev 2 — Finding #14.** Prevents runaway process spawning from consuming all system memory. 30 processes × 200MB = 6GB worst case — well within 128GB. Beyond 30, requests fall back to ClaudeSubprocess. | No cap (dangerous). Per-model cap only (doesn't prevent total runaway). |
+
+## Review History
+
+| Rev | Date | Reviewer | Findings | Status |
+|-----|------|----------|----------|--------|
+| 1 | 2026-03-21 | Opus sub-agent | 14 (3C/5M/6m) | All resolved in Rev 2 |
+
+### Finding Resolution Index
+
+| # | Severity | Finding | Resolution |
+|---|----------|---------|------------|
+| 1 | CRITICAL | No per-process request serialization | Per-process FIFO request queue (max 3) + 429 backpressure. See "Per-Process Request Serialization" section. |
+| 2 | CRITICAL | Orphaned processes leak until 3 AM | Lineage tracking via `agentChannel` field. Immediate reclamation on session reset. See "Orphan Reclamation" section. |
+| 3 | CRITICAL | No context accumulation mitigation | `requestCount > 50` triggers recycling on next idle. Configurable via env var. See "Context Accumulation Threshold" section. |
+| 4 | MAJOR | Process death while locked — race condition | Atomic cleanup in `exit` handler: remove from lockedSessions, reject queued requests with 503, spawn replacement. See "Process Death Recovery" section. |
+| 5 | MAJOR | Static pool sizing can starve one model | Pools are targets, not hard partitions. Cold spawns allowed up to MAX_TOTAL_PROCESSES. See "Model Pool Flex Zone" section. |
+| 6 | MAJOR | Client disconnect leaves CLI context diverged | Documented as harmless — OpenClaw sends full context every request, CLI accumulation is noise. See "Client Disconnect Handling" section. |
+| 7 | MAJOR | No auth expiration handling | Auth errors treated as process death, triggering standard recovery flow. See "Auth Token Expiration" section. |
+| 8 | MAJOR | Unknown model + session key — ambiguous routing | Unknown models bypass pool entirely, fall back to ClaudeSubprocess. See "Unknown Model Routing" section. |
+| 9 | MINOR | DST handling for sweep | Use `node-cron` with `timezone: 'America/New_York'` or `Intl.DateTimeFormat`. Specified in "Nightly Sweep" section. |
+| 10 | MINOR | `stats()` interface not defined | Full interface specified in "Health Endpoint" section under Observability. |
+| 11 | MINOR | No validation test for context degradation | Added as Validation Criterion #12: send 60 requests, verify recycling at 50. |
+| 12 | MINOR | Prototype cleanup should be separate commit | Module 4 now specifies separate commit. |
+| 13 | MINOR | No logging/observability spec | Added "Observability" section with structured logging and health endpoint schemas. |
+| 14 | MINOR | Pool size conflates warm vs. total | Added `MAX_TOTAL_PROCESSES` (default 30) as hard cap distinct from per-model warm targets. |
+
+## Pre-Change Impact Statement
+
+**Risk:** 🟡 Yellow
+
+**Impact on dependent systems:**
+- **OpenClaw gateway:** No changes required. Gateway already sends `x-openclaw-session-key` and `x-openclaw-agent-id` headers. Pool is transparent — same endpoint, same response format.
+- **All 20+ agent sessions:** Transparent improvement. Agents see faster responses, no behavioral change.
+- **systemd service:** No changes to the service file. Same binary, same port.
+- **Monitoring:** Health endpoint gains pool stats — additive, not breaking.
+
+**What could break:**
+- If the session key header extraction is wrong, all requests fall through to subprocess (safe degradation, but no pooling benefit)
+- If pool process dies mid-request and respawn races with the next request on the same key, the next request could get a cold start instead of its locked process (handled by atomic cleanup in Finding #4 resolution)
+- If the 3 AM sweep timezone calculation is wrong, sweep runs at wrong time (consequences are minor — processes accumulate a few extra hours of context; mitigated by DST-aware scheduling in Finding #9 resolution)
+- If `MAX_TOTAL_PROCESSES` is set too low, more requests fall back to subprocess than expected (safe degradation, adjust the cap)
+
+**Rollback path:**
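+1. Set `POOL_OPUS_SIZE=0`, `POOL_SONNET_SIZE=0`, and `MAX_TOTAL_PROCESSES=0` → no warm processes are pre-spawned and no cold pooled processes may be created, so all requests fall back to `ClaudeSubprocess` (no pooling, same as current behavior). Zeroing only the warm pool sizes is not sufficient: an empty warm pool triggers a cold pooled spawn, not the fallback.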
+2. Or: revert to the pre-pooling commit — `ClaudeSubprocess` is never removed
+
+**What needs testing:**
+- End-to-end request with session key header → confirm routing to locked process
+- Concurrent requests from different keys → confirm parallel execution
+- Rapid requests on same key → confirm serialization, no interleaving
+- Process death → confirm auto-respawn and session mapping cleanup
+- Session reset → confirm orphan reclamation
+- Sweep with mix of active and idle processes → confirm correct recycling
+- Request without header → confirm fallback to subprocess

From 4115d3fd7ba8e96251c2d240196c8ba6682aa8b6 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 18:16:40 -0400
Subject: [PATCH 02/27] =?UTF-8?q?spec:=20session=20pooling=20TRD=20Rev=203?=
 =?UTF-8?q?=20=E2=80=94=20resolve=207=20findings=20from=20second=20review?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three major findings resolved:
- N1: Orphan queue rejection (reject stale-session requests, don't drain)
- N2: Per-request timeout (POOL_REQUEST_TIMEOUT_MS, 5 min default)
- N3: Atomic pool claim via PENDING_SENTINEL (prevents race on new keys)

Four minor findings resolved:
- N4: agentChannel format validation with fallback
- N5: Off-by-one wording fix in backpressure validation
- N6: ClaudeSubprocess fallback documented as uncapped (intentional)
- N7: Aggregate route-hit counters added to stats()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 specs/session-pooling.spec.md | 108 ++++++++++++++++++++++++++++++----
 1 file changed, 98 insertions(+), 10 deletions(-)

diff --git a/specs/session-pooling.spec.md b/specs/session-pooling.spec.md
index 49685b4..dd75ced 100644
--- a/specs/session-pooling.spec.md
+++ b/specs/session-pooling.spec.md
@@ -1,4 +1,6 @@
-# Session Pooling — Spec (Tier 1) — Rev 2
+# Session Pooling — Spec (Tier 1) — Rev 3
+
+**Rev 3 (2026-03-21):** Addresses 7 new findings from second Opus sub-agent review. 3 major, 4 minor. All Rev 1 resolutions verified adequate.
 
 **Rev 2 (2026-03-21):** Addresses 14 findings from Opus sub-agent architectural review. 3 critical, 5 major, 6 minor. All resolved.
 
@@ -75,14 +77,17 @@ The production design locks each CLI process to a specific OpenClaw session key
 
 3. SessionPoolRouter.execute(prompt, "opus", sessionKey):
    a) lockedSessions.has(sessionKey)?
-      YES + idle → write to stdin, mark busy
+      YES + idle → write to stdin, mark busy, start request timeout timer
       YES + busy → enqueue on process's requestQueue (max 3, reject with 429 if full)
-      NO  → warmPool["opus"].pop() → lock to sessionKey
-            if warmPool empty → spawn new process (3-10s cold start, one-time)
-            if MAX_TOTAL_PROCESSES reached → fall back to ClaudeSubprocess
+      YES + PENDING_SENTINEL → enqueue (claim in progress, will drain when ready)
+      NO  → set PENDING_SENTINEL in lockedSessions (synchronous, prevents race)
+            → warmPool["opus"].pop() → lock to sessionKey (replace sentinel)
+            if warmPool empty AND total processes < MAX_TOTAL_PROCESSES → spawn new process (3-10s cold start, one-time)
+            if warmPool empty AND total processes >= MAX_TOTAL_PROCESSES → delete sentinel, fall back to ClaudeSubprocess
 
 4. Write prompt to process stdin as stream-json message
 5. Read response from process stdout, emit events to caller
+   - If POOL_REQUEST_TIMEOUT_MS exceeded → treat as process death (kill, reject queue, respawn)
 6. Mark process idle, drain requestQueue if non-empty (next queued request starts)
 7. Process stays locked to sessionKey for future requests
 ```
@@ -110,6 +115,61 @@ Session resets generate a new session key. The old process is orphaned. Without
 
 This catches resets instantly without waiting for the nightly sweep.
 
+### Orphan Queue Rejection (Major — Finding N1)
+
+When a session reset triggers orphan reclamation (see "Orphan Reclamation" above), the orphaned process may have queued requests from the old session key. These requests belong to a dead session — the gateway has already moved on to a new session key.
+
+**Rule:** When a process is marked as orphaned:
+- If idle: kill immediately, respawn into warm pool
+- If busy with an active request: let the active request complete, then kill
+- **All queued requests on the orphaned process are rejected immediately with HTTP 503 + `Retry-After: 3`** — they belong to the dead session key. The gateway will retry with the new key, which will route to the new process.
+
+Do NOT drain queued requests on an orphaned process. The queue contents are stale — they were enqueued under a session key that no longer exists.
+
+### Per-Request Timeout (Major — Finding N2)
+
+If a CLI process hangs (stuck inference, deadlocked stdin/stdout pipe, unresponsive model), it stays in "busy" state indefinitely. The per-process queue fills to max depth, then all subsequent requests get 429'd. The nightly sweep skips busy processes. The session is permanently wedged.
+
+**Solution:** `POOL_REQUEST_TIMEOUT_MS` (default 300000 / 5 minutes).
+
+When a request has been in-flight longer than this threshold:
+1. Treat the process as dead — trigger the standard process death recovery flow
+2. The active request's emitter receives a timeout error
+3. Reject all queued requests with 503 + `Retry-After: 3`
+4. Remove the process from `lockedSessions`
+5. Spawn a replacement into the warm pool
+6. Log the timeout event with session key, PID, and elapsed time
+
+**Why 5 minutes:** The existing proxy has a 15-minute timeout for subprocess-per-request. Pooled processes should be tighter — a 5-minute inference is extremely unusual and likely indicates a hang. The model typically responds in 10-60 seconds. This is configurable via env var.
+
+### Atomic Pool Claim (Major — Finding N3)
+
+Two simultaneous requests for the same *new* session key can race: both check `lockedSessions.has(key)` → false, both attempt to claim from the warm pool. One overwrites the other's lock, orphaning a process with no lineage reclamation.
+
+Node.js is single-threaded, but the `execute()` path has async yield points (cold spawn with `await`). Between the `has()` check and the lock write, another request can enter the same path.
+
+**Solution:** Set a pending-lock sentinel immediately (synchronously) on first touch:
+
+```
+execute(prompt, model, sessionKey):
+  if lockedSessions.has(sessionKey):
+    // route to existing locked process (or queue)
+  else:
+    // SYNCHRONOUS: set sentinel before any async work
+    lockedSessions.set(sessionKey, PENDING_SENTINEL)
+    try:
+      process = warmPool[model].pop() ?? await spawnCold(model)
+      lockedSessions.set(sessionKey, process)  // replace sentinel with real process
+    catch:
+      lockedSessions.delete(sessionKey)  // clean up sentinel on failure
+      throw
+
+  // Second request for same key hits the `has()` check → true → queues on the sentinel
+  // When sentinel is replaced with real process, queued requests are drained
+```
+
+The `PENDING_SENTINEL` is a special marker that causes incoming requests for that key to enqueue (same as a busy process). When the real process is assigned, the queue drains.
+
 ### Context Accumulation Threshold (Critical — Finding #3)
 
 Each request adds its full prompt to the CLI's accumulated context. After 50 requests, the CLI could have 500K+ tokens of accumulated noise, risking context window overflow and degraded responses.
@@ -205,17 +265,21 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
 - If the warm pool is empty and no process is available, the proxy spawns a cold subprocess (same as current behavior). It NEVER fails a request due to pool exhaustion — it degrades to cold-start or subprocess fallback.
 - Total process count (locked + warm) must not exceed `MAX_TOTAL_PROCESSES` (default 30). Beyond this, new requests fall back to `ClaudeSubprocess`.
 - The production port (3456) and health endpoint format remain unchanged. Existing OpenClaw provider config works without modification.
-- `ClaudeSubprocess` (subprocess-per-request) is retained in the codebase as fallback for unknown models and headerless requests.
+- `ClaudeSubprocess` (subprocess-per-request) is retained in the codebase as fallback for unknown models and headerless requests. Fallback subprocesses are NOT counted against `MAX_TOTAL_PROCESSES` — they are short-lived (die after one request) and self-limiting.
 - The nightly sweep runs at 3:00 AM America/New_York (DST-aware), not UTC.
+- Pool claim for a new session key is atomic: a `PENDING_SENTINEL` is set synchronously before any async work. No two requests for the same new key can both claim a process.
+- Every in-flight request has a timeout (`POOL_REQUEST_TIMEOUT_MS`). A hung process is treated as dead and recovered automatically.
 
 ## Forbidden Patterns
 
 - **No cross-session routing:** A process locked to session A must never serve session B, even if A is idle and B is queued. Claim a new process instead.
 - **No model mixing within a process:** A process spawned with `--model opus` must never receive a sonnet request. Model is baked at spawn time.
-- **No eager recycling of active sessions:** The sweep recycles only processes idle > 2 hours OR exceeding the request count threshold. A process that served a request 30 minutes ago stays locked, even during the 3 AM sweep (unless it's over the request count limit).
+- **No eager recycling of active sessions:** The sweep recycles only processes idle > 2 hours OR exceeding the request count threshold. A process that served a request 30 minutes ago stays locked, even during the 3 AM sweep (unless it's over the request count limit). **Clarification:** The inline `requestCount > 50` recycling (in "Context Accumulation Threshold") is not eager recycling — it triggers only after a request completes and only when the queue is empty. It is a safety valve, not a sweep.
 - **No shared mutable state between pool and routes:** The router exposes `execute()`, `stats()`, and `sweep()` only. Routes do not directly manipulate pool internals.
 - **No removal of the existing subprocess manager:** `ClaudeSubprocess` stays for fallback and backward compatibility.
 - **No concurrent writes to a process's stdin:** The per-process request queue serializes all writes. Any code path that bypasses the queue is a critical bug.
+- **No draining queued requests on orphaned processes:** When lineage reclamation marks a process as orphaned, queued requests are rejected (503), not drained. They belong to a dead session key.
+- **No pool claim without sentinel:** The `lockedSessions.set(key, PENDING_SENTINEL)` must execute synchronously before any `await`. Skipping the sentinel creates a race condition between concurrent requests for the same new key.
 
 ## Modules
 
@@ -237,6 +301,9 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
 - Nightly sweep logic (idle > 2 hours OR requestCount > 50 → recycle → refill)
 - Process health monitoring: auto-respawn on unexpected death with atomic `lockedSessions` cleanup
 - Auth error detection: treat auth failures as process death, trigger recovery flow
+- Per-request timeout (`POOL_REQUEST_TIMEOUT_MS`): hung processes treated as dead, triggers recovery
+- Atomic pool claim via `PENDING_SENTINEL`: prevents race condition on simultaneous new-key requests
+- Orphan queue rejection: queued requests on orphaned processes are rejected (503), not drained
 
 **Acceptance Criteria:**
 - [ ] Requests with the same `x-openclaw-session-key` always route to the same process
@@ -255,6 +322,10 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
 - [ ] Sweep refills warm pool to configured size after recycling (respecting `MAX_TOTAL_PROCESSES`)
 - [ ] Process death triggers atomic cleanup: remove from `lockedSessions`, reject queued requests with 503, spawn replacement
 - [ ] Auth errors (exit code + error message matching) trigger process death recovery
+- [ ] Request timeout (`POOL_REQUEST_TIMEOUT_MS`) kills hung process and triggers death recovery
+- [ ] Orphan reclamation rejects queued requests on the orphaned process with 503 (does not drain them)
+- [ ] Two simultaneous requests for the same new session key do not both claim a process — second request queues behind the pending sentinel
+- [ ] `agentChannel` extraction validates session key format and falls back to full key if format is unexpected
 - [ ] Graceful shutdown kills all processes and drains/rejects queued requests
 
 ### Module 2: Route Integration (`src/server/routes.ts`)
@@ -296,6 +367,7 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
   - `SWEEP_HOUR` (default 3) — hour in ET for nightly sweep
   - `SWEEP_IDLE_THRESHOLD_MS` (default 7200000) — idle time before sweep recycles
   - `POOL_REQUEST_QUEUE_DEPTH` (default 3) — per-process queue depth
+  - `POOL_REQUEST_TIMEOUT_MS` (default 300000) — per-request timeout; hung process treated as dead
 - Graceful shutdown integration (SIGTERM/SIGINT)
 
 **Acceptance Criteria:**
@@ -330,7 +402,7 @@ What "done" looks like — these must be verified by someone other than the buil
 
 1. **Functional:** Send 5 requests from different session keys → each gets a dedicated process. Send a 6th from a key that already has a process → routed to the existing one (verify via logs or health endpoint).
 2. **Serialization:** Send 2 rapid requests on the same session key → both succeed, responses are correct and not interleaved. Verify via log timestamps that the second waited for the first.
-3. **Backpressure:** Send 5 rapid requests on the same session key (queue depth 3) → first executes, 3 queue, 5th returns 429.
+3. **Backpressure:** Send 5 rapid requests on the same session key (queue depth 3) → first executes, next 3 queue, 5th returns 429.
 4. **Concurrency:** Send 3 simultaneous requests from different keys → all served in parallel with no queueing.
 5. **Cold start:** Send a request after all warm processes are claimed → new process spawned, request succeeds, latency logged.
 6. **Overflow cap:** Fill the pool to `MAX_TOTAL_PROCESSES`, send another request with a new session key → falls back to `ClaudeSubprocess`, still succeeds.
@@ -342,8 +414,11 @@ What "done" looks like — these must be verified by someone other than the buil
 12. **Context degradation:** Send 60 requests through the same locked process → verify the process is recycled after request 50 (on next idle). Verify subsequent request gets a fresh process.
 13. **Backward compat:** Send a request without `x-openclaw-session-key` header → falls back to subprocess-per-request. No error.
 14. **Unknown model:** Send a request with session key + model "haiku" → falls back to `ClaudeSubprocess`. No error.
-15. **Health:** `/health` endpoint returns pool stats including locked/warm/busy/queued/orphansReclaimed counts.
-16. **Build:** `npm run build` succeeds with no errors or warnings.
+15. **Health:** `/health` endpoint returns pool stats including locked/warm/busy/queued/orphansReclaimed counts. Stats include aggregate route-hit counters: locked, warm, cold, fallback.
+16. **Request timeout:** Simulate a hung process (e.g., suspend with SIGSTOP). Verify request times out after `POOL_REQUEST_TIMEOUT_MS`, process is killed and replaced, queued requests rejected with 503.
+17. **Atomic claim:** Send 2 simultaneous requests for the same new session key → only one process claimed, second request queued and served after the first completes.
+18. **Orphan queue rejection:** Enqueue 2 requests on a locked process, then trigger a session reset. Verify queued requests receive 503 (not drained through the orphaned process).
+19. **Build:** `npm run build` succeeds with no errors or warnings.
 
 ## Observability (Finding #12)
 
@@ -383,6 +458,8 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
     "orphansReclaimed": 3,
     "totalRequests": 142,
     "processRecycles": 5,
+    "requestTimeouts": 0,
+    "routeHits": { "locked": 98, "warm": 32, "cold": 8, "fallback": 4 },
     "uptime": 43200
   }
 }
@@ -401,12 +478,16 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 | 2026-03-21 | Lineage-based orphan reclamation | **Rev 2 — Finding #2.** Session resets orphan processes. Without immediate reclamation, 20+ resets/day = 20+ leaked processes until 3 AM. Lineage key (agent+channel) detects orphans on the spot. | Wait for 3 AM sweep (unacceptable leak rate). Timer-based idle reclamation (adds complexity, doesn't catch rapid resets). |
 | 2026-03-21 | Request count threshold (50) for recycling | **Rev 2 — Finding #3.** CLI accumulates context from every request. 50 requests ≈ 100-200K tokens of noise. Recycling at this threshold prevents context window overflow while staying well within safe bounds for a day's active session. | No threshold, rely on manual resets only (risky for agents that never reset). Lower threshold like 20 (too aggressive, causes unnecessary cold starts). |
 | 2026-03-21 | MAX_TOTAL_PROCESSES cap (30) | **Rev 2 — Finding #12.** Prevents runaway process spawning from consuming all system memory. 30 processes × 200MB = 6GB worst case — well within 128GB. Beyond 30, requests fall back to ClaudeSubprocess. | No cap (dangerous). Per-model cap only (doesn't prevent total runaway). |
+| 2026-03-21 | Reject (not drain) queued requests on orphaned processes | **Rev 3 — Finding N1.** Queued requests belong to the dead session key. Draining them serves stale requests and delays the process kill. Rejecting with 503 lets the gateway retry with the new key. | Drain queue first (serves stale-session requests, delays reclamation). |
+| 2026-03-21 | Per-request timeout (5 min default) | **Rev 3 — Finding N2.** Without a timeout, a hung CLI process permanently wedges a session. 5 min is generous for inference (typical: 10-60s) but catches real hangs. Triggers standard death recovery. | No timeout (process stays busy forever). 15-min matching old proxy (too long for pooled — blocks the session). |
+| 2026-03-21 | PENDING_SENTINEL for atomic pool claim | **Rev 3 — Finding N3.** Async yield points between `has()` and `set()` create a window for duplicate claims. Synchronous sentinel closes the window. Second request queues instead of claiming. | Mutex/lock (overkill for single-threaded Node). Accept the race (orphans a process silently). |
 
 ## Review History
 
 | Rev | Date | Reviewer | Findings | Status |
 |-----|------|----------|----------|--------|
 | 1 | 2026-03-21 | Opus sub-agent | 14 (3C/5M/6m) | All resolved in Rev 2 |
+| 2 | 2026-03-21 | Opus sub-agent | 7 (0C/3M/4m) | All resolved in Rev 3 |
 
 ### Finding Resolution Index
 
@@ -426,6 +507,13 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 | 12 | MINOR | Prototype cleanup should be separate commit | Module 4 now specifies separate commit. |
 | 13 | MINOR | No logging/observability spec | Added "Observability" section with structured logging and health endpoint schemas. |
 | 14 | MINOR | Pool size conflates warm vs. total | Added `MAX_TOTAL_PROCESSES` (default 30) as hard cap distinct from per-model warm targets. |
+| N1 | MAJOR | Orphan reclamation races with in-flight queue | Queued requests on orphaned processes are rejected (503), not drained. They belong to the dead session. See "Orphan Queue Rejection" section. |
+| N2 | MAJOR | No per-request timeout for hung processes | `POOL_REQUEST_TIMEOUT_MS` (default 300000). Timeout triggers process death recovery. See "Per-Request Timeout" section. |
+| N3 | MAJOR | Pool claim not atomic — race on new session key | `PENDING_SENTINEL` set synchronously before async work. See "Atomic Pool Claim" section. |
+| N4 | MINOR | `agentChannel` extraction assumes fixed key format | Validation added — falls back to full session key if format is unexpected. AC added to Module 1. |
+| N5 | MINOR | Off-by-one in backpressure validation criterion | Fixed wording: "first executes, next 3 queue, 5th returns 429." |
+| N6 | MINOR | ClaudeSubprocess fallback not counted in MAX_TOTAL_PROCESSES | Documented as intentional — fallback processes are short-lived and self-limiting. |
+| N7 | MINOR | No aggregate route-hit counters in stats() | Added `routeHits: { locked, warm, cold, fallback }` to health endpoint. |
 
 ## Pre-Change Impact Statement
 

From ab9dff0dbda50c85cb140ab3bd8d00525cfad5e2 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 18:28:46 -0400
Subject: [PATCH 03/27] =?UTF-8?q?spec:=20session=20pooling=20TRD=20Rev=204?=
 =?UTF-8?q?=20=E2=80=94=20resolve=207=20findings=20from=20third=20Opus=20r?=
 =?UTF-8?q?eview?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

N8: Canonical clearSessionLock() for all unlock paths
N9: Failed cold spawn rejects queued requests before sentinel deletion
N10: Fallback serialization loss documented as known degradation
N11: Sweep refill checks cap per-spawn, not once
N12: Shutdown closes listening socket before draining
N13: Health endpoint includes locked counts per model
N14: Sweep skips processes in recycling state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 specs/session-pooling.spec.md | 106 ++++++++++++++++++++++++++++++----
 1 file changed, 95 insertions(+), 11 deletions(-)

diff --git a/specs/session-pooling.spec.md b/specs/session-pooling.spec.md
index dd75ced..084354f 100644
--- a/specs/session-pooling.spec.md
+++ b/specs/session-pooling.spec.md
@@ -1,4 +1,6 @@
-# Session Pooling — Spec (Tier 1) — Rev 3
+# Session Pooling — Spec (Tier 1) — Rev 4
+
+**Rev 4 (2026-03-21):** Addresses 7 findings from third Opus sub-agent review. 3 major, 4 minor. All prior resolutions verified adequate.
 
 **Rev 3 (2026-03-21):** Addresses 7 new findings from second Opus sub-agent review. 3 major, 4 minor. All Rev 1 resolutions verified adequate.
 
@@ -82,8 +84,8 @@ The production design locks each CLI process to a specific OpenClaw session key
       YES + PENDING_SENTINEL → enqueue (claim in progress, will drain when ready)
       NO  → set PENDING_SENTINEL in lockedSessions (synchronous, prevents race)
             → warmPool["opus"].pop() → lock to sessionKey (replace sentinel)
-            if warmPool empty AND total processes < MAX_TOTAL_PROCESSES → spawn new process (3-10s cold start, one-time)
-            if warmPool empty AND total processes >= MAX_TOTAL_PROCESSES → delete sentinel, fall back to ClaudeSubprocess
+            if warmPool empty AND total processes < MAX_TOTAL_PROCESSES → spawn new process (3-10s cold start, one-time), lock to sessionKey
+            if warmPool empty AND total processes >= MAX_TOTAL_PROCESSES → reject queued requests on sentinel, delete sentinel, fall back to ClaudeSubprocess (log warning with process count)
 
 4. Write prompt to process stdin as stream-json message
 5. Read response from process stdout, emit events to caller
@@ -126,6 +128,23 @@ When a session reset triggers orphan reclamation (see "Orphan Reclamation" above
 
 Do NOT drain queued requests on an orphaned process. The queue contents are stale — they were enqueued under a session key that no longer exists.
 
+### Canonical Lock Clearing (Major — Finding N8)
+
+Multiple flows remove session locks: timeout recovery, context recycling, orphan reclamation, process death, sweep, and shutdown. Each MUST use the same canonical cleanup sequence:
+
+```
+function clearSessionLock(sessionKey: string, process: PooledProcess):
+  1. lockedSessions.delete(sessionKey)          // remove the mapping entirely
+  2. process.lockedTo = null                     // clear the process's back-reference
+  3. process.agentChannel = null                 // clear lineage key
+  4. process.requestCount = 0                    // reset counter for reuse
+  5. // caller then either: kills the process, or returns it to warmPool
+```
+
+**Rule:** Every code path that unlocks a session MUST call this single function. No inline `lockedSessions.delete()` scattered across the codebase. This prevents stale artifacts (e.g., deleting the map entry but leaving `process.lockedTo` set, or forgetting to clear lineage).
+
+Deliverable: a private `clearSessionLock()` method on `SessionPoolRouter`. All unlock paths call it.
+
 ### Per-Request Timeout (Major — Finding N2)
 
 If a CLI process hangs (stuck inference, deadlocked stdin/stdout pipe, unresponsive model), it stays in "busy" state indefinitely. The per-process queue fills to max depth, then all subsequent requests get 429'd. The nightly sweep skips busy processes. The session is permanently wedged.
@@ -170,6 +189,24 @@ execute(prompt, model, sessionKey):
 
 The `PENDING_SENTINEL` is a special marker that causes incoming requests for that key to enqueue (same as a busy process). When the real process is assigned, the queue drains.
 
+### Failed Cold Spawn Recovery (Major — Finding N9)
+
+If a `PENDING_SENTINEL` is set and the cold spawn fails (CLI binary missing, auth broken, OOM), the catch block deletes the sentinel — but requests that queued against the sentinel are now waiting on nothing. They hang forever.
+
+**Rule:** Before deleting the sentinel on spawn failure:
+1. Collect all requests queued against this session key's sentinel
+2. Reject each with HTTP 503 + `Retry-After: 3`
+3. Then delete the sentinel from `lockedSessions`
+4. Log the spawn failure with model, session key, and error
+
+```
+catch (error):
+  rejectQueuedRequests(sessionKey, 503, "Retry-After: 3")  // reject waiters before deleting the sentinel (never drain them)
+  lockedSessions.delete(sessionKey)
+  log({ event: "cold_spawn_failed", sessionKey, model, error })
+  throw  // propagate to the original requester
+```
+
 ### Context Accumulation Threshold (Critical — Finding #3)
 
 Each request adds its full prompt to the CLI's accumulated context. After 50 requests, the CLI could have 500K+ tokens of accumulated noise, risking context window overflow and degraded responses.
@@ -185,14 +222,18 @@ The threshold is configurable via `POOL_MAX_REQUESTS_PER_PROCESS` (default 50).
 ```
 For each locked process:
   If lastRequestAt < (now - 2 hours) OR requestCount > 50:
-    If state == "busy" → skip (don't interrupt active work)
+    If state == "busy" OR state == "recycling" → skip (don't interrupt active work or double-kill a mid-recycle process — Finding N14)
     Kill process
     Remove from lockedSessions
     Spawn fresh process into warmPool (if below configured size)
 
-Refill warmPool to configured sizes:
-  opus:   max(0, POOL_OPUS_SIZE - currentOpusWarm)  new processes
-  sonnet: max(0, POOL_SONNET_SIZE - currentSonnetWarm) new processes
+Refill warmPool to configured sizes (check cap before EACH spawn, not once — Finding N11):
+  for each model in [opus, sonnet]:
+    while warmPool[model].length < configured size:
+      if total(locked + warm) >= MAX_TOTAL_PROCESSES:
+        log warning, stop refilling
+        break
+      spawn one process, add to warmPool[model]
 
 Enforce MAX_TOTAL_PROCESSES:
   If total(locked + warm) > MAX_TOTAL_PROCESSES:
@@ -257,6 +298,15 @@ Static 6/4 opus/sonnet split can starve one model if usage patterns shift. The w
 
 This is simpler than a dynamic flex zone and maintains the invariant that model pools are pure. The `MAX_TOTAL_PROCESSES` cap (default 30) prevents runaway spawning.
 
+### Serialization Loss in Fallback Mode (Major — Finding N10)
+
+When `MAX_TOTAL_PROCESSES` is reached, new session-keyed requests fall back to `ClaudeSubprocess` (subprocess-per-request). Two simultaneous requests for the same session key both get separate subprocesses — no serialization, no locking. The per-process request queue does not apply to fallback subprocesses.
+
+**This is a known degradation, not a bug.** Fallback mode is a safety valve for when the pool is saturated. In practice:
+- If the cap is routinely hit, the correct fix is to increase `MAX_TOTAL_PROCESSES`, not to add serialization to the fallback path.
+- Subprocess-per-request was the *entire* architecture before pooling — it works, it's just slower.
+- Log a warning each time fallback is used with the current process count, so operators can detect when the cap needs raising.
+
 ## Invariants (non-negotiable)
 
 - A locked process serves ONLY the session key it is locked to. No exceptions. No "borrowing" idle locked processes.
@@ -269,6 +319,11 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
 - The nightly sweep runs at 3:00 AM America/New_York (DST-aware), not UTC.
 - Pool claim for a new session key is atomic: a `PENDING_SENTINEL` is set synchronously before any async work. No two requests for the same new key can both claim a process.
 - Every in-flight request has a timeout (`POOL_REQUEST_TIMEOUT_MS`). A hung process is treated as dead and recovered automatically.
+- All session lock clearing goes through `clearSessionLock()`. No inline `lockedSessions.delete()` calls.
+- Failed cold spawns reject all queued requests before deleting the sentinel. No orphaned waiters.
+- Sweep refill checks `MAX_TOTAL_PROCESSES` before each individual spawn, not once at the start.
+- Sweep skips processes in `recycling` state (same as `busy`). No double-kill.
+- Shutdown closes the listening socket before draining. No new connections accepted during teardown.
 
 ## Forbidden Patterns
 
@@ -304,6 +359,9 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
 - Per-request timeout (`POOL_REQUEST_TIMEOUT_MS`): hung processes treated as dead, triggers recovery
 - Atomic pool claim via `PENDING_SENTINEL`: prevents race condition on simultaneous new-key requests
 - Orphan queue rejection: queued requests on orphaned processes are rejected (503), not drained
+- Canonical `clearSessionLock()` method used by all unlock paths (timeout, recycle, orphan, death, sweep, shutdown)
+- Failed cold spawn recovery: reject queued requests before deleting sentinel
+- Fallback serialization loss documented and logged (warning when fallback triggered)
 
 **Acceptance Criteria:**
 - [ ] Requests with the same `x-openclaw-session-key` always route to the same process
@@ -316,7 +374,7 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
 - [ ] Unknown model (e.g., haiku) with session key header falls back to `ClaudeSubprocess`
 - [ ] Session reset (new key, same agent+channel) immediately orphan-reclaims the old process
 - [ ] Process with `requestCount > 50` is recycled on next idle transition (queue drained first)
-- [ ] `stats()` returns: `{ total, locked, warm: { opus, sonnet }, busy, queued, orphansReclaimed, totalRequests, processRecycles }`
+- [ ] `stats()` returns: `{ total, locked: { total, opus, sonnet }, warm: { opus, sonnet }, busy, queued, orphansReclaimed, totalRequests, processRecycles, requestTimeouts, routeHits: { locked, warm, cold, fallback } }`
 - [ ] Sweep correctly identifies and recycles processes idle > 2 hours OR requestCount > 50
 - [ ] Sweep skips busy processes (never interrupts active work)
 - [ ] Sweep refills warm pool to configured size after recycling (respecting `MAX_TOTAL_PROCESSES`)
@@ -327,6 +385,11 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
 - [ ] Two simultaneous requests for the same new session key do not both claim a process — second request queues behind the pending sentinel
 - [ ] `agentChannel` extraction validates session key format and falls back to full key if format is unexpected
 - [ ] Graceful shutdown kills all processes and drains/rejects queued requests
+- [ ] All unlock paths (timeout, recycle, orphan, death, sweep, shutdown) use `clearSessionLock()` — no inline `lockedSessions.delete()`
+- [ ] Failed cold spawn rejects queued requests with 503 before deleting sentinel
+- [ ] Fallback to ClaudeSubprocess logs a warning with current total process count
+- [ ] Sweep refill checks MAX_TOTAL_PROCESSES before each spawn (not once)
+- [ ] Sweep skips processes in `recycling` state
 
 ### Module 2: Route Integration (`src/server/routes.ts`)
 
@@ -368,15 +431,21 @@ This is simpler than a dynamic flex zone and maintains the invariant that model
   - `SWEEP_IDLE_THRESHOLD_MS` (default 7200000) — idle time before sweep recycles
   - `POOL_REQUEST_QUEUE_DEPTH` (default 3) — per-process queue depth
   - `POOL_REQUEST_TIMEOUT_MS` (default 300000) — per-request timeout; hung process treated as dead
-- Graceful shutdown integration (SIGTERM/SIGINT)
+- Graceful shutdown integration (SIGTERM/SIGINT):
+  1. **Immediately** close the listening socket (stop accepting new connections — Finding N12)
+  2. Wait for in-flight requests to complete (30s timeout)
+  3. Reject all queued requests with 503
+  4. Kill all pool processes
+  5. Exit
 
 **Acceptance Criteria:**
 - [ ] Pool initializes with configured sizes on server start
 - [ ] Sweep runs at 3:00 AM ET daily, correctly handling EST↔EDT transitions
 - [ ] Pool sizes configurable via env vars without code changes
 - [ ] Server startup logs pool configuration and initial stats
-- [ ] Graceful shutdown waits for in-flight requests (30s timeout) before killing processes
+- [ ] Graceful shutdown closes the listening socket immediately on SIGTERM/SIGINT, then waits for in-flight requests to complete (30s timeout)
 - [ ] Queued requests are rejected with 503 during shutdown
+- [ ] No new connections accepted after shutdown signal
 
 ### Module 4: Prototype Cleanup
 
@@ -419,6 +488,10 @@ What "done" looks like — these must be verified by someone other than the buil
 17. **Atomic claim:** Send 2 simultaneous requests for the same new session key → only one process claimed, second request queued and served after the first completes.
 18. **Orphan queue rejection:** Enqueue 2 requests on a locked process, then trigger a session reset. Verify queued requests receive 503 (not drained through the orphaned process).
 19. **Build:** `npm run build` succeeds with no errors or warnings.
+20. **Lock clearing consistency:** Grep the codebase for `lockedSessions.delete` — it should appear ONLY inside `clearSessionLock()`. No other direct deletions.
+21. **Failed cold spawn:** Simulate a spawn failure (e.g., invalid CLAUDE_BIN) after sentinel is set with queued requests → queued requests receive 503, sentinel is cleaned up.
+22. **Sweep refill cap:** Set MAX_TOTAL_PROCESSES=12, fill 10 locked + 2 warm. Trigger sweep that recycles 1 → refill spawns 1. Verify total never exceeds 12.
+23. **Shutdown socket close:** Send SIGTERM, then immediately attempt a new connection → connection refused (socket closed). In-flight requests complete within 30s.
 
 ## Observability (Finding #12)
 
@@ -450,7 +523,7 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
   "provider": "claude-code-cli",
   "pool": {
     "total": 12,
-    "locked": 7,
+    "locked": { "total": 7, "opus": 5, "sonnet": 2 },
     "warm": { "opus": 3, "sonnet": 2 },
     "busy": 2,
     "queued": 0,
@@ -481,6 +554,9 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 | 2026-03-21 | Reject (not drain) queued requests on orphaned processes | **Rev 3 — Finding N1.** Queued requests belong to the dead session key. Draining them serves stale requests and delays the process kill. Rejecting with 503 lets the gateway retry with the new key. | Drain queue first (serves stale-session requests, delays reclamation). |
 | 2026-03-21 | Per-request timeout (5 min default) | **Rev 3 — Finding N2.** Without a timeout, a hung CLI process permanently wedges a session. 5 min is generous for inference (typical: 10-60s) but catches real hangs. Triggers standard death recovery. | No timeout (process stays busy forever). 15-min matching old proxy (too long for pooled — blocks the session). |
 | 2026-03-21 | PENDING_SENTINEL for atomic pool claim | **Rev 3 — Finding N3.** Async yield points between `has()` and `set()` create a window for duplicate claims. Synchronous sentinel closes the window. Second request queues instead of claiming. | Mutex/lock (overkill for single-threaded Node). Accept the race (orphans a process silently). |
+| 2026-03-21 | Canonical clearSessionLock() method | **Rev 4 — Finding N8.** Six different flows clear session locks with inconsistent cleanup. Single method prevents stale artifacts (dangling lockedTo, leftover lineage keys). | Inline cleanup per flow (error-prone, already caused inconsistency). |
+| 2026-03-21 | Reject queued requests on failed cold spawn | **Rev 4 — Finding N9.** Sentinel deletion without queue drain leaves waiters hanging forever. Explicit rejection before deletion ensures no orphaned promises. | Let waiters timeout naturally (up to 5 min hang per request). |
+| 2026-03-21 | Document fallback serialization loss as known degradation | **Rev 4 — Finding N10.** Adding serialization to ClaudeSubprocess fallback adds complexity to a safety valve that shouldn't be hit often. If it's hit often, increase the cap. | Add fallback serialization (complexity for a rare path). |
 
 ## Review History
 
@@ -488,6 +564,7 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 |-----|------|----------|----------|--------|
 | 1 | 2026-03-21 | Opus sub-agent | 14 (3C/5M/6m) | All resolved in Rev 2 |
 | 2 | 2026-03-21 | Opus sub-agent | 7 (0C/3M/4m) | All resolved in Rev 3 |
+| 3 | 2026-03-21 | Opus sub-agent | 7 (0C/3M/4m) | All resolved in Rev 4 |
 
 ### Finding Resolution Index
 
@@ -514,6 +591,13 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 | N5 | MINOR | Off-by-one in backpressure validation criterion | Fixed wording: "first executes, next 3 queue, 5th returns 429." |
 | N6 | MINOR | ClaudeSubprocess fallback not counted in MAX_TOTAL_PROCESSES | Documented as intentional — fallback processes are short-lived and self-limiting. |
 | N7 | MINOR | No aggregate route-hit counters in stats() | Added `routeHits: { locked, warm, cold, fallback }` to health endpoint. |
+| N8 | MAJOR | Inconsistent lock-clearing language across flows | Canonical `clearSessionLock()` method — all unlock paths must call it. See "Canonical Lock Clearing" section. |
+| N9 | MAJOR | Queued requests orphaned on failed cold spawn | Reject queued requests with 503 before deleting sentinel on spawn failure. See "Failed Cold Spawn Recovery" section. |
+| N10 | MAJOR | Serialization loss in ClaudeSubprocess fallback | Documented as known degradation. Log warning when fallback triggered. See "Serialization Loss in Fallback Mode" section. |
+| N11 | MINOR | Sweep refill could overshoot MAX_TOTAL_PROCESSES | Check cap before each individual spawn in refill loop. Updated in "Nightly Sweep" section. |
+| N12 | MINOR | Shutdown doesn't specify when to stop accepting connections | Close listening socket immediately on SIGTERM. Updated in Module 3. |
+| N13 | MINOR | Health endpoint missing locked counts per model | Added `locked: { total, opus, sonnet }` to health endpoint. |
+| N14 | MINOR | Sweep could double-kill a process mid-inline-recycle | Sweep skips processes in `recycling` state. Updated in "Nightly Sweep" section. |
 
 ## Pre-Change Impact Statement
 

From c1c4d66a8fa6562e9d8ce1a77ed320f0560f22db Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 18:31:48 -0400
Subject: [PATCH 04/27] =?UTF-8?q?spec:=20session=20pooling=20TRD=20Rev=205?=
 =?UTF-8?q?=20=E2=80=94=20resolve=204=20findings=20from=20fourth=20Opus=20?=
 =?UTF-8?q?review?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

N15: Router checks POOLED_MODELS set, not type-level unknowns (extractModel defaults to opus)
N16: PENDING_SENTINEL specified as lightweight object with requestQueue
N17: clearSessionLock called after drain completes, not before
N18: standalone.ts owns full shutdown sequence (no pool threading into index.ts)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 specs/session-pooling.spec.md | 53 +++++++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/specs/session-pooling.spec.md b/specs/session-pooling.spec.md
index 084354f..6ac652e 100644
--- a/specs/session-pooling.spec.md
+++ b/specs/session-pooling.spec.md
@@ -1,4 +1,6 @@
-# Session Pooling — Spec (Tier 1) — Rev 4
+# Session Pooling — Spec (Tier 1) — Rev 5
+
+**Rev 5 (2026-03-21):** Addresses 4 findings from fourth Opus sub-agent review. 1 major, 3 minor. All prior resolutions verified adequate.
 
 **Rev 4 (2026-03-21):** Addresses 7 findings from third Opus sub-agent review. 3 major, 4 minor. All prior resolutions verified adequate.
 
@@ -187,7 +189,9 @@ execute(prompt, model, sessionKey):
   // When sentinel is replaced with real process, queued requests are drained
 ```
 
-The `PENDING_SENTINEL` is a special marker that causes incoming requests for that key to enqueue (same as a busy process). When the real process is assigned, the queue drains.
+The `PENDING_SENTINEL` is a special marker that causes incoming requests for that key to enqueue. When the real process is assigned, the queue drains.
+
+**Implementation note (Finding N16):** `lockedSessions` is typed `Map<string, PooledProcess>`, but the sentinel is not a real `PooledProcess`. Implement the sentinel as a lightweight object with `{ isPending: true, requestQueue: PendingRequest[] }` — it has a queue (where requests accumulate while the real process is being claimed/spawned) but no actual CLI process. When the real process is assigned, transfer the sentinel's `requestQueue` to the new `PooledProcess` and drain it. The router checks `isPending` to distinguish sentinels from real processes. Type the map as `Map<string, PooledProcess | PendingSentinel>` or use a discriminated union.
 
 ### Failed Cold Spawn Recovery (Major — Finding N9)
 
@@ -212,8 +216,8 @@ catch (error):
 Each request adds its full prompt to the CLI's accumulated context. After 50 requests, the CLI could have 500K+ tokens of accumulated noise, risking context window overflow and degraded responses.
 
 **Rule:** When a process completes a request and `requestCount > 50`:
-- If `requestQueue` is empty → recycle immediately (kill, respawn fresh into warm pool, clear the session lock — next request from this key claims a new process)
-- If `requestQueue` is non-empty → drain the queue first, then recycle when empty
+- If `requestQueue` is empty → recycle immediately: call `clearSessionLock()`, kill process, respawn into warm pool. Next request from this key claims a new process.
+- If `requestQueue` is non-empty → set state to `"recycling"` (while in this state, new arrivals for the key are not enqueued on this process — they get a fresh process instead — and the sweep skips it). Drain the existing queue normally. When the queue empties, THEN call `clearSessionLock()`, kill, respawn. **Important (Finding N17):** `clearSessionLock()` resets `requestCount` to 0, so it must be called AFTER the drain-then-recycle decision, not before. The `"recycling"` state is the guard — it signals that this process is committed to being recycled regardless of the counter.
 
 The threshold is configurable via `POOL_MAX_REQUESTS_PER_PROCESS` (default 50).
 
@@ -280,12 +284,29 @@ Long-lived CLI processes (up to 24 hours between sweeps) may outlive their auth
 
 **No preemptive refresh needed.** The Claude CLI handles token refresh internally for most cases. This catch handles the edge case where it doesn't.
 
-### Unknown Model Routing (Major — Finding #8)
+### Unknown Model Routing (Major — Finding #8, updated Finding N15)
+
+The existing `extractModel()` in `openai-to-cli.ts` normalizes model strings: it maps known names to `ClaudeModel` values (`"opus" | "sonnet" | "haiku"`) and defaults unrecognized strings to `"opus"`. This means truly unknown model strings (e.g., `"gpt-4"`) silently become `"opus"` and will route to the opus pool — the "unknown model" fallback path is unreachable via the adapter.
 
-If a request has a valid `x-openclaw-session-key` but requests a model not in any pool (e.g., `haiku`):
-- **Do not attempt pooled routing** — there's no pool for that model
-- Fall back to `ClaudeSubprocess` (subprocess-per-request)
-- Log the model name for tracking — if it appears frequently, consider adding a pool
+**The router must check against the set of pooled models, not rely on type-level unknowns:**
+
+```
+const POOLED_MODELS = new Set(["opus", "sonnet"]);  // models that have warm pools
+
+if (POOLED_MODELS.has(resolvedModel)):
+  // route through SessionPoolRouter
+else:
+  // fall back to ClaudeSubprocess (e.g., haiku)
+```
+
+This correctly handles:
+- `haiku` — a valid `ClaudeModel` but not pooled → falls back to `ClaudeSubprocess`
+- Truly unknown strings — already mapped to `"opus"` by `extractModel()` → route to the opus pool (correct, since the CLI will run opus on the Max subscription regardless)
+- Future models — add to `POOLED_MODELS` when a pool is created, otherwise they fall back
+
+**Do not modify `extractModel()`** — its defaulting behavior is correct for the CLI (which needs a valid model name). The pooling decision is a separate layer.
+
+Log any non-pooled model name for tracking — if haiku or a future model appears frequently, consider adding a pool.
 
 ### Model Pool Flex Zone (Major — Finding #5)
 
@@ -434,9 +455,10 @@ When `MAX_TOTAL_PROCESSES` is reached, new session-keyed requests fall back to `
 - Graceful shutdown integration (SIGTERM/SIGINT):
   1. **Immediately** close the listening socket (stop accepting new connections — Finding N12)
   2. Wait for in-flight requests to complete (30s timeout)
-  3. Reject all queued requests with 503
-  4. Kill all pool processes
-  5. Exit
+  3. Call `SessionPoolRouter.shutdown()` — rejects all queued requests with 503, kills all pool processes
+  4. Exit
+
+  **Implementation note (Finding N18):** `standalone.ts` should handle the full shutdown sequence directly rather than delegating to the existing `stopServer()` in `index.ts`. The current `stopServer()` has no pool awareness. The standalone entry point owns both the HTTP server and the pool router, so it is the natural place for the coordinated shutdown: `server.close()` (step 1), then wait for in-flight requests to finish, up to the 30s timeout (step 2), then `router.shutdown()` (step 3), then `process.exit()` (step 4).
 
 **Acceptance Criteria:**
 - [ ] Pool initializes with configured sizes on server start
@@ -557,6 +579,8 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 | 2026-03-21 | Canonical clearSessionLock() method | **Rev 4 — Finding N8.** Six different flows clear session locks with inconsistent cleanup. Single method prevents stale artifacts (dangling lockedTo, leftover lineage keys). | Inline cleanup per flow (error-prone, already caused inconsistency). |
 | 2026-03-21 | Reject queued requests on failed cold spawn | **Rev 4 — Finding N9.** Sentinel deletion without queue drain leaves waiters hanging forever. Explicit rejection before deletion ensures no orphaned promises. | Let waiters timeout naturally (up to 5 min hang per request). |
 | 2026-03-21 | Document fallback serialization loss as known degradation | **Rev 4 — Finding N10.** Adding serialization to ClaudeSubprocess fallback adds complexity to a safety valve that shouldn't be hit often. If it's hit often, increase the cap. | Add fallback serialization (complexity for a rare path). |
+| 2026-03-21 | Route by POOLED_MODELS set, not type system | **Rev 5 — Finding N15.** `extractModel()` defaults unknowns to "opus" — the type system can't distinguish pooled from non-pooled. Explicit set check is the correct routing mechanism. | Modify extractModel to return null (breaks existing callers). |
+| 2026-03-21 | Standalone.ts owns shutdown sequence | **Rev 5 — Finding N18.** The server module has no pool awareness. Rather than threading the router through `startServer`/`stopServer`, let the entry point coordinate both. | Thread router into index.ts (unnecessary coupling). |
 
 ## Review History
 
@@ -565,6 +589,7 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 | 1 | 2026-03-21 | Opus sub-agent | 14 (3C/5M/6m) | All resolved in Rev 2 |
 | 2 | 2026-03-21 | Opus sub-agent | 7 (0C/3M/4m) | All resolved in Rev 3 |
 | 3 | 2026-03-21 | Opus sub-agent | 7 (0C/3M/4m) | All resolved in Rev 4 |
+| 4 | 2026-03-21 | Opus sub-agent | 4 (0C/1M/3m) | All resolved in Rev 5 |
 
 ### Finding Resolution Index
 
@@ -598,6 +623,10 @@ Pool lifecycle events (spawn, recycle, death, orphan-reclaim, sweep) are also lo
 | N12 | MINOR | Shutdown doesn't specify when to stop accepting connections | Close listening socket immediately on SIGTERM. Updated in Module 3. |
 | N13 | MINOR | Health endpoint missing locked counts per model | Added `locked: { total, opus, sonnet }` to health endpoint. |
 | N14 | MINOR | Sweep could double-kill a process mid-inline-recycle | Sweep skips processes in `recycling` state. Updated in "Nightly Sweep" section. |
+| N15 | MAJOR | `extractModel` defaults unknowns to "opus", defeating unknown-model fallback | Router checks against `POOLED_MODELS` set, not type-level unknowns. `extractModel` unchanged. See updated "Unknown Model Routing" section. |
+| N16 | MINOR | PENDING_SENTINEL has no queue data structure | Sentinel is a `{ isPending: true, requestQueue: [] }` object. Queue transfers to real process on claim. See updated "Atomic Pool Claim" section. |
+| N17 | MINOR | `clearSessionLock` resets requestCount before drain completes | Drain-then-recycle sets `"recycling"` state first, calls `clearSessionLock` only after drain completes. See updated "Context Accumulation Threshold" section. |
+| N18 | MINOR | Shutdown has no mechanism to access pool from server module | `standalone.ts` owns the full shutdown sequence directly. See updated Module 3. |
 
 ## Pre-Change Impact Statement
 

From 2aac92be0369d6ed9be8157c67140f9de8b0c8b3 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 18:43:01 -0400
Subject: [PATCH 05/27] =?UTF-8?q?feat:=20session=20pooling=20=E2=80=94=20M?=
 =?UTF-8?q?1=20router,=20M2=20route=20integration,=20M3=20startup/sweep=20?=
 =?UTF-8?q?(session-pooling.spec.md)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- M1: SessionPoolRouter (src/subprocess/router.ts)
  - Per-model warm pools (opus, sonnet) with configurable sizes
  - Session-key locking with lockedSessions map
  - PendingSentinel atomic pool claim (prevents race on new-key requests)
  - Per-process FIFO request queue (max depth 3) with 429 backpressure
  - Lineage tracking (agentChannel) for orphan detection on session reset
  - Orphan queue rejection: queued requests rejected 503, not drained
  - Context accumulation guard: recycle process when requestCount > threshold
  - Canonical clearSessionLock() used by all unlock paths
  - Failed cold spawn recovery: reject queued requests before deleting sentinel
  - Per-request timeout (POOL_REQUEST_TIMEOUT_MS): hung process treated as dead
  - Process death recovery: atomic cleanup, reject queue, spawn replacement
  - Auth error detection via stderr pattern matching
  - ClaudeSubprocess fallback for non-pooled models and headerless requests
  - Nightly sweep: idle > 2h or requestCount > threshold; refill warm pool
  - stats() returns full RouterStats (total, locked, warm, busy, queued, etc.)

- M2: Route integration (src/server/routes.ts)
  - Extracts x-openclaw-session-key header; routes through SessionPoolRouter.execute()
  - Fallback to ClaudeSubprocess if no session key or no pool
  - Client disconnect: detach emitter for pooled process (let it finish); kill for subprocess fallback
  - Health endpoint includes pool stats from stats()
  - Structured logging per request: sessionKey, model, pid, latencyMs

- M3: Server startup and sweep scheduling (src/server/standalone.ts)
  - Initializes SessionPoolRouter on startup with env var config
  - Nightly sweep via node-cron at 3 AM ET (DST-aware timezone: America/New_York)
  - Graceful shutdown: close socket immediately, wait 30s, router.shutdown(), exit
  - All pool sizes configurable via env vars (POOL_OPUS_SIZE, POOL_SONNET_SIZE, etc.)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 package-lock.json        |  17 +
 package.json             |   2 +
 src/server/routes.ts     | 158 ++++----
 src/server/standalone.ts | 111 +++++-
 src/subprocess/router.ts | 839 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 1031 insertions(+), 96 deletions(-)
 create mode 100644 src/subprocess/router.ts

diff --git a/package-lock.json b/package-lock.json
index f6561bd..b74f35c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,7 +9,9 @@
       "version": "1.0.0",
       "license": "MIT",
       "dependencies": {
+        "@types/node-cron": "^3.0.11",
         "express": "^4.21.2",
+        "node-cron": "^4.2.1",
         "uuid": "^11.0.5"
       },
       "bin": {
@@ -96,6 +98,12 @@
         "undici-types": "~6.21.0"
       }
     },
+    "node_modules/@types/node-cron": {
+      "version": "3.0.11",
+      "resolved": "https://registry.npmjs.org/@types/node-cron/-/node-cron-3.0.11.tgz",
+      "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
+      "license": "MIT"
+    },
     "node_modules/@types/qs": {
       "version": "6.14.0",
       "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.14.0.tgz",
@@ -652,6 +660,15 @@
         "node": ">= 0.6"
       }
     },
+    "node_modules/node-cron": {
+      "version": "4.2.1",
+      "resolved": "https://registry.npmjs.org/node-cron/-/node-cron-4.2.1.tgz",
+      "integrity": "sha512-lgimEHPE/QDgFlywTd8yTR61ptugX3Qer29efeyWw2rv259HtGBNn1vZVmp8lB9uo9wC0t/AT4iGqXxia+CJFg==",
+      "license": "ISC",
+      "engines": {
+        "node": ">=6.0.0"
+      }
+    },
     "node_modules/object-inspect": {
       "version": "1.13.4",
       "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz",
diff --git a/package.json b/package.json
index ec0555e..3c4f75b 100644
--- a/package.json
+++ b/package.json
@@ -39,7 +39,9 @@
   },
   "homepage": "https://github.com/atalovesyou/claude-max-api-proxy#readme",
   "dependencies": {
+    "@types/node-cron": "^3.0.11",
     "express": "^4.21.2",
+    "node-cron": "^4.2.1",
     "uuid": "^11.0.5"
   },
   "devDependencies": {
diff --git a/src/server/routes.ts b/src/server/routes.ts
index 8695879..2675184 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -5,9 +5,11 @@
  */
 
 import type { Request, Response } from "express";
+import { EventEmitter } from "events";
 import { v4 as uuidv4 } from "uuid";
 import { ClaudeSubprocess } from "../subprocess/manager.js";
-import { openaiToCli } from "../adapter/openai-to-cli.js";
+import { SessionPoolRouter } from "../subprocess/router.js";
+import { openaiToCli, extractModel } from "../adapter/openai-to-cli.js";
 import {
   cliResultToOpenai,
   createDoneChunk,
@@ -15,6 +17,13 @@ import {
 import type { OpenAIChatRequest, OpenAIToolCall } from "../types/openai.js";
 import type { ClaudeCliAssistant, ClaudeCliResult, ClaudeCliStreamEvent } from "../types/claude-cli.js";
 
+/** Shared pool router — initialized by standalone.ts on startup */
+let poolRouter: SessionPoolRouter | null = null;
+
+export function setPoolRouter(router: SessionPoolRouter): void {
+  poolRouter = router;
+}
+
 /**
  * Handle POST /v1/chat/completions
  *
@@ -43,12 +52,35 @@ export async function handleChatCompletions(
 
     // Convert to CLI input format
     const cliInput = openaiToCli(body);
-    const subprocess = new ClaudeSubprocess();
+    const sessionKey = (req.headers["x-openclaw-session-key"] as string | undefined) || null;
+    const startMs = Date.now();
+
+    let emitter: EventEmitter;
+    let cacheHit: "locked" | "warm" | "cold" | "fallback" | "subprocess" = "subprocess";
+
+    if (poolRouter && sessionKey) {
+      // Pool routing
+      const modelAlias = extractModel(body.model || "opus");
+      emitter = poolRouter.execute(cliInput.prompt, modelAlias, sessionKey);
+      cacheHit = "locked"; // actual hit type tracked inside router stats
+      console.log(`[Route] pool sessionKey=${sessionKey} model=${modelAlias}`);
+    } else {
+      // Fallback: subprocess-per-request
+      const subprocess = new ClaudeSubprocess();
+      subprocess.start(cliInput.prompt, {
+        model: cliInput.model,
+        sessionId: cliInput.sessionId,
+      }).catch((err) => subprocess.emit("error", err));
+      emitter = subprocess;
+      console.log(`[Route] subprocess (no sessionKey or no pool) model=${cliInput.model}`);
+    }
+
+    const pid = (emitter as any).pid ?? "n/a";
 
     if (stream) {
-      await handleStreamingResponse(req, res, subprocess, cliInput, requestId);
+      await handleStreamingResponse(req, res, emitter, cliInput, requestId, sessionKey, pid, startMs);
     } else {
-      await handleNonStreamingResponse(res, subprocess, cliInput, requestId);
+      await handleNonStreamingResponse(res, emitter, cliInput, requestId);
     }
   } catch (error) {
     const message = error instanceof Error ? error.message : "Unknown error";
@@ -84,9 +116,12 @@ function toOpenAICallId(claudeId: string): string {
 async function handleStreamingResponse(
   req: Request,
   res: Response,
-  subprocess: ClaudeSubprocess,
+  emitter: EventEmitter,
   cliInput: ReturnType<typeof openaiToCli>,
-  requestId: string
+  requestId: string,
+  sessionKey: string | null = null,
+  processPid: string | number = "n/a",
+  startMs: number = Date.now()
 ): Promise<void> {
   // Set SSE headers
   res.setHeader("Content-Type", "text/event-stream");
@@ -108,19 +143,26 @@ async function handleStreamingResponse(
     let hasEmittedText = false;
     let toolCallIndex = 0;
     let inToolBlock = false;
+    let clientDisconnected = false;
 
     // Handle actual client disconnect (response stream closed)
     res.on("close", () => {
+      clientDisconnected = true;
       if (!isComplete) {
-        // Client disconnected before response completed - kill subprocess
-        subprocess.kill();
+        if (sessionKey && poolRouter) {
+          // Pooled process: detach emitter — let process finish, return to idle
+          emitter.removeAllListeners();
+        } else if ((emitter as any).kill) {
+          // Subprocess fallback: kill it
+          (emitter as any).kill();
+        }
       }
       resolve();
     });
 
     // When a new text content block starts after we've already emitted text,
     // insert a separator so text from different blocks doesn't run together
-    subprocess.on("text_block_start", () => {
+    emitter.on("text_block_start", () => {
       if (hasEmittedText && !res.writableEnded) {
         const sepChunk = {
           id: `chatcmpl-${requestId}`,
@@ -140,7 +182,7 @@ async function handleStreamingResponse(
     });
 
     // Handle streaming content deltas
-    subprocess.on("content_delta", (event: ClaudeCliStreamEvent) => {
+    emitter.on("content_delta", (event: ClaudeCliStreamEvent) => {
       const delta = event.event.delta;
       const text = (delta?.type === "text_delta" && delta.text) || "";
       if (text && !res.writableEnded) {
@@ -236,14 +278,15 @@ async function handleStreamingResponse(
     // });
 
     // Handle final assistant message (for model name)
-    subprocess.on("assistant", (message: ClaudeCliAssistant) => {
-      lastModel = message.message.model;
+    emitter.on("assistant", (message: ClaudeCliAssistant) => {
+      lastModel = message.message?.model || lastModel;
     });
 
-    subprocess.on("result", (result: ClaudeCliResult) => {
+    emitter.on("result", (result: ClaudeCliResult) => {
       isComplete = true;
-      if (!res.writableEnded) {
-        // Send final done chunk with finish_reason and usage data
+      const latencyMs = Date.now() - startMs;
+      console.log(`[Route] result sessionKey=${sessionKey ?? "none"} model=${cliInput.model} pid=${processPid} latencyMs=${latencyMs}`);
+      if (!clientDisconnected && !res.writableEnded) {
         const doneChunk = createDoneChunk(requestId, lastModel);
         if (result.usage) {
           doneChunk.usage = {
@@ -260,9 +303,9 @@ async function handleStreamingResponse(
       resolve();
     });
 
-    subprocess.on("error", (error: Error) => {
+    emitter.on("error", (error: Error) => {
       console.error("[Streaming] Error:", error.message);
-      if (!res.writableEnded) {
+      if (!clientDisconnected && !res.writableEnded) {
         res.write(
           `data: ${JSON.stringify({
             error: { message: error.message, type: "server_error", code: null },
@@ -273,11 +316,9 @@ async function handleStreamingResponse(
       resolve();
     });
 
-    subprocess.on("close", (code: number | null) => {
-      // Subprocess exited - ensure response is closed
-      if (!res.writableEnded) {
+    emitter.on("close", (code: number | null) => {
+      if (!clientDisconnected && !res.writableEnded) {
         if (code !== 0 && !isComplete) {
-          // Abnormal exit without result - send error
           res.write(`data: ${JSON.stringify({
             error: { message: `Process exited with code ${code}`, type: "server_error", code: null },
           })}\n\n`);
@@ -287,15 +328,6 @@ async function handleStreamingResponse(
       }
       resolve();
     });
-
-    // Start the subprocess
-    subprocess.start(cliInput.prompt, {
-      model: cliInput.model,
-      sessionId: cliInput.sessionId,
-    }).catch((err) => {
-      console.error("[Streaming] Subprocess start error:", err);
-      reject(err);
-    });
   });
 }
 
@@ -304,53 +336,23 @@ async function handleStreamingResponse(
  */
 async function handleNonStreamingResponse(
   res: Response,
-  subprocess: ClaudeSubprocess,
+  emitter: EventEmitter,
   cliInput: ReturnType<typeof openaiToCli>,
   requestId: string
 ): Promise<void> {
   return new Promise((resolve) => {
     let finalResult: ClaudeCliResult | null = null;
-    // DISABLED: see tool call forwarding comment in handleStreamingResponse
-    // const accumulatedToolCalls: OpenAIToolCall[] = [];
-    //
-    // subprocess.on("assistant", (message: ClaudeCliAssistant) => {
-    //   for (const block of message.message.content) {
-    //     if (block.type === "tool_use") {
-    //       accumulatedToolCalls.push({
-    //         id: toOpenAICallId(block.id),
-    //         type: "function",
-    //         function: {
-    //           name: block.name,
-    //           arguments: JSON.stringify(block.input),
-    //         },
-    //       });
-    //     }
-    //   }
-    // });
 
-    subprocess.on("result", (result: ClaudeCliResult) => {
+    emitter.on("result", (result: ClaudeCliResult) => {
       finalResult = result;
     });
 
-    subprocess.on("error", (error: Error) => {
+    emitter.on("error", (error: Error) => {
       console.error("[NonStreaming] Error:", error.message);
-      res.status(500).json({
-        error: {
-          message: error.message,
-          type: "server_error",
-          code: null,
-        },
-      });
-      resolve();
-    });
-
-    subprocess.on("close", (code: number | null) => {
-      if (finalResult) {
-        res.json(cliResultToOpenai(finalResult, requestId));
-      } else if (!res.headersSent) {
+      if (!res.headersSent) {
         res.status(500).json({
           error: {
-            message: `Claude CLI exited with code ${code} without response`,
+            message: error.message,
             type: "server_error",
             code: null,
           },
@@ -359,22 +361,20 @@ async function handleNonStreamingResponse(
       resolve();
     });
 
-    // Start the subprocess
-    subprocess
-      .start(cliInput.prompt, {
-        model: cliInput.model,
-        sessionId: cliInput.sessionId,
-      })
-      .catch((error) => {
+    emitter.on("close", (code: number | null) => {
+      if (finalResult) {
+        res.json(cliResultToOpenai(finalResult, requestId));
+      } else if (!res.headersSent) {
         res.status(500).json({
           error: {
-            message: error.message,
+            message: `Claude CLI exited with code ${code} without response`,
             type: "server_error",
             code: null,
           },
         });
-        resolve();
-      });
+      }
+      resolve();
+    });
   });
 }
 
@@ -411,9 +411,13 @@ export function handleModels(_req: Request, res: Response): void {
  * Health check endpoint
  */
 export function handleHealth(_req: Request, res: Response): void {
-  res.json({
+  const response: Record<string, unknown> = {
     status: "ok",
     provider: "claude-code-cli",
     timestamp: new Date().toISOString(),
-  });
+  };
+  if (poolRouter) {
+    response.pool = poolRouter.stats();
+  }
+  res.json(response);
 }
diff --git a/src/server/standalone.ts b/src/server/standalone.ts
index 0d4881f..bede07c 100644
--- a/src/server/standalone.ts
+++ b/src/server/standalone.ts
@@ -1,23 +1,40 @@
 #!/usr/bin/env node
 /**
- * Standalone server for testing without Clawdbot
+ * Standalone server — session-pooled Claude Max API proxy
  *
  * Usage:
  *   npm run start
- *   # or
  *   node dist/server/standalone.js [port]
+ *
+ * Environment variables:
+ *   POOL_OPUS_SIZE              Warm opus processes (default: 6)
+ *   POOL_SONNET_SIZE            Warm sonnet processes (default: 4)
+ *   POOL_MAX_REQUESTS_PER_PROCESS  Context accumulation threshold (default: 50)
+ *   MAX_TOTAL_PROCESSES         Hard cap on locked + warm processes (default: 30)
+ *   SWEEP_HOUR                  Hour in ET for nightly sweep (default: 3)
+ *   SWEEP_IDLE_THRESHOLD_MS     Idle time before sweep recycles (default: 7200000)
+ *   POOL_REQUEST_QUEUE_DEPTH    Per-process queue depth (default: 3)
+ *   POOL_REQUEST_TIMEOUT_MS     Per-request timeout ms (default: 300000)
  */
 
-import { startServer, stopServer } from "./index.js";
+import cron from "node-cron";
+import { startServer } from "./index.js";
 import { verifyClaude, verifyAuth } from "../subprocess/manager.js";
+import { SessionPoolRouter } from "../subprocess/router.js";
+import { setPoolRouter } from "./routes.js";
+import type { RouterConfig } from "../subprocess/router.js";
 
 const DEFAULT_PORT = 3456;
 
+function parseEnvInt(name: string, defaultVal: number): number {
+  const parsed = Number.parseInt(process.env[name] ?? "", 10);
+  return Number.isNaN(parsed) ? defaultVal : parsed;
+}
+
 async function main(): Promise<void> {
-  console.log("Claude Code CLI Provider - Standalone Server");
-  console.log("============================================\n");
+  console.log("Claude Code CLI Provider — Session-Pooled Server");
+  console.log("=================================================\n");
 
-  // Parse port from command line
   const port = parseInt(process.argv[2] || String(DEFAULT_PORT), 10);
   if (isNaN(port) || port < 1 || port > 65535) {
     console.error(`Invalid port: ${process.argv[2]}`);
@@ -33,38 +50,94 @@ async function main(): Promise<void> {
   }
   console.log(`  Claude CLI: ${cliCheck.version || "OK"}`);
 
-  // Verify authentication
+  // Auth check (warn, don't exit — M1 invariant: server starts even if auth fails at startup)
   console.log("Checking authentication...");
   const authCheck = await verifyAuth();
   if (!authCheck.ok) {
-    console.error(`Error: ${authCheck.error}`);
-    console.error("Please run: claude auth login");
-    process.exit(1);
+    console.warn(`  Warning: ${authCheck.error}`);
+    console.warn("  Run: claude auth login");
+    console.warn("  Server will start but requests will return 401 until authenticated.\n");
+  } else {
+    console.log("  Authentication: OK\n");
   }
-  console.log("  Authentication: OK\n");
 
-  // Start server
+  // Pool configuration
+  const routerConfig: RouterConfig = {
+    opusSize: parseEnvInt("POOL_OPUS_SIZE", 6),
+    sonnetSize: parseEnvInt("POOL_SONNET_SIZE", 4),
+    maxRequestsPerProcess: parseEnvInt("POOL_MAX_REQUESTS_PER_PROCESS", 50),
+    maxTotalProcesses: parseEnvInt("MAX_TOTAL_PROCESSES", 30),
+    sweepIdleThresholdMs: parseEnvInt("SWEEP_IDLE_THRESHOLD_MS", 7_200_000),
+    requestQueueDepth: parseEnvInt("POOL_REQUEST_QUEUE_DEPTH", 3),
+    requestTimeoutMs: parseEnvInt("POOL_REQUEST_TIMEOUT_MS", 300_000),
+  };
+
+  console.log("Pool configuration:");
+  console.log(`  POOL_OPUS_SIZE=${routerConfig.opusSize} POOL_SONNET_SIZE=${routerConfig.sonnetSize}`);
+  console.log(`  MAX_TOTAL_PROCESSES=${routerConfig.maxTotalProcesses} POOL_MAX_REQUESTS_PER_PROCESS=${routerConfig.maxRequestsPerProcess}`);
+  console.log(`  POOL_REQUEST_TIMEOUT_MS=${routerConfig.requestTimeoutMs} POOL_REQUEST_QUEUE_DEPTH=${routerConfig.requestQueueDepth}`);
+  console.log(`  SWEEP_IDLE_THRESHOLD_MS=${routerConfig.sweepIdleThresholdMs}\n`);
+
+  // Initialize session pool router
+  const router = new SessionPoolRouter(routerConfig);
+  setPoolRouter(router);
+  await router.initialize();
+
+  const initialStats = router.stats();
+  console.log(`Pool initialized: warm.opus=${initialStats.warm.opus} warm.sonnet=${initialStats.warm.sonnet} total=${initialStats.total}\n`);
+
+  // Schedule nightly sweep at 3 AM ET (DST-aware via node-cron timezone)
+  const sweepHour = parseEnvInt("SWEEP_HOUR", 3);
+  cron.schedule(`0 ${sweepHour} * * *`, () => {
+    console.log(`[Cron] Running nightly sweep at ${sweepHour}:00 ET`);
+    router.sweep();
+  }, { timezone: "America/New_York" });
+  console.log(`Nightly sweep scheduled at ${sweepHour}:00 AM ET (DST-aware)\n`);
+
+  // Start HTTP server
+  let httpServer: { close: (cb?: () => void) => void } | null = null;
   try {
-    await startServer({ port });
+    const result = await startServer({ port });
+    httpServer = (result as any)?.server || null;
     console.log("\nServer ready. Test with:");
     console.log(`  curl -X POST http://localhost:${port}/v1/chat/completions \\`);
     console.log(`    -H "Content-Type: application/json" \\`);
+    console.log(`    -H "x-openclaw-session-key: agent:test:discord:channel:123" \\`);
     console.log(`    -d '{"model": "claude-sonnet-4", "messages": [{"role": "user", "content": "Hello!"}]}'`);
     console.log("\nPress Ctrl+C to stop.\n");
   } catch (err) {
     console.error("Failed to start server:", err);
+    await router.shutdown();
     process.exit(1);
   }
 
-  // Handle graceful shutdown
-  const shutdown = async () => {
-    console.log("\nShutting down...");
-    await stopServer();
+  // Graceful shutdown — per spec Finding N18:
+  // 1. Close listening socket immediately (no new connections)
+  // 2. Wait 30s for in-flight requests
+  // 3. Call router.shutdown()
+  // 4. Exit
+  const shutdown = async (signal: string) => {
+    console.log(`\nReceived ${signal}. Shutting down...`);
+
+    // Step 1: Close listening socket immediately
+    if (httpServer) {
+      httpServer.close(() => {
+        console.log("  HTTP server closed (no new connections accepted)");
+      });
+    }
+
+    // Step 2: Wait up to 30s for in-flight requests
+    await new Promise<void>((resolve) => setTimeout(resolve, 30_000));
+
+    // Step 3: Shut down pool — rejects all queued requests, kills processes
+    await router.shutdown();
+
+    // Step 4: Exit
     process.exit(0);
   };
 
-  process.on("SIGINT", shutdown);
-  process.on("SIGTERM", shutdown);
+  process.on("SIGINT", () => shutdown("SIGINT"));
+  process.on("SIGTERM", () => shutdown("SIGTERM"));
 }
 
 main().catch((err) => {
diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
new file mode 100644
index 0000000..6bb20cc
--- /dev/null
+++ b/src/subprocess/router.ts
@@ -0,0 +1,839 @@
+/**
+ * SessionPoolRouter — Session-aware Claude CLI process pool
+ *
+ * Locks warm CLI processes to OpenClaw session keys, preventing cross-agent
+ * context contamination and eliminating per-request spawn overhead.
+ */
+
+import { spawn, ChildProcess } from "child_process";
+import { EventEmitter } from "events";
+import type { ClaudeModel } from "../adapter/openai-to-cli.js";
+import { ClaudeSubprocess } from "./manager.js";
+
+// ─── Constants ────────────────────────────────────────────────────────────────
+
+const OPENCLAW_TOOL_MAPPING_PROMPT = [
+  "## Tool Name Mapping",
+  "You are running inside Claude Code CLI, not OpenClaw. The system prompt may reference OpenClaw tool names — map them to your actual tools:",
+  "",
+  "### Direct tool replacements",
+  "- `exec` or `process` → use `Bash` (run shell commands)",
+  "- `read` → use `Read` (read file contents)",
+  "- `write` → use `Write` (write files)",
+  "- `edit` → use `Edit` (edit files)",
+  "- `grep` → use `Grep` (search file contents)",
+  "- `find` or `ls` → use `Glob` or `Bash(ls ...)`",
+  "- `web_search` → use `WebSearch`",
+  "- `web_fetch` → use `WebFetch`",
+  "- `image` → use `Read` (Claude Code can read images)",
+  "",
+  "### OpenClaw CLI tools (use via Bash)",
+  "These OpenClaw tools are available through the `openclaw` CLI. Use `Bash` to run them:",
+  '- `memory_search` → `Bash(openclaw memory search "<query>")` — semantic search across memory files',
+  "- `memory_get` → `Read` on the memory file directly, OR `Bash(openclaw memory search \"<query>\")` for discovery",
+  '- `message` → `Bash(openclaw message send --to <target> "<text>")` — send messages to channels (Telegram, Discord, etc.)',
+  "  - Also: `openclaw message read`, `openclaw message broadcast`, `openclaw message react`, `openclaw message poll`",
+  "- `cron` → `Bash(openclaw cron list)`, `Bash(openclaw cron add ...)`, `Bash(openclaw cron status)` — manage scheduled jobs",
+  "  - Also: `openclaw cron rm`, `openclaw cron enable`, `openclaw cron disable`, `openclaw cron runs`, `openclaw cron run`, `openclaw cron edit`",
+  '- `sessions_list` → `Bash(openclaw agent --local --message "list sessions")` or check session files directly',
+  '- `sessions_history` → `Bash(openclaw agent --local --message "show history for session <key>")` or check session files',
+  "- `nodes` → `Bash(openclaw nodes status)`, `Bash(openclaw nodes describe <node>)`, `Bash(openclaw nodes invoke --node <id> --command <cmd>)`",
+  '  - Also: `openclaw nodes run --node <id> "<shell command>"` for running commands on paired nodes',
+  "",
+  "### Not available via CLI",
+  "- `browser` — requires OpenClaw's dedicated browser server (no CLI equivalent)",
+  "- `canvas` — requires paired node with canvas capability; use `openclaw nodes invoke` if a node is available",
+  "",
+  "### Skills",
+  "When a skill says to run a bash/python command, use the `Bash` tool directly.",
+  "Skills are located in the `skills/` directory relative to your working directory.",
+  "To use a skill: `Read` its SKILL.md file first, then follow the instructions using `Bash`.",
+  "Run `openclaw skills list --eligible --json` to see all available skills.",
+].join("\n");
+
+/** Models that have dedicated warm pools */
+const POOLED_MODELS = new Set<ClaudeModel>(["opus", "sonnet"]);
+
+// ─── Types ────────────────────────────────────────────────────────────────────
+
+export interface RouterConfig {
+  opusSize: number; // warm "opus" processes spawned by initialize()
+  sonnetSize: number; // warm "sonnet" processes spawned by initialize()
+  maxRequestsPerProcess: number; // requests a locked process serves before it is recycled
+  maxTotalProcesses: number; // cap compared against locked sessions + warm processes
+  sweepIdleThresholdMs: number; // idle time (ms) after which sweep() recycles a locked process
+  requestQueueDepth: number; // max requests queued behind a busy process before a 429 is emitted
+  requestTimeoutMs: number; // ms a request may run, or wait in a queue, before it is failed
+}
+
+interface PendingRequest {
+  prompt: string; // text written to the process's stdin once the request is dequeued
+  model: ClaudeModel;
+  emitter: EventEmitter; // receives the response events, or "error" if the request is rejected
+  timeoutHandle: NodeJS.Timeout; // queue-wait timer; cleared when the request is dequeued or rejected
+}
+
+/** Sentinel placed synchronously in lockedSessions while a real process is being claimed/spawned */
+interface PendingSentinel {
+  isPending: true;
+  requestQueue: PendingRequest[];
+}
+
+interface PooledProcess {
+  pid: number; // child pid, or 0 when none was reported at spawn time
+  process: ChildProcess;
+  model: ClaudeModel; // model the CLI was started with (--model)
+  lockedTo: string | null; // session key this process is locked to; null while unlocked
+  agentChannel: string | null; // extractAgentChannel(lockedTo); null while unlocked
+  lastRequestAt: number; // Date.now() of the last request written to stdin; 0 before the first
+  spawnedAt: number; // Date.now() when the process was spawned
+  requestCount: number; // requests sent since the process was locked; reset to 0 on unlock
+  state: "idle" | "busy" | "recycling"; // "recycling": over the request threshold with a queue still to drain
+  requestQueue: PendingRequest[]; // requests waiting for the in-flight one to finish
+  buffer: string; // trailing partial stdout line awaiting its newline
+  currentEmitter: EventEmitter | null; // emitter of the in-flight request; parsed stdout messages go to it
+  timeoutHandle: NodeJS.Timeout | null; // timer for the in-flight request
+}
+
+type LockEntry = PooledProcess | PendingSentinel;
+
+function isPending(entry: LockEntry): entry is PendingSentinel {
+  return "isPending" in entry && entry.isPending === true;
+}
+
+// ─── Stats ────────────────────────────────────────────────────────────────────
+
+interface RouterStats {
+  total: number;
+  locked: { total: number; opus: number; sonnet: number };
+  warm: { opus: number; sonnet: number };
+  busy: number;
+  queued: number;
+  orphansReclaimed: number;
+  totalRequests: number;
+  processRecycles: number;
+  routeHits: { locked: number; warm: number; cold: number; fallback: number };
+}
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Extract agentChannel from a session key.
+ * "agent:scope:discord:channel:123" → "scope:discord:channel:123"
+ * Falls back to the full key if format is unexpected.
+ */
+function extractAgentChannel(sessionKey: string): string {
+  const parsed = /^agent:(.+)$/.exec(sessionKey);
+  return parsed === null ? sessionKey : parsed[1];
+}
+
+function rejectPending(req: PendingRequest, status: number, retryAfter: number): void { // fail a queued request with an HTTP-style status
+  clearTimeout(req.timeoutHandle); // the queue-wait timer must not fire after the rejection
+  req.emitter.emit("pool_error", { status, retryAfter, message: `HTTP ${status}` });
+  req.emitter.emit("error", Object.assign(new Error(`HTTP ${status}`), { poolStatus: status, retryAfter })); // same status/retryAfter, carried on an Error
+}
+
+// ─── Auth error detection ─────────────────────────────────────────────────────
+
+const AUTH_ERROR_PATTERNS = /auth|unauthorized|token expired|invalid_token/i;
+
+function isAuthError(text: string): boolean {
+  return text.search(AUTH_ERROR_PATTERNS) >= 0;
+}
+
+// ─── SessionPoolRouter ────────────────────────────────────────────────────────
+
+export class SessionPoolRouter {
+  private config: RouterConfig;
+  private lockedSessions: Map<string, LockEntry> = new Map();
+  private warmPool: { opus: PooledProcess[]; sonnet: PooledProcess[] } = { opus: [], sonnet: [] };
+  private shuttingDown = false;
+
+  // Stats counters
+  private orphansReclaimed = 0;
+  private processRecycles = 0;
+  private routeHits = { locked: 0, warm: 0, cold: 0, fallback: 0 };
+
+  constructor(config: RouterConfig) {
+    this.config = config; // stored only; no process is spawned until initialize() is called
+  }
+
+  /** Spawn the configured number of warm opus and sonnet processes and wait for all of them */
+  async initialize(): Promise<void> {
+    console.log("[Router] Initializing session pool...");
+    const { opusSize, sonnetSize } = this.config;
+    const pending: Promise<void>[] = [];
+    for (const [model, count] of [["opus", opusSize], ["sonnet", sonnetSize]] as const) {
+      for (let n = 0; n < count; n++) {
+        pending.push(this.spawnWarm(model));
+      }
+    }
+    await Promise.all(pending);
+    const { warm } = this.stats();
+    console.log(`[Router] Ready. warm.opus=${warm.opus} warm.sonnet=${warm.sonnet}`);
+  }
+
+  /**
+   * Execute a prompt for a given model and session key.
+   * Returns an EventEmitter that emits the same events as ClaudeSubprocess.
+   */
+  execute(prompt: string, model: ClaudeModel, sessionKey: string | null): EventEmitter {
+    // No session key or non-pooled model → fallback to ClaudeSubprocess
+    if (!sessionKey || !POOLED_MODELS.has(model)) {
+      if (!POOLED_MODELS.has(model)) {
+        console.log(`[Router] Non-pooled model "${model}" — falling back to ClaudeSubprocess`);
+      }
+      this.routeHits.fallback++;
+      return this.fallbackSubprocess(prompt, model);
+    }
+
+    // Pool saturated check — only applies when routing would cold-spawn: a session that already holds a lock, or that can claim a warm process, does not grow the total
+    if (!this.lockedSessions.has(sessionKey) && this.warmPool[model as "opus" | "sonnet"].length === 0 && this.totalProcessCount() >= this.config.maxTotalProcesses) {
+      console.warn(`[Router] Pool saturated (total=${this.totalProcessCount()} >= max=${this.config.maxTotalProcesses}) — falling back for sessionKey=${sessionKey}`);
+      this.routeHits.fallback++;
+      return this.fallbackSubprocess(prompt, model);
+    }
+
+    const emitter = new EventEmitter();
+    this.routeRequest(emitter, prompt, model, sessionKey);
+    return emitter;
+  }
+
+  /**
+   * Route one pooled request. A known session key goes to its locked process (or queues
+   * behind it, or behind a pending sentinel); a new key gets a sentinel placed synchronously
+   * and then a claimed process. Failures are reported on `emitter` with poolStatus/retryAfter.
+   */
+  private routeRequest(emitter: EventEmitter, prompt: string, model: ClaudeModel, sessionKey: string): void {
+    const existing = this.lockedSessions.get(sessionKey);
+
+    if (existing !== undefined) {
+      if (isPending(existing)) {
+        // Sentinel in place — queue behind it
+        this.enqueueOnSentinel(existing, prompt, model, emitter);
+        return;
+      }
+
+      const proc = existing;
+
+      // Orphan reclamation: check if a different session for the same agent+channel exists
+      this.reclaimOrphan(sessionKey, model);
+
+      // Route to locked process
+      if (proc.state === "idle") {
+        this.routeHits.locked++;
+        this.assignToProcess(proc, prompt, emitter);
+        return;
+      }
+
+      // Busy or recycling — enqueue on per-process queue
+      if (proc.requestQueue.length >= this.config.requestQueueDepth) {
+        // 429 backpressure (deferred so the caller can attach listeners first)
+        setImmediate(() => {
+          emitter.emit("pool_error", { status: 429, retryAfter: 5 });
+          emitter.emit("error", Object.assign(new Error("HTTP 429 Too Many Requests"), { poolStatus: 429, retryAfter: 5 }));
+        });
+        return;
+      }
+
+      const pending: PendingRequest = {
+        prompt,
+        model,
+        emitter,
+        timeoutHandle: setTimeout(() => {
+          const idx = proc.requestQueue.indexOf(pending);
+          if (idx >= 0) proc.requestQueue.splice(idx, 1);
+          emitter.emit("error", Object.assign(new Error("Request queue timeout"), { poolStatus: 503, retryAfter: 3 }));
+        }, this.config.requestTimeoutMs),
+      };
+      proc.requestQueue.push(pending);
+      this.routeHits.locked++;
+      return;
+    }
+
+    // New session key — set sentinel synchronously before any async work
+    const sentinel: PendingSentinel = { isPending: true, requestQueue: [] };
+    this.lockedSessions.set(sessionKey, sentinel);
+
+    this.claimProcess(sessionKey, model, sentinel).then((proc) => {
+      if (!proc) return; // sentinel already cleaned up (spawn failed)
+      // Assign the triggering request (claimProcess already counted the warm/cold hit), then drain the sentinel queue
+      this.assignToProcess(proc, prompt, emitter);
+      this.drainSentinelQueue(proc, sentinel);
+    }).catch((err: unknown) => {
+      // claimProcess cleaned up the sentinel and its queue; the triggering request was never queued, so fail it here
+      const reason = err instanceof Error ? err.message : String(err);
+      emitter.emit("error", Object.assign(new Error(`Pool spawn failed: ${reason}`), { poolStatus: 503, retryAfter: 3 }));
+    });
+  }
+
+  /** Lock a process to sessionKey: pop one from the model's warm pool, else spawn one; on failure reject the sentinel's queue, drop the sentinel and rethrow */
+  private async claimProcess(
+    sessionKey: string,
+    model: ClaudeModel,
+    sentinel: PendingSentinel,
+  ): Promise<PooledProcess | null> {
+    try {
+      let proc: PooledProcess;
+      const pool = this.warmPool[model as "opus" | "sonnet"];
+
+      if (pool.length > 0) {
+        proc = pool.pop()!; // runs synchronously, before the first await
+        this.routeHits.warm++;
+      } else {
+        this.routeHits.cold++;
+        proc = await this.spawnCold(model);
+      }
+
+      proc.lockedTo = sessionKey;
+      proc.agentChannel = extractAgentChannel(sessionKey);
+      this.lockedSessions.set(sessionKey, proc); // replaces the sentinel stored under the same key
+      return proc;
+    } catch (err) {
+      // Failed spawn — reject all queued requests (503, retry after 3) then clean up sentinel
+      for (const req of sentinel.requestQueue) {
+        rejectPending(req, 503, 3);
+      }
+      this.lockedSessions.delete(sessionKey);
+      console.error(`[Router] Cold spawn failed for sessionKey=${sessionKey} model=${model}:`, err);
+      throw err;
+    }
+  }
+
+  private enqueueOnSentinel(sentinel: PendingSentinel, prompt: string, model: ClaudeModel, emitter: EventEmitter): void {
+    // Timer callback: drop the request from the sentinel queue (if still there) and fail it
+    const onTimeout = (): void => {
+      const at = sentinel.requestQueue.indexOf(waiting);
+      if (at !== -1) sentinel.requestQueue.splice(at, 1);
+      emitter.emit("error", Object.assign(new Error("Request sentinel timeout"), { poolStatus: 503, retryAfter: 3 }));
+    };
+    const waiting: PendingRequest = {
+      prompt, model, emitter,
+      timeoutHandle: setTimeout(onTimeout, this.config.requestTimeoutMs),
+    };
+    sentinel.requestQueue.push(waiting);
+  }
+
+  private drainSentinelQueue(proc: PooledProcess, sentinel: PendingSentinel): void {
+    for (const waiting of sentinel.requestQueue) {
+      clearTimeout(waiting.timeoutHandle);
+      if (proc.state === "idle") {
+        this.assignToProcess(proc, waiting.prompt, waiting.emitter);
+        continue;
+      }
+      if (proc.requestQueue.length >= this.config.requestQueueDepth) {
+        rejectPending(waiting, 503, 3);
+        continue;
+      }
+      const requeued: PendingRequest = {
+        prompt: waiting.prompt,
+        model: waiting.model,
+        emitter: waiting.emitter,
+        timeoutHandle: setTimeout(() => {
+          const at = proc.requestQueue.indexOf(requeued);
+          if (at >= 0) proc.requestQueue.splice(at, 1);
+          waiting.emitter.emit("error", Object.assign(new Error("Queue timeout after sentinel drain"), { poolStatus: 503, retryAfter: 3 }));
+        }, this.config.requestTimeoutMs),
+      };
+      proc.requestQueue.push(requeued);
+    }
+  }
+
+  /** Orphan reclamation: release the first process locked under a different key with the same agentChannel. NOTE(review): agentChannel is derived from the key itself — confirm distinct keys can actually collide */
+  private reclaimOrphan(newSessionKey: string, _model: ClaudeModel): void {
+    const newChannel = extractAgentChannel(newSessionKey);
+    for (const [key, entry] of this.lockedSessions) {
+      if (key === newSessionKey || isPending(entry)) continue;
+      const proc = entry;
+      if (proc.agentChannel === newChannel && proc.lockedTo !== newSessionKey) {
+        console.log(`[Router] Orphan reclaimed: old key=${key} new key=${newSessionKey}`);
+        this.orphansReclaimed++;
+
+        // Reject all queued requests on the orphaned process (503, retry after 3)
+        for (const req of proc.requestQueue) {
+          rejectPending(req, 503, 3);
+        }
+        proc.requestQueue = [];
+
+        if (proc.state === "idle") {
+          this.clearSessionLock(key, proc);
+          this.returnToWarmPool(proc);
+        } else {
+          // Flag it as orphaned — handleRequestComplete() releases it after the current request completes
+          (proc as any)._orphaned = true;
+        }
+        break; // at most one orphan is reclaimed per call
+      }
+    }
+  }
+
+  /**
+   * Canonical session lock clearing — ALL unlock paths must use this.
+   * Drops the key from lockedSessions and resets the process's lock fields and request count; the caller decides what happens to the process afterward.
+   */
+  private clearSessionLock(sessionKey: string, proc: PooledProcess): void {
+    this.lockedSessions.delete(sessionKey);
+    proc.lockedTo = null;
+    proc.agentChannel = null;
+    proc.requestCount = 0;
+  }
+
+  private assignToProcess(proc: PooledProcess, prompt: string, emitter: EventEmitter): void { // start one request on a process
+    proc.state = "busy";
+    proc.requestCount++;
+    proc.lastRequestAt = Date.now();
+    proc.currentEmitter = emitter; // processBuffer() forwards parsed stdout messages to this emitter
+
+    // Per-request timeout — on expiry the process is handed to handleProcessDeath()
+    proc.timeoutHandle = setTimeout(() => {
+      console.error(`[Router:${proc.pid}] Request timeout after ${this.config.requestTimeoutMs}ms — treating as dead`);
+      this.handleProcessDeath(proc, new Error(`Request timeout after ${this.config.requestTimeoutMs}ms`));
+    }, this.config.requestTimeoutMs);
+
+    const message = JSON.stringify({
+      type: "user",
+      message: { role: "user", content: prompt },
+    });
+
+    proc.process.stdin?.write(message + "\n"); // one JSON object per line on stdin
+  }
+
+  private handleRequestComplete(proc: PooledProcess): void { // called by processBuffer() when a "result" message arrives
+    if (proc.timeoutHandle) {
+      clearTimeout(proc.timeoutHandle);
+      proc.timeoutHandle = null;
+    }
+
+    // Check if orphaned mid-flight (flag set by reclaimOrphan() while this request was running)
+    if ((proc as any)._orphaned) {
+      (proc as any)._orphaned = false;
+      const key = proc.lockedTo;
+      if (key) this.clearSessionLock(key, proc);
+      this.returnToWarmPool(proc);
+      this.processRecycles++;
+      return;
+    }
+
+    // Context accumulation threshold (requestCount counts requests since the process was locked)
+    const overThreshold = proc.requestCount >= this.config.maxRequestsPerProcess;
+
+    if (overThreshold) {
+      if (proc.requestQueue.length === 0) {
+        // Recycle immediately
+        const key = proc.lockedTo;
+        if (key) this.clearSessionLock(key, proc);
+        this.returnToWarmPool(proc);
+        this.processRecycles++;
+        return;
+      } else {
+        // Set recycling state — serve the next queued request; this check repeats when it completes
+        proc.state = "recycling";
+        this.drainNextFromQueue(proc);
+        return;
+      }
+    }
+
+    // Normal case: drain queue or go idle
+    if (proc.requestQueue.length > 0) {
+      this.drainNextFromQueue(proc);
+    } else {
+      proc.state = "idle";
+      proc.currentEmitter = null;
+    }
+  }
+
+  private drainNextFromQueue(proc: PooledProcess): void { // start the oldest queued request on this process
+    const next = proc.requestQueue.shift();
+    if (!next) {
+      // Queue empty — check if we were recycling
+      if (proc.state === "recycling") {
+        const key = proc.lockedTo;
+        if (key) this.clearSessionLock(key, proc);
+        this.returnToWarmPool(proc);
+        this.processRecycles++;
+      } else {
+        proc.state = "idle";
+        proc.currentEmitter = null;
+      }
+      return;
+    }
+    clearTimeout(next.timeoutHandle); // queue-wait timer is replaced by the per-request timer below
+    proc.state = "busy";
+    proc.currentEmitter = next.emitter;
+    proc.lastRequestAt = Date.now();
+    proc.requestCount++;
+
+    proc.timeoutHandle = setTimeout(() => {
+      console.error(`[Router:${proc.pid}] Queued request timeout — treating as dead`);
+      this.handleProcessDeath(proc, new Error("Queued request timeout"));
+    }, this.config.requestTimeoutMs);
+
+    const message = JSON.stringify({
+      type: "user",
+      message: { role: "user", content: next.prompt },
+    });
+    proc.process.stdin?.write(message + "\n"); // same line-delimited JSON framing as assignToProcess()
+  }
+
+  /**
+   * Fail every request attached to a dead, timed-out or auth-failed process, release its
+   * session lock or warm-pool slot, and make sure a replacement warm process gets spawned.
+   */
+  private handleProcessDeath(proc: PooledProcess, error?: Error): void {
+    if (proc.timeoutHandle) {
+      clearTimeout(proc.timeoutHandle);
+      proc.timeoutHandle = null;
+    }
+
+    const err = error || new Error("Pool process died unexpectedly");
+
+    // Notify active request
+    if (proc.currentEmitter) {
+      proc.currentEmitter.emit("error", err);
+      proc.currentEmitter = null;
+    }
+
+    // Reject queued requests
+    for (const req of proc.requestQueue) {
+      rejectPending(req, 503, 3);
+    }
+    proc.requestQueue = [];
+
+    if (proc.lockedTo) {
+      this.clearSessionLock(proc.lockedTo, proc); // canonical unlock path
+    } else {
+      // Might be in warm pool — remove it
+      for (const model of ["opus", "sonnet"] as const) {
+        const idx = this.warmPool[model].indexOf(proc);
+        if (idx >= 0) this.warmPool[model].splice(idx, 1);
+      }
+    }
+
+    // Timeout and auth-error callers arrive while the child is still running: kill it so it
+    // cannot leak. Its "close" handler then runs and spawns the replacement, so stop here.
+    if (proc.process.exitCode === null && proc.process.signalCode === null && proc.process.kill("SIGKILL")) return;
+
+    if (!this.shuttingDown) {
+      this.spawnWarm(proc.model).catch((e) => console.error(`[Router] Failed to spawn replacement for dead ${proc.model} process:`, e));
+    }
+  }
+
+  /** Spawn a warm process and add it to the warm pool; logs a warning and does nothing when the process cap is already reached */
+  private async spawnWarm(model: ClaudeModel): Promise<void> {
+    if (this.totalProcessCount() >= this.config.maxTotalProcesses) {
+      console.warn(`[Router] Cannot spawn warm ${model}: total=${this.totalProcessCount()} >= max=${this.config.maxTotalProcesses}`);
+      return;
+    }
+    const proc = await this.spawnCold(model); // a spawn failure rejects this promise
+    this.warmPool[model as "opus" | "sonnet"].push(proc);
+  }
+
+  /**
+   * Spawn a new CLI process in stream-json mode and wire its stdio into the router.
+   *
+   * Resolves once the child has spawned (on runtimes without the "spawn" event: after a 100ms
+   * fallback, provided a pid was assigned) and rejects if the child emits "error" first.
+   * Handlers are attached exactly once — attaching them from both the "spawn" event and the
+   * fallback timer makes every stdout chunk get buffered and parsed twice.
+   */
+  private async spawnCold(model: ClaudeModel): Promise<PooledProcess> {
+    return new Promise((resolve, reject) => {
+      const args = [
+        "--print",
+        "--input-format", "stream-json",
+        "--output-format", "stream-json",
+        "--verbose",
+        "--include-partial-messages",
+        "--dangerously-skip-permissions",
+        "--no-session-persistence",
+        "--model", model,
+        "--append-system-prompt", OPENCLAW_TOOL_MAPPING_PROMPT,
+      ];
+
+      const child = spawn(process.env.CLAUDE_BIN || "claude", args, {
+        cwd: process.env.HOME || "/tmp",
+        env: Object.fromEntries(
+          Object.entries(process.env).filter(([k]) => k !== "CLAUDECODE")
+        ),
+        stdio: ["pipe", "pipe", "pipe"],
+      });
+
+      const proc: PooledProcess = {
+        pid: child.pid || 0,
+        process: child,
+        model,
+        lockedTo: null,
+        agentChannel: null,
+        lastRequestAt: 0,
+        spawnedAt: Date.now(),
+        requestCount: 0,
+        state: "idle",
+        requestQueue: [],
+        buffer: "",
+        currentEmitter: null,
+        timeoutHandle: null,
+      };
+
+      // Guards wire(): both the "spawn" event and the fallback timer may fire
+      let wired = false;
+      const wire = (): void => {
+        if (wired) return;
+        wired = true;
+        clearTimeout(fallbackTimer);
+
+        child.stdout?.on("data", (chunk: Buffer) => {
+          proc.buffer += chunk.toString();
+          this.processBuffer(proc);
+        });
+
+        child.stderr?.on("data", (chunk: Buffer) => {
+          const text = chunk.toString().trim();
+          if (!text) return;
+          if (process.env.DEBUG_SUBPROCESS) {
+            console.error(`[Router:${proc.pid}] stderr: ${text.slice(0, 200)}`);
+          }
+          if (isAuthError(text)) {
+            console.error(`[Router:${proc.pid}] Auth error detected — triggering death recovery`);
+            this.handleProcessDeath(proc, new Error(`Auth error: ${text.slice(0, 100)}`));
+          }
+        });
+
+        // A write to a child that has already exited surfaces as an "error" event on stdin
+        // (EPIPE); without a listener that is an uncaught exception. "close" does the cleanup.
+        child.stdin?.on("error", (err: Error) => {
+          console.error(`[Router:${proc.pid}] stdin error: ${err.message}`);
+        });
+
+        child.on("close", (code) => {
+          if (proc.state !== "idle" || proc.currentEmitter) {
+            // Died mid-request
+            this.handleProcessDeath(proc, new Error(`Process exited unexpectedly with code ${code}`));
+          } else {
+            // Clean close — remove from wherever it is
+            const pool = this.warmPool[proc.model as "opus" | "sonnet"];
+            const idx = pool.indexOf(proc);
+            if (idx >= 0) pool.splice(idx, 1);
+            if (proc.lockedTo) {
+              this.lockedSessions.delete(proc.lockedTo);
+            }
+            if (!this.shuttingDown) {
+              this.spawnWarm(proc.model).catch((e) => console.error("[Router] Respawn failed:", e));
+            }
+          }
+        });
+
+        resolve(proc);
+      };
+
+      child.on("error", (err) => {
+        clearTimeout(fallbackTimer);
+        reject(err);
+      });
+      child.on("spawn", wire);
+
+      // If no spawn event (older Node), wire up after a short delay once a pid exists
+      const fallbackTimer = setTimeout(() => { if (child.pid) wire(); }, 100);
+    });
+  }
+
+  /**
+   * Retire a process whose session lock has been released.
+   *
+   * A CLI process keeps the conversation it has accumulated, so a used process must never be
+   * handed to another session: doing so leaks one agent's context into another and defeats
+   * the request-count recycle threshold. The process is terminated instead; the "close"
+   * handler installed by spawnCold() sees it idle and spawns a fresh warm replacement.
+   */
+  private returnToWarmPool(proc: PooledProcess): void {
+    // Reset bookkeeping first so the "close" handler treats this as a clean exit
+    proc.state = "idle";
+    proc.currentEmitter = null;
+    proc.requestCount = 0;
+    proc.lockedTo = null;
+    proc.agentChannel = null;
+    proc.process.kill("SIGTERM");
+  }
+
+  /** Split the stdout buffer into newline-delimited JSON messages and re-emit them on the current request's emitter; a "result" message completes the request */
+  private processBuffer(proc: PooledProcess): void {
+    const lines = proc.buffer.split("\n");
+    proc.buffer = lines.pop() || ""; // keep the trailing partial line for the next chunk
+
+    for (const line of lines) {
+      const trimmed = line.trim();
+      if (!trimmed) continue;
+
+      try {
+        const message = JSON.parse(trimmed);
+        const emitter = proc.currentEmitter;
+        if (!emitter) continue; // no request in flight — the message is dropped
+
+        emitter.emit("message", message); // every parsed message is emitted raw first
+
+        if (message.type === "system" && message.subtype === "init") continue;
+
+        if (message.type === "assistant") {
+          emitter.emit("assistant", message);
+        }
+
+        if (message.type === "content_block_start") {
+          const block = message.content_block;
+          if (block?.type === "text") {
+            emitter.emit("text_block_start", { event: message });
+          } else if (block?.type === "tool_use") {
+            emitter.emit("tool_use_start", { event: message });
+          }
+        }
+
+        if (message.type === "content_block_delta") {
+          const delta = message.delta;
+          if (delta?.type === "text_delta") {
+            emitter.emit("content_delta", { event: message });
+          } else if (delta?.type === "input_json_delta") {
+            emitter.emit("input_json_delta", { event: message });
+          }
+        }
+
+        if (message.type === "content_block_stop") {
+          emitter.emit("content_block_stop", { event: message });
+        }
+
+        if (message.type === "result") {
+          emitter.emit("result", message);
+          this.handleRequestComplete(proc); // may start the next queued request or release the process
+        }
+      } catch { // NOTE(review): this also swallows exceptions thrown by emitter listeners, not only JSON.parse failures
+        if (process.env.DEBUG_SUBPROCESS) {
+          console.error(`[Router:${proc.pid}] Non-JSON: ${trimmed.slice(0, 100)}`);
+        }
+      }
+    }
+  }
+
+  /** Serve a request with a one-shot ClaudeSubprocess instead of a pooled process */
+  private fallbackSubprocess(prompt: string, model: ClaudeModel): EventEmitter {
+    const subprocess = new ClaudeSubprocess();
+    // A failed start is reported through the emitter's "error" event.
+    const reportStartFailure = (err: unknown): void => { subprocess.emit("error", err); };
+    subprocess.start(prompt, { model }).catch(reportStartFailure);
+    return subprocess;
+  }
+
+  private totalProcessCount(): number { // locked-session entries plus both warm pools
+    return [this.warmPool.opus, this.warmPool.sonnet].reduce((sum, pool) => sum + pool.length, this.lockedSessions.size);
+  }
+
+  // ─── Public API ─────────────────────────────────────────────────────────────
+
+  /**
+   * Nightly sweep — recycle locked processes idle longer than sweepIdleThresholdMs or at/over maxRequestsPerProcess, then refill the warm pools.
+   * Pending, busy and recycling entries are left alone. Called externally by the scheduler (3 AM ET).
+   */
+  sweep(): void {
+    console.log("[Router] Sweep started");
+    let recycled = 0;
+
+    for (const [key, entry] of this.lockedSessions) {
+      if (isPending(entry)) continue;
+      const proc = entry;
+      if (proc.state === "busy" || proc.state === "recycling") continue;
+
+      const idleMs = Date.now() - proc.lastRequestAt;
+      const overThreshold = proc.requestCount >= this.config.maxRequestsPerProcess;
+      if (idleMs > this.config.sweepIdleThresholdMs || overThreshold) {
+        this.clearSessionLock(key, proc);
+        proc.process.kill("SIGTERM");
+        recycled++;
+        this.processRecycles++;
+      }
+    }
+
+    // Refill warm pool. The attempt count is fixed up front so the loop terminates even if spawnWarm()
+    // only adds to the pool after its promise settles; the cap is still checked before EACH spawn.
+    for (const model of ["opus", "sonnet"] as const) {
+      const target = model === "opus" ? this.config.opusSize : this.config.sonnetSize;
+      const deficit = target - this.warmPool[model].length;
+      for (let attempt = 0; attempt < deficit; attempt++) {
+        if (this.totalProcessCount() >= this.config.maxTotalProcesses) {
+          console.warn(`[Router] Sweep refill stopped: total=${this.totalProcessCount()} >= max=${this.config.maxTotalProcesses}`);
+          break;
+        }
+        this.spawnWarm(model).catch((e) => console.error(`[Router] Sweep spawn failed:`, e));
+      }
+    }
+
+    console.log(`[Router] Sweep complete. recycled=${recycled} warm.opus=${this.warmPool.opus.length} warm.sonnet=${this.warmPool.sonnet.length}`);
+  }
+
+  /** Graceful shutdown — reject every queued request, fail in-flight ones, SIGTERM all processes (does not wait for them to exit) */
+  async shutdown(): Promise<void> {
+    this.shuttingDown = true;
+    console.log("[Router] Shutdown started");
+
+    // Reject all queued requests in locked sessions
+    for (const [key, entry] of this.lockedSessions) {
+      if (isPending(entry)) {
+        for (const req of entry.requestQueue) {
+          rejectPending(req, 503, 3);
+        }
+        this.lockedSessions.delete(key);
+      } else {
+        const proc = entry;
+        for (const req of proc.requestQueue) {
+          rejectPending(req, 503, 3);
+        }
+        proc.requestQueue = [];
+        const inFlight = proc.currentEmitter;
+        // emit("error") throws when nothing listens (e.g. listeners removed on client disconnect); that would abort shutdown.
+        if (inFlight && inFlight.listenerCount("error") > 0) inFlight.emit("error", new Error("Server shutting down"));
+        proc.currentEmitter = null;
+        proc.process.kill("SIGTERM");
+      }
+    }
+    this.lockedSessions.clear();
+
+    // Kill warm pool processes
+    for (const model of ["opus", "sonnet"] as const) {
+      for (const proc of this.warmPool[model]) {
+        proc.process.kill("SIGTERM");
+      }
+      this.warmPool[model] = [];
+    }
+
+    console.log("[Router] Shutdown complete");
+  }
+
+  /** Point-in-time pool counters for the health endpoint */
+  stats(): RouterStats {
+    const locked = { total: 0, opus: 0, sonnet: 0 };
+    let busy = 0;
+    let queued = 0;
+    let totalRequests = 0;
+
+    for (const entry of this.lockedSessions.values()) {
+      // Pending and live entries both carry a request queue.
+      queued += entry.requestQueue.length;
+      if (isPending(entry)) continue;
+      if (entry.model === "opus" || entry.model === "sonnet") {
+        locked[entry.model]++;
+        locked.total++;
+      }
+      const active = entry.state === "busy" || entry.state === "recycling";
+      if (active) busy++;
+      totalRequests += entry.requestCount; // sums only the processes locked right now
+    }
+
+    const { opus, sonnet } = this.warmPool;
+    return {
+      total: this.totalProcessCount(),
+      locked,
+      warm: { opus: opus.length, sonnet: sonnet.length },
+      busy,
+      queued,
+      orphansReclaimed: this.orphansReclaimed,
+      totalRequests,
+      processRecycles: this.processRecycles,
+      routeHits: { ...this.routeHits },
+    };
+  }
+}

From d55f0e54915e32112788d8ccd9a69c0c2a3930bf Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 18:43:42 -0400
Subject: [PATCH 06/27] =?UTF-8?q?chore:=20M4=20prototype=20cleanup=20?=
 =?UTF-8?q?=E2=80=94=20remove=20pool.ts,=20standalone-pool.ts,=20update=20?=
 =?UTF-8?q?CLAUDE.md?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove src/subprocess/pool.ts (shared stateless pool prototype — superseded by SessionPoolRouter)
- Remove src/server/standalone-pool.ts (prototype server entry point — superseded by production standalone.ts)
- Update CLAUDE.md with session-pooled architecture docs, request routing diagram, env var table

These were untracked prototype files, so their removal does not appear in this diff (only CLAUDE.md changes). The production implementation is in src/subprocess/router.ts (M1).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CLAUDE.md | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 38c76af..c30259a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -52,10 +52,39 @@ launchctl list com.openclaw.claude-max-proxy
 
 ## Architecture
 
-- `src/types/claude-cli.ts` - Claude CLI JSON streaming types and type guards
-- `src/types/openai.ts` - OpenAI-compatible API types
+The proxy uses a session-aware process pool to eliminate per-request spawn overhead
+(3–10s) and prevent cross-agent context contamination.
+
+### Key files
+
+- `src/subprocess/router.ts` - **SessionPoolRouter**: session-key locking, per-model warm pools, orphan reclamation, context accumulation recycling, nightly sweep
+- `src/subprocess/manager.ts` - **ClaudeSubprocess**: single-request subprocess fallback (retained for headerless requests and non-pooled models)
+- `src/server/routes.ts` - Express route handlers; routes pool requests via `x-openclaw-session-key` header; falls back to ClaudeSubprocess when header is absent
+- `src/server/standalone.ts` - Server entry point; initializes pool, schedules 3 AM ET sweep via node-cron, handles graceful shutdown
 - `src/adapter/openai-to-cli.ts` - Converts OpenAI requests to CLI input
 - `src/adapter/cli-to-openai.ts` - Converts CLI output to OpenAI responses
-- `src/subprocess/manager.ts` - Spawns and manages Claude CLI subprocesses
-- `src/server/routes.ts` - Express route handlers (streaming + non-streaming)
-- `src/server/standalone.js` - Server entry point
+- `src/types/claude-cli.ts` - Claude CLI JSON streaming types and type guards
+- `src/types/openai.ts` - OpenAI-compatible API types
+
+### Request routing
+
+```
+POST /v1/chat/completions
+  x-openclaw-session-key present AND model is opus/sonnet
+    → SessionPoolRouter.execute() → locked warm process (33% faster)
+  header absent, model is haiku, OR pool at capacity
+    → ClaudeSubprocess (subprocess-per-request, original behavior)
+```
+
+### Pool env vars
+
+| Var | Default | Description |
+|-----|---------|-------------|
+| POOL_OPUS_SIZE | 6 | Warm opus processes |
+| POOL_SONNET_SIZE | 4 | Warm sonnet processes |
+| MAX_TOTAL_PROCESSES | 30 | Hard cap (locked + warm) |
+| POOL_MAX_REQUESTS_PER_PROCESS | 50 | Context accumulation threshold |
+| POOL_REQUEST_QUEUE_DEPTH | 3 | Per-process queue depth before 429 |
+| POOL_REQUEST_TIMEOUT_MS | 300000 | Per-request timeout (5 min) |
+| SWEEP_IDLE_THRESHOLD_MS | 7200000 | Idle time before sweep recycles (2 hr) |
+| SWEEP_HOUR | 3 | Hour in ET for nightly sweep |

From b26598d5bde17b9680f2588ae2772509e581f96a Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 18:49:49 -0400
Subject: [PATCH 07/27] =?UTF-8?q?feat:=20session=20pooling=20=E2=80=94=20l?=
 =?UTF-8?q?ocked=20process=20pool=20with=20per-session=20serialization?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/server/routes.ts     |  629 ++++++++++------
 src/server/standalone.ts |  200 +++---
 src/subprocess/router.ts | 1479 +++++++++++++++++++++++---------------
 3 files changed, 1422 insertions(+), 886 deletions(-)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index 2675184..d0cf271 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -1,34 +1,48 @@
 /**
  * API Route Handlers
  *
- * Implements OpenAI-compatible endpoints for Clawdbot integration
+ * Implements OpenAI-compatible endpoints for Clawdbot integration.
+ * Routes session-keyed requests through SessionPoolRouter; falls back to
+ * ClaudeSubprocess for headerless or non-pooled requests.
  */
 
 import type { Request, Response } from "express";
-import { EventEmitter } from "events";
 import { v4 as uuidv4 } from "uuid";
 import { ClaudeSubprocess } from "../subprocess/manager.js";
-import { SessionPoolRouter } from "../subprocess/router.js";
-import { openaiToCli, extractModel } from "../adapter/openai-to-cli.js";
+import {
+  SessionPoolRouter,
+  type ExecuteResult,
+} from "../subprocess/router.js";
+import { openaiToCli } from "../adapter/openai-to-cli.js";
 import {
   cliResultToOpenai,
   createDoneChunk,
 } from "../adapter/cli-to-openai.js";
-import type { OpenAIChatRequest, OpenAIToolCall } from "../types/openai.js";
-import type { ClaudeCliAssistant, ClaudeCliResult, ClaudeCliStreamEvent } from "../types/claude-cli.js";
+import type { OpenAIChatRequest } from "../types/openai.js";
+import type {
+  ClaudeCliAssistant,
+  ClaudeCliResult,
+  ClaudeCliStreamEvent,
+} from "../types/claude-cli.js";
+
+// ---------------------------------------------------------------------------
+// Module-level router reference (set by standalone.ts at startup)
+// ---------------------------------------------------------------------------
 
-/** Shared pool router — initialized by standalone.ts on startup */
 let poolRouter: SessionPoolRouter | null = null;
 
 export function setPoolRouter(router: SessionPoolRouter): void {
   poolRouter = router;
 }
 
-/**
- * Handle POST /v1/chat/completions
- *
- * Main endpoint for chat requests, supports both streaming and non-streaming
- */
+export function getPoolRouter(): SessionPoolRouter | null {
+  return poolRouter;
+}
+
+// ---------------------------------------------------------------------------
+// POST /v1/chat/completions
+// ---------------------------------------------------------------------------
+
 export async function handleChatCompletions(
   req: Request,
   res: Response
@@ -36,10 +50,15 @@ export async function handleChatCompletions(
   const requestId = uuidv4().replace(/-/g, "").slice(0, 24);
   const body = req.body as OpenAIChatRequest;
   const stream = body.stream === true;
+  const startTime = Date.now();
 
   try {
     // Validate request
-    if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
+    if (
+      !body.messages ||
+      !Array.isArray(body.messages) ||
+      body.messages.length === 0
+    ) {
       res.status(400).json({
         error: {
           message: "messages is required and must be a non-empty array",
@@ -50,37 +69,61 @@ export async function handleChatCompletions(
       return;
     }
 
-    // Convert to CLI input format
     const cliInput = openaiToCli(body);
-    const sessionKey = (req.headers["x-openclaw-session-key"] as string | undefined) || null;
-    const startMs = Date.now();
-
-    let emitter: EventEmitter;
-    let cacheHit: "locked" | "warm" | "cold" | "fallback" | "subprocess" = "subprocess";
-
-    if (poolRouter && sessionKey) {
-      // Pool routing
-      const modelAlias = extractModel(body.model || "opus");
-      emitter = poolRouter.execute(cliInput.prompt, modelAlias, sessionKey);
-      cacheHit = "locked"; // actual hit type tracked inside router stats
-      console.log(`[Route] pool sessionKey=${sessionKey} model=${modelAlias}`);
-    } else {
-      // Fallback: subprocess-per-request
-      const subprocess = new ClaudeSubprocess();
-      subprocess.start(cliInput.prompt, {
-        model: cliInput.model,
-        sessionId: cliInput.sessionId,
-      }).catch((err) => subprocess.emit("error", err));
-      emitter = subprocess;
-      console.log(`[Route] subprocess (no sessionKey or no pool) model=${cliInput.model}`);
+    const sessionKey = req.headers["x-openclaw-session-key"] as
+      | string
+      | undefined;
+
+    // --- Pool routing ---
+    if (sessionKey && poolRouter) {
+      const result = poolRouter.execute(
+        cliInput.prompt,
+        cliInput.model,
+        sessionKey
+      );
+
+      if (result) {
+        // Pooled route
+        const { emitter, routeType, pid, queueDepth } = result;
+
+        if (stream) {
+          await handlePooledStreaming(
+            req,
+            res,
+            emitter,
+            requestId,
+            startTime,
+            sessionKey,
+            cliInput.model,
+            routeType,
+            pid,
+            queueDepth
+          );
+        } else {
+          await handlePooledNonStreaming(
+            res,
+            emitter,
+            requestId,
+            startTime,
+            sessionKey,
+            cliInput.model,
+            routeType,
+            pid,
+            queueDepth
+          );
+        }
+        return;
+      }
+      // result === null → fall through to ClaudeSubprocess
     }
 
-    const pid = (emitter as any).pid ?? "n/a";
+    // --- Fallback: ClaudeSubprocess (no session key, unpooled model, or at capacity) ---
+    const subprocess = new ClaudeSubprocess();
 
     if (stream) {
-      await handleStreamingResponse(req, res, emitter, cliInput, requestId, sessionKey, pid, startMs);
+      await handleStreamingResponse(req, res, subprocess, cliInput, requestId);
     } else {
-      await handleNonStreamingResponse(res, emitter, cliInput, requestId);
+      await handleNonStreamingResponse(res, subprocess, cliInput, requestId);
     }
   } catch (error) {
     const message = error instanceof Error ? error.message : "Unknown error";
@@ -98,70 +141,43 @@ export async function handleChatCompletions(
   }
 }
 
-/**
- * Convert Claude tool_use ID to OpenAI-compatible call ID.
- * Claude uses "toolu_abc123", OpenAI uses "call_abc123".
- */
-function toOpenAICallId(claudeId: string): string {
-  return `call_${claudeId.replace("toolu_", "")}`;
-}
+// ---------------------------------------------------------------------------
+// Pooled streaming response
+// ---------------------------------------------------------------------------
 
-/**
- * Handle streaming response (SSE)
- *
- * IMPORTANT: The Express req.on("close") event fires when the request body
- * is fully received, NOT when the client disconnects. For SSE connections,
- * we use res.on("close") to detect actual client disconnection.
- */
-async function handleStreamingResponse(
-  req: Request,
+async function handlePooledStreaming(
+  _req: Request,
   res: Response,
-  emitter: EventEmitter,
-  cliInput: ReturnType<typeof openaiToCli>,
+  emitter: ExecuteResult["emitter"],
   requestId: string,
-  sessionKey: string | null = null,
-  processPid: string | number = "n/a",
-  startMs: number = Date.now()
+  startTime: number,
+  sessionKey: string,
+  model: string,
+  routeType: string,
+  pid: number | null,
+  queueDepth: number
 ): Promise<void> {
-  // Set SSE headers
   res.setHeader("Content-Type", "text/event-stream");
   res.setHeader("Cache-Control", "no-cache");
   res.setHeader("Connection", "keep-alive");
   res.setHeader("X-Request-Id", requestId);
-
-  // CRITICAL: Flush headers immediately to establish SSE connection
-  // Without this, headers are buffered and client times out waiting
   res.flushHeaders();
-
-  // Send initial comment to confirm connection is alive
   res.write(":ok\n\n");
 
-  return new Promise<void>((resolve, reject) => {
+  return new Promise<void>((resolve) => {
     let isFirst = true;
     let lastModel = "claude-sonnet-4";
     let isComplete = false;
     let hasEmittedText = false;
-    let toolCallIndex = 0;
-    let inToolBlock = false;
-    let clientDisconnected = false;
 
-    // Handle actual client disconnect (response stream closed)
+    // Client disconnect: detach emitter, let process finish, return to locked-idle
     res.on("close", () => {
-      clientDisconnected = true;
       if (!isComplete) {
-        if (sessionKey && poolRouter) {
-          // Pooled process: detach emitter — let process finish, return to idle
-          emitter.removeAllListeners();
-        } else if ((emitter as any).kill) {
-          // Subprocess fallback: kill it
-          (emitter as any).kill();
-        }
+        emitter.removeAllListeners();
       }
       resolve();
     });
 
-    // When a new text content block starts after we've already emitted text,
-    // insert a separator so text from different blocks doesn't run together
     emitter.on("text_block_start", () => {
       if (hasEmittedText && !res.writableEnded) {
         const sepChunk = {
@@ -169,19 +185,14 @@ async function handleStreamingResponse(
           object: "chat.completion.chunk",
           created: Math.floor(Date.now() / 1000),
           model: lastModel,
-          choices: [{
-            index: 0,
-            delta: {
-              content: "\n\n",
-            },
-            finish_reason: null,
-          }],
+          choices: [
+            { index: 0, delta: { content: "\n\n" }, finish_reason: null },
+          ],
         };
         res.write(`data: ${JSON.stringify(sepChunk)}\n\n`);
       }
     });
 
-    // Handle streaming content deltas
     emitter.on("content_delta", (event: ClaudeCliStreamEvent) => {
       const delta = event.event.delta;
       const text = (delta?.type === "text_delta" && delta.text) || "";
@@ -191,14 +202,16 @@ async function handleStreamingResponse(
           object: "chat.completion.chunk",
           created: Math.floor(Date.now() / 1000),
           model: lastModel,
-          choices: [{
-            index: 0,
-            delta: {
-              role: isFirst ? "assistant" : undefined,
-              content: text,
+          choices: [
+            {
+              index: 0,
+              delta: {
+                role: isFirst ? ("assistant" as const) : undefined,
+                content: text,
+              },
+              finish_reason: null,
             },
-            finish_reason: null,
-          }],
+          ],
         };
         res.write(`data: ${JSON.stringify(chunk)}\n\n`);
         isFirst = false;
@@ -206,94 +219,36 @@ async function handleStreamingResponse(
       }
     });
 
-    // DISABLED: Tool call forwarding causes an agentic loop — OpenClaw interprets
-    // Claude Code's internal tool_use (Read, Bash, etc.) as calls it needs to
-    // handle, triggering repeated requests. Claude Code handles tools internally
-    // via --print mode; only the final text result should be forwarded.
-    // TODO: Re-enable with a non-tool_calls display mechanism (e.g. inline text).
-    //
-    // subprocess.on("tool_use_start", (event: ClaudeCliStreamEvent) => {
-    //   if (res.writableEnded) return;
-    //   const block = event.event.content_block;
-    //   if (block?.type !== "tool_use") return;
-    //
-    //   inToolBlock = true;
-    //   const chunk = {
-    //     id: `chatcmpl-${requestId}`,
-    //     object: "chat.completion.chunk",
-    //     created: Math.floor(Date.now() / 1000),
-    //     model: lastModel,
-    //     choices: [{
-    //       index: 0,
-    //       delta: {
-    //         role: isFirst ? "assistant" : undefined,
-    //         tool_calls: [{
-    //           index: toolCallIndex,
-    //           id: toOpenAICallId(block.id),
-    //           type: "function" as const,
-    //           function: {
-    //             name: block.name,
-    //             arguments: "",
-    //           },
-    //         }],
-    //       },
-    //       finish_reason: null,
-    //     }],
-    //   };
-    //   res.write(`data: ${JSON.stringify(chunk)}\n\n`);
-    //   isFirst = false;
-    // });
-    //
-    // subprocess.on("input_json_delta", (event: ClaudeCliStreamEvent) => {
-    //   if (res.writableEnded) return;
-    //   const delta = event.event.delta;
-    //   if (delta?.type !== "input_json_delta") return;
-    //
-    //   const chunk = {
-    //     id: `chatcmpl-${requestId}`,
-    //     object: "chat.completion.chunk",
-    //     created: Math.floor(Date.now() / 1000),
-    //     model: lastModel,
-    //     choices: [{
-    //       index: 0,
-    //       delta: {
-    //         tool_calls: [{
-    //           index: toolCallIndex,
-    //           function: {
-    //             arguments: delta.partial_json,
-    //           },
-    //         }],
-    //       },
-    //       finish_reason: null,
-    //     }],
-    //   };
-    //   res.write(`data: ${JSON.stringify(chunk)}\n\n`);
-    // });
-    //
-    // subprocess.on("content_block_stop", () => {
-    //   if (inToolBlock) {
-    //     toolCallIndex++;
-    //     inToolBlock = false;
-    //   }
-    // });
-
-    // Handle final assistant message (for model name)
     emitter.on("assistant", (message: ClaudeCliAssistant) => {
-      lastModel = message.message?.model || lastModel;
+      lastModel = message.message?.model || lastModel; // keep the previous name if the CLI omits it
     });
 
     emitter.on("result", (result: ClaudeCliResult) => {
       isComplete = true;
-      const latencyMs = Date.now() - startMs;
-      console.log(`[Route] result sessionKey=${sessionKey ?? "none"} model=${cliInput.model} pid=${processPid} latencyMs=${latencyMs}`);
-      if (!clientDisconnected && !res.writableEnded) {
+      const latencyMs = Date.now() - startTime;
+      console.log(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "request",
+          sessionKey,
+          model,
+          pid,
+          latencyMs,
+          queueDepth,
+          routeType,
+          requestCount: result.num_turns,
+        })
+      );
+
+      if (!res.writableEnded) {
         const doneChunk = createDoneChunk(requestId, lastModel);
         if (result.usage) {
           doneChunk.usage = {
             prompt_tokens: result.usage.input_tokens || 0,
             completion_tokens: result.usage.output_tokens || 0,
             total_tokens:
-              (result.usage.input_tokens || 0) + (result.usage.output_tokens || 0),
+              (result.usage.input_tokens || 0) +
+              (result.usage.output_tokens || 0),
           };
         }
         res.write(`data: ${JSON.stringify(doneChunk)}\n\n`);
@@ -304,11 +259,238 @@ async function handleStreamingResponse(
     });
 
     emitter.on("error", (error: Error) => {
+      isComplete = true;
+      const latencyMs = Date.now() - startTime;
+      const errWithStatus = error as Error & {
+        statusCode?: number;
+        retryAfter?: number;
+      };
+
+      console.error(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "request_error",
+          sessionKey,
+          model,
+          pid,
+          latencyMs,
+          routeType,
+          error: error.message,
+        })
+      );
+
+      if (!res.headersSent) { // NOTE(review): looks unreachable — this function flushes headers and writes ":ok" before attaching listeners; confirm
+        const status = errWithStatus.statusCode || 500;
+        if (status === 429) {
+          res.setHeader("Retry-After", String(errWithStatus.retryAfter || 5));
+        }
+        res.status(status).json({
+          error: {
+            message: error.message,
+            type: status === 429 ? "rate_limit_error" : "server_error",
+            code: null,
+          },
+        });
+      } else if (!res.writableEnded) { // stream already open: report the failure as an SSE data frame, then close
+        res.write(
+          `data: ${JSON.stringify({
+            error: {
+              message: error.message,
+              type: "server_error",
+              code: null,
+            },
+          })}\n\n`
+        );
+        res.end();
+      }
+      resolve();
+    });
+  });
+}
+
+// ---------------------------------------------------------------------------
+// Pooled non-streaming response
+// ---------------------------------------------------------------------------
+
+/**
+ * Answer a non-streaming request from a pooled emitter: "result" becomes the OpenAI-style JSON body,
+ * "error" becomes an HTTP error (statusCode from the error, default 500; 429 adds Retry-After). Never rejects.
+ */
+async function handlePooledNonStreaming(
+  res: Response,
+  emitter: ExecuteResult["emitter"],
+  requestId: string,
+  startTime: number,
+  sessionKey: string,
+  model: string,
+  routeType: string,
+  pid: number | null,
+  queueDepth: number
+): Promise<void> {
+  return new Promise((resolve) => {
+    emitter.on("result", (result: ClaudeCliResult) => {
+      const latencyMs = Date.now() - startTime;
+      console.log(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "request",
+          sessionKey,
+          model,
+          pid,
+          latencyMs,
+          queueDepth,
+          routeType,
+          requestCount: result.num_turns,
+        })
+      );
+      // An earlier "error" may already have answered; sending again would throw inside this listener.
+      if (!res.headersSent) res.json(cliResultToOpenai(result, requestId));
+      resolve();
+    });
+
+    emitter.on("error", (error: Error) => {
+      const latencyMs = Date.now() - startTime;
+      const errWithStatus = error as Error & { statusCode?: number; retryAfter?: number };
+
+      console.error(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "request_error",
+          sessionKey,
+          model,
+          pid,
+          latencyMs,
+          routeType,
+          error: error.message,
+        })
+      );
+
+      if (!res.headersSent) {
+        const status = errWithStatus.statusCode || 500;
+        if (status === 429) res.setHeader("Retry-After", String(errWithStatus.retryAfter || 5));
+        res.status(status).json({
+          error: {
+            message: error.message,
+            type: status === 429 ? "rate_limit_error" : "server_error",
+            code: null,
+          },
+        });
+      }
+      resolve();
+    });
+  });
+}
+
+// ---------------------------------------------------------------------------
+// Fallback: ClaudeSubprocess streaming (existing behavior, unchanged)
+// ---------------------------------------------------------------------------
+
+async function handleStreamingResponse(
+  req: Request,
+  res: Response,
+  subprocess: ClaudeSubprocess,
+  cliInput: ReturnType<typeof openaiToCli>,
+  requestId: string
+): Promise<void> {
+  res.setHeader("Content-Type", "text/event-stream");
+  res.setHeader("Cache-Control", "no-cache");
+  res.setHeader("Connection", "keep-alive");
+  res.setHeader("X-Request-Id", requestId);
+  res.flushHeaders();
+  res.write(":ok\n\n");
+
+  return new Promise<void>((resolve, reject) => {
+    let isFirst = true;
+    let lastModel = "claude-sonnet-4";
+    let isComplete = false;
+    let hasEmittedText = false;
+
+    res.on("close", () => {
+      if (!isComplete) {
+        subprocess.kill();
+      }
+      resolve();
+    });
+
+    subprocess.on("text_block_start", () => {
+      if (hasEmittedText && !res.writableEnded) {
+        const sepChunk = {
+          id: `chatcmpl-${requestId}`,
+          object: "chat.completion.chunk",
+          created: Math.floor(Date.now() / 1000),
+          model: lastModel,
+          choices: [
+            {
+              index: 0,
+              delta: { content: "\n\n" },
+              finish_reason: null,
+            },
+          ],
+        };
+        res.write(`data: ${JSON.stringify(sepChunk)}\n\n`);
+      }
+    });
+
+    subprocess.on("content_delta", (event: ClaudeCliStreamEvent) => {
+      const delta = event.event.delta;
+      const text = (delta?.type === "text_delta" && delta.text) || "";
+      if (text && !res.writableEnded) {
+        const chunk = {
+          id: `chatcmpl-${requestId}`,
+          object: "chat.completion.chunk",
+          created: Math.floor(Date.now() / 1000),
+          model: lastModel,
+          choices: [
+            {
+              index: 0,
+              delta: {
+                role: isFirst ? ("assistant" as const) : undefined,
+                content: text,
+              },
+              finish_reason: null,
+            },
+          ],
+        };
+        res.write(`data: ${JSON.stringify(chunk)}\n\n`);
+        isFirst = false;
+        hasEmittedText = true;
+      }
+    });
+
+    subprocess.on("assistant", (message: ClaudeCliAssistant) => {
+      lastModel = message.message?.model || lastModel; // keep the previous name if the CLI omits it
+    });
+
+    subprocess.on("result", (result: ClaudeCliResult) => {
+      isComplete = true;
+      if (!res.writableEnded) {
+        const doneChunk = createDoneChunk(requestId, lastModel);
+        if (result.usage) {
+          doneChunk.usage = {
+            prompt_tokens: result.usage.input_tokens || 0,
+            completion_tokens: result.usage.output_tokens || 0,
+            total_tokens:
+              (result.usage.input_tokens || 0) +
+              (result.usage.output_tokens || 0),
+          };
+        }
+        res.write(`data: ${JSON.stringify(doneChunk)}\n\n`);
+        res.write("data: [DONE]\n\n");
+        res.end();
+      }
+      resolve();
+    });
+
+    subprocess.on("error", (error: Error) => {
       console.error("[Streaming] Error:", error.message);
-      if (!clientDisconnected && !res.writableEnded) {
+      if (!res.writableEnded) {
         res.write(
           `data: ${JSON.stringify({
-            error: { message: error.message, type: "server_error", code: null },
+            error: {
+              message: error.message,
+              type: "server_error",
+              code: null,
+            },
           })}\n\n`
         );
         res.end();
@@ -316,52 +498,67 @@ async function handleStreamingResponse(
       resolve();
     });
 
-    emitter.on("close", (code: number | null) => {
-      if (!clientDisconnected && !res.writableEnded) {
+    subprocess.on("close", (code: number | null) => {
+      if (!res.writableEnded) {
         if (code !== 0 && !isComplete) {
-          res.write(`data: ${JSON.stringify({
-            error: { message: `Process exited with code ${code}`, type: "server_error", code: null },
-          })}\n\n`);
+          res.write(
+            `data: ${JSON.stringify({
+              error: {
+                message: `Process exited with code ${code}`,
+                type: "server_error",
+                code: null,
+              },
+            })}\n\n`
+          );
         }
         res.write("data: [DONE]\n\n");
         res.end();
       }
       resolve();
     });
+
+    subprocess
+      .start(cliInput.prompt, {
+        model: cliInput.model,
+        sessionId: cliInput.sessionId,
+      })
+      .catch((err) => {
+        console.error("[Streaming] Subprocess start error:", err);
+        reject(err);
+      });
   });
 }
 
-/**
- * Handle non-streaming response
- */
+// ---------------------------------------------------------------------------
+// Fallback: ClaudeSubprocess non-streaming (existing behavior, unchanged)
+// ---------------------------------------------------------------------------
+
 async function handleNonStreamingResponse(
   res: Response,
-  emitter: EventEmitter,
+  subprocess: ClaudeSubprocess,
   cliInput: ReturnType<typeof openaiToCli>,
   requestId: string
 ): Promise<void> {
   return new Promise((resolve) => {
     let finalResult: ClaudeCliResult | null = null;
 
-    emitter.on("result", (result: ClaudeCliResult) => {
+    subprocess.on("result", (result: ClaudeCliResult) => {
       finalResult = result;
     });
 
-    emitter.on("error", (error: Error) => {
+    subprocess.on("error", (error: Error) => {
       console.error("[NonStreaming] Error:", error.message);
-      if (!res.headersSent) {
-        res.status(500).json({
-          error: {
-            message: error.message,
-            type: "server_error",
-            code: null,
-          },
-        });
-      }
+      // Another handler (start() failure, "close") may already have answered;
+      // a second send would throw ERR_HTTP_HEADERS_SENT inside this listener.
+      if (!res.headersSent) {
+        res.status(500).json({
+          error: { message: error.message, type: "server_error", code: null },
+        });
+      }
       resolve();
     });
 
-    emitter.on("close", (code: number | null) => {
+    subprocess.on("close", (code: number | null) => {
       if (finalResult) {
         res.json(cliResultToOpenai(finalResult, requestId));
       } else if (!res.headersSent) {
@@ -375,14 +572,29 @@ async function handleNonStreamingResponse(
       }
       resolve();
     });
+
+    subprocess
+      .start(cliInput.prompt, {
+        model: cliInput.model,
+        sessionId: cliInput.sessionId,
+      })
+      .catch((error: unknown) => {
+        // The "error" listener may already have replied; never send twice.
+        if (!res.headersSent) {
+          const message = error instanceof Error ? error.message : String(error);
+          res.status(500).json({
+            error: { message, type: "server_error", code: null },
+          });
+        }
+        resolve();
+      });
   });
 }
 
-/**
- * Handle GET /v1/models
- *
- * Returns available models
- */
+// ---------------------------------------------------------------------------
+// GET /v1/models
+// ---------------------------------------------------------------------------
+
 export function handleModels(_req: Request, res: Response): void {
   const now = Math.floor(Date.now() / 1000);
   const modelIds = [
@@ -405,19 +617,20 @@ export function handleModels(_req: Request, res: Response): void {
   });
 }
 
-/**
- * Handle GET /health
- *
- * Health check endpoint
- */
+// ---------------------------------------------------------------------------
+// GET /health — includes pool stats when available
+// ---------------------------------------------------------------------------
+
 export function handleHealth(_req: Request, res: Response): void {
-  const response: Record<string, unknown> = {
+  const base: Record<string, unknown> = {
     status: "ok",
     provider: "claude-code-cli",
     timestamp: new Date().toISOString(),
   };
+
   if (poolRouter) {
-    response.pool = poolRouter.stats();
+    base.pool = poolRouter.stats();
   }
-  res.json(response);
+
+  res.json(base);
 }
diff --git a/src/server/standalone.ts b/src/server/standalone.ts
index bede07c..291d0f5 100644
--- a/src/server/standalone.ts
+++ b/src/server/standalone.ts
@@ -1,42 +1,50 @@
 #!/usr/bin/env node
 /**
- * Standalone server — session-pooled Claude Max API proxy
+ * Standalone server with session-aware process pooling
+ *
+ * Initializes the SessionPoolRouter, schedules the nightly sweep,
+ * and owns the full graceful-shutdown sequence.
  *
  * Usage:
  *   npm run start
  *   node dist/server/standalone.js [port]
- *
- * Environment variables:
- *   POOL_OPUS_SIZE              Warm opus processes (default: 6)
- *   POOL_SONNET_SIZE            Warm sonnet processes (default: 4)
- *   POOL_MAX_REQUESTS_PER_PROCESS  Context accumulation threshold (default: 50)
- *   MAX_TOTAL_PROCESSES         Hard cap on locked + warm processes (default: 30)
- *   SWEEP_HOUR                  Hour in ET for nightly sweep (default: 3)
- *   SWEEP_IDLE_THRESHOLD_MS     Idle time before sweep recycles (default: 7200000)
- *   POOL_REQUEST_QUEUE_DEPTH    Per-process queue depth (default: 3)
- *   POOL_REQUEST_TIMEOUT_MS     Per-request timeout ms (default: 300000)
  */
 
 import cron from "node-cron";
 import { startServer } from "./index.js";
-import { verifyClaude, verifyAuth } from "../subprocess/manager.js";
-import { SessionPoolRouter } from "../subprocess/router.js";
 import { setPoolRouter } from "./routes.js";
-import type { RouterConfig } from "../subprocess/router.js";
+import { SessionPoolRouter } from "../subprocess/router.js";
+import { verifyClaude, verifyAuth } from "../subprocess/manager.js";
+import type { Server } from "http";
+
+// ---------------------------------------------------------------------------
+// Environment configuration
+// ---------------------------------------------------------------------------
 
 const DEFAULT_PORT = 3456;
 
-function parseEnvInt(name: string, defaultVal: number): number {
-  const v = parseInt(process.env[name] || "", 10);
-  return isNaN(v) ? defaultVal : v;
-}
+const envInt = (name: string, fallback: number): number => {
+  const parsed = parseInt(process.env[name] ?? "", 10);
+  return Number.isNaN(parsed) ? fallback : parsed; // unset or malformed -> default
+};
+const env = {
+  port: parseInt(process.env.PORT || process.argv[2] || String(DEFAULT_PORT), 10),
+  opusSize: envInt("POOL_OPUS_SIZE", 6),
+  sonnetSize: envInt("POOL_SONNET_SIZE", 4),
+  maxRequestsPerProcess: envInt("POOL_MAX_REQUESTS_PER_PROCESS", 50),
+  maxTotalProcesses: envInt("MAX_TOTAL_PROCESSES", 30),
+  sweepHour: envInt("SWEEP_HOUR", 3),
+  sweepIdleThresholdMs: envInt("SWEEP_IDLE_THRESHOLD_MS", 7200000),
+  requestQueueDepth: envInt("POOL_REQUEST_QUEUE_DEPTH", 3),
+  requestTimeoutMs: envInt("POOL_REQUEST_TIMEOUT_MS", 300000),
+};
 
 async function main(): Promise<void> {
-  console.log("Claude Code CLI Provider — Session-Pooled Server");
-  console.log("=================================================\n");
+  console.log("Claude Code CLI Provider - Session Pool Server");
+  console.log("===============================================\n");
 
-  const port = parseInt(process.argv[2] || String(DEFAULT_PORT), 10);
-  if (isNaN(port) || port < 1 || port > 65535) {
+  // Validate port
+  if (isNaN(env.port) || env.port < 1 || env.port > 65535) {
     console.error(`Invalid port: ${process.argv[2]}`);
     process.exit(1);
   }
@@ -50,89 +58,113 @@ async function main(): Promise<void> {
   }
   console.log(`  Claude CLI: ${cliCheck.version || "OK"}`);
 
-  // Auth check (warn, don't exit — M1 invariant: server starts even if auth fails at startup)
+  // Verify authentication
   console.log("Checking authentication...");
   const authCheck = await verifyAuth();
   if (!authCheck.ok) {
-    console.warn(`  Warning: ${authCheck.error}`);
-    console.warn("  Run: claude auth login");
-    console.warn("  Server will start but requests will return 401 until authenticated.\n");
-  } else {
-    console.log("  Authentication: OK\n");
+    console.error(`Error: ${authCheck.error}`);
+    console.error("Please run: claude auth login");
+    process.exit(1);
   }
+  console.log("  Authentication: OK\n");
 
-  // Pool configuration
-  const routerConfig: RouterConfig = {
-    opusSize: parseEnvInt("POOL_OPUS_SIZE", 6),
-    sonnetSize: parseEnvInt("POOL_SONNET_SIZE", 4),
-    maxRequestsPerProcess: parseEnvInt("POOL_MAX_REQUESTS_PER_PROCESS", 50),
-    maxTotalProcesses: parseEnvInt("MAX_TOTAL_PROCESSES", 30),
-    sweepIdleThresholdMs: parseEnvInt("SWEEP_IDLE_THRESHOLD_MS", 7_200_000),
-    requestQueueDepth: parseEnvInt("POOL_REQUEST_QUEUE_DEPTH", 3),
-    requestTimeoutMs: parseEnvInt("POOL_REQUEST_TIMEOUT_MS", 300_000),
-  };
-
+  // --- Initialize SessionPoolRouter ---
   console.log("Pool configuration:");
-  console.log(`  POOL_OPUS_SIZE=${routerConfig.opusSize} POOL_SONNET_SIZE=${routerConfig.sonnetSize}`);
-  console.log(`  MAX_TOTAL_PROCESSES=${routerConfig.maxTotalProcesses} POOL_MAX_REQUESTS_PER_PROCESS=${routerConfig.maxRequestsPerProcess}`);
-  console.log(`  POOL_REQUEST_TIMEOUT_MS=${routerConfig.requestTimeoutMs} POOL_REQUEST_QUEUE_DEPTH=${routerConfig.requestQueueDepth}`);
-  console.log(`  SWEEP_IDLE_THRESHOLD_MS=${routerConfig.sweepIdleThresholdMs}\n`);
-
-  // Initialize session pool router
-  const router = new SessionPoolRouter(routerConfig);
+  console.log(`  Opus pool size:       ${env.opusSize}`);
+  console.log(`  Sonnet pool size:     ${env.sonnetSize}`);
+  console.log(`  Max total processes:  ${env.maxTotalProcesses}`);
+  console.log(`  Max requests/process: ${env.maxRequestsPerProcess}`);
+  console.log(`  Request queue depth:  ${env.requestQueueDepth}`);
+  console.log(`  Request timeout:      ${env.requestTimeoutMs}ms`);
+  console.log(`  Sweep hour (ET):      ${env.sweepHour}:00`);
+  console.log(`  Sweep idle threshold: ${env.sweepIdleThresholdMs}ms\n`);
+
+  const router = new SessionPoolRouter({
+    opusSize: env.opusSize,
+    sonnetSize: env.sonnetSize,
+    maxRequestsPerProcess: env.maxRequestsPerProcess,
+    maxTotalProcesses: env.maxTotalProcesses,
+    requestQueueDepth: env.requestQueueDepth,
+    requestTimeoutMs: env.requestTimeoutMs,
+    sweepIdleThresholdMs: env.sweepIdleThresholdMs,
+  });
+
+  // Register router with routes module
   setPoolRouter(router);
-  await router.initialize();
 
-  const initialStats = router.stats();
-  console.log(`Pool initialized: warm.opus=${initialStats.warm.opus} warm.sonnet=${initialStats.warm.sonnet} total=${initialStats.total}\n`);
-
-  // Schedule nightly sweep at 3 AM ET (DST-aware via node-cron timezone)
-  const sweepHour = parseEnvInt("SWEEP_HOUR", 3);
-  cron.schedule(`0 ${sweepHour} * * *`, () => {
-    console.log(`[Cron] Running nightly sweep at ${sweepHour}:00 ET`);
-    router.sweep();
-  }, { timezone: "America/New_York" });
-  console.log(`Nightly sweep scheduled at ${sweepHour}:00 AM ET (DST-aware)\n`);
+  // Initialize warm pools
+  await router.initialize();
 
-  // Start HTTP server
-  let httpServer: { close: (cb?: () => void) => void } | null = null;
+  // --- Start HTTP server ---
+  let server: Server;
   try {
-    const result = await startServer({ port });
-    httpServer = (result as any)?.server || null;
-    console.log("\nServer ready. Test with:");
-    console.log(`  curl -X POST http://localhost:${port}/v1/chat/completions \\`);
-    console.log(`    -H "Content-Type: application/json" \\`);
-    console.log(`    -H "x-openclaw-session-key: agent:test:discord:channel:123" \\`);
-    console.log(`    -d '{"model": "claude-sonnet-4", "messages": [{"role": "user", "content": "Hello!"}]}'`);
-    console.log("\nPress Ctrl+C to stop.\n");
+    server = await startServer({ port: env.port });
   } catch (err) {
     console.error("Failed to start server:", err);
     await router.shutdown();
     process.exit(1);
   }
 
-  // Graceful shutdown — per spec Finding N18:
-  // 1. Close listening socket immediately (no new connections)
-  // 2. Wait 30s for in-flight requests
-  // 3. Call router.shutdown()
-  // 4. Exit
-  const shutdown = async (signal: string) => {
-    console.log(`\nReceived ${signal}. Shutting down...`);
-
-    // Step 1: Close listening socket immediately
-    if (httpServer) {
-      httpServer.close(() => {
-        console.log("  HTTP server closed (no new connections accepted)");
+  console.log(`\n[Server] Pool stats: ${JSON.stringify(router.stats())}`);
+  console.log("\nServer ready. Test with:");
+  console.log(
+    `  curl -X POST http://localhost:${env.port}/v1/chat/completions \\`
+  );
+  console.log(`    -H "Content-Type: application/json" \\`);
+  console.log(
+    `    -d '{"model": "claude-sonnet-4", "messages": [{"role": "user", "content": "Hello!"}]}'`
+  );
+  console.log("\nPress Ctrl+C to stop.\n");
+
+  // --- Schedule nightly sweep ---
+  const sweepJob = cron.schedule(
+    `0 ${env.sweepHour} * * *`,
+    () => {
+      console.log("[Sweep] Nightly sweep triggered");
+      router.sweep().catch((err: unknown) => {
+        console.error("[Sweep] Error:", err);
       });
-    }
+    },
+    { timezone: "America/New_York" }
+  );
+
+  // --- Graceful shutdown ---
+  let shutdownInProgress = false;
 
-    // Step 2: Wait up to 30s for in-flight requests
-    await new Promise<void>((resolve) => setTimeout(resolve, 30_000));
+  const shutdown = async (signal: string) => {
+    if (shutdownInProgress) return;
+    shutdownInProgress = true;
+
+    console.log(`\n[Shutdown] ${signal} received — starting graceful shutdown`);
+
+    // 1. Stop the cron job
+    sweepJob.stop();
+
+    // 2. Close listening socket FIRST (stop new connections)
+    console.log("[Shutdown] Closing listening socket...");
+    server.close();
+
+    // 3. Wait for in-flight requests (30s timeout)
+    console.log("[Shutdown] Waiting up to 30s for in-flight requests...");
+    await new Promise<void>((resolve) => {
+      const timeout = setTimeout(() => {
+        console.log("[Shutdown] 30s timeout reached — forcing shutdown");
+        resolve();
+      }, 30000);
+
+      // Check if all connections are done
+      server.on("close", () => {
+        clearTimeout(timeout);
+        resolve();
+      });
+    });
 
-    // Step 3: Shut down pool — rejects all queued requests, kills processes
+    // 4. Shutdown the pool router (rejects queued, kills processes)
+    console.log("[Shutdown] Shutting down pool router...");
     await router.shutdown();
 
-    // Step 4: Exit
+    // 5. Exit
+    console.log("[Shutdown] Complete.");
     process.exit(0);
   };
 
diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 6bb20cc..2be90f3 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -1,16 +1,34 @@
 /**
- * SessionPoolRouter — Session-aware Claude CLI process pool
+ * Session Pool Router
  *
- * Locks warm CLI processes to OpenClaw session keys, preventing cross-agent
- * context contamination and eliminating per-request spawn overhead.
+ * Maintains per-model warm pools of persistent Claude CLI processes and locks
+ * them to OpenClaw session keys.  Each session key gets a dedicated process —
+ * no cross-session contamination, no concurrent stdin writes.
+ *
+ * See specs/session-pooling.spec.md (Rev 5) for the full design.
  */
 
 import { spawn, ChildProcess } from "child_process";
 import { EventEmitter } from "events";
+import type {
+  ClaudeCliMessage,
+  ClaudeCliStreamEvent,
+} from "../types/claude-cli.js";
+import {
+  isAssistantMessage,
+  isResultMessage,
+  isContentDelta,
+  isTextBlockStart,
+  isToolUseBlockStart,
+  isInputJsonDelta,
+  isContentBlockStop,
+  isSystemInit,
+} from "../types/claude-cli.js";
 import type { ClaudeModel } from "../adapter/openai-to-cli.js";
-import { ClaudeSubprocess } from "./manager.js";
 
-// ─── Constants ────────────────────────────────────────────────────────────────
+// ---------------------------------------------------------------------------
+// Tool mapping prompt (shared with manager.ts / pool.ts)
+// ---------------------------------------------------------------------------
 
 const OPENCLAW_TOOL_MAPPING_PROMPT = [
   "## Tool Name Mapping",
@@ -51,38 +69,19 @@ const OPENCLAW_TOOL_MAPPING_PROMPT = [
   "Run `openclaw skills list --eligible --json` to see all available skills.",
 ].join("\n");
 
-/** Models that have dedicated warm pools */
-const POOLED_MODELS = new Set<ClaudeModel>(["opus", "sonnet"]);
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
 
-// ─── Types ────────────────────────────────────────────────────────────────────
+/** Models that have dedicated warm pools. */
+export const POOLED_MODELS = new Set<string>(["opus", "sonnet"]);
 
-export interface RouterConfig {
-  opusSize: number;
-  sonnetSize: number;
-  maxRequestsPerProcess: number;
-  maxTotalProcesses: number;
-  sweepIdleThresholdMs: number;
-  requestQueueDepth: number;
-  requestTimeoutMs: number;
-}
+export type PooledModel = "opus" | "sonnet";
 
-interface PendingRequest {
-  prompt: string;
-  model: ClaudeModel;
-  emitter: EventEmitter;
-  timeoutHandle: NodeJS.Timeout;
-}
-
-/** Sentinel placed synchronously in lockedSessions while a real process is being claimed/spawned */
-interface PendingSentinel {
-  isPending: true;
-  requestQueue: PendingRequest[];
-}
-
-interface PooledProcess {
-  pid: number;
+export interface PooledProcess {
+  id: number;
   process: ChildProcess;
-  model: ClaudeModel;
+  model: PooledModel;
   lockedTo: string | null;
   agentChannel: string | null;
   lastRequestAt: number;
@@ -92,748 +91,1040 @@ interface PooledProcess {
   requestQueue: PendingRequest[];
   buffer: string;
   currentEmitter: EventEmitter | null;
-  timeoutHandle: NodeJS.Timeout | null;
+  ready: boolean;
+  requestTimeoutTimer: NodeJS.Timeout | null;
+  orphaned: boolean;
+}
+
+export interface PendingRequest {
+  prompt: string;
+  emitter: EventEmitter;
+  resolve: () => void;
 }
 
-type LockEntry = PooledProcess | PendingSentinel;
+export interface PendingSentinel {
+  isPending: true;
+  requestQueue: PendingRequest[];
+}
 
-function isPending(entry: LockEntry): entry is PendingSentinel {
-  return (entry as PendingSentinel).isPending === true;
+function isPendingSentinel(
+  v: PooledProcess | PendingSentinel
+): v is PendingSentinel {
+  return (v as PendingSentinel).isPending === true;
 }
 
-// ─── Stats ────────────────────────────────────────────────────────────────────
+export interface PoolRouterConfig {
+  opusSize: number;
+  sonnetSize: number;
+  maxRequestsPerProcess: number;
+  maxTotalProcesses: number;
+  requestQueueDepth: number;
+  requestTimeoutMs: number;
+  sweepIdleThresholdMs: number;
+}
 
-interface RouterStats {
+export interface PoolStats {
   total: number;
   locked: { total: number; opus: number; sonnet: number };
   warm: { opus: number; sonnet: number };
   busy: number;
   queued: number;
+  maxTotal: number;
   orphansReclaimed: number;
   totalRequests: number;
   processRecycles: number;
+  requestTimeouts: number;
   routeHits: { locked: number; warm: number; cold: number; fallback: number };
+  uptime: number;
 }
 
-// ─── Helpers ──────────────────────────────────────────────────────────────────
-
-/**
- * Extract agentChannel from a session key.
- * "agent:scope:discord:channel:123" → "scope:discord:channel:123"
- * Falls back to the full key if format is unexpected.
- */
-function extractAgentChannel(sessionKey: string): string {
-  const match = sessionKey.match(/^agent:(.+)$/);
-  return match ? match[1] : sessionKey;
-}
-
-function rejectPending(req: PendingRequest, status: number, retryAfter: number): void {
-  clearTimeout(req.timeoutHandle);
-  req.emitter.emit("pool_error", { status, retryAfter, message: `HTTP ${status}` });
-  req.emitter.emit("error", Object.assign(new Error(`HTTP ${status}`), { poolStatus: status, retryAfter }));
-}
-
-// ─── Auth error detection ─────────────────────────────────────────────────────
-
-const AUTH_ERROR_PATTERNS = /auth|unauthorized|token expired|invalid_token/i;
-
-function isAuthError(text: string): boolean {
-  return AUTH_ERROR_PATTERNS.test(text);
+export interface ExecuteResult {
+  emitter: EventEmitter;
+  routeType: "locked" | "warm" | "cold" | "fallback";
+  pid: number | null;
+  queueDepth: number;
 }
 
-// ─── SessionPoolRouter ────────────────────────────────────────────────────────
+// ---------------------------------------------------------------------------
+// SessionPoolRouter
+// ---------------------------------------------------------------------------
 
 export class SessionPoolRouter {
-  private config: RouterConfig;
-  private lockedSessions: Map<string, LockEntry> = new Map();
-  private warmPool: { opus: PooledProcess[]; sonnet: PooledProcess[] } = { opus: [], sonnet: [] };
+  private config: PoolRouterConfig;
+  private lockedSessions = new Map<string, PooledProcess | PendingSentinel>();
+  private warmPool = new Map<PooledModel, PooledProcess[]>();
+  private allProcesses = new Map<number, PooledProcess>();
+  private nextId = 0;
   private shuttingDown = false;
+  private startedAt = Date.now();
 
-  // Stats counters
+  // Counters for stats
   private orphansReclaimed = 0;
+  private totalRequests = 0;
   private processRecycles = 0;
+  private requestTimeouts = 0;
   private routeHits = { locked: 0, warm: 0, cold: 0, fallback: 0 };
 
-  constructor(config: RouterConfig) {
-    this.config = config;
+  constructor(config: Partial<PoolRouterConfig> = {}) {
+    this.config = {
+      opusSize: config.opusSize ?? 6,
+      sonnetSize: config.sonnetSize ?? 4,
+      maxRequestsPerProcess: config.maxRequestsPerProcess ?? 50,
+      maxTotalProcesses: config.maxTotalProcesses ?? 30,
+      requestQueueDepth: config.requestQueueDepth ?? 3,
+      requestTimeoutMs: config.requestTimeoutMs ?? 300000,
+      sweepIdleThresholdMs: config.sweepIdleThresholdMs ?? 7200000,
+    };
+    this.warmPool.set("opus", []);
+    this.warmPool.set("sonnet", []);
   }
 
-  /** Initialize warm pool on startup */
+  // -------------------------------------------------------------------------
+  // Initialization
+  // -------------------------------------------------------------------------
+
   async initialize(): Promise<void> {
-    console.log("[Router] Initializing session pool...");
-    const spawns: Promise<void>[] = [];
+    console.log(
+      `[Router] Initializing — opus: ${this.config.opusSize}, sonnet: ${this.config.sonnetSize}`
+    );
+    const promises: Promise<void>[] = [];
     for (let i = 0; i < this.config.opusSize; i++) {
-      spawns.push(this.spawnWarm("opus"));
+      promises.push(this.spawnWarm("opus"));
     }
     for (let i = 0; i < this.config.sonnetSize; i++) {
-      spawns.push(this.spawnWarm("sonnet"));
+      promises.push(this.spawnWarm("sonnet"));
     }
-    await Promise.all(spawns);
-    const s = this.stats();
-    console.log(`[Router] Ready. warm.opus=${s.warm.opus} warm.sonnet=${s.warm.sonnet}`);
+    await Promise.all(promises);
+    console.log(
+      `[Router] Ready — ${this.allProcesses.size} warm processes spawned`
+    );
   }
 
-  /**
-   * Execute a prompt for a given model and session key.
-   * Returns an EventEmitter that emits the same events as ClaudeSubprocess.
-   */
-  execute(prompt: string, model: ClaudeModel, sessionKey: string | null): EventEmitter {
-    // No session key or non-pooled model → fallback to ClaudeSubprocess
-    if (!sessionKey || !POOLED_MODELS.has(model)) {
-      if (!POOLED_MODELS.has(model)) {
-        console.log(`[Router] Non-pooled model "${model}" — falling back to ClaudeSubprocess`);
-      }
-      this.routeHits.fallback++;
-      return this.fallbackSubprocess(prompt, model);
+  // -------------------------------------------------------------------------
+  // Execute — main entry point
+  // -------------------------------------------------------------------------
+
+  execute(
+    prompt: string,
+    model: ClaudeModel,
+    sessionKey: string
+  ): ExecuteResult | null {
+    if (this.shuttingDown) {
+      const emitter = new EventEmitter();
+      process.nextTick(() =>
+        emitter.emit("error", new Error("Server is shutting down"))
+      );
+      return { emitter, routeType: "fallback", pid: null, queueDepth: 0 };
     }
 
-    // Pool saturated check
-    if (this.totalProcessCount() >= this.config.maxTotalProcesses) {
-      console.warn(`[Router] Pool saturated (total=${this.totalProcessCount()} >= max=${this.config.maxTotalProcesses}) — falling back for sessionKey=${sessionKey}`);
+    // Only pool opus and sonnet
+    if (!POOLED_MODELS.has(model)) {
+      console.log(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "fallback_unpooled_model",
+          model,
+          sessionKey,
+          totalProcesses: this.allProcesses.size,
+        })
+      );
       this.routeHits.fallback++;
-      return this.fallbackSubprocess(prompt, model);
+      return null; // caller uses ClaudeSubprocess
     }
 
-    const emitter = new EventEmitter();
-    this.routeRequest(emitter, prompt, model, sessionKey);
-    return emitter;
-  }
+    const pooledModel = model as PooledModel;
 
-  private routeRequest(
-    emitter: EventEmitter,
-    prompt: string,
-    model: ClaudeModel,
-    sessionKey: string,
-  ): void {
+    // --- Lineage-based orphan reclamation ---
+    const agentChannel = this.extractAgentChannel(sessionKey);
+    this.reclaimOrphans(sessionKey, agentChannel);
+
+    // --- Check lockedSessions ---
     const existing = this.lockedSessions.get(sessionKey);
 
-    if (existing !== undefined) {
-      if (isPending(existing)) {
-        // Sentinel in place — queue behind it
-        this.enqueueOnSentinel(existing, prompt, model, emitter);
-        return;
+    if (existing) {
+      if (isPendingSentinel(existing)) {
+        return this.enqueueOnSentinel(existing, prompt, sessionKey);
       }
 
       const proc = existing;
-
-      // Orphan reclamation: check if a different session for the same agent+channel exists
-      this.reclaimOrphan(sessionKey, model);
-
-      // Route to locked process
       if (proc.state === "idle") {
         this.routeHits.locked++;
-        this.assignToProcess(proc, prompt, emitter);
-        return;
-      }
-
-      // Busy or recycling — enqueue on per-process queue
-      if (proc.requestQueue.length >= this.config.requestQueueDepth) {
-        // 429 backpressure
-        setImmediate(() => {
-          emitter.emit("pool_error", { status: 429, retryAfter: 5 });
-          emitter.emit("error", Object.assign(new Error("HTTP 429 Too Many Requests"), { poolStatus: 429, retryAfter: 5 }));
-        });
-        return;
+        this.totalRequests++;
+        return this.routeToProcess(proc, prompt, "locked");
+      } else {
+        return this.enqueueOnProcess(proc, prompt, sessionKey);
       }
-
-      const pending: PendingRequest = {
-        prompt,
-        model,
-        emitter,
-        timeoutHandle: setTimeout(() => {
-          const idx = proc.requestQueue.indexOf(pending);
-          if (idx >= 0) proc.requestQueue.splice(idx, 1);
-          emitter.emit("error", Object.assign(new Error("Request queue timeout"), { poolStatus: 503, retryAfter: 3 }));
-        }, this.config.requestTimeoutMs),
-      };
-      proc.requestQueue.push(pending);
-      this.routeHits.locked++;
-      return;
     }
 
-    // New session key — set sentinel synchronously before any async work
+    // --- New session key: claim a process ---
+    // Set PENDING_SENTINEL synchronously BEFORE any async work
     const sentinel: PendingSentinel = { isPending: true, requestQueue: [] };
     this.lockedSessions.set(sessionKey, sentinel);
 
-    this.claimProcess(sessionKey, model, sentinel).then((proc) => {
-      if (!proc) return; // sentinel already cleaned up (spawn failed)
-      this.routeHits[proc.requestCount === 0 ? "warm" : "cold"]++;
-      // Assign the triggering request
-      this.assignToProcess(proc, prompt, emitter);
-      // Drain any requests that queued against the sentinel
-      this.drainSentinelQueue(proc, sentinel);
-    }).catch(() => {
-      // claimProcess already cleaned up sentinel and rejected queue
-    });
-  }
-
-  /** Claim a process from warm pool or spawn cold */
-  private async claimProcess(
-    sessionKey: string,
-    model: ClaudeModel,
-    sentinel: PendingSentinel,
-  ): Promise<PooledProcess | null> {
-    try {
-      let proc: PooledProcess;
-      const pool = this.warmPool[model as "opus" | "sonnet"];
-
-      if (pool.length > 0) {
-        proc = pool.pop()!;
-        this.routeHits.warm++;
-      } else {
-        this.routeHits.cold++;
-        proc = await this.spawnCold(model);
-      }
-
-      proc.lockedTo = sessionKey;
-      proc.agentChannel = extractAgentChannel(sessionKey);
+    const warm = this.warmPool.get(pooledModel)!;
+    if (warm.length > 0) {
+      // Claim from warm pool (synchronous — no race)
+      const proc = warm.pop()!;
+      this.lockProcess(proc, sessionKey, agentChannel);
+      this.transferSentinelQueue(sentinel, proc);
       this.lockedSessions.set(sessionKey, proc);
-      return proc;
-    } catch (err) {
-      // Failed spawn — reject all queued requests then clean up sentinel
-      for (const req of sentinel.requestQueue) {
-        rejectPending(req, 503, 3);
-      }
+      this.routeHits.warm++;
+      this.totalRequests++;
+      return this.routeToProcess(proc, prompt, "warm");
+    }
+
+    // Warm pool empty — need cold spawn
+    if (this.allProcesses.size >= this.config.maxTotalProcesses) {
+      this.rejectSentinelQueue(sentinel, 503, "Pool at capacity");
       this.lockedSessions.delete(sessionKey);
-      console.error(`[Router] Cold spawn failed for sessionKey=${sessionKey} model=${model}:`, err);
-      throw err;
+      this.routeHits.fallback++;
+      console.log(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "fallback_at_capacity",
+          sessionKey,
+          model,
+          totalProcesses: this.allProcesses.size,
+          maxTotal: this.config.maxTotalProcesses,
+        })
+      );
+      return null; // caller uses ClaudeSubprocess
     }
-  }
 
-  private enqueueOnSentinel(sentinel: PendingSentinel, prompt: string, model: ClaudeModel, emitter: EventEmitter): void {
-    const pending: PendingRequest = {
-      prompt,
-      model,
+    // Cold spawn (async)
+    const emitter = new EventEmitter();
+    this.totalRequests++;
+    this.routeHits.cold++;
+
+    this.spawnCold(pooledModel)
+      .then((proc) => {
+        this.lockProcess(proc, sessionKey, agentChannel);
+        this.transferSentinelQueue(sentinel, proc);
+        this.lockedSessions.set(sessionKey, proc);
+        this.assignToProcess(proc, prompt, emitter);
+      })
+      .catch((err) => {
+        console.log(
+          JSON.stringify({
+            ts: new Date().toISOString(),
+            event: "cold_spawn_failed",
+            sessionKey,
+            model,
+            error: String(err),
+          })
+        );
+        this.rejectSentinelQueue(sentinel, 503, "Cold spawn failed");
+        this.lockedSessions.delete(sessionKey);
+        emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
+      });
+
+    return {
       emitter,
-      timeoutHandle: setTimeout(() => {
-        const idx = sentinel.requestQueue.indexOf(pending);
-        if (idx >= 0) sentinel.requestQueue.splice(idx, 1);
-        emitter.emit("error", Object.assign(new Error("Request sentinel timeout"), { poolStatus: 503, retryAfter: 3 }));
-      }, this.config.requestTimeoutMs),
+      routeType: "cold",
+      pid: null,
+      queueDepth: sentinel.requestQueue.length,
     };
-    sentinel.requestQueue.push(pending);
   }
 
-  private drainSentinelQueue(proc: PooledProcess, sentinel: PendingSentinel): void {
-    for (const req of sentinel.requestQueue) {
-      clearTimeout(req.timeoutHandle);
-      if (proc.state === "idle") {
-        this.assignToProcess(proc, req.prompt, req.emitter);
+  // -------------------------------------------------------------------------
+  // Spawn helpers
+  // -------------------------------------------------------------------------
+
+  private async spawnWarm(model: PooledModel): Promise<void> {
+    const proc = this.spawnProcess(model);
+    this.warmPool.get(model)!.push(proc);
+  }
+
+  private async spawnCold(model: PooledModel): Promise<PooledProcess> {
+    const proc = this.spawnProcess(model);
+    await new Promise<void>((resolve, reject) => {
+      if (proc.ready) {
+        resolve();
       } else {
-        if (proc.requestQueue.length < this.config.requestQueueDepth) {
-          const re: PendingRequest = {
-            prompt: req.prompt,
-            model: req.model,
-            emitter: req.emitter,
-            timeoutHandle: setTimeout(() => {
-              const idx = proc.requestQueue.indexOf(re);
-              if (idx >= 0) proc.requestQueue.splice(idx, 1);
-              req.emitter.emit("error", Object.assign(new Error("Queue timeout after sentinel drain"), { poolStatus: 503, retryAfter: 3 }));
-            }, this.config.requestTimeoutMs),
-          };
-          proc.requestQueue.push(re);
-        } else {
-          rejectPending(req, 503, 3);
-        }
+        const timer = setTimeout(() => resolve(), 500); // best-effort readiness wait
+        const onReady = () => {
+          clearTimeout(timer);
+          resolve();
+        };
+        proc.process.stdout?.once("data", onReady);
+        proc.process.once("error", (err: Error) => {
+          clearTimeout(timer);
+          reject(err); // spawn failed: let the caller's rejection handler run
+        });
       }
-    }
+    });
+    return proc;
   }
 
-  /** Orphan reclamation: if a different session key has the same agentChannel, reclaim it */
-  private reclaimOrphan(newSessionKey: string, _model: ClaudeModel): void {
-    const newChannel = extractAgentChannel(newSessionKey);
-    for (const [key, entry] of this.lockedSessions) {
-      if (key === newSessionKey || isPending(entry)) continue;
-      const proc = entry;
-      if (proc.agentChannel === newChannel && proc.lockedTo !== newSessionKey) {
-        console.log(`[Router] Orphan reclaimed: old key=${key} new key=${newSessionKey}`);
-        this.orphansReclaimed++;
+  private spawnProcess(model: PooledModel): PooledProcess {
+    const id = this.nextId++;
+    const args = [
+      "--print",
+      "--input-format",
+      "stream-json",
+      "--output-format",
+      "stream-json",
+      "--verbose",
+      "--include-partial-messages",
+      "--dangerously-skip-permissions",
+      "--no-session-persistence",
+      "--model",
+      model,
+      "--append-system-prompt",
+      OPENCLAW_TOOL_MAPPING_PROMPT,
+    ];
+
+    const child = spawn(process.env.CLAUDE_BIN || "claude", args, {
+      cwd: process.env.HOME || "/tmp",
+      env: Object.fromEntries(
+        Object.entries(process.env).filter(([k]) => k !== "CLAUDECODE")
+      ),
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+
+    const pooled: PooledProcess = {
+      id,
+      process: child,
+      model,
+      lockedTo: null,
+      agentChannel: null,
+      lastRequestAt: 0,
+      spawnedAt: Date.now(),
+      requestCount: 0,
+      state: "idle",
+      requestQueue: [],
+      buffer: "",
+      currentEmitter: null,
+      ready: false,
+      requestTimeoutTimer: null,
+      orphaned: false,
+    };
+
+    this.allProcesses.set(id, pooled);
+
+    child.stdout?.on("data", (chunk: Buffer) => {
+      pooled.buffer += chunk.toString();
+      this.processBuffer(pooled);
+    });
+
+    child.stderr?.on("data", (chunk: Buffer) => {
+      const text = chunk.toString().trim();
+      if (process.env.DEBUG_SUBPROCESS) {
+        console.error(`[Router:${id}] stderr:`, text.slice(0, 200));
+      }
+      if (text.match(/\b(auth|unauthorized|token expired|forbidden)\b/i)) {
+        console.log(
+          JSON.stringify({
+            ts: new Date().toISOString(),
+            event: "auth_error",
+            pid: child.pid,
+            processId: id,
+            model,
+            stderr: text.slice(0, 200),
+          })
+        );
+        child.kill("SIGTERM");
+      }
+    });
+
+    child.on("close", (code) => {
+      this.handleProcessDeath(pooled, code);
+    });
+
+    child.on("error", (err) => {
+      console.error(`[Router:${id}] Process error:`, err.message);
+      if (pooled.currentEmitter) {
+        pooled.currentEmitter.emit("error", err);
+        pooled.currentEmitter = null;
+      }
+    });
+
+    pooled.ready = true;
+
+    console.log(
+      JSON.stringify({
+        ts: new Date().toISOString(),
+        event: "process_spawned",
+        processId: id,
+        pid: child.pid,
+        model,
+      })
+    );
 
-        // Reject all queued requests on the orphaned process
-        for (const req of proc.requestQueue) {
-          rejectPending(req, 503, 3);
+    return pooled;
+  }
+
+  // -------------------------------------------------------------------------
+  // Buffer processing
+  // -------------------------------------------------------------------------
+
+  private processBuffer(pooled: PooledProcess): void {
+    const lines = pooled.buffer.split("\n");
+    pooled.buffer = lines.pop() || "";
+
+    for (const line of lines) {
+      const trimmed = line.trim();
+      if (!trimmed) continue;
+
+      try {
+        const message: ClaudeCliMessage = JSON.parse(trimmed);
+
+        if (isSystemInit(message)) {
+          pooled.ready = true;
+          continue;
         }
-        proc.requestQueue = [];
 
-        if (proc.state === "idle") {
-          this.clearSessionLock(key, proc);
-          this.returnToWarmPool(proc);
-        } else {
-          // Mark recycling — will be cleaned after current request completes
-          (proc as any)._orphaned = true;
+        const emitter = pooled.currentEmitter;
+        if (!emitter) continue;
+
+        emitter.emit("message", message);
+
+        if (isTextBlockStart(message)) {
+          emitter.emit("text_block_start", message as ClaudeCliStreamEvent);
+        }
+        if (isToolUseBlockStart(message)) {
+          emitter.emit("tool_use_start", message as ClaudeCliStreamEvent);
+        }
+        if (isInputJsonDelta(message)) {
+          emitter.emit("input_json_delta", message as ClaudeCliStreamEvent);
+        }
+        if (isContentBlockStop(message)) {
+          emitter.emit("content_block_stop", message as ClaudeCliStreamEvent);
+        }
+        if (isContentDelta(message)) {
+          emitter.emit("content_delta", message as ClaudeCliStreamEvent);
+        } else if (isAssistantMessage(message)) {
+          emitter.emit("assistant", message);
+        } else if (isResultMessage(message)) {
+          emitter.emit("result", message);
+          this.releaseProcess(pooled);
+        }
+      } catch {
+        if (process.env.DEBUG_SUBPROCESS) {
+          console.error(
+            `[Router:${pooled.id}] Non-JSON:`,
+            trimmed.slice(0, 100)
+          );
         }
-        break;
       }
     }
   }
 
-  /**
-   * Canonical session lock clearing — ALL unlock paths must use this.
-   * Caller is responsible for what happens to the process afterward.
-   */
-  private clearSessionLock(sessionKey: string, proc: PooledProcess): void {
-    this.lockedSessions.delete(sessionKey);
-    proc.lockedTo = null;
-    proc.agentChannel = null;
-    proc.requestCount = 0;
-  }
+  // -------------------------------------------------------------------------
+  // Process assignment & release
+  // -------------------------------------------------------------------------
 
-  private assignToProcess(proc: PooledProcess, prompt: string, emitter: EventEmitter): void {
-    proc.state = "busy";
-    proc.requestCount++;
-    proc.lastRequestAt = Date.now();
-    proc.currentEmitter = emitter;
+  private routeToProcess(
+    proc: PooledProcess,
+    prompt: string,
+    routeType: "locked" | "warm" | "cold"
+  ): ExecuteResult {
+    const emitter = new EventEmitter();
+    this.assignToProcess(proc, prompt, emitter);
+    return {
+      emitter,
+      routeType,
+      pid: proc.process.pid ?? null,
+      queueDepth: proc.requestQueue.length,
+    };
+  }
 
-    // Per-request timeout
-    proc.timeoutHandle = setTimeout(() => {
-      console.error(`[Router:${proc.pid}] Request timeout after ${this.config.requestTimeoutMs}ms — treating as dead`);
-      this.handleProcessDeath(proc, new Error(`Request timeout after ${this.config.requestTimeoutMs}ms`));
-    }, this.config.requestTimeoutMs);
+  private assignToProcess(
+    pooled: PooledProcess,
+    prompt: string,
+    emitter: EventEmitter
+  ): void {
+    pooled.state = "busy";
+    pooled.requestCount++;
+    pooled.lastRequestAt = Date.now();
+    pooled.currentEmitter = emitter;
 
     const message = JSON.stringify({
       type: "user",
       message: { role: "user", content: prompt },
     });
+    pooled.process.stdin?.write(message + "\n");
 
-    proc.process.stdin?.write(message + "\n");
+    this.startRequestTimeout(pooled);
   }
 
-  private handleRequestComplete(proc: PooledProcess): void {
-    if (proc.timeoutHandle) {
-      clearTimeout(proc.timeoutHandle);
-      proc.timeoutHandle = null;
-    }
+  private releaseProcess(pooled: PooledProcess): void {
+    this.clearRequestTimeout(pooled);
+    pooled.currentEmitter = null;
 
-    // Check if orphaned mid-flight
-    if ((proc as any)._orphaned) {
-      (proc as any)._orphaned = false;
-      const key = proc.lockedTo;
-      if (key) this.clearSessionLock(key, proc);
-      this.returnToWarmPool(proc);
-      this.processRecycles++;
+    // Orphan check
+    if (pooled.orphaned) {
+      this.rejectProcessQueue(pooled);
+      if (pooled.lockedTo) {
+        this.clearSessionLock(pooled.lockedTo, pooled);
+      }
+      this.killAndRespawn(pooled);
       return;
     }
 
-    // Context accumulation threshold
-    const overThreshold = proc.requestCount >= this.config.maxRequestsPerProcess;
-
-    if (overThreshold) {
-      if (proc.requestQueue.length === 0) {
-        // Recycle immediately
-        const key = proc.lockedTo;
-        if (key) this.clearSessionLock(key, proc);
-        this.returnToWarmPool(proc);
+    // Context accumulation guard
+    if (pooled.requestCount > this.config.maxRequestsPerProcess) {
+      if (pooled.requestQueue.length === 0) {
+        console.log(
+          JSON.stringify({
+            ts: new Date().toISOString(),
+            event: "context_recycle",
+            processId: pooled.id,
+            pid: pooled.process.pid,
+            requestCount: pooled.requestCount,
+          })
+        );
+        if (pooled.lockedTo) {
+          this.clearSessionLock(pooled.lockedTo, pooled);
+        }
+        this.killAndRespawn(pooled);
         this.processRecycles++;
         return;
       } else {
-        // Set recycling state — drain queue, then recycle
-        proc.state = "recycling";
-        this.drainNextFromQueue(proc);
+        pooled.state = "recycling";
+        this.drainNextRequest(pooled);
         return;
       }
     }
 
-    // Normal case: drain queue or go idle
-    if (proc.requestQueue.length > 0) {
-      this.drainNextFromQueue(proc);
+    // Normal release
+    if (pooled.requestQueue.length > 0) {
+      this.drainNextRequest(pooled);
     } else {
-      proc.state = "idle";
-      proc.currentEmitter = null;
+      pooled.state = "idle";
     }
   }
 
-  private drainNextFromQueue(proc: PooledProcess): void {
-    const next = proc.requestQueue.shift();
-    if (!next) {
-      // Queue empty — check if we were recycling
-      if (proc.state === "recycling") {
-        const key = proc.lockedTo;
-        if (key) this.clearSessionLock(key, proc);
-        this.returnToWarmPool(proc);
+  private drainNextRequest(pooled: PooledProcess): void {
+    if (pooled.requestQueue.length === 0) {
+      if (pooled.state === "recycling") {
+        if (pooled.lockedTo) {
+          this.clearSessionLock(pooled.lockedTo, pooled);
+        }
+        this.killAndRespawn(pooled);
         this.processRecycles++;
-      } else {
-        proc.state = "idle";
-        proc.currentEmitter = null;
+        return;
       }
+      pooled.state = "idle";
       return;
     }
-    clearTimeout(next.timeoutHandle);
-    proc.state = "busy";
-    proc.currentEmitter = next.emitter;
-    proc.lastRequestAt = Date.now();
-    proc.requestCount++;
 
-    proc.timeoutHandle = setTimeout(() => {
-      console.error(`[Router:${proc.pid}] Queued request timeout — treating as dead`);
-      this.handleProcessDeath(proc, new Error("Queued request timeout"));
-    }, this.config.requestTimeoutMs);
+    const next = pooled.requestQueue.shift()!;
+    this.totalRequests++;
+    this.assignToProcess(pooled, next.prompt, next.emitter);
+    next.resolve();
+  }
 
-    const message = JSON.stringify({
-      type: "user",
-      message: { role: "user", content: next.prompt },
-    });
-    proc.process.stdin?.write(message + "\n");
+  // -------------------------------------------------------------------------
+  // Per-request timeout
+  // -------------------------------------------------------------------------
+
+  private startRequestTimeout(pooled: PooledProcess): void {
+    this.clearRequestTimeout(pooled);
+    pooled.requestTimeoutTimer = setTimeout(() => {
+      console.log(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "request_timeout",
+          processId: pooled.id,
+          pid: pooled.process.pid,
+          sessionKey: pooled.lockedTo,
+          elapsedMs: this.config.requestTimeoutMs,
+        })
+      );
+      this.requestTimeouts++;
+
+      if (pooled.currentEmitter) {
+        pooled.currentEmitter.emit(
+          "error",
+          new Error(
+            `Request timed out after ${this.config.requestTimeoutMs}ms`
+          )
+        );
+        pooled.currentEmitter = null;
+      }
+
+      this.rejectProcessQueue(pooled);
+
+      if (pooled.lockedTo) {
+        this.clearSessionLock(pooled.lockedTo, pooled);
+      }
+
+      this.killAndRespawn(pooled);
+    }, this.config.requestTimeoutMs);
   }
 
-  private handleProcessDeath(proc: PooledProcess, error?: Error): void {
-    if (proc.timeoutHandle) {
-      clearTimeout(proc.timeoutHandle);
-      proc.timeoutHandle = null;
+  private clearRequestTimeout(pooled: PooledProcess): void {
+    if (pooled.requestTimeoutTimer) {
+      clearTimeout(pooled.requestTimeoutTimer);
+      pooled.requestTimeoutTimer = null;
     }
+  }
 
-    const err = error || new Error("Pool process died unexpectedly");
+  // -------------------------------------------------------------------------
+  // Queue management
+  // -------------------------------------------------------------------------
 
-    // Notify active request
-    if (proc.currentEmitter) {
-      proc.currentEmitter.emit("error", err);
-      proc.currentEmitter = null;
+  private enqueueOnProcess(
+    proc: PooledProcess,
+    prompt: string,
+    sessionKey: string
+  ): ExecuteResult | null {
+    if (proc.state === "recycling") {
+      this.routeHits.fallback++;
+      console.log(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "fallback_recycling",
+          sessionKey,
+          processId: proc.id,
+        })
+      );
+      return null;
     }
 
-    // Reject queued requests
-    for (const req of proc.requestQueue) {
-      rejectPending(req, 503, 3);
+    if (proc.requestQueue.length >= this.config.requestQueueDepth) {
+      const emitter = new EventEmitter();
+      process.nextTick(() =>
+        emitter.emit(
+          "error",
+          Object.assign(
+            new Error("Too Many Requests — per-session queue full"),
+            { statusCode: 429, retryAfter: 5 }
+          )
+        )
+      );
+      return {
+        emitter,
+        routeType: "locked",
+        pid: proc.process.pid ?? null,
+        queueDepth: proc.requestQueue.length,
+      };
     }
-    proc.requestQueue = [];
 
-    // Remove from lockedSessions
-    if (proc.lockedTo) {
-      this.lockedSessions.delete(proc.lockedTo);
-      proc.lockedTo = null;
-      proc.agentChannel = null;
-      proc.requestCount = 0;
-    } else {
-      // Might be in warm pool — remove it
-      for (const model of ["opus", "sonnet"] as const) {
-        const idx = this.warmPool[model].indexOf(proc);
-        if (idx >= 0) this.warmPool[model].splice(idx, 1);
-      }
-    }
+    const emitter = new EventEmitter();
+    const pending: PendingRequest = {
+      prompt,
+      emitter,
+      resolve: () => {},
+    };
+    proc.requestQueue.push(pending);
+    this.routeHits.locked++;
 
-    // Spawn replacement into warm pool
-    if (!this.shuttingDown) {
-      const model = proc.model;
-      this.spawnWarm(model).catch((e) => {
-        console.error(`[Router] Failed to spawn replacement for dead ${model} process:`, e);
-      });
-    }
+    return {
+      emitter,
+      routeType: "locked",
+      pid: proc.process.pid ?? null,
+      queueDepth: proc.requestQueue.length,
+    };
   }
 
-  /** Spawn a warm process and add it to the warm pool */
-  private async spawnWarm(model: ClaudeModel): Promise<void> {
-    if (this.totalProcessCount() >= this.config.maxTotalProcesses) {
-      console.warn(`[Router] Cannot spawn warm ${model}: total=${this.totalProcessCount()} >= max=${this.config.maxTotalProcesses}`);
-      return;
+  private enqueueOnSentinel(
+    sentinel: PendingSentinel,
+    prompt: string,
+    _sessionKey: string
+  ): ExecuteResult | null {
+    if (sentinel.requestQueue.length >= this.config.requestQueueDepth) {
+      const emitter = new EventEmitter();
+      process.nextTick(() =>
+        emitter.emit(
+          "error",
+          Object.assign(
+            new Error("Too Many Requests — per-session queue full"),
+            { statusCode: 429, retryAfter: 5 }
+          )
+        )
+      );
+      return {
+        emitter,
+        routeType: "locked",
+        pid: null,
+        queueDepth: sentinel.requestQueue.length,
+      };
     }
-    const proc = await this.spawnCold(model);
-    this.warmPool[model as "opus" | "sonnet"].push(proc);
-  }
 
-  /** Spawn a new CLI process */
-  private async spawnCold(model: ClaudeModel): Promise<PooledProcess> {
-    return new Promise((resolve, reject) => {
-      const args = [
-        "--print",
-        "--input-format", "stream-json",
-        "--output-format", "stream-json",
-        "--verbose",
-        "--include-partial-messages",
-        "--dangerously-skip-permissions",
-        "--no-session-persistence",
-        "--model", model,
-        "--append-system-prompt", OPENCLAW_TOOL_MAPPING_PROMPT,
-      ];
-
-      const child = spawn(process.env.CLAUDE_BIN || "claude", args, {
-        cwd: process.env.HOME || "/tmp",
-        env: Object.fromEntries(
-          Object.entries(process.env).filter(([k]) => k !== "CLAUDECODE")
-        ),
-        stdio: ["pipe", "pipe", "pipe"],
-      });
-
-      const proc: PooledProcess = {
-        pid: child.pid || 0,
-        process: child,
-        model,
-        lockedTo: null,
-        agentChannel: null,
-        lastRequestAt: 0,
-        spawnedAt: Date.now(),
-        requestCount: 0,
-        state: "idle",
-        requestQueue: [],
-        buffer: "",
-        currentEmitter: null,
-        timeoutHandle: null,
-      };
+    const emitter = new EventEmitter();
+    sentinel.requestQueue.push({ prompt, emitter, resolve: () => {} });
 
-      child.on("error", (err) => {
-        reject(err);
-      });
+    return {
+      emitter,
+      routeType: "locked",
+      pid: null,
+      queueDepth: sentinel.requestQueue.length,
+    };
+  }
 
-      child.on("spawn", () => {
-        // Attach stdout handler after successful spawn
-        child.stdout?.on("data", (chunk: Buffer) => {
-          proc.buffer += chunk.toString();
-          this.processBuffer(proc);
-        });
+  private transferSentinelQueue(
+    sentinel: PendingSentinel,
+    proc: PooledProcess
+  ): void {
+    for (const pending of sentinel.requestQueue) {
+      proc.requestQueue.push(pending);
+    }
+    sentinel.requestQueue = [];
+  }
 
-        child.stderr?.on("data", (chunk: Buffer) => {
-          const text = chunk.toString().trim();
-          if (!text) return;
-          if (process.env.DEBUG_SUBPROCESS) {
-            console.error(`[Router:${proc.pid}] stderr: ${text.slice(0, 200)}`);
-          }
-          if (isAuthError(text)) {
-            console.error(`[Router:${proc.pid}] Auth error detected — triggering death recovery`);
-            this.handleProcessDeath(proc, new Error(`Auth error: ${text.slice(0, 100)}`));
-          }
-        });
+  private rejectSentinelQueue(
+    sentinel: PendingSentinel,
+    statusCode: number,
+    message: string
+  ): void {
+    for (const pending of sentinel.requestQueue) {
+      pending.emitter.emit(
+        "error",
+        Object.assign(new Error(message), { statusCode, retryAfter: 3 })
+      );
+    }
+    sentinel.requestQueue = [];
+  }
 
-        child.on("close", (code) => {
-          if (proc.state !== "idle" || proc.currentEmitter) {
-            // Died mid-request
-            this.handleProcessDeath(proc, new Error(`Process exited unexpectedly with code ${code}`));
-          } else {
-            // Clean close — remove from wherever it is
-            const model = proc.model;
-            const idx = this.warmPool[model as "opus" | "sonnet"].indexOf(proc);
-            if (idx >= 0) this.warmPool[model as "opus" | "sonnet"].splice(idx, 1);
-            if (proc.lockedTo) {
-              this.lockedSessions.delete(proc.lockedTo);
-            }
-            if (!this.shuttingDown) {
-              this.spawnWarm(model).catch((e) => console.error("[Router] Respawn failed:", e));
-            }
-          }
-        });
+  private rejectProcessQueue(proc: PooledProcess): void {
+    for (const pending of proc.requestQueue) {
+      pending.emitter.emit(
+        "error",
+        Object.assign(new Error("Process unavailable"), {
+          statusCode: 503,
+          retryAfter: 3,
+        })
+      );
+    }
+    proc.requestQueue = [];
+  }
 
-        resolve(proc);
-      });
+  // -------------------------------------------------------------------------
+  // Locking
+  // -------------------------------------------------------------------------
 
-      // If no spawn event (older Node), resolve after a tick
-      setTimeout(() => {
-        if (child.pid) {
-          child.stdout?.on("data", (chunk: Buffer) => {
-            proc.buffer += chunk.toString();
-            this.processBuffer(proc);
-          });
-          child.stderr?.on("data", (chunk: Buffer) => {
-            const text = chunk.toString().trim();
-            if (!text) return;
-            if (isAuthError(text)) {
-              this.handleProcessDeath(proc, new Error(`Auth error: ${text.slice(0, 100)}`));
-            }
-          });
-          child.on("close", (code) => {
-            if (proc.state !== "idle" || proc.currentEmitter) {
-              this.handleProcessDeath(proc, new Error(`Process exited with code ${code}`));
-            }
-          });
-          resolve(proc);
-        }
-      }, 100);
-    });
+  private lockProcess(
+    proc: PooledProcess,
+    sessionKey: string,
+    agentChannel: string
+  ): void {
+    proc.lockedTo = sessionKey;
+    proc.agentChannel = agentChannel;
   }
 
-  private returnToWarmPool(proc: PooledProcess): void {
-    proc.state = "idle";
-    proc.currentEmitter = null;
-    proc.requestCount = 0;
+  /**
+   * Canonical lock clearing — ALL unlock paths MUST use this method.
+   * No inline lockedSessions.delete() anywhere else in the codebase.
+   */
+  private clearSessionLock(sessionKey: string, proc: PooledProcess): void {
+    this.lockedSessions.delete(sessionKey);
     proc.lockedTo = null;
     proc.agentChannel = null;
+    proc.requestCount = 0;
+  }
 
-    const model = proc.model as "opus" | "sonnet";
-    const target = model === "opus" ? this.config.opusSize : this.config.sonnetSize;
+  // -------------------------------------------------------------------------
+  // Lineage & orphan reclamation
+  // -------------------------------------------------------------------------
 
-    if (this.warmPool[model].length < target && this.totalProcessCount() < this.config.maxTotalProcesses) {
-      this.warmPool[model].push(proc);
-    } else {
-      // Over target — kill the extra process
-      proc.process.kill("SIGTERM");
+  private extractAgentChannel(sessionKey: string): string {
+    const parts = sessionKey.split(":");
+    if (parts.length >= 2) {
+      return parts.slice(1).join(":");
     }
+    return sessionKey;
   }
 
-  /** Process stdout buffer and emit events to the current request's emitter */
-  private processBuffer(proc: PooledProcess): void {
-    const lines = proc.buffer.split("\n");
-    proc.buffer = lines.pop() || "";
+  private reclaimOrphans(
+    newSessionKey: string,
+    newAgentChannel: string
+  ): void {
+    for (const [key, value] of this.lockedSessions.entries()) {
+      if (key === newSessionKey) continue;
+      if (isPendingSentinel(value)) continue;
+
+      const proc = value;
+      if (proc.agentChannel === newAgentChannel) {
+        console.log(
+          JSON.stringify({
+            ts: new Date().toISOString(),
+            event: "orphan_reclaimed",
+            oldSessionKey: key,
+            newSessionKey,
+            agentChannel: newAgentChannel,
+            processId: proc.id,
+            pid: proc.process.pid,
+            state: proc.state,
+          })
+        );
+        this.orphansReclaimed++;
 
-    for (const line of lines) {
-      const trimmed = line.trim();
-      if (!trimmed) continue;
+        // Reject ALL queued requests (they belong to the dead session)
+        this.rejectProcessQueue(proc);
 
-      try {
-        const message = JSON.parse(trimmed);
-        const emitter = proc.currentEmitter;
-        if (!emitter) continue;
+        if (proc.state === "idle") {
+          this.clearSessionLock(key, proc);
+          this.killAndRespawn(proc);
+        } else {
+          // Busy — mark for reclamation after current request
+          proc.orphaned = true;
+        }
+      }
+    }
+  }
 
-        emitter.emit("message", message);
+  // -------------------------------------------------------------------------
+  // Process death & respawn
+  // -------------------------------------------------------------------------
+
+  private handleProcessDeath(pooled: PooledProcess, code: number | null): void {
+    // If already removed from allProcesses (e.g. by killAndRespawn), skip
+    if (!this.allProcesses.has(pooled.id)) return;
+
+    console.log(
+      JSON.stringify({
+        ts: new Date().toISOString(),
+        event: "process_death",
+        processId: pooled.id,
+        pid: pooled.process.pid,
+        code,
+        model: pooled.model,
+        state: pooled.state,
+        lockedTo: pooled.lockedTo,
+      })
+    );
+
+    this.clearRequestTimeout(pooled);
+
+    if (pooled.currentEmitter) {
+      pooled.currentEmitter.emit(
+        "error",
+        new Error(`Pool process ${pooled.id} died with code ${code}`)
+      );
+      pooled.currentEmitter = null;
+    }
 
-        if (message.type === "system" && message.subtype === "init") continue;
+    this.rejectProcessQueue(pooled);
 
-        if (message.type === "assistant") {
-          emitter.emit("assistant", message);
-        }
+    if (pooled.lockedTo) {
+      this.clearSessionLock(pooled.lockedTo, pooled);
+    }
 
-        if (message.type === "content_block_start") {
-          const block = message.content_block;
-          if (block?.type === "text") {
-            emitter.emit("text_block_start", { event: message });
-          } else if (block?.type === "tool_use") {
-            emitter.emit("tool_use_start", { event: message });
-          }
-        }
+    this.allProcesses.delete(pooled.id);
 
-        if (message.type === "content_block_delta") {
-          const delta = message.delta;
-          if (delta?.type === "text_delta") {
-            emitter.emit("content_delta", { event: message });
-          } else if (delta?.type === "input_json_delta") {
-            emitter.emit("input_json_delta", { event: message });
-          }
-        }
+    const warm = this.warmPool.get(pooled.model);
+    if (warm) {
+      const idx = warm.indexOf(pooled);
+      if (idx >= 0) warm.splice(idx, 1);
+    }
 
-        if (message.type === "content_block_stop") {
-          emitter.emit("content_block_stop", { event: message });
-        }
+    if (!this.shuttingDown) {
+      this.spawnWarm(pooled.model).catch((err) => {
+        console.error(`[Router] Failed to respawn after death:`, err);
+      });
+    }
+  }
 
-        if (message.type === "result") {
-          emitter.emit("result", message);
-          this.handleRequestComplete(proc);
-        }
+  private killAndRespawn(pooled: PooledProcess): void {
+    this.clearRequestTimeout(pooled);
+    pooled.currentEmitter = null;
+
+    // Remove from allProcesses BEFORE kill to prevent double-handling
+    this.allProcesses.delete(pooled.id);
+
+    const warm = this.warmPool.get(pooled.model);
+    if (warm) {
+      const idx = warm.indexOf(pooled);
+      if (idx >= 0) warm.splice(idx, 1);
+    }
+
+    pooled.process.stdin?.end();
+    setTimeout(() => {
+      try {
+        pooled.process.kill("SIGKILL");
       } catch {
-        if (process.env.DEBUG_SUBPROCESS) {
-          console.error(`[Router:${proc.pid}] Non-JSON: ${trimmed.slice(0, 100)}`);
-        }
+        // Already dead
       }
+    }, 3000);
+
+    if (
+      !this.shuttingDown &&
+      this.allProcesses.size < this.config.maxTotalProcesses
+    ) {
+      this.spawnWarm(pooled.model).catch((err) => {
+        console.error(`[Router] Failed to respawn:`, err);
+      });
     }
   }
 
-  /** Use ClaudeSubprocess for non-pooled or fallback requests */
-  private fallbackSubprocess(prompt: string, model: ClaudeModel): EventEmitter {
-    const sub = new ClaudeSubprocess();
-    sub.start(prompt, { model }).catch((err) => {
-      sub.emit("error", err);
-    });
-    return sub;
-  }
+  // -------------------------------------------------------------------------
+  // Sweep (nightly 3 AM ET)
+  // -------------------------------------------------------------------------
 
-  private totalProcessCount(): number {
-    return this.lockedSessions.size + this.warmPool.opus.length + this.warmPool.sonnet.length;
-  }
+  async sweep(): Promise<void> {
+    console.log(
+      JSON.stringify({
+        ts: new Date().toISOString(),
+        event: "sweep_start",
+        totalProcesses: this.allProcesses.size,
+        lockedSessions: this.lockedSessions.size,
+      })
+    );
 
-  // ─── Public API ─────────────────────────────────────────────────────────────
+    const now = Date.now();
+    const toRecycle: Array<{ key: string; proc: PooledProcess }> = [];
 
-  /**
-   * Nightly sweep — recycle idle/overused processes, refill warm pool.
-   * Called externally by the scheduler (3 AM ET).
-   */
-  sweep(): void {
-    console.log("[Router] Sweep started");
-    let recycled = 0;
-
-    for (const [key, entry] of this.lockedSessions) {
-      if (isPending(entry)) continue;
-      const proc = entry;
+    for (const [key, value] of this.lockedSessions.entries()) {
+      if (isPendingSentinel(value)) continue;
+      const proc = value;
 
       if (proc.state === "busy" || proc.state === "recycling") continue;
 
-      const idleMs = Date.now() - proc.lastRequestAt;
-      const overThreshold = proc.requestCount >= this.config.maxRequestsPerProcess;
-
-      if (idleMs > this.config.sweepIdleThresholdMs || overThreshold) {
-        this.clearSessionLock(key, proc);
-        proc.process.kill("SIGTERM");
-        recycled++;
-        this.processRecycles++;
+      const idleMs = now - (proc.lastRequestAt || proc.spawnedAt);
+      if (
+        idleMs > this.config.sweepIdleThresholdMs ||
+        proc.requestCount > this.config.maxRequestsPerProcess
+      ) {
+        toRecycle.push({ key, proc });
       }
     }
 
-    // Refill warm pool — check cap before EACH spawn
-    for (const model of ["opus", "sonnet"] as const) {
-      const target = model === "opus" ? this.config.opusSize : this.config.sonnetSize;
-      while (this.warmPool[model].length < target) {
-        if (this.totalProcessCount() >= this.config.maxTotalProcesses) {
-          console.warn(`[Router] Sweep refill stopped: total=${this.totalProcessCount()} >= max=${this.config.maxTotalProcesses}`);
-          break;
-        }
-        this.spawnWarm(model).catch((e) => console.error(`[Router] Sweep spawn failed:`, e));
-      }
+    for (const { key, proc } of toRecycle) {
+      console.log(
+        JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "sweep_recycle",
+          processId: proc.id,
+          pid: proc.process.pid,
+          model: proc.model,
+          idleMs: now - (proc.lastRequestAt || proc.spawnedAt),
+          requestCount: proc.requestCount,
+        })
+      );
+      this.clearSessionLock(key, proc);
+      this.killAndRespawn(proc);
+      this.processRecycles++;
     }
 
-    console.log(`[Router] Sweep complete. recycled=${recycled} warm.opus=${this.warmPool.opus.length} warm.sonnet=${this.warmPool.sonnet.length}`);
-  }
-
-  /** Graceful shutdown — close all processes, reject all queued requests */
-  async shutdown(): Promise<void> {
-    this.shuttingDown = true;
-    console.log("[Router] Shutdown started");
-
-    // Reject all queued requests in locked sessions
-    for (const [key, entry] of this.lockedSessions) {
-      if (isPending(entry)) {
-        for (const req of entry.requestQueue) {
-          rejectPending(req, 503, 3);
-        }
-        this.lockedSessions.delete(key);
-      } else {
-        const proc = entry;
-        for (const req of proc.requestQueue) {
-          rejectPending(req, 503, 3);
-        }
-        proc.requestQueue = [];
-        if (proc.currentEmitter) {
-          proc.currentEmitter.emit("error", new Error("Server shutting down"));
-          proc.currentEmitter = null;
+    // Refill warm pools — check MAX_TOTAL_PROCESSES before EACH spawn
+    for (const model of ["opus", "sonnet"] as PooledModel[]) {
+      const targetSize =
+        model === "opus" ? this.config.opusSize : this.config.sonnetSize;
+      const warm = this.warmPool.get(model)!;
+
+      while (warm.length < targetSize) {
+        if (this.allProcesses.size >= this.config.maxTotalProcesses) {
+          console.log(
+            JSON.stringify({
+              ts: new Date().toISOString(),
+              event: "sweep_refill_cap_reached",
+              model,
+              totalProcesses: this.allProcesses.size,
+              maxTotal: this.config.maxTotalProcesses,
+            })
+          );
+          break;
         }
-        proc.process.kill("SIGTERM");
-      }
-    }
-    this.lockedSessions.clear();
-
-    // Kill warm pool processes
-    for (const model of ["opus", "sonnet"] as const) {
-      for (const proc of this.warmPool[model]) {
-        proc.process.kill("SIGTERM");
+        await this.spawnWarm(model);
       }
-      this.warmPool[model] = [];
     }
 
-    console.log("[Router] Shutdown complete");
+    console.log(
+      JSON.stringify({
+        ts: new Date().toISOString(),
+        event: "sweep_complete",
+        totalProcesses: this.allProcesses.size,
+        recycled: toRecycle.length,
+        warmOpus: this.warmPool.get("opus")!.length,
+        warmSonnet: this.warmPool.get("sonnet")!.length,
+      })
+    );
   }
 
-  /** Stats for health endpoint */
-  stats(): RouterStats {
-    let lockedOpus = 0;
-    let lockedSonnet = 0;
+  // -------------------------------------------------------------------------
+  // Stats
+  // -------------------------------------------------------------------------
+
+  stats(): PoolStats {
     let busy = 0;
     let queued = 0;
-    let totalRequests = 0;
+    const locked = { total: 0, opus: 0, sonnet: 0 };
 
-    for (const [, entry] of this.lockedSessions) {
-      if (isPending(entry)) {
-        queued += entry.requestQueue.length;
+    for (const [, value] of this.lockedSessions.entries()) {
+      if (isPendingSentinel(value)) {
+        queued += value.requestQueue.length;
         continue;
       }
-      const proc = entry;
-      if (proc.model === "opus") lockedOpus++;
-      else if (proc.model === "sonnet") lockedSonnet++;
+      const proc = value;
+      locked.total++;
+      if (proc.model === "opus") locked.opus++;
+      else locked.sonnet++;
       if (proc.state === "busy" || proc.state === "recycling") busy++;
       queued += proc.requestQueue.length;
-      totalRequests += proc.requestCount;
     }
 
     return {
-      total: this.totalProcessCount(),
-      locked: { total: lockedOpus + lockedSonnet, opus: lockedOpus, sonnet: lockedSonnet },
-      warm: { opus: this.warmPool.opus.length, sonnet: this.warmPool.sonnet.length },
+      total: this.allProcesses.size,
+      locked,
+      warm: {
+        opus: this.warmPool.get("opus")!.length,
+        sonnet: this.warmPool.get("sonnet")!.length,
+      },
       busy,
       queued,
+      maxTotal: this.config.maxTotalProcesses,
       orphansReclaimed: this.orphansReclaimed,
-      totalRequests,
+      totalRequests: this.totalRequests,
       processRecycles: this.processRecycles,
+      requestTimeouts: this.requestTimeouts,
       routeHits: { ...this.routeHits },
+      uptime: Math.floor((Date.now() - this.startedAt) / 1000),
     };
   }
+
+  // -------------------------------------------------------------------------
+  // Shutdown
+  // -------------------------------------------------------------------------
+
+  async shutdown(): Promise<void> {
+    this.shuttingDown = true;
+    console.log(
+      `[Router] Shutting down — ${this.allProcesses.size} processes`
+    );
+
+    // Reject all queued requests
+    for (const [, value] of this.lockedSessions.entries()) {
+      if (isPendingSentinel(value)) {
+        this.rejectSentinelQueue(value, 503, "Server shutting down");
+      } else {
+        this.rejectProcessQueue(value);
+      }
+    }
+
+    // Kill all processes
+    const kills: Promise<void>[] = [];
+    for (const pooled of this.allProcesses.values()) {
+      this.clearRequestTimeout(pooled);
+      if (pooled.currentEmitter) {
+        pooled.currentEmitter.emit(
+          "error",
+          new Error("Server shutting down")
+        );
+        pooled.currentEmitter = null;
+      }
+      kills.push(
+        new Promise<void>((resolve) => {
+          pooled.process.on("close", () => resolve());
+          pooled.process.stdin?.end();
+          setTimeout(() => {
+            try {
+              pooled.process.kill("SIGKILL");
+            } catch {
+              // already dead
+            }
+            resolve();
+          }, 5000);
+        })
+      );
+    }
+
+    await Promise.all(kills);
+    this.allProcesses.clear();
+    this.lockedSessions.clear();
+    this.warmPool.set("opus", []);
+    this.warmPool.set("sonnet", []);
+    console.log("[Router] Shutdown complete.");
+  }
 }

From c9f18acb254acc8aea1c5ebfa75c5e80990b03c5 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 19:05:03 -0400
Subject: [PATCH 08/27] fix: M1-21 clearSessionLock canonical path, M1-22
 recycle kill+respawn, M2-02 agent-id header, M2-01 queueDepth+cacheHit
 logging

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/server/routes.ts | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index d0cf271..485c7f2 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -73,6 +73,7 @@ export async function handleChatCompletions(
     const sessionKey = req.headers["x-openclaw-session-key"] as
       | string
       | undefined;
+    const agentId = req.headers["x-openclaw-agent-id"] as string | undefined;
 
     // --- Pool routing ---
     if (sessionKey && poolRouter) {
@@ -94,6 +95,7 @@ export async function handleChatCompletions(
             requestId,
             startTime,
             sessionKey,
+            agentId,
             cliInput.model,
             routeType,
             pid,
@@ -106,6 +108,7 @@ export async function handleChatCompletions(
             requestId,
             startTime,
             sessionKey,
+            agentId,
             cliInput.model,
             routeType,
             pid,
@@ -152,6 +155,7 @@ async function handlePooledStreaming(
   requestId: string,
   startTime: number,
   sessionKey: string,
+  agentId: string | undefined,
   model: string,
   routeType: string,
   pid: number | null,
@@ -231,11 +235,13 @@ async function handlePooledStreaming(
           ts: new Date().toISOString(),
           event: "request",
           sessionKey,
+          agentId,
           model,
           pid,
           latencyMs,
           queueDepth,
           routeType,
+          cacheHit: routeType,
           requestCount: result.num_turns,
         })
       );
@@ -318,6 +324,7 @@ async function handlePooledNonStreaming(
   requestId: string,
   startTime: number,
   sessionKey: string,
+  agentId: string | undefined,
   model: string,
   routeType: string,
   pid: number | null,
@@ -331,11 +338,13 @@ async function handlePooledNonStreaming(
           ts: new Date().toISOString(),
           event: "request",
           sessionKey,
+          agentId,
           model,
           pid,
           latencyMs,
           queueDepth,
           routeType,
+          cacheHit: routeType,
           requestCount: result.num_turns,
         })
       );

From 8886e8827a740db03d1756aa08c4e0c914a0fa5a Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 19:09:42 -0400
Subject: [PATCH 09/27] fix: route sentinel cleanup through clearSessionLock
 (M1-21 complete)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two inline lockedSessions.delete() calls at sentinel cleanup points
(pool-at-capacity fallback and cold-spawn-failure) bypassed clearSessionLock().
These are PendingSentinel entries (not PooledProcess), so the canonical
function signature is extended to accept null for proc.

The spec grep validation — lockedSessions.delete appearing ONLY inside
clearSessionLock() — now passes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/subprocess/router.ts | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 2be90f3..0ac33fc 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -276,7 +276,7 @@ export class SessionPoolRouter {
     // Warm pool empty — need cold spawn
     if (this.allProcesses.size >= this.config.maxTotalProcesses) {
       this.rejectSentinelQueue(sentinel, 503, "Pool at capacity");
-      this.lockedSessions.delete(sessionKey);
+      this.clearSessionLock(sessionKey, null);
       this.routeHits.fallback++;
       console.log(
         JSON.stringify({
@@ -314,7 +314,7 @@ export class SessionPoolRouter {
           })
         );
         this.rejectSentinelQueue(sentinel, 503, "Cold spawn failed");
-        this.lockedSessions.delete(sessionKey);
+        this.clearSessionLock(sessionKey, null);
         emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
       });
 
@@ -807,12 +807,15 @@ export class SessionPoolRouter {
   /**
    * Canonical lock clearing — ALL unlock paths MUST use this method.
    * No inline lockedSessions.delete() anywhere else in the codebase.
+   * Pass null for proc when clearing a PendingSentinel (no process state to reset).
    */
-  private clearSessionLock(sessionKey: string, proc: PooledProcess): void {
+  private clearSessionLock(sessionKey: string, proc: PooledProcess | null): void {
     this.lockedSessions.delete(sessionKey);
-    proc.lockedTo = null;
-    proc.agentChannel = null;
-    proc.requestCount = 0;
+    if (proc) {
+      proc.lockedTo = null;
+      proc.agentChannel = null;
+      proc.requestCount = 0;
+    }
   }
 
   // -------------------------------------------------------------------------

From ebdcdfb93d36ae5367aee50b622827401f4ef291 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Sat, 21 Mar 2026 23:58:21 -0400
Subject: [PATCH 10/27] fix: fall back to body sessionId when session-key
 header absent

The gateway does not send x-openclaw-session-key headers on outbound
provider calls. Without this fallback, all production requests bypassed
the session pool and fell through to one-shot ClaudeSubprocess.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/server/routes.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index 485c7f2..d2e2042 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -70,9 +70,7 @@ export async function handleChatCompletions(
     }
 
     const cliInput = openaiToCli(body);
-    const sessionKey = req.headers["x-openclaw-session-key"] as
-      | string
-      | undefined;
+    const sessionKey = (req.headers["x-openclaw-session-key"] as string | undefined) || cliInput.sessionId;
     const agentId = req.headers["x-openclaw-agent-id"] as string | undefined;
 
     // --- Pool routing ---

From 4ef1787772af41b4d9ca112d8e41ef600bcde79f Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Wed, 8 Apr 2026 09:37:18 -0400
Subject: [PATCH 11/27] feat: add unhandledRejection and uncaughtException
 handlers (proxy-crash-handler.spec.md)

Adds process-level error handlers so that otherwise-silent crashes log [FATAL]
to stderr before exiting with code 1. This makes the 04:35/06:14/06:32 silent
crash pattern diagnosable; it does not by itself remove the underlying cause.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/server/standalone.ts | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/server/standalone.ts b/src/server/standalone.ts
index 291d0f5..9dcd902 100644
--- a/src/server/standalone.ts
+++ b/src/server/standalone.ts
@@ -172,6 +172,16 @@ async function main(): Promise<void> {
   process.on("SIGTERM", () => shutdown("SIGTERM"));
 }
 
+process.on("unhandledRejection", (reason: unknown) => {
+  console.error("[FATAL] Unhandled promise rejection:", reason);
+  process.exit(1);
+});
+
+process.on("uncaughtException", (err: Error) => {
+  console.error("[FATAL] Uncaught exception:", err);
+  process.exit(1);
+});
+
 main().catch((err) => {
   console.error("Unexpected error:", err);
   process.exit(1);

From 99da3cabeb97b34d4ba10f446951e019fbdff7ea Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Wed, 8 Apr 2026 12:29:39 -0400
Subject: [PATCH 12/27] fix: guard all 4 emit("error") sites with listenerCount
 check (proxy-safe-emit.spec.md)

Prevents an uncaught-exception crash when an "error" event is emitted on an
EventEmitter that has no "error" listeners, which happens after routes.ts has
stripped them via removeAllListeners().

Sites fixed: child process error handler, request timeout, handleProcessDeath, shutdown loop.
Suppressed errors are logged to stderr with process ID and error message.
---
 src/subprocess/router.ts | 46 +++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 0ac33fc..0ef4738 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -434,7 +434,11 @@ export class SessionPoolRouter {
     child.on("error", (err) => {
       console.error(`[Router:${id}] Process error:`, err.message);
       if (pooled.currentEmitter) {
-        pooled.currentEmitter.emit("error", err);
+        if (pooled.currentEmitter.listenerCount("error") > 0) {
+          pooled.currentEmitter.emit("error", err);
+        } else {
+          console.error(`[Router:${pooled.id}] Suppressed error (no listeners):`, err.message);
+        }
         pooled.currentEmitter = null;
       }
     });
@@ -635,12 +639,16 @@ export class SessionPoolRouter {
       this.requestTimeouts++;
 
       if (pooled.currentEmitter) {
-        pooled.currentEmitter.emit(
-          "error",
-          new Error(
-            `Request timed out after ${this.config.requestTimeoutMs}ms`
-          )
-        );
+        if (pooled.currentEmitter.listenerCount("error") > 0) {
+          pooled.currentEmitter.emit(
+            "error",
+            new Error(
+              `Request timed out after ${this.config.requestTimeoutMs}ms`
+            )
+          );
+        } else {
+          console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Request timed out after ${this.config.requestTimeoutMs}ms`);
+        }
         pooled.currentEmitter = null;
       }
 
@@ -892,10 +900,14 @@ export class SessionPoolRouter {
     this.clearRequestTimeout(pooled);
 
     if (pooled.currentEmitter) {
-      pooled.currentEmitter.emit(
-        "error",
-        new Error(`Pool process ${pooled.id} died with code ${code}`)
-      );
+      if (pooled.currentEmitter.listenerCount("error") > 0) {
+        pooled.currentEmitter.emit(
+          "error",
+          new Error(`Pool process ${pooled.id} died with code ${code}`)
+        );
+      } else {
+        console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Pool process ${pooled.id} died with code ${code}`);
+      }
       pooled.currentEmitter = null;
     }
 
@@ -1101,10 +1113,14 @@ export class SessionPoolRouter {
     for (const pooled of this.allProcesses.values()) {
       this.clearRequestTimeout(pooled);
       if (pooled.currentEmitter) {
-        pooled.currentEmitter.emit(
-          "error",
-          new Error("Server shutting down")
-        );
+        if (pooled.currentEmitter.listenerCount("error") > 0) {
+          pooled.currentEmitter.emit(
+            "error",
+            new Error("Server shutting down")
+          );
+        } else {
+          console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Server shutting down`);
+        }
         pooled.currentEmitter = null;
       }
       kills.push(

From 1e218439e6df7ed40e06bacacb4727293083b3a9 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Wed, 8 Apr 2026 13:39:26 -0400
Subject: [PATCH 13/27] [P-safe-emit] Add listenerCount guard to all error emit
 sites (proxy crash fix)

---
 src/subprocess/router.ts | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 0ef4738..b23d464 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -211,9 +211,13 @@ export class SessionPoolRouter {
   ): ExecuteResult | null {
     if (this.shuttingDown) {
       const emitter = new EventEmitter();
-      process.nextTick(() =>
-        emitter.emit("error", new Error("Server is shutting down"))
-      );
+      process.nextTick(() => {
+        if (emitter.listenerCount("error") > 0) {
+          emitter.emit("error", new Error("Server is shutting down"));
+        } else {
+          console.error(`[Router] Suppressed shutdown error (no listeners)`);
+        }
+      });
       return { emitter, routeType: "fallback", pid: null, queueDepth: 0 };
     }
 
@@ -315,7 +319,11 @@ export class SessionPoolRouter {
         );
         this.rejectSentinelQueue(sentinel, 503, "Cold spawn failed");
         this.clearSessionLock(sessionKey, null);
-        emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
+        if (emitter.listenerCount("error") > 0) {
+          emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
+        } else {
+          console.error(`[Router] Suppressed cold spawn error (no listeners):`, err.message);
+        }
       });
 
     return {

From f55bdb74cb32821e65d2d03f0d060e55fb00f6f7 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Wed, 8 Apr 2026 14:24:01 -0400
Subject: [PATCH 14/27] [P130] Safe EventEmitter error emission guards (proxy
 crash fix)

---
 src/subprocess/router.ts | 106 +++++++++++++++++++++++++--------------
 1 file changed, 69 insertions(+), 37 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index b23d464..3612d41 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -10,6 +10,46 @@
 
 import { spawn, ChildProcess } from "child_process";
 import { EventEmitter } from "events";
+
+/**
+ * Attaches a permanent no-op error listener so the emitter never reaches a
+ * zero-listener state.  Prevents Node's default throw behavior when callers
+ * detach their listeners before an async error fires.
+ * Uses a simple counter instead of listenerCount() to avoid zombie emitter crashes.
+ */
+function safeEmitter<T extends EventEmitter>(emitter: T): T {
+  let activeListeners = 0;
+  
+  // Track when listeners are added/removed
+  const originalOn = emitter.on.bind(emitter);
+  const originalOff = emitter.off.bind(emitter);
+  const originalRemoveListener = emitter.removeListener.bind(emitter);
+  
+  emitter.on = function(type: string, listener: (...args: any[]) => void) {
+    if (type === "error") activeListeners++;
+    return originalOn(type, listener);
+  };
+  
+  emitter.off = function(type: string, listener: (...args: any[]) => void) {
+    if (type === "error") activeListeners = Math.max(0, activeListeners - 1);
+    return originalOff(type, listener);
+  };
+  
+  emitter.removeListener = function(type: string, listener: (...args: any[]) => void) {
+    if (type === "error") activeListeners = Math.max(0, activeListeners - 1);
+    return originalRemoveListener(type, listener);
+  };
+  
+  // Permanent guard listener that logs if no other listeners
+  emitter.prependListener("error", function safeEmitGuard(err: Error) {
+    if (activeListeners <= 1) { // Only our guard listener
+      console.error("[Router] Suppressed error (no active listeners):", err.message);
+    }
+  });
+  
+  return emitter;
+}
+
 import type {
   ClaudeCliMessage,
   ClaudeCliStreamEvent,
@@ -210,12 +250,12 @@ export class SessionPoolRouter {
     sessionKey: string
   ): ExecuteResult | null {
     if (this.shuttingDown) {
-      const emitter = new EventEmitter();
+      const emitter = safeEmitter(new EventEmitter());
       process.nextTick(() => {
-        if (emitter.listenerCount("error") > 0) {
+        try {
           emitter.emit("error", new Error("Server is shutting down"));
-        } else {
-          console.error(`[Router] Suppressed shutdown error (no listeners)`);
+        } catch (err) {
+          console.error(`[Router] Suppressed shutdown error:`, (err as Error).message);
         }
       });
       return { emitter, routeType: "fallback", pid: null, queueDepth: 0 };
@@ -296,7 +336,7 @@ export class SessionPoolRouter {
     }
 
     // Cold spawn (async)
-    const emitter = new EventEmitter();
+    const emitter = safeEmitter(new EventEmitter());
     this.totalRequests++;
     this.routeHits.cold++;
 
@@ -319,10 +359,10 @@ export class SessionPoolRouter {
         );
         this.rejectSentinelQueue(sentinel, 503, "Cold spawn failed");
         this.clearSessionLock(sessionKey, null);
-        if (emitter.listenerCount("error") > 0) {
+        try {
           emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
-        } else {
-          console.error(`[Router] Suppressed cold spawn error (no listeners):`, err.message);
+        } catch (emitErr) {
+          console.error(`[Router] Failed to emit cold spawn error:`, (emitErr as Error).message);
         }
       });
 
@@ -442,10 +482,10 @@ export class SessionPoolRouter {
     child.on("error", (err) => {
       console.error(`[Router:${id}] Process error:`, err.message);
       if (pooled.currentEmitter) {
-        if (pooled.currentEmitter.listenerCount("error") > 0) {
+        try {
           pooled.currentEmitter.emit("error", err);
-        } else {
-          console.error(`[Router:${pooled.id}] Suppressed error (no listeners):`, err.message);
+        } catch (emitErr) {
+          console.error(`[Router] Failed to emit process error:`, (emitErr as Error).message);
         }
         pooled.currentEmitter = null;
       }
@@ -531,7 +571,7 @@ export class SessionPoolRouter {
     prompt: string,
     routeType: "locked" | "warm" | "cold"
   ): ExecuteResult {
-    const emitter = new EventEmitter();
+    const emitter = safeEmitter(new EventEmitter());
     this.assignToProcess(proc, prompt, emitter);
     return {
       emitter,
@@ -647,16 +687,12 @@ export class SessionPoolRouter {
       this.requestTimeouts++;
 
       if (pooled.currentEmitter) {
-        if (pooled.currentEmitter.listenerCount("error") > 0) {
-          pooled.currentEmitter.emit(
-            "error",
-            new Error(
-              `Request timed out after ${this.config.requestTimeoutMs}ms`
-            )
-          );
-        } else {
-          console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Request timed out after ${this.config.requestTimeoutMs}ms`);
-        }
+        pooled.currentEmitter.emit(
+          "error",
+          new Error(
+            `Request timed out after ${this.config.requestTimeoutMs}ms`
+          )
+        );
         pooled.currentEmitter = null;
       }
 
@@ -700,7 +736,7 @@ export class SessionPoolRouter {
     }
 
     if (proc.requestQueue.length >= this.config.requestQueueDepth) {
-      const emitter = new EventEmitter();
+      const emitter = safeEmitter(new EventEmitter());
       process.nextTick(() =>
         emitter.emit(
           "error",
@@ -718,7 +754,7 @@ export class SessionPoolRouter {
       };
     }
 
-    const emitter = new EventEmitter();
+    const emitter = safeEmitter(new EventEmitter());
     const pending: PendingRequest = {
       prompt,
       emitter,
@@ -741,7 +777,7 @@ export class SessionPoolRouter {
     _sessionKey: string
   ): ExecuteResult | null {
     if (sentinel.requestQueue.length >= this.config.requestQueueDepth) {
-      const emitter = new EventEmitter();
+      const emitter = safeEmitter(new EventEmitter());
       process.nextTick(() =>
         emitter.emit(
           "error",
@@ -759,7 +795,7 @@ export class SessionPoolRouter {
       };
     }
 
-    const emitter = new EventEmitter();
+    const emitter = safeEmitter(new EventEmitter());
     sentinel.requestQueue.push({ prompt, emitter, resolve: () => {} });
 
     return {
@@ -908,13 +944,13 @@ export class SessionPoolRouter {
     this.clearRequestTimeout(pooled);
 
     if (pooled.currentEmitter) {
-      if (pooled.currentEmitter.listenerCount("error") > 0) {
+      try {
         pooled.currentEmitter.emit(
           "error",
           new Error(`Pool process ${pooled.id} died with code ${code}`)
         );
-      } else {
-        console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Pool process ${pooled.id} died with code ${code}`);
+      } catch (emitErr) {
+        console.error(`[Router] Failed to emit process death error:`, (emitErr as Error).message);
       }
       pooled.currentEmitter = null;
     }
@@ -1121,14 +1157,10 @@ export class SessionPoolRouter {
     for (const pooled of this.allProcesses.values()) {
       this.clearRequestTimeout(pooled);
       if (pooled.currentEmitter) {
-        if (pooled.currentEmitter.listenerCount("error") > 0) {
-          pooled.currentEmitter.emit(
-            "error",
-            new Error("Server shutting down")
-          );
-        } else {
-          console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Server shutting down`);
-        }
+        pooled.currentEmitter.emit(
+          "error",
+          new Error("Server shutting down")
+        );
         pooled.currentEmitter = null;
       }
       kills.push(

From dd20a8231de5a3373984e6a4761426bb6b264170 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Wed, 8 Apr 2026 14:41:17 -0400
Subject: [PATCH 15/27] Refactor safeEmitter to factory pattern, apply to all
 creation sites
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Converts safeEmitter(emitter) wrapper to safeEmitter() factory — EventEmitter
is created inside the function, preventing future call sites from forgetting
the guard. All 7 creation sites updated; build passes clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/subprocess/router.ts | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 3612d41..4c1468d 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -12,41 +12,42 @@ import { spawn, ChildProcess } from "child_process";
 import { EventEmitter } from "events";
 
 /**
- * Attaches a permanent no-op error listener so the emitter never reaches a
- * zero-listener state.  Prevents Node's default throw behavior when callers
- * detach their listeners before an async error fires.
+ * Factory that creates an EventEmitter with a permanent no-op error listener
+ * so it never reaches a zero-listener state.  Prevents Node's default throw
+ * behavior when callers detach their listeners before an async error fires.
  * Uses a simple counter instead of listenerCount() to avoid zombie emitter crashes.
  */
-function safeEmitter<T extends EventEmitter>(emitter: T): T {
+function safeEmitter(): EventEmitter {
+  const emitter = new EventEmitter();
   let activeListeners = 0;
-  
+
   // Track when listeners are added/removed
   const originalOn = emitter.on.bind(emitter);
   const originalOff = emitter.off.bind(emitter);
   const originalRemoveListener = emitter.removeListener.bind(emitter);
-  
+
   emitter.on = function(type: string, listener: (...args: any[]) => void) {
     if (type === "error") activeListeners++;
     return originalOn(type, listener);
   };
-  
+
   emitter.off = function(type: string, listener: (...args: any[]) => void) {
     if (type === "error") activeListeners = Math.max(0, activeListeners - 1);
     return originalOff(type, listener);
   };
-  
+
   emitter.removeListener = function(type: string, listener: (...args: any[]) => void) {
     if (type === "error") activeListeners = Math.max(0, activeListeners - 1);
     return originalRemoveListener(type, listener);
   };
-  
+
   // Permanent guard listener that logs if no other listeners
   emitter.prependListener("error", function safeEmitGuard(err: Error) {
     if (activeListeners <= 1) { // Only our guard listener
       console.error("[Router] Suppressed error (no active listeners):", err.message);
     }
   });
-  
+
   return emitter;
 }
 
@@ -250,7 +251,7 @@ export class SessionPoolRouter {
     sessionKey: string
   ): ExecuteResult | null {
     if (this.shuttingDown) {
-      const emitter = safeEmitter(new EventEmitter());
+      const emitter = safeEmitter();
       process.nextTick(() => {
         try {
           emitter.emit("error", new Error("Server is shutting down"));
@@ -336,7 +337,7 @@ export class SessionPoolRouter {
     }
 
     // Cold spawn (async)
-    const emitter = safeEmitter(new EventEmitter());
+    const emitter = safeEmitter();
     this.totalRequests++;
     this.routeHits.cold++;
 
@@ -571,7 +572,7 @@ export class SessionPoolRouter {
     prompt: string,
     routeType: "locked" | "warm" | "cold"
   ): ExecuteResult {
-    const emitter = safeEmitter(new EventEmitter());
+    const emitter = safeEmitter();
     this.assignToProcess(proc, prompt, emitter);
     return {
       emitter,
@@ -736,7 +737,7 @@ export class SessionPoolRouter {
     }
 
     if (proc.requestQueue.length >= this.config.requestQueueDepth) {
-      const emitter = safeEmitter(new EventEmitter());
+      const emitter = safeEmitter();
       process.nextTick(() =>
         emitter.emit(
           "error",
@@ -754,7 +755,7 @@ export class SessionPoolRouter {
       };
     }
 
-    const emitter = safeEmitter(new EventEmitter());
+    const emitter = safeEmitter();
     const pending: PendingRequest = {
       prompt,
       emitter,
@@ -777,7 +778,7 @@ export class SessionPoolRouter {
     _sessionKey: string
   ): ExecuteResult | null {
     if (sentinel.requestQueue.length >= this.config.requestQueueDepth) {
-      const emitter = safeEmitter(new EventEmitter());
+      const emitter = safeEmitter();
       process.nextTick(() =>
         emitter.emit(
           "error",
@@ -795,7 +796,7 @@ export class SessionPoolRouter {
       };
     }
 
-    const emitter = safeEmitter(new EventEmitter());
+    const emitter = safeEmitter();
     sentinel.requestQueue.push({ prompt, emitter, resolve: () => {} });
 
     return {

From 58f0a878239cd83b9b0cfcc1373839584b459661 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Wed, 8 Apr 2026 14:51:49 -0400
Subject: [PATCH 16/27] fix: simplify safeEmitter to minimal permanent error
 listener
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The complex version (activeListeners counter + monkey-patched on/off/removeListener +
prependListener guard) was over-engineered. A permanent `emitter.on('error', ...)`
at creation time is sufficient — it keeps listenerCount('error') >= 1 for as long as
nobody calls removeAllListeners() on the emitter (see "Known limitation" below),
preventing Node's uncaught exception on emit('error') with zero listeners.

Root cause: routes.ts calls removeAllListeners() on client disconnect, stripping all
listeners. If the CLI process emits an error after cleanup, Node throws (EventEmitter
contract: emit('error') with 0 listeners = throw). In Node v25, unhandled exceptions
exit silently with code 1.

Defense layers:
1. safeEmitter() — permanent error listener prevents the throw (this commit)
2. uncaughtException/unhandledRejection handlers in standalone.ts (commit 4ef1787)

Known limitation: removeAllListeners() in routes.ts:178 can strip the permanent
listener. A follow-up should replace that call with targeted, per-event calls
(e.g. removeAllListeners('data')) so the permanent 'error' listener survives.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/subprocess/router.ts | 34 +++++-----------------------------
 1 file changed, 5 insertions(+), 29 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 4c1468d..d08161c 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -15,39 +15,15 @@ import { EventEmitter } from "events";
  * Factory that creates an EventEmitter with a permanent no-op error listener
  * so it never reaches a zero-listener state.  Prevents Node's default throw
  * behavior when callers detach their listeners before an async error fires.
- * Uses a simple counter instead of listenerCount() to avoid zombie emitter crashes.
+ * The permanent listener is attached at creation time — no complex tracking needed.
  */
 function safeEmitter(): EventEmitter {
   const emitter = new EventEmitter();
-  let activeListeners = 0;
-
-  // Track when listeners are added/removed
-  const originalOn = emitter.on.bind(emitter);
-  const originalOff = emitter.off.bind(emitter);
-  const originalRemoveListener = emitter.removeListener.bind(emitter);
-
-  emitter.on = function(type: string, listener: (...args: any[]) => void) {
-    if (type === "error") activeListeners++;
-    return originalOn(type, listener);
-  };
-
-  emitter.off = function(type: string, listener: (...args: any[]) => void) {
-    if (type === "error") activeListeners = Math.max(0, activeListeners - 1);
-    return originalOff(type, listener);
-  };
-
-  emitter.removeListener = function(type: string, listener: (...args: any[]) => void) {
-    if (type === "error") activeListeners = Math.max(0, activeListeners - 1);
-    return originalRemoveListener(type, listener);
-  };
-
-  // Permanent guard listener that logs if no other listeners
-  emitter.prependListener("error", function safeEmitGuard(err: Error) {
-    if (activeListeners <= 1) { // Only our guard listener
-      console.error("[Router] Suppressed error (no active listeners):", err.message);
-    }
+  // Permanent no-op guard: emitter always has ≥1 error listener, so Node never
+  // throws on emit("error", ...) regardless of what callers attach or detach.
+  emitter.on("error", (err: Error) => {
+    console.error("[Router] Suppressed emitter error:", err?.message ?? err);
   });
-
   return emitter;
 }
 

From 052ffe4cb1b20670cc14aea70fb97426e7331a8c Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Wed, 8 Apr 2026 15:12:28 -0400
Subject: [PATCH 17/27] [P?] Safe EventEmitter error guards (router)

---
 src/subprocess/router.ts | 94 ++++++++++++++++++++++------------------
 1 file changed, 53 insertions(+), 41 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index d08161c..fdb828d 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -229,10 +229,12 @@ export class SessionPoolRouter {
     if (this.shuttingDown) {
       const emitter = safeEmitter();
       process.nextTick(() => {
-        try {
-          emitter.emit("error", new Error("Server is shutting down"));
-        } catch (err) {
-          console.error(`[Router] Suppressed shutdown error:`, (err as Error).message);
+        if (emitter.listenerCount("error") > 0) {
+          try {
+            emitter.emit("error", new Error("Server is shutting down"));
+          } catch (err) {
+            console.error(`[Router] Suppressed shutdown error:`, (err as Error).message);
+          }
         }
       });
       return { emitter, routeType: "fallback", pid: null, queueDepth: 0 };
@@ -336,10 +338,12 @@ export class SessionPoolRouter {
         );
         this.rejectSentinelQueue(sentinel, 503, "Cold spawn failed");
         this.clearSessionLock(sessionKey, null);
-        try {
-          emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
-        } catch (emitErr) {
-          console.error(`[Router] Failed to emit cold spawn error:`, (emitErr as Error).message);
+        if (emitter.listenerCount("error") > 0) {
+          try {
+            emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
+          } catch (emitErr) {
+            console.error(`[Router] Failed to emit cold spawn error:`, (emitErr as Error).message);
+          }
         }
       });
 
@@ -458,7 +462,7 @@ export class SessionPoolRouter {
 
     child.on("error", (err) => {
       console.error(`[Router:${id}] Process error:`, err.message);
-      if (pooled.currentEmitter) {
+      if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
         try {
           pooled.currentEmitter.emit("error", err);
         } catch (emitErr) {
@@ -663,7 +667,7 @@ export class SessionPoolRouter {
       );
       this.requestTimeouts++;
 
-      if (pooled.currentEmitter) {
+      if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
         pooled.currentEmitter.emit(
           "error",
           new Error(
@@ -714,15 +718,17 @@ export class SessionPoolRouter {
 
     if (proc.requestQueue.length >= this.config.requestQueueDepth) {
       const emitter = safeEmitter();
-      process.nextTick(() =>
-        emitter.emit(
-          "error",
-          Object.assign(
-            new Error("Too Many Requests — per-session queue full"),
-            { statusCode: 429, retryAfter: 5 }
-          )
-        )
-      );
+      process.nextTick(() => {
+        if (emitter.listenerCount("error") > 0) {
+          emitter.emit(
+            "error",
+            Object.assign(
+              new Error("Too Many Requests — per-session queue full"),
+              { statusCode: 429, retryAfter: 5 }
+            )
+          );
+        }
+      });
       return {
         emitter,
         routeType: "locked",
@@ -755,15 +761,17 @@ export class SessionPoolRouter {
   ): ExecuteResult | null {
     if (sentinel.requestQueue.length >= this.config.requestQueueDepth) {
       const emitter = safeEmitter();
-      process.nextTick(() =>
-        emitter.emit(
-          "error",
-          Object.assign(
-            new Error("Too Many Requests — per-session queue full"),
-            { statusCode: 429, retryAfter: 5 }
-          )
-        )
-      );
+      process.nextTick(() => {
+        if (emitter.listenerCount("error") > 0) {
+          emitter.emit(
+            "error",
+            Object.assign(
+              new Error("Too Many Requests — per-session queue full"),
+              { statusCode: 429, retryAfter: 5 }
+            )
+          );
+        }
+      });
       return {
         emitter,
         routeType: "locked",
@@ -799,23 +807,27 @@ export class SessionPoolRouter {
     message: string
   ): void {
     for (const pending of sentinel.requestQueue) {
-      pending.emitter.emit(
-        "error",
-        Object.assign(new Error(message), { statusCode, retryAfter: 3 })
-      );
+      if (pending.emitter.listenerCount("error") > 0) {
+        pending.emitter.emit(
+          "error",
+          Object.assign(new Error(message), { statusCode, retryAfter: 3 })
+        );
+      }
     }
     sentinel.requestQueue = [];
   }
 
   private rejectProcessQueue(proc: PooledProcess): void {
     for (const pending of proc.requestQueue) {
-      pending.emitter.emit(
-        "error",
-        Object.assign(new Error("Process unavailable"), {
-          statusCode: 503,
-          retryAfter: 3,
-        })
-      );
+      if (pending.emitter.listenerCount("error") > 0) {
+        pending.emitter.emit(
+          "error",
+          Object.assign(new Error("Process unavailable"), {
+            statusCode: 503,
+            retryAfter: 3,
+          })
+        );
+      }
     }
     proc.requestQueue = [];
   }
@@ -920,7 +932,7 @@ export class SessionPoolRouter {
 
     this.clearRequestTimeout(pooled);
 
-    if (pooled.currentEmitter) {
+    if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
       try {
         pooled.currentEmitter.emit(
           "error",
@@ -1133,7 +1145,7 @@ export class SessionPoolRouter {
     const kills: Promise<void>[] = [];
     for (const pooled of this.allProcesses.values()) {
       this.clearRequestTimeout(pooled);
-      if (pooled.currentEmitter) {
+      if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
         pooled.currentEmitter.emit(
           "error",
           new Error("Server shutting down")

From 09f6179a47796b07fd304bac8df2059f4ce1309e Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Thu, 9 Apr 2026 09:00:47 -0400
Subject: [PATCH 18/27] fix: safeListenerCount() + targeted listener removal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

router.ts:
- Add safeListenerCount() helper — wraps listenerCount() in try-catch,
  returns 0 if emitter is corrupted (prevents [FATAL] crash)
- Update timeout handler and process error handler to use safeListenerCount()
- Add else-branch logging when listeners are absent (observability)

routes.ts:
- Replace removeAllListeners() with targeted removeListener() calls
- Convert inline emitter.on() to named function refs (onTextBlockStart,
  onContentDelta, onAssistant, onResult, onError)
- Client disconnect now removes only request-specific listeners,
  preserving safeEmitter()'s permanent error listener

Fixes: pool crash on corrupted emitter + removeAllListeners stripping
permanent error guard
---
 src/server/routes.ts     | 45 +++++++++++++++++++++++++---------------
 src/subprocess/router.ts | 43 +++++++++++++++++++++++++++-----------
 2 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index d2e2042..3fa7323 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -172,15 +172,7 @@ async function handlePooledStreaming(
     let isComplete = false;
     let hasEmittedText = false;
 
-    // Client disconnect: detach emitter, let process finish, return to locked-idle
-    res.on("close", () => {
-      if (!isComplete) {
-        emitter.removeAllListeners();
-      }
-      resolve();
-    });
-
-    emitter.on("text_block_start", () => {
+    const onTextBlockStart = () => {
       if (hasEmittedText && !res.writableEnded) {
         const sepChunk = {
           id: `chatcmpl-${requestId}`,
@@ -193,9 +185,9 @@ async function handlePooledStreaming(
         };
         res.write(`data: ${JSON.stringify(sepChunk)}\n\n`);
       }
-    });
+    };
 
-    emitter.on("content_delta", (event: ClaudeCliStreamEvent) => {
+    const onContentDelta = (event: ClaudeCliStreamEvent) => {
       const delta = event.event.delta;
       const text = (delta?.type === "text_delta" && delta.text) || "";
       if (text && !res.writableEnded) {
@@ -219,13 +211,13 @@ async function handlePooledStreaming(
         isFirst = false;
         hasEmittedText = true;
       }
-    });
+    };
 
-    emitter.on("assistant", (message: ClaudeCliAssistant) => {
+    const onAssistant = (message: ClaudeCliAssistant) => {
       lastModel = message.message.model;
-    });
+    };
 
-    emitter.on("result", (result: ClaudeCliResult) => {
+    const onResult = (result: ClaudeCliResult) => {
       isComplete = true;
       const latencyMs = Date.now() - startTime;
       console.log(
@@ -260,9 +252,9 @@ async function handlePooledStreaming(
         res.end();
       }
       resolve();
-    });
+    };
 
-    emitter.on("error", (error: Error) => {
+    const onError = (error: Error) => {
       isComplete = true;
       const latencyMs = Date.now() - startTime;
       const errWithStatus = error as Error & {
@@ -308,7 +300,26 @@ async function handlePooledStreaming(
         res.end();
       }
       resolve();
+    };
+
+    // Client disconnect: remove only request-specific listeners,
+    // preserving safeEmitter()'s permanent error listener
+    res.on("close", () => {
+      if (!isComplete) {
+        emitter.removeListener("text_block_start", onTextBlockStart);
+        emitter.removeListener("content_delta", onContentDelta);
+        emitter.removeListener("assistant", onAssistant);
+        emitter.removeListener("result", onResult);
+        emitter.removeListener("error", onError);
+      }
+      resolve();
     });
+
+    emitter.on("text_block_start", onTextBlockStart);
+    emitter.on("content_delta", onContentDelta);
+    emitter.on("assistant", onAssistant);
+    emitter.on("result", onResult);
+    emitter.on("error", onError);
   });
 }
 
diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index fdb828d..09c6eb2 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -27,6 +27,17 @@ function safeEmitter(): EventEmitter {
   return emitter;
 }
 
+function safeListenerCount(emitter: EventEmitter, event: string): number {
+  try {
+    return emitter.listenerCount(event);
+  } catch (err) {
+    // Emitter is corrupted — treat as zero listeners
+    const errMsg = err instanceof Error ? err.message : String(err);
+    console.error(`[Router] Emitter corrupted in listenerCount("${event}"):`, errMsg);
+    return 0;
+  }
+}
+
 import type {
   ClaudeCliMessage,
   ClaudeCliStreamEvent,
@@ -462,11 +473,15 @@ export class SessionPoolRouter {
 
     child.on("error", (err) => {
       console.error(`[Router:${id}] Process error:`, err.message);
-      if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
-        try {
-          pooled.currentEmitter.emit("error", err);
-        } catch (emitErr) {
-          console.error(`[Router] Failed to emit process error:`, (emitErr as Error).message);
+      if (pooled.currentEmitter) {
+        if (safeListenerCount(pooled.currentEmitter, "error") > 0) {
+          try {
+            pooled.currentEmitter.emit("error", err);
+          } catch (emitErr) {
+            console.error(`[Router] Failed to emit process error:`, (emitErr as Error).message);
+          }
+        } else {
+          console.error(`[Router:${id}] Suppressed process error (no listeners):`, err.message);
         }
         pooled.currentEmitter = null;
       }
@@ -667,13 +682,17 @@ export class SessionPoolRouter {
       );
       this.requestTimeouts++;
 
-      if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
-        pooled.currentEmitter.emit(
-          "error",
-          new Error(
-            `Request timed out after ${this.config.requestTimeoutMs}ms`
-          )
-        );
+      if (pooled.currentEmitter) {
+        if (safeListenerCount(pooled.currentEmitter, "error") > 0) {
+          pooled.currentEmitter.emit(
+            "error",
+            new Error(
+              `Request timed out after ${this.config.requestTimeoutMs}ms`
+            )
+          );
+        } else {
+          console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Request timed out after ${this.config.requestTimeoutMs}ms`);
+        }
         pooled.currentEmitter = null;
       }
 

From 6e587f3aa21311162f9eb36c756c7e89f8987b50 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Thu, 9 Apr 2026 09:51:05 -0400
Subject: [PATCH 19/27] feat: add request_received + request_routed logging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Logs a request_received event (timestamp, requestId, sessionKey, model,
stream, messageCount, contentLength) at the top of handleChatCompletions —
before any routing logic. For pooled routes, also logs a request_routed
event with the route decision (routeType, pid, queueDepth, elapsedMs).

Purpose: diagnose dropped requests where gateway times out but
proxy has no record of receiving the request.
---
 src/server/routes.ts | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index 3fa7323..c56c60d 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -51,6 +51,17 @@ export async function handleChatCompletions(
   const body = req.body as OpenAIChatRequest;
   const stream = body.stream === true;
   const startTime = Date.now();
+  const earlySessionKey = (req.headers["x-openclaw-session-key"] as string | undefined) || (body as any).sessionId;
+  console.log(JSON.stringify({
+    ts: new Date().toISOString(),
+    event: "request_received",
+    requestId,
+    sessionKey: earlySessionKey || "(none)",
+    model: body.model || "(none)",
+    stream,
+    messageCount: body.messages?.length ?? 0,
+    contentLength: req.headers["content-length"] || "(none)",
+  }));
 
   try {
     // Validate request
@@ -84,6 +95,17 @@ export async function handleChatCompletions(
       if (result) {
         // Pooled route
         const { emitter, routeType, pid, queueDepth } = result;
+        console.log(JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "request_routed",
+          requestId,
+          sessionKey,
+          model: cliInput.model,
+          routeType,
+          pid,
+          queueDepth,
+          elapsedMs: Date.now() - startTime,
+        }));
 
         if (stream) {
           await handlePooledStreaming(

From faef78cf8834f71a376b1d2ea2cc01b02e17ef2a Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Thu, 9 Apr 2026 10:14:15 -0400
Subject: [PATCH 20/27] fix: release process on client disconnect
 (cancelRequest)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause: safeEmitter patches prevented crashes but left processes in
zombie 'busy' state when the gateway timed out and disconnected. The
process would stay locked to the session, still processing a response
nobody was consuming. Next request to the same session routed to the
zombie process → hung → cascading timeout.

Fix: When res.on('close') fires and the request isn't complete:
1. Remove request-specific listeners (existing)
2. Call router.cancelRequest(sessionKey) (NEW)
3. cancelRequest kills the stuck CLI process and respawns a fresh one

This preserves safeEmitter's crash prevention while ensuring processes
don't stay in zombie state after client disconnect.
---
 src/server/routes.ts     | 11 +++++++++--
 src/subprocess/router.ts | 27 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index c56c60d..b1fa7e9 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -324,8 +324,8 @@ async function handlePooledStreaming(
       resolve();
     };
 
-    // Client disconnect: remove only request-specific listeners,
-    // preserving safeEmitter()'s permanent error listener
+    // Client disconnect: remove request-specific listeners and release the
+    // CLI process so it doesn't stay "busy" with a response nobody consumes.
     res.on("close", () => {
       if (!isComplete) {
         emitter.removeListener("text_block_start", onTextBlockStart);
@@ -333,6 +333,13 @@ async function handlePooledStreaming(
         emitter.removeListener("assistant", onAssistant);
         emitter.removeListener("result", onResult);
         emitter.removeListener("error", onError);
+
+        // Tell the router to kill+respawn the process — it's mid-response
+        // with buffered output and no consumer.
+        const router = getPoolRouter();
+        if (router && sessionKey) {
+          router.cancelRequest(sessionKey);
+        }
       }
       resolve();
     });
diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 09c6eb2..34cffc3 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -596,6 +596,33 @@ export class SessionPoolRouter {
     this.startRequestTimeout(pooled);
   }
 
+  /**
+   * Called by routes.ts when the gateway client disconnects before the CLI
+   * finishes responding. Releases the process so it can serve new requests
+   * instead of staying "busy" with a response nobody is consuming.
+   */
+  public cancelRequest(sessionKey: string): void {
+    const proc = this.lockedSessions.get(sessionKey);
+    if (!proc || isPendingSentinel(proc)) return;
+    if (proc.state !== "busy") return;
+
+    console.log(JSON.stringify({
+      ts: new Date().toISOString(),
+      event: "request_cancelled",
+      processId: proc.id,
+      pid: proc.process.pid,
+      sessionKey,
+      reason: "client_disconnect",
+    }));
+
+    // Kill and respawn — the CLI process may be mid-response with buffered
+    // output that we can't cleanly drain. Safest to start fresh.
+    this.clearRequestTimeout(proc);
+    proc.currentEmitter = null;
+    this.rejectProcessQueue(proc);
+    this.killAndRespawn(proc);
+  }
+
   private releaseProcess(pooled: PooledProcess): void {
     this.clearRequestTimeout(pooled);
     pooled.currentEmitter = null;

From 59a71885026402d2c64c1e64beee6b9ede53cb6e Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Thu, 9 Apr 2026 10:28:13 -0400
Subject: [PATCH 21/27] fix: SSE keep-alive to prevent gateway timeout on slow
 first-token

Send ': keep-alive' SSE comments every 15s while waiting for the CLI
to produce the first real token. SSE comments (lines starting with ':')
are ignored by OpenAI SDK parsers but reset the HTTP connection idle
timer, preventing the gateway's 60s timeout from firing during opus
thinking time on large prompts.

Timer is cleared on result, error, or client disconnect. After the first
content arrives it stops writing and clears itself on its next tick.

Root cause: opus on 100KB+ prompts can take >60s to produce first token.
The gateway's OpenAI SDK times out at 60s idle, triggering cascade to
cyberdyne/deepseek even though the proxy and CLI are working correctly.
---
 src/server/routes.ts | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index b1fa7e9..333f1ec 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -194,6 +194,18 @@ async function handlePooledStreaming(
     let isComplete = false;
     let hasEmittedText = false;
 
+    // Keep-alive: send SSE comments every 15s to prevent gateway SDK timeout.
+    // SSE lines starting with ":" are ignored by parsers but reset the
+    // connection idle timer. Stops once the first real content arrives.
+    const KEEP_ALIVE_INTERVAL_MS = 15_000;
+    const keepAliveTimer = setInterval(() => {
+      if (!hasEmittedText && !isComplete && !res.writableEnded) {
+        res.write(": keep-alive\n\n");
+      } else {
+        clearInterval(keepAliveTimer);
+      }
+    }, KEEP_ALIVE_INTERVAL_MS);
+
     const onTextBlockStart = () => {
       if (hasEmittedText && !res.writableEnded) {
         const sepChunk = {
@@ -241,6 +253,7 @@ async function handlePooledStreaming(
 
     const onResult = (result: ClaudeCliResult) => {
       isComplete = true;
+      clearInterval(keepAliveTimer);
       const latencyMs = Date.now() - startTime;
       console.log(
         JSON.stringify({
@@ -278,6 +291,7 @@ async function handlePooledStreaming(
 
     const onError = (error: Error) => {
       isComplete = true;
+      clearInterval(keepAliveTimer);
       const latencyMs = Date.now() - startTime;
       const errWithStatus = error as Error & {
         statusCode?: number;
@@ -327,6 +341,7 @@ async function handlePooledStreaming(
     // Client disconnect: remove request-specific listeners and release the
     // CLI process so it doesn't stay "busy" with a response nobody consumes.
     res.on("close", () => {
+      clearInterval(keepAliveTimer);
       if (!isComplete) {
         emitter.removeListener("text_block_start", onTextBlockStart);
         emitter.removeListener("content_delta", onContentDelta);

From dfe9daa86db6ab0da633850f7cf823828ff7adcd Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Thu, 9 Apr 2026 11:12:53 -0400
Subject: [PATCH 22/27] revert: remove SSE keep-alive (broke gateway stream
 parsing)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SSE comment approach was wrong — OpenClaw's idle timeout tracks the
parsed token stream, not raw HTTP bytes, so comment lines don't reset it.
Worse, the ': keep-alive' SSE comments appear to have confused the
gateway's stream parser, causing responses to complete at the proxy but
not reach Discord.

The correct fix is agents.defaults.llm.idleTimeoutSeconds = 300 in
openclaw.json (already applied), which configures the idle timer directly.
---
 src/server/routes.ts | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index 333f1ec..102e8f2 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -194,17 +194,7 @@ async function handlePooledStreaming(
     let isComplete = false;
     let hasEmittedText = false;
 
-    // Keep-alive: send SSE comments every 15s to prevent gateway SDK timeout.
-    // SSE lines starting with ":" are ignored by parsers but reset the
-    // connection idle timer. Stops once the first real content arrives.
-    const KEEP_ALIVE_INTERVAL_MS = 15_000;
-    const keepAliveTimer = setInterval(() => {
-      if (!hasEmittedText && !isComplete && !res.writableEnded) {
-        res.write(": keep-alive\n\n");
-      } else {
-        clearInterval(keepAliveTimer);
-      }
-    }, KEEP_ALIVE_INTERVAL_MS);
+
 
     const onTextBlockStart = () => {
       if (hasEmittedText && !res.writableEnded) {
@@ -253,7 +243,6 @@ async function handlePooledStreaming(
 
     const onResult = (result: ClaudeCliResult) => {
       isComplete = true;
-      clearInterval(keepAliveTimer);
       const latencyMs = Date.now() - startTime;
       console.log(
         JSON.stringify({
@@ -291,7 +280,6 @@ async function handlePooledStreaming(
 
     const onError = (error: Error) => {
       isComplete = true;
-      clearInterval(keepAliveTimer);
       const latencyMs = Date.now() - startTime;
       const errWithStatus = error as Error & {
         statusCode?: number;
@@ -341,7 +329,6 @@ async function handlePooledStreaming(
     // Client disconnect: remove request-specific listeners and release the
     // CLI process so it doesn't stay "busy" with a response nobody consumes.
     res.on("close", () => {
-      clearInterval(keepAliveTimer);
       if (!isComplete) {
         emitter.removeListener("text_block_start", onTextBlockStart);
         emitter.removeListener("content_delta", onContentDelta);

From 3d27b7765c2eeb741339515298a154b7564438df Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Thu, 9 Apr 2026 18:38:54 -0400
Subject: [PATCH 23/27] fix: clear session lock in cancelRequest before kill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: cancelRequest() killed the process but never called clearSessionLock().
The lockedSessions map kept pointing at the dead process. Every subsequent
request for the same session found the dead process, enqueued behind it
(queueDepth=1), waited 300s for idleTimeout, cancelled, and repeated
indefinitely — a permanent cascade loop.

Fix: call clearSessionLock(sessionKey, proc) before killAndRespawn so
the next request routes to a fresh warm process instead of queuing behind
a dead one.

Evidence: pid 341522 was spawned at 18:49, received 4 requests over 3.5h,
all cancelled at 300s, never logged a process_death, always showed
queueDepth=1 — the lock was never released.
---
 src/subprocess/router.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 34cffc3..d8f3a31 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -620,6 +620,9 @@ export class SessionPoolRouter {
     this.clearRequestTimeout(proc);
     proc.currentEmitter = null;
     this.rejectProcessQueue(proc);
+    // Clear the session lock BEFORE kill so the next request gets a fresh
+    // warm process instead of enqueuing behind the dead one.
+    this.clearSessionLock(sessionKey, proc);
     this.killAndRespawn(proc);
   }
 

From 02b622bf02a73b6fa07e058c0d0a49d5ca271f1f Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Mon, 13 Apr 2026 23:45:11 -0400
Subject: [PATCH 24/27] [Psession-pool-context-fix] fix: eliminate quadratic
 context growth in pooled processes (session-pool-context-fix.spec.md)

---
 src/adapter/openai-to-cli.ts | 20 +++++++++++++++++++-
 src/server/routes.ts         |  1 +
 src/subprocess/router.ts     | 36 +++++++++++++++++++++++-------------
 3 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/src/adapter/openai-to-cli.ts b/src/adapter/openai-to-cli.ts
index 5774e34..0605541 100644
--- a/src/adapter/openai-to-cli.ts
+++ b/src/adapter/openai-to-cli.ts
@@ -7,7 +7,8 @@ import type { OpenAIChatRequest, OpenAIContentBlock } from "../types/openai.js";
 export type ClaudeModel = "opus" | "sonnet" | "haiku";
 
 export interface CliInput {
-  prompt: string;
+  prompt: string;        // Full prompt (system + history + user) — for first turn
+  latestPrompt: string;  // Latest user message only — for subsequent turns
   model: ClaudeModel;
   sessionId?: string;
 }
@@ -132,12 +133,29 @@ export function messagesToPrompt(
   return parts.join("\n").trim();
 }
 
+/**
+ * Extract only the latest user message from the messages array.
+ * Used by pooled processes on subsequent turns (requestCount > 0)
+ * where the CLI already has system context and prior turns in memory.
+ */
+export function latestUserMessage(
+  messages: OpenAIChatRequest["messages"]
+): string {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (messages[i].role === "user") {
+      return extractText(messages[i].content);
+    }
+  }
+  return "";
+}
+
 /**
  * Convert OpenAI chat request to CLI input format
  */
 export function openaiToCli(request: OpenAIChatRequest): CliInput {
   return {
     prompt: messagesToPrompt(request.messages),
+    latestPrompt: latestUserMessage(request.messages),
     model: extractModel(request.model),
     sessionId: request.user, // Use OpenAI's user field for session mapping
   };
diff --git a/src/server/routes.ts b/src/server/routes.ts
index 102e8f2..a806115 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -88,6 +88,7 @@ export async function handleChatCompletions(
     if (sessionKey && poolRouter) {
       const result = poolRouter.execute(
         cliInput.prompt,
+        cliInput.latestPrompt,
         cliInput.model,
         sessionKey
       );
diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index d8f3a31..446646e 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -125,7 +125,8 @@ export interface PooledProcess {
 }
 
 export interface PendingRequest {
-  prompt: string;
+  fullPrompt: string;
+  latestPrompt: string;
   emitter: EventEmitter;
   resolve: () => void;
 }
@@ -234,6 +235,7 @@ export class SessionPoolRouter {
 
   execute(
     prompt: string,
+    latestPrompt: string,
     model: ClaudeModel,
     sessionKey: string
   ): ExecuteResult | null {
@@ -277,16 +279,16 @@ export class SessionPoolRouter {
 
     if (existing) {
       if (isPendingSentinel(existing)) {
-        return this.enqueueOnSentinel(existing, prompt, sessionKey);
+        return this.enqueueOnSentinel(existing, prompt, latestPrompt, sessionKey);
       }
 
       const proc = existing;
       if (proc.state === "idle") {
         this.routeHits.locked++;
         this.totalRequests++;
-        return this.routeToProcess(proc, prompt, "locked");
+        return this.routeToProcess(proc, prompt, latestPrompt, "locked");
       } else {
-        return this.enqueueOnProcess(proc, prompt, sessionKey);
+        return this.enqueueOnProcess(proc, prompt, latestPrompt, sessionKey);
       }
     }
 
@@ -304,7 +306,7 @@ export class SessionPoolRouter {
       this.lockedSessions.set(sessionKey, proc);
       this.routeHits.warm++;
       this.totalRequests++;
-      return this.routeToProcess(proc, prompt, "warm");
+      return this.routeToProcess(proc, prompt, latestPrompt, "warm");
     }
 
     // Warm pool empty — need cold spawn
@@ -335,7 +337,7 @@ export class SessionPoolRouter {
         this.lockProcess(proc, sessionKey, agentChannel);
         this.transferSentinelQueue(sentinel, proc);
         this.lockedSessions.set(sessionKey, proc);
-        this.assignToProcess(proc, prompt, emitter);
+        this.assignToProcess(proc, prompt, latestPrompt, emitter);
       })
       .catch((err) => {
         console.log(
@@ -565,10 +567,11 @@ export class SessionPoolRouter {
   private routeToProcess(
     proc: PooledProcess,
     prompt: string,
+    latestPrompt: string,
     routeType: "locked" | "warm" | "cold"
   ): ExecuteResult {
     const emitter = safeEmitter();
-    this.assignToProcess(proc, prompt, emitter);
+    this.assignToProcess(proc, prompt, latestPrompt, emitter);
     return {
       emitter,
       routeType,
@@ -579,10 +582,14 @@ export class SessionPoolRouter {
 
   private assignToProcess(
     pooled: PooledProcess,
-    prompt: string,
+    fullPrompt: string,
+    latestPrompt: string,
     emitter: EventEmitter
   ): void {
     pooled.state = "busy";
+    // Select prompt BEFORE incrementing requestCount.
+    // Guard: if latestPrompt is empty (no user message in array), fall back to fullPrompt.
+    const prompt = (pooled.requestCount === 0 || !latestPrompt) ? fullPrompt : latestPrompt;
     pooled.requestCount++;
     pooled.lastRequestAt = Date.now();
     pooled.currentEmitter = emitter;
@@ -689,7 +696,7 @@ export class SessionPoolRouter {
 
     const next = pooled.requestQueue.shift()!;
     this.totalRequests++;
-    this.assignToProcess(pooled, next.prompt, next.emitter);
+    this.assignToProcess(pooled, next.fullPrompt, next.latestPrompt, next.emitter);
     next.resolve();
   }
 
@@ -749,7 +756,8 @@ export class SessionPoolRouter {
 
   private enqueueOnProcess(
     proc: PooledProcess,
-    prompt: string,
+    fullPrompt: string,
+    latestPrompt: string,
     sessionKey: string
   ): ExecuteResult | null {
     if (proc.state === "recycling") {
@@ -788,7 +796,8 @@ export class SessionPoolRouter {
 
     const emitter = safeEmitter();
     const pending: PendingRequest = {
-      prompt,
+      fullPrompt,
+      latestPrompt,
       emitter,
       resolve: () => {},
     };
@@ -805,7 +814,8 @@ export class SessionPoolRouter {
 
   private enqueueOnSentinel(
     sentinel: PendingSentinel,
-    prompt: string,
+    fullPrompt: string,
+    latestPrompt: string,
     _sessionKey: string
   ): ExecuteResult | null {
     if (sentinel.requestQueue.length >= this.config.requestQueueDepth) {
@@ -830,7 +840,7 @@ export class SessionPoolRouter {
     }
 
     const emitter = safeEmitter();
-    sentinel.requestQueue.push({ prompt, emitter, resolve: () => {} });
+    sentinel.requestQueue.push({ fullPrompt, latestPrompt, emitter, resolve: () => {} });
 
     return {
       emitter,

From f288a00617aaa75da60ea722940de98b2864f633 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Mon, 13 Apr 2026 23:56:14 -0400
Subject: [PATCH 25/27] fix: remove dead listenerCount guards and unused
 PendingRequest.resolve

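safeEmitter() guarantees a permanent error listener, making all
listenerCount("error") > 0 checks always-true dead code. Remove all 8
direct guards, the 2 safeListenerCount()-based guards, and the now-unused
safeListenerCount() helper, and emit unconditionally. The try/catch
wrappers that surrounded some of these emits are dropped as well, so an
exception thrown by an error listener now propagates to the caller.
Also remove PendingRequest.resolve which was always a no-op.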

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/subprocess/router.ts | 132 ++++++++++++---------------------------
 1 file changed, 41 insertions(+), 91 deletions(-)

diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index 446646e..ef0a555 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -27,17 +27,6 @@ function safeEmitter(): EventEmitter {
   return emitter;
 }
 
-function safeListenerCount(emitter: EventEmitter, event: string): number {
-  try {
-    return emitter.listenerCount(event);
-  } catch (err) {
-    // Emitter is corrupted — treat as zero listeners
-    const errMsg = err instanceof Error ? err.message : String(err);
-    console.error(`[Router] Emitter corrupted in listenerCount("${event}"):`, errMsg);
-    return 0;
-  }
-}
-
 import type {
   ClaudeCliMessage,
   ClaudeCliStreamEvent,
@@ -128,7 +117,6 @@ export interface PendingRequest {
   fullPrompt: string;
   latestPrompt: string;
   emitter: EventEmitter;
-  resolve: () => void;
 }
 
 export interface PendingSentinel {
@@ -242,13 +230,7 @@ export class SessionPoolRouter {
     if (this.shuttingDown) {
       const emitter = safeEmitter();
       process.nextTick(() => {
-        if (emitter.listenerCount("error") > 0) {
-          try {
-            emitter.emit("error", new Error("Server is shutting down"));
-          } catch (err) {
-            console.error(`[Router] Suppressed shutdown error:`, (err as Error).message);
-          }
-        }
+        emitter.emit("error", new Error("Server is shutting down"));
       });
       return { emitter, routeType: "fallback", pid: null, queueDepth: 0 };
     }
@@ -351,13 +333,7 @@ export class SessionPoolRouter {
         );
         this.rejectSentinelQueue(sentinel, 503, "Cold spawn failed");
         this.clearSessionLock(sessionKey, null);
-        if (emitter.listenerCount("error") > 0) {
-          try {
-            emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
-          } catch (emitErr) {
-            console.error(`[Router] Failed to emit cold spawn error:`, (emitErr as Error).message);
-          }
-        }
+        emitter.emit("error", new Error(`Cold spawn failed: ${err}`));
       });
 
     return {
@@ -476,15 +452,7 @@ export class SessionPoolRouter {
     child.on("error", (err) => {
       console.error(`[Router:${id}] Process error:`, err.message);
       if (pooled.currentEmitter) {
-        if (safeListenerCount(pooled.currentEmitter, "error") > 0) {
-          try {
-            pooled.currentEmitter.emit("error", err);
-          } catch (emitErr) {
-            console.error(`[Router] Failed to emit process error:`, (emitErr as Error).message);
-          }
-        } else {
-          console.error(`[Router:${id}] Suppressed process error (no listeners):`, err.message);
-        }
+        pooled.currentEmitter.emit("error", err);
         pooled.currentEmitter = null;
       }
     });
@@ -697,7 +665,6 @@ export class SessionPoolRouter {
     const next = pooled.requestQueue.shift()!;
     this.totalRequests++;
     this.assignToProcess(pooled, next.fullPrompt, next.latestPrompt, next.emitter);
-    next.resolve();
   }
 
   // -------------------------------------------------------------------------
@@ -720,16 +687,12 @@ export class SessionPoolRouter {
       this.requestTimeouts++;
 
       if (pooled.currentEmitter) {
-        if (safeListenerCount(pooled.currentEmitter, "error") > 0) {
-          pooled.currentEmitter.emit(
-            "error",
-            new Error(
-              `Request timed out after ${this.config.requestTimeoutMs}ms`
-            )
-          );
-        } else {
-          console.error(`[Router:${pooled.id}] Suppressed error (no listeners): Request timed out after ${this.config.requestTimeoutMs}ms`);
-        }
+        pooled.currentEmitter.emit(
+          "error",
+          new Error(
+            `Request timed out after ${this.config.requestTimeoutMs}ms`
+          )
+        );
         pooled.currentEmitter = null;
       }
 
@@ -776,15 +739,13 @@ export class SessionPoolRouter {
     if (proc.requestQueue.length >= this.config.requestQueueDepth) {
       const emitter = safeEmitter();
       process.nextTick(() => {
-        if (emitter.listenerCount("error") > 0) {
-          emitter.emit(
-            "error",
-            Object.assign(
-              new Error("Too Many Requests — per-session queue full"),
-              { statusCode: 429, retryAfter: 5 }
-            )
-          );
-        }
+        emitter.emit(
+          "error",
+          Object.assign(
+            new Error("Too Many Requests — per-session queue full"),
+            { statusCode: 429, retryAfter: 5 }
+          )
+        );
       });
       return {
         emitter,
@@ -799,7 +760,6 @@ export class SessionPoolRouter {
       fullPrompt,
       latestPrompt,
       emitter,
-      resolve: () => {},
     };
     proc.requestQueue.push(pending);
     this.routeHits.locked++;
@@ -821,15 +781,13 @@ export class SessionPoolRouter {
     if (sentinel.requestQueue.length >= this.config.requestQueueDepth) {
       const emitter = safeEmitter();
       process.nextTick(() => {
-        if (emitter.listenerCount("error") > 0) {
-          emitter.emit(
-            "error",
-            Object.assign(
-              new Error("Too Many Requests — per-session queue full"),
-              { statusCode: 429, retryAfter: 5 }
-            )
-          );
-        }
+        emitter.emit(
+          "error",
+          Object.assign(
+            new Error("Too Many Requests — per-session queue full"),
+            { statusCode: 429, retryAfter: 5 }
+          )
+        );
       });
       return {
         emitter,
@@ -840,7 +798,7 @@ export class SessionPoolRouter {
     }
 
     const emitter = safeEmitter();
-    sentinel.requestQueue.push({ fullPrompt, latestPrompt, emitter, resolve: () => {} });
+    sentinel.requestQueue.push({ fullPrompt, latestPrompt, emitter });
 
     return {
       emitter,
@@ -866,27 +824,23 @@ export class SessionPoolRouter {
     message: string
   ): void {
     for (const pending of sentinel.requestQueue) {
-      if (pending.emitter.listenerCount("error") > 0) {
-        pending.emitter.emit(
-          "error",
-          Object.assign(new Error(message), { statusCode, retryAfter: 3 })
-        );
-      }
+      pending.emitter.emit(
+        "error",
+        Object.assign(new Error(message), { statusCode, retryAfter: 3 })
+      );
     }
     sentinel.requestQueue = [];
   }
 
   private rejectProcessQueue(proc: PooledProcess): void {
     for (const pending of proc.requestQueue) {
-      if (pending.emitter.listenerCount("error") > 0) {
-        pending.emitter.emit(
-          "error",
-          Object.assign(new Error("Process unavailable"), {
-            statusCode: 503,
-            retryAfter: 3,
-          })
-        );
-      }
+      pending.emitter.emit(
+        "error",
+        Object.assign(new Error("Process unavailable"), {
+          statusCode: 503,
+          retryAfter: 3,
+        })
+      );
     }
     proc.requestQueue = [];
   }
@@ -991,15 +945,11 @@ export class SessionPoolRouter {
 
     this.clearRequestTimeout(pooled);
 
-    if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
-      try {
-        pooled.currentEmitter.emit(
-          "error",
-          new Error(`Pool process ${pooled.id} died with code ${code}`)
-        );
-      } catch (emitErr) {
-        console.error(`[Router] Failed to emit process death error:`, (emitErr as Error).message);
-      }
+    if (pooled.currentEmitter) {
+      pooled.currentEmitter.emit(
+        "error",
+        new Error(`Pool process ${pooled.id} died with code ${code}`)
+      );
       pooled.currentEmitter = null;
     }
 
@@ -1204,7 +1154,7 @@ export class SessionPoolRouter {
     const kills: Promise<void>[] = [];
     for (const pooled of this.allProcesses.values()) {
       this.clearRequestTimeout(pooled);
-      if (pooled.currentEmitter && pooled.currentEmitter.listenerCount("error") > 0) {
+      if (pooled.currentEmitter) {
         pooled.currentEmitter.emit(
           "error",
           new Error("Server shutting down")

From 3b528a7eb0adb6f70995689f238e176ab052cb72 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Tue, 14 Apr 2026 00:06:27 -0400
Subject: [PATCH 26/27] =?UTF-8?q?fix:=20address=20Indent=20CODE=5FQUALITY?=
 =?UTF-8?q?=20=E2=80=94=20listener=20cleanup=20and=20extractText=20guard?=
 =?UTF-8?q?=20(session-pool-context-fix.spec.md)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- onError() now removes all event listeners before resolving (was leaving
  text_block_start/content_delta/assistant/result attached after error)
- extractText() filters blocks where text is null/undefined to prevent
  silent empty returns on malformed content arrays

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/adapter/openai-to-cli.ts | 6 +++++-
 src/server/routes.ts         | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/adapter/openai-to-cli.ts b/src/adapter/openai-to-cli.ts
index 0605541..7428158 100644
--- a/src/adapter/openai-to-cli.ts
+++ b/src/adapter/openai-to-cli.ts
@@ -62,7 +62,11 @@ function extractText(content: string | OpenAIContentBlock[]): string {
   }
   if (Array.isArray(content)) {
     return content
-      .filter((block) => block.type === "text" || block.type === "input_text")
+      .filter(
+        (block) =>
+          (block.type === "text" || block.type === "input_text") &&
+          block.text != null
+      )
       .map((block) => block.text)
       .join("\n");
   }
diff --git a/src/server/routes.ts b/src/server/routes.ts
index a806115..04f6662 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -281,6 +281,10 @@ async function handlePooledStreaming(
 
     const onError = (error: Error) => {
       isComplete = true;
+      emitter.removeListener("text_block_start", onTextBlockStart);
+      emitter.removeListener("content_delta", onContentDelta);
+      emitter.removeListener("assistant", onAssistant);
+      emitter.removeListener("result", onResult);
       const latencyMs = Date.now() - startTime;
       const errWithStatus = error as Error & {
         statusCode?: number;

From 96a8eb298df7830a1fd8fcf652ae4304e5ee2765 Mon Sep 17 00:00:00 2001
From: Scope <scope@athenscooks.com>
Date: Tue, 14 Apr 2026 18:23:20 -0400
Subject: [PATCH 27/27] [P0] Detect gateway session resets via message count
 drop (router)

Track lastMessageCount on PooledProcess; recycle stale CLI process when
incoming messageCount drops below stored value, indicating a gateway reset
(/new, idle timeout, or compaction). Threads messageCount through execute(),
routeToProcess(), assignToProcess(), enqueueOnProcess(), enqueueOnSentinel(),
drainNextRequest(), and PendingRequest. Routes layer passes messages.length.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/server/routes.ts     |  3 +-
 src/subprocess/router.ts | 64 +++++++++++++++++++++++++++++++---------
 2 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/src/server/routes.ts b/src/server/routes.ts
index 04f6662..8e6a6d3 100644
--- a/src/server/routes.ts
+++ b/src/server/routes.ts
@@ -90,7 +90,8 @@ export async function handleChatCompletions(
         cliInput.prompt,
         cliInput.latestPrompt,
         cliInput.model,
-        sessionKey
+        sessionKey,
+        body.messages.length
       );
 
       if (result) {
diff --git a/src/subprocess/router.ts b/src/subprocess/router.ts
index ef0a555..9387127 100644
--- a/src/subprocess/router.ts
+++ b/src/subprocess/router.ts
@@ -104,6 +104,7 @@ export interface PooledProcess {
   lastRequestAt: number;
   spawnedAt: number;
   requestCount: number;
+  lastMessageCount: number;
   state: "idle" | "busy" | "recycling";
   requestQueue: PendingRequest[];
   buffer: string;
@@ -116,6 +117,7 @@ export interface PooledProcess {
 export interface PendingRequest {
   fullPrompt: string;
   latestPrompt: string;
+  messageCount: number;
   emitter: EventEmitter;
 }
 
@@ -225,7 +227,8 @@ export class SessionPoolRouter {
     prompt: string,
     latestPrompt: string,
     model: ClaudeModel,
-    sessionKey: string
+    sessionKey: string,
+    messageCount: number = 0
   ): ExecuteResult | null {
     if (this.shuttingDown) {
       const emitter = safeEmitter();
@@ -261,16 +264,42 @@ export class SessionPoolRouter {
 
     if (existing) {
       if (isPendingSentinel(existing)) {
-        return this.enqueueOnSentinel(existing, prompt, latestPrompt, sessionKey);
+        return this.enqueueOnSentinel(existing, prompt, latestPrompt, sessionKey, messageCount);
       }
 
       const proc = existing;
-      if (proc.state === "idle") {
+
+      // Detect gateway session reset: message count dropped below stored value.
+      // Normal flow is monotonically increasing; any drop means the gateway's
+      // context diverged (reset via /new, idle timeout, or compaction).
+      if (messageCount > 0 && proc.lastMessageCount > 0 && messageCount < proc.lastMessageCount) {
+        console.log(JSON.stringify({
+          ts: new Date().toISOString(),
+          event: "session_reset_detected",
+          sessionKey,
+          previousMessageCount: proc.lastMessageCount,
+          incomingMessageCount: messageCount,
+          processId: proc.id,
+          pid: proc.process.pid,
+          requestCount: proc.requestCount,
+        }));
+        if (proc.state === "busy") {
+          // Can't kill mid-request — remove lock now and let releaseProcess kill after.
+          this.lockedSessions.delete(sessionKey);
+          proc.lockedTo = null;
+          proc.orphaned = true;
+        } else {
+          this.clearSessionLock(sessionKey, proc);
+          this.killAndRespawn(proc);
+          this.processRecycles++;
+        }
+        // Fall through to warm/cold claim below
+      } else if (proc.state === "idle") {
         this.routeHits.locked++;
         this.totalRequests++;
-        return this.routeToProcess(proc, prompt, latestPrompt, "locked");
+        return this.routeToProcess(proc, prompt, latestPrompt, "locked", messageCount);
       } else {
-        return this.enqueueOnProcess(proc, prompt, latestPrompt, sessionKey);
+        return this.enqueueOnProcess(proc, prompt, latestPrompt, sessionKey, messageCount);
       }
     }
 
@@ -288,7 +317,7 @@ export class SessionPoolRouter {
       this.lockedSessions.set(sessionKey, proc);
       this.routeHits.warm++;
       this.totalRequests++;
-      return this.routeToProcess(proc, prompt, latestPrompt, "warm");
+      return this.routeToProcess(proc, prompt, latestPrompt, "warm", messageCount);
     }
 
     // Warm pool empty — need cold spawn
@@ -319,7 +348,7 @@ export class SessionPoolRouter {
         this.lockProcess(proc, sessionKey, agentChannel);
         this.transferSentinelQueue(sentinel, proc);
         this.lockedSessions.set(sessionKey, proc);
-        this.assignToProcess(proc, prompt, latestPrompt, emitter);
+        this.assignToProcess(proc, prompt, latestPrompt, emitter, messageCount);
       })
       .catch((err) => {
         console.log(
@@ -409,6 +438,7 @@ export class SessionPoolRouter {
       lastRequestAt: 0,
       spawnedAt: Date.now(),
       requestCount: 0,
+      lastMessageCount: 0,
       state: "idle",
       requestQueue: [],
       buffer: "",
@@ -536,10 +566,11 @@ export class SessionPoolRouter {
     proc: PooledProcess,
     prompt: string,
     latestPrompt: string,
-    routeType: "locked" | "warm" | "cold"
+    routeType: "locked" | "warm" | "cold",
+    messageCount: number = 0
   ): ExecuteResult {
     const emitter = safeEmitter();
-    this.assignToProcess(proc, prompt, latestPrompt, emitter);
+    this.assignToProcess(proc, prompt, latestPrompt, emitter, messageCount);
     return {
       emitter,
       routeType,
@@ -552,13 +583,15 @@ export class SessionPoolRouter {
     pooled: PooledProcess,
     fullPrompt: string,
     latestPrompt: string,
-    emitter: EventEmitter
+    emitter: EventEmitter,
+    messageCount: number = 0
   ): void {
     pooled.state = "busy";
     // Select prompt BEFORE incrementing requestCount.
     // Guard: if latestPrompt is empty (no user message in array), fall back to fullPrompt.
     const prompt = (pooled.requestCount === 0 || !latestPrompt) ? fullPrompt : latestPrompt;
     pooled.requestCount++;
+    pooled.lastMessageCount = messageCount;
     pooled.lastRequestAt = Date.now();
     pooled.currentEmitter = emitter;
 
@@ -664,7 +697,7 @@ export class SessionPoolRouter {
 
     const next = pooled.requestQueue.shift()!;
     this.totalRequests++;
-    this.assignToProcess(pooled, next.fullPrompt, next.latestPrompt, next.emitter);
+    this.assignToProcess(pooled, next.fullPrompt, next.latestPrompt, next.emitter, next.messageCount);
   }
 
   // -------------------------------------------------------------------------
@@ -721,7 +754,8 @@ export class SessionPoolRouter {
     proc: PooledProcess,
     fullPrompt: string,
     latestPrompt: string,
-    sessionKey: string
+    sessionKey: string,
+    messageCount: number = 0
   ): ExecuteResult | null {
     if (proc.state === "recycling") {
       this.routeHits.fallback++;
@@ -759,6 +793,7 @@ export class SessionPoolRouter {
     const pending: PendingRequest = {
       fullPrompt,
       latestPrompt,
+      messageCount,
       emitter,
     };
     proc.requestQueue.push(pending);
@@ -776,7 +811,8 @@ export class SessionPoolRouter {
     sentinel: PendingSentinel,
     fullPrompt: string,
     latestPrompt: string,
-    _sessionKey: string
+    _sessionKey: string,
+    messageCount: number = 0
   ): ExecuteResult | null {
     if (sentinel.requestQueue.length >= this.config.requestQueueDepth) {
       const emitter = safeEmitter();
@@ -798,7 +834,7 @@ export class SessionPoolRouter {
     }
 
     const emitter = safeEmitter();
-    sentinel.requestQueue.push({ fullPrompt, latestPrompt, emitter });
+    sentinel.requestQueue.push({ fullPrompt, latestPrompt, messageCount, emitter });
 
     return {
       emitter,