From 4629e80cab2295a9c014f20b67b6b4aa88988c89 Mon Sep 17 00:00:00 2001
From: Mike Rapadas <mike@mrap.me>
Date: Wed, 29 Apr 2026 18:48:09 -0400
Subject: [PATCH] daemon: batched dequeue per tick + SIGHUP config hot-reload

- Add spawns_per_tick config field (default 4) to cap workers spawned per tick
- Rewrite daemon dequeue loop: drain up to spawns_per_tick per tick with 50-150ms jitter
- SIGHUP handler: live-reload max_workers/spawns_per_tick/claude_bin without restart
- Add `boi daemon reload` subcommand (sends SIGHUP to the PID stored in daemon.lock)
- Add try_load()/try_load_from() for fallible config parsing (bad config = no-op)
- docs/daemon.md: tick cadence, spawns_per_tick semantics, hot-reload behavior
- 14 new tests: daemon_batch (8) + daemon_hotreload (6), all passing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md             |  48 +++++--
 SKILL.md              |   8 +-
 docs/daemon.md        |  78 +++++++++++
 src/cli/config_cmd.rs |   6 +-
 src/cli/daemon.rs     | 297 +++++++++++++++++++++++++++++++++++++++++-
 src/config.rs         | 126 ++++++++++++++++++
 6 files changed, 539 insertions(+), 24 deletions(-)
 create mode 100644 docs/daemon.md

diff --git a/README.md b/README.md
index 0ed76b2..67b1f34 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,9 @@ prompt_template = "templates/my-prompt.md"  # required for claude/default phases
 model = "claude-sonnet-4-6"                  # default: claude-sonnet-4-6
 effort = "medium"                            # low | medium | high
 timeout = 300                                # seconds; must be > 0
-runtime = "claude"                           # "claude" (default) | "deterministic"
+runtime = "claude"                           # "claude" (default) | "openrouter" | "deterministic"
+api_key_env = "OPENROUTER_API_KEY"           # openrouter only — env var holding the API key (default: OPENROUTER_API_KEY)
+bare = false                                 # true → append --bare (skips session/MCP/skill loading; ~96% cold-start reduction)
 
 # Completion routing
 [completion]
@@ -315,18 +317,20 @@ Exit 0 = passed. Any non-zero exit = failed. Stdout/stderr are captured as the f
 
 ## Runtime Configuration
 
-BOI is runtime-agnostic. The default runtime is `claude` (Claude Code CLI). `codex` (Codex CLI) is also supported.
+BOI is runtime-agnostic. The default runtime is `claude` (Claude Code CLI). `codex` (Codex CLI) and `openrouter` (direct HTTP to OpenRouter API) are also supported.
 
 ### Global Default
 
-Set in `~/.boi/config.json`:
+Set in `~/.boi/config.yaml`:
 
-```json
-{
-  "runtime": { "default": "claude" }
-}
+```yaml
+runtime:
+  default: claude
+brain: ~/mrap-hex   # optional — path to brain dir; must contain CLAUDE.md
 ```
 
+`brain` sets the default brain directory for all specs. Workers read `{brain}/CLAUDE.md` as system context before each task. BOI errors early if `brain` is set but the path or `CLAUDE.md` is missing.
+
 ### Per-Spec Override
 
 Add a `runtime:` field to any spec:
@@ -339,13 +343,18 @@ Spec-level override takes precedence over the global default.
 
 ### Model Mappings
 
-Phase config accepts either full model IDs or aliases (`opus`, `sonnet`, `haiku`). The runtime resolves them:
+Phase config accepts either full model IDs or aliases. The runtime resolves them:
 
-| Alias | Claude | Codex |
-|-------|--------|-------|
-| `opus` | claude-opus-4-6 | o3 |
-| `sonnet` | claude-sonnet-4-6 | o4-mini |
-| `haiku` | claude-haiku-4-5-20251001 | o4-mini |
+| Alias | Claude | Codex | OpenRouter |
+|-------|--------|-------|------------|
+| `opus` | claude-opus-4-6 | o3 | — |
+| `sonnet` | claude-sonnet-4-6 | o4-mini | — |
+| `haiku` | claude-haiku-4-5-20251001 | o4-mini | anthropic/claude-haiku-4-5 |
+| `gemini-flash` | — | — | google/gemini-2.0-flash-001 |
+| `grok` | — | — | x-ai/grok-beta |
+| `qwen-coder` | — | — | qwen/qwen-2.5-coder-32b-instruct |
+
+OpenRouter phases require an API key in the environment variable named by `api_key_env` (default: `OPENROUTER_API_KEY`) and a `model` field in `[worker]`. Use the `openrouter` runtime for text-only judgment phases (critic, plan-critique, spec-critique) to skip Claude cold-start and reduce cost.
 
 ### CLI Check
 
@@ -358,6 +367,7 @@ boi dispatch <file.yaml> [options]        Submit a spec to the queue
 boi status [--watch] [--json]             Show queue and worker status
 boi log <queue-id> [--full] [-f|--follow] Tail worker output for a spec
 boi cancel <queue-id>                     Cancel a running or queued spec
+boi daemon reload                         Send SIGHUP to reload max_workers/spawns_per_tick/claude_bin
 boi stop                                  Stop daemon and all workers
 boi install [--workers N]                 One-time setup (run outside Claude Code)
 boi resume <queue-id> | --all            Resume failed or canceled specs
@@ -372,6 +382,8 @@ boi dep add|remove|set|clear|show|viz|check
 boi project create|list|status|context|delete
 boi bench --pipeline name:path [--pipeline ...] --spec FILE | --battery DIR [--runs N]  Benchmark N pipelines
 boi bench --phase <name> --spec FILE [--runs N]  Benchmark a single phase in isolation
+boi plan [spec.yaml ...] [--force-refresh]        Build DAG + LLM critique for in-flight and new specs
+boi dispatch-many <spec1.yaml> [spec2.yaml ...]   DAG-ordered multi-spec dispatch with LLM gate
 ```
 
 **`dispatch` options:**
@@ -385,6 +397,16 @@ boi bench --phase <name> --spec FILE [--runs N]  Benchmark a single phase in iso
 | `--after SA7F3,TB2E1` | Wait for listed specs to complete before starting |
 | `--project NAME` | Associate with a project (injects project context) |
 
+**`dispatch-many` options:**
+
+| Flag | Description |
+|------|-------------|
+| `--yes` | Auto-approve dispatch without interactive prompt |
+| `--force` | Override warn-level concerns (cannot override blocks) |
+| `--priority N` | Priority applied to all dispatched specs (default: 100) |
+| `--mode MODE` | Mode applied to all specs |
+| `--after SA7F3` | Additional upstream dep for all dispatched specs |
+
 ## Output Preservation
 
 BOI automatically preserves the work product of every completed spec so outputs are never lost when the worktree is cleaned up.
diff --git a/SKILL.md b/SKILL.md
index 60c6da5..cf7e7f0 100644
--- a/SKILL.md
+++ b/SKILL.md
@@ -66,6 +66,8 @@ boi critic status | run | enable | disable | checks
 boi spec <queue-id> [add|skip|next|block|edit|deps]
 boi dep add|remove|set|clear|show|viz|check
 boi project create|list|status|context|delete
+boi plan [spec.yaml ...] [--force-refresh]        Build DAG + LLM critique for in-flight and new specs
+boi dispatch-many <spec1.yaml> [spec2.yaml ...]   DAG-ordered multi-spec dispatch with LLM gate
 ```
 
 ## Spec Format
@@ -141,7 +143,9 @@ prompt_template = "path/to/prompt.md"   # required for claude phases
 model = "claude-sonnet-4-6"
 effort = "medium"                        # low | medium | high
 timeout = 300                            # seconds
-runtime = "claude"                       # "claude" (default) | "deterministic"
+runtime = "claude"                       # "claude" (default) | "openrouter" | "deterministic"
+api_key_env = "OPENROUTER_API_KEY"       # openrouter only — env var holding the API key (default: OPENROUTER_API_KEY)
+bare = false                             # true → --bare flag (skips session/MCP/skill loading; ~96% cold-start reduction)
 
 [completion]
 approve_signal = "## Approved"
@@ -293,7 +297,7 @@ Exit 0 = passed. Any non-zero = failed.
 ## Constraints
 
 - `boi install` runs **outside Claude Code** in a terminal.
-- Workers are headless, non-interactive CLI agent sessions. Default runtime: `claude -p`. Codex runtime: `codex exec`. Configured globally in `~/.boi/config.json` or per-spec via `**Runtime:** codex` header.
+- Workers are headless, non-interactive CLI agent sessions. Default runtime: `claude -p`. Codex runtime: `codex exec`. OpenRouter runtime: direct HTTP to `openrouter.ai/api/v1/chat/completions` (requires `OPENROUTER_API_KEY`; used for text-only judgment phases). Configured globally in `~/.boi/config.yaml` or per-spec via `**Runtime:** codex` header.
 - Daemon polls every 5 seconds. Status may lag slightly.
 - Default 3 workers, max 5. Set during install.
 - Workers get fresh context each iteration. No memory of previous iterations.
diff --git a/docs/daemon.md b/docs/daemon.md
new file mode 100644
index 0000000..5416c14
--- /dev/null
+++ b/docs/daemon.md
@@ -0,0 +1,78 @@
+# BOI Daemon
+
+## Overview
+
+The BOI daemon is a long-running process that monitors the queue and dispatches workers for pending specs. It is started with `boi daemon start` (background) or `boi daemon foreground` (attached to the terminal).
+
+## Tick Cadence
+
+The daemon polls every ~5 seconds (10 × 500 ms sleep increments). Each tick:
+
+1. Writes a heartbeat timestamp to `~/.boi/daemon.heartbeat`.
+2. Checks the SIGHUP reload flag and applies config changes if set.
+3. Reaps finished worker threads.
+4. Computes how many new workers to spawn this tick and drains the queue up to that cap.
+5. Sleeps 500 ms × 10 before the next tick (interruptible by SIGTERM).
+
+## Batched Dequeue (`spawns_per_tick`)
+
+Rather than spawning one worker per tick, the daemon drains up to `spawns_per_tick` eligible specs per tick (default 4). The actual number spawned is:
+
+```
+to_spawn = min(max(max_workers - current_workers, 0), spawns_per_tick)
+```
+
+A 50–150 ms randomized jitter is inserted between successive spawns within a single tick to smooth cold-start bursts on the Anthropic API. Configure `spawns_per_tick` in `~/.boi/config.yaml`:
+
+```yaml
+spawns_per_tick: 4   # default; raise once cold-start behavior is validated
+```
+
+## SIGHUP Config Hot-Reload
+
+Sending SIGHUP to the daemon triggers a live config reload **without restarting** or interrupting in-flight workers.
+
+### What reloads
+
+| Setting | Reloaded? |
+|---------|-----------|
+| `max_workers` | Yes |
+| `spawns_per_tick` | Yes |
+| `claude_bin` | Yes |
+| `task_timeout_minutes` | No — startup snapshot |
+| `retry_count` | No — startup snapshot |
+| `cleanup_on_failure` | No — startup snapshot |
+| `paths.*` | No — startup snapshot |
+
+### Reload semantics
+
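+- **Parse failure is a no-op.** If the config file cannot be read or parsed, the daemon logs `[boi daemon] reload FAILED: ...; keeping current config` and retains the current values. A config file that does not exist is not a failure: the three live fields are reloaded with their built-in defaults.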
+- **In-flight workers are unaffected.** Workers receive a snapshot of `WorkerConfig` at spawn time; live config mutation never reaches them.
+- **No restart required.** The daemon process continues running; only the three live fields are updated.
+
+### Triggering a reload
+
+```bash
+# Recommended: change the value with the CLI, then signal the daemon to reload
+boi config set max_workers 10
+boi daemon reload
+
+# Or send SIGHUP directly
+kill -HUP $(cat ~/.boi/daemon.lock)
+```
+
+`boi daemon reload` reads the PID from `~/.boi/daemon.lock`, verifies the process is alive, and sends SIGHUP. The reload takes effect within the next tick (≤ 5 seconds).
+
+## Daemon Commands
+
+| Command | Description |
+|---------|-------------|
+| `boi daemon start` | Start daemon in the background |
+| `boi daemon stop` | Send SIGTERM; waits up to 10s, then SIGKILL |
+| `boi daemon restart` | Stop + start |
+| `boi daemon foreground` | Run attached to the terminal |
+| `boi daemon reload` | Send SIGHUP to reload `max_workers`, `spawns_per_tick`, `claude_bin` |
+
+## PID and Lock File
+
+The daemon uses an exclusive `flock` on `~/.boi/daemon.lock` (which also stores the PID) as its singleton guard. This is crash-safe: the lock auto-releases when the process exits, so stale PID files can never block a restart.
diff --git a/src/cli/config_cmd.rs b/src/cli/config_cmd.rs
index 5c9490f..f3c929e 100644
--- a/src/cli/config_cmd.rs
+++ b/src/cli/config_cmd.rs
@@ -4,6 +4,7 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config)
     match (key, value) {
         (None, _) => {
             println!("max_workers:          {}", cfg.max_workers());
+            println!("spawns_per_tick:      {}", cfg.spawns_per_tick());
             println!("task_timeout_minutes: {}", cfg.task_timeout_secs() / 60);
             println!("retry_count:          {}", cfg.retry_count());
             println!("db_path:              {}", cfg.db_path().display());
@@ -21,6 +22,7 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config)
         (Some(k), None) => {
             let val = match k {
                 "max_workers" => cfg.max_workers().to_string(),
+                "spawns_per_tick" => cfg.spawns_per_tick().to_string(),
                 "task_timeout_minutes" => (cfg.task_timeout_secs() / 60).to_string(),
                 "retry_count" => cfg.retry_count().to_string(),
                 "db_path" => cfg.db_path().display().to_string(),
@@ -36,9 +38,9 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config)
         (Some(k), Some(v)) => {
             // Validate key
             match k {
-                "max_workers" | "task_timeout_minutes" | "retry_count" => {}
+                "max_workers" | "spawns_per_tick" | "task_timeout_minutes" | "retry_count" => {}
                 _ => {
-                    eprintln!("unknown config key: {} (supported: max_workers, task_timeout_minutes, retry_count)", k);
+                    eprintln!("unknown config key: {} (supported: max_workers, spawns_per_tick, task_timeout_minutes, retry_count)", k);
                     std::process::exit(1);
                 }
             }
diff --git a/src/cli/daemon.rs b/src/cli/daemon.rs
index 99cc880..5e31fd1 100644
--- a/src/cli/daemon.rs
+++ b/src/cli/daemon.rs
@@ -199,8 +199,18 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi
         .ok();
     }
 
-    let wc = worker::WorkerConfig {
+    // SIGHUP hot-reload flag: set to true by signal_hook when SIGHUP arrives.
+    let reload_flag = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
+    if let Err(e) = signal_hook::flag::register(
+        signal_hook::consts::SIGHUP,
+        std::sync::Arc::clone(&reload_flag),
+    ) {
+        eprintln!("[boi daemon] WARNING: failed to install SIGHUP handler: {}", e);
+    }
+
+    let mut wc = worker::WorkerConfig {
         max_workers: cfg.max_workers(),
+        spawns_per_tick: cfg.spawns_per_tick(),
         task_timeout_secs: cfg.task_timeout_secs(),
         retry_count: cfg.retry_count(),
         cleanup_on_failure: cfg.cleanup_on_failure(),
@@ -244,6 +254,21 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi
             eprintln!("[boi daemon] ERROR: failed to write heartbeat: {}", e);
         }
 
+        // SIGHUP hot-reload: only max_workers, spawns_per_tick, claude_bin are live-updated.
+        // All other settings remain frozen at startup. In-flight workers keep their original config.
+        if reload_flag.swap(false, std::sync::atomic::Ordering::SeqCst) {
+            match config::try_load() {
+                Ok(new_cfg) => {
+                    apply_reload(&mut wc, &new_cfg);
+                    eprintln!(
+                        "[boi daemon] reloaded config: max_workers={}, spawns_per_tick={}, claude_bin={}",
+                        wc.max_workers, wc.spawns_per_tick, wc.claude_bin
+                    );
+                }
+                Err(e) => eprintln!("[boi daemon] reload FAILED: {}; keeping current config", e),
+            }
+        }
+
         {
             let mut workers = active.lock().unwrap_or_else(|e| {
                 eprintln!("[boi daemon] worker mutex poisoned, recovering: {}", e);
@@ -251,7 +276,9 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi
             });
             workers.retain(|h| !h.is_finished());
 
-            if workers.len() < wc.max_workers as usize {
+            let to_spawn = compute_to_spawn(workers.len(), wc.max_workers, wc.spawns_per_tick);
+
+            for slot in 0..to_spawn {
                 match queue::Queue::open(db_str) {
                     Ok(queue) => match queue.dequeue() {
                         Ok(Some(rec)) => {
@@ -268,7 +295,7 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi
                                             eprintln!("[boi daemon] ERROR: failed to mark spec {} as failed: {}", spec_id, e);
                                         }
                                     }
-                                    continue;
+                                    continue; // skip to next batch slot
                                 }
                             };
                             let qpath = db_str.to_string();
@@ -285,10 +312,16 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi
                                 .unwrap_or(timeout);
 
                             let tel = Telemetry::new(PathBuf::from(&qpath));
-                            eprintln!("[boi daemon] starting worker for {}", spec_id);
+                            eprintln!(
+                                "[boi daemon] starting worker for {} (batch slot {}/{})",
+                                spec_id,
+                                slot + 1,
+                                to_spawn
+                            );
                             let handle = std::thread::spawn(move || {
                                 let wc = worker::WorkerConfig {
                                     max_workers: 1,
+                                    spawns_per_tick: 1,
                                     task_timeout_secs: spec_timeout,
                                     retry_count: retries,
                                     cleanup_on_failure: cleanup_fail,
@@ -301,11 +334,27 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi
                                 }
                             });
                             workers.push(handle);
+
+                            // Micro-jitter between successive spawns to smooth cold-start burst
+                            if slot + 1 < to_spawn {
+                                let jitter_ns = std::time::SystemTime::now()
+                                    .duration_since(std::time::UNIX_EPOCH)
+                                    .map(|d| d.subsec_nanos() as u64)
+                                    .unwrap_or(0);
+                                let jitter_ms = 50 + (jitter_ns % 101);
+                                std::thread::sleep(std::time::Duration::from_millis(jitter_ms));
+                            }
+                        }
+                        Ok(None) => break, // queue drained
+                        Err(e) => {
+                            eprintln!("[boi daemon] dequeue error: {}", e);
+                            break;
                         }
-                        Ok(None) => {}
-                        Err(e) => eprintln!("[boi daemon] dequeue error: {}", e),
                     },
-                    Err(e) => eprintln!("[boi daemon] queue open error: {}", e),
+                    Err(e) => {
+                        eprintln!("[boi daemon] queue open error: {}", e);
+                        break;
+                    }
                 }
             }
         }
@@ -388,6 +437,240 @@ pub fn cmd_stop() {
     let _ = std::fs::remove_file(daemon_heartbeat_path()); // intentional: best-effort heartbeat cleanup
 }
 
+/// Workers to spawn this tick: free capacity (`max_workers - workers_len`, floored at 0) capped by `spawns_per_tick`.
+pub(crate) fn compute_to_spawn(workers_len: usize, max_workers: u32, spawns_per_tick: u32) -> u32 {
+    let running = workers_len.min(u32::MAX as usize) as u32; // clamp first: a bare `as u32` wraps
+    max_workers.saturating_sub(running).min(spawns_per_tick)
+}
+
+/// Copy the hot-reloadable settings (`max_workers`, `spawns_per_tick`, `claude_bin`) from `new_cfg` into `wc`.
+/// Every other `WorkerConfig` field is left exactly as it was.
+pub(crate) fn apply_reload(wc: &mut worker::WorkerConfig, new_cfg: &config::Config) {
+    wc.claude_bin = new_cfg.claude_bin();
+    wc.spawns_per_tick = new_cfg.spawns_per_tick();
+    wc.max_workers = new_cfg.max_workers();
+}
+
+/// Send SIGHUP to the running daemon so it reloads its config; exits with status 1 on any failure.
+pub fn cmd_reload() {
+    let pid = read_daemon_pid().unwrap_or_else(|| {
+        eprintln!("no daemon running (PID file not found)");
+        std::process::exit(1)
+    });
+
+    if !crate::fmt::is_pid_alive(pid) {
+        eprintln!("daemon process {} is not running", pid);
+        std::process::exit(1);
+    }
+
+    // SAFETY: kill(2) takes two plain integers and touches no memory owned by this process.
+    // The daemon can still exit (or belong to another user) after the check above, so test the result.
+    if unsafe { libc::kill(pid as i32, libc::SIGHUP) } != 0 {
+        eprintln!("failed to signal daemon (pid {}): {}", pid, std::io::Error::last_os_error());
+        std::process::exit(1);
+    }
+    println!("sent SIGHUP to daemon (pid {}); config will reload within one tick", pid);
+}
+
+#[cfg(test)]
+mod daemon_batch {
+    use super::*;
+    use crate::{queue, spec, test_utils};
+
+    const SIMPLE_SPEC: &str = "title: \"Batch Test\"\ntasks:\n  - id: t-1\n    title: \"Step\"\n    status: PENDING\n    spec: \"Do it\"\n";
+
+    fn open_queue(label: &str) -> (queue::Queue, String) {
+        let db_file = test_utils::test_file(label, "db");
+        let _ = std::fs::remove_file(&db_file);
+        let db_path = db_file.to_str().unwrap().to_string();
+        let q = queue::Queue::open(&db_path).unwrap();
+        (q, db_path)
+    }
+
+    fn enqueue_n(q: &queue::Queue, n: usize) {
+        let boi_spec = spec::parse(SIMPLE_SPEC).unwrap();
+        for _ in 0..n {
+            q.enqueue(&boi_spec, None).unwrap();
+        }
+    }
+
+    fn drain_n(q: &queue::Queue, to_spawn: u32) -> usize {
+        let mut count = 0;
+        for _ in 0..to_spawn {
+            match q.dequeue() {
+                Ok(Some(_)) => count += 1,
+                // Mirror the daemon loop: an empty queue or a dequeue error ends the tick early.
+                Ok(None) | Err(_) => break,
+            }
+        }
+        count
+    }
+
+    #[test]
+    fn test_compute_to_spawn_at_capacity() {
+        // workers_len == max_workers → 0 slots remaining
+        assert_eq!(compute_to_spawn(4, 4, 4), 0);
+    }
+
+    #[test]
+    fn test_compute_to_spawn_limited_by_spawns_per_tick() {
+        // cap_remaining=8 but spawns_per_tick=4 → 4
+        assert_eq!(compute_to_spawn(0, 8, 4), 4);
+    }
+
+    #[test]
+    fn test_compute_to_spawn_limited_by_cap_remaining() {
+        // cap_remaining=2, spawns_per_tick=4 → 2
+        assert_eq!(compute_to_spawn(6, 8, 4), 2);
+    }
+
+    #[test]
+    fn test_empty_queue_zero_spawns() {
+        let (q, _db) = open_queue("batch-empty");
+        let to_spawn = compute_to_spawn(0, 4, 4);
+        let spawned = drain_n(&q, to_spawn);
+        assert_eq!(spawned, 0);
+    }
+
+    #[test]
+    fn test_one_eligible_cap4_tick4_spawns_one() {
+        let (q, _db) = open_queue("batch-one");
+        enqueue_n(&q, 1);
+        let to_spawn = compute_to_spawn(0, 4, 4); // = 4
+        let spawned = drain_n(&q, to_spawn);
+        assert_eq!(spawned, 1, "only 1 item in queue, expect 1 spawn");
+    }
+
+    #[test]
+    fn test_six_eligible_cap4_tick4_spawns_four_then_two() {
+        let (q, _db) = open_queue("batch-six-cap4");
+        enqueue_n(&q, 6);
+        let first_tick = drain_n(&q, compute_to_spawn(0, 4, 4));
+        assert_eq!(first_tick, 4, "first tick: 4 spawned");
+
+        // At capacity (4 of 4 running) nothing more may start, even with 2 specs still queued.
+        assert_eq!(compute_to_spawn(4, 4, 4), 0, "no slots while all 4 workers run");
+        // Once the first batch has been reaped, the same cap=4 config drains the remaining 2.
+        let second_tick = drain_n(&q, compute_to_spawn(0, 4, 4));
+        assert_eq!(second_tick, 2, "second tick: remaining 2 spawned");
+    }
+
+    #[test]
+    fn test_six_eligible_cap8_tick4_spawns_four() {
+        let (q, _db) = open_queue("batch-six-cap8");
+        enqueue_n(&q, 6);
+        let to_spawn = compute_to_spawn(0, 8, 4); // = 4 (tick limit)
+        let spawned = drain_n(&q, to_spawn);
+        assert_eq!(spawned, 4);
+    }
+
+    #[test]
+    fn test_four_eligible_cap2_tick4_spawns_two() {
+        let (q, _db) = open_queue("batch-four-cap2");
+        enqueue_n(&q, 4);
+        let to_spawn = compute_to_spawn(6, 8, 4); // cap_remaining=2, tick=4 → 2
+        let spawned = drain_n(&q, to_spawn);
+        assert_eq!(spawned, 2);
+    }
+}
+
+#[cfg(test)]
+mod daemon_hotreload {
+    use super::*;
+    use crate::{config, test_utils, worker};
+
+    fn make_wc(max_workers: u32, spawns_per_tick: u32, claude_bin: &str) -> worker::WorkerConfig {
+        worker::WorkerConfig {
+            claude_bin: claude_bin.to_string(),
+            max_workers,
+            spawns_per_tick,
+            task_timeout_secs: 1800,
+            retry_count: 3,
+            cleanup_on_failure: false,
+        }
+    }
+
+    #[test]
+    fn test_apply_reload_updates_hot_fields() {
+        let reloaded = config::Config {
+            claude_bin: Some(String::from("/usr/bin/claude")),
+            spawns_per_tick: Some(6),
+            max_workers: Some(8),
+            ..Default::default()
+        };
+        let mut live = make_wc(4, 2, "claude");
+        apply_reload(&mut live, &reloaded);
+        // All three hot fields now carry the values from the reloaded config.
+        let got = (live.max_workers, live.spawns_per_tick, live.claude_bin.as_str());
+        assert_eq!(got, (8, 6, "/usr/bin/claude"));
+    }
+
+    #[test]
+    fn test_apply_reload_leaves_other_fields_unchanged() {
+        // Start from values that differ from make_wc's so an accidental reset would be visible.
+        let mut live = worker::WorkerConfig {
+            task_timeout_secs: 7200,
+            retry_count: 5,
+            ..make_wc(4, 2, "claude")
+        };
+        let reloaded = config::Config { max_workers: Some(8), ..Default::default() };
+        apply_reload(&mut live, &reloaded);
+        assert_eq!(live.task_timeout_secs, 7200, "task_timeout_secs must not change on reload");
+        assert_eq!(live.retry_count, 5, "retry_count must not change on reload");
+    }
+
+    #[test]
+    fn test_bad_config_returns_err() {
+        let cfg_path = test_utils::test_file("hotreload-bad-config", "yaml");
+        // The content below opens a flow sequence and never closes it,
+        // so loading it is expected to fail.
+        std::fs::write(&cfg_path, b"max_workers: [this is: not: valid yaml\n").unwrap();
+        let outcome = config::try_load_from(&cfg_path);
+        assert!(outcome.is_err(), "invalid YAML should return Err, got: {:?}", outcome);
+        // Best-effort cleanup of the temp file.
+        let _ = std::fs::remove_file(&cfg_path);
+    }
+
+    #[test]
+    fn test_missing_config_returns_defaults() {
+        let cfg_path = test_utils::test_file("hotreload-missing", "yaml");
+        // Make sure no file is left over from an earlier run.
+        let _ = std::fs::remove_file(&cfg_path);
+        let loaded = config::try_load_from(&cfg_path);
+        let defaults = loaded.expect("missing config file should return Ok with defaults");
+        assert_eq!((defaults.max_workers(), defaults.spawns_per_tick()), (5, 4));
+    }
+
+    #[test]
+    fn test_noop_reload_same_values() {
+        // `live` already holds the values an all-`None` config resolves to,
+        // so applying that config must leave every hot field as it was.
+        let mut live = make_wc(5, 4, "claude");
+        apply_reload(&mut live, &config::Config::default());
+        assert_eq!(live.max_workers, 5);
+        assert_eq!(live.spawns_per_tick, 4);
+        assert_eq!(live.claude_bin, "claude");
+    }
+
+    #[test]
+    fn test_bad_config_keeps_original_wc() {
+        let cfg_path = test_utils::test_file("hotreload-bad-keep", "yaml");
+        std::fs::write(&cfg_path, b"max_workers: [broken\n").unwrap();
+        let mut live = make_wc(8, 3, "my-claude");
+        // Same shape as the daemon's reload branch: `apply_reload` runs only
+        // when the config loaded; an `Err` leaves `live` alone.
+        if let Ok(reloaded) = config::try_load_from(&cfg_path) {
+            apply_reload(&mut live, &reloaded);
+        }
+
+        // None of the hot fields may have moved.
+        assert_eq!(live.max_workers, 8, "max_workers must be retained on bad config");
+        assert_eq!(live.spawns_per_tick, 3, "spawns_per_tick must be retained on bad config");
+        assert_eq!(live.claude_bin, "my-claude", "claude_bin must be retained on bad config");
+        let _ = std::fs::remove_file(&cfg_path);
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/config.rs b/src/config.rs
index 4ed3fc1..5b62a13 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -13,12 +13,37 @@ pub struct Paths {
 #[derive(Debug, Default, Deserialize, Serialize)]
 pub struct Config {
     pub max_workers: Option<u32>,
+    pub spawns_per_tick: Option<u32>,
     pub task_timeout_minutes: Option<u32>,
     pub retry_count: Option<u32>,
     pub cleanup_on_failure: Option<bool>,
     pub hooks: Option<HashMap<String, HookEntry>>,
     pub paths: Option<Paths>,
     pub claude_bin: Option<String>,
+    pub brain: Option<PathBuf>,
+}
+
+/// Pick the brain directory: the spec-level path when present, otherwise the config-level one, else `None`.
+pub fn resolve_brain(
+    spec_brain: Option<&PathBuf>,
+    config_brain: Option<&PathBuf>,
+) -> Option<PathBuf> {
+    spec_brain.cloned().or_else(|| config_brain.cloned())
+}
+
+/// Check that `path` exists and that a `CLAUDE.md` entry exists directly inside it.
+pub fn validate_brain(path: &Path) -> Result<(), String> {
+    let shown = path.display();
+    if !path.exists() {
+        return Err(format!("brain directory not found: {}", shown));
+    }
+
+    // Only existence is checked here; the file's contents are not read.
+    if path.join("CLAUDE.md").exists() {
+        Ok(())
+    } else {
+        Err(format!("brain directory missing CLAUDE.md: {}", shown))
+    }
 }
 
 pub fn load() -> Config {
@@ -26,6 +51,23 @@ pub fn load() -> Config {
     Config::load_from(&config_path)
 }
 
+/// Fallible load of the default config path for SIGHUP hot-reload: a parse failure is `Err`, not a silent default.
+pub fn try_load() -> Result<Config, String> {
+    let config_path = default_config_path();
+    try_load_from(&config_path)
+}
+
+/// Load the config at `path`: a missing file yields defaults, any other read or parse failure is `Err`.
+pub fn try_load_from(path: &std::path::Path) -> Result<Config, String> {
+    match std::fs::read_to_string(path) {
+        Ok(content) => serde_yml::from_str::<Config>(&content)
+            .map_err(|e| format!("config parse error in {}: {}", path.display(), e)),
+        // Only a genuinely absent file means "use defaults"; e.g. permission errors must not reset config.
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Config::default()),
+        Err(e) => Err(format!("failed to read config {}: {}", path.display(), e)),
+    }
+}
+
 pub fn default_config_path() -> PathBuf {
     let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
     PathBuf::from(home).join(".boi").join("config.yaml")
@@ -56,6 +98,10 @@ impl Config {
         self.max_workers.unwrap_or(5)
     }
 
+    pub fn spawns_per_tick(&self) -> u32 {
+        self.spawns_per_tick.unwrap_or(4).max(1) // default 4; floor at 1 because 0 would never dequeue
+    }
+
     pub fn task_timeout_secs(&self) -> u64 {
         self.task_timeout_minutes.unwrap_or(30) as u64 * 60
     }
@@ -116,6 +162,26 @@ mod tests {
         assert_eq!(cfg.max_workers(), 5);
         assert_eq!(cfg.task_timeout_secs(), 30 * 60);
         assert_eq!(cfg.retry_count(), 3);
+        assert_eq!(cfg.spawns_per_tick(), 4);
+    }
+
+    #[test]
+    fn test_spawns_per_tick_default() {
+        // With the field unset, the accessor supplies the built-in default.
+        assert_eq!(Config::default().spawns_per_tick(), 4);
+    }
+
+    #[test]
+    fn test_spawns_per_tick_explicit() {
+        // Write a config that sets only `spawns_per_tick`.
+        let cfg_file = test_utils::test_file("config-spawns", "yaml");
+        fs::write(&cfg_file, "spawns_per_tick: 8\n").unwrap();
+
+        // The explicit value must win over the default.
+        let loaded = Config::load_from(&cfg_file);
+        assert_eq!(loaded.spawns_per_tick(), 8);
+
+        let _ = fs::remove_file(&cfg_file);
     }
 
     #[test]
@@ -161,4 +227,64 @@ mod tests {
 
         let _ = fs::remove_file(&path);
     }
+
+    #[test]
+    fn test_brain_field_deserializes() {
+        // A config whose only key is `brain`.
+        let cfg_file = test_utils::test_file("config-brain", "yaml");
+        fs::write(&cfg_file, "brain: /some/brain/dir\n").unwrap();
+
+        // The YAML scalar should come back as the configured path.
+        let loaded = Config::load_from(&cfg_file);
+        assert_eq!(loaded.brain.as_deref(), Some(Path::new("/some/brain/dir")));
+
+        let _ = fs::remove_file(&cfg_file);
+    }
+
+    #[test]
+    fn test_brain_defaults_to_none() {
+        // A default config carries no brain directory.
+        assert_eq!(Config::default().brain, None);
+    }
+
+    #[test]
+    fn test_brain_validate_path_missing() {
+        let message = validate_brain(Path::new("/nonexistent/brain/path")).unwrap_err();
+        assert!(message.contains("not found"), "err={}", message);
+    }
+
+    #[test]
+    fn test_brain_validate_missing_claude_md() {
+        let brain_dir = test_utils::test_dir("brain-no-claude-md");
+        let message = validate_brain(&brain_dir).unwrap_err();
+        assert!(message.contains("CLAUDE.md"), "err={}", message);
+    }
+
+    #[test]
+    fn test_brain_validate_ok() {
+        let brain_dir = test_utils::test_dir("brain-valid");
+        fs::write(brain_dir.join("CLAUDE.md"), "# context").unwrap();
+        validate_brain(&brain_dir).expect("valid brain should pass validation");
+    }
+
+    #[test]
+    fn test_brain_resolve_spec_overrides_config() {
+        let from_spec = PathBuf::from("/spec/brain");
+        let from_config = PathBuf::from("/config/brain");
+        // Both are set, so the spec-level directory must win.
+        assert_eq!(resolve_brain(Some(&from_spec), Some(&from_config)), Some(from_spec.clone()));
+    }
+
+    #[test]
+    fn test_brain_resolve_config_fallback() {
+        let from_config = PathBuf::from("/config/brain");
+        // No spec-level brain, so the config-level one is used.
+        assert_eq!(resolve_brain(None, Some(&from_config)), Some(from_config.clone()));
+    }
+
+    #[test]
+    fn test_brain_resolve_none_when_unset() {
+        // Neither level sets a brain, so there is nothing to resolve.
+        assert_eq!(resolve_brain(None, None), None);
+    }
 }