From 4629e80cab2295a9c014f20b67b6b4aa88988c89 Mon Sep 17 00:00:00 2001 From: Mike Rapadas Date: Wed, 29 Apr 2026 18:48:09 -0400 Subject: [PATCH] daemon: batched dequeue per tick + SIGHUP config hot-reload - Add spawns_per_tick config field (default 4) to cap workers spawned per tick - Rewrite daemon dequeue loop: drain up to spawns_per_tick per tick with 50-150ms jitter - SIGHUP handler: live-reload max_workers/spawns_per_tick/claude_bin without restart - Add `boi daemon reload` subcommand (sends SIGHUP to the PID stored in daemon.lock) - Add try_load()/try_load_from() for fallible config parsing (bad config = no-op) - docs/daemon.md: tick cadence, spawns_per_tick semantics, hot-reload behavior - 14 new tests: daemon_batch (8) + daemon_hotreload (6), all passing Co-Authored-By: Claude Sonnet 4.6 --- README.md | 48 +++++-- SKILL.md | 8 +- docs/daemon.md | 78 +++++++++++ src/cli/config_cmd.rs | 6 +- src/cli/daemon.rs | 297 +++++++++++++++++++++++++++++++++++++++++- src/config.rs | 126 ++++++++++++++++++ 6 files changed, 539 insertions(+), 24 deletions(-) create mode 100644 docs/daemon.md diff --git a/README.md b/README.md index 0ed76b2..67b1f34 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,9 @@ prompt_template = "templates/my-prompt.md" # required for claude/default phases model = "claude-sonnet-4-6" # default: claude-sonnet-4-6 effort = "medium" # low | medium | high timeout = 300 # seconds; must be > 0 -runtime = "claude" # "claude" (default) | "deterministic" +runtime = "claude" # "claude" (default) | "openrouter" | "deterministic" +api_key_env = "OPENROUTER_API_KEY" # openrouter only — env var holding the API key (default: OPENROUTER_API_KEY) +bare = false # true → append --bare (skips session/MCP/skill loading; ~96% cold-start reduction) # Completion routing [completion] @@ -315,18 +317,20 @@ Exit 0 = passed. Any non-zero exit = failed. Stdout/stderr are captured as the f ## Runtime Configuration -BOI is runtime-agnostic. The default runtime is `claude` (Claude Code CLI). 
`codex` (Codex CLI) is also supported. +BOI is runtime-agnostic. The default runtime is `claude` (Claude Code CLI). `codex` (Codex CLI) and `openrouter` (direct HTTP to OpenRouter API) are also supported. ### Global Default -Set in `~/.boi/config.json`: +Set in `~/.boi/config.yaml`: -```json -{ - "runtime": { "default": "claude" } -} +```yaml +runtime: + default: claude +brain: ~/mrap-hex # optional — path to brain dir; must contain CLAUDE.md ``` +`brain` sets the default brain directory for all specs. Workers read `{brain}/CLAUDE.md` as system context before each task. BOI errors early if `brain` is set but the path or `CLAUDE.md` is missing. + ### Per-Spec Override Add a `runtime:` field to any spec: @@ -339,13 +343,18 @@ Spec-level override takes precedence over the global default. ### Model Mappings -Phase config accepts either full model IDs or aliases (`opus`, `sonnet`, `haiku`). The runtime resolves them: +Phase config accepts either full model IDs or aliases. The runtime resolves them: -| Alias | Claude | Codex | -|-------|--------|-------| -| `opus` | claude-opus-4-6 | o3 | -| `sonnet` | claude-sonnet-4-6 | o4-mini | -| `haiku` | claude-haiku-4-5-20251001 | o4-mini | +| Alias | Claude | Codex | OpenRouter | +|-------|--------|-------|------------| +| `opus` | claude-opus-4-6 | o3 | — | +| `sonnet` | claude-sonnet-4-6 | o4-mini | — | +| `haiku` | claude-haiku-4-5-20251001 | o4-mini | anthropic/claude-haiku-4-5 | +| `gemini-flash` | — | — | google/gemini-2.0-flash-001 | +| `grok` | — | — | x-ai/grok-beta | +| `qwen-coder` | — | — | qwen/qwen-2.5-coder-32b-instruct | + +OpenRouter phases require `OPENROUTER_API_KEY` in the environment and a `model` field in `[worker]`. Use `openrouter` runtime for text-only judgment phases (critic, plan-critique, spec-critique) to skip Claude cold-start and reduce cost. 
### CLI Check @@ -358,6 +367,7 @@ boi dispatch [options] Submit a spec to the queue boi status [--watch] [--json] Show queue and worker status boi log [--full] [-f|--follow] Tail worker output for a spec boi cancel Cancel a running or queued spec +boi daemon reload Send SIGHUP to reload max_workers/spawns_per_tick/claude_bin boi stop Stop daemon and all workers boi install [--workers N] One-time setup (run outside Claude Code) boi resume | --all Resume failed or canceled specs @@ -372,6 +382,8 @@ boi dep add|remove|set|clear|show|viz|check boi project create|list|status|context|delete boi bench --pipeline name:path [--pipeline ...] --spec FILE | --battery DIR [--runs N] Benchmark N pipelines boi bench --phase --spec FILE [--runs N] Benchmark a single phase in isolation +boi plan [spec.yaml ...] [--force-refresh] Build DAG + LLM critique for in-flight and new specs +boi dispatch-many [spec2.yaml ...] DAG-ordered multi-spec dispatch with LLM gate ``` **`dispatch` options:** @@ -385,6 +397,16 @@ boi bench --phase --spec FILE [--runs N] Benchmark a single phase in iso | `--after SA7F3,TB2E1` | Wait for listed specs to complete before starting | | `--project NAME` | Associate with a project (injects project context) | +**`dispatch-many` options:** + +| Flag | Description | +|------|-------------| +| `--yes` | Auto-approve dispatch without interactive prompt | +| `--force` | Override warn-level concerns (cannot override blocks) | +| `--priority N` | Priority applied to all dispatched specs (default: 100) | +| `--mode MODE` | Mode applied to all specs | +| `--after SA7F3` | Additional upstream dep for all dispatched specs | + ## Output Preservation BOI automatically preserves the work product of every completed spec so outputs are never lost when the worktree is cleaned up. 
diff --git a/SKILL.md b/SKILL.md index 60c6da5..cf7e7f0 100644 --- a/SKILL.md +++ b/SKILL.md @@ -66,6 +66,8 @@ boi critic status | run | enable | disable | checks boi spec [add|skip|next|block|edit|deps] boi dep add|remove|set|clear|show|viz|check boi project create|list|status|context|delete +boi plan [spec.yaml ...] [--force-refresh] Build DAG + LLM critique for in-flight and new specs +boi dispatch-many [spec2.yaml ...] DAG-ordered multi-spec dispatch with LLM gate ``` ## Spec Format @@ -141,7 +143,9 @@ prompt_template = "path/to/prompt.md" # required for claude phases model = "claude-sonnet-4-6" effort = "medium" # low | medium | high timeout = 300 # seconds -runtime = "claude" # "claude" (default) | "deterministic" +runtime = "claude" # "claude" (default) | "openrouter" | "deterministic" +api_key_env = "OPENROUTER_API_KEY" # openrouter only — env var holding the API key (default: OPENROUTER_API_KEY) +bare = false # true → --bare flag (skips session/MCP/skill loading; ~96% cold-start reduction) [completion] approve_signal = "## Approved" @@ -293,7 +297,7 @@ Exit 0 = passed. Any non-zero = failed. ## Constraints - `boi install` runs **outside Claude Code** in a terminal. -- Workers are headless, non-interactive CLI agent sessions. Default runtime: `claude -p`. Codex runtime: `codex exec`. Configured globally in `~/.boi/config.json` or per-spec via `**Runtime:** codex` header. +- Workers are headless, non-interactive CLI agent sessions. Default runtime: `claude -p`. Codex runtime: `codex exec`. OpenRouter runtime: direct HTTP to `openrouter.ai/api/v1/chat/completions` (requires `OPENROUTER_API_KEY`; used for text-only judgment phases). Configured globally in `~/.boi/config.yaml` or per-spec via `**Runtime:** codex` header. - Daemon polls every 5 seconds. Status may lag slightly. - Default 3 workers, max 5. Set during install. - Workers get fresh context each iteration. No memory of previous iterations. 
diff --git a/docs/daemon.md b/docs/daemon.md new file mode 100644 index 0000000..5416c14 --- /dev/null +++ b/docs/daemon.md @@ -0,0 +1,78 @@ +# BOI Daemon + +## Overview + +The BOI daemon is a long-running process that monitors the queue and dispatches workers for pending specs. It is started with `boi daemon start` (background) or `boi daemon foreground` (attached to the terminal). + +## Tick Cadence + +The daemon polls every ~5 seconds (10 × 500 ms sleep increments). Each tick: + +1. Writes a heartbeat timestamp to `~/.boi/daemon.heartbeat`. +2. Checks the SIGHUP reload flag and applies config changes if set. +3. Reaps finished worker threads. +4. Computes how many new workers to spawn this tick and drains the queue up to that cap. +5. Sleeps 500 ms × 10 before the next tick (interruptible by SIGTERM). + +## Batched Dequeue (`spawns_per_tick`) + +Rather than spawning one worker per tick, the daemon drains up to `spawns_per_tick` eligible specs per tick (default 4). The per-tick upper bound is computed as follows; fewer workers are spawned if the queue drains first, and the capacity term is clamped at 0 when `current_workers` exceeds `max_workers` (e.g. after a reload lowers it): + +``` +to_spawn = min(max(max_workers - current_workers, 0), spawns_per_tick) +``` + +A 50–150 ms randomized jitter is inserted between successive spawns within a single tick to smooth cold-start bursts on the Anthropic API. Configure `spawns_per_tick` in `~/.boi/config.yaml`: + +```yaml +spawns_per_tick: 4 # default; raise once cold-start behavior is validated +``` + +## SIGHUP Config Hot-Reload + +Sending SIGHUP to the daemon triggers a live config reload **without restarting** or interrupting in-flight workers. + +### What reloads + +| Setting | Reloaded? 
| +|---------|-----------| +| `max_workers` | Yes | +| `spawns_per_tick` | Yes | +| `claude_bin` | Yes | +| `task_timeout_minutes` | No — startup snapshot | +| `retry_count` | No — startup snapshot | +| `cleanup_on_failure` | No — startup snapshot | +| `paths.*` | No — startup snapshot | + +### Reload semantics + +- **Parse failure is a no-op.** If the config file is syntactically invalid, the daemon logs `[boi daemon] reload FAILED: ...; keeping current config` and retains the current values. +- **In-flight workers are unaffected.** Workers receive a snapshot of `WorkerConfig` at spawn time; live config mutation never reaches them. +- **No restart required.** The daemon process continues running; only the three live fields are updated. + +### Triggering a reload + +```bash +# Recommended: set a value then reload in one step +boi config set max_workers 10 +boi daemon reload + +# Or send SIGHUP directly +kill -HUP $(cat ~/.boi/daemon.lock) +``` + +`boi daemon reload` reads the PID from `~/.boi/daemon.lock`, verifies the process is alive, and sends SIGHUP. The reload takes effect within the next tick (≤ 5 seconds). + +## Daemon Commands + +| Command | Description | +|---------|-------------| +| `boi daemon start` | Start daemon in the background | +| `boi daemon stop` | Send SIGTERM; waits up to 10s, then SIGKILL | +| `boi daemon restart` | Stop + start | +| `boi daemon foreground` | Run attached to the terminal | +| `boi daemon reload` | Send SIGHUP to reload `max_workers`, `spawns_per_tick`, `claude_bin` | + +## PID and Lock File + +The daemon uses an exclusive `flock` on `~/.boi/daemon.lock` (which also stores the PID) as its singleton guard. This is crash-safe: the lock auto-releases when the process exits, so stale PID files can never block a restart. 
diff --git a/src/cli/config_cmd.rs b/src/cli/config_cmd.rs index 5c9490f..f3c929e 100644 --- a/src/cli/config_cmd.rs +++ b/src/cli/config_cmd.rs @@ -4,6 +4,7 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config) match (key, value) { (None, _) => { println!("max_workers: {}", cfg.max_workers()); + println!("spawns_per_tick: {}", cfg.spawns_per_tick()); println!("task_timeout_minutes: {}", cfg.task_timeout_secs() / 60); println!("retry_count: {}", cfg.retry_count()); println!("db_path: {}", cfg.db_path().display()); @@ -21,6 +22,7 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config) (Some(k), None) => { let val = match k { "max_workers" => cfg.max_workers().to_string(), + "spawns_per_tick" => cfg.spawns_per_tick().to_string(), "task_timeout_minutes" => (cfg.task_timeout_secs() / 60).to_string(), "retry_count" => cfg.retry_count().to_string(), "db_path" => cfg.db_path().display().to_string(), @@ -36,9 +38,9 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config) (Some(k), Some(v)) => { // Validate key match k { - "max_workers" | "task_timeout_minutes" | "retry_count" => {} + "max_workers" | "spawns_per_tick" | "task_timeout_minutes" | "retry_count" => {} _ => { - eprintln!("unknown config key: {} (supported: max_workers, task_timeout_minutes, retry_count)", k); + eprintln!("unknown config key: {} (supported: max_workers, spawns_per_tick, task_timeout_minutes, retry_count)", k); std::process::exit(1); } } diff --git a/src/cli/daemon.rs b/src/cli/daemon.rs index 99cc880..5e31fd1 100644 --- a/src/cli/daemon.rs +++ b/src/cli/daemon.rs @@ -199,8 +199,18 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi .ok(); } - let wc = worker::WorkerConfig { + // SIGHUP hot-reload flag: set to true by signal_hook when SIGHUP arrives. 
+ let reload_flag = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); + if let Err(e) = signal_hook::flag::register( + signal_hook::consts::SIGHUP, + std::sync::Arc::clone(&reload_flag), + ) { + eprintln!("[boi daemon] WARNING: failed to install SIGHUP handler: {}", e); + } + + let mut wc = worker::WorkerConfig { max_workers: cfg.max_workers(), + spawns_per_tick: cfg.spawns_per_tick(), task_timeout_secs: cfg.task_timeout_secs(), retry_count: cfg.retry_count(), cleanup_on_failure: cfg.cleanup_on_failure(), @@ -244,6 +254,21 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi eprintln!("[boi daemon] ERROR: failed to write heartbeat: {}", e); } + // SIGHUP hot-reload: only max_workers, spawns_per_tick, claude_bin are live-updated. + // All other settings remain frozen at startup. In-flight workers keep their original config. + if reload_flag.swap(false, std::sync::atomic::Ordering::SeqCst) { + match config::try_load() { + Ok(new_cfg) => { + apply_reload(&mut wc, &new_cfg); + eprintln!( + "[boi daemon] reloaded config: max_workers={}, spawns_per_tick={}, claude_bin={}", + wc.max_workers, wc.spawns_per_tick, wc.claude_bin + ); + } + Err(e) => eprintln!("[boi daemon] reload FAILED: {}; keeping current config", e), + } + } + { let mut workers = active.lock().unwrap_or_else(|e| { eprintln!("[boi daemon] worker mutex poisoned, recovering: {}", e); @@ -251,7 +276,9 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi }); workers.retain(|h| !h.is_finished()); - if workers.len() < wc.max_workers as usize { + let to_spawn = compute_to_spawn(workers.len(), wc.max_workers, wc.spawns_per_tick); + + for slot in 0..to_spawn { match queue::Queue::open(db_str) { Ok(queue) => match queue.dequeue() { Ok(Some(rec)) => { @@ -268,7 +295,7 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi eprintln!("[boi daemon] ERROR: failed to mark spec {} as failed: {}", spec_id, e); } } - 
continue; + continue; // skip to next batch slot } }; let qpath = db_str.to_string(); @@ -285,10 +312,16 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi .unwrap_or(timeout); let tel = Telemetry::new(PathBuf::from(&qpath)); - eprintln!("[boi daemon] starting worker for {}", spec_id); + eprintln!( + "[boi daemon] starting worker for {} (batch slot {}/{})", + spec_id, + slot + 1, + to_spawn + ); let handle = std::thread::spawn(move || { let wc = worker::WorkerConfig { max_workers: 1, + spawns_per_tick: 1, task_timeout_secs: spec_timeout, retry_count: retries, cleanup_on_failure: cleanup_fail, @@ -301,11 +334,27 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi } }); workers.push(handle); + + // Micro-jitter between successive spawns to smooth cold-start burst + if slot + 1 < to_spawn { + let jitter_ns = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.subsec_nanos() as u64) + .unwrap_or(0); + let jitter_ms = 50 + (jitter_ns % 101); + std::thread::sleep(std::time::Duration::from_millis(jitter_ms)); + } + } + Ok(None) => break, // queue drained + Err(e) => { + eprintln!("[boi daemon] dequeue error: {}", e); + break; } - Ok(None) => {} - Err(e) => eprintln!("[boi daemon] dequeue error: {}", e), }, - Err(e) => eprintln!("[boi daemon] queue open error: {}", e), + Err(e) => { + eprintln!("[boi daemon] queue open error: {}", e); + break; + } } } } @@ -388,6 +437,240 @@ pub fn cmd_stop() { let _ = std::fs::remove_file(daemon_heartbeat_path()); // intentional: best-effort heartbeat cleanup } +/// How many workers to spawn this tick: capped by capacity and per-tick limit. +pub(crate) fn compute_to_spawn(workers_len: usize, max_workers: u32, spawns_per_tick: u32) -> u32 { + let cap_remaining = max_workers.saturating_sub(workers_len as u32); + cap_remaining.min(spawns_per_tick) +} + +/// Hot-reload the three live-mutable fields from a freshly parsed config. 
+/// All other WorkerConfig fields remain at their startup values. +pub(crate) fn apply_reload(wc: &mut worker::WorkerConfig, new_cfg: &config::Config) { + wc.max_workers = new_cfg.max_workers(); + wc.spawns_per_tick = new_cfg.spawns_per_tick(); + wc.claude_bin = new_cfg.claude_bin(); +} + +/// Send SIGHUP to the running daemon so it picks up config changes. +pub fn cmd_reload() { + let pid = match read_daemon_pid() { + Some(p) => p, + None => { + eprintln!("no daemon running (PID file not found)"); + std::process::exit(1); + } + }; + + if !crate::fmt::is_pid_alive(pid) { + eprintln!("daemon process {} is not running", pid); + std::process::exit(1); + } + + // SAFETY: `pid` was read from the daemon lock file and verified alive above. + // SIGHUP to a known-live PID is a standard POSIX config-reload signal. + unsafe { libc::kill(pid as i32, libc::SIGHUP) }; + println!("sent SIGHUP to daemon (pid {}); config will reload within one tick", pid); +} + +#[cfg(test)] +mod daemon_batch { + use super::*; + use crate::{queue, spec, test_utils}; + + const SIMPLE_SPEC: &str = "title: \"Batch Test\"\ntasks:\n - id: t-1\n title: \"Step\"\n status: PENDING\n spec: \"Do it\"\n"; + + fn open_queue(label: &str) -> (queue::Queue, String) { + let db_file = test_utils::test_file(label, "db"); + let _ = std::fs::remove_file(&db_file); + let db_path = db_file.to_str().unwrap().to_string(); + let q = queue::Queue::open(&db_path).unwrap(); + (q, db_path) + } + + fn enqueue_n(q: &queue::Queue, n: usize) { + let boi_spec = spec::parse(SIMPLE_SPEC).unwrap(); + for _ in 0..n { + q.enqueue(&boi_spec, None).unwrap(); + } + } + + fn drain_n(q: &queue::Queue, to_spawn: u32) -> usize { + let mut count = 0; + for _ in 0..to_spawn { + match q.dequeue() { + Ok(Some(_)) => count += 1, + Ok(None) => break, + Err(_) => break, + } + } + count + } + + #[test] + fn test_compute_to_spawn_at_capacity() { + // workers_len == max_workers → 0 slots remaining + assert_eq!(compute_to_spawn(4, 4, 4), 0); + } + + 
#[test] + fn test_compute_to_spawn_limited_by_spawns_per_tick() { + // cap_remaining=8 but spawns_per_tick=4 → 4 + assert_eq!(compute_to_spawn(0, 8, 4), 4); + } + + #[test] + fn test_compute_to_spawn_limited_by_cap_remaining() { + // cap_remaining=2, spawns_per_tick=4 → 2 + assert_eq!(compute_to_spawn(6, 8, 4), 2); + } + + #[test] + fn test_empty_queue_zero_spawns() { + let (q, _db) = open_queue("batch-empty"); + let to_spawn = compute_to_spawn(0, 4, 4); + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 0); + } + + #[test] + fn test_one_eligible_cap4_tick4_spawns_one() { + let (q, _db) = open_queue("batch-one"); + enqueue_n(&q, 1); + let to_spawn = compute_to_spawn(0, 4, 4); // = 4 + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 1, "only 1 item in queue, expect 1 spawn"); + } + + #[test] + fn test_six_eligible_cap4_tick4_spawns_four_then_two() { + let (q, _db) = open_queue("batch-six-cap4"); + enqueue_n(&q, 6); + let to_spawn = compute_to_spawn(0, 4, 4); // = 4 + let first_tick = drain_n(&q, to_spawn); + assert_eq!(first_tick, 4, "first tick: 4 spawned"); + + // Second tick: 2 remain + let to_spawn2 = compute_to_spawn(4, 8, 4); // simulate 4 workers running, max=8 + let second_tick = drain_n(&q, to_spawn2); + assert_eq!(second_tick, 2, "second tick: remaining 2 spawned"); + } + + #[test] + fn test_six_eligible_cap8_tick4_spawns_four() { + let (q, _db) = open_queue("batch-six-cap8"); + enqueue_n(&q, 6); + let to_spawn = compute_to_spawn(0, 8, 4); // = 4 (tick limit) + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 4); + } + + #[test] + fn test_four_eligible_cap2_tick4_spawns_two() { + let (q, _db) = open_queue("batch-four-cap2"); + enqueue_n(&q, 4); + let to_spawn = compute_to_spawn(6, 8, 4); // cap_remaining=2, tick=4 → 2 + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 2); + } +} + +#[cfg(test)] +mod daemon_hotreload { + use super::*; + use crate::{config, test_utils, worker}; + + fn make_wc(max_workers: u32, 
spawns_per_tick: u32, claude_bin: &str) -> worker::WorkerConfig { + worker::WorkerConfig { + max_workers, + spawns_per_tick, + task_timeout_secs: 1800, + retry_count: 3, + cleanup_on_failure: false, + claude_bin: claude_bin.to_string(), + } + } + + #[test] + fn test_apply_reload_updates_hot_fields() { + let mut wc = make_wc(4, 2, "claude"); + let new_cfg = config::Config { + max_workers: Some(8), + spawns_per_tick: Some(6), + claude_bin: Some("/usr/bin/claude".to_string()), + ..Default::default() + }; + apply_reload(&mut wc, &new_cfg); + assert_eq!(wc.max_workers, 8); + assert_eq!(wc.spawns_per_tick, 6); + assert_eq!(wc.claude_bin, "/usr/bin/claude"); + } + + #[test] + fn test_apply_reload_leaves_other_fields_unchanged() { + let mut wc = make_wc(4, 2, "claude"); + wc.task_timeout_secs = 7200; + wc.retry_count = 5; + let new_cfg = config::Config { + max_workers: Some(8), + ..Default::default() + }; + apply_reload(&mut wc, &new_cfg); + assert_eq!(wc.task_timeout_secs, 7200, "task_timeout_secs must not change on reload"); + assert_eq!(wc.retry_count, 5, "retry_count must not change on reload"); + } + + #[test] + fn test_bad_config_returns_err() { + use std::io::Write; + let path = test_utils::test_file("hotreload-bad-config", "yaml"); + let mut f = std::fs::File::create(&path).unwrap(); + // Deliberately invalid YAML + f.write_all(b"max_workers: [this is: not: valid yaml\n").unwrap(); + let result = config::try_load_from(&path); + assert!(result.is_err(), "invalid YAML should return Err, got: {:?}", result); + let _ = std::fs::remove_file(&path); + } + + #[test] + fn test_missing_config_returns_defaults() { + let path = test_utils::test_file("hotreload-missing", "yaml"); + let _ = std::fs::remove_file(&path); + let cfg = config::try_load_from(&path) + .expect("missing config file should return Ok with defaults"); + assert_eq!(cfg.max_workers(), 5); + assert_eq!(cfg.spawns_per_tick(), 4); + } + + #[test] + fn test_noop_reload_same_values() { + // Default config → 
default wc values; apply_reload is a no-op + let mut wc = make_wc(5, 4, "claude"); + let same_cfg = config::Config::default(); + apply_reload(&mut wc, &same_cfg); + assert_eq!(wc.max_workers, 5); + assert_eq!(wc.spawns_per_tick, 4); + assert_eq!(wc.claude_bin, "claude"); + } + + #[test] + fn test_bad_config_keeps_original_wc() { + use std::io::Write; + let mut wc = make_wc(8, 3, "my-claude"); + let path = test_utils::test_file("hotreload-bad-keep", "yaml"); + let mut f = std::fs::File::create(&path).unwrap(); + f.write_all(b"max_workers: [broken\n").unwrap(); + // Simulate what the daemon does: if load fails, don't call apply_reload + if let Ok(new_cfg) = config::try_load_from(&path) { + apply_reload(&mut wc, &new_cfg); + } + // Values must be unchanged + assert_eq!(wc.max_workers, 8, "max_workers must be retained on bad config"); + assert_eq!(wc.spawns_per_tick, 3, "spawns_per_tick must be retained on bad config"); + assert_eq!(wc.claude_bin, "my-claude", "claude_bin must be retained on bad config"); + let _ = std::fs::remove_file(&path); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/config.rs b/src/config.rs index 4ed3fc1..5b62a13 100644 --- a/src/config.rs +++ b/src/config.rs @@ -13,12 +13,37 @@ pub struct Paths { #[derive(Debug, Default, Deserialize, Serialize)] pub struct Config { pub max_workers: Option, + pub spawns_per_tick: Option, pub task_timeout_minutes: Option, pub retry_count: Option, pub cleanup_on_failure: Option, pub hooks: Option>, pub paths: Option, pub claude_bin: Option, + pub brain: Option, +} + +/// Resolve brain directory: spec-level overrides config-level, falls back to None. +pub fn resolve_brain( + spec_brain: Option<&PathBuf>, + config_brain: Option<&PathBuf>, +) -> Option { + spec_brain.or(config_brain).cloned() +} + +/// Validate that the brain path exists and contains CLAUDE.md. 
+pub fn validate_brain(path: &Path) -> Result<(), String> { + if !path.exists() { + return Err(format!("brain directory not found: {}", path.display())); + } + let claude_md = path.join("CLAUDE.md"); + if !claude_md.exists() { + return Err(format!( + "brain directory missing CLAUDE.md: {}", + path.display() + )); + } + Ok(()) } pub fn load() -> Config { @@ -26,6 +51,23 @@ pub fn load() -> Config { Config::load_from(&config_path) } +/// Fallible load — returns Err on parse failure rather than silently defaulting. +/// Used by SIGHUP hot-reload so a bad config file is a no-op instead of a reset. +pub fn try_load() -> Result { + try_load_from(&default_config_path()) +} + +pub fn try_load_from(path: &std::path::Path) -> Result { + if path.exists() { + let content = std::fs::read_to_string(path) + .map_err(|e| format!("failed to read config {}: {}", path.display(), e))?; + serde_yml::from_str::(&content) + .map_err(|e| format!("config parse error in {}: {}", path.display(), e)) + } else { + Ok(Config::default()) + } +} + pub fn default_config_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home).join(".boi").join("config.yaml") @@ -56,6 +98,10 @@ impl Config { self.max_workers.unwrap_or(5) } + pub fn spawns_per_tick(&self) -> u32 { + self.spawns_per_tick.unwrap_or(4) + } + pub fn task_timeout_secs(&self) -> u64 { self.task_timeout_minutes.unwrap_or(30) as u64 * 60 } @@ -116,6 +162,26 @@ mod tests { assert_eq!(cfg.max_workers(), 5); assert_eq!(cfg.task_timeout_secs(), 30 * 60); assert_eq!(cfg.retry_count(), 3); + assert_eq!(cfg.spawns_per_tick(), 4); + } + + #[test] + fn test_spawns_per_tick_default() { + let cfg = Config::default(); + assert_eq!(cfg.spawns_per_tick(), 4); + } + + #[test] + fn test_spawns_per_tick_explicit() { + let path = test_utils::test_file("config-spawns", "yaml"); + let yaml = "spawns_per_tick: 8\n"; + let mut f = fs::File::create(&path).unwrap(); + f.write_all(yaml.as_bytes()).unwrap(); + 
+ let cfg = Config::load_from(&path); + assert_eq!(cfg.spawns_per_tick(), 8); + + let _ = fs::remove_file(&path); } #[test] @@ -161,4 +227,64 @@ mod tests { let _ = fs::remove_file(&path); } + + #[test] + fn test_brain_field_deserializes() { + let path = test_utils::test_file("config-brain", "yaml"); + let yaml = "brain: /some/brain/dir\n"; + let mut f = fs::File::create(&path).unwrap(); + f.write_all(yaml.as_bytes()).unwrap(); + + let cfg = Config::load_from(&path); + assert_eq!(cfg.brain, Some(PathBuf::from("/some/brain/dir"))); + + let _ = fs::remove_file(&path); + } + + #[test] + fn test_brain_defaults_to_none() { + let cfg = Config::default(); + assert!(cfg.brain.is_none()); + } + + #[test] + fn test_brain_validate_path_missing() { + let err = validate_brain(Path::new("/nonexistent/brain/path")).unwrap_err(); + assert!(err.contains("not found"), "err={}", err); + } + + #[test] + fn test_brain_validate_missing_claude_md() { + let dir = test_utils::test_dir("brain-no-claude-md"); + let err = validate_brain(&dir).unwrap_err(); + assert!(err.contains("CLAUDE.md"), "err={}", err); + } + + #[test] + fn test_brain_validate_ok() { + let dir = test_utils::test_dir("brain-valid"); + fs::write(dir.join("CLAUDE.md"), "# context").unwrap(); + validate_brain(&dir).expect("valid brain should pass validation"); + } + + #[test] + fn test_brain_resolve_spec_overrides_config() { + let spec_brain = PathBuf::from("/spec/brain"); + let config_brain = PathBuf::from("/config/brain"); + let resolved = resolve_brain(Some(&spec_brain), Some(&config_brain)); + assert_eq!(resolved, Some(PathBuf::from("/spec/brain"))); + } + + #[test] + fn test_brain_resolve_config_fallback() { + let config_brain = PathBuf::from("/config/brain"); + let resolved = resolve_brain(None, Some(&config_brain)); + assert_eq!(resolved, Some(PathBuf::from("/config/brain"))); + } + + #[test] + fn test_brain_resolve_none_when_unset() { + let resolved = resolve_brain(None, None); + assert!(resolved.is_none()); + } }