From da452463eb49d7b90401a621c576da2f6f91469e Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 23 Feb 2026 09:32:09 -0300 Subject: [PATCH] fix: prevent loop from getting stuck when research is complete The main research loop would exit with "circuit_breaker_open" errors even when research was effectively complete. Root cause: circuit breaker check ran before graceful exit check, so 3 "no progress" loops after completion would halt the loop before detecting the RESEARCH_COMPLETE marker. Changes: - Swap exit check order: graceful_exit now runs BEFORE circuit_breaker - Read exit_signal directly from .response_analysis in should_exit_gracefully() - Case-insensitive RESEARCH_COMPLETE grep (handles Research_Complete, etc.) - Multi-location SYNTHESIS.md search (evidence dir, parent dir, project root) - Circuit breaker recognizes SYNTHESIS.md creation as research progress - Circuit breaker won't open when is_research_complete() returns true - Fix --continue to use --resume with session ID - Validate CLAUDE_PERMISSION_MODE against allowlist - Update CLAUDE.md with exit priority order and completion detection docs 14 new tests added across 3 test files (test_circuit_breaker.bats is new). Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 21 +++ lib/circuit_breaker.sh | 27 ++++ lib/phase_state_machine.sh | 20 ++- smithers_loop.sh | 88 ++++++++--- smithers_status.sh | 2 +- tests/unit/test_circuit_breaker.bats | 190 +++++++++++++++++++++++ tests/unit/test_exit_detection.bats | 113 ++++++++++---- tests/unit/test_phase_state_machine.bats | 63 +++++++- 8 files changed, 468 insertions(+), 56 deletions(-) create mode 100644 tests/unit/test_circuit_breaker.bats diff --git a/CLAUDE.md b/CLAUDE.md index 73f9daf..24a1db5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,6 +15,8 @@ The system consists of main bash scripts and a modular library system designed f ### Main Scripts 1. **smithers_loop.sh** - The main autonomous research loop that executes Claude Code repeatedly + - Checks graceful exit conditions BEFORE circuit breaker to ensure completion is detected first + - Exit priority: evidence-based completion → permission denials → minimum loops → exit_signal → safety limits 2. **smithers_monitor.sh** - Live monitoring dashboard for tracking research progress 3. **setup.sh** - Project initialization script for new Smithers research projects 4. **smithers_import.sh** - Research brief import tool that converts documents to Smithers format @@ -59,6 +61,8 @@ The system uses a modular architecture with research-focused components: 5. **lib/circuit_breaker.sh** - Research-Specific Stagnation Detection - Three states: CLOSED (normal), HALF_OPEN (monitoring), OPEN (halted) + - Recognizes SYNTHESIS.md creation/updates as evidence of research progress + - Won't open when research is complete (calls `is_research_complete()` as safety net) - Research thresholds: - `CB_SEARCH_REPETITION_THRESHOLD=3` - Repeated identical searches - `CB_NO_NEW_SOURCES_THRESHOLD=5` - Loops without new sources @@ -69,6 +73,7 @@ The system uses a modular architecture with research-focused components: 6. **lib/response_analyzer.sh** - Intelligent Research Response Analysis - Analyzes Claude Code output for completion signals - Detects SMITHERS_STATUS blocks with research-specific fields + - `exit_signal` from `.response_analysis` is read directly by `should_exit_gracefully()` - Research patterns: `PHASE_COMPLETE_PATTERNS`, `RESEARCH_COMPLETE_PATTERNS` - Session management with `.smithers/.smithers_session` @@ -123,6 +128,22 @@ Smithers enforces a 5-phase research methodology: - Provide actionable recommendations - Requirements: All prior phases complete +### Research Completion Detection +Research is considered complete when any of these conditions are met (checked in order): +1. **SYNTHESIS.md marker**: File containing `RESEARCH_COMPLETE` (case-insensitive) found in any of: `.smithers/evidence/SYNTHESIS.md`, `.smithers/SYNTHESIS.md`, or `SYNTHESIS.md` at project root +2. **Phase status**: `current_phase == "COMPLETE"` in `phases/status.json` +3. **Threshold-based**: In SYNTHESIZE phase with all evidence thresholds met (12+ sources, 15+ claims, 6+ validations) + +### Exit Priority Order +The loop checks completion conditions in this priority: +1. **Evidence-Based Completion** (PRIORITY 0): `is_research_complete()` — bypasses minimum loops +2. **Permission Denials** (PRIORITY 1): Blocked tools requiring `.smithersrc` updates +3. **Minimum Loops** (PRIORITY 2): Must reach `MIN_RESEARCH_LOOPS` (default: 3) +4. **Exit Signal** (PRIORITY 3): Claude set `EXIT_SIGNAL: true` in SMITHERS_STATUS +5. **Safety Limits** (PRIORITY 4): Test-only loop stagnation detection + +Graceful exit is checked BEFORE the circuit breaker to prevent false stagnation halts. + ## Key Commands ### Installation diff --git a/lib/circuit_breaker.sh b/lib/circuit_breaker.sh index 2fb2a5b..f02edeb 100644 --- a/lib/circuit_breaker.sh +++ b/lib/circuit_breaker.sh @@ -201,6 +201,21 @@ record_loop_result() { [[ "${VERBOSE_PROGRESS:-}" == "true" ]] && echo "DEBUG: Research progress - phase: $last_phase -> $current_phase" >&2 fi fi + + # Check if SYNTHESIS.md was created or updated (research completion artifact) + if [[ -f "$SMITHERS_DIR/evidence/SYNTHESIS.md" ]]; then + local synthesis_mtime + synthesis_mtime=$(stat -c %Y "$SMITHERS_DIR/evidence/SYNTHESIS.md" 2>/dev/null || \ + stat -f %m "$SMITHERS_DIR/evidence/SYNTHESIS.md" 2>/dev/null || echo 0) + local last_synthesis_mtime=$(cat "$SMITHERS_DIR/.last_synthesis_mtime" 2>/dev/null || echo 0) + synthesis_mtime=$((synthesis_mtime + 0)) + last_synthesis_mtime=$((last_synthesis_mtime + 0)) + if [[ $synthesis_mtime -gt $last_synthesis_mtime ]]; then + research_progress=true + echo "$synthesis_mtime" > "$SMITHERS_DIR/.last_synthesis_mtime" + [[ "${VERBOSE_PROGRESS:-}" == "true" ]] && echo "DEBUG: Research progress - SYNTHESIS.md updated" >&2 + fi + fi # ========================================================================= # Determine if progress was made @@ -229,6 +244,18 @@ record_loop_result() { consecutive_no_progress=$((consecutive_no_progress + 1)) fi + # SAFETY: If research is complete, force progress=true to prevent CB from opening + # This handles the case where research is done but no new files are being created + if [[ "$has_progress" != "true" ]] && type is_research_complete &>/dev/null 2>&1; then + local _phases_file="$SMITHERS_DIR/phases/status.json" + local _evidence_dir="$SMITHERS_DIR/evidence" + if [[ -f "$_phases_file" ]] && [[ $(is_research_complete "$_phases_file" "$_evidence_dir") == "true" ]]; then + has_progress=true + consecutive_no_progress=0 + [[ "${VERBOSE_PROGRESS:-}" == "true" ]] && echo "DEBUG: Research complete - overriding no-progress" >&2 + fi + fi + # Detect same error repetition if [[ "$has_errors" == "true" ]]; then consecutive_same_error=$((consecutive_same_error + 1)) diff --git a/lib/phase_state_machine.sh b/lib/phase_state_machine.sh index ab51cc7..0ca7813 100644 --- a/lib/phase_state_machine.sh +++ b/lib/phase_state_machine.sh @@ -240,13 +240,21 @@ is_research_complete() { # PRIMARY CHECK: SYNTHESIS.md with RESEARCH_COMPLETE marker = done # This is the ground truth - if Claude wrote this, research is complete - local synthesis_file="$evidence_dir/SYNTHESIS.md" - if [[ -f "$synthesis_file" ]]; then - if grep -q "RESEARCH_COMPLETE" "$synthesis_file" 2>/dev/null; then - echo "true" - return 0 + # Check multiple locations where Claude might write the synthesis + local synthesis_locations=( + "$evidence_dir/SYNTHESIS.md" + "${evidence_dir%/*}/SYNTHESIS.md" + "SYNTHESIS.md" + ) + + for synthesis_file in "${synthesis_locations[@]}"; do + if [[ -f "$synthesis_file" ]]; then + if grep -qi "RESEARCH_COMPLETE" "$synthesis_file" 2>/dev/null; then + echo "true" + return 0 + fi fi - fi + done # FALLBACK: Phase-based completion (legacy) if [[ "$current_phase" == "COMPLETE" ]]; then diff --git a/smithers_loop.sh b/smithers_loop.sh index f192274..1b20796 100755 --- a/smithers_loop.sh +++ b/smithers_loop.sh @@ -303,7 +303,7 @@ setup_tmux_session() { # Split right pane horizontally (top: Claude output, bottom: status) tmux split-window -v -t "$session_name:0.1" -c "$project_dir" - # Right-top pane (pane 1): Live Claude Code output + # Right-top pane (pane 1): Live research output tmux send-keys -t "$session_name:0.1" "tail -f '$project_dir/$LIVE_LOG_FILE'" Enter # Right-bottom pane (pane 2): Smithers status monitor @@ -322,8 +322,9 @@ setup_tmux_session() { smithers_cmd="'$smithers_home/smithers_loop.sh'" fi - # Always use --live mode in tmux for real-time streaming - smithers_cmd="$smithers_cmd --live" + # Note: --live streaming mode (stdbuf | tee | jq pipeline) is fragile and can + # cause Claude to receive SIGSTOP. Background mode works reliably with the + # tail -f pane already providing real-time output via the log file. # Forward --calls if non-default if [[ "$MAX_CALLS_PER_HOUR" != "100" ]]; then @@ -570,7 +571,23 @@ should_exit_gracefully() { fi # ================================================================= - # PRIORITY 2: Safety limits (prevent infinite loops) + # PRIORITY 3: Direct exit_signal from response analysis + # If Claude explicitly signaled EXIT_SIGNAL: true in its output, + # respect it as a completion signal. + # ================================================================= + if [[ -f "$RESPONSE_ANALYSIS_FILE" ]]; then + local exit_signal=$(jq -r '.analysis.exit_signal // false' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null || echo "false") + if [[ "$exit_signal" == "true" ]]; then + local evidence_counts=$(get_evidence_counts) + log_status "SUCCESS" "Exit condition: Claude exit signal (exit_signal=true)" >&2 + log_status "INFO" "Evidence: $evidence_counts" >&2 + echo "exit_signal" + return 0 + fi + fi + + # ================================================================= + # PRIORITY 4: Safety limits (prevent infinite loops) # ================================================================= if [[ ! -f "$EXIT_SIGNALS_FILE" ]]; then echo "" @@ -779,7 +796,7 @@ init_claude_session() { # Handle stat failure (-1) - treat as needing new session # Don't expire sessions when we can't determine age if [[ $age_hours -eq -1 ]]; then - log_status "WARN" "Could not determine session age, starting new session" + log_status "WARN" "Could not determine session age, starting new session" >&2 rm -f "$CLAUDE_SESSION_FILE" echo "" return 0 @@ -787,7 +804,7 @@ init_claude_session() { # Check if session has expired if [[ $age_hours -ge $CLAUDE_SESSION_EXPIRY_HOURS ]]; then - log_status "INFO" "Session expired (${age_hours}h old, max ${CLAUDE_SESSION_EXPIRY_HOURS}h), starting new session" + log_status "INFO" "Session expired (${age_hours}h old, max ${CLAUDE_SESSION_EXPIRY_HOURS}h), starting new session" >&2 rm -f "$CLAUDE_SESSION_FILE" echo "" return 0 @@ -796,13 +813,13 @@ init_claude_session() { # Session is valid, try to read it local session_id=$(cat "$CLAUDE_SESSION_FILE" 2>/dev/null) if [[ -n "$session_id" ]]; then - log_status "INFO" "Resuming Claude session: ${session_id:0:20}... (${age_hours}h old)" + log_status "INFO" "Resuming Claude session: ${session_id:0:20}... (${age_hours}h old)" >&2 echo "$session_id" return 0 fi fi - log_status "INFO" "Starting new Claude session" + log_status "INFO" "Starting new Claude session" >&2 echo "" } @@ -1049,6 +1066,15 @@ build_claude_command() { CLAUDE_CMD_ARGS+=("--output-format" "json") fi + # Auto-approve tool use for allowed tools (prevents interactive permission prompts) + local perm_mode="${CLAUDE_PERMISSION_MODE:-acceptEdits}" + case "$perm_mode" in + acceptEdits|default|plan) ;; + *) log_status "WARN" "Invalid CLAUDE_PERMISSION_MODE '$perm_mode', using 'acceptEdits'" >&2 + perm_mode="acceptEdits" ;; + esac + CLAUDE_CMD_ARGS+=("--permission-mode" "$perm_mode") + # Add allowed tools (each tool as separate array element) if [[ -n "$CLAUDE_ALLOWED_TOOLS" ]]; then CLAUDE_CMD_ARGS+=("--allowedTools") @@ -1066,7 +1092,11 @@ build_claude_command() { # Add session continuity flag if [[ "$CLAUDE_USE_CONTINUE" == "true" ]]; then - CLAUDE_CMD_ARGS+=("--continue") + if [[ -n "$session_id" ]]; then + CLAUDE_CMD_ARGS+=("--resume" "$session_id") + else + CLAUDE_CMD_ARGS+=("--continue") + fi fi # Add loop context as system prompt (no escaping needed - array handles it) @@ -1376,6 +1406,9 @@ execute_claude_code() { last_line=$(tail -1 "$output_file" 2>/dev/null | head -c 80) # Copy to live.log for tmux monitoring cp "$output_file" "$LIVE_LOG_FILE" 2>/dev/null + else + # Output file empty (JSON mode buffers until done) — show waiting status + echo -e "$(date '+%H:%M:%S') $progress_indicator Phase: $(get_current_phase) | Claude working... (${progress_counter}0s)" >> "$LIVE_LOG_FILE" fi # Update progress file for monitor @@ -1415,6 +1448,23 @@ EOF log_status "SUCCESS" "✅ Claude Code execution completed successfully" + # Write research summary to live.log for tmux monitoring + { + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo " Loop #$loop_count completed — $(date '+%Y-%m-%d %H:%M:%S')" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + # Extract the result text from JSON output + local result_text + result_text=$(jq -r '.result // empty' "$output_file" 2>/dev/null) + if [[ -n "$result_text" ]]; then + echo "$result_text" + else + echo "(No result text in output)" + fi + echo "" + } > "$LIVE_LOG_FILE" + # Save session ID from JSON output if [[ "$CLAUDE_USE_CONTINUE" == "true" ]]; then save_claude_session "$output_file" @@ -1572,16 +1622,8 @@ main() { break fi - # Check circuit breaker before attempting execution - if should_halt_execution; then - reset_session "circuit_breaker_open" - update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "circuit_breaker_open" "halted" "stagnation_detected" - log_status "ERROR" "🛑 Circuit breaker has opened - execution halted" - break - fi - - # Check for graceful exit conditions BEFORE rate limiting - # This ensures we exit even if rate limited with research complete + # Check for graceful exit conditions BEFORE circuit breaker + # This ensures research completion is detected before the CB can halt the loop local exit_reason=$(should_exit_gracefully) if [[ "$exit_reason" != "" ]]; then # Handle permission_denied specially @@ -1642,6 +1684,14 @@ main() { break fi + # Check circuit breaker after graceful exit conditions + if should_halt_execution; then + reset_session "circuit_breaker_open" + update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "circuit_breaker_open" "halted" "stagnation_detected" + log_status "ERROR" "🛑 Circuit breaker has opened - execution halted" + break + fi + # Check rate limits (AFTER exit conditions so we can exit even when rate limited) if ! can_make_call; then wait_for_reset diff --git a/smithers_status.sh b/smithers_status.sh index 57c9565..2487be7 100755 --- a/smithers_status.sh +++ b/smithers_status.sh @@ -69,7 +69,7 @@ if [[ -f "$SYNTHESIS_FILE" ]]; then echo " Modified: $MODIFIED" # Check for completion markers - if grep -q "RESEARCH_COMPLETE" "$SYNTHESIS_FILE" 2>/dev/null; then + if grep -qi "RESEARCH_COMPLETE" "$SYNTHESIS_FILE" 2>/dev/null; then echo -e "${GREEN}✅ Contains RESEARCH_COMPLETE marker${NC}" COMPLETE=true else diff --git a/tests/unit/test_circuit_breaker.bats b/tests/unit/test_circuit_breaker.bats new file mode 100644 index 0000000..b445a50 --- /dev/null +++ b/tests/unit/test_circuit_breaker.bats @@ -0,0 +1,190 @@ +#!/usr/bin/env bats +# Unit Tests for Circuit Breaker Research Awareness +# Tests that the circuit breaker detects SYNTHESIS.md as progress +# and stays CLOSED when research is complete. + +setup() { + # Create temporary test directory + TEST_DIR=$(mktemp -d) + export SMITHERS_DIR="$TEST_DIR/.smithers" + mkdir -p "$SMITHERS_DIR/evidence" + mkdir -p "$SMITHERS_DIR/phases" + mkdir -p "$SMITHERS_DIR/logs" + + # cd into the test dir so cwd-relative SYNTHESIS.md lookups + # in is_research_complete() don't pick up stale project root files + cd "$TEST_DIR" + + # Source the libraries + source "$(dirname "$BATS_TEST_DIRNAME")/../lib/date_utils.sh" + source "$(dirname "$BATS_TEST_DIRNAME")/../lib/phase_state_machine.sh" + source "$(dirname "$BATS_TEST_DIRNAME")/../lib/circuit_breaker.sh" + + # Update CB paths to use our test SMITHERS_DIR + export CB_STATE_FILE="$SMITHERS_DIR/.circuit_breaker_state" + export CB_HISTORY_FILE="$SMITHERS_DIR/.circuit_breaker_history" + + # Initialize empty evidence files + echo "[]" > "$SMITHERS_DIR/evidence/sources.json" + echo "[]" > "$SMITHERS_DIR/evidence/claims.json" + echo "[]" > "$SMITHERS_DIR/evidence/validations.json" + + # Initialize phases status + cat > "$SMITHERS_DIR/phases/status.json" << 'EOF' +{ + "current_phase": "SCOPE", + "phases": { + "SCOPE": {"status": "active", "started_at": null, "completed_at": null}, + "DISCOVER": {"status": "pending", "started_at": null, "completed_at": null}, + "DEEP_READ": {"status": "pending", "started_at": null, "completed_at": null}, + "VALIDATE": {"status": "pending", "started_at": null, "completed_at": null}, + "SYNTHESIZE": {"status": "pending", "started_at": null, "completed_at": null} + } +} +EOF + + # Initialize circuit breaker + init_circuit_breaker +} + +teardown() { + cd / + rm -rf "$TEST_DIR" +} + +# ============================================================================= +# SYNTHESIS.MD PROGRESS DETECTION TESTS +# ============================================================================= + +@test "record_loop_result detects SYNTHESIS.md creation as progress" { + # Create SYNTHESIS.md (simulates research completion artifact) + echo "# Research Synthesis" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + # Record a loop with no file changes and no other progress signals + # files_changed=0, has_errors=false, output_length=100 + record_loop_result 1 0 false 100 + + # The no-progress counter should be 0 because SYNTHESIS.md was detected + local cnp=$(jq -r '.consecutive_no_progress' "$CB_STATE_FILE") + [[ "$cnp" -eq 0 ]] +} + +@test "record_loop_result does not detect SYNTHESIS.md progress on second call without update" { + # Create SYNTHESIS.md + echo "# Research Synthesis" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + # First call - should detect SYNTHESIS.md and record its mtime + record_loop_result 1 0 false 100 + + # Second call without updating SYNTHESIS.md - should NOT count as progress + record_loop_result 2 0 false 100 + + # Now consecutive_no_progress should be 1 since SYNTHESIS.md mtime didn't change + local cnp=$(jq -r '.consecutive_no_progress' "$CB_STATE_FILE") + [[ "$cnp" -eq 1 ]] +} + +@test "record_loop_result detects SYNTHESIS.md update as new progress" { + # Create SYNTHESIS.md + echo "# Research Synthesis" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + # First call detects initial creation + record_loop_result 1 0 false 100 + + # Wait a moment and update SYNTHESIS.md to get a new mtime + sleep 1 + echo "# Updated Synthesis" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + # Second call should detect the update + record_loop_result 2 0 false 100 + + local cnp=$(jq -r '.consecutive_no_progress' "$CB_STATE_FILE") + [[ "$cnp" -eq 0 ]] +} + +@test "record_loop_result without SYNTHESIS.md increments no-progress counter" { + # No SYNTHESIS.md, no file changes, no other progress + record_loop_result 1 0 false 100 + + local cnp=$(jq -r '.consecutive_no_progress' "$CB_STATE_FILE") + [[ "$cnp" -eq 1 ]] +} + +# ============================================================================= +# CIRCUIT BREAKER STAYS CLOSED WHEN RESEARCH IS COMPLETE +# ============================================================================= + +@test "circuit breaker stays CLOSED after 4+ no-progress loops when research is complete" { + # Set up research as complete: SYNTHESIS.md with RESEARCH_COMPLETE marker + echo "# Synthesis +RESEARCH_COMPLETE +Final research output" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + # Pre-set the synthesis mtime so the SYNTHESIS.md mtime check does NOT fire + # (we want to test the is_research_complete safety net, not the mtime check) + local synth_mtime + synth_mtime=$(stat -c %Y "$SMITHERS_DIR/evidence/SYNTHESIS.md" 2>/dev/null || \ + stat -f %m "$SMITHERS_DIR/evidence/SYNTHESIS.md" 2>/dev/null || echo 0) + echo "$synth_mtime" > "$SMITHERS_DIR/.last_synthesis_mtime" + + # Run 4 loops with no file changes (exceeds CB_NO_PROGRESS_THRESHOLD=3) + # Without the safety net, this would open the circuit breaker + for i in 1 2 3 4; do + record_loop_result "$i" 0 false 100 + done + + # Circuit breaker should still be CLOSED because research is complete + local state=$(jq -r '.state' "$CB_STATE_FILE") + [[ "$state" == "CLOSED" ]] +} + +@test "circuit breaker stays CLOSED with COMPLETE phase and no file changes" { + # Set phase to COMPLETE + jq '.current_phase = "COMPLETE"' "$SMITHERS_DIR/phases/status.json" > "$TEST_DIR/tmp.json" \ + && mv "$TEST_DIR/tmp.json" "$SMITHERS_DIR/phases/status.json" + + # Pre-set synthesis mtime to avoid mtime-based detection + # (no SYNTHESIS.md exists, so mtime check won't fire anyway) + + # Run 4 loops with no file changes + for i in 1 2 3 4; do + record_loop_result "$i" 0 false 100 + done + + # Circuit breaker should still be CLOSED + local state=$(jq -r '.state' "$CB_STATE_FILE") + [[ "$state" == "CLOSED" ]] +} + +@test "circuit breaker OPENS after 3+ no-progress loops when research is NOT complete" { + # Research is not complete (still in SCOPE, no SYNTHESIS.md) + # Run enough loops to trigger the circuit breaker + # record_loop_result returns exit code 1 when circuit opens, so use || true + for i in 1 2 3; do + record_loop_result "$i" 0 false 100 || true + done + + # Circuit breaker should be OPEN because no progress and research not complete + local state=$(jq -r '.state' "$CB_STATE_FILE") + [[ "$state" == "OPEN" ]] +} + +@test "consecutive_no_progress resets to 0 when research is complete" { + # Set up completed research + echo "# Synthesis +RESEARCH_COMPLETE" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + # Pre-set synthesis mtime so mtime check doesn't fire + local synth_mtime + synth_mtime=$(stat -c %Y "$SMITHERS_DIR/evidence/SYNTHESIS.md" 2>/dev/null || \ + stat -f %m "$SMITHERS_DIR/evidence/SYNTHESIS.md" 2>/dev/null || echo 0) + echo "$synth_mtime" > "$SMITHERS_DIR/.last_synthesis_mtime" + + # Run loops with no progress + record_loop_result 1 0 false 100 + record_loop_result 2 0 false 100 + + # Check that consecutive_no_progress is 0 (reset by is_research_complete safety) + local cnp=$(jq -r '.consecutive_no_progress' "$CB_STATE_FILE") + [[ "$cnp" -eq 0 ]] +} diff --git a/tests/unit/test_exit_detection.bats b/tests/unit/test_exit_detection.bats index 3c2a6fa..10c9479 100644 --- a/tests/unit/test_exit_detection.bats +++ b/tests/unit/test_exit_detection.bats @@ -7,17 +7,17 @@ setup() { # Source helper functions source "$(dirname "$BATS_TEST_FILENAME")/../helpers/test_helper.bash" - # Set up environment with .ralph/ subfolder structure - export RALPH_DIR=".ralph" - export EXIT_SIGNALS_FILE="$RALPH_DIR/.exit_signals" - export RESPONSE_ANALYSIS_FILE="$RALPH_DIR/.response_analysis" + # Set up environment with .smithers/ subfolder structure + export SMITHERS_DIR=".smithers" + export EXIT_SIGNALS_FILE="$SMITHERS_DIR/.exit_signals" + export RESPONSE_ANALYSIS_FILE="$SMITHERS_DIR/.response_analysis" export MAX_CONSECUTIVE_TEST_LOOPS=3 export MAX_CONSECUTIVE_DONE_SIGNALS=2 # Create temp test directory - export TEST_TEMP_DIR="$(mktemp -d /tmp/ralph-test.XXXXXX)" + export TEST_TEMP_DIR="$(mktemp -d /tmp/smithers-test.XXXXXX)" cd "$TEST_TEMP_DIR" - mkdir -p "$RALPH_DIR" + mkdir -p "$SMITHERS_DIR" # Initialize exit signals file echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": []}' > "$EXIT_SIGNALS_FILE" @@ -28,7 +28,7 @@ teardown() { rm -rf "$TEST_TEMP_DIR" } -# Helper function: should_exit_gracefully (extracted from ralph_loop.sh) +# Helper function: should_exit_gracefully (extracted from smithers_loop.sh) # Updated to respect EXIT_SIGNAL from .response_analysis for completion indicators should_exit_gracefully() { if [[ ! -f "$EXIT_SIGNALS_FILE" ]]; then @@ -61,7 +61,18 @@ should_exit_gracefully() { return 0 fi - # 3. Strong completion indicators (only if Claude's EXIT_SIGNAL is true) + # 3. Direct exit_signal from response analysis + # If Claude explicitly signaled EXIT_SIGNAL: true in its output, + # respect it as a completion signal. + if [[ -f "$RESPONSE_ANALYSIS_FILE" ]]; then + local direct_exit_signal=$(jq -r '.analysis.exit_signal // false' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null || echo "false") + if [[ "$direct_exit_signal" == "true" ]]; then + echo "exit_signal" + return 0 + fi + fi + + # 4. Strong completion indicators (only if Claude's EXIT_SIGNAL is true) # This prevents premature exits when heuristics detect completion patterns # but Claude explicitly indicates work is still in progress local claude_exit_signal="false" @@ -74,10 +85,10 @@ should_exit_gracefully() { return 0 fi - # 4. Check fix_plan.md for completion - if [[ -f "$RALPH_DIR/fix_plan.md" ]]; then - local total_items=$(grep -c "^- \[" "$RALPH_DIR/fix_plan.md" 2>/dev/null) - local completed_items=$(grep -c "^- \[x\]" "$RALPH_DIR/fix_plan.md" 2>/dev/null) + # 5. Check fix_plan.md for completion + if [[ -f "$SMITHERS_DIR/fix_plan.md" ]]; then + local total_items=$(grep -c "^- \[" "$SMITHERS_DIR/fix_plan.md" 2>/dev/null) + local completed_items=$(grep -c "^- \[x\]" "$SMITHERS_DIR/fix_plan.md" 2>/dev/null) # Handle case where grep returns no matches (exit code 1) [[ -z "$total_items" ]] && total_items=0 @@ -150,10 +161,12 @@ should_exit_gracefully() { } # Test 8: Exit on completion indicators (2 indicators) with EXIT_SIGNAL=true -@test "should_exit_gracefully exits on 2 completion indicators" { +# Note: With the new direct exit_signal check (PRIORITY 3), exit_signal=true +# is now caught before the completion_indicators check. +@test "should_exit_gracefully exits on exit_signal with 2 completion indicators" { echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": [1,2]}' > "$EXIT_SIGNALS_FILE" - # Must also have exit_signal=true in .response_analysis (after fix) + # exit_signal=true in .response_analysis triggers direct exit cat > "$RESPONSE_ANALYSIS_FILE" << 'EOF' { "loop_number": 2, @@ -165,7 +178,7 @@ should_exit_gracefully() { EOF result=$(should_exit_gracefully || true) - assert_equal "$result" "project_complete" + assert_equal "$result" "exit_signal" } # Test 9: No exit with only 1 completion indicator @@ -178,7 +191,7 @@ EOF # Test 10: Exit when fix_plan.md all items complete @test "should_exit_gracefully exits when all fix_plan items complete" { - cat > "$RALPH_DIR/fix_plan.md" << 'EOF' + cat > "$SMITHERS_DIR/fix_plan.md" << 'EOF' # Fix Plan - [x] Task 1 - [x] Task 2 @@ -191,7 +204,7 @@ EOF # Test 11: No exit when fix_plan.md partially complete @test "should_exit_gracefully continues when fix_plan partially complete" { - cat > "$RALPH_DIR/fix_plan.md" << 'EOF' + cat > "$SMITHERS_DIR/fix_plan.md" << 'EOF' # Fix Plan - [x] Task 1 - [ ] Task 2 @@ -238,7 +251,7 @@ EOF # Test 16: fix_plan.md with no checkboxes @test "should_exit_gracefully handles fix_plan with no checkboxes" { - cat > "$RALPH_DIR/fix_plan.md" << 'EOF' + cat > "$SMITHERS_DIR/fix_plan.md" << 'EOF' # Fix Plan This is just text, no tasks yet. EOF @@ -249,7 +262,7 @@ EOF # Test 17: fix_plan.md with mixed checkbox formats @test "should_exit_gracefully handles mixed checkbox formats" { - cat > "$RALPH_DIR/fix_plan.md" << 'EOF' + cat > "$SMITHERS_DIR/fix_plan.md" << 'EOF' # Fix Plan - [x] Task 1 completed - [ ] Task 2 pending @@ -325,8 +338,8 @@ EOF assert_equal "$result" "" } -# Test 22: Completion indicators with EXIT_SIGNAL=true should exit -@test "should_exit_gracefully exits when completion indicators high AND EXIT_SIGNAL=true" { +# Test 22: EXIT_SIGNAL=true triggers direct exit_signal before completion indicators +@test "should_exit_gracefully returns exit_signal when EXIT_SIGNAL=true" { # Setup: High completion indicators echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": [1,2]}' > "$EXIT_SIGNALS_FILE" @@ -351,8 +364,8 @@ EOF EOF result=$(should_exit_gracefully) - # Should exit because BOTH conditions are met - assert_equal "$result" "project_complete" + # exit_signal=true is caught by PRIORITY 3 (direct exit_signal check) + assert_equal "$result" "exit_signal" } # Test 23: Completion indicators without .response_analysis file should continue @@ -381,8 +394,8 @@ EOF assert_equal "$result" "" } -# Test 25: EXIT_SIGNAL=true but completion indicators below threshold should continue -@test "should_exit_gracefully continues when EXIT_SIGNAL=true but indicators below threshold" { +# Test 25: EXIT_SIGNAL=true triggers exit_signal regardless of completion indicators count +@test "should_exit_gracefully returns exit_signal when EXIT_SIGNAL=true even with low indicators" { # Setup: Only 1 completion indicator (below threshold of 2) echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": [1]}' > "$EXIT_SIGNALS_FILE" @@ -398,8 +411,8 @@ EOF EOF result=$(should_exit_gracefully || true) - # Should NOT exit because indicators below threshold - assert_equal "$result" "" + # exit_signal=true is caught by PRIORITY 3 (direct exit_signal check) + assert_equal "$result" "exit_signal" } # Test 26: EXIT_SIGNAL=false with explicit false value in JSON @@ -520,6 +533,50 @@ EOF assert_equal "$result" "" } +# ============================================================================= +# DIRECT EXIT_SIGNAL TESTS (PRIORITY 3: exit_signal from response analysis) +# ============================================================================= +# These tests verify that should_exit_gracefully returns "exit_signal" when +# .response_analysis has exit_signal=true, and continues when exit_signal=false. + +# Test 32a: should_exit_gracefully returns "exit_signal" when exit_signal=true +@test "should_exit_gracefully returns exit_signal when response_analysis has exit_signal=true" { + echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": []}' > "$EXIT_SIGNALS_FILE" + + cat > "$RESPONSE_ANALYSIS_FILE" << 'EOF' +{ + "loop_number": 5, + "analysis": { + "exit_signal": true, + "confidence_score": 100, + "work_summary": "Research complete" + } +} +EOF + + result=$(should_exit_gracefully) + assert_equal "$result" "exit_signal" +} + +# Test 32b: should_exit_gracefully continues when exit_signal=false +@test "should_exit_gracefully continues when response_analysis has exit_signal=false" { + echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": []}' > "$EXIT_SIGNALS_FILE" + + cat > "$RESPONSE_ANALYSIS_FILE" << 'EOF' +{ + "loop_number": 3, + "analysis": { + "exit_signal": false, + "confidence_score": 70, + "work_summary": "Still working" + } +} +EOF + + result=$(should_exit_gracefully || true) + assert_equal "$result" "" +} + # ============================================================================= # UPDATE_EXIT_SIGNALS TESTS (Issue: Confidence-based completion indicators) # ============================================================================= @@ -703,7 +760,7 @@ EOF # ============================================================================= # PERMISSION DENIAL EXIT TESTS (Issue #101) # ============================================================================= -# When Claude Code is denied permission to run commands, Ralph should detect +# When Claude Code is denied permission to run commands, Smithers should detect # this from the permission_denials field and halt the loop to allow user intervention. # Helper function with permission denial support diff --git a/tests/unit/test_phase_state_machine.bats b/tests/unit/test_phase_state_machine.bats index e46f46b..2e62755 100644 --- a/tests/unit/test_phase_state_machine.bats +++ b/tests/unit/test_phase_state_machine.bats @@ -9,7 +9,10 @@ setup() { export SMITHERS_DIR="$TEST_DIR/.smithers" mkdir -p "$SMITHERS_DIR/evidence" mkdir -p "$SMITHERS_DIR/phases" - + + # Run from TEST_DIR so relative "SYNTHESIS.md" resolves inside temp dir + cd "$TEST_DIR" + # Source the libraries source "$(dirname "$BATS_TEST_DIRNAME")/../lib/date_utils.sh" source "$(dirname "$BATS_TEST_DIRNAME")/../lib/phase_state_machine.sh" @@ -272,9 +275,65 @@ Some research content but no completion marker # Setup echo '{"current_phase": "SYNTHESIZE"}' > "$SMITHERS_DIR/phases/status.json" rm -f "$SMITHERS_DIR/evidence/SYNTHESIS.md" - + # Test - should return false even in SYNTHESIZE phase run is_research_complete "$SMITHERS_DIR/phases/status.json" "$SMITHERS_DIR/evidence" [ "$status" -eq 0 ] [ "$output" = "false" ] } + +# ============================================================================= +# CASE-INSENSITIVE RESEARCH_COMPLETE MARKER TESTS +# ============================================================================= + +@test "is_research_complete returns true with mixed-case Research_Complete marker" { + echo '{"current_phase": "DISCOVER"}' > "$SMITHERS_DIR/phases/status.json" + echo "# Synthesis +Some research content here +**Research_Complete** +" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + run is_research_complete "$SMITHERS_DIR/phases/status.json" "$SMITHERS_DIR/evidence" + [ "$status" -eq 0 ] + [ "$output" = "true" ] +} + +@test "is_research_complete returns true with lowercase research_complete marker" { + echo '{"current_phase": "DISCOVER"}' > "$SMITHERS_DIR/phases/status.json" + echo "# Synthesis +Some research content here +research_complete +" > "$SMITHERS_DIR/evidence/SYNTHESIS.md" + + run is_research_complete "$SMITHERS_DIR/phases/status.json" "$SMITHERS_DIR/evidence" + [ "$status" -eq 0 ] + [ "$output" = "true" ] +} + +# ============================================================================= +# MULTI-LOCATION SYNTHESIS.md SEARCH TESTS +# ============================================================================= + +@test "is_research_complete returns true when SYNTHESIS.md is in parent dir (.smithers/)" { + echo '{"current_phase": "DISCOVER"}' > "$SMITHERS_DIR/phases/status.json" + # Place SYNTHESIS.md in .smithers/ (parent of .smithers/evidence/) + echo "# Synthesis +RESEARCH_COMPLETE +" > "$SMITHERS_DIR/SYNTHESIS.md" + + run is_research_complete "$SMITHERS_DIR/phases/status.json" "$SMITHERS_DIR/evidence" + [ "$status" -eq 0 ] + [ "$output" = "true" ] +} + +@test "is_research_complete returns true when SYNTHESIS.md is at project root" { + echo '{"current_phase": "DISCOVER"}' > "$SMITHERS_DIR/phases/status.json" + # Place SYNTHESIS.md at project root (cwd) + echo "# Synthesis +RESEARCH_COMPLETE +" > "SYNTHESIS.md" + + run is_research_complete "$SMITHERS_DIR/phases/status.json" "$SMITHERS_DIR/evidence" + [ "$status" -eq 0 ] + [ "$output" = "true" ] +}