Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions agent_fox/archetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class ArchetypeEntry:
"pre-review": ModeConfig(
injection="auto_pre",
allowlist=[], # no shell access
retry_predecessor=True,
),
"drift-review": ModeConfig(
injection="auto_pre",
Expand Down
2 changes: 1 addition & 1 deletion agent_fox/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ class ReviewerConfig(BaseModel):
model_config = ConfigDict(extra="ignore")

pre_review_block_threshold: Annotated[int, Clamped(ge=0)] = Field(
default=3,
default=1,
description="Finding count to block merge for pre-review mode",
)
drift_review_block_threshold: int | None = Field(
Expand Down
5 changes: 3 additions & 2 deletions agent_fox/engine/blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ def evaluate_review_blocking(

parsed = parse_node_id(record.node_id)
spec_name = parsed.spec_name
task_group = str(parsed.group_number) if parsed.group_number else "1"
# Group-0 nodes are auto_pre reviewers; the first coder group is always 1
task_group = "1" if parsed.group_number == 0 else str(parsed.group_number)
coder_node_id = f"{spec_name}:{task_group}"

# Display label for log messages
Expand Down Expand Up @@ -182,7 +183,7 @@ def evaluate_review_blocking(
return BlockDecision(should_block=False)
configured_threshold = rc.drift_review_block_threshold

blocked = critical_count > configured_threshold
blocked = critical_count >= configured_threshold

if blocked:
reason = _format_block_reason(
Expand Down
93 changes: 91 additions & 2 deletions agent_fox/engine/result_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,99 @@ def check_skeptic_blocking(
sink=self._sink,
run_id=self._run_id,
)
if decision.should_block:
self._block_task(decision.coder_node_id, state, decision.reason)
if not decision.should_block:
return False

node_archetype = self._get_node_archetype(record.node_id)
node_mode = self._get_node_mode(record.node_id)
archetype_entry = get_archetype(node_archetype)
if node_mode is not None:
from agent_fox.archetypes import resolve_effective_config

archetype_entry = resolve_effective_config(archetype_entry, node_mode)

if archetype_entry.retry_predecessor:
return self._retry_on_review_block(record, decision, state)

self._block_task(decision.coder_node_id, state, decision.reason)
self._generate_errata(record)
return True

def _retry_on_review_block(
    self,
    record: SessionRecord,
    decision: Any,
    state: ExecutionState,
) -> bool:
    """Convert a review block to a coder retry when retry_predecessor is set.

    Instead of permanently blocking the coder, lets it proceed with review
    findings injected as context. Uses the coder's escalation ladder to
    prevent infinite retries; permanently blocks when the ladder is exhausted.

    The injection of findings is not performed in this method; presumably
    it happens when the coder session is prepared (confirm against caller).

    Args:
        record: Session record of the review node that produced the block.
            Only ``record.node_id`` is read here; the record itself is
            passed on to ``_generate_errata``.
        decision: Blocking decision for the review. ``coder_node_id`` and
            ``reason`` are read from it.
        state: Execution state, forwarded to ``_block_task`` when the
            coder ends up permanently blocked.

    Returns:
        True if the coder was permanently blocked (ladder exhausted),
        False if converted to a retry.
    """
    # Function-scope import (presumably to avoid an import cycle — confirm).
    from agent_fox.routing.escalation import EscalationLadder

    coder_node_id = decision.coder_node_id

    # Reuse the ladder already registered for this coder node, if any.
    # Otherwise build one that starts at the coder archetype's default
    # tier, is capped at ADVANCED, and register it so later review blocks
    # for the same coder accumulate on the same ladder.
    pred_ladder = self._routing_ladders.get(coder_node_id)
    if pred_ladder is None:
        coder_archetype = self._get_node_archetype(coder_node_id)
        coder_entry = get_archetype(coder_archetype)
        pred_ladder = EscalationLadder(
            starting_tier=ModelTier(coder_entry.default_model_tier),
            tier_ceiling=ModelTier.ADVANCED,
            retries_before_escalation=self._retries_before_escalation,
        )
        self._routing_ladders[coder_node_id] = pred_ladder

    # Every review block is counted as one failure against the coder.
    pred_ladder.record_failure()

    # Ladder exhausted: fall back to the permanent-block path (block the
    # coder, generate errata) and report the block to the caller.
    if pred_ladder.is_exhausted:
        logger.warning(
            "Review retry-predecessor exhausted for %s, permanently blocking",
            coder_node_id,
        )
        self._block_task(coder_node_id, state, decision.reason)
        self._generate_errata(record)
        return True

    logger.info(
        "Review blocking converted to retry for %s (findings injected as context)",
        coder_node_id,
    )
    # A coder that has already completed is put back to "pending" so it
    # can run again; other coder states are left untouched.
    # NOTE(review): the review node's reset is assumed to sit under this
    # same condition — confirm the intended nesting of the second
    # assignment before relying on it.
    coder_status = self._graph_sync.node_states.get(coder_node_id)
    if coder_status == "completed":
        self._graph_sync.node_states[coder_node_id] = "pending"
        self._graph_sync.node_states[record.node_id] = "pending"

    # The audit payload labels the review node's transition as
    # "completed" -> "retry_predecessor" and carries the block reason
    # together with the affected coder node.
    emit_audit_event(
        self._sink,
        self._run_id,
        AuditEventType.TASK_STATUS_CHANGE,
        node_id=record.node_id,
        payload={
            "from_status": "completed",
            "to_status": "retry_predecessor",
            "reason": decision.reason,
            "coder_node_id": coder_node_id,
        },
    )

    # The task callback is optional; when present it receives a
    # "disagreed" event for the review node naming the coder as its
    # predecessor, with a zero duration.
    if self._task_callback is not None:
        self._task_callback(
            TaskEvent(
                node_id=record.node_id,
                status="disagreed",
                duration_s=0,
                archetype=self._get_node_archetype(record.node_id),
                predecessor_node=coder_node_id,
            )
        )

    # Errata are generated on the retry path as well as the blocked path.
    self._generate_errata(record)
    return False

def _generate_errata(self, record: SessionRecord) -> None:
Expand Down
4 changes: 2 additions & 2 deletions agent_fox/session/convergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def converge_skeptic(
if severity == "critical" and count >= majority_threshold:
majority_critical_count += 1

blocked = majority_critical_count > block_threshold
blocked = majority_critical_count > 0 and majority_critical_count >= block_threshold

return merged, blocked

Expand Down Expand Up @@ -191,7 +191,7 @@ def converge_skeptic_records(
if severity == "critical" and count >= majority_threshold:
majority_critical_count += 1

blocked = majority_critical_count > block_threshold
blocked = majority_critical_count > 0 and majority_critical_count >= block_threshold

return merged, blocked

Expand Down
4 changes: 2 additions & 2 deletions tests/unit/core/test_reviewer_consolidation.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,8 @@ def test_reviewer_config(self) -> None:
from agent_fox.core.config import ReviewerConfig

rc = ReviewerConfig()
assert rc.pre_review_block_threshold == 3, (
f"pre_review_block_threshold should be 3, got {rc.pre_review_block_threshold}"
assert rc.pre_review_block_threshold == 1, (
f"pre_review_block_threshold should be 1, got {rc.pre_review_block_threshold}"
)
assert rc.drift_review_block_threshold is None, (
f"drift_review_block_threshold should be None (advisory), got {rc.drift_review_block_threshold}"
Expand Down
50 changes: 39 additions & 11 deletions tests/unit/engine/test_block_budget.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,22 +204,50 @@ class TestSkepticBlocking:
async def test_reviewer_blocks_coder_on_critical_findings(
self,
) -> None:
"""When reviewer:pre-review finds criticals above threshold, coder is blocked."""
db_conn = _chain_with_reviewer()
"""When skeptic finds criticals above threshold, coder is blocked.

Uses the legacy skeptic archetype (no retry_predecessor) to test
permanent blocking. Pre-review mode now has retry_predecessor=True
and converts blocks to retries instead.
"""
db_conn = write_plan_to_db(
nodes={
"spec_a:1:skeptic": {
"title": "Skeptic Review",
"spec_name": "spec_a",
"group_number": 1,
"archetype": "skeptic",
},
"spec_a:1": {
"title": "Implement spec_a",
"spec_name": "spec_a",
"group_number": 1,
"archetype": "coder",
},
"spec_a:1:verifier": {
"title": "Verify spec_a",
"spec_name": "spec_a",
"group_number": 1,
"archetype": "verifier",
},
},
edges=[
{"source": "spec_a:1", "target": "spec_a:1:skeptic", "kind": "intra_spec"},
{"source": "spec_a:1", "target": "spec_a:1:verifier", "kind": "intra_spec"},
],
order=["spec_a:1", "spec_a:1:skeptic", "spec_a:1:verifier"],
)

runner = MockSessionRunner()
runner.configure(
"spec_a:0:reviewer:pre-review",
[
MockSessionOutcome(
"spec_a:0:reviewer:pre-review",
"completed",
archetype="reviewer",
)
],
"spec_a:1",
[MockSessionOutcome("spec_a:1", "completed", archetype="coder")],
)
runner.configure(
"spec_a:1:skeptic",
[MockSessionOutcome("spec_a:1:skeptic", "completed", archetype="skeptic")],
)

# Mock review findings with 4 criticals (threshold default is 3)
mock_findings = []
for i in range(4):
finding = MagicMock()
Expand Down
Loading