From 101f3afb409a5b9a56410cc476e8d47e19bbe57a Mon Sep 17 00:00:00 2001 From: Nirvana Tikku <26783+nirvanatikku@users.noreply.github.com> Date: Tue, 24 Mar 2026 19:27:05 -0400 Subject: [PATCH] Add rfc26 - clarify intent suspension in containers and human retry config --- CHANGELOG.md | 77 + docs/changelog.md | 217 +- docs/guide/human-in-the-loop.md | 369 ++++ docs/overrides/home.html | 15 +- docs/rfcs/0002-intent-graphs.md | 19 +- docs/rfcs/0007-intent-portfolios.md | 30 + docs/rfcs/0010-retry-policies.md | 1 + docs/rfcs/0012-task-decomposition-planning.md | 35 +- docs/rfcs/0024-workflow-io-contracts.md | 691 +++++++ docs/rfcs/0025-human-in-the-loop.md | 316 +++ .../0026-suspension-container-interaction.md | 426 ++++ docs/spec/workflow-yaml.md | 140 +- mcp-server/package.json | 2 +- mcp-server/src/index.ts | 2 +- mkdocs.yml | 14 +- openintent/__init__.py | 45 +- openintent/adapters/openai_adapter.py | 11 + openintent/agents.py | 1090 +++++++++- openintent/client.py | 15 + openintent/exceptions.py | 28 + openintent/models.py | 434 ++++ openintent/server/app.py | 382 ++++ openintent/server/config.py | 10 +- openintent/workflow.py | 585 +++++- pyproject.toml | 2 +- tests/test_adapters.py | 96 + tests/test_hitl.py | 1785 +++++++++++++++++ tests/test_workflow_io.py | 1591 +++++++++++++++ 28 files changed, 8325 insertions(+), 103 deletions(-) create mode 100644 docs/guide/human-in-the-loop.md create mode 100644 docs/rfcs/0024-workflow-io-contracts.md create mode 100644 docs/rfcs/0025-human-in-the-loop.md create mode 100644 docs/rfcs/0026-suspension-container-interaction.md create mode 100644 tests/test_hitl.py create mode 100644 tests/test_workflow_io.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e9b3a7..2cbe262 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,83 @@ All notable changes to the OpenIntent SDK will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.17.0] - 2026-03-24 + +### Added + +- **RFC-0026: Suspension Propagation & Retry** — Closes three gaps left by RFC-0025. + + - **Container propagation rules (5 normative rules)** — Intent graph: suspended child blocks dependents; parent aggregate status becomes `suspended_awaiting_input` when any child is suspended. Portfolio: aggregate gains `has_suspended_members`/`suspended_member_count`; `portfolio.member_suspended` / `portfolio.member_resumed` events. Plan/Task: bidirectional mirror — when an intent suspends, its task transitions to `blocked` with `blocked_reason: "intent_suspended"` and `suspended_intent_id`; plan progress gains `suspended_tasks`. Workflow: downstream phases receive `UpstreamIntentSuspendedError` at claim time; workflow progress gains `suspended_phases`. Deadline: suspension deadline governs expiry and MUST NOT exceed portfolio `due_before` constraint. + - **`HumanRetryPolicy` dataclass** — Re-notification and escalation policy with `max_attempts`, `interval_seconds`, `strategy` (`"fixed"` / `"linear"` / `"exponential"`), `escalation_ladder` (list of `EscalationStep`), and `final_fallback_policy`. Serialises to/from dict via `to_dict()` / `from_dict()`. + - **`EscalationStep` dataclass** — A single escalation step with `attempt`, `channel_hint`, and `notify_to`. Triggers `intent.suspension_escalated` event. Backwards-compatible aliases: `after_attempt`, `channel`, `notify`. + - **`UpstreamIntentSuspendedError`** — Raised by `WorkflowSpec.validate_claim_inputs()` when a declared input references an upstream phase whose intent is currently `suspended_awaiting_input`. Carries `task_id`, `phase_name`, `suspended_intent_id`, `expected_resume_at`. Subclass of `WorkflowError`. 
+ - **`BaseAgent.default_human_retry_policy`** — Class-level or instance-level attribute (`None` by default). Applied to all `request_input()` calls that do not supply an explicit `retry_policy` argument. + - **`request_input(retry_policy=…)` parameter** — Accepts a `HumanRetryPolicy`. When supplied, the SDK re-fires `@on_input_requested` hooks on each attempt, emits `intent.suspension_renotified` per attempt, emits `intent.suspension_escalated` for escalation steps, and applies `final_fallback_policy` after all attempts are exhausted. + - **Three-level policy cascade** — call-site policy → `default_human_retry_policy` on the agent → server-configured platform default (`GET /api/v1/server/config` → `suspension.default_retry_policy`). + - **Four new `EventType` constants** — `intent.suspension_renotified`, `intent.suspension_escalated`, `portfolio.member_suspended`, `portfolio.member_resumed`. + - **`retry_policy` field on `SuspensionRecord`** — Optional; additive; existing single-attempt behaviour preserved when absent. + - **RFC-0026 protocol document** — `docs/rfcs/0026-suspension-container-interaction.md` with five container rules, `HumanRetryPolicy` schema, three-level cascade, coordinator policy extension, RFC-0010 relationship note, end-to-end example, and cross-RFC patch summary. + - **Cross-RFC patches** — RFC-0002 (aggregate `suspended_awaiting_input` status counter and completion-gate clarification), RFC-0007 (portfolio suspension-aware aggregate and events), RFC-0010 (RFC-0026 relationship note), RFC-0012 (task/intent suspension mirror, `suspended_tasks` plan progress field), RFC-0024 (`UpstreamIntentSuspendedError`, `suspended_phases` workflow progress field), RFC-0025 (`retry_policy` on `SuspensionRecord`, per-attempt `timeout_seconds` semantics, `fallback_policy` alias note, extended cross-RFC table). 
+ - **41 new tests** — `tests/test_hitl.py` (`HumanRetryPolicy`, `EscalationStep`, `SuspensionRecord.retry_policy`, new `EventType` constants, `request_input` signature, `BaseAgent.default_human_retry_policy`, package exports) and `tests/test_workflow_io.py` (`UpstreamIntentSuspendedError` construction, attributes, hierarchy, package export). + - **Package exports** — `HumanRetryPolicy`, `EscalationStep`, `UpstreamIntentSuspendedError` exported from `openintent` top-level. + +### Updated + +- `SuspensionRecord.fallback_policy` documented as alias for `retry_policy.final_fallback_policy` when `retry_policy` is set. +- `SuspensionRecord.timeout_seconds` documented as per-attempt window when `retry_policy` is set. +- `BaseAgent.request_input()` docstring updated to describe `retry_policy` and `default_human_retry_policy`. +- All version references updated to 0.17.0 across Python SDK, MCP server, documentation, and changelog. + +--- + +## [0.16.0] - 2026-03-23 + +### Added + +- **RFC-0024: Workflow I/O Contracts** — Typed input/output contracts at the task and phase level, with executor-owned wiring (resolves RFC-0012 Open Question #4). + + - **`outputs` schema on `PhaseConfig`** — Each phase can declare `outputs` as a mapping from key name to type (`string`, `number`, `boolean`, `object`, `array`, a named type from the `types` block, or an inline `{type, required}` dict for optional fields). Legacy list-of-strings form is normalised automatically. + - **`inputs` wiring on `PhaseConfig`** — Each phase can declare `inputs` as a mapping from local key name to a mapping expression: `phase_name.key`, `$trigger.key`, or `$initial_state.key`. The executor resolves these before invoking the agent handler and places the resolved dict in `ctx.input`. + - **Parse-time validation** — `WorkflowSpec._validate_io_wiring()` is called on every `from_yaml()` / `from_string()`. 
Checks that every `phase_name.key` reference names a phase in `depends_on`, that the phase exists, and that the key appears in the upstream phase's declared outputs (if it has any). Raises `InputWiringError` on failure. + - **`WorkflowSpec.resolve_task_inputs()`** — Executor pre-handoff step. Pre-populates `ctx.input` from upstream phase outputs, trigger payload, or initial state. Raises `UnresolvableInputError` if any declared input cannot be resolved. + - **`WorkflowSpec.validate_claim_inputs()`** — Executor claim-time gate. Rejects a task claim early if declared inputs are not yet resolvable from completed upstream phases. + - **`WorkflowSpec.validate_task_outputs()`** — Executor completion-time gate. Validates the agent's return dict against declared `outputs`. Raises `MissingOutputError` for absent required keys; raises `OutputTypeMismatchError` for type mismatches. Supports primitive types, named struct types (top-level key presence), and enum types (`{enum: [...]}`). + - **`MissingOutputError`** — Raised when one or more required output keys are absent. Carries `task_id`, `phase_name`, `missing_keys`. + - **`OutputTypeMismatchError`** — Raised when an output value does not match its declared type. Carries `task_id`, `phase_name`, `key`, `expected_type`, `actual_type`. + - **`UnresolvableInputError`** — Raised when one or more declared inputs cannot be resolved from available upstream outputs. Carries `task_id`, `phase_name`, `unresolvable_refs`. + - **`InputWiringError`** — Raised at parse time when an input mapping expression is structurally invalid. Subclass of `WorkflowValidationError`. Carries `phase_name`, `invalid_refs`, `suggestion`. + - **`types` block in YAML** — Top-level `types:` map defines named struct and enum types reusable across output schemas. Persisted into `intent.state._io_types` so agent-side validation works without a `WorkflowSpec` reference at runtime. 
+ - **Incremental adoption** — Phases without `outputs` or `inputs` are fully unaffected. Parse-time and runtime validation only applies to phases that declare contracts. + - **Package exports** — `MissingOutputError`, `OutputTypeMismatchError`, `UnresolvableInputError`, `InputWiringError` all exported from `openintent` top-level. + - **RFC-0024 protocol document** — `docs/rfcs/0024-workflow-io-contracts.md` covering output schema declaration, input wiring, executor semantics, named error types, `TaskContext` API, parse-time validation, incremental adoption, and a complete example. + +- **RFC-0025: Human-in-the-Loop Intent Suspension** — First-class protocol primitive for suspending an intent mid-execution and awaiting operator input before proceeding. + + - **`suspended_awaiting_input` lifecycle state** — New `IntentStatus` value. Reaper and lease-expiry workers skip intents in this state; lease renewal succeeds for suspended intents so agents retain ownership across the suspension period. + - **Four new `EventType` constants** — `intent.suspended`, `intent.resumed`, `intent.suspension_expired`, `engagement.decision`. All events are stored in the intent event log and emitted via the SSE bus. + - **`ResponseType` enum** — `choice`, `confirm`, `text`, `form` — specifies what kind of response is expected from the operator. Agents set this when calling `request_input()`. + - **`SuspensionChoice` dataclass** — A single operator-facing option with `value` (machine-readable), `label` (human-readable), optional `description`, `style` hint (`"primary"` / `"danger"` / `"default"`), and arbitrary `metadata`. Channels render these as buttons, dropdowns, or radio options. + - **`SuspensionRecord` dataclass** — Captures the full context of a suspension: question, `response_type`, list of `SuspensionChoice` objects, structured context, channel hint, timeout, fallback policy, confidence at suspension time, and response metadata. Stored in `intent.state._suspension`. 
Includes `valid_values()` helper that returns the allowed values for `choice`/`confirm` types. + - **`EngagementSignals` dataclass** — Carries `confidence`, `risk`, and `reversibility` scores (all float [0, 1]) used by the engagement-decision engine. + - **`EngagementDecision` dataclass** — Output of `should_request_input()`. Has `mode` (one of `autonomous`, `request_input`, `require_input`, `defer`), `should_ask` bool, `rationale` string, and embedded `EngagementSignals`. + - **`InputResponse` dataclass** — Represents an operator's response: `suspension_id`, `value`, `responded_by`, `responded_at`, and optional `metadata`. + - **`InputTimeoutError` exception** — Raised when `fallback_policy="fail"` and the suspension expires. Carries `suspension_id` and `fallback_policy` attributes. + - **`InputCancelledError` exception** — Raised when a suspension is explicitly cancelled. Carries `suspension_id`. + - **`BaseAgent.request_input()`** — Suspends the intent, fires `@on_input_requested` hooks, polls `intent.state._suspension.resolution` every 2 seconds, and returns the operator's response value. Accepts `response_type` and `choices` parameters; for `confirm` type auto-populates yes/no choices if none are supplied. Also supports `timeout_seconds`, `fallback_policy`, `fallback_value`, `channel_hint`, and `confidence`. + - **`BaseAgent.should_request_input()`** — Implements the default rule-based engagement-decision logic (high-confidence/low-risk → autonomous; moderate uncertainty → request_input; low confidence or high risk → require_input; extreme risk/irreversibility → defer). Emits `engagement.decision` event and fires `@on_engagement_decision` hooks. + - **Four new lifecycle decorators** — `@on_input_requested`, `@on_input_received`, `@on_suspension_expired`, `@on_engagement_decision`. Auto-discovered by `_discover_handlers()` and routed via `_on_generic_event()`. 
+ - **`POST /api/v1/intents/{id}/suspend/respond`** — REST endpoint for operators (or bots) to submit a response. Validates `suspension_id` matches the active suspension, validates response value against defined choices (for `choice`/`confirm` types — returns 422 with `valid_choices` on mismatch), patches `state._suspension` with the response, transitions the intent to `active`, emits `intent.resumed`, and broadcasts via SSE. Response includes `choice_label` and `choice_description` for matched choices. + - **RFC-0025 protocol document** — `docs/rfcs/0025-human-in-the-loop.md` covering lifecycle state, event types, SuspensionRecord schema, fallback policies, engagement decision modes, REST endpoint, and security considerations. + - **HITL user guide** — `docs/guide/human-in-the-loop.md` with quick-start, `request_input()` reference, fallback policies, engagement decisions, decorator reference, operator response example, exception reference, and a full refund-agent example. + - **62 new tests** — `tests/test_hitl.py` covers all new models, exceptions, decorators, engagement-decision modes, and the suspend/respond endpoint. + +### Updated + +- `mkdocs.yml` — RFC-0024 and RFC-0025 entries added to the RFCs nav; Human-in-the-Loop guide added to the User Guide nav; announcement bar updated to v0.16.0. +- All version references updated to 0.16.0 across Python SDK and changelog. + +--- + ## [0.15.1] - 2026-03-06 ### Changed diff --git a/docs/changelog.md b/docs/changelog.md index 93f90c1..84e85ae 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -5,6 +5,82 @@ All notable changes to the OpenIntent SDK will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.17.0] - 2026-03-24 + +### Added + +- **RFC-0026: Suspension Propagation & Retry** — Closes three gaps left by RFC-0025. 
+ + - **Container propagation rules (5 normative rules)** — Intent graph: suspended child blocks dependents; parent aggregate status becomes `suspended_awaiting_input` when any child is suspended. Portfolio: aggregate gains `has_suspended_members`/`suspended_member_count`; `portfolio.member_suspended` / `portfolio.member_resumed` events. Plan/Task: bidirectional mirror — when an intent suspends, its task transitions to `blocked` with `blocked_reason: "intent_suspended"` and `suspended_intent_id`; plan progress gains `suspended_tasks`. Workflow: downstream phases receive `UpstreamIntentSuspendedError` at claim time; workflow progress gains `suspended_phases`. Deadline: suspension deadline governs expiry and MUST NOT exceed portfolio `due_before` constraint. + - **`HumanRetryPolicy` dataclass** — Re-notification and escalation policy with `max_attempts`, `interval_seconds`, `strategy` ("fixed"/"linear"/"exponential"), `escalation_ladder` (list of `EscalationStep`), and `final_fallback_policy`. Serialises to/from dict via `to_dict()` / `from_dict()`. + - **`EscalationStep` dataclass** — A single escalation step with `attempt`, `channel_hint`, and `notify_to` (backwards-compatible aliases: `after_attempt`, `channel`, `notify`). Triggers `intent.suspension_escalated` event. + - **`UpstreamIntentSuspendedError`** — Raised by `WorkflowSpec.validate_claim_inputs()` when a declared input references an upstream phase whose intent is currently `suspended_awaiting_input`. Carries `task_id`, `phase_name`, `suspended_intent_id`, `expected_resume_at`. Subclass of `WorkflowError`. + - **`BaseAgent.default_human_retry_policy`** — Class-level or instance-level attribute (`None` by default). Applied to all `request_input()` calls that do not supply an explicit `retry_policy` argument. + - **`request_input(retry_policy=…)` parameter** — Accepts a `HumanRetryPolicy`. 
When supplied, the SDK re-fires `@on_input_requested` hooks on each attempt, emits `intent.suspension_renotified` per attempt, emits `intent.suspension_escalated` for escalation steps, and applies `final_fallback_policy` after all attempts are exhausted. + - **Three-level policy cascade** — call-site policy → `default_human_retry_policy` on the agent → server-configured platform default. + - **Four new `EventType` constants** — `intent.suspension_renotified`, `intent.suspension_escalated`, `portfolio.member_suspended`, `portfolio.member_resumed`. + - **`retry_policy` field on `SuspensionRecord`** — Optional; additive; existing single-attempt behaviour preserved when absent. + - **RFC-0026 protocol document** — `docs/rfcs/0026-suspension-container-interaction.md` with five container rules, HumanRetryPolicy schema, three-level cascade, coordinator policy, RFC-0010 relationship, end-to-end example, and cross-RFC patch summary. + - **Cross-RFC patches** — RFC-0002 (aggregate `suspended_awaiting_input` status), RFC-0007 (portfolio suspension-aware aggregate), RFC-0010 (RFC-0026 relationship note), RFC-0012 (task/intent suspension mirror, `suspended_tasks`), RFC-0024 (`UpstreamIntentSuspendedError`, `suspended_phases`), RFC-0025 (`retry_policy` on `SuspensionRecord`, `timeout_seconds` per-attempt semantics, `fallback_policy` alias note, Cross-RFC table). + - **41 new tests** — `tests/test_hitl.py` (HumanRetryPolicy, EscalationStep, SuspensionRecord.retry_policy, EventType, request_input signature, BaseAgent.default_human_retry_policy, package exports) and `tests/test_workflow_io.py` (UpstreamIntentSuspendedError construction, attributes, hierarchy, package export). + - **Package exports** — `HumanRetryPolicy`, `EscalationStep`, `UpstreamIntentSuspendedError` exported from `openintent` top-level. + +### Updated + +- `SuspensionRecord.fallback_policy` documented as alias for `retry_policy.final_fallback_policy` when retry_policy is set. 
+- `SuspensionRecord.timeout_seconds` documented as per-attempt window when `retry_policy` is set. +- `BaseAgent.request_input()` docstring updated to describe retry_policy and default_human_retry_policy. + +--- + +## [0.16.0] - 2026-03-23 + +### Added + +- **RFC-0024: Workflow I/O Contracts** — Typed input/output contracts at the task and phase level, with executor-owned wiring (resolves RFC-0012 Open Question #4). + + - **`outputs` schema on `PhaseConfig`** — Each phase can declare `outputs` as a mapping from key name to type (`string`, `number`, `boolean`, `object`, `array`, a named type from the `types` block, or an inline `{type, required}` dict for optional fields). Legacy list-of-strings form is normalised automatically. + - **`inputs` wiring on `PhaseConfig`** — Each phase can declare `inputs` as a mapping from local key name to a mapping expression: `phase_name.key`, `$trigger.key`, or `$initial_state.key`. The executor resolves these before invoking the agent handler and places the resolved dict in `ctx.input`. + - **Parse-time validation** — `WorkflowSpec._validate_io_wiring()` is called on every `from_yaml()` / `from_string()`. Checks that every `phase_name.key` reference names a phase in `depends_on`, that the phase exists, and that the key appears in the upstream phase's declared outputs (if it has any). Raises `InputWiringError` on failure. + - **`WorkflowSpec.resolve_task_inputs()`** — Executor pre-handoff step. Pre-populates `ctx.input` from upstream phase outputs, trigger payload, or initial state. Raises `UnresolvableInputError` if any declared input cannot be resolved. + - **`WorkflowSpec.validate_claim_inputs()`** — Executor claim-time gate. Rejects a task claim early if declared inputs are not yet resolvable from completed upstream phases. + - **`WorkflowSpec.validate_task_outputs()`** — Executor completion-time gate. Validates the agent's return dict against declared `outputs`. 
Raises `MissingOutputError` for absent required keys; raises `OutputTypeMismatchError` for type mismatches. Supports primitive types, named struct types (top-level key presence), and enum types (`{enum: [...]}`). + - **`MissingOutputError`** — Raised when one or more required output keys are absent. Carries `task_id`, `phase_name`, `missing_keys`. + - **`OutputTypeMismatchError`** — Raised when an output value does not match its declared type. Carries `task_id`, `phase_name`, `key`, `expected_type`, `actual_type`. + - **`UnresolvableInputError`** — Raised when one or more declared inputs cannot be resolved from available upstream outputs. Carries `task_id`, `phase_name`, `unresolvable_refs`. + - **`InputWiringError`** — Raised at parse time when an input mapping expression is structurally invalid. Subclass of `WorkflowValidationError`. Carries `phase_name`, `invalid_refs`, `suggestion`. + - **`types` block in YAML** — Top-level `types:` map defines named struct and enum types reusable across output schemas. Persisted into `intent.state._io_types` so agent-side validation works without a `WorkflowSpec` reference at runtime. + - **Incremental adoption** — Phases without `outputs` or `inputs` are fully unaffected. Parse-time and runtime validation only applies to phases that declare contracts. + - **Package exports** — `MissingOutputError`, `OutputTypeMismatchError`, `UnresolvableInputError`, `InputWiringError` all exported from `openintent` top-level. + - **RFC-0024 protocol document** — `docs/rfcs/0024-workflow-io-contracts.md` covering output schema declaration, input wiring, executor semantics, named error types, `TaskContext` API, parse-time validation, incremental adoption, and a complete example. + +- **RFC-0025: Human-in-the-Loop Intent Suspension** — First-class protocol primitive for suspending an intent mid-execution and awaiting operator input before proceeding. + + - **`suspended_awaiting_input` lifecycle state** — New `IntentStatus` value. 
Reaper and lease-expiry workers skip intents in this state; lease renewal succeeds for suspended intents so agents retain ownership across the suspension period. + - **Four new `EventType` constants** — `intent.suspended`, `intent.resumed`, `intent.suspension_expired`, `engagement.decision`. All events are stored in the intent event log and emitted via the SSE bus. + - **`ResponseType` enum** — `choice`, `confirm`, `text`, `form` — specifies what kind of response is expected from the operator. Agents set this when calling `request_input()`. + - **`SuspensionChoice` dataclass** — A single operator-facing option with `value` (machine-readable), `label` (human-readable), optional `description`, `style` hint (`"primary"` / `"danger"` / `"default"`), and arbitrary `metadata`. Channels render these as buttons, dropdowns, or radio options. + - **`SuspensionRecord` dataclass** — Captures the full context of a suspension: question, `response_type`, list of `SuspensionChoice` objects, structured context, channel hint, timeout, fallback policy, confidence at suspension time, and response metadata. Stored in `intent.state._suspension`. Includes `valid_values()` helper that returns the allowed values for `choice`/`confirm` types. + - **`EngagementSignals` dataclass** — Carries `confidence`, `risk`, and `reversibility` scores (all float [0, 1]) used by the engagement-decision engine. + - **`EngagementDecision` dataclass** — Output of `should_request_input()`. Has `mode` (one of `autonomous`, `request_input`, `require_input`, `defer`), `should_ask` bool, `rationale` string, and embedded `EngagementSignals`. + - **`InputResponse` dataclass** — Represents an operator's response: `suspension_id`, `value`, `responded_by`, `responded_at`, and optional `metadata`. + - **`InputTimeoutError` exception** — Raised when `fallback_policy="fail"` and the suspension expires. Carries `suspension_id` and `fallback_policy` attributes. 
+ - **`InputCancelledError` exception** — Raised when a suspension is explicitly cancelled. Carries `suspension_id`. + - **`BaseAgent.request_input()`** — Suspends the intent, fires `@on_input_requested` hooks, polls `intent.state._suspension.resolution` every 2 seconds, and returns the operator's response value. Accepts `response_type` and `choices` parameters; for `confirm` type auto-populates yes/no choices if none are supplied. Also supports `timeout_seconds`, `fallback_policy`, `fallback_value`, `channel_hint`, and `confidence`. + - **`BaseAgent.should_request_input()`** — Implements the default rule-based engagement-decision logic (high-confidence/low-risk → autonomous; moderate uncertainty → request_input; low confidence or high risk → require_input; extreme risk/irreversibility → defer). Emits `engagement.decision` event and fires `@on_engagement_decision` hooks. + - **Four new lifecycle decorators** — `@on_input_requested`, `@on_input_received`, `@on_suspension_expired`, `@on_engagement_decision`. Auto-discovered by `_discover_handlers()` and routed via `_on_generic_event()`. + - **`POST /api/v1/intents/{id}/suspend/respond`** — REST endpoint for operators (or bots) to submit a response. Validates `suspension_id` matches the active suspension, validates response value against defined choices (for `choice`/`confirm` types — returns 422 with `valid_choices` on mismatch), patches `state._suspension` with the response, transitions the intent to `active`, emits `intent.resumed`, and broadcasts via SSE. Response includes `choice_label` and `choice_description` for matched choices. + - **RFC-0025 protocol document** — `docs/rfcs/0025-human-in-the-loop.md` covering lifecycle state, event types, SuspensionRecord schema, fallback policies, engagement decision modes, REST endpoint, and security considerations. 
+ - **HITL user guide** — `docs/guide/human-in-the-loop.md` with quick-start, `request_input()` reference, fallback policies, engagement decisions, decorator reference, operator response example, exception reference, and a full refund-agent example. + - **62 new tests** — `tests/test_hitl.py` covers all new models, exceptions, decorators, engagement-decision modes, and the suspend/respond endpoint. + +### Updated + +- `mkdocs.yml` — RFC-0024 and RFC-0025 entries added to the RFCs nav; Human-in-the-Loop guide added to the User Guide nav; announcement bar updated to v0.16.0. +- All version references updated to 0.16.0 across Python SDK and changelog. + +--- + ## [0.15.1] - 2026-03-06 ### Changed @@ -26,13 +102,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools for retry policy management and failure tracking: - `set_retry_policy` — Set or update retry policy on an intent (admin tier). - `get_retry_policy` — Retrieve the current retry policy for an intent (reader tier). - `record_failure` — Record a failure event against an intent for retry tracking (operator tier). - `get_failures` — List recorded failures for an intent (reader tier). -- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that builds the 4 retry/failure tool definitions for use with `MCPToolProvider` and `MCPToolExporter`. -- **MCP Tool Surface Expansion** — MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70. +- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools: `set_retry_policy` (admin), `get_retry_policy` (reader), `record_failure` (operator), `get_failures` (reader). MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70. 
+- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that constructs retry policy and failure-tracking tool definitions for MCP integration, enabling agents to manage retry policies and record/query failures through MCP. ### Changed @@ -243,29 +314,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- **RFC-0021: Agent-to-Agent Messaging** — Structured channels for direct agent-to-agent communication within intent scope. - - `Channel`, `ChannelMessage`, `MessageType`, `ChannelStatus`, `MemberPolicy`, `MessageStatus` data models. - - 11 server endpoints (10 REST + 1 SSE) under `/api/v1/intents/{id}/channels/`. - - `@on_message` lifecycle decorator for reactive message handling. - - `_ChannelsProxy` / `_ChannelHandle` agent abstractions with `ask()`, `notify()`, `broadcast()`. -- **YAML `channels:` block** — Declarative channel definitions in workflow specifications. +- **Declarative Messaging (RFC-0021)** + - YAML `channels:` block for declarative agent-to-agent messaging configuration. + - `@on_message` decorator for zero-boilerplate, reactive message handling with auto-reply. + - Channel proxy (`self.channels`) with `ask()`, `notify()`, and `broadcast()` convenience methods. + - Three messaging patterns: request/response, fire-and-forget, and broadcast. --- -## [0.10.1] - 2026-02-12 +## [0.10.1] - 2026-02-13 ### Added -- **Tool Execution Adapters** — Pluggable adapter system for real external API execution through `POST /api/v1/tools/invoke`. Three built-in adapters: `RestToolAdapter` (API key, Bearer, Basic Auth), `OAuth2ToolAdapter` (automatic token refresh on 401), `WebhookToolAdapter` (HMAC-SHA256 signed dispatch). -- **Adapter Registry** — Resolves adapters from credential metadata via explicit `adapter` key, `auth_type` mapping, or placeholder fallback. 
-- **Security Controls** — URL validation (blocks private IPs, metadata endpoints, non-HTTP schemes), timeout bounds (1–120s), response size limits (1 MB), secret sanitization, request fingerprinting, redirect blocking. -- **Custom Adapter Registration** — `register_adapter(name, adapter)` for non-standard protocols. -- **OAuth2 Integration Guide** — Documentation for integrating OAuth2 services: platform handles authorization code flow, stores tokens in vault, SDK manages refresh and execution. Templates for Salesforce, Google APIs, Microsoft Graph, HubSpot. +- **Tool Execution Adapters** — Pluggable adapter system for real external API execution through the Tool Proxy (`POST /api/v1/tools/invoke`). Three built-in adapters: + - `RestToolAdapter` — API key (header/query), Bearer token, and Basic Auth for standard REST APIs. + - `OAuth2ToolAdapter` — OAuth2 with automatic token refresh on 401 responses using `refresh_token` + `token_url`. + - `WebhookToolAdapter` — HMAC-SHA256 signed dispatch for webhook receivers. +- **Adapter Registry** — `AdapterRegistry` resolves the correct adapter from credential metadata: explicit `adapter` key, `auth_type` mapping, or placeholder fallback for backward compatibility. +- **Security Controls** — All outbound tool execution requests enforce: + - URL validation blocking private IPs (RFC-1918, loopback, link-local), cloud metadata endpoints (`169.254.169.254`), and non-HTTP schemes. + - Timeout bounds clamped to 1–120 seconds (default 30s). + - Response size limit of 1 MB. + - Secret sanitization replacing API keys, tokens, and passwords with `[REDACTED]` in all outputs. + - Request fingerprinting via SHA-256 hash stored per invocation for audit correlation. + - HTTP redirect blocking to prevent SSRF via redirect chains. +- **Custom Adapter Registration** — `register_adapter(name, adapter)` to add adapters for services with non-standard protocols (e.g., GraphQL). 
+- **OAuth2 Integration Guide** — Comprehensive documentation for integrating OAuth2 services: platform handles the authorization code flow, stores tokens in the vault, SDK manages refresh and execution. Includes ready-to-use metadata templates for Salesforce, Google APIs, Microsoft Graph, and HubSpot. ### Changed -- Credential `metadata` supports execution config (`base_url`, `endpoints`, `auth`) for real API calls. Backward compatible — credentials without execution config return placeholder responses. -- 57 new tests covering security utilities, all three adapters, and the registry. +- Credential `metadata` field now supports execution config (`base_url`, `endpoints`, `auth`) to enable real API calls. Credentials without execution config continue to return placeholder responses (backward compatible). +- 57 new tests covering security utilities, all three adapters, and the adapter registry. - Documentation updated across guide, RFC-0014, examples, API reference, and website. --- @@ -274,20 +353,44 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- **RFC-0018: Cryptographic Agent Identity** — Ed25519 key pairs, `did:key` decentralized identifiers, challenge-response registration, signed events with non-repudiation, key rotation, and portable identity across servers. -- **RFC-0019: Verifiable Event Logs** — SHA-256 hash chains linking every event to its predecessor, Merkle tree checkpoints with compact inclusion proofs, consistency verification between checkpoints, and optional external timestamp anchoring. -- **RFC-0020: Distributed Tracing** — `trace_id` and `parent_event_id` fields on IntentEvent, `TracingContext` dataclass for automatic propagation through agent-tool-agent call chains, W3C-aligned 128-bit trace identifiers. -- **`@Identity` decorator** — Declarative cryptographic identity with `auto_sign=True` and `auto_register=True`. 
-- **`TracingContext`** — New dataclass with `new_root()`, `child()`, `to_dict()`, `from_dict()` for trace propagation. -- **11 new client methods** — `register_identity()`, `complete_identity_challenge()`, `verify_signature()`, `rotate_key()`, `get_agent_keys()`, `revoke_key()`, `resolve_did()`, `verify_event_chain()`, `list_checkpoints()`, `get_merkle_proof()`, `verify_consistency()`. -- **13 new server endpoint stubs** — Identity key management, challenge-response, DID resolution, hash chain verification, checkpoint management, Merkle proofs, consistency verification. -- **Automatic tracing in `_emit_tool_event`** — Tool invocation events include `trace_id` and `parent_event_id` from the agent's active `TracingContext`. -- **Tracing injection in `_execute_tool`** — Tool handlers that accept a `tracing` keyword argument receive the current `TracingContext` automatically. +- **RFC-0018: Cryptographic Agent Identity** + - `AgentIdentity`, `IdentityChallenge`, `IdentityVerification` data models for key-based agent identity. + - Ed25519 key pairs with `did:key:z6Mk...` decentralized identifiers. + - Challenge-response identity registration via `register_identity()` and `complete_identity_challenge()`. + - Key rotation with `rotate_key()` preserving previous key history. + - Signature verification with `verify_signature()`. + - `@Identity` decorator for zero-boilerplate identity setup on agents. + - `@on_identity_registered` lifecycle hook. + - `IdentityConfig` for YAML workflow configuration. + - `AgentRecord` extended with `public_key`, `did`, `key_algorithm`, `key_registered_at`, `key_expires_at`, `previous_keys` fields (all optional for backward compatibility). + - 5 new server endpoints: `POST /api/v1/agents/{id}/identity`, `POST .../identity/challenge`, `GET .../identity`, `POST .../identity/verify`, `POST .../identity/rotate`. 
+ +- **RFC-0019: Verifiable Event Logs** + - `LogCheckpoint`, `MerkleProof`, `MerkleProofEntry`, `ChainVerification`, `ConsistencyProof`, `TimestampAnchor` data models. + - SHA-256 hash chains linking each event to its predecessor. + - Merkle tree checkpoints with `MerkleProof.verify()` for client-side inclusion verification. + - `verify_event_chain()` to validate an intent's full hash chain integrity. + - `list_checkpoints()`, `get_checkpoint()`, `get_merkle_proof()`, `verify_consistency()` client methods. + - `VerificationConfig` for YAML workflow configuration. + - `IntentEvent` extended with optional `proof`, `event_hash`, `previous_event_hash`, `sequence` fields. + - 8 new server endpoints for checkpoints, Merkle proofs, chain verification, and optional external anchoring. + +- **RFC-0020: Distributed Tracing** + - `TracingContext` dataclass for propagating trace state through agent → tool → agent call chains. + - `IntentEvent` extended with optional `trace_id` (128-bit hex) and `parent_event_id` fields. + - `log_event()` on both sync and async clients accepts `trace_id` and `parent_event_id` parameters. + - `_emit_tool_event()` automatically includes tracing context in tool invocation events. + - `_execute_tool()` passes `tracing` keyword argument to local tool handlers that accept it. + - `TracingContext.new_root()` generates fresh 128-bit trace IDs (W3C-aligned format). + - `TracingContext.child()` creates child contexts with updated parent references. + - Cross-intent tracing via `trace_id` in event payloads. + +- **Sync & Async Client Parity** — All new RFC-0018/0019/0020 methods available on both `OpenIntentClient` and `AsyncOpenIntentClient`. ### Changed -- All documentation, READMEs, and examples updated from 17 to 20 RFCs. -- `log_event()` on both sync and async clients now accepts optional `trace_id` and `parent_event_id` parameters. +- Version bumped to 0.10.0. +- `__all__` exports updated with all new public symbols including `TracingContext`. 
- 690+ tests passing across all 20 RFCs (104 model tests + 26 server tests for RFC-0018/0019/0020). --- @@ -296,11 +399,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- **Streaming token usage capture** — All 7 LLM provider adapters (OpenAI, DeepSeek, Gemini, Anthropic, Azure OpenAI, OpenRouter, Grok) now capture actual `prompt_tokens`, `completion_tokens`, and `total_tokens` during streaming responses. -- **OpenAI-compatible adapters** — OpenAI, DeepSeek, Azure OpenAI, OpenRouter, and Grok adapters inject `stream_options={"include_usage": True}` to receive usage data in the final stream chunk. -- **Gemini adapter** — Captures `usage_metadata` from stream chunks and maps to standard token count fields. -- **Anthropic adapter** — Extracts usage from the stream's internal message snapshot automatically. -- **`tokens_streamed` field** — Reports actual completion token counts, falling back to character count only when unavailable. +- **Streaming token usage capture** — All 7 LLM provider adapters (OpenAI, DeepSeek, Gemini, Anthropic, Azure OpenAI, OpenRouter, Grok) now capture actual `prompt_tokens`, `completion_tokens`, and `total_tokens` during streaming responses. Previously, `tokens_streamed` used character count instead of real token counts. +- **OpenAI-compatible adapters** — OpenAI, DeepSeek, Azure OpenAI, OpenRouter, and Grok adapters now inject `stream_options={"include_usage": True}` to receive usage data in the final stream chunk. Token counts are extracted and passed through to `complete_stream()` and `log_llm_request_completed()`. +- **Gemini adapter** — Captures `usage_metadata` from stream chunks (`prompt_token_count`, `candidates_token_count`, `total_token_count`) and maps to standard fields. +- **Anthropic adapter** — Extracts usage from the stream's internal message snapshot in `__exit__`, removing the need for a manual `get_final_message()` call. 
+- **`tokens_streamed` field** — Now reports actual completion token counts from provider APIs, falling back to character count only when usage data is unavailable. --- @@ -309,27 +412,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - **Server-Side Tool Invocation** — `POST /api/v1/tools/invoke` endpoint enables agents to invoke tools through the server proxy without ever accessing raw credentials. The server resolves the appropriate grant, injects credentials from the vault, enforces rate limits, and records the invocation for audit. -- **3-Tier Grant Resolution** — Tool invocations are matched to grants using a three-tier resolution strategy: (1) `grant.scopes` contains the tool name, (2) `grant.context["tools"]` contains the tool name, (3) `credential.service` matches the tool name. +- **3-Tier Grant Resolution** — Tool invocations are matched to grants using a three-tier resolution strategy: (1) `grant.scopes` contains the tool name, (2) `grant.context["tools"]` contains the tool name, (3) `credential.service` matches the tool name. This resolves the common mismatch where tool names differ from credential service names. - **Client `invoke_tool()` Methods** — `OpenIntentClient.invoke_tool(tool_name, agent_id, parameters)` (sync) and `AsyncOpenIntentClient.invoke_tool(tool_name, agent_id, parameters)` (async) for programmatic server-side tool invocation. -- **Agent `self.tools.invoke()` via Server Proxy** — `_ToolsProxy` on agents delegates string tool names to `client.invoke_tool()`, completing the server-side invocation chain. -- **Invocation Audit Trail** — Every server-side tool invocation is recorded with agent ID, tool name, parameters, result, duration, and timestamp. 
+- **Agent `self.tools.invoke()` via Server Proxy** — `_ToolsProxy` on agents delegates string tool names to `client.invoke_tool()`, completing the chain: `self.tools.invoke("web_search", {...})` → server resolves grant → injects credentials → executes → returns result. +- **Invocation Audit Trail** — Every server-side tool invocation is recorded with agent ID, tool name, parameters, result, duration, and timestamp for compliance and debugging. -- **`@on_handoff` Decorator** — Lifecycle hook for delegated assignments. Handler receives intent and delegating agent's ID. -- **`@on_retry` Decorator** — Lifecycle hook for retry assignments (RFC-0010). Handler receives intent, attempt number, and last error. -- **`@input_guardrail` / `@output_guardrail` Decorators** — Validation pipeline: input guardrails reject before processing, output guardrails validate before commit. Raise `GuardrailError` to reject. -- **Built-in Coordinator Guardrails** — `guardrails=` on `@Coordinator` is now active: `"require_approval"`, `"budget_limit"`, `"agent_allowlist"`. +- **`@on_handoff` Decorator** — Lifecycle hook that fires when an agent receives work delegated from another agent. The handler receives the intent and the delegating agent's ID, allowing context-aware handoff processing distinct from fresh assignments. +- **`@on_retry` Decorator** — Lifecycle hook that fires when an intent is reassigned after a previous failure (RFC-0010). The handler receives the intent, attempt number, and last error, allowing agents to adapt retry strategy. +- **`@input_guardrail` / `@output_guardrail` Decorators** — Validation pipeline for agent processing. Input guardrails run before `@on_assignment` handlers and can reject intents. Output guardrails validate handler results before they are committed to state. Both support `GuardrailError` for rejection. +- **Built-in Coordinator Guardrails** — The `guardrails=` parameter on `@Coordinator` is now active. 
Supported policies: `"require_approval"` (logs decision records before assignment), `"budget_limit"` (rejects intents exceeding cost constraints), `"agent_allowlist"` (rejects delegation to agents outside the managed list). ### Fixed -- **`_ToolsProxy` duplicate class** — Removed duplicate `_ToolsProxy` definition that caused agent tool proxy to silently fail. -- **Dead proxy code** — Removed shadowed `_MemoryProxy` and `_TasksProxy` duplicate definitions. -- **Grant matching for mismatched tool/service names** — `find_agent_grant_for_tool()` now correctly resolves grants where tool name differs from credential service name. -- **Inert `guardrails=` parameter** — `guardrails=` on `@Coordinator` was accepted but unused. Now wires into guardrail pipeline. +- **`_ToolsProxy` duplicate class** — Removed duplicate `_ToolsProxy` definition that caused agent tool proxy to silently fail. Single definition at top of `agents.py`, constructor takes `agent` only. +- **Dead proxy code** — Removed shadowed `_MemoryProxy` and `_TasksProxy` definitions (originally at lines 47-79, shadowed by full implementations later in the file). +- **Grant matching for mismatched tool/service names** — `find_agent_grant_for_tool()` in Database class now correctly resolves grants where the tool name (e.g. `"web_search"`) differs from the credential service name (e.g. `"serpapi"`). +- **Inert `guardrails=` parameter** — The `guardrails=` parameter on `@Coordinator` was accepted but completely unused. Now wires into the input/output guardrail pipeline. ### Changed -- Tool execution priority enforced: protocol tools > local `ToolDef` handlers > remote RFC-0014 server grants. -- 556+ tests passing across all 17 RFCs. +- Tool execution priority clarified and enforced: protocol tools (remember, recall, clarify, escalate, update_status) > local `ToolDef` handlers > remote RFC-0014 server grants. +- 556+ tests passing across test_llm, test_agents, test_server suites. 
--- @@ -337,18 +440,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- **Tool → ToolDef rename** — `Tool` is now `ToolDef`, `@tool` is now `@define_tool` for clarity. The old names remain as backwards-compatible aliases. -- **Type annotations** — `llm.py` fully type-annotated, passes mypy strict mode. +- **Tool → ToolDef rename** — `Tool` is now `ToolDef`, `@tool` is now `@define_tool` for clarity. The old names remain as backwards-compatible aliases and will not be removed. +- **Type annotations** — `llm.py` is now fully type-annotated and passes mypy strict mode (previously suppressed via `# mypy: disable-error-code`). ### Added -- **LLM-Powered Agents** — `model=` on `@Agent`/`@Coordinator` for agentic tool loops with `self.think()`, `self.think_stream()`, `self.reset_conversation()`, and protocol-native tools. -- **Custom Tools with ToolDef** — `ToolDef(name, description, parameters, handler)` and `@define_tool` decorator. -- **Automatic Tool Tracing** — Local `ToolDef` invocations emit `tool_invocation` protocol events (best-effort, never blocks). +- **LLM-Powered Agents** — Add `model=` to `@Agent` or `@Coordinator` to enable agentic tool loops with `self.think(prompt)`, streaming via `self.think_stream(prompt)`, conversation reset via `self.reset_conversation()`, and protocol-native tools (remember, recall, clarify, escalate, update_status, delegate, record_decision, create_plan). +- **Custom Tools with ToolDef** — `ToolDef(name, description, parameters, handler)` for rich tool definitions with local execution, and `@define_tool(description=, parameters=)` decorator to turn functions into `ToolDef` objects. +- **Mixed tool sources** — `tools=` on `@Agent`/`@Coordinator` accepts both `ToolDef` objects (local handlers with rich schemas) and plain strings (RFC-0014 protocol grants). 
+- **Automatic Tool Tracing** — Every local `ToolDef` handler invocation is automatically traced as a `tool_invocation` protocol event when the agent is connected to an OpenIntent server. Each event records tool name, arguments, result, and execution duration. Tracing is best-effort and never blocks tool execution. +- **Backwards-compatible aliases** — `Tool` = `ToolDef`, `@tool` = `@define_tool`. ### Fixed -- Unified tool execution model documentation. +- Unified tool execution model documentation to clarify the three-tier priority: protocol tools > local handlers (`ToolDef`) > remote protocol grants (RFC-0014 strings). --- diff --git a/docs/guide/human-in-the-loop.md b/docs/guide/human-in-the-loop.md new file mode 100644 index 0000000..57e1815 --- /dev/null +++ b/docs/guide/human-in-the-loop.md @@ -0,0 +1,369 @@ +# Human-in-the-Loop (HITL) — RFC-0025 + +OpenIntent v0.16.0 introduces first-class support for suspending an intent mid-execution and waiting for operator input before proceeding. This guide covers everything from quick-start usage to advanced fallback policies and engagement-decision logic. + +--- + +## Why HITL? + +Autonomous agents are fast and consistent, but sometimes an action requires a human sanity-check before proceeding: + +- Refunding a large payment +- Sending a legally sensitive communication +- Deleting irreversible data +- Approving a budget overrun + +RFC-0025 provides a single, protocol-level primitive — **intent suspension** — that handles all of these cases, with built-in audit trails, fallback policies, and lifecycle hooks. 
+ +--- + +## Quick Start + +```python +from openintent import Agent, on_assignment, on_input_requested + +@Agent("approvals-agent") +class ApprovalsAgent: + + @on_assignment + async def handle(self, intent): + # Ask the operator before proceeding + decision = await self.request_input( + intent.id, + question="Should we issue a refund for order #12345?", + context={ + "order_id": "12345", + "amount": 499.99, + "currency": "USD", + "customer": "alice@example.com", + }, + channel_hint="slack", + timeout_seconds=3600, # 1 hour + fallback_policy="complete_with_fallback", + fallback_value="deny", # deny if no response + confidence=0.55, + ) + + if decision == "approve": + await self.issue_refund(intent) + else: + await self.notify_customer_denied(intent) + + @on_input_requested + async def notify_operator(self, intent, suspension): + # Route the question to your notification channel + await send_slack_message( + channel="#approvals", + text=suspension.question, + context=suspension.context, + suspension_id=suspension.id, + intent_id=intent.id, + ) + +ApprovalsAgent.run() +``` + +--- + +## Lifecycle Overview + +``` +active + │ + │ agent calls request_input() + ▼ +suspended_awaiting_input + │ + │ operator POSTs to /intents/{id}/suspend/respond + ▼ +active (agent continues with the response value) +``` + +If the suspension times out, the **fallback policy** is applied (see [Fallback Policies](#fallback-policies)). + +--- + +## `request_input()` Reference + +```python +value = await self.request_input( + intent_id, # str — the intent to suspend + question, # str — prompt for the operator + context={}, # dict — structured context + channel_hint=None, # str — e.g. "slack", "email" + timeout_seconds=None,# int — None = no timeout + fallback_policy="fail", # str — see below + fallback_value=None, # any — used by complete_with_fallback + confidence=None, # float [0,1] — your confidence at suspension time +) +``` + +### What happens internally + +1. 
A `SuspensionRecord` is created and stored in `intent.state._suspension`. +2. The intent transitions to `suspended_awaiting_input`. +3. An `intent.suspended` event is emitted. +4. `@on_input_requested` hooks are fired so you can notify operators. +5. The agent polls `intent.state._suspension.resolution` every 2 seconds. +6. When an operator responds, the intent transitions back to `active` and the response value is returned. + +--- + +## Fallback Policies + +| Policy | What happens on timeout | +|---|---| +| `"fail"` (default) | `InputTimeoutError` is raised | +| `"complete_with_fallback"` | `fallback_value` is returned; agent continues | +| `"use_default_and_continue"` | Same as `complete_with_fallback` | + +```python +from openintent.exceptions import InputTimeoutError + +try: + answer = await self.request_input( + intent_id, + question="Approve?", + timeout_seconds=300, + fallback_policy="fail", + ) +except InputTimeoutError as e: + await self.log(intent_id, f"Suspension {e.suspension_id} expired") + await self.abandon(intent_id, reason="No operator response") +``` + +--- + +## Engagement Decisions + +Before calling `request_input()`, use `should_request_input()` to decide whether human input is actually needed: + +```python +from openintent.models import EngagementSignals + +signals = EngagementSignals( + confidence=0.55, # agent confidence in autonomous answer + risk=0.70, # risk of acting without input + reversibility=0.80,# how reversible the action is +) + +decision = await self.should_request_input(intent_id, signals=signals) + +print(decision.mode) # "require_input" +print(decision.should_ask)# True +print(decision.rationale) # Human-readable explanation + +if decision.should_ask: + value = await self.request_input(intent_id, question="Proceed?") +else: + value = await self.autonomous_action(intent_id) +``` + +### Decision Modes + +| Mode | When | `should_ask` | +|---|---|---| +| `autonomous` | High confidence, low risk, reversible | `False` | +| 
`request_input` | Moderate uncertainty | `True` | +| `require_input` | Low confidence or high risk | `True` | +| `defer` | Risk or irreversibility too high | `False` | + +### Keyword shorthand + +```python +decision = await self.should_request_input( + intent_id, + confidence=0.9, + risk=0.05, + reversibility=0.95, +) +``` + +--- + +## HITL Lifecycle Decorators + +### `@on_input_requested` + +Called after the suspension is persisted, before polling begins. Use this to notify operators via your preferred channel. + +```python +@on_input_requested +async def notify(self, intent, suspension): + # suspension is a SuspensionRecord + await slack.post( + channel=suspension.channel_hint or "#general", + text=f"*Human input required*\n{suspension.question}", + ) +``` + +### `@on_input_received` + +Called when an operator response arrives, before `request_input()` returns. Use this for logging or routing. + +```python +@on_input_received +async def log_response(self, intent, response): + # response is an InputResponse + await self.log(intent.id, f"Operator {response.responded_by}: {response.value}") +``` + +### `@on_suspension_expired` + +Called when a suspension times out, before the fallback policy is applied. + +```python +@on_suspension_expired +async def handle_timeout(self, intent, suspension): + await alert_on_call(f"Suspension {suspension.id} expired on intent {intent.id}") +``` + +### `@on_engagement_decision` + +Called after `should_request_input()` computes a decision. Use this to audit or override decisions. 
+ +```python +@on_engagement_decision +async def audit(self, intent, decision): + await self.log(intent.id, f"Engagement: {decision.mode} ({decision.rationale})") +``` + +--- + +## Operator Responds via REST + +Operators (or your UI/bot) submit responses via: + +```http +POST /api/v1/intents/{intent_id}/suspend/respond +X-API-Key: +Content-Type: application/json + +{ + "suspension_id": "susp-uuid", + "value": "approve", + "responded_by": "alice@example.com" +} +``` + +**Response (200):** + +```json +{ + "intent_id": "intent-uuid", + "suspension_id": "susp-uuid", + "resolution": "responded", + "value": "approve", + "responded_by": "alice@example.com", + "responded_at": "2026-03-23T10:01:00" +} +``` + +The intent immediately transitions back to `active` and the polling agent unblocks. + +--- + +## Exception Reference + +| Exception | When raised | +|---|---| +| `InputTimeoutError` | `fallback_policy="fail"` and timeout expires | +| `InputCancelledError` | Suspension is cancelled (resolution="cancelled") | + +Both inherit from `OpenIntentError`. 
+ +```python +from openintent.exceptions import InputCancelledError, InputTimeoutError +``` + +--- + +## Full Example: Refund Agent with Engagement Logic + +```python +from openintent import Agent, on_assignment, on_input_requested, on_suspension_expired +from openintent.exceptions import InputTimeoutError +from openintent.models import EngagementSignals + + +@Agent("refund-agent") +class RefundAgent: + + @on_assignment + async def handle(self, intent): + order_id = intent.ctx.data.get("order_id") + amount = intent.ctx.data.get("amount", 0) + + # Compute engagement signals + confidence = 0.9 if amount < 100 else 0.4 + risk = 0.8 if amount > 1000 else 0.3 + + signals = EngagementSignals(confidence=confidence, risk=risk, reversibility=0.5) + decision = await self.should_request_input(intent.id, signals=signals) + + if decision.should_ask: + try: + answer = await self.request_input( + intent.id, + question=f"Approve refund of ${amount} for order {order_id}?", + context={"order_id": order_id, "amount": amount}, + channel_hint="slack", + timeout_seconds=7200, + fallback_policy="complete_with_fallback", + fallback_value="deny", + confidence=confidence, + ) + except InputTimeoutError: + answer = "deny" + else: + answer = "approve" if confidence >= 0.85 else "deny" + + return {"order_id": order_id, "refund_decision": answer} + + @on_input_requested + async def notify_slack(self, intent, suspension): + await post_slack( + f"Refund approval needed: {suspension.question}", + context=suspension.context, + ) + + @on_suspension_expired + async def alert_on_timeout(self, intent, suspension): + await post_slack(f"Refund approval timed out for suspension {suspension.id}") +``` + +--- + +## Testing HITL Agents + +Use the `POST /suspend/respond` endpoint in your integration tests: + +```python +import httpx + +async def test_refund_agent(client, intent_id): + # Trigger the agent assignment + ... 
+ + # Simulate operator response + resp = httpx.post( + f"http://localhost:8000/api/v1/intents/{intent_id}/suspend/respond", + headers={"X-API-Key": "test-key"}, + json={ + "suspension_id": suspension_id, + "value": "approve", + "responded_by": "test-operator", + }, + ) + assert resp.status_code == 200 + assert resp.json()["resolution"] == "responded" +``` + +--- + +## See Also + +- [RFC-0025: Human-in-the-Loop Intent Suspension](../rfcs/0025-human-in-the-loop.md) +- [RFC-0001: Intent Objects](../rfcs/0001-intent-objects.md) — lifecycle states +- [RFC-0013: Coordinator Governance](../rfcs/0013-coordinator-governance.md) — escalation +- [RFC-0019: Verifiable Event Logs](../rfcs/0019-verifiable-event-logs.md) — audit trail diff --git a/docs/overrides/home.html b/docs/overrides/home.html index ccf1fb0..f1a0dce 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -24,7 +24,7 @@
-
v0.15.1 — Gemini Adapter Rebuild for google-genai SDK
+
v0.17.0 — Suspension Propagation & Retry (RFC-0026)

Stop Duct-Taping Your Agents Together

OpenIntent is a durable, auditable protocol for multi-agent coordination. Structured intents replace fragile chat chains. Versioned state replaces guesswork. Ship agent systems that actually work in production. @@ -38,7 +38,7 @@

Stop Duct-Taping Your Agents Together

-
23
+
26
RFCs
@@ -46,11 +46,11 @@

Stop Duct-Taping Your Agents Together

LLM Adapters
-
800+
+
1,088+
Tests
-
v0.15.1
+
v0.17.0
Latest
@@ -97,7 +97,7 @@

Core Capabilities

Built-in Server
-

FastAPI server implementing all 23 RFCs. SQLite or PostgreSQL. One command to start.

+

FastAPI server implementing all 26 RFCs. SQLite or PostgreSQL. One command to start.

Learn more
@@ -300,7 +300,7 @@

Protocol Architecture

Complete RFC Coverage

-

23 RFCs implemented. From intents to cross-server federation, every primitive you need.

+

26 RFCs implemented. From intents to cross-server federation, every primitive you need.

@@ -330,6 +330,9 @@

Complete RFC Coverage

+ + +
0021Agent-to-Agent MessagingProposed
0022Federation ProtocolProposed
0023Federation SecurityProposed
0024Workflow I/O ContractsProposed
0025Human-in-the-Loop Intent SuspensionProposed
0026Suspension Container Interaction & Human RetryProposed
diff --git a/docs/rfcs/0002-intent-graphs.md b/docs/rfcs/0002-intent-graphs.md index 32ddb23..aae8818 100644 --- a/docs/rfcs/0002-intent-graphs.md +++ b/docs/rfcs/0002-intent-graphs.md @@ -71,15 +71,16 @@ Dependencies affect status transitions: - **Blocked by dependencies:** An intent with incomplete dependencies is automatically `blocked` - **Auto-unblock:** When all dependencies complete, intent transitions from `blocked` to `active` -- **Completion gate:** Cannot transition to `completed` until all dependencies are `completed` +- **Completion gate:** Cannot transition to `completed` until all dependencies are `completed`. `suspended_awaiting_input` (RFC-0026) does **not** satisfy this gate. - **Parent completion:** Parent intent cannot complete until all children complete - **Cascade abandonment:** Abandoning a parent MAY cascade to children (configurable) +- **Upstream suspension (RFC-0026):** When an active dependency transitions to `suspended_awaiting_input`, any dependent that is `active` transitions to `blocked`. Auto-unblock fires when the dependency *resumes and subsequently completes*, not on resume alone. ``` draft → active (if no unmet dependencies, else → blocked) blocked → active (when dependencies resolve) active → completed (if all dependencies + children completed) -active → blocked (if dependency becomes incomplete) +active → blocked (if dependency becomes incomplete OR upstream suspends) any → abandoned ``` @@ -94,7 +95,8 @@ Parent intents track aggregate status of their children: "by_status": { "completed": 3, "active": 2, - "blocked": 1 + "blocked": 1, + "suspended_awaiting_input": 0 }, "completion_percentage": 50, "blocking_intents": ["intent-uuid-1"], @@ -103,6 +105,8 @@ Parent intents track aggregate status of their children: } ``` +`suspended_awaiting_input` (RFC-0026) is included in `by_status` but does **not** satisfy the completion gate. A suspended child does not count toward aggregate completion. 
+ ## Endpoints | Method | Path | Description | @@ -170,3 +174,12 @@ ready = client.get_ready_intents(parent.id) # Unblocked intents - **Progress visibility:** Aggregate status shows overall completion percentage - **Multi-agent orchestration:** Different agents can work on different branches in parallel - **Audit trail:** Parent-child relationships provide clear provenance for all work + +## Cross-RFC Interactions + +| RFC | Interaction | +|-----|------------| +| RFC-0001 (Intents) | Extends intent objects with parent_intent_id and depends_on | +| RFC-0006 (Subscriptions) | Parent intent events include aggregate status changes | +| RFC-0025 (HITL) | `suspended_awaiting_input` is a valid child status | +| RFC-0026 (Suspension Containers) | `suspended_awaiting_input` in aggregate `by_status`; `active → blocked` trigger for upstream suspension; completion gate clarification | diff --git a/docs/rfcs/0007-intent-portfolios.md b/docs/rfcs/0007-intent-portfolios.md index 0e534f6..52335d9 100644 --- a/docs/rfcs/0007-intent-portfolios.md +++ b/docs/rfcs/0007-intent-portfolios.md @@ -59,6 +59,34 @@ Portfolios serve as namespaces for organizing work: } ``` +## Suspension-Aware Aggregate Status (RFC-0026) + +When one or more portfolio members are `suspended_awaiting_input`, the portfolio GET response includes: + +```json +{ + "has_suspended_members": true, + "suspended_member_count": 1 +} +``` + +**Aggregate status algorithm:** + +| Condition | Aggregate status | +|---|---| +| All members `completed` | `completed` | +| Any member `failed` or `abandoned` | `failed` | +| Otherwise (including any suspended members) | `in_progress` | + +**New portfolio events (RFC-0026):** + +| Event | When emitted | +|---|---| +| `portfolio.member_suspended` | A member intent transitions to `suspended_awaiting_input` | +| `portfolio.member_resumed` | A suspended member intent resumes | + +**Portfolio deadline precedence:** If `governance.deadline` fires while a member intent is 
`suspended_awaiting_input`, the server MUST abandon that intent with `abandonment_reason: "portfolio_deadline_exceeded"`, bypassing `fallback_policy`. See RFC-0026 §2 Rule 5. + ## Cross-RFC Interactions | RFC | Interaction | @@ -69,6 +97,8 @@ Portfolios serve as namespaces for organizing work: | RFC-0009 (Costs) | Aggregate cost tracking across portfolio intents | | RFC-0012 (Planning) | Plans can scope to portfolio intents | | RFC-0013 (Coordinators) | Coordinator lease can scope to a portfolio | +| RFC-0025 (HITL) | Member intents may suspend awaiting operator input | +| RFC-0026 (Suspension Containers) | Aggregate algorithm, `has_suspended_members`, `suspended_member_count`, portfolio member events | ## Endpoints diff --git a/docs/rfcs/0010-retry-policies.md b/docs/rfcs/0010-retry-policies.md index cc3fc92..6be5580 100644 --- a/docs/rfcs/0010-retry-policies.md +++ b/docs/rfcs/0010-retry-policies.md @@ -124,3 +124,4 @@ curl -X POST http://localhost:8000/api/v1/intents/{id}/failures \ | RFC-0003 (Leasing) | Lease released on fallback; new lease acquired by fallback agent | | RFC-0009 (Costs) | Failed attempts still record costs | | RFC-0012 (Tasks) | Task-level retry policies override intent-level policies | +| RFC-0026 (Suspension Containers) | Parallel construct: RFC-0010 retries when the *agent* fails; RFC-0026 (`HumanRetryPolicy`) retries when the *human* fails to respond. The server SHOULD use the same scheduled-job infrastructure for both. | diff --git a/docs/rfcs/0012-task-decomposition-planning.md b/docs/rfcs/0012-task-decomposition-planning.md index 54f18b1..28059eb 100644 --- a/docs/rfcs/0012-task-decomposition-planning.md +++ b/docs/rfcs/0012-task-decomposition-planning.md @@ -588,6 +588,39 @@ New event types added to the intent event log: | RFC-0009 (Cost Tracking) | Task-level cost tracking. Plan aggregates costs across tasks. | | RFC-0010 (Retry Policies) | Task retry uses existing retry policy definitions. 
| | RFC-0011 (Access Control) | Tasks inherit permissions from intents. Task-level overrides supported. | +| RFC-0025 (HITL) | `request_input()` is the canonical mechanism for checkpoint human approval. | +| RFC-0026 (Suspension Containers) | Bidirectional task/intent suspension mirror; `blocked_reason: "intent_suspended"`; `suspended_tasks` in plan progress. | + +### 9. RFC-0026 Patch: Task/Intent Suspension Mirror + +When an intent transitions to `suspended_awaiting_input` (via `request_input()`), its corresponding plan task MUST transition to `blocked`: + +```json +{ + "state": "blocked", + "blocked_reason": "intent_suspended", + "suspended_intent_id": "" +} +``` + +On `intent.resumed`, the task transitions back to `running`. + +**Plan progress gains `suspended_tasks`:** + +```json +{ + "suspended_tasks": [ + { + "task_id": "task_01XYZ", + "intent_id": "intent_01ABC", + "suspended_since": "2026-03-24T10:00:00Z", + "expires_at": "2026-03-24T13:00:00Z" + } + ] +} +``` + +**Checkpoints as `request_input()` triggers:** RFC-0012 plan checkpoints that `require_approval: true` SHOULD be implemented by the assigned agent calling `request_input()`. This is the canonical pattern for human-in-the-loop gates within plans. The plan transitions to `paused` and the task to `blocked` (via the mirror rule above) until the operator responds. ## Open Questions @@ -597,7 +630,7 @@ New event types added to the intent event log: 3. **Cross-portfolio task dependencies**: Should tasks be allowed to depend on tasks from intents in different portfolios, or should portfolio boundaries be strict? -4. **Task output schema**: Should task definitions include expected output schemas for validation, or is this left to the agent? +4. 
~~**Task output schema**: Should task definitions include expected output schemas for validation, or is this left to the agent?~~ **Resolved by RFC-0024.** Output schemas are declared in the workflow definition (in the phase's `outputs` field), and all validation responsibility belongs to the executor. An agent receives a pre-populated `ctx.input` wired by the executor from upstream phase outputs and returns a plain dict. The executor validates that dict against declared outputs before recording task completion. See [RFC-0024: Workflow I/O Contracts](./0024-workflow-io-contracts.md).
 
 ## References
 
diff --git a/docs/rfcs/0024-workflow-io-contracts.md b/docs/rfcs/0024-workflow-io-contracts.md
new file mode 100644
index 0000000..edfae64
--- /dev/null
+++ b/docs/rfcs/0024-workflow-io-contracts.md
@@ -0,0 +1,691 @@
+# RFC-0024: Workflow I/O Contracts
+
+**Status:** Proposed
+**Created:** 2026-03-19
+**Authors:** OpenIntent Contributors
+**Requires:** RFC-0012 (Task Decomposition & Planning), RFC-0001 (Intents), RFC-0007 (Intent Portfolios)
+**Addendum to:** RFC-0012
+
+---
+
+## Abstract
+
+This RFC establishes typed input/output contracts at the task and phase level and assigns responsibility for wiring those contracts to the **executor**, not the agent. An agent receives a pre-populated `ctx.input` dict and returns a plain dict. The executor resolves dependency graphs, maps declared outputs from completed upstream tasks into the consuming task's `ctx.input`, and validates outputs against declared schemas before marking a task complete. Agents are fully decoupled from workflow topology and from each other's internal naming conventions.
+
+This RFC directly answers **Open Question #4** from RFC-0012: _"Should task definitions include expected output schemas for validation, or is this left to the agent?"_ The answer is: output schemas are declared in the workflow definition, and validation is owned by the executor — not the agent.
+ +--- + +## Motivation + +RFC-0012 introduced Task as a first-class primitive and established that a task receives `input` and produces `output`. However, it left the responsibility of wiring those values across tasks ambiguous. In practice, agents working within a plan must reach into raw intent state using magic key names, relying on upstream agents to have written the right values in the right places. This creates three concrete problems: + +1. **Implicit coupling.** Agent B cannot be written or tested without knowing exactly what key Agent A wrote to `intent.state`. Any rename in Agent A breaks Agent B silently. + +2. **No executor-level guarantee.** Nothing in the protocol ensures that `ctx.input["quarter"]` is actually present when the task starts. Absence errors are only discovered at runtime, deep inside the agent's execution. + +3. **Validation gap.** An agent can return any dict and call itself done. Downstream tasks that depend on specific output keys discover the missing data only when they attempt to read it. 
+ +### The Executor Must Own the Wiring + +The solution is a clean inversion: the executor — not the agent — is responsible for: + +- Reading declared `inputs`/`outputs` from the workflow definition +- Resolving the dependency graph to identify which upstream task produced each declared output +- Pre-populating `ctx.input` with exactly the keyed values the workflow declared, before the agent handler is invoked +- Validating the agent's return dict against declared `outputs` before accepting task completion +- Rejecting a task claim if the declared inputs cannot yet be resolved from upstream outputs + +The agent's contract becomes simple and self-contained: + +```python +async def handle(ctx: TaskContext) -> dict: + # ctx.input is guaranteed to be pre-populated by the executor + quarter = ctx.input["quarter"] + revenue = await fetch(quarter) + # return must satisfy the declared outputs schema + return {"revenue": revenue, "expenses": 0} +``` + +The agent does not know about workflow topology. It does not call `get_sibling_output`. It reads from `ctx.input`, does work, and returns a dict. + +--- + +## Design + +### 1. Output Schema Declaration + +Output schemas are declared inline in the phase definition, referencing types from the workflow's `types` block. A phase's `outputs` field is a mapping from output key names to type references. + +#### 1.1 Simple Output Declaration + +```yaml +types: + FinancialSummary: + revenue: number + expenses: number + net_profit: number + +workflow: + fetch_financials: + title: "Fetch Financials" + assign: data-agent + outputs: + revenue: number + expenses: number +``` + +Each key in `outputs` is a name that the executor will require in the agent's return dict. The value is a type name (from `types`) or a primitive type string (`string`, `number`, `boolean`, `object`, `array`). 
+
+#### 1.2 Output Declaration with Type References
+
+```yaml
+types:
+  Finding:
+    source: string
+    content: string
+    confidence: number
+
+workflow:
+  research:
+    title: "Research Phase"
+    assign: researcher
+    outputs:
+      sources: array
+      findings: Finding
+```
+
+When a type name from `types` is used, the executor validates that the returned value matches the declared shape.
+
+#### 1.3 Optional Outputs
+
+Individual output keys may be marked optional:
+
+```yaml
+outputs:
+  summary: string
+  citations: array
+  warnings:
+    type: array
+    required: false
+```
+
+The `required` modifier (default: `true`) lets a phase declare outputs it may or may not produce, without causing a validation failure when absent.
+
+---
+
+### 2. Input Wiring
+
+The `inputs` field on a phase declares which upstream outputs should be mapped into `ctx.input` before the agent runs. The executor resolves these mappings automatically.
+
+#### 2.1 Input Mapping Syntax
+
+```yaml
+workflow:
+  analysis:
+    title: "Analyze Findings"
+    assign: analyst
+    depends_on: [research]
+    inputs:
+      research_findings: research.findings
+      sources_list: research.sources
+    outputs:
+      insights: string
+      recommendations: array
+```
+
+The format for input mapping values is `{phase_name}.{output_key}`. The executor reads the named key from the completed upstream phase's recorded output and places it at the declared `inputs` key name in `ctx.input`.
+
+In the example above, before the `analysis` agent handler is called, the executor ensures:
+
+```python
+ctx.input == {
+    "research_findings": <value of research.findings>,
+    "sources_list": <value of research.sources>,
+}
+```
+
+The agent reads `ctx.input["research_findings"]` directly and never touches raw intent state.
+ +#### 2.2 Multi-Phase Input Wiring + +A phase may draw inputs from multiple upstream phases: + +```yaml +workflow: + generate_report: + title: "Generate Report" + assign: reporter + depends_on: [analysis, compliance_check] + inputs: + insights: analysis.insights + recommendations: analysis.recommendations + compliance_status: compliance_check.status + violations: compliance_check.violations + outputs: + report_url: string + report_summary: string +``` + +All declared input mappings must be resolvable from completed upstream phases. If any referenced key is missing from the upstream phase's recorded output, the executor raises `UnresolvableInputError` at claim time. + +#### 2.3 Static Inputs + +A phase may also declare static inputs that come from the workflow trigger or initial state, not from upstream phases. Static values use the `$` prefix to distinguish from phase references: + +```yaml +workflow: + fetch_data: + title: "Fetch Data" + assign: data-agent + inputs: + quarter: $trigger.quarter + source: $initial_state.source + outputs: + data: object +``` + +Static input expressions are resolved at task creation time. The executor injects these alongside any dynamic (upstream phase) mappings. + +**Implementation note:** `$trigger.*` and `$initial_state.*` values are projected into the intent's `initial_state` at portfolio creation time. At runtime, both the server-side and agent-side input resolution logic resolve these references from the intent's stored state. Trigger payloads that are not projected into `initial_state` will cause `UnresolvableInputError` at claim time. + +--- + +### 3. Executor Wiring Semantics + +This section is normative. Implementations conforming to this RFC must exhibit the following behavior. + +#### 3.1 Claim-Time Validation + +When an agent attempts to claim a task, the executor checks that all declared `inputs` can be resolved before granting the claim. A task is only claimable when: + +1. 
All tasks in `depends_on` are in `completed` state. +2. All keys declared in `inputs` that reference upstream phase outputs exist in the recorded output of the referenced upstream phase. +3. All static input expressions (`$trigger.*`, `$initial_state.*`) resolve to non-null values. + +If any check fails, the executor rejects the claim with `UnresolvableInputError` and the task remains in `ready` state. + +#### 3.2 Pre-Handoff Population + +Before dispatching a task to an agent handler, the executor: + +1. Resolves each entry in the phase's `inputs` mapping. +2. Constructs a `ctx.input` dict containing the fully resolved key-value pairs. +3. Passes this dict to the agent as the `input` field of `TaskContext`. + +The agent handler is never called with a partially populated or empty `ctx.input` when inputs are declared. The executor guarantees presence before handoff. + +#### 3.3 Completion-Time Validation + +When an agent calls the task completion endpoint (or returns from its handler), the executor: + +1. Receives the agent's output dict. +2. Checks that every key declared as `required: true` (the default) in the phase's `outputs` is present in the output dict. +3. If type information is available, validates that each key's value matches the declared type. +4. If all checks pass, records the output against the completed task and transitions the task to `completed`. +5. If any check fails, rejects the completion with `MissingOutputError` or `OutputTypeMismatchError`, and the task remains in `running` state. The agent may retry the completion with a corrected output. + +#### 3.4 Downstream Unblocking + +After a task is marked `completed` and its outputs are recorded, the executor: + +1. Identifies all downstream tasks whose `depends_on` includes the completed task. +2. For each downstream task: checks whether all of its other dependencies are also complete. +3. If all dependencies are complete, transitions the downstream task from `pending` to `ready`. +4. 
For each `ready` downstream task: pre-evaluates whether its `inputs` can now be fully resolved (claim-time validation). If any input remains unresolvable, the task stays in `ready` but will fail the claim check if an agent attempts to claim it. + +--- + +### 4. Named Error Types + +All executor I/O errors are named types that appear in task event logs and API error responses. + +#### 4.1 `MissingOutputError` + +**When raised:** Completion-time validation finds that a required output key is absent from the agent's returned dict. + +**Fields:** + +| Field | Type | Description | +|-------|------|-------------| +| `task_id` | string | The task whose completion was rejected | +| `phase_name` | string | The phase definition name | +| `missing_keys` | list[string] | Output keys that were declared but not returned | + +**Example event payload:** + +```json +{ + "error": "MissingOutputError", + "task_id": "task_01HXYZ", + "phase_name": "fetch_financials", + "missing_keys": ["expenses"], + "message": "Task completion rejected: declared output key 'expenses' was not present in agent return value" +} +``` + +#### 4.2 `OutputTypeMismatchError` + +**When raised:** Completion-time validation finds that a returned output key's value does not match the declared type. + +**Fields:** + +| Field | Type | Description | +|-------|------|-------------| +| `task_id` | string | The task whose completion was rejected | +| `phase_name` | string | The phase definition name | +| `key` | string | The output key with the type mismatch | +| `expected_type` | string | Declared type | +| `actual_type` | string | Runtime type of the returned value | + +**Note:** Type validation is structural, not coercive. The executor validates and rejects; it never casts values. + +#### 4.3 `UnresolvableInputError` + +**When raised:** Claim-time validation finds that one or more declared inputs cannot be resolved from completed upstream outputs. 
+ +**Fields:** + +| Field | Type | Description | +|-------|------|-------------| +| `task_id` | string | The task whose claim was rejected | +| `phase_name` | string | The phase definition name | +| `unresolvable_refs` | list[string] | Input mapping expressions that could not be resolved | + +**Example event payload:** + +```json +{ + "error": "UnresolvableInputError", + "task_id": "task_01HABC", + "phase_name": "analysis", + "unresolvable_refs": ["research.findings"], + "message": "Task claim rejected: upstream phase 'research' did not record output key 'findings'" +} +``` + +#### 4.4 `InputWiringError` + +**When raised:** A structural problem with the `inputs` declaration is detected at workflow validation time (not at runtime). Examples: referencing a phase not in `depends_on`, referencing a non-existent phase, or using malformed mapping syntax. + +**Fields:** + +| Field | Type | Description | +|-------|------|-------------| +| `phase_name` | string | The phase with the invalid inputs declaration | +| `invalid_refs` | list[string] | The malformed or invalid mapping expressions | +| `suggestion` | string | Human-readable fix hint | + +--- + +### 5. `TaskContext` API + +The `TaskContext` object passed to agent handlers is updated to reflect executor-managed input. + +#### 5.1 `ctx.input` + +`ctx.input` is a `dict` that is pre-populated by the executor before the agent handler is called. Its contents are determined by the phase's `inputs` declaration — not by raw intent state. + +**Before RFC-0024:** `ctx.input` reflected whatever was stored in `intent.state` under certain keys, requiring agents to know upstream state naming conventions. + +**After RFC-0024:** `ctx.input` contains exactly and only the keys declared in the phase's `inputs` mapping, populated from resolved upstream outputs. An agent that declares no `inputs` receives an empty dict (or the initial static inputs, if any are declared). 
+ +```python +@task(name="analysis") +async def run_analysis(ctx: TaskContext) -> dict: + # ctx.input is guaranteed to contain exactly what the workflow declared + # No reaching into intent.state, no magic key names + findings = ctx.input["research_findings"] + sources = ctx.input["sources_list"] + + result = analyze(findings, sources) + + return { + "insights": result.insights, + "recommendations": result.recommendations, + } +``` + +#### 5.2 `ctx.get_sibling_output()` — Escape Hatch Only + +`TaskContext.get_sibling_output(task_name: str) -> dict` remains available as a low-level escape hatch for exceptional circumstances, but is **not** the primary interface for passing data between tasks. It bypasses the executor's wiring guarantees and should be treated like `eval()` in Python: available when you need it, not the expected approach for normal use. + +Use `ctx.get_sibling_output()` only when: + +- You are integrating with a legacy workflow definition that has no `inputs`/`outputs` declarations. +- You need to inspect all sibling outputs for diagnostic purposes. +- You are in a dynamic context where the upstream key name cannot be known at workflow-definition time. + +In all other cases, declare `inputs` in the workflow YAML and read from `ctx.input`. + +--- + +### 6. Python SDK — Error Types + +The SDK raises the named error types from Section 4 as Python exceptions. + +```python +from openintent.workflow import ( + MissingOutputError, + OutputTypeMismatchError, + UnresolvableInputError, + InputWiringError, +) +``` + +#### 6.1 `MissingOutputError` + +```python +class MissingOutputError(WorkflowError): + """ + Raised when a task completion is rejected because one or more + declared output keys are absent from the agent's returned dict. + + Attributes: + task_id: The ID of the task whose completion was rejected. + phase_name: The name of the phase definition. + missing_keys: The declared output keys that were not returned. 
+ """ + task_id: str + phase_name: str + missing_keys: list[str] +``` + +#### 6.2 `OutputTypeMismatchError` + +```python +class OutputTypeMismatchError(WorkflowError): + """ + Raised when a returned output key's value does not match the + declared type. No coercion is attempted. + + Attributes: + task_id: The ID of the task whose completion was rejected. + phase_name: The name of the phase definition. + key: The output key with the type mismatch. + expected_type: The type declared in the workflow definition. + actual_type: The Python type of the value returned by the agent. + """ + task_id: str + phase_name: str + key: str + expected_type: str + actual_type: str +``` + +#### 6.3 `UnresolvableInputError` + +```python +class UnresolvableInputError(WorkflowError): + """ + Raised at claim time when one or more declared inputs cannot be + resolved from completed upstream task outputs. + + Attributes: + task_id: The ID of the task whose claim was rejected. + phase_name: The name of the phase definition. + unresolvable_refs: The input mapping expressions that could not + be resolved (e.g. ["research.findings"]). + """ + task_id: str + phase_name: str + unresolvable_refs: list[str] +``` + +#### 6.4 `InputWiringError` + +```python +class InputWiringError(WorkflowValidationError): + """ + Raised at workflow validation time when an inputs declaration is + structurally invalid — for example, referencing a phase that is not + in depends_on, referencing a non-existent phase, or using malformed + mapping syntax. + + Attributes: + phase_name: The phase with the invalid inputs declaration. + invalid_refs: The malformed or invalid mapping expressions. + """ + phase_name: str + invalid_refs: list[str] +``` + +--- + +### 7. `to_portfolio_spec()` Wiring + +When a `WorkflowSpec` is converted to a `PortfolioSpec` via `to_portfolio_spec()`, the `inputs` and `outputs` declarations from each phase must be preserved and threaded through so that the executor can perform wiring at runtime. 
+ +The `IntentSpec` dataclass gains two new fields: + +```python +@dataclass +class IntentSpec: + title: str + description: str = "" + assign: Optional[str] = None + depends_on: Optional[list[str]] = None + constraints: dict[str, Any] = field(default_factory=dict) + initial_state: dict[str, Any] = field(default_factory=dict) + # RFC-0024: I/O contracts + inputs: dict[str, str] = field(default_factory=dict) + outputs: dict[str, Any] = field(default_factory=dict) +``` + +`to_portfolio_spec()` maps each `PhaseConfig`'s `inputs` and `outputs` directly onto the corresponding `IntentSpec`. The executor accesses these fields when constructing `ctx.input` and when running completion-time validation. + +--- + +### 8. Validation at Workflow Parse Time + +In addition to runtime validation, the parser performs structural checks on `inputs`/`outputs` at workflow load time. These checks raise `InputWiringError` immediately, before any task runs. + +**Checks performed:** + +1. **Phase reference exists:** Every `phase_name` in an input mapping expression (`phase_name.key`) must name a phase that exists in the `workflow` section. + +2. **Reference is a declared dependency:** Every phase referenced in an input mapping must appear in the consuming phase's `depends_on` list. A phase cannot wire inputs from a phase it does not depend on. + +3. **Output key is declared:** If the upstream phase declares an `outputs` block, then the referenced key must appear in that block. (If the upstream phase has no `outputs` block, this check is skipped to allow incremental adoption.) + +4. **Mapping syntax is valid:** Each input mapping value must match the pattern `{phase_name}.{key}` or `$trigger.{key}` or `$initial_state.{key}`. Other formats raise `InputWiringError`. + +--- + +### 9. Incremental Adoption + +Not all phases need to declare `inputs`/`outputs` immediately. 
The contract is opt-in per phase: + +- A phase with **no `inputs` declaration**: `ctx.input` is empty (or contains only static workflow-level inputs). The agent is responsible for obtaining any data it needs from `ctx.get_sibling_output()` or other mechanisms. + +- A phase with **no `outputs` declaration**: The executor skips completion-time output validation. The agent may return any dict (or nothing). + +- A phase with **partial declarations**: Only the declared keys are validated. Additional keys returned by the agent beyond those declared are accepted and recorded. + +This allows gradual adoption: start by declaring outputs on the most critical phases, then progressively add `inputs` declarations to their consumers. + +--- + +### 10. Complete Example + +```yaml +openintent: "1.0" + +info: + name: "Quarterly Compliance Report" + version: "1.0.0" + +types: + FinancialData: + revenue: number + expenses: number + quarter: string + + HRData: + headcount: number + attrition_rate: number + + AnalysisResult: + findings: array + risk_level: string + violations_found: boolean + +workflow: + fetch_financials: + title: "Fetch Financial Data" + assign: data-agent + inputs: + quarter: $trigger.quarter + source: $initial_state.source + outputs: + revenue: number + expenses: number + + fetch_hr_data: + title: "Fetch HR Data" + assign: data-agent + inputs: + quarter: $trigger.quarter + outputs: + headcount: number + attrition_rate: number + + run_analysis: + title: "Run Compliance Analysis" + assign: analytics-agent + depends_on: [fetch_financials, fetch_hr_data] + inputs: + fin_revenue: fetch_financials.revenue + fin_expenses: fetch_financials.expenses + hr_headcount: fetch_hr_data.headcount + hr_attrition: fetch_hr_data.attrition_rate + outputs: + findings: array + risk_level: string + violations_found: boolean + + generate_report: + title: "Generate Report" + assign: reporting-agent + depends_on: [run_analysis] + inputs: + analysis_findings: run_analysis.findings + 
risk_level: run_analysis.risk_level + has_violations: run_analysis.violations_found + outputs: + report_url: string + report_summary: string +``` + +The agent for `run_analysis` looks like: + +```python +@task(name="run_analysis") +async def run_compliance_analysis(ctx: TaskContext) -> dict: + # All inputs are guaranteed present by the executor + revenue = ctx.input["fin_revenue"] + expenses = ctx.input["fin_expenses"] + headcount = ctx.input["hr_headcount"] + attrition = ctx.input["hr_attrition"] + + result = await compliance_engine.analyze( + revenue=revenue, + expenses=expenses, + headcount=headcount, + attrition=attrition, + ) + + # Return must include all declared outputs + return { + "findings": result.findings, + "risk_level": result.risk_level, + "violations_found": result.violations_found, + } +``` + +--- + +## Relationship to RFC-0012 + +RFC-0012 introduced Task as a protocol primitive and established that tasks have `input` and `output` fields. It left **Open Question #4** — _whether task definitions should include expected output schemas for validation, or leave that to the agent_ — explicitly unresolved. + +**RFC-0024 resolves Open Question #4** with the following answer: + +> Output schemas are declared in the workflow definition (in the phase's `outputs` field), and all validation responsibility belongs to the executor — not the agent. An agent receives a pre-populated `ctx.input` (wired by the executor from upstream phase outputs) and returns a plain dict. The executor validates that dict against declared outputs before recording task completion. Agents are decoupled from each other and from workflow topology. + +This RFC does not modify: +- The Task state machine (RFC-0012 §1.2) +- The Plan object or plan states (RFC-0012 §2) +- The `get_sibling_output()` method signature (remains as an escape hatch) +- Any other RFC-0012 design choices + +--- + +## Out of Scope + +- **Channel I/O (RFC-0021):** Channel semantics are unchanged. 
This RFC only governs task-level `ctx.input`/`outputs`. +- **Intent-level state:** Only task-level input/output is in scope. `intent.state` continues to exist and function as defined in RFC-0001. +- **Runtime type coercion:** The executor validates types and rejects mismatches. It never casts or coerces values. An `int` returned for a declared `number` field passes; a `str` returned for a declared `number` field raises `OutputTypeMismatchError`. +- **Cross-portfolio task I/O wiring:** Input mappings may only reference phases within the same workflow. Cross-portfolio data passing is a separate concern. + +--- + +## Open Questions + +1. **Schema versioning:** When a workflow version is bumped and an output key is renamed, how are in-flight tasks (running under the old version) handled? Task outputs should be validated against the schema version active at task creation time. + +2. **Array item typing:** The current proposal allows `outputs: findings: array` but does not specify array element types. A future extension could allow `array` syntax for element-level validation. + +3. **Nested object validation depth:** For `object` types, should validation be shallow (key presence only) or deep (recursive against the `types` block)? This RFC leaves it implementation-defined; a future RFC may standardize. 
+
+---
+
+## RFC-0026 Patch: Upstream Suspension Rejection
+
+When an agent attempts to claim a task whose declared inputs reference an upstream phase that is currently `suspended_awaiting_input`, `validate_claim_inputs()` MUST reject with `UpstreamIntentSuspendedError`:
+
+```python
+from openintent.workflow import UpstreamIntentSuspendedError
+
+try:
+    spec.validate_claim_inputs(phase_name, upstream_outputs, task_id=task_id)
+except UpstreamIntentSuspendedError as e:
+    # e.suspended_intent_id — the upstream intent that is suspended
+    # e.expected_resume_at — ISO-8601 estimate or None
+    logger.info(f"Claim deferred: upstream intent {e.suspended_intent_id} is suspended")
+```
+
+**Workflow progress gains `suspended_phases`:**
+
+```json
+{
+  "suspended_phases": [
+    {
+      "phase_name": "compliance_review",
+      "intent_id": "intent_01ABC",
+      "suspended_since": "2026-03-24T10:00:00Z",
+      "expires_at": "2026-03-24T13:00:00Z"
+    }
+  ]
+}
+```
+
+## Cross-RFC Interactions
+
+| RFC | Interaction |
+|-----|------------|
+| RFC-0012 (Planning) | Addendum to RFC-0012; resolves Open Question #4 |
+| RFC-0001 (Intents) | Intent state holds _io_inputs/_io_outputs for executor wiring |
+| RFC-0007 (Intent Portfolios) | Portfolios scope workflows |
+| RFC-0025 (HITL) | Agents calling request_input() affect claim-time validation |
+| RFC-0026 (Suspension Containers) | `upstream_intent_suspended` rejection reason; `suspended_phases` in workflow progress |
+
+## References
+
+- [RFC-0012: Task Decomposition & Planning](./0012-task-decomposition-planning.md) — parent RFC; defines Task, Plan, TaskContext
+- [RFC-0001: Intent Objects](./0001-intent-objects.md) — intent state model
+- [RFC-0007: Intent Portfolios](./0007-intent-portfolios.md) — portfolio boundaries
+- [RFC-0021: Agent-to-Agent Messaging](./0021-agent-to-agent-messaging.md) — channel messaging (out of scope for this RFC)
+- [RFC-0026: Suspension Propagation & Retry](./0026-suspension-container-interaction.md) — upstream
suspension rejection +- [Temporal Activity Input/Output](https://docs.temporal.io/activities) — reference design for typed activity I/O +- [Prefect Task Parameters](https://docs.prefect.io/concepts/tasks/) — reference for task input contracts diff --git a/docs/rfcs/0025-human-in-the-loop.md b/docs/rfcs/0025-human-in-the-loop.md new file mode 100644 index 0000000..731cc3f --- /dev/null +++ b/docs/rfcs/0025-human-in-the-loop.md @@ -0,0 +1,316 @@ +# RFC-0025: Human-in-the-Loop Intent Suspension + +**Status:** Accepted +**Version:** v0.16.0 +**Date:** 2026-03-23 +**Authors:** OpenIntent Working Group + +--- + +## Abstract + +This RFC defines the protocol for suspending an intent mid-execution to obtain operator input before proceeding. It introduces the `suspended_awaiting_input` lifecycle state, four new event types, a REST endpoint for operator responses, engagement-decision logic for when to invoke the human loop, and fallback policies for handling timeouts. + +--- + +## 1. Motivation + +Autonomous agents operating in high-stakes environments (finance, healthcare, legal, operations) encounter situations where acting without a human sanity-check is unacceptable. RFC-0025 provides a first-class protocol primitive — intent suspension — that: + +- Integrates cleanly with the existing intent lifecycle (RFC-0001). +- Preserves audit trails via the event log (RFC-0019). +- Supports structured engagement-decision logic to minimise unnecessary interruptions. +- Defines deterministic fallback behaviour when operators are unresponsive. + +--- + +## 2. 
New Lifecycle State + +``` +draft → active ⇄ suspended_awaiting_input → active → completed + ↘ abandoned (via fallback) +``` + +| Transition | Trigger | +|---|---| +| `active → suspended_awaiting_input` | Agent calls `request_input()` | +| `suspended_awaiting_input → active` | Operator responds via `POST /intents/{id}/suspend/respond` | +| `suspended_awaiting_input → abandoned` | `fallback_policy: "fail"` and timeout expires | + +**Reaper / lease-expiry workers MUST skip intents in `suspended_awaiting_input`** status — these intents are intentionally blocked pending human input. + +**Lease renewal MUST succeed** for intents in `suspended_awaiting_input` so the holding agent retains ownership across the suspension period. + +--- + +## 3. New Event Types + +| Event | When emitted | +|---|---| +| `intent.suspended` | When `request_input()` transitions the intent | +| `intent.resumed` | When an operator response is accepted | +| `intent.suspension_expired` | When a suspension timeout fires before a response | +| `engagement.decision` | When `should_request_input()` returns a decision | + +All events are stored in the intent event log and are visible via `GET /intents/{id}/events`. + +--- + +## 4. Response Types and Choices + +Every suspension declares the kind of input it expects from the operator via `response_type` and an optional list of `choices`. This gives operators clear, actionable options and lets the server validate responses before they reach the agent. 
+ +### 4.1 ResponseType + +| Value | Description | Choices required | Server-validated | +|---|---|---|---| +| `choice` | Operator selects one of the defined choices | Yes | Yes — value must match a choice | +| `confirm` | Binary yes/no confirmation | Optional (defaults to yes/no) | Yes — value must be `"yes"` or `"no"` | +| `text` | Free-form text input | No | No | +| `form` | Structured key/value fields (keys defined in context) | No | No | + +### 4.2 SuspensionChoice + +Each choice presented to the operator is a `SuspensionChoice`: + +| Field | Type | Required | Description | +|---|---|---|---| +| `value` | string | ✓ | Machine-readable value returned to the agent when selected | +| `label` | string | ✓ | Human-readable label displayed to the operator | +| `description` | string | — | Longer explanation to help the operator decide | +| `style` | string | — | Visual hint for the channel UI: `"primary"`, `"danger"`, `"default"` | +| `metadata` | object | — | Arbitrary extra data attached to this choice | + +When `response_type` is `choice`, the agent MUST supply at least one `SuspensionChoice`. When `response_type` is `confirm` and no explicit choices are supplied, the server assumes `[{value: "yes", label: "Yes"}, {value: "no", label: "No"}]`. + +--- + +## 5. SuspensionRecord + +A `SuspensionRecord` is created by the agent and persisted in `intent.state._suspension`. 
+ +| Field | Type | Required | Description | +|---|---|---|---| +| `id` | string (UUID) | ✓ | Unique suspension identifier | +| `question` | string | ✓ | Human-readable question/prompt | +| `response_type` | enum | ✓ | Expected response type (see §4.1) — default `"choice"` | +| `choices` | SuspensionChoice[] | — | Available options for the operator (see §4.2) | +| `context` | object | — | Structured context for the operator | +| `channel_hint` | string | — | Preferred delivery channel (`"slack"`, `"email"`) | +| `suspended_at` | ISO-8601 | — | When the suspension started | +| `timeout_seconds` | integer | — | Per-attempt expiry window (omit for no timeout). When `retry_policy` is set, this is the per-attempt window, not the total. Total window = `interval_seconds × max_attempts`. | +| `expires_at` | ISO-8601 | — | Total deadline: `suspended_at + (interval_seconds × max_attempts)` when `retry_policy` is set, otherwise `suspended_at + timeout_seconds`. | +| `fallback_value` | any | — | Value for `complete_with_fallback` policy | +| `fallback_policy` | enum | ✓ | See §6. Alias for `retry_policy.final_fallback_policy` when `retry_policy` is set. | +| `retry_policy` | HumanRetryPolicy | — | Re-notification and escalation policy (RFC-0026). When absent, single-attempt behaviour (original RFC-0025 semantics). | +| `confidence_at_suspension` | float [0,1] | — | Agent confidence at suspension time | +| `decision_record` | object | — | EngagementDecision that triggered suspension | +| `response` | any | — | Operator's response (set on resume) | +| `responded_at` | ISO-8601 | — | When the operator responded | +| `resolution` | enum | — | `"responded"`, `"expired"`, `"cancelled"` | + +--- + +## 6. 
Fallback Policies + +| Policy | On timeout | +|---|---| +| `fail` | Raise `InputTimeoutError`; intent remains suspended or transitions to abandoned | +| `complete_with_fallback` | Return `fallback_value` and continue execution | +| `use_default_and_continue` | Return `fallback_value` and continue execution (alias for compatibility) | + +--- + +## 7. EngagementSignals and EngagementDecision + +Before calling `request_input()`, agents SHOULD call `should_request_input()` to obtain an engagement decision. + +### 7.1 EngagementSignals + +| Field | Type | Default | Description | +|---|---|---|---| +| `confidence` | float [0,1] | 1.0 | Agent confidence in autonomous answer | +| `risk` | float [0,1] | 0.0 | Estimated risk of acting autonomously | +| `reversibility` | float [0,1] | 1.0 | How reversible the action is | +| `context` | object | `{}` | Additional key/value context | + +### 7.2 Decision Modes + +| Mode | Condition | `should_ask` | +|---|---|---| +| `autonomous` | confidence ≥ 0.85, risk ≤ 0.20, reversibility ≥ 0.50 | `false` | +| `request_input` | moderate uncertainty | `true` | +| `require_input` | confidence < 0.50 or risk > 0.50 | `true` | +| `defer` | risk ≥ 0.80 or reversibility ≤ 0.10 | `false` | + +--- + +## 8. REST Endpoint: `POST /intents/{id}/suspend/respond` + +**Authentication:** X-API-Key header required. + +### Request body + +```json +{ + "suspension_id": "susp-uuid", + "value": "", + "responded_by": "alice@example.com", + "metadata": {} +} +``` + +### Success response (200) + +```json +{ + "intent_id": "intent-uuid", + "suspension_id": "susp-uuid", + "resolution": "responded", + "value": "approve", + "choice_label": "Approve refund", + "choice_description": "Issue full refund to original payment method", + "responded_by": "alice@example.com", + "responded_at": "2026-03-23T10:01:00" +} +``` + +When the selected value matches a `SuspensionChoice`, the response includes `choice_label` and `choice_description` for downstream audit/display. 
+ +### Validation behaviour + +The server validates the `value` field against the suspension's `response_type` and `choices`: + +| `response_type` | Validation | +|---|---| +| `choice` | `value` MUST match one of the defined `SuspensionChoice.value` entries | +| `confirm` | `value` MUST be `"yes"` or `"no"` (checked even if no explicit choices are defined) | +| `text` | No validation — any non-empty string is accepted | +| `form` | No validation — value is passed through as-is | + +### Error responses + +| Status | Condition | +|---|---| +| 401 | Missing or invalid API key | +| 404 | Intent not found | +| 409 | Intent is not in `suspended_awaiting_input` status, or `suspension_id` does not match the active suspension | +| 422 | `suspension_id` is missing/empty, or `value` is invalid for the declared `response_type` | + +On a 422 for invalid choice, the response body includes `valid_choices` listing the accepted values. + +--- + +## 9. Agent SDK + +### 9.1 `request_input()` + +```python +from openintent import SuspensionChoice + +value = await self.request_input( + intent_id, + question="Should we refund order #12345?", + response_type="choice", + choices=[ + SuspensionChoice(value="approve", label="Approve refund", + description="Issue full refund to original payment method", + style="primary"), + SuspensionChoice(value="deny", label="Deny refund", + description="Reject and close the case", + style="danger"), + SuspensionChoice(value="escalate", label="Escalate", + description="Route to a senior operator"), + ], + context={"order_id": "12345", "amount": 499.99}, + channel_hint="slack", + timeout_seconds=3600, + fallback_policy="complete_with_fallback", + fallback_value="deny", + confidence=0.55, +) +``` + +Returns the operator's response value. Raises `InputTimeoutError` (fallback_policy="fail") or `InputCancelledError`. 
+ +For `confirm` type, choices default to yes/no if omitted: + +```python +value = await self.request_input( + intent_id, + question="Deploy to production?", + response_type="confirm", + timeout_seconds=600, + fallback_policy="fail", +) +# value will be "yes" or "no" +``` + +### 9.2 `should_request_input()` + +```python +decision = await self.should_request_input( + intent_id, + confidence=0.55, + risk=0.60, + reversibility=0.80, +) +if decision.should_ask: + value = await self.request_input(intent_id, question="Proceed?", + response_type="confirm") +``` + +### 9.3 Lifecycle Decorators + +```python +@on_input_requested # fired after suspension is written +@on_input_received # fired when operator response arrives +@on_suspension_expired # fired when timeout expires +@on_engagement_decision # fired after should_request_input() returns +``` + +--- + +## 10. InputResponse + +| Field | Type | Description | +|---|---|---| +| `suspension_id` | string | ID of the SuspensionRecord | +| `value` | any | Operator's answer | +| `choice_label` | string | Label of the selected choice (if applicable) | +| `choice_description` | string | Description of the selected choice (if applicable) | +| `responded_by` | string | Operator identifier | +| `responded_at` | ISO-8601 | When the operator responded | +| `metadata` | object | Optional channel metadata | + +--- + +## 11. Security Considerations + +- The `POST /suspend/respond` endpoint MUST be authenticated. Implementors SHOULD apply role-based access control to restrict which API keys can respond. +- `suspension_id` SHOULD be treated as a secret capability token when transmitted via external channels (Slack, email). +- Suspension payloads MUST NOT include secrets or PII in the `context` field unless the delivery channel is encrypted end-to-end. + +--- + +## 12. Backwards Compatibility + +- Adds a new `suspended_awaiting_input` status string — existing clients that enumerate statuses must be updated to handle this value. 
+- The `response_type` field defaults to `"choice"` — suspensions created without it behave identically to pre-0.16.0 behaviour. +- All new event types, endpoint, decorators, and structured choice fields are additive. +- Servers that do not implement this suspension protocol will return 404 for `POST /suspend/respond`; agents SHOULD handle this gracefully. +- RFC-0026: `retry_policy` field on `SuspensionRecord` is optional and additive. Existing `fallback_policy` field is unchanged; when `retry_policy` is absent, single-attempt behaviour is preserved. + +## Cross-RFC Interactions + +| RFC | Interaction | +|-----|------------| +| RFC-0001 (Intents) | Adds `suspended_awaiting_input` to the intent lifecycle | +| RFC-0002 (Intent Graphs) | Suspended status in aggregate counter; completion gate clarified | +| RFC-0006 (Subscriptions) | All suspension events propagate via existing subscription infrastructure | +| RFC-0007 (Portfolios) | Portfolio aggregate gains suspension-aware fields (RFC-0026) | +| RFC-0010 (Retry Policies) | Parallel construct: RFC-0010 retries agent failures; RFC-0026 retries human non-response | +| RFC-0012 (Planning) | Task blocked state mirrors intent suspension bidirectionally (RFC-0026) | +| RFC-0019 (Verifiable Logs) | Suspension events are stored in the append-only event log | +| RFC-0024 (Workflow I/O) | validate_claim_inputs() gains upstream_intent_suspended rejection (RFC-0026) | +| RFC-0026 (Suspension Containers) | Defines HumanRetryPolicy, three-level cascade, container rules, UpstreamIntentSuspendedError | diff --git a/docs/rfcs/0026-suspension-container-interaction.md b/docs/rfcs/0026-suspension-container-interaction.md new file mode 100644 index 0000000..b1e0e83 --- /dev/null +++ b/docs/rfcs/0026-suspension-container-interaction.md @@ -0,0 +1,426 @@ +# RFC-0026: Suspension Propagation & Retry v1.0 + +**Status:** Accepted +**Version:** v0.17.0 +**Date:** 2026-03-24 +**Authors:** OpenIntent Working Group +**Extends:** [RFC-0025 
(Human-in-the-Loop)](./0025-human-in-the-loop.md) + +--- + +## Abstract + +RFC-0025 introduced `suspended_awaiting_input` as an intent-level lifecycle state but left three gaps: (1) how suspension interacts with container structures (intent graphs, portfolios, plans, workflows); (2) a single-shot timeout model with no re-notification or escalation ladder; and (3) no platform- or agent-level default for suspension policy. RFC-0026 closes all three gaps in a single coherent extension so the protocol has complete, end-to-end coverage of human engagement. + +--- + +## 1. Motivation + +### Gap 1 — Container semantics + +RFC-0025 defines suspension at the intent level but does not specify how containers observe it: + +- **RFC-0002 (Intent Graphs):** `aggregate_status.by_status` has no entry for `suspended_awaiting_input`. The completion gate does not explicitly say whether a suspended dependency satisfies it. No `active → blocked` trigger is defined for upstream suspension. +- **RFC-0007 (Portfolios):** The aggregate status algorithm does not enumerate suspension. The GET response has no suspension-aware fields. +- **RFC-0012 (Plans & Tasks):** The task `blocked` state was designed before RFC-0025 and has no defined relationship to `suspended_awaiting_input`. When a phase-agent calls `request_input()`, the plan task does not transition to `blocked`. The coordinator sees the task as still running. +- **RFC-0024 (Workflows):** `validate_claim_inputs()` has no rejection reason for upstream suspension. The workflow progress object has no `suspended_phases` field. + +### Gap 2 — Human retry / re-notification + +RFC-0025 timeout model is single-shot: one window, then fallback policy fires. A missed Slack notification should not immediately trigger `complete_with_fallback="deny"`. Systems need grace — notify once, re-notify, escalate, then fail. + +### Gap 3 — Platform and agent-level defaults + +Every `request_input()` call must specify its own policy from scratch. 
There is no platform-level constant and no agent-level default. + +--- + +## 2. Container Rules (Five, Non-Negotiable) + +### Rule 1 — Suspension is always intent-local + +Only the suspended intent changes to `suspended_awaiting_input`. Container structures (parent intents, portfolios, plans, workflows) observe it; they never absorb it into their own state. A portfolio does not become suspended because a member is suspended. + +### Rule 2 — Suspended intent is "not completed"; dependents stay blocked + +`suspended_awaiting_input` does **NOT** satisfy the RFC-0002 completion gate. A dependent intent that is `active` and whose upstream suspends MUST transition to `blocked` (new `active → blocked` trigger). Auto-unblock fires when the dependency *resumes and subsequently completes*, not on resume alone. + +### Rule 3 — RFC-0012 tasks mirror intent suspension bidirectionally + +When an intent transitions to `suspended_awaiting_input`, its corresponding plan task MUST transition to `blocked` with: + +```json +{ + "blocked_reason": "intent_suspended", + "suspended_intent_id": "" +} +``` + +On `intent.resumed`, the task transitions back to `running`. RFC-0012 checkpoints that require human approval SHOULD be implemented via RFC-0025 `request_input()` — this is the canonical pattern going forward. 
+ +### Rule 4 — Container aggregates gain suspension-aware fields + +**RFC-0002 parent intents:** + +```json +{ + "aggregate_status": { + "total": 6, + "by_status": { + "completed": 3, + "active": 2, + "blocked": 0, + "suspended_awaiting_input": 1 + } + } +} +``` + +**RFC-0007 portfolios:** + +Portfolio GET response adds two fields: + +```json +{ + "has_suspended_members": true, + "suspended_member_count": 1 +} +``` + +Aggregate status algorithm (revised): + +| Condition | Aggregate status | +|---|---| +| All members `completed` | `completed` | +| Any member `failed` or `abandoned` | `failed` | +| Otherwise (including any suspended) | `in_progress` | + +**RFC-0012 plans:** + +Plan progress object gains: + +```json +{ + "suspended_tasks": [ + { + "task_id": "task_01XYZ", + "intent_id": "intent_01ABC", + "suspended_since": "2026-03-24T10:00:00Z", + "expires_at": "2026-03-24T13:00:00Z" + } + ] +} +``` + +**RFC-0024 workflows:** + +Workflow progress object gains: + +```json +{ + "suspended_phases": [ + { + "phase_name": "compliance_review", + "intent_id": "intent_01ABC", + "suspended_since": "2026-03-24T10:00:00Z", + "expires_at": "2026-03-24T13:00:00Z" + } + ] +} +``` + +### Rule 5 — Portfolio deadline takes precedence over suspension timeout + +If `governance.deadline` fires while a member intent is `suspended_awaiting_input`, the server MUST abandon the intent with `abandonment_reason: "portfolio_deadline_exceeded"`, bypassing `fallback_policy`. `intent.suspension_expired` is still emitted with `reason: "portfolio_deadline"` for audit. + +--- + +## 3. Coordinator Suspension Policy (RFC-0013 Extension) + +Coordinator leases gain an optional `suspension_policy` field: + +| Value | Behaviour | +|---|---| +| `isolate` | Default. No action beyond aggregate status update. | +| `block_dependents` | Coordinator explicitly pauses RFC-0024-wired downstream phases. | +| `escalate` | Emits `coordinator.escalation_required` or self-suspends. | + +--- + +## 4. 
Human Retry / Re-notification Policy + +### 4.1 The `HumanRetryPolicy` Object + +```json +{ + "max_attempts": 3, + "interval_seconds": 3600, + "strategy": "fixed", + "escalation_ladder": [ + { "attempt": 2, "channel_hint": "email", "notify_to": null }, + { "attempt": 3, "channel_hint": "pagerduty", "notify_to": "supervisor@example.com" } + ], + "final_fallback_policy": "fail" +} +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `max_attempts` | integer | 1 | Total notification attempts (including initial) | +| `interval_seconds` | integer | — | Seconds between re-notification attempts (≤ `timeout_seconds`) | +| `strategy` | enum: `"fixed"` / `"linear"` / `"exponential"` | `"fixed"` | Re-notification cadence strategy | +| `escalation_ladder` | array | `[]` | Per-attempt channel/recipient overrides | +| `final_fallback_policy` | enum | (inherited) | Policy to apply after all attempts exhausted | + +### 4.2 How It Works + +1. **Attempt 1** fires immediately when `request_input()` is called. `timeout_seconds` becomes the *per-attempt* window. +2. If the operator does not respond within `interval_seconds` (≤ `timeout_seconds`), a re-notification fires and the attempt counter increments. +3. Each `escalation_ladder` entry triggers at its `attempt` number, overriding `channel_hint` and optionally routing to a different `notify_to` identity. +4. After `max_attempts` notifications with no response, `final_fallback_policy` is applied. +5. **Total suspension window** = `interval_seconds × max_attempts`. `expires_at` on `SuspensionRecord` reflects this total deadline. +6. `suspension_id` is **unchanged** across all attempts — the operator can respond to the original request at any point. + +### 4.3 Backwards Compatibility + +The existing `fallback_policy` field on `SuspensionRecord` is kept as an alias: + +- `fallback_policy` with no `retry_policy` is equivalent to `HumanRetryPolicy(max_attempts=1)`. 
+- When a `retry_policy` is present, `final_fallback_policy` inside it takes precedence over the top-level `fallback_policy`. + +### 4.4 New Events + +| Event | When emitted | +|---|---| +| `intent.suspension_renotified` | Before each re-notification attempt (attempt ≥ 2) | +| `intent.suspension_escalated` | When an `escalation_ladder` entry triggers | + +**`intent.suspension_renotified` payload:** + +```json +{ + "suspension_id": "susp-uuid", + "attempt": 2, + "max_attempts": 3, + "channel_hint": "email", + "notify_to": null, + "next_attempt_at": "2026-03-24T11:00:00Z" +} +``` + +**`intent.suspension_escalated` payload:** + +```json +{ + "suspension_id": "susp-uuid", + "attempt": 3, + "escalated_to": "supervisor@example.com", + "channel_hint": "pagerduty" +} +``` + +Existing `intent.suspension_expired` fires after all attempts exhausted, then `final_fallback_policy` executes. + +### 4.5 `@on_input_requested` Re-fired on Each Attempt + +The existing `@on_input_requested` decorator is called again with `attempt` number in the suspension context on each re-notification. Agents can customize messages: + +```python +@on_input_requested +async def notify_operator(self, intent, suspension): + attempt = suspension.context.get("_attempt", 1) + if attempt == 1: + msg = f"Input needed: {suspension.question}" + elif attempt < suspension.context.get("_max_attempts", 1): + msg = f"Reminder ({attempt}): {suspension.question}" + else: + msg = f"URGENT — final reminder: {suspension.question}" + await send_notification(msg, channel=suspension.channel_hint) +``` + +--- + +## 5. 
Three-Level Configuration Cascade + +``` +server config → BaseAgent default → request_input() call +───────────────────── ──────────────────────── ────────────────────── +default_human_retry_ default_human_retry_ retry_policy= + policy: { policy: { HumanRetryPolicy( + max_attempts: 3, max_attempts: 2, max_attempts: 1, + interval_seconds: 3600 interval_seconds: 1800 interval_seconds: 300 +} } ) +``` + +Resolution: per-suspension overrides agent default overrides platform default. Any field not specified at a lower level inherits from the level above. + +**Platform constant location:** Server config file (`openintent.yaml`) under `suspension.default_retry_policy`. Exposed via `GET /v1/server/config` (read-only, for client introspection). + +**Agent-level default:** `BaseAgent.default_human_retry_policy` — a `HumanRetryPolicy` instance set in the agent definition or `__init__`. If `None`, platform default applies. + +--- + +## 6. RFC-0024 Patch: `validate_claim_inputs()` Rejection Reason + +When an agent attempts to claim a task whose declared inputs reference an upstream phase that is currently `suspended_awaiting_input`, `validate_claim_inputs()` MUST reject with: + +```python +raise UpstreamIntentSuspendedError( + task_id=task_id, + phase_name=phase_name, + suspended_intent_id="", + expected_resume_at="", +) +``` + +This is a new exception type (`upstream_intent_suspended`) that the executor surfaces as a claim rejection reason. The downstream task stays in `pending` / `ready` state and retries the claim check after the upstream resumes. + +--- + +## 7. Relationship to RFC-0010 (Retry Policies) + +RFC-0010 defines retry when the *agent* fails (picks a new agent attempt). RFC-0026 defines retry when the *human* fails to respond (resends notification, escalates channel). They are parallel constructs at different layers: + +| Dimension | RFC-0010 | RFC-0026 | +|---|---|---| +| What failed? | Agent execution | Human responsiveness | +| What retries? 
| Agent assignment | Human notification | +| State during retry | Intent may be reassigned | Intent stays `suspended_awaiting_input` | +| Infrastructure | Scheduled retry job | Scheduled re-notification job | + +The server SHOULD use the same scheduled-job infrastructure for both. + +--- + +## 8. Python SDK — `HumanRetryPolicy` + +```python +from openintent import HumanRetryPolicy + +policy = HumanRetryPolicy( + max_attempts=3, + interval_seconds=3600, + escalation_ladder=[ + {"attempt": 2, "channel_hint": "email"}, + {"attempt": 3, "channel_hint": "pagerduty", "notify_to": "supervisor@example.com"}, + ], + final_fallback_policy="fail", +) + +value = await self.request_input( + intent_id, + question="Should we proceed with the refund?", + response_type="choice", + choices=[...], + timeout_seconds=3600, + retry_policy=policy, +) +``` + +`BaseAgent` gains `default_human_retry_policy`: + +```python +@Agent("my-agent") +class MyAgent: + default_human_retry_policy = HumanRetryPolicy( + max_attempts=2, + interval_seconds=1800, + final_fallback_policy="complete_with_fallback", + ) +``` + +--- + +## 9. End-to-End Motivating Example + +**Scenario:** Multi-phase compliance workflow. Phase 2 (`compliance_review`) requires human sign-off before Phase 3 (`generate_report`) can run. 
+ +``` +Phase 1: fetch_data → completes OK +Phase 2: compliance_review → agent calls request_input() +Phase 3: generate_report → depends_on: compliance_review +``` + +**Timeline:** + +| Time | Event | +|---|---| +| T+0 | Phase 2 agent calls `request_input()` with `retry_policy(max_attempts=3, interval_seconds=3600)` | +| T+0 | Intent 2 → `suspended_awaiting_input` | +| T+0 | Task 2 → `blocked` (`blocked_reason: "intent_suspended"`) | +| T+0 | Intent 3 → `blocked` (upstream suspended, does not satisfy completion gate) | +| T+0 | `intent.suspended` emitted; `@on_input_requested` fires (attempt=1) → Slack message sent | +| T+0 | Portfolio: `has_suspended_members: true`, `suspended_member_count: 1` | +| T+3600 | No response. `intent.suspension_renotified` emitted (attempt=2) | +| T+3600 | `@on_input_requested` fires again (attempt=2, channel_hint="email") → email sent | +| T+3600 | `intent.suspension_escalated` emitted (attempt=2) | +| T+5400 | Operator responds via `POST /intents/{id}/suspend/respond` | +| T+5400 | Intent 2 → `active` → `completed` | +| T+5400 | Task 2 → `running` → `completed` | +| T+5400 | Intent 3 → `active` (dependency now completed) | +| T+5400 | Task 3 claims Phase 3 inputs from Phase 2 outputs — validate_claim_inputs() succeeds | +| T+5500 | Phase 3 completes. Workflow done. | + +**What did NOT happen:** Phase 3 did not try to claim while Phase 2 was suspended. The coordinator saw the suspension in the aggregate. The portfolio deadline was not exceeded. + +--- + +## 10. Cross-RFC Patch Summary + +### RFC-0002 patches + +- Status enum: add `suspended_awaiting_input` to `by_status` in `aggregate_status`. +- Completion gate: explicitly states `suspended_awaiting_input` does NOT satisfy the gate. +- New `active → blocked` trigger: upstream dependency transitions to `suspended_awaiting_input`. +- Cross-RFC table: add RFC-0026. + +### RFC-0007 patches + +- Aggregate status algorithm: enumerated explicitly (completed/failed/in_progress). 
+- GET response: add `has_suspended_members: bool`, `suspended_member_count: int`. +- New events: `portfolio.member_suspended`, `portfolio.member_resumed`. +- Cross-RFC table: add RFC-0026. + +### RFC-0012 patches + +- Bidirectional task/intent relationship: task `blocked` ↔ intent `suspended_awaiting_input`. +- `blocked_reason: "intent_suspended"` and `suspended_intent_id` on blocked task. +- Plan progress: add `suspended_tasks` array. +- Checkpoints: explicitly documented as RFC-0025 `request_input()` triggers (canonical pattern). +- Cross-RFC table: add RFC-0026. + +### RFC-0024 patches + +- `validate_claim_inputs()`: add `upstream_intent_suspended` rejection reason (`UpstreamIntentSuspendedError`). +- Workflow progress: add `suspended_phases` array. +- Cross-RFC table: add RFC-0026. + +### RFC-0025 patches + +- `SuspensionRecord`: add `retry_policy` field (optional `HumanRetryPolicy`). +- `timeout_seconds` semantics: clarified as per-attempt window when `retry_policy` is set. +- `fallback_policy`: documented as alias for `HumanRetryPolicy(max_attempts=1, final_fallback_policy=...)`. +- Cross-RFC table: add RFC-0026, RFC-0010. +- Backwards compatibility: note `fallback_policy` unchanged; `retry_policy` is additive. + +### RFC-0010 patches + +- Cross-RFC table: add RFC-0026 with note on parallel retry constructs. + +--- + +## 11. Security Considerations + +- Re-notification payloads to external channels (Slack, PagerDuty) MUST NOT include secrets or PII in `question` or `context` fields. +- `escalation_ladder.notify_to` identity values should be validated against an allowlist before delivery. +- Multiple re-notification attempts increase the attack surface for replay; `suspension_id` SHOULD remain the same (see §4.2 item 6) and the server MUST reject duplicate responses after the first. + +--- + +## 12. Backwards Compatibility + +- `retry_policy` on `SuspensionRecord` is optional. 
Existing `fallback_policy` field continues to work unchanged with single-attempt semantics. +- `UpstreamIntentSuspendedError` is a new exception class; callers that only catch `UnresolvableInputError` will see uncaught exceptions if they don't update. Callers should catch `WorkflowError` for robust handling. +- New events (`intent.suspension_renotified`, `intent.suspension_escalated`) are additive; existing subscriptions propagate them through the same infrastructure. +- `has_suspended_members` / `suspended_member_count` are additive fields on portfolio GET responses; existing clients that ignore unknown fields are unaffected. +- `suspended_tasks` / `suspended_phases` are additive fields on progress objects. diff --git a/docs/spec/workflow-yaml.md b/docs/spec/workflow-yaml.md index e99ca5e..ecb6312 100644 --- a/docs/spec/workflow-yaml.md +++ b/docs/spec/workflow-yaml.md @@ -106,10 +106,146 @@ Each phase in the `workflow` section supports these fields: | `depends_on` | list[string] | No | Phase names that must complete first | | `constraints` | list[string] | No | Rules/parameters for the agent | | `initial_state` | object | No | Initial state values | -| `inputs` | object | No | Input mappings from dependencies | -| `outputs` | list[string] | No | State keys to expose to dependents | +| `inputs` | object | No | Input mappings wired by the executor before handoff (RFC-0024) | +| `outputs` | object | No | Declared output keys and types; executor validates on completion (RFC-0024) | | `skip_when` | string | No | Condition expression to skip phase | +### Input/Output Contracts (RFC-0024) + +The `inputs` and `outputs` fields establish typed I/O contracts between phases. The **executor** — not the agent — owns the wiring: it pre-populates `ctx.input` from resolved upstream outputs before calling the agent handler, and it validates the agent's return dict against declared `outputs` before marking the task complete. 
+ +#### `inputs` Field + +`inputs` is a mapping from local key names to upstream phase output references. The executor resolves each reference and injects the value into `ctx.input` before the agent handler is called. + +**Syntax for input mapping values:** + +| Pattern | Description | +|---------|-------------| +| `{phase_name}.{key}` | Output key `{key}` from completed upstream phase `{phase_name}` | +| `$trigger.{key}` | Value from the workflow trigger payload | +| `$initial_state.{key}` | Value from the phase's `initial_state` | + +**Rules:** +- Every `{phase_name}` referenced in an input mapping must appear in the phase's `depends_on` list. +- If the upstream phase declares `outputs`, the referenced key must appear there. +- If any declared input cannot be resolved, the executor rejects the task claim with `UnresolvableInputError`. + +```yaml +workflow: + run_analysis: + title: "Run Analysis" + assign: analytics-agent + depends_on: [fetch_financials, fetch_hr_data] + inputs: + fin_revenue: fetch_financials.revenue + fin_expenses: fetch_financials.expenses + hr_headcount: fetch_hr_data.headcount + quarter: $trigger.quarter + outputs: + findings: array + risk_level: string + violations_found: boolean +``` + +The agent for `run_analysis` reads: +```python +revenue = ctx.input["fin_revenue"] # from fetch_financials output +headcount = ctx.input["hr_headcount"] # from fetch_hr_data output +quarter = ctx.input["quarter"] # from trigger +``` + +#### `outputs` Field + +`outputs` declares the keys and types that the agent must return. The executor validates the agent's output dict against this declaration at completion time. 
+ +**Type values** may be: +- Primitive strings: `string`, `number`, `boolean`, `object`, `array` +- Type names from the workflow's `types` block +- An object with `type` and optional `required` modifier + +```yaml +types: + Finding: + source: string + content: string + confidence: number + +workflow: + research: + title: "Research Phase" + assign: researcher + outputs: + sources: array + findings: Finding + summary: string + warnings: + type: array + required: false # optional output — will not fail validation if absent +``` + +**Validation behavior:** +- Required keys (default) must be present in the agent's return dict, or the executor raises `MissingOutputError`. +- If a value's type does not match the declaration, the executor raises `OutputTypeMismatchError`. +- No type coercion is performed. Validation only. +- Extra keys returned beyond what is declared are accepted and recorded. + +#### End-to-End Wiring Example + +```yaml +openintent: "1.0" + +info: + name: "Research Pipeline" + +types: + Finding: + source: string + content: string + confidence: number + +workflow: + research: + title: "Gather Research" + assign: researcher + inputs: + topic: $trigger.topic + outputs: + sources: array + findings: Finding + + analysis: + title: "Analyze Findings" + assign: analyst + depends_on: [research] + inputs: + research_findings: research.findings + source_list: research.sources + outputs: + insights: string + recommendations: array + + report: + title: "Write Report" + assign: writer + depends_on: [analysis] + inputs: + insights: analysis.insights + recommendations: analysis.recommendations + outputs: + report_url: string + report_summary: string +``` + +The executor guarantees: +1. `research` is called with `ctx.input = {"topic": }` +2. `analysis` is only claimable after `research` completes with both `sources` and `findings` present +3. `analysis` is called with `ctx.input = {"research_findings": ..., "source_list": ...}` +4. 
`analysis` completion is rejected if `insights` or `recommendations` are missing +5. `report` is called with `ctx.input = {"insights": ..., "recommendations": ...}` + +No agent knows about the others. No agent reads raw intent state. + ### RFC-Specific Phase Fields | Field | Type | RFC | Description | diff --git a/mcp-server/package.json b/mcp-server/package.json index 4a882d2..9b09850 100644 --- a/mcp-server/package.json +++ b/mcp-server/package.json @@ -1,6 +1,6 @@ { "name": "@openintentai/mcp-server", - "version": "0.15.1", + "version": "0.17.0", "description": "MCP server exposing the OpenIntent Coordination Protocol as MCP tools and resources", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts index d97fcc6..2579100 100644 --- a/mcp-server/src/index.ts +++ b/mcp-server/src/index.ts @@ -30,7 +30,7 @@ async function main() { const server = new Server( { name: "openintent-mcp", - version: "0.15.1", + version: "0.17.0", }, { capabilities: { diff --git a/mkdocs.yml b/mkdocs.yml index f516a5f..d55e384 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,5 +1,5 @@ site_name: OpenIntent SDK -site_description: "The Python SDK for structured multi-agent coordination. 23 RFCs. Decorator-first agents. Built-in server. Federation. MCP integration." +site_description: "The Python SDK for structured multi-agent coordination. 26 RFCs. Decorator-first agents. Built-in server. Federation. MCP integration." 
site_url: https://openintent-ai.github.io/openintent/ repo_url: https://github.com/openintent-ai/openintent repo_name: openintent-ai/openintent @@ -127,6 +127,7 @@ nav: - Verifiable Event Logs: guide/verifiable-logs.md - Distributed Tracing: guide/distributed-tracing.md - Agent-to-Agent Messaging: guide/messaging.md + - Human-in-the-Loop: guide/human-in-the-loop.md - Federation: guide/federation.md - MCP Integration: guide/mcp.md - YAML Workflows: guide/workflows.md @@ -169,6 +170,9 @@ nav: - "0021 \u2014 Agent-to-Agent Messaging": rfcs/0021-agent-to-agent-messaging.md - "0022 \u2014 Federation Protocol": rfcs/0022-federation-protocol.md - "0023 \u2014 Federation Security": rfcs/0023-federation-security.md + - "0024 \u2014 Workflow I/O Contracts": rfcs/0024-workflow-io-contracts.md + - "0025 \u2014 Human-in-the-Loop": rfcs/0025-human-in-the-loop.md + - "0026 \u2014 Suspension Containers": rfcs/0026-suspension-container-interaction.md - Changelog: changelog.md - Examples: - Multi-Agent Workflow: examples/multi-agent.md @@ -204,14 +208,14 @@ extra: link: https://pypi.org/project/openintent/ version: provider: mike - announcement: "v0.15.1 is here — Gemini adapter rebuilt for google-genai SDK v1.0+, full LLMEngine Gemini integration. Read the changelog →" + announcement: "v0.17.0 is here — RFC-0026 Intent Suspension: container interaction, HumanRetryPolicy with escalation ladders, UpstreamIntentSuspendedError, and platform-level default policies. Read the changelog →" meta: - name: description - content: "OpenIntent Python SDK — structured multi-agent coordination protocol with decorator-first agents, 23 RFCs, 7 LLM adapters, federation, MCP integration, and built-in FastAPI server." + content: "OpenIntent Python SDK — structured multi-agent coordination protocol with decorator-first agents, 26 RFCs, 7 LLM adapters, HITL suspension, federation, MCP integration, and built-in FastAPI server." 
- name: og:title content: "OpenIntent SDK — Multi-Agent Coordination Protocol" - name: og:description - content: "Production-ready Python SDK for structured multi-agent coordination. Decorator-first agents, YAML workflows, 7 LLM adapters, MCP integration, built-in server." + content: "Production-ready Python SDK for structured multi-agent coordination. Decorator-first agents, YAML workflows, HITL suspension, 7 LLM adapters, MCP integration, built-in server." - name: og:type content: website - name: og:url @@ -221,7 +225,7 @@ extra: - name: twitter:title content: "OpenIntent SDK" - name: twitter:description - content: "The Python SDK for structured multi-agent coordination. 23 RFCs. Decorator-first agents. Federation. MCP integration. Built-in server." + content: "The Python SDK for structured multi-agent coordination. 26 RFCs. Decorator-first agents. Federation. MCP integration. Built-in server." extra_css: - stylesheets/extra.css diff --git a/openintent/__init__.py b/openintent/__init__.py index e24aed3..b2ae02c 100644 --- a/openintent/__init__.py +++ b/openintent/__init__.py @@ -25,16 +25,20 @@ on_complete, on_conflict, on_drain, + on_engagement_decision, on_escalation, on_event, on_governance_blocked, on_handoff, on_identity_registered, + on_input_received, + on_input_requested, on_lease_available, on_message, on_quorum, on_retry, on_state_change, + on_suspension_expired, on_task, on_trigger, output_guardrail, @@ -42,6 +46,8 @@ from .client import AsyncOpenIntentClient, OpenIntentClient from .exceptions import ( ConflictError, + InputCancelledError, + InputTimeoutError, LeaseConflictError, NotFoundError, OpenIntentError, @@ -115,17 +121,24 @@ Decision, DecisionRecord, DecisionType, + # RFC-0025: Human-in-the-Loop + EngagementDecision, + EngagementSignals, Escalation, EscalationPriority, EscalationStatus, + # RFC-0026: Suspension Container Interaction & Human Retry + EscalationStep, EventProof, EventType, GrantConstraints, Guardrails, Heartbeat, HeartbeatConfig, 
+ HumanRetryPolicy, IdentityChallenge, IdentityVerification, + InputResponse, Intent, IntentACL, IntentAttachment, @@ -160,10 +173,13 @@ PlanState, PortfolioMembership, PortfolioStatus, + ResponseType, RetryPolicy, RetryStrategy, StreamState, StreamStatus, + SuspensionChoice, + SuspensionRecord, # RFC-0012: Task Decomposition & Planning Task, TaskStatus, @@ -208,9 +224,14 @@ DelegateConfig, # noqa: F401 GovernanceConfig, IdentityConfig, + InputWiringError, + MissingOutputError, + OutputTypeMismatchError, PermissionLevel, # noqa: F401 PermissionsConfig, # noqa: F401 PhaseConfig, + UnresolvableInputError, + UpstreamIntentSuspendedError, VerificationConfig, WorkflowError, WorkflowNotFoundError, @@ -233,7 +254,7 @@ def get_server() -> tuple[Any, Any, Any]: ) -__version__ = "0.15.1" +__version__ = "0.17.0" __all__ = [ "OpenIntentClient", "AsyncOpenIntentClient", @@ -332,6 +353,8 @@ def get_server() -> tuple[Any, Any, Any]: "LeaseConflictError", "ValidationError", "InputValidationError", + "InputTimeoutError", + "InputCancelledError", "validate_required", "validate_string_length", "validate_positive_int", @@ -367,19 +390,39 @@ def get_server() -> tuple[Any, Any, Any]: "on_complete", "on_conflict", "on_drain", + "on_engagement_decision", "on_escalation", "on_event", "on_governance_blocked", "on_handoff", "on_identity_registered", + "on_input_received", + "on_input_requested", "on_lease_available", "on_message", "on_quorum", "on_retry", "on_state_change", + "on_suspension_expired", "on_task", "on_trigger", "output_guardrail", + # RFC-0024: Workflow I/O Contracts + "MissingOutputError", + "OutputTypeMismatchError", + "UnresolvableInputError", + "UpstreamIntentSuspendedError", + "InputWiringError", + # RFC-0025: HITL Models + "ResponseType", + "SuspensionChoice", + "SuspensionRecord", + "EngagementSignals", + "EngagementDecision", + "InputResponse", + # RFC-0026: Suspension Container Interaction & Human Retry + "EscalationStep", + "HumanRetryPolicy", "WorkflowSpec", 
"WorkflowError", "WorkflowValidationError", diff --git a/openintent/adapters/openai_adapter.py b/openintent/adapters/openai_adapter.py index 051f082..e0c2dc3 100644 --- a/openintent/adapters/openai_adapter.py +++ b/openintent/adapters/openai_adapter.py @@ -171,6 +171,13 @@ def _is_completions_model(self, model: str, kwargs: dict[str, Any]) -> bool: return True return is_codex_model(model) + @staticmethod + def _requires_max_completion_tokens(model: str) -> bool: + """Return True for models that require max_completion_tokens instead of max_tokens.""" + import re + + return bool(re.search(r"(^o1|^o3|gpt-5)", model)) + def _create_completion(self, **kwargs: Any) -> Any: """Create a completion with automatic event logging.""" stream = kwargs.get("stream", False) @@ -180,6 +187,10 @@ def _create_completion(self, **kwargs: Any) -> Any: tools = kwargs.get("tools", []) temperature = kwargs.get("temperature") + if self._requires_max_completion_tokens(model): + if "max_tokens" in kwargs and "max_completion_tokens" not in kwargs: + kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") + request_id = self._generate_id() if self._config.log_requests: diff --git a/openintent/agents.py b/openintent/agents.py index 183d367..6dfa0af 100644 --- a/openintent/agents.py +++ b/openintent/agents.py @@ -27,11 +27,16 @@ async def work(self, intent): from typing import Any, Callable, Optional, TypeVar, Union from .client import AsyncOpenIntentClient, OpenIntentClient +from .exceptions import InputCancelledError, InputTimeoutError from .models import ( AccessRequest, ACLEntry, + EngagementDecision, + EngagementSignals, Escalation, EventType, + HumanRetryPolicy, + InputResponse, Intent, IntentContext, IntentPortfolio, @@ -39,6 +44,9 @@ async def work(self, intent): MembershipRole, PeerInfo, Permission, + ResponseType, + SuspensionChoice, + SuspensionRecord, ) from .streaming import SSEEvent, SSEEventType, SSEStream, SSESubscription @@ -389,6 +397,78 @@ def on_drain(func: Callable) -> 
Callable: return func +def on_input_requested(func: Callable) -> Callable: + """ + Decorator: Called after suspension is written, before the agent blocks on input (RFC-0025). + + The handler receives the SuspensionRecord. Use this to route the question + to an operator (e.g. send a Slack message, email, or dashboard notification). + + Example: + ```python + @on_input_requested + async def notify_operator(self, intent, suspension): + await send_slack(suspension.question, channel=suspension.channel_hint) + ``` + """ + func._openintent_handler = "input_requested" + return func + + +def on_input_received(func: Callable) -> Callable: + """ + Decorator: Called when operator input arrives, before the awaiting call unblocks (RFC-0025). + + The handler receives the InputResponse. Use this to log the response or + route it before it reaches the suspended agent. + + Example: + ```python + @on_input_received + async def log_response(self, intent, response): + await self.log(intent.id, f"Operator responded: {response.value}") + ``` + """ + func._openintent_handler = "input_received" + return func + + +def on_suspension_expired(func: Callable) -> Callable: + """ + Decorator: Called when a suspension times out before the fallback policy is applied (RFC-0025). + + The handler receives the SuspensionRecord. Use this to send alerts or + adjust state before the fallback kicks in. + + Example: + ```python + @on_suspension_expired + async def alert_timeout(self, intent, suspension): + await send_alert(f"Suspension {suspension.id} expired — applying fallback") + ``` + """ + func._openintent_handler = "suspension_expired" + return func + + +def on_engagement_decision(func: Callable) -> Callable: + """ + Decorator: Called after should_request_input() returns an EngagementDecision (RFC-0025). + + The handler receives the EngagementDecision. Use this to audit the + engagement mode chosen by the agent or to override it. 
+ + Example: + ```python + @on_engagement_decision + async def audit_decision(self, intent, decision): + await self.log(intent.id, f"Engagement mode: {decision.mode}") + ``` + """ + func._openintent_handler = "engagement_decision" + return func + + def on_identity_registered(func: Callable) -> Callable: """ Decorator: Called when the agent's cryptographic identity is registered. @@ -636,6 +716,13 @@ class IntentSpec: depends_on: Optional[list[str]] = None constraints: dict[str, Any] = field(default_factory=dict) initial_state: dict[str, Any] = field(default_factory=dict) + # RFC-0024: I/O contracts preserved from WorkflowSpec phases. + # inputs: mapping from local key name -> upstream reference expression. + # outputs: mapping from output key name -> type declaration. + # The executor uses these to pre-populate ctx.input and validate + # completions; they are not consumed by IntentSpec itself. + inputs: dict[str, str] = field(default_factory=dict) + outputs: dict[str, Any] = field(default_factory=dict) def __post_init__(self): if self.depends_on is None: @@ -704,6 +791,57 @@ class AgentConfig: T = TypeVar("T", bound="BaseAgent") +_SENTINEL = object() + + +def _merge_retry_policies( + *, + call_site: "Optional[HumanRetryPolicy]", + agent_default: "Optional[HumanRetryPolicy]", + platform_default: "Optional[HumanRetryPolicy]", +) -> "Optional[HumanRetryPolicy]": + """RFC-0026 §5.3: field-level merge of retry policy levels. + + Merge precedence: call_site overrides agent_default overrides platform_default. + Each level only contributes a field when its value differs from the HumanRetryPolicy + class default, so the highest-priority level that explicitly sets a field wins. + + If all three are ``None``, returns ``None`` (single-attempt semantics). 
+ """ + levels = [p for p in (platform_default, agent_default, call_site) if p is not None] + if not levels: + return None + if len(levels) == 1: + return levels[0] + + _class_defaults = HumanRetryPolicy() + _scalar_fields = ( + "max_attempts", + "interval_seconds", + "strategy", + "final_fallback_policy", + ) + + merged: dict = {} + merged_ladder: list | None = None + + for policy in levels: + for field_name in _scalar_fields: + val = getattr(policy, field_name) + class_default = getattr(_class_defaults, field_name) + if val != class_default: + merged[field_name] = val + if policy.escalation_ladder: + merged_ladder = [s.to_dict() for s in policy.escalation_ladder] + + for field_name in _scalar_fields: + merged.setdefault(field_name, getattr(_class_defaults, field_name)) + + if merged_ladder: + merged["escalation_ladder"] = merged_ladder + + return HumanRetryPolicy.from_dict(merged) + class BaseAgent(ABC): """ @@ -717,6 +855,24 @@ class BaseAgent(ABC): _config: AgentConfig = field(default_factory=AgentConfig) _handlers: dict[str, list[Callable]] = field(default_factory=dict) + default_human_retry_policy: Optional["HumanRetryPolicy"] = None + """Class-level or instance-level default re-notification policy (RFC-0026). + + When set, this policy is applied to all ``request_input()`` calls that do + not supply an explicit ``retry_policy`` argument. Subclasses may override + at class level: + + .. 
code-block:: python + + class MyAgent(BaseAgent): + default_human_retry_policy = HumanRetryPolicy( + max_attempts=3, + interval_seconds=900, + strategy="linear", + final_fallback_policy="complete_with_fallback", + ) + """ + def __init__( self, base_url: Optional[str] = None, @@ -734,6 +890,9 @@ def __init__( self._subscription: Optional[SSESubscription] = None self._running = False self._loop: Optional[asyncio.AbstractEventLoop] = None + self._platform_retry_policy_cache: Optional["HumanRetryPolicy | None"] = ( + _SENTINEL + ) self._discover_handlers() @@ -769,6 +928,11 @@ def _discover_handlers(self) -> None: "governance_blocked": [], "approval_granted": [], "approval_denied": [], + # RFC-0025: HITL lifecycle hooks + "input_requested": [], + "input_received": [], + "suspension_expired": [], + "engagement_decision": [], } for name in dir(self): @@ -901,6 +1065,12 @@ async def _build_context(self, intent: Intent) -> "IntentContext": """ Build an IntentContext for an intent, auto-populating based on this agent's permission level. + + RFC-0024: If the intent's state contains ``_io_inputs`` (set by the + executor via ``to_portfolio_spec``), the declared input mappings are + resolved from completed upstream dependency outputs and placed in + ``ctx.input`` before the handler is called. Agents should read + ``intent.ctx.input`` rather than probing dependency state directly. """ ctx = IntentContext() @@ -910,14 +1080,85 @@ async def _build_context(self, intent: Intent) -> "IntentContext": except Exception: pass + dep_outputs: dict[str, dict[str, Any]] = {} if intent.depends_on: for dep_id in intent.depends_on: try: dep = await self.async_client.get_intent(dep_id) - ctx.dependencies[dep.title] = dep.state.to_dict() + dep_state = dep.state.to_dict() + ctx.dependencies[dep.title] = dep_state + dep_outputs[dep.title] = dep_state except Exception: pass + # RFC-0024: resolve ctx.input from declared _io_inputs mapping stored + # in the intent's initial state. 
Input mapping expressions use phase + # names (YAML keys), but dependency intents are indexed by title. + # _io_dep_title_to_name provides the title→name mapping so we can + # resolve "phase_name.key" correctly even when title != phase name. + try: + intent_state_dict = intent.state.to_dict() + except Exception: + intent_state_dict = {} + + io_inputs: dict[str, str] = intent_state_dict.get("_io_inputs") or {} + dep_title_to_name: dict[str, str] = ( + intent_state_dict.get("_io_dep_title_to_name") or {} + ) + # Build a phase-name-keyed view of dep outputs for input resolution + dep_outputs_by_name: dict[str, dict[str, Any]] = {} + for dep_title, dep_name in dep_title_to_name.items(): + if dep_title in dep_outputs: + dep_outputs_by_name[dep_name] = dep_outputs[dep_title] + + if io_inputs: + resolved_input: dict[str, Any] = {} + unresolvable: list[str] = [] + + for local_key, mapping_expr in io_inputs.items(): + if not isinstance(mapping_expr, str): + continue + try: + if mapping_expr.startswith("$trigger."): + key = mapping_expr[len("$trigger.") :] + if key in intent_state_dict: + resolved_input[local_key] = intent_state_dict[key] + else: + unresolvable.append(mapping_expr) + elif mapping_expr.startswith("$initial_state."): + key = mapping_expr[len("$initial_state.") :] + if key in intent_state_dict: + resolved_input[local_key] = intent_state_dict[key] + else: + unresolvable.append(mapping_expr) + else: + parts = mapping_expr.split(".", 1) + if len(parts) == 2: + ref_phase, ref_key = parts[0], parts[1] + # Try phase-name index first, fall back to title + phase_out = dep_outputs_by_name.get( + ref_phase, dep_outputs.get(ref_phase, {}) + ) + if ref_key in phase_out: + resolved_input[local_key] = phase_out[ref_key] + else: + unresolvable.append(mapping_expr) + else: + unresolvable.append(mapping_expr) + except Exception: + pass + + if unresolvable: + from .workflow import UnresolvableInputError + + raise UnresolvableInputError( + task_id=intent.id, + 
phase_name=getattr(intent, "title", intent.id), + unresolvable_refs=unresolvable, + ) + + ctx.input = resolved_input + try: events = await self.async_client.get_events(intent.id, limit=20) ctx.events = events @@ -1024,6 +1265,509 @@ async def escalate( context=data or {}, ) + # ==================== HITL: Human-in-the-Loop (RFC-0025) ==================== + + async def request_input( + self, + intent_id: str, + question: str, + response_type: str = "choice", + choices: Optional[list[Union[dict[str, Any], "SuspensionChoice"]]] = None, + context: Optional[dict[str, Any]] = None, + channel_hint: Optional[str] = None, + timeout_seconds: Optional[int] = None, + fallback_policy: str = "fail", + fallback_value: Optional[Any] = None, + confidence: Optional[float] = None, + retry_policy: Optional["HumanRetryPolicy"] = None, + ) -> Any: + """ + Suspend the intent and request operator input (RFC-0025 / RFC-0026). + + Transitions the intent to ``suspended_awaiting_input``, fires + ``@on_input_requested`` hooks, and polls for a response. When the + operator responds (via POST /intents/{id}/suspend/respond), the + suspension is resolved and the operator's response value is returned. + + When ``retry_policy`` is supplied (RFC-0026), the agent re-notifies the + operator up to ``retry_policy.max_attempts`` times, waiting + ``retry_policy.interval_seconds`` between each attempt, firing + ``@on_input_requested`` hooks and emitting + ``intent.suspension_renotified`` events on each re-attempt. + Escalation steps in ``retry_policy.escalation_ladder`` cause an + ``intent.suspension_escalated`` event to be emitted. After all + attempts are exhausted, ``retry_policy.final_fallback_policy`` is + applied. + + If ``retry_policy`` is omitted, single-attempt behaviour is preserved + (original RFC-0025 semantics). 
+ + The class attribute ``default_human_retry_policy`` can be set on a + ``BaseAgent`` subclass or instance to apply a retry policy to all + ``request_input()`` calls that do not supply an explicit one. + + Args: + intent_id: The intent to suspend. + question: The question or prompt for the operator. + response_type: Expected response type — ``"choice"`` (default), + ``"confirm"``, ``"text"``, or ``"form"``. + choices: List of ``SuspensionChoice`` objects or plain dicts with + ``value`` and ``label`` keys. For ``response_type="confirm"`` + and no explicit choices, defaults to yes/no. For ``"choice"`` + the operator must select one of these values. + context: Structured context to help the operator decide. + channel_hint: Preferred delivery channel (e.g. ``"slack"``). + timeout_seconds: Seconds before the suspension expires. + fallback_policy: What to do on timeout: ``"fail"``, + ``"complete_with_fallback"``, or ``"use_default_and_continue"``. + fallback_value: Value to use for ``"complete_with_fallback"`` policy. + confidence: Agent confidence score at suspension time (0.0–1.0). + retry_policy: Optional re-notification / escalation policy + (RFC-0026). When omitted, falls back to + ``self.default_human_retry_policy`` (if set), then single-attempt. + + Returns: + The operator's response value. + + Raises: + InputTimeoutError: If the suspension expires without a response + and fallback_policy is ``"fail"``. + InputCancelledError: If the suspension is cancelled. + """ + from datetime import datetime, timedelta + from uuid import uuid4 + + # RFC-0026 §5.3: three-level cascade with field-level merge. + # Fetch platform default once per agent instance (_SENTINEL = not yet fetched). 
+ if self._platform_retry_policy_cache is _SENTINEL: + try: + cfg = await self.async_client.get_server_config() + policy_dict = (cfg.get("suspension") or {}).get("default_retry_policy") + if policy_dict: + self._platform_retry_policy_cache = HumanRetryPolicy.from_dict( + policy_dict + ) + else: + self._platform_retry_policy_cache = None + except Exception: + self._platform_retry_policy_cache = None + + effective_retry_policy = _merge_retry_policies( + call_site=retry_policy, + agent_default=getattr(self, "default_human_retry_policy", None), + platform_default=self._platform_retry_policy_cache, + ) + + suspension_id = str(uuid4()) + now = datetime.utcnow() + expires_at = None + if effective_retry_policy is not None: + # RFC-0026: total expiry = interval_seconds × max_attempts. + # Safeguard: if interval_seconds is 0 (e.g. max_attempts=1 single-shot) + # fall back to timeout_seconds so the suspension doesn't hang indefinitely. + total_seconds = ( + effective_retry_policy.interval_seconds + * effective_retry_policy.max_attempts + ) + if total_seconds > 0: + expires_at = now + timedelta(seconds=total_seconds) + elif timeout_seconds is not None: + expires_at = now + timedelta(seconds=timeout_seconds) + elif timeout_seconds is not None: + expires_at = now + timedelta(seconds=timeout_seconds) + + resolved_choices: list[SuspensionChoice] = [] + if choices: + for c in choices: + if isinstance(c, SuspensionChoice): + resolved_choices.append(c) + elif isinstance(c, dict): + resolved_choices.append(SuspensionChoice.from_dict(c)) + elif response_type == ResponseType.CONFIRM.value: + resolved_choices = [ + SuspensionChoice(value="yes", label="Yes", style="primary"), + SuspensionChoice(value="no", label="No", style="default"), + ] + + suspension = SuspensionRecord( + id=suspension_id, + question=question, + response_type=response_type, + choices=resolved_choices, + context=context or {}, + channel_hint=channel_hint, + suspended_at=now, + timeout_seconds=timeout_seconds, + 
expires_at=expires_at, + fallback_value=fallback_value, + fallback_policy=fallback_policy, + retry_policy=effective_retry_policy, + confidence_at_suspension=confidence, + ) + + intent = await self.async_client.get_intent(intent_id) + + # Transition to suspended_awaiting_input + await self.async_client.set_status( + intent_id, + IntentStatus.SUSPENDED_AWAITING_INPUT, + intent.version, + ) + + # Persist suspension record in state + updated = await self.async_client.get_intent(intent_id) + await self.async_client.update_state( + intent_id, + updated.version, + {"_suspension": suspension.to_dict()}, + ) + + # Emit intent_suspended event + await self.async_client.log_event( + intent_id, + EventType.INTENT_SUSPENDED, + { + "suspension_id": suspension_id, + "question": question, + "response_type": response_type, + "choices": [c.to_dict() for c in resolved_choices], + "channel_hint": channel_hint, + "timeout_seconds": timeout_seconds, + "fallback_policy": fallback_policy, + "confidence_at_suspension": confidence, + "retry_policy": effective_retry_policy.to_dict() + if effective_retry_policy + else None, + }, + ) + + # Fire @on_input_requested hooks + current_intent = await self.async_client.get_intent(intent_id) + for handler in self._handlers.get("input_requested", []): + try: + await self._call_handler(handler, current_intent, suspension) + except Exception as e: + logger.exception(f"on_input_requested handler error: {e}") + + # Resolve effective fallback policy (RFC-0026: retry_policy takes precedence) + effective_fallback = fallback_policy + if effective_retry_policy is not None: + effective_fallback = effective_retry_policy.final_fallback_policy + + # Poll for operator response, with optional re-notification (RFC-0026) + poll_interval = 2.0 + attempt = 1 + max_attempts = 1 + interval_seconds = 0 + if effective_retry_policy is not None: + max_attempts = effective_retry_policy.max_attempts + interval_seconds = effective_retry_policy.interval_seconds + + 
next_renotify_at: Optional[datetime] = None + if ( + effective_retry_policy is not None + and interval_seconds > 0 + and max_attempts > 1 + ): + next_renotify_at = now + timedelta(seconds=interval_seconds) + + while True: + await asyncio.sleep(poll_interval) + try: + current = await self.async_client.get_intent(intent_id) + except Exception: + continue + + state = current.state.to_dict() if current.state else {} + susp_data = state.get("_suspension", {}) + + resolution = susp_data.get("resolution") + if resolution == "responded": + response_value = susp_data.get("response") + responded_at_str = susp_data.get("responded_at") + responded_at = None + if responded_at_str: + try: + responded_at = datetime.fromisoformat(responded_at_str) + except Exception: + pass + + input_response = InputResponse( + suspension_id=suspension_id, + value=response_value, + responded_by=susp_data.get("responded_by", ""), + responded_at=responded_at, + ) + + # Fire @on_input_received hooks + for handler in self._handlers.get("input_received", []): + try: + await self._call_handler(handler, current, input_response) + except Exception as e: + logger.exception(f"on_input_received handler error: {e}") + + return response_value + + elif resolution == "expired": + # Fire @on_suspension_expired hooks before applying fallback + for handler in self._handlers.get("suspension_expired", []): + try: + await self._call_handler(handler, current, suspension) + except Exception as e: + logger.exception(f"on_suspension_expired handler error: {e}") + + if effective_fallback == "fail": + raise InputTimeoutError( + f"Suspension {suspension_id} expired without operator response", + suspension_id=suspension_id, + fallback_policy=effective_fallback, + ) + elif effective_fallback == "complete_with_fallback": + return fallback_value + else: + return fallback_value + + elif resolution == "cancelled": + raise InputCancelledError( + f"Suspension {suspension_id} was cancelled", + suspension_id=suspension_id, + ) + + # 
RFC-0026: re-notification loop + if ( + effective_retry_policy is not None + and next_renotify_at is not None + and datetime.utcnow() >= next_renotify_at + ): + if attempt < max_attempts: + attempt += 1 + logger.info( + f"Re-notifying operator for suspension {suspension_id} " + f"(attempt {attempt}/{max_attempts})" + ) + + # Check escalation ladder for this attempt + escalation_steps = effective_retry_policy.escalation_ladder + triggered_steps = [ + s for s in escalation_steps if s.attempt == attempt + ] + escalation_channel_hint: Optional[str] = None + escalation_notify_to: Optional[str] = None + for step in triggered_steps: + escalation_channel_hint = ( + step.channel_hint or escalation_channel_hint + ) + escalation_notify_to = step.notify_to or escalation_notify_to + try: + await self.async_client.log_event( + intent_id, + EventType.INTENT_SUSPENSION_ESCALATED, + { + "suspension_id": suspension_id, + "attempt": attempt, + "escalated_to": step.notify_to or None, + "channel_hint": step.channel_hint or None, + }, + ) + except Exception as e: + logger.exception(f"suspension_escalated event error: {e}") + + # Compute when the next attempt fires (for telemetry) + next_attempt_at = ( + datetime.utcnow() + timedelta(seconds=interval_seconds) + ).isoformat() + "Z" + + # Emit re-notification event with RFC-0026 field names + try: + await self.async_client.log_event( + intent_id, + EventType.INTENT_SUSPENSION_RENOTIFIED, + { + "suspension_id": suspension_id, + "attempt": attempt, + "max_attempts": max_attempts, + "channel_hint": escalation_channel_hint + or suspension.channel_hint, + "notify_to": escalation_notify_to, + "next_attempt_at": next_attempt_at + if attempt < max_attempts + else None, + }, + ) + except Exception as e: + logger.exception(f"suspension_renotified event error: {e}") + + # Build a re-notification SuspensionRecord that carries attempt + # metadata in its context using RFC-0026 key names (_attempt, + # _max_attempts) so @on_input_requested handlers can 
read them + # without a signature change. + import dataclasses + + renotify_context = dict(suspension.context) + renotify_context["_attempt"] = attempt + renotify_context["_max_attempts"] = max_attempts + if escalation_notify_to: + renotify_context["_notify_to"] = escalation_notify_to + + renotify_suspension = dataclasses.replace( + suspension, + context=renotify_context, + channel_hint=escalation_channel_hint or suspension.channel_hint, + ) + + # Re-fire @on_input_requested hooks with the enriched suspension + for handler in self._handlers.get("input_requested", []): + try: + await self._call_handler( + handler, current, renotify_suspension + ) + except Exception as e: + logger.exception( + f"on_input_requested (renotify) handler error: {e}" + ) + + next_renotify_at = datetime.utcnow() + timedelta( + seconds=interval_seconds + ) + else: + # All attempts exhausted — apply final fallback + for handler in self._handlers.get("suspension_expired", []): + try: + await self._call_handler(handler, current, suspension) + except Exception as e: + logger.exception( + f"on_suspension_expired handler error: {e}" + ) + + if effective_fallback == "fail": + raise InputTimeoutError( + f"Suspension {suspension_id} exhausted all {max_attempts} re-notification attempts", + suspension_id=suspension_id, + fallback_policy=effective_fallback, + ) + return fallback_value + + # Check if expired by time without server update (single-attempt path) + if ( + effective_retry_policy is None + and expires_at is not None + and datetime.utcnow() > expires_at + ): + for handler in self._handlers.get("suspension_expired", []): + try: + await self._call_handler(handler, current, suspension) + except Exception as e: + logger.exception(f"on_suspension_expired handler error: {e}") + + if effective_fallback == "fail": + raise InputTimeoutError( + f"Suspension {suspension_id} expired (client-side timeout)", + suspension_id=suspension_id, + fallback_policy=effective_fallback, + ) + return fallback_value + + 
async def should_request_input( + self, + intent_id: str, + signals: Optional[EngagementSignals] = None, + confidence: Optional[float] = None, + risk: Optional[float] = None, + reversibility: Optional[float] = None, + ) -> EngagementDecision: + """ + Decide whether to ask for human input or act autonomously (RFC-0025). + + Implements the default rule-based engagement logic: + - ``autonomous``: high confidence, low risk, reversible — act now. + - ``request_input``: moderate uncertainty — ask but don't block. + - ``require_input``: high risk or low confidence — must ask. + - ``defer``: risk too high to act, escalate to coordinator. + + The decision is emitted as an ``engagement.decision`` event and + ``@on_engagement_decision`` handlers are fired before returning. + + Args: + intent_id: The intent context. + signals: Pre-built EngagementSignals object (takes priority). + confidence: Agent confidence in autonomous action (0.0–1.0). + risk: Estimated risk of acting autonomously (0.0–1.0). + reversibility: How reversible the action is (0.0–1.0). + + Returns: + An EngagementDecision indicating the recommended mode. + """ + if signals is None: + signals = EngagementSignals( + confidence=confidence if confidence is not None else 1.0, + risk=risk if risk is not None else 0.0, + reversibility=reversibility if reversibility is not None else 1.0, + ) + + c = signals.confidence + r = signals.risk + rev = signals.reversibility + + # Default rule-based engagement logic + if c >= 0.85 and r <= 0.2 and rev >= 0.5: + mode = "autonomous" + should_ask = False + rationale = ( + f"High confidence ({c:.2f}), low risk ({r:.2f}), " + f"reversible ({rev:.2f}) — acting autonomously." + ) + elif r >= 0.8 or rev <= 0.1: + mode = "defer" + should_ask = False + rationale = ( + f"Risk ({r:.2f}) or irreversibility ({rev:.2f}) exceeds safe threshold " + f"— deferring to coordinator." 
+ ) + elif c < 0.5 or r > 0.5: + mode = "require_input" + should_ask = True + rationale = ( + f"Low confidence ({c:.2f}) or elevated risk ({r:.2f}) " + f"— operator input required before proceeding." + ) + else: + mode = "request_input" + should_ask = True + rationale = ( + f"Moderate confidence ({c:.2f}) with manageable risk ({r:.2f}) " + f"— requesting operator input but can proceed without it." + ) + + decision = EngagementDecision( + mode=mode, + should_ask=should_ask, + rationale=rationale, + signals=signals, + ) + + # Emit engagement.decision event + try: + await self.async_client.log_event( + intent_id, + EventType.ENGAGEMENT_DECISION, + decision.to_dict(), + ) + except Exception as e: + logger.warning(f"Could not emit engagement.decision event: {e}") + + # Fire @on_engagement_decision hooks + try: + intent = await self.async_client.get_intent(intent_id) + for handler in self._handlers.get("engagement_decision", []): + try: + await self._call_handler(handler, intent, decision) + except Exception as e: + logger.exception(f"on_engagement_decision handler error: {e}") + except Exception: + pass + + return decision + # ==================== Event Routing ==================== async def _handle_event(self, event: SSEEvent) -> None: @@ -1073,7 +1817,21 @@ async def _on_assignment(self, event: SSEEvent) -> None: intent = await self.async_client.get_intent(intent_id) - ctx = await self._build_context(intent) + # RFC-0024: _build_context raises UnresolvableInputError if declared + # inputs cannot be resolved from upstream deps. This is a hard + # rejection — the handler is not invoked. 
+ try: + ctx = await self._build_context(intent) + except Exception as build_err: + from .workflow import UnresolvableInputError + + if isinstance(build_err, UnresolvableInputError): + logger.warning( + f"RFC-0024: rejecting assignment for intent {intent_id}: " + f"{build_err}" + ) + return + raise delegated_by = event.data.get("delegated_by") if delegated_by: ctx.delegated_by = delegated_by @@ -1129,31 +1887,191 @@ async def _on_assignment(self, event: SSEEvent) -> None: try: result = await self._call_handler(handler, intent) if result and isinstance(result, dict): - for guardrail in self._handlers["output_guardrail"]: - try: - check = await self._call_handler(guardrail, intent, result) - if check is False: + # RFC-0024: validate output against declared _io_outputs + # schema before passing through output guardrails or + # patching state. Raises MissingOutputError / + # OutputTypeMismatchError on failure which is caught + # below; result is dropped so the agent can be retried. + try: + self._validate_io_outputs(intent_id, intent, result) + except Exception as io_err: + from .workflow import ( + MissingOutputError, + OutputTypeMismatchError, + ) + + if isinstance( + io_err, (MissingOutputError, OutputTypeMismatchError) + ): + logger.warning( + f"RFC-0024 output validation failed for " + f"{intent_id}: {io_err}" + ) + else: + logger.warning( + f"RFC-0024 output validation error for " + f"{intent_id}: {io_err}" + ) + result = None + + if result is not None: + for guardrail in self._handlers["output_guardrail"]: + try: + check = await self._call_handler( + guardrail, intent, result + ) + if check is False: + logger.warning( + f"Output guardrail rejected result for {intent_id}" + ) + result = None + break + except GuardrailError as e: logger.warning( - f"Output guardrail rejected result for {intent_id}" + f"Output guardrail rejected result for {intent_id}: {e}" ) result = None break - except GuardrailError as e: - logger.warning( - f"Output guardrail rejected result 
for {intent_id}: {e}" - ) - result = None - break - except Exception as e: - logger.exception(f"Output guardrail error: {e}") - result = None - break + except Exception as e: + logger.exception(f"Output guardrail error: {e}") + result = None + break if result and self._config.auto_complete: await self.patch_state(intent_id, result) except Exception as e: logger.exception(f"Assignment handler error: {e}") + def _validate_io_outputs( + self, + intent_id: str, + intent: Any, + result: dict[str, Any], + ) -> bool: + """Validate an agent's result dict against RFC-0024 declared outputs. + + Reads the ``_io_outputs`` schema stored in the intent's state + (populated by the executor via ``to_portfolio_spec``). Raises + ``MissingOutputError`` or ``OutputTypeMismatchError`` on failure. + Returns ``True`` if validation passes or if no schema is declared. + + Type primitives supported: ``string``, ``number``, ``boolean``, + ``object``, ``array``. Named types from the ``_io_types`` block are + validated structurally (key presence + enum membership). + Optional outputs (``required: false``) are allowed to be absent. + """ + from .workflow import MissingOutputError, OutputTypeMismatchError + + try: + try: + state_dict = intent.state.to_dict() + except Exception: + return True # Cannot read state — skip validation + + io_outputs: dict[str, Any] = state_dict.get("_io_outputs") or {} + if not io_outputs: + return True + + # Named type schemas may be stored alongside _io_outputs as + # _io_types if the WorkflowSpec stored them (future extension). + # For now, look up type schemas from _io_types if present. 
+ io_types: dict[str, Any] = state_dict.get("_io_types") or {} + + primitive_type_map: dict[str, type] = { + "string": str, + "number": (int, float), # type: ignore[dict-item] + "boolean": bool, + "object": dict, + "array": list, + } + + missing: list[str] = [] + + for output_key, type_decl in io_outputs.items(): + required = True + expected_type = "any" + + if isinstance(type_decl, dict): + required = type_decl.get("required", True) + expected_type = str(type_decl.get("type", "any")) + elif isinstance(type_decl, str): + expected_type = type_decl + + if output_key not in result: + if required: + missing.append(output_key) + continue + + value = result[output_key] + if expected_type in ("any", ""): + continue + + # Primitive type check + if expected_type in primitive_type_map: + expected_python_type = primitive_type_map[expected_type] + if not isinstance(value, expected_python_type): + raise OutputTypeMismatchError( + task_id=intent_id, + phase_name=getattr(intent, "title", intent_id), + key=output_key, + expected_type=expected_type, + actual_type=type(value).__name__, + ) + continue + + # Named type check from _io_types + type_schema = io_types.get(expected_type) + if type_schema is None: + # Unknown type — accept without validation (incremental adoption) + continue + + # Enum type + if isinstance(type_schema, dict) and "enum" in type_schema: + enum_values = type_schema["enum"] + if isinstance(enum_values, list) and value not in enum_values: + raise OutputTypeMismatchError( + task_id=intent_id, + phase_name=getattr(intent, "title", intent_id), + key=output_key, + expected_type=f"{expected_type}(enum:{enum_values})", + actual_type=repr(value), + ) + continue + + # Struct type — value must be a dict with declared keys + if not isinstance(value, dict): + raise OutputTypeMismatchError( + task_id=intent_id, + phase_name=getattr(intent, "title", intent_id), + key=output_key, + expected_type=expected_type, + actual_type=type(value).__name__, + ) + if 
isinstance(type_schema, dict): + for schema_key in type_schema: + if schema_key not in value: + raise OutputTypeMismatchError( + task_id=intent_id, + phase_name=getattr(intent, "title", intent_id), + key=output_key, + expected_type=expected_type, + actual_type=f"dict missing required field '{schema_key}'", + ) + + if missing: + raise MissingOutputError( + task_id=intent_id, + phase_name=getattr(intent, "title", intent_id), + missing_keys=missing, + ) + + except (MissingOutputError, OutputTypeMismatchError): + raise + except Exception as e: + logger.debug(f"RFC-0024 output validation error for {intent_id}: {e}") + + return True + async def _on_status_change(self, event: SSEEvent) -> None: """Handle status change events.""" new_status = event.data.get("new_status") @@ -1210,13 +2128,53 @@ async def _on_state_change(self, event: SSEEvent) -> None: logger.exception(f"State change handler error: {e}") async def _on_generic_event(self, event: SSEEvent) -> None: - """Handle generic events via @on_event decorators.""" + """Handle generic events via @on_event decorators and HITL hooks.""" intent_id = event.data.get("intent_id") if not intent_id: return intent = await self.async_client.get_intent(intent_id) + # Route HITL events (RFC-0025) + if event.type in (EventType.INTENT_SUSPENDED, "intent.suspended"): + suspension = SuspensionRecord.from_dict(event.data) + for handler in self._handlers["input_requested"]: + try: + await self._call_handler(handler, intent, suspension) + except Exception as e: + logger.exception(f"on_input_requested handler error: {e}") + return + + if event.type in (EventType.INTENT_RESUMED, "intent.resumed"): + input_response = InputResponse.from_dict(event.data) + for handler in self._handlers["input_received"]: + try: + await self._call_handler(handler, intent, input_response) + except Exception as e: + logger.exception(f"on_input_received handler error: {e}") + return + + if event.type in ( + EventType.INTENT_SUSPENSION_EXPIRED, + 
"intent.suspension_expired", + ): + suspension = SuspensionRecord.from_dict(event.data) + for handler in self._handlers["suspension_expired"]: + try: + await self._call_handler(handler, intent, suspension) + except Exception as e: + logger.exception(f"on_suspension_expired handler error: {e}") + return + + if event.type in (EventType.ENGAGEMENT_DECISION, "engagement.decision"): + decision = EngagementDecision.from_dict(event.data) + for handler in self._handlers["engagement_decision"]: + try: + await self._call_handler(handler, intent, decision) + except Exception as e: + logger.exception(f"on_engagement_decision handler error: {e}") + return + for handler in self._handlers["event"]: handler_type = getattr(handler, "_openintent_event_type", None) if handler_type == event.type: @@ -1932,11 +2890,33 @@ def visit(spec: IntentSpec): cls._topological_sort = _topological_sort - async def execute(self, spec: PortfolioSpec) -> dict[str, Any]: - """Execute a portfolio and wait for completion.""" + async def execute( + self, + spec: PortfolioSpec, + workflow_spec: Optional[Any] = None, + ) -> dict[str, Any]: + """Execute a portfolio and wait for completion. + + Args: + spec: The portfolio specification to execute. + workflow_spec: Optional ``WorkflowSpec`` for RFC-0024 I/O + contract enforcement. When provided the executor calls + ``validate_claim_inputs`` when an intent becomes claimable + and ``validate_task_outputs`` when it completes. + + Returns: + Merged results from all completed intents. + """ + from .workflow import WorkflowSpec + portfolio = await self.create_portfolio(spec) await self._subscribe_portfolio(portfolio.id) + # RFC-0024: track which intents we have already validated so we + # don't re-validate on every poll cycle. 
+ _claim_validated: set[str] = set() + _completion_validated: set[str] = set() + while True: portfolio_with_intents = await self.async_client.get_portfolio( portfolio.id @@ -1947,6 +2927,76 @@ async def execute(self, spec: PortfolioSpec) -> dict[str, Any]: portfolio_with_intents.intents = intents_list portfolio_with_intents.aggregate_status = aggregate + # RFC-0024: executor-side I/O validation when a WorkflowSpec is + # provided. We resolve phase names from intent titles via the + # name→title mapping stored in the WorkflowSpec. + if isinstance(workflow_spec, WorkflowSpec): + title_to_name = {p.title: p.name for p in workflow_spec.phases} + # Build upstream outputs from all completed intents + upstream_outputs: dict[str, dict[str, Any]] = {} + for intent in intents_list: + if intent.status == IntentStatus.COMPLETED: + phase_name = title_to_name.get(intent.title, intent.title) + try: + upstream_outputs[phase_name] = intent.state.to_dict() + except Exception: + upstream_outputs[phase_name] = {} + + for intent in intents_list: + phase_name = title_to_name.get(intent.title, intent.title) + + # Claim-time validation: validate inputs for intents + # that are PENDING with all deps satisfied. Raises + # UnresolvableInputError / InputWiringError to the + # caller if inputs cannot be resolved — this blocks + # the entire portfolio (RFC-0024 §3.1). + # Note: intent.depends_on contains intent IDs (not + # titles), so we compare against i2.id. + # Intents start in DRAFT and transition to ACTIVE + # when ready; validate claim when still in DRAFT + # with all deps complete. 
+ if ( + intent.id not in _claim_validated + and intent.status == IntentStatus.DRAFT + ): + deps_complete = all( + any( + i2.id == dep and i2.status == IntentStatus.COMPLETED + for i2 in intents_list + ) + for dep in (intent.depends_on or []) + ) + if deps_complete: + # Raises UnresolvableInputError if unresolvable + workflow_spec.validate_claim_inputs( + phase_name=phase_name, + upstream_outputs=upstream_outputs, + task_id=intent.id, + ) + _claim_validated.add(intent.id) + + # Completion-time validation: validate outputs for + # intents that just completed. Raises + # MissingOutputError / OutputTypeMismatchError to the + # caller if outputs violate the declared schema + # (RFC-0024 §3.3). + if ( + intent.id not in _completion_validated + and intent.status == IntentStatus.COMPLETED + ): + agent_output: dict[str, Any] = {} + try: + agent_output = intent.state.to_dict() + except Exception: + pass + # Raises MissingOutputError / OutputTypeMismatchError + workflow_spec.validate_task_outputs( + phase_name=phase_name, + agent_output=agent_output, + task_id=intent.id, + ) + _completion_validated.add(intent.id) + all_complete = all( i.status == IntentStatus.COMPLETED for i in intents_list ) diff --git a/openintent/client.py b/openintent/client.py index 02aa18b..c75a4b7 100644 --- a/openintent/client.py +++ b/openintent/client.py @@ -3682,6 +3682,21 @@ async def get_intent_portfolios(self, intent_id: str) -> list[IntentPortfolio]: items = data if isinstance(data, list) else data.get("portfolios", []) return [IntentPortfolio.from_dict(p) for p in items] + async def get_server_config(self) -> dict[str, Any]: + """RFC-0026: Fetch read-only server configuration for client introspection. + + Returns a dict with ``protocol_version`` and a ``suspension`` key that + contains ``default_retry_policy`` (serialised HumanRetryPolicy dict or + ``None`` when no platform default is configured). 
+ + Example:: + + cfg = await client.get_server_config() + policy_dict = cfg["suspension"]["default_retry_policy"] + """ + response = await self._client.get("/api/v1/server/config") + return self._handle_response(response) + # ==================== Attachments ==================== async def add_attachment( diff --git a/openintent/exceptions.py b/openintent/exceptions.py index ea7ed29..6d238dd 100644 --- a/openintent/exceptions.py +++ b/openintent/exceptions.py @@ -69,3 +69,31 @@ class AuthenticationError(OpenIntentError): """Raised when authentication fails or API key is invalid.""" pass + + +class InputTimeoutError(OpenIntentError): + """Raised when a HITL suspension times out before an operator responds (RFC-0025).""" + + def __init__( + self, + message: str, + suspension_id: Optional[str] = None, + fallback_policy: Optional[str] = None, + **kwargs: Any, + ) -> None: + super().__init__(message, **kwargs) + self.suspension_id = suspension_id + self.fallback_policy = fallback_policy + + +class InputCancelledError(OpenIntentError): + """Raised when a HITL suspension is cancelled before an operator responds (RFC-0025).""" + + def __init__( + self, + message: str, + suspension_id: Optional[str] = None, + **kwargs: Any, + ) -> None: + super().__init__(message, **kwargs) + self.suspension_id = suspension_id diff --git a/openintent/models.py b/openintent/models.py index 36f8d27..43d995f 100644 --- a/openintent/models.py +++ b/openintent/models.py @@ -14,6 +14,7 @@ class IntentStatus(str, Enum): DRAFT = "draft" ACTIVE = "active" BLOCKED = "blocked" + SUSPENDED_AWAITING_INPUT = "suspended_awaiting_input" COMPLETED = "completed" ABANDONED = "abandoned" @@ -188,6 +189,18 @@ class EventType(str, Enum): FEDERATION_COMPLETED = "federation.completed" FEDERATION_FAILED = "federation.failed" + # HITL events (RFC-0025) + INTENT_SUSPENDED = "intent.suspended" + INTENT_RESUMED = "intent.resumed" + INTENT_SUSPENSION_EXPIRED = "intent.suspension_expired" + ENGAGEMENT_DECISION = 
"engagement.decision" + + # RFC-0026: Suspension container interaction & human retry + INTENT_SUSPENSION_RENOTIFIED = "intent.suspension_renotified" + INTENT_SUSPENSION_ESCALATED = "intent.suspension_escalated" + PORTFOLIO_MEMBER_SUSPENDED = "portfolio.member_suspended" + PORTFOLIO_MEMBER_RESUMED = "portfolio.member_resumed" + # Legacy aliases for backward compatibility CREATED = "intent_created" STATE_UPDATED = "state_patched" @@ -1956,6 +1969,11 @@ class IntentContext: What you see depends on your permission level. The SDK automatically filters context based on the agent's access. + + RFC-0024 addition: ``input`` is pre-populated by the executor from resolved + upstream phase outputs before the agent handler is called. Agents should + read from ``intent.ctx.input`` rather than reaching into + ``intent.ctx.dependencies`` directly. """ parent: Optional[Intent] = None @@ -1966,6 +1984,10 @@ class IntentContext: attachments: list[IntentAttachment] = field(default_factory=list) peers: list[PeerInfo] = field(default_factory=list) delegated_by: Optional[str] = None + # RFC-0024: executor-wired task inputs. Pre-populated from upstream phase + # outputs by the executor before the agent handler is called. Read-only + # from the agent's perspective. + input: dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict[str, Any]: result: dict[str, Any] = { @@ -1982,6 +2004,10 @@ def to_dict(self) -> dict[str, Any]: result["my_permission"] = self.my_permission.value if self.delegated_by: result["delegated_by"] = self.delegated_by + # RFC-0024: include executor-wired input so serialized context + # round-trips correctly without losing pre-populated values. 
+ if self.input: + result["input"] = self.input return result @classmethod @@ -2003,6 +2029,8 @@ def from_dict(cls, data: dict[str, Any]) -> "IntentContext": attachments=attachments, peers=peers, delegated_by=data.get("delegated_by"), + # RFC-0024: restore executor-wired input from serialized context + input=data.get("input", {}), ) @@ -4035,3 +4063,409 @@ def from_dict(cls, data: dict[str, Any]) -> "ChannelMessage": expires_at=expires_at, read_at=read_at, ) + + +# --------------------------------------------------------------------------- +# RFC-0025: Human-in-the-Loop Intent Suspension +# --------------------------------------------------------------------------- + + +class ResponseType(str, Enum): + """Type of response expected from the operator (RFC-0025). + + Values: + CHOICE: Operator must select one of the predefined choices. + CONFIRM: Binary yes/no confirmation. + TEXT: Free-form text input. + FORM: Structured key/value form (context keys define the fields). + """ + + CHOICE = "choice" + CONFIRM = "confirm" + TEXT = "text" + FORM = "form" + + +@dataclass +class SuspensionChoice: + """A single selectable choice presented to the operator (RFC-0025). + + Fields: + value: The machine-readable value returned to the agent when selected. + label: Human-readable label displayed to the operator. + description: Optional longer description providing additional context. + style: Optional visual hint for the channel UI (e.g. "primary", + "danger", "default"). + metadata: Arbitrary extra data attached to this choice. 
+ """ + + value: str + label: str + description: str = "" + style: str = "default" + metadata: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "value": self.value, + "label": self.label, + } + if self.description: + result["description"] = self.description + if self.style != "default": + result["style"] = self.style + if self.metadata: + result["metadata"] = self.metadata + return result + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "SuspensionChoice": + return cls( + value=data.get("value", ""), + label=data.get("label", ""), + description=data.get("description", ""), + style=data.get("style", "default"), + metadata=data.get("metadata", {}), + ) + + +@dataclass +class SuspensionRecord: + """A suspension record capturing the full context of an intent suspension (RFC-0025/RFC-0026). + + Fields: + id: Unique identifier for this suspension record. + question: The specific question or prompt presented to the operator. + response_type: Expected response type (choice, confirm, text, form). + choices: Available choices when response_type is "choice" or "confirm". + For "confirm", auto-populated with yes/no if not supplied. + context: Structured context dict to help the operator decide. + channel_hint: Preferred delivery channel (e.g. "slack", "email"). + suspended_at: ISO-8601 timestamp when the intent was suspended. + timeout_seconds: Per-attempt expiry window (None = no timeout). + When retry_policy is set, this is per-attempt; total expiry is + retry_policy.interval_seconds * retry_policy.max_attempts. + expires_at: Total deadline. When retry_policy is set: + suspended_at + (interval_seconds × max_attempts). + When retry_policy is absent: suspended_at + timeout_seconds. + fallback_value: Value to use if fallback_policy is "complete_with_fallback". + fallback_policy: One of "fail", "complete_with_fallback", "use_default_and_continue". 
+ Alias for retry_policy.final_fallback_policy when retry_policy is set. + retry_policy: Optional RFC-0026 HumanRetryPolicy for re-notification & escalation. + confidence_at_suspension: Agent confidence score at time of suspension (0.0–1.0). + decision_record: Optional dict capturing the engagement decision rationale. + response: The operator's response value (populated on resume). + responded_at: ISO-8601 timestamp when the operator responded. + resolution: Final resolution: "responded", "expired", or "cancelled". + """ + + id: str + question: str + response_type: str = ResponseType.CHOICE.value + choices: list["SuspensionChoice"] = field(default_factory=list) + context: dict[str, Any] = field(default_factory=dict) + channel_hint: Optional[str] = None + suspended_at: Optional[datetime] = None + timeout_seconds: Optional[int] = None + expires_at: Optional[datetime] = None + fallback_value: Optional[Any] = None + fallback_policy: str = "fail" + retry_policy: Optional["HumanRetryPolicy"] = None + confidence_at_suspension: Optional[float] = None + decision_record: Optional[dict[str, Any]] = None + response: Optional[Any] = None + responded_at: Optional[datetime] = None + resolution: Optional[str] = None + + def valid_values(self) -> list[str] | None: + if self.response_type in ( + ResponseType.CHOICE.value, + ResponseType.CONFIRM.value, + ): + if self.choices: + return [c.value for c in self.choices] + return None + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "id": self.id, + "question": self.question, + "response_type": self.response_type, + "context": self.context, + "fallback_policy": self.fallback_policy, + } + if self.choices: + result["choices"] = [c.to_dict() for c in self.choices] + if self.channel_hint is not None: + result["channel_hint"] = self.channel_hint + if self.suspended_at is not None: + result["suspended_at"] = self.suspended_at.isoformat() + if self.timeout_seconds is not None: + result["timeout_seconds"] = 
self.timeout_seconds + if self.expires_at is not None: + result["expires_at"] = self.expires_at.isoformat() + if self.fallback_value is not None: + result["fallback_value"] = self.fallback_value + if self.retry_policy is not None: + result["retry_policy"] = self.retry_policy.to_dict() + if self.confidence_at_suspension is not None: + result["confidence_at_suspension"] = self.confidence_at_suspension + if self.decision_record is not None: + result["decision_record"] = self.decision_record + if self.response is not None: + result["response"] = self.response + if self.responded_at is not None: + result["responded_at"] = self.responded_at.isoformat() + if self.resolution is not None: + result["resolution"] = self.resolution + return result + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "SuspensionRecord": + suspended_at = None + if data.get("suspended_at"): + suspended_at = datetime.fromisoformat(data["suspended_at"]) + expires_at = None + if data.get("expires_at"): + expires_at = datetime.fromisoformat(data["expires_at"]) + responded_at = None + if data.get("responded_at"): + responded_at = datetime.fromisoformat(data["responded_at"]) + choices_raw = data.get("choices", []) + choices = [SuspensionChoice.from_dict(c) for c in choices_raw] + retry_policy = None + if data.get("retry_policy"): + retry_policy = HumanRetryPolicy.from_dict(data["retry_policy"]) + return cls( + id=data.get("id", ""), + question=data.get("question", ""), + response_type=data.get("response_type", ResponseType.CHOICE.value), + choices=choices, + context=data.get("context", {}), + channel_hint=data.get("channel_hint"), + suspended_at=suspended_at, + timeout_seconds=data.get("timeout_seconds"), + expires_at=expires_at, + fallback_value=data.get("fallback_value"), + fallback_policy=data.get("fallback_policy", "fail"), + retry_policy=retry_policy, + confidence_at_suspension=data.get("confidence_at_suspension"), + decision_record=data.get("decision_record"), + 
response=data.get("response"), + responded_at=responded_at, + resolution=data.get("resolution"), + ) + + +@dataclass +class EngagementSignals: + """Signals that inform the engagement decision for HITL (RFC-0025). + + Fields: + confidence: Agent's confidence in its autonomous answer (0.0–1.0). + risk: Estimated risk of acting autonomously (0.0–1.0). + reversibility: How reversible the action is (0.0=irreversible, 1.0=fully reversible). + context: Additional key/value context for the decision engine. + """ + + confidence: float = 1.0 + risk: float = 0.0 + reversibility: float = 1.0 + context: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return { + "confidence": self.confidence, + "risk": self.risk, + "reversibility": self.reversibility, + "context": self.context, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "EngagementSignals": + return cls( + confidence=data.get("confidence", 1.0), + risk=data.get("risk", 0.0), + reversibility=data.get("reversibility", 1.0), + context=data.get("context", {}), + ) + + +@dataclass +class EngagementDecision: + """The output of should_request_input() (RFC-0025). + + Fields: + mode: One of "autonomous", "request_input", "require_input", "defer". + should_ask: True if the agent should call request_input(). + rationale: Human-readable explanation of the decision. + signals: The EngagementSignals that drove this decision. 
+ """ + + mode: str + should_ask: bool + rationale: str = "" + signals: Optional[EngagementSignals] = None + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "mode": self.mode, + "should_ask": self.should_ask, + "rationale": self.rationale, + } + if self.signals is not None: + result["signals"] = self.signals.to_dict() + return result + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "EngagementDecision": + signals = None + if data.get("signals"): + signals = EngagementSignals.from_dict(data["signals"]) + return cls( + mode=data.get("mode", "autonomous"), + should_ask=data.get("should_ask", False), + rationale=data.get("rationale", ""), + signals=signals, + ) + + +@dataclass +class InputResponse: + """The operator's response to a HITL suspension (RFC-0025). + + Fields: + suspension_id: ID of the SuspensionRecord this responds to. + value: The operator's answer or decision value. + responded_by: Identifier of the operator who responded. + responded_at: Timestamp of the response. + metadata: Optional additional metadata from the channel. 
+ """ + + suspension_id: str + value: Any + responded_by: str = "" + responded_at: Optional[datetime] = None + metadata: Optional[dict[str, Any]] = None + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "suspension_id": self.suspension_id, + "value": self.value, + "responded_by": self.responded_by, + } + if self.responded_at is not None: + result["responded_at"] = self.responded_at.isoformat() + if self.metadata is not None: + result["metadata"] = self.metadata + return result + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "InputResponse": + responded_at = None + if data.get("responded_at"): + responded_at = datetime.fromisoformat(data["responded_at"]) + return cls( + suspension_id=data.get("suspension_id", ""), + value=data.get("value"), + responded_by=data.get("responded_by", ""), + responded_at=responded_at, + metadata=data.get("metadata"), + ) + + +@dataclass +class EscalationStep: + """A single step in a HumanRetryPolicy escalation ladder (RFC-0026). + + Fields: + attempt: Trigger this escalation at this attempt number (RFC-0026 field name). + Alias ``after_attempt`` is accepted on deserialisation for backwards compatibility. + channel_hint: Delivery channel to use at this step (e.g. "pagerduty", "email"). + Alias ``channel`` is accepted on deserialisation for backwards compatibility. + notify_to: Identifier of the human or group to notify at this step. + Alias ``notify`` is accepted on deserialisation for backwards compatibility. 
+ """ + + attempt: int + channel_hint: str = "" + notify_to: str = "" + + @property + def after_attempt(self) -> int: + """Backwards-compatible alias for ``attempt``.""" + return self.attempt + + @property + def channel(self) -> str: + """Backwards-compatible alias for ``channel_hint``.""" + return self.channel_hint + + @property + def notify(self) -> str: + """Backwards-compatible alias for ``notify_to``.""" + return self.notify_to + + def to_dict(self) -> dict[str, Any]: + return { + "attempt": self.attempt, + "channel_hint": self.channel_hint, + "notify_to": self.notify_to, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "EscalationStep": + attempt = data.get("attempt") or data.get("after_attempt", 1) + channel_hint = data.get("channel_hint") or data.get("channel", "") + notify_to = data.get("notify_to") or data.get("notify", "") + return cls( + attempt=attempt, + channel_hint=channel_hint, + notify_to=notify_to, + ) + + +@dataclass +class HumanRetryPolicy: + """Re-notification and escalation policy for suspended intents (RFC-0026). + + When attached to a SuspensionRecord, the server will re-notify the operator + up to `max_attempts` times, waiting `interval_seconds` between each attempt. + After all attempts are exhausted, `final_fallback_policy` is applied. + + Fields: + max_attempts: Maximum number of notification attempts (including first). Default 3. + interval_seconds: Seconds to wait between re-notification attempts. Default 3600. + strategy: Back-off strategy — "fixed" | "linear" | "exponential". Default "fixed". + escalation_ladder: Ordered list of escalation steps triggered at specific attempts. + final_fallback_policy: Fallback policy after all attempts exhausted. + One of "fail", "complete_with_fallback", "use_default_and_continue". 
+ """ + + max_attempts: int = 3 + interval_seconds: int = 3600 + strategy: str = "fixed" + escalation_ladder: list[EscalationStep] = field(default_factory=list) + final_fallback_policy: str = "fail" + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "max_attempts": self.max_attempts, + "interval_seconds": self.interval_seconds, + "strategy": self.strategy, + "final_fallback_policy": self.final_fallback_policy, + } + if self.escalation_ladder: + result["escalation_ladder"] = [s.to_dict() for s in self.escalation_ladder] + return result + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "HumanRetryPolicy": + ladder_raw = data.get("escalation_ladder", []) + return cls( + max_attempts=data.get("max_attempts", 3), + interval_seconds=data.get("interval_seconds", 3600), + strategy=data.get("strategy", "fixed"), + escalation_ladder=[EscalationStep.from_dict(s) for s in ladder_raw], + final_fallback_policy=data.get("final_fallback_policy", "fail"), + ) diff --git a/openintent/server/app.py b/openintent/server/app.py index 6a45b2f..e47c8b0 100644 --- a/openintent/server/app.py +++ b/openintent/server/app.py @@ -1171,6 +1171,192 @@ def _enforce_agent_allowlist( }, ) + def _rfc0024_validate_io_outputs(db: Database, session, intent, actor: str) -> None: + """RFC-0024 §3.3 – server-side output validation before COMPLETED transition. + + Reads ``_io_outputs`` and ``_io_types`` from the intent's current state + and validates the state against the declared output schema. Raises + HTTP 422 if required keys are missing or types mismatch. 
+        """
+        state: Dict[str, Any] = dict(intent.state) if intent.state else {}
+        io_outputs: Dict[str, Any] = state.get("_io_outputs") or {}
+        if not io_outputs:
+            return
+
+        io_types: Dict[str, Any] = state.get("_io_types") or {}
+
+        primitive_type_map: Dict[str, Any] = {
+            "string": str,
+            "number": (int, float),
+            "boolean": bool,
+            "object": dict,
+            "array": list,
+        }
+
+        missing: List[str] = []
+        type_errors: List[str] = []
+
+        for output_key, type_decl in io_outputs.items():
+            required = True
+            expected_type = "any"
+
+            if isinstance(type_decl, dict):
+                required = type_decl.get("required", True)
+                expected_type = str(type_decl.get("type", "any"))
+            elif isinstance(type_decl, str):
+                expected_type = type_decl
+
+            if output_key not in state:
+                if required:
+                    missing.append(output_key)
+                continue
+
+            value = state[output_key]
+            if expected_type in ("any", ""):
+                continue
+
+            if expected_type in primitive_type_map:
+                expected_python_type = primitive_type_map[expected_type]
+                if not isinstance(value, expected_python_type):
+                    type_errors.append(
+                        f"'{output_key}': expected {expected_type!r}, "
+                        f"got {type(value).__name__!r}"
+                    )
+                continue
+
+            # Named type from io_types block
+            type_schema = io_types.get(expected_type)
+            if type_schema is None:
+                continue  # Unknown named type — accept (incremental adoption)
+
+            if isinstance(type_schema, dict) and "enum" in type_schema:
+                enum_values = type_schema["enum"]
+                if isinstance(enum_values, list) and value not in enum_values:
+                    type_errors.append(
+                        f"'{output_key}': expected {expected_type!r} enum "
+                        f"{enum_values!r}, got {value!r}"
+                    )
+            elif not isinstance(value, dict):
+                type_errors.append(
+                    f"'{output_key}': expected {expected_type!r} (object), "
+                    f"got {type(value).__name__!r}"
+                )
+            elif isinstance(type_schema, dict):
+                for schema_key in type_schema:
+                    if schema_key not in value:
+                        type_errors.append(
+                            f"'{output_key}': named type {expected_type!r} "
+                            f"missing required field '{schema_key}'"
+                        )
+
+        if missing or type_errors:
+            db.create_event(
+                session,
+                intent_id=intent.id,
+                event_type="governance.violation",
+                actor=actor,
+                payload={
+                    "rule": "rfc0024_io_outputs",
+                    "missing": missing,
+                    "type_errors": type_errors,
+                },
+            )
+            detail_parts: List[str] = []
+            if missing:
+                detail_parts.append(f"missing required outputs: {missing}")
+            if type_errors:
+                detail_parts.append(f"type mismatches: {type_errors}")
+            error_type = "MissingOutputError" if missing else "OutputTypeMismatchError"
+            raise HTTPException(
+                status_code=422,
+                detail={
+                    "error": error_type,
+                    "rule": "io_outputs",
+                    "message": (
+                        "RFC-0024: Intent output does not satisfy declared "
+                        "output schema. "
+                        + "; ".join(detail_parts)
+                    ),
+                    "missing": missing,
+                    "type_errors": type_errors,
+                },
+            )
+
+    def _rfc0024_validate_claim_inputs(
+        db: Database, session, intent, actor: str
+    ) -> None:
+        """RFC-0024 §3.1 – server-side claim-time input validation.
+
+        Checks that all declared inputs in ``_io_inputs`` can be resolved from
+        completed upstream dependency states. Raises HTTP 422 if any declared
+        input mapping cannot be resolved.
+ """ + state: Dict[str, Any] = dict(intent.state) if intent.state else {} + io_inputs: Dict[str, Any] = state.get("_io_inputs") or {} + if not io_inputs: + return + + dep_title_to_name: Dict[str, str] = state.get("_io_dep_title_to_name") or {} + + # Build upstream outputs by fetching completed dependencies + upstream_outputs: Dict[str, Dict[str, Any]] = {} + for dep_id in intent.depends_on or []: + dep = db.get_intent(session, dep_id) + if dep and dep.status == "completed": + dep_state = dict(dep.state) if dep.state else {} + # Index by phase name (via dep_title_to_name) and by title + dep_name = dep_title_to_name.get(dep.title, dep.title) + upstream_outputs[dep_name] = dep_state + upstream_outputs[dep.title] = dep_state + + unresolvable: List[str] = [] + for local_key, mapping_expr in io_inputs.items(): + if not isinstance(mapping_expr, str): + unresolvable.append(f"{local_key}: {mapping_expr!r}") + continue + + if mapping_expr.startswith("$trigger."): + key = mapping_expr[len("$trigger.") :] + if key not in state: + unresolvable.append(mapping_expr) + elif mapping_expr.startswith("$initial_state."): + key = mapping_expr[len("$initial_state.") :] + if key not in state: + unresolvable.append(mapping_expr) + else: + parts = mapping_expr.split(".", 1) + if len(parts) != 2: + unresolvable.append(mapping_expr) + continue + ref_phase, ref_key = parts[0], parts[1] + phase_out = upstream_outputs.get(ref_phase, {}) + if ref_key not in phase_out: + unresolvable.append(mapping_expr) + + if unresolvable: + db.create_event( + session, + intent_id=intent.id, + event_type="governance.violation", + actor=actor, + payload={ + "rule": "rfc0024_io_inputs", + "unresolvable": unresolvable, + }, + ) + raise HTTPException( + status_code=422, + detail={ + "error": "UnresolvableInputError", + "rule": "io_inputs", + "message": ( + "RFC-0024: Cannot resolve declared input mappings at " + "claim time — upstream outputs are missing or dependencies " + "are not yet complete. 
Unresolvable: " + str(unresolvable) + ), + "unresolvable": unresolvable, + }, + ) + @app.get("/.well-known/openintent.json") async def discovery(): return { @@ -1210,6 +1396,20 @@ async def discovery(): "openApiUrl": "/openapi.json", } + @app.get("/api/v1/server/config") + async def get_server_config(): + """RFC-0026: Read-only introspection endpoint for platform-level server config. + + Returns the platform-level suspension default retry policy (if configured) so + that clients can implement the three-level cascade without hard-coding defaults. + """ + return { + "protocol_version": config.protocol_version, + "suspension": { + "default_retry_policy": config.suspension_default_retry_policy, + }, + } + @app.get("/.well-known/openintent-compat.json") async def compatibility(): return { @@ -1809,6 +2009,8 @@ async def update_status( if request.status == "completed": _enforce_completion_gate(db, session, intent, api_key) + # RFC-0024: validate declared output schema before transition + _rfc0024_validate_io_outputs(db, session, intent, api_key) updated = db.update_intent_status( session, intent_id, if_match, request.status @@ -1846,6 +2048,180 @@ async def update_status( finally: session.close() + @app.post("/api/v1/intents/{intent_id}/suspend/respond") + async def respond_to_suspension( + intent_id: str, + request: Request, + db: Database = Depends(get_db), + api_key: str = Depends(validate_api_key), + ): + """ + RFC-0025: Respond to a suspended intent's input request. + + Body: + suspension_id: str — ID of the SuspensionRecord being answered. + value: Any — the operator's response value. + responded_by: str (optional) — identifier of the responding operator. + metadata: dict (optional) — additional channel metadata. + + Validates: + - Intent is in ``suspended_awaiting_input`` status. + - ``suspension_id`` matches the active suspension on the intent. + - If the suspension defines ``choices``, ``value`` must be one of + the defined choice values. 
+ + Transitions the intent from suspended_awaiting_input → active, persists + the response in state, and broadcasts intent.resumed. The response + body includes the matching choice's label and description (if present) + so callers receive structured feedback. + """ + from datetime import datetime + + body = await request.json() + suspension_id = body.get("suspension_id", "") + value = body.get("value") + responded_by = body.get("responded_by", api_key or "operator") + metadata = body.get("metadata") + + if not suspension_id: + raise HTTPException( + status_code=422, + detail="suspension_id is required", + ) + + session = db.get_session() + try: + intent = db.get_intent(session, intent_id) + if not intent: + raise HTTPException(status_code=404, detail="Intent not found") + + if intent.status != "suspended_awaiting_input": + raise HTTPException( + status_code=409, + detail=f"Intent is not suspended (status={intent.status})", + ) + + current_state = intent.state or {} + susp_data = current_state.get("_suspension", {}) + + active_susp_id = susp_data.get("id") + if active_susp_id and active_susp_id != suspension_id: + raise HTTPException( + status_code=409, + detail=( + f"suspension_id mismatch: expected {active_susp_id}, " + f"got {suspension_id}" + ), + ) + + choices_raw = susp_data.get("choices", []) + response_type = susp_data.get("response_type", "choice") + valid_response_types = ("choice", "confirm", "text", "form") + if response_type not in valid_response_types: + raise HTTPException( + status_code=422, + detail=( + f"Unknown response_type {response_type!r}. 
" + f"Expected one of: {list(valid_response_types)}" + ), + ) + + matched_choice = None + if response_type in ("choice", "confirm"): + if choices_raw: + valid_vals = [c.get("value") for c in choices_raw] + elif response_type == "confirm": + valid_vals = ["yes", "no"] + else: + valid_vals = None + + if valid_vals is not None and value not in valid_vals: + raise HTTPException( + status_code=422, + detail={ + "error": "invalid_choice", + "message": ( + f"Value {value!r} is not a valid choice. " + f"Expected one of: {valid_vals}" + ), + "valid_choices": choices_raw + or [{"value": v, "label": v.title()} for v in valid_vals], + }, + ) + if choices_raw: + matched_choice = next( + (c for c in choices_raw if c.get("value") == value), None + ) + + now_str = datetime.utcnow().isoformat() + + susp_data["response"] = value + susp_data["responded_by"] = responded_by + susp_data["responded_at"] = now_str + susp_data["resolution"] = "responded" + if metadata: + susp_data["metadata"] = metadata + current_state["_suspension"] = susp_data + + susp_patch = [{"op": "set", "path": "/_suspension", "value": susp_data}] + updated_state = db.update_intent_state( + session, intent_id, intent.version, susp_patch + ) + if not updated_state: + raise HTTPException(status_code=409, detail="Version conflict") + + resumed = db.update_intent_status( + session, intent_id, updated_state.version, "active" + ) + if not resumed: + raise HTTPException( + status_code=409, detail="Version conflict on resume" + ) + + db.create_event( + session, + intent_id=intent_id, + event_type="intent.resumed", + actor=responded_by, + payload={ + "intent_id": intent_id, + "suspension_id": suspension_id or active_susp_id, + "value": value, + "responded_by": responded_by, + "responded_at": now_str, + }, + ) + + _broadcast_event( + "intents", + { + "type": "intent.resumed", + "intent_id": intent_id, + "data": { + "intent_id": intent_id, + "suspension_id": suspension_id or active_susp_id, + "value": value, + "responded_by": 
responded_by, + "responded_at": now_str, + }, + }, + ) + + result: dict = { + "intent_id": intent_id, + "suspension_id": suspension_id or active_susp_id, + "resolution": "responded", + "value": value, + "responded_by": responded_by, + "responded_at": now_str, + } + if matched_choice: + result["choice_label"] = matched_choice.get("label", "") + result["choice_description"] = matched_choice.get("description", "") + return result + finally: + session.close() + @app.get("/api/v1/intents/{intent_id}/events", response_model=List[EventResponse]) async def get_events( intent_id: str, @@ -1982,6 +2358,12 @@ async def acquire_lease( if not intent: raise HTTPException(status_code=404, detail="Intent not found") + # RFC-0024 §3.1: validate that all declared input mappings are + # resolvable from completed upstream dependency states at the + # true claim boundary (lease acquisition). This is the earliest + # point where the executor can guarantee inputs are available. + _rfc0024_validate_claim_inputs(db, session, intent, api_key) + lease = db.acquire_lease( session, intent_id=intent_id, diff --git a/openintent/server/config.py b/openintent/server/config.py index ce6daf6..dd02631 100644 --- a/openintent/server/config.py +++ b/openintent/server/config.py @@ -4,7 +4,7 @@ import os from dataclasses import dataclass, field -from typing import Optional, Set +from typing import Any, Optional, Set @dataclass @@ -32,6 +32,14 @@ class ServerConfig: protocol_version: str = "0.1" + suspension_default_retry_policy: Optional[dict[str, Any]] = None + """RFC-0026: platform-level default HumanRetryPolicy (serialised dict). + + When set, agents that have neither a call-site ``retry_policy`` argument nor a + ``default_human_retry_policy`` class attribute will inherit this policy for + every ``request_input()`` call. Exposed read-only via ``GET /api/v1/server/config``. 
+ """ + def __post_init__(self): if self.database_url is None: self.database_url = os.environ.get( diff --git a/openintent/workflow.py b/openintent/workflow.py index 49b150c..1d85ca9 100644 --- a/openintent/workflow.py +++ b/openintent/workflow.py @@ -55,6 +55,144 @@ class WorkflowNotFoundError(WorkflowError): pass +# --------------------------------------------------------------------------- +# RFC-0024: Workflow I/O Contract Errors +# --------------------------------------------------------------------------- + + +class MissingOutputError(WorkflowError): + """Raised when a task completion is rejected because one or more declared + output keys are absent from the agent's returned dict. + + Attributes: + task_id: The ID of the task whose completion was rejected. + phase_name: The name of the phase definition. + missing_keys: The declared output keys that were not returned. + """ + + def __init__(self, task_id: str, phase_name: str, missing_keys: list[str]): + self.task_id = task_id + self.phase_name = phase_name + self.missing_keys = missing_keys + keys = ", ".join(repr(k) for k in missing_keys) + super().__init__( + f"Task completion rejected for task '{task_id}' (phase '{phase_name}'): " + f"declared output key(s) {keys} were not present in agent return value" + ) + + +class OutputTypeMismatchError(WorkflowError): + """Raised when a returned output key's value does not match the declared type. + + No type coercion is performed — the executor validates and rejects only. + + Attributes: + task_id: The ID of the task whose completion was rejected. + phase_name: The name of the phase definition. + key: The output key with the type mismatch. + expected_type: The type declared in the workflow definition. + actual_type: The Python type name of the value returned by the agent. 
+ """ + + def __init__( + self, + task_id: str, + phase_name: str, + key: str, + expected_type: str, + actual_type: str, + ): + self.task_id = task_id + self.phase_name = phase_name + self.key = key + self.expected_type = expected_type + self.actual_type = actual_type + super().__init__( + f"Task completion rejected for task '{task_id}' (phase '{phase_name}'): " + f"output key '{key}' expected type '{expected_type}' " + f"but got '{actual_type}'" + ) + + +class UnresolvableInputError(WorkflowError): + """Raised at claim time when one or more declared inputs cannot be resolved + from completed upstream task outputs. + + Attributes: + task_id: The ID of the task whose claim was rejected. + phase_name: The name of the phase definition. + unresolvable_refs: Input mapping expressions that could not be resolved + (e.g. ["research.findings"]). + """ + + def __init__(self, task_id: str, phase_name: str, unresolvable_refs: list[str]): + self.task_id = task_id + self.phase_name = phase_name + self.unresolvable_refs = unresolvable_refs + refs = ", ".join(repr(r) for r in unresolvable_refs) + super().__init__( + f"Task claim rejected for task '{task_id}' (phase '{phase_name}'): " + f"input reference(s) {refs} could not be resolved from upstream outputs" + ) + + +class UpstreamIntentSuspendedError(WorkflowError): + """Raised at claim time when a declared input references an upstream phase + whose intent is currently ``suspended_awaiting_input``. + + Per RFC-0026 §4, an agent MUST NOT proceed with task execution while an + upstream producer intent is suspended — the executor should defer the claim + until the upstream intent resumes. + + Attributes: + task_id: The ID of the task whose claim was rejected. + phase_name: The name of the phase definition. + suspended_intent_id: The intent ID of the upstream suspended producer. + expected_resume_at: ISO-8601 string estimate of when the upstream intent + will resume, or None if unknown. 
+ """ + + def __init__( + self, + task_id: str, + phase_name: str, + suspended_intent_id: str, + expected_resume_at: Optional[str] = None, + ): + self.task_id = task_id + self.phase_name = phase_name + self.suspended_intent_id = suspended_intent_id + self.expected_resume_at = expected_resume_at + msg = ( + f"Task claim deferred for task '{task_id}' (phase '{phase_name}'): " + f"upstream intent '{suspended_intent_id}' is suspended_awaiting_input" + ) + if expected_resume_at: + msg = f"{msg} (expected resume: {expected_resume_at})" + super().__init__(msg) + + +class InputWiringError(WorkflowValidationError): + """Raised at workflow validation time when an inputs declaration is + structurally invalid — e.g. referencing a phase not in depends_on, + referencing a non-existent phase, or using malformed mapping syntax. + + Attributes: + phase_name: The phase with the invalid inputs declaration. + invalid_refs: The malformed or invalid mapping expressions. + """ + + def __init__(self, phase_name: str, invalid_refs: list[str], suggestion: str = ""): + self.phase_name = phase_name + self.invalid_refs = invalid_refs + refs = ", ".join(repr(r) for r in invalid_refs) + super().__init__( + f"Phase '{phase_name}' has invalid input wiring: {refs}", + path=f"workflow.{phase_name}.inputs", + suggestion=suggestion, + ) + + class PermissionLevel(str, Enum): READ = "read" WRITE = "write" @@ -181,9 +319,14 @@ class PhaseConfig: # RFC-0011: Unified permissions permissions: Optional[PermissionsConfig] = None - # Inputs/outputs for interpolation + # RFC-0024: I/O contracts + # inputs: mapping from local key name -> upstream reference + # e.g. {"revenue": "fetch_financials.revenue", "q": "$trigger.quarter"} + # outputs: mapping from output key name -> type declaration + # e.g. 
{"revenue": "number", "findings": "Finding", + # "warnings": {"type": "array", "required": False}} inputs: dict[str, str] = field(default_factory=dict) - outputs: list[str] = field(default_factory=list) + outputs: dict[str, Any] = field(default_factory=dict) # Conditional skip_when: Optional[str] = None @@ -517,6 +660,15 @@ def _parse(cls, data: dict, source_path: Path) -> "WorkflowSpec": else None ) # noqa: E501 + # RFC-0024: outputs may be a legacy list[str] or new dict form. + # Normalise to dict[str, Any] so the rest of the code is uniform. + raw_outputs = phase_data.get("outputs", {}) + if isinstance(raw_outputs, list): + # Legacy form: ["key1", "key2"] -> {"key1": "any", "key2": "any"} + raw_outputs = {k: "any" for k in raw_outputs} + elif not isinstance(raw_outputs, dict): + raw_outputs = {} + phase = PhaseConfig( name=phase_name, title=title, @@ -531,7 +683,7 @@ def _parse(cls, data: dict, source_path: Path) -> "WorkflowSpec": attachments=phase_data.get("attachments"), permissions=permissions, inputs=phase_data.get("inputs", {}), - outputs=phase_data.get("outputs", []), + outputs=raw_outputs, skip_when=phase_data.get("skip_when"), ) phases.append(phase) @@ -604,6 +756,101 @@ def _validate(self) -> None: # Check for circular dependencies self._check_circular_deps() + # RFC-0024: Validate input wiring declarations + self._validate_io_wiring() + + def _validate_io_wiring(self) -> None: + """Validate RFC-0024 input/output wiring declarations at parse time. + + Checks performed: + 1. Every phase reference in an input mapping (``phase.key``) names a + phase that exists in the workflow. + 2. Every such reference names a phase that appears in this phase's + ``depends_on`` list. + 3. If the referenced upstream phase declares ``outputs``, the key must + appear there (incremental adoption: skip if upstream has no outputs). + 4. Input mapping syntax is valid (``phase.key``, ``$trigger.key``, or + ``$initial_state.key``). 
+ """ + phase_map: dict[str, PhaseConfig] = {p.name: p for p in self.phases} + # Also build a title -> name map for depends_on that use titles + title_to_name: dict[str, str] = {p.title: p.name for p in self.phases} + + for phase in self.phases: + if not phase.inputs: + continue + + # Resolve depends_on to canonical phase names + resolved_deps: set[str] = set() + for dep in phase.depends_on: + if dep in phase_map: + resolved_deps.add(dep) + elif dep in title_to_name: + resolved_deps.add(title_to_name[dep]) + + invalid_refs: list[str] = [] + + for local_key, mapping_expr in phase.inputs.items(): + if not isinstance(mapping_expr, str): + invalid_refs.append( + f"{local_key}: {mapping_expr!r} (must be a string)" + ) + continue + + # Static references are valid by definition at parse time + if mapping_expr.startswith("$trigger.") or mapping_expr.startswith( + "$initial_state." + ): + continue + + # Must be "phase_name.key" + parts = mapping_expr.split(".", 1) + if len(parts) != 2 or not parts[0] or not parts[1]: + invalid_refs.append( + f"{local_key}: {mapping_expr!r} " + f"(invalid syntax; expected 'phase_name.key' or " + f"'$trigger.key' or '$initial_state.key')" + ) + continue + + ref_phase_name, ref_key = parts[0], parts[1] + + # Check phase exists + if ref_phase_name not in phase_map: + invalid_refs.append( + f"{local_key}: {mapping_expr!r} " + f"(phase '{ref_phase_name}' does not exist)" + ) + continue + + # Check phase is in depends_on + if ref_phase_name not in resolved_deps: + invalid_refs.append( + f"{local_key}: {mapping_expr!r} " + f"(phase '{ref_phase_name}' is not in depends_on)" + ) + continue + + # Check upstream output key exists if upstream declares outputs + upstream = phase_map[ref_phase_name] + if upstream.outputs and ref_key not in upstream.outputs: + invalid_refs.append( + f"{local_key}: {mapping_expr!r} " + f"(upstream phase '{ref_phase_name}' does not declare " + f"output key '{ref_key}')" + ) + + if invalid_refs: + raise InputWiringError( + 
phase_name=phase.name, + invalid_refs=invalid_refs, + suggestion=( + "Input mappings must use the form 'phase_name.key' " + "where phase_name appears in depends_on, or " + "'$trigger.key' / '$initial_state.key' for static values." + ), + ) + def _check_circular_deps(self) -> None: """Check for circular dependencies in the workflow.""" # Build dependency graph @@ -667,6 +914,31 @@ def to_portfolio_spec(self) -> "PortfolioSpec": if phase.cost_tracking: initial_state["cost_tracking"] = phase.cost_tracking + # RFC-0024: persist I/O contract declarations in the intent's + # initial_state so that the running agent can resolve ctx.input + # and validate outputs at completion time without needing a + # direct reference to the WorkflowSpec. + if phase.inputs: + initial_state["_io_inputs"] = phase.inputs + if phase.outputs: + initial_state["_io_outputs"] = phase.outputs + # Persist the workflow-level types block so that agent-side + # _validate_io_outputs can do named-type (struct/enum) checks + # without needing a reference to the WorkflowSpec at runtime. + if self.types: + initial_state["_io_types"] = self.types + # Store a mapping from dependency title -> phase name so that + # _build_context can resolve upstream outputs by phase name + # (as used in input mapping expressions) rather than by title. + # This is essential because titles and names can differ. 
+ if phase.depends_on: + name_to_title = {p.name: p.title for p in self.phases} + dep_title_to_name: dict[str, str] = {} + for dep_name in phase.depends_on: + dep_title = name_to_title.get(dep_name, dep_name) + dep_title_to_name[dep_title] = dep_name + initial_state["_io_dep_title_to_name"] = dep_title_to_name + if phase.permissions: perm = phase.permissions perm_state: dict[str, Any] = { @@ -703,6 +975,9 @@ def to_portfolio_spec(self) -> "PortfolioSpec": depends_on=depends_on, constraints=phase.constraints, initial_state=initial_state, + # RFC-0024: preserve I/O contracts for executor wiring + inputs=phase.inputs, + outputs=phase.outputs, ) intents.append(intent) @@ -772,15 +1047,315 @@ async def run( api_key=api_key, ) - # Execute (timeout is managed internally by Coordinator) + # Execute with RFC-0024 I/O contract enforcement — pass self so the + # Coordinator can call validate_claim_inputs/validate_task_outputs. portfolio_spec = self.to_portfolio_spec() - result = await coordinator.execute(portfolio_spec) + result = await coordinator.execute(portfolio_spec, workflow_spec=self) if verbose: print(f"\nWorkflow complete: {self.name}") return result + # ------------------------------------------------------------------ + # RFC-0024: Executor I/O wiring helpers + # ------------------------------------------------------------------ + + def resolve_task_inputs( + self, + phase_name: str, + upstream_outputs: dict[str, dict[str, Any]], + trigger_payload: Optional[dict[str, Any]] = None, + initial_state: Optional[dict[str, Any]] = None, + task_id: str = "", + ) -> dict[str, Any]: + """Pre-populate ctx.input for a task from its declared inputs mapping. + + This is the executor's pre-handoff step (RFC-0024 §3.2). Call this + before dispatching a task to an agent handler. The returned dict + should be placed in ``ctx.input``. + + Args: + phase_name: The name of the phase being started. 
+ upstream_outputs: Map of ``{phase_name: {key: value}}`` for all + phases that have already completed. Keys are canonical phase + names (not titles). + trigger_payload: Optional trigger payload for ``$trigger.*`` refs. + initial_state: Optional initial state for ``$initial_state.*`` refs. + task_id: Optional task ID for error messages. + + Returns: + Resolved ``ctx.input`` dict guaranteed to contain all declared + input keys. + + Raises: + UnresolvableInputError: If any declared input cannot be resolved. + KeyError: If ``phase_name`` is not found in this workflow. + """ + phase = next((p for p in self.phases if p.name == phase_name), None) + if phase is None: + raise KeyError(f"Phase '{phase_name}' not found in workflow '{self.name}'") + + if not phase.inputs: + return {} + + trigger_payload = trigger_payload or {} + initial_state = initial_state or {} + resolved: dict[str, Any] = {} + unresolvable: list[str] = [] + + for local_key, mapping_expr in phase.inputs.items(): + if not isinstance(mapping_expr, str): + unresolvable.append(f"{local_key}: {mapping_expr!r}") + continue + + if mapping_expr.startswith("$trigger."): + key = mapping_expr[len("$trigger.") :] + if key in trigger_payload: + resolved[local_key] = trigger_payload[key] + else: + unresolvable.append(mapping_expr) + elif mapping_expr.startswith("$initial_state."): + key = mapping_expr[len("$initial_state.") :] + if key in initial_state: + resolved[local_key] = initial_state[key] + else: + unresolvable.append(mapping_expr) + else: + parts = mapping_expr.split(".", 1) + if len(parts) != 2: + unresolvable.append(mapping_expr) + continue + ref_phase, ref_key = parts[0], parts[1] + phase_output = upstream_outputs.get(ref_phase, {}) + if ref_key in phase_output: + resolved[local_key] = phase_output[ref_key] + else: + unresolvable.append(mapping_expr) + + if unresolvable: + raise UnresolvableInputError( + task_id=task_id, + phase_name=phase_name, + unresolvable_refs=unresolvable, + ) + + return resolved + + 
def validate_claim_inputs( + self, + phase_name: str, + upstream_outputs: dict[str, dict[str, Any]], + trigger_payload: Optional[dict[str, Any]] = None, + initial_state: Optional[dict[str, Any]] = None, + task_id: str = "", + upstream_intents_status: Optional[dict[str, dict[str, Any]]] = None, + ) -> None: + """Validate that all declared inputs are resolvable at claim time. + + This is the executor's claim-time check (RFC-0024 §3.1 / RFC-0026 §4). + Call this when an agent attempts to claim a task. Raises a typed error + if the claim should be rejected; returns ``None`` if the claim is safe. + + RFC-0026: If ``upstream_intents_status`` is provided, this method checks + whether any upstream phase whose outputs are referenced by the current + phase's inputs has a corresponding intent that is currently + ``suspended_awaiting_input``. When such a phase is found, + ``UpstreamIntentSuspendedError`` is raised **before** the resolvability + check, because the upstream outputs may exist but the producer intent is + paused and may mutate its outputs upon resume. + + Args: + phase_name: The name of the phase being claimed. + upstream_outputs: Map of ``{phase_name: {key: value}}`` for all + completed upstream phases. + trigger_payload: Optional trigger payload for ``$trigger.*`` refs. + initial_state: Optional initial state for ``$initial_state.*`` refs. + task_id: Optional task ID for error messages. + upstream_intents_status: Optional map of + ``{phase_name: {"status": str, "intent_id": str, + "expected_resume_at": str | None}}`` describing the current + intent status for each upstream phase. When a referenced + upstream phase's status is ``"suspended_awaiting_input"``, + ``UpstreamIntentSuspendedError`` is raised. + + Raises: + UpstreamIntentSuspendedError: If any referenced upstream phase's + intent is currently suspended (RFC-0026). + UnresolvableInputError: If any declared input cannot be resolved. 
+ """ + # RFC-0026: Check for upstream suspension before resolvability + if upstream_intents_status: + phase = next((p for p in self.phases if p.name == phase_name), None) + if phase and phase.inputs: + for _local_key, mapping_expr in phase.inputs.items(): + if not isinstance(mapping_expr, str): + continue + parts = mapping_expr.split(".", 1) + if len(parts) != 2 or parts[0].startswith("$"): + continue + upstream_phase_name = parts[0] + intent_info = upstream_intents_status.get(upstream_phase_name) + if intent_info is None: + continue + if intent_info.get("status") == "suspended_awaiting_input": + raise UpstreamIntentSuspendedError( + task_id=task_id, + phase_name=phase_name, + suspended_intent_id=intent_info.get( + "intent_id", upstream_phase_name + ), + expected_resume_at=intent_info.get("expected_resume_at"), + ) + + self.resolve_task_inputs( + phase_name=phase_name, + upstream_outputs=upstream_outputs, + trigger_payload=trigger_payload, + initial_state=initial_state, + task_id=task_id, + ) + + def validate_task_outputs( + self, + phase_name: str, + agent_output: dict[str, Any], + task_id: str = "", + ) -> None: + """Validate an agent's output dict against the phase's declared outputs. + + This is the executor's completion-time validation (RFC-0024 §3.3). + Call this when an agent submits a completion result. Raises a typed + error if validation fails; returns ``None`` if the output is acceptable. + + Args: + phase_name: The name of the phase that completed. + agent_output: The dict returned by the agent handler. + task_id: Optional task ID for error messages. + + Raises: + MissingOutputError: If any required declared output key is absent. + OutputTypeMismatchError: If a value does not match its declared type. + KeyError: If ``phase_name`` is not found in this workflow. 
+ """ + phase = next((p for p in self.phases if p.name == phase_name), None) + if phase is None: + raise KeyError(f"Phase '{phase_name}' not found in workflow '{self.name}'") + + if not phase.outputs: + return + + missing: list[str] = [] + + for output_key, type_decl in phase.outputs.items(): + # Determine whether this output is required + required = True + expected_type: str = "any" + + if isinstance(type_decl, dict): + required = type_decl.get("required", True) + expected_type = str(type_decl.get("type", "any")) + elif isinstance(type_decl, str): + expected_type = type_decl + + if output_key not in agent_output: + if required: + missing.append(output_key) + continue + + # Type validation (structural, no coercion) + value = agent_output[output_key] + if expected_type not in ("any", ""): + self._check_value_type( + task_id=task_id, + phase_name=phase_name, + key=output_key, + expected_type=expected_type, + value=value, + ) + + if missing: + raise MissingOutputError( + task_id=task_id, + phase_name=phase_name, + missing_keys=missing, + ) + + def _check_value_type( + self, + task_id: str, + phase_name: str, + key: str, + expected_type: str, + value: Any, + ) -> None: + """Validate a single output value against a declared type string. + + Primitive type strings are mapped to Python types. Named types from + the ``types`` block are validated structurally (top-level key presence). + Raises ``OutputTypeMismatchError`` on mismatch. 
+ """ + primitive_type_map: dict[str, type] = { + "string": str, + "number": (int, float), # type: ignore[dict-item] + "boolean": bool, + "object": dict, + "array": list, + } + + if expected_type in primitive_type_map: + expected_python_type = primitive_type_map[expected_type] + if not isinstance(value, expected_python_type): + raise OutputTypeMismatchError( + task_id=task_id, + phase_name=phase_name, + key=key, + expected_type=expected_type, + actual_type=type(value).__name__, + ) + return + + # Named type from the types block — validate structurally + type_schema = self.types.get(expected_type) + if type_schema is None: + # Unknown named type: accept without validation (incremental adoption) + return + + # Enum type: schema is {"enum": [...]}. Validate value membership + # before any isinstance check — enum values are scalars, not dicts. + if isinstance(type_schema, dict) and "enum" in type_schema: + enum_values = type_schema["enum"] + if isinstance(enum_values, list) and value not in enum_values: + raise OutputTypeMismatchError( + task_id=task_id, + phase_name=phase_name, + key=key, + expected_type=f"{expected_type}(enum:{enum_values})", + actual_type=repr(value), + ) + return + + if not isinstance(value, dict): + raise OutputTypeMismatchError( + task_id=task_id, + phase_name=phase_name, + key=key, + expected_type=expected_type, + actual_type=type(value).__name__, + ) + + # Validate that all keys declared in the type schema are present + if isinstance(type_schema, dict): + for schema_key in type_schema: + if schema_key not in value: + raise OutputTypeMismatchError( + task_id=task_id, + phase_name=phase_name, + key=key, + expected_type=expected_type, + actual_type=(f"dict missing required field '{schema_key}'"), + ) + def __repr__(self) -> str: return f"WorkflowSpec(name={self.name!r}, phases={len(self.phases)})" diff --git a/pyproject.toml b/pyproject.toml index 91ec7ac..de127fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = 
"setuptools.build_meta" [project] name = "openintent" -version = "0.15.1" +version = "0.17.0" description = "Python SDK and Server for the OpenIntent Coordination Protocol" readme = "README.md" license = {text = "MIT"} diff --git a/tests/test_adapters.py b/tests/test_adapters.py index 901c58e..d066549 100644 --- a/tests/test_adapters.py +++ b/tests/test_adapters.py @@ -236,6 +236,102 @@ def test_config_disables_logging(self, mock_check): mock_client.log_llm_request_completed.assert_not_called() +class TestOpenAIAdapterMaxTokensTranslation: + """Tests for max_tokens → max_completion_tokens translation.""" + + def test_requires_max_completion_tokens_gpt5(self): + assert OpenAIAdapter._requires_max_completion_tokens("gpt-5-mini") is True + assert OpenAIAdapter._requires_max_completion_tokens("gpt-5") is True + + def test_requires_max_completion_tokens_o1(self): + assert OpenAIAdapter._requires_max_completion_tokens("o1") is True + assert OpenAIAdapter._requires_max_completion_tokens("o1-mini") is True + + def test_requires_max_completion_tokens_o3(self): + assert OpenAIAdapter._requires_max_completion_tokens("o3") is True + assert OpenAIAdapter._requires_max_completion_tokens("o3-mini") is True + + def test_does_not_require_for_gpt4(self): + assert OpenAIAdapter._requires_max_completion_tokens("gpt-4") is False + assert OpenAIAdapter._requires_max_completion_tokens("gpt-4o") is False + assert OpenAIAdapter._requires_max_completion_tokens("gpt-3.5-turbo") is False + + @patch("openintent.adapters.openai_adapter._check_openai_installed") + def test_max_tokens_remapped_for_gpt5(self, mock_check): + mock_openai = MagicMock() + mock_client = MagicMock() + + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message = MagicMock() + mock_response.choices[0].message.content = "Hello!" 
+ mock_response.choices[0].message.tool_calls = None + mock_response.choices[0].finish_reason = "stop" + mock_response.usage = None + mock_openai.chat.completions.create.return_value = mock_response + + adapter = OpenAIAdapter(mock_openai, mock_client, intent_id="test-intent") + adapter.chat.completions.create( + model="gpt-5-mini", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=200, + ) + + call_kwargs = mock_openai.chat.completions.create.call_args[1] + assert "max_tokens" not in call_kwargs + assert call_kwargs["max_completion_tokens"] == 200 + + @patch("openintent.adapters.openai_adapter._check_openai_installed") + def test_max_completion_tokens_not_overwritten_if_already_set(self, mock_check): + mock_openai = MagicMock() + mock_client = MagicMock() + + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message = MagicMock() + mock_response.choices[0].message.content = "Hello!" + mock_response.choices[0].message.tool_calls = None + mock_response.choices[0].finish_reason = "stop" + mock_response.usage = None + mock_openai.chat.completions.create.return_value = mock_response + + adapter = OpenAIAdapter(mock_openai, mock_client, intent_id="test-intent") + adapter.chat.completions.create( + model="gpt-5-mini", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=200, + max_completion_tokens=500, + ) + + call_kwargs = mock_openai.chat.completions.create.call_args[1] + assert call_kwargs["max_completion_tokens"] == 500 + + @patch("openintent.adapters.openai_adapter._check_openai_installed") + def test_max_tokens_not_remapped_for_gpt4(self, mock_check): + mock_openai = MagicMock() + mock_client = MagicMock() + + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message = MagicMock() + mock_response.choices[0].message.content = "Hello!" 
+ mock_response.choices[0].message.tool_calls = None + mock_response.choices[0].finish_reason = "stop" + mock_response.usage = None + mock_openai.chat.completions.create.return_value = mock_response + + adapter = OpenAIAdapter(mock_openai, mock_client, intent_id="test-intent") + adapter.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=200, + ) + + call_kwargs = mock_openai.chat.completions.create.call_args[1] + assert call_kwargs["max_tokens"] == 200 + assert "max_completion_tokens" not in call_kwargs + + class TestAnthropicAdapter: """Tests for AnthropicAdapter.""" diff --git a/tests/test_hitl.py b/tests/test_hitl.py new file mode 100644 index 0000000..2f62176 --- /dev/null +++ b/tests/test_hitl.py @@ -0,0 +1,1785 @@ +"""Tests for RFC-0025: Human-in-the-Loop Intent Suspension.""" + +import asyncio +from datetime import datetime +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +# --------------------------------------------------------------------------- +# Model tests +# --------------------------------------------------------------------------- + + +class TestIntentStatusSuspended: + """IntentStatus.SUSPENDED_AWAITING_INPUT is present and correct.""" + + def test_status_value(self): + from openintent.models import IntentStatus + + assert IntentStatus.SUSPENDED_AWAITING_INPUT == "suspended_awaiting_input" + + def test_status_is_string(self): + from openintent.models import IntentStatus + + assert isinstance(IntentStatus.SUSPENDED_AWAITING_INPUT, str) + + def test_status_in_enum(self): + from openintent.models import IntentStatus + + members = [s.value for s in IntentStatus] + assert "suspended_awaiting_input" in members + + +class TestEventTypeHITL: + """New HITL EventType constants are present and correct.""" + + def test_intent_suspended(self): + from openintent.models import EventType + + assert EventType.INTENT_SUSPENDED == "intent.suspended" + + def test_intent_resumed(self): + 
from openintent.models import EventType + + assert EventType.INTENT_RESUMED == "intent.resumed" + + def test_intent_suspension_expired(self): + from openintent.models import EventType + + assert EventType.INTENT_SUSPENSION_EXPIRED == "intent.suspension_expired" + + def test_engagement_decision(self): + from openintent.models import EventType + + assert EventType.ENGAGEMENT_DECISION == "engagement.decision" + + def test_all_hitl_events_in_enum(self): + from openintent.models import EventType + + values = {e.value for e in EventType} + assert "intent.suspended" in values + assert "intent.resumed" in values + assert "intent.suspension_expired" in values + assert "engagement.decision" in values + + +class TestSuspensionRecord: + """SuspensionRecord model: construction, serialisation, round-trip.""" + + def _make_record(self, **kwargs): + from openintent.models import SuspensionRecord + + defaults = dict( + id="susp-001", + question="Should we proceed with refund?", + context={"amount": 500, "currency": "USD"}, + channel_hint="slack", + suspended_at=datetime(2026, 3, 23, 10, 0, 0), + timeout_seconds=300, + expires_at=datetime(2026, 3, 23, 10, 5, 0), + fallback_value="deny", + fallback_policy="complete_with_fallback", + confidence_at_suspension=0.55, + ) + defaults.update(kwargs) + return SuspensionRecord(**defaults) + + def test_construction(self): + rec = self._make_record() + assert rec.id == "susp-001" + assert rec.question == "Should we proceed with refund?" + assert rec.fallback_policy == "complete_with_fallback" + assert rec.confidence_at_suspension == 0.55 + + def test_to_dict_required_fields(self): + rec = self._make_record() + d = rec.to_dict() + assert d["id"] == "susp-001" + assert d["question"] == "Should we proceed with refund?" 
+ assert d["fallback_policy"] == "complete_with_fallback" + assert d["context"]["amount"] == 500 + + def test_to_dict_optional_fields(self): + rec = self._make_record() + d = rec.to_dict() + assert d["channel_hint"] == "slack" + assert d["timeout_seconds"] == 300 + assert d["fallback_value"] == "deny" + assert d["confidence_at_suspension"] == 0.55 + + def test_to_dict_timestamps_iso8601(self): + rec = self._make_record() + d = rec.to_dict() + assert "T" in d["suspended_at"] + assert "T" in d["expires_at"] + + def test_from_dict_round_trip(self): + from openintent.models import SuspensionRecord + + rec = self._make_record() + d = rec.to_dict() + rec2 = SuspensionRecord.from_dict(d) + assert rec2.id == rec.id + assert rec2.question == rec.question + assert rec2.fallback_policy == rec.fallback_policy + assert rec2.confidence_at_suspension == rec.confidence_at_suspension + + def test_minimal_construction(self): + from openintent.models import SuspensionRecord + + rec = SuspensionRecord(id="x", question="Proceed?") + assert rec.fallback_policy == "fail" + assert rec.context == {} + assert rec.channel_hint is None + assert rec.response is None + + def test_resolution_responded(self): + rec = self._make_record(response="approve", resolution="responded") + d = rec.to_dict() + assert d["resolution"] == "responded" + assert d["response"] == "approve" + + def test_from_dict_with_responded_at(self): + from openintent.models import SuspensionRecord + + d = { + "id": "s1", + "question": "Q", + "responded_at": "2026-03-23T10:01:00", + "resolution": "responded", + } + rec = SuspensionRecord.from_dict(d) + assert rec.responded_at is not None + assert rec.resolution == "responded" + + +class TestEngagementSignals: + """EngagementSignals model: defaults, serialisation, round-trip.""" + + def test_defaults(self): + from openintent.models import EngagementSignals + + sig = EngagementSignals() + assert sig.confidence == 1.0 + assert sig.risk == 0.0 + assert sig.reversibility == 1.0 + 
assert sig.context == {} + + def test_custom_values(self): + from openintent.models import EngagementSignals + + sig = EngagementSignals(confidence=0.6, risk=0.7, reversibility=0.3) + assert sig.confidence == 0.6 + assert sig.risk == 0.7 + + def test_to_dict(self): + from openintent.models import EngagementSignals + + sig = EngagementSignals(confidence=0.6, risk=0.4, reversibility=0.8) + d = sig.to_dict() + assert d["confidence"] == 0.6 + assert d["risk"] == 0.4 + assert d["reversibility"] == 0.8 + + def test_from_dict_round_trip(self): + from openintent.models import EngagementSignals + + sig = EngagementSignals(confidence=0.75, risk=0.25, reversibility=0.9) + d = sig.to_dict() + sig2 = EngagementSignals.from_dict(d) + assert sig2.confidence == 0.75 + assert sig2.risk == 0.25 + + def test_from_dict_defaults(self): + from openintent.models import EngagementSignals + + sig = EngagementSignals.from_dict({}) + assert sig.confidence == 1.0 + assert sig.risk == 0.0 + + +class TestEngagementDecision: + """EngagementDecision model: all modes, serialisation, round-trip.""" + + def _make_decision(self, mode="autonomous", should_ask=False, **kwargs): + from openintent.models import EngagementDecision, EngagementSignals + + return EngagementDecision( + mode=mode, + should_ask=should_ask, + rationale="Test rationale", + signals=EngagementSignals(), + **kwargs, + ) + + def test_autonomous_mode(self): + d = self._make_decision("autonomous", False) + assert d.mode == "autonomous" + assert d.should_ask is False + + def test_request_input_mode(self): + d = self._make_decision("request_input", True) + assert d.mode == "request_input" + assert d.should_ask is True + + def test_require_input_mode(self): + d = self._make_decision("require_input", True) + assert d.mode == "require_input" + assert d.should_ask is True + + def test_defer_mode(self): + d = self._make_decision("defer", False) + assert d.mode == "defer" + assert d.should_ask is False + + def test_to_dict(self): + d = 
self._make_decision("request_input", True) + dd = d.to_dict() + assert dd["mode"] == "request_input" + assert dd["should_ask"] is True + assert "signals" in dd + + def test_from_dict_round_trip(self): + from openintent.models import EngagementDecision + + d = self._make_decision("require_input", True) + dd = d.to_dict() + d2 = EngagementDecision.from_dict(dd) + assert d2.mode == "require_input" + assert d2.should_ask is True + assert d2.signals is not None + + def test_from_dict_no_signals(self): + from openintent.models import EngagementDecision + + d = EngagementDecision.from_dict({"mode": "autonomous", "should_ask": False}) + assert d.signals is None + + +class TestInputResponse: + """InputResponse model: construction, serialisation, round-trip.""" + + def test_construction(self): + from openintent.models import InputResponse + + r = InputResponse( + suspension_id="susp-1", + value="approve", + responded_by="alice", + ) + assert r.suspension_id == "susp-1" + assert r.value == "approve" + + def test_to_dict(self): + from openintent.models import InputResponse + + r = InputResponse( + suspension_id="susp-1", + value=42, + responded_by="bob", + responded_at=datetime(2026, 3, 23, 11, 0, 0), + ) + d = r.to_dict() + assert d["suspension_id"] == "susp-1" + assert d["value"] == 42 + assert d["responded_by"] == "bob" + assert "T" in d["responded_at"] + + def test_from_dict_round_trip(self): + from openintent.models import InputResponse + + r = InputResponse( + suspension_id="s", + value={"decision": "approve"}, + responded_by="carol", + responded_at=datetime(2026, 3, 23, 12, 0, 0), + ) + d = r.to_dict() + r2 = InputResponse.from_dict(d) + assert r2.suspension_id == r.suspension_id + assert r2.value == r.value + assert r2.responded_by == r.responded_by + + def test_from_dict_without_timestamp(self): + from openintent.models import InputResponse + + r = InputResponse.from_dict({"suspension_id": "x", "value": "ok"}) + assert r.responded_at is None + + +# 
--------------------------------------------------------------------------- +# Exception tests +# --------------------------------------------------------------------------- + + +class TestInputTimeoutError: + """InputTimeoutError carries suspension metadata.""" + + def test_basic_raise(self): + from openintent.exceptions import InputTimeoutError + + with pytest.raises(InputTimeoutError) as exc_info: + raise InputTimeoutError("timed out") + assert "timed out" in str(exc_info.value) + + def test_suspension_id_attribute(self): + from openintent.exceptions import InputTimeoutError + + err = InputTimeoutError("timed out", suspension_id="susp-99") + assert err.suspension_id == "susp-99" + + def test_fallback_policy_attribute(self): + from openintent.exceptions import InputTimeoutError + + err = InputTimeoutError("timed out", fallback_policy="complete_with_fallback") + assert err.fallback_policy == "complete_with_fallback" + + def test_inherits_from_openintent_error(self): + from openintent.exceptions import InputTimeoutError, OpenIntentError + + assert issubclass(InputTimeoutError, OpenIntentError) + + +class TestInputCancelledError: + """InputCancelledError carries suspension metadata.""" + + def test_basic_raise(self): + from openintent.exceptions import InputCancelledError + + with pytest.raises(InputCancelledError) as exc_info: + raise InputCancelledError("cancelled") + assert "cancelled" in str(exc_info.value) + + def test_suspension_id_attribute(self): + from openintent.exceptions import InputCancelledError + + err = InputCancelledError("cancelled", suspension_id="susp-77") + assert err.suspension_id == "susp-77" + + def test_inherits_from_openintent_error(self): + from openintent.exceptions import InputCancelledError, OpenIntentError + + assert issubclass(InputCancelledError, OpenIntentError) + + +# --------------------------------------------------------------------------- +# Decorator tests +# 
--------------------------------------------------------------------------- + + +class TestHITLDecorators: + """HITL decorators set _openintent_handler correctly.""" + + def test_on_input_requested(self): + from openintent.agents import on_input_requested + + @on_input_requested + def handler(self, intent, suspension): + pass + + assert handler._openintent_handler == "input_requested" + + def test_on_input_received(self): + from openintent.agents import on_input_received + + @on_input_received + def handler(self, intent, response): + pass + + assert handler._openintent_handler == "input_received" + + def test_on_suspension_expired(self): + from openintent.agents import on_suspension_expired + + @on_suspension_expired + def handler(self, intent, suspension): + pass + + assert handler._openintent_handler == "suspension_expired" + + def test_on_engagement_decision(self): + from openintent.agents import on_engagement_decision + + @on_engagement_decision + def handler(self, intent, decision): + pass + + assert handler._openintent_handler == "engagement_decision" + + def test_decorators_preserve_function(self): + from openintent.agents import on_input_requested + + @on_input_requested + async def my_handler(self, intent, suspension): + """My handler.""" + return "ok" + + assert my_handler.__name__ == "my_handler" + assert asyncio.iscoroutinefunction(my_handler) + + def test_handler_discovery(self): + """Decorated methods are discovered in _discover_handlers.""" + from openintent.agents import Agent, on_input_requested, on_suspension_expired + + @Agent("test-agent-discovery") + class MyAgent: + @on_input_requested + async def notify(self, intent, suspension): + pass + + @on_suspension_expired + async def expire(self, intent, suspension): + pass + + # Instantiate to trigger _discover_handlers in __init__ + with patch.dict("os.environ", {"OPENINTENT_BASE_URL": "http://localhost:8000"}): + agent_instance = MyAgent.__new__(MyAgent) + agent_instance._agent_id = 
"test-agent-discovery" + agent_instance._client = None + agent_instance._async_client = None + from openintent.agents import AgentConfig + + agent_instance._config = AgentConfig() + agent_instance._config.auto_heartbeat = False + agent_instance._governance_policy = None + agent_instance._federation_visibility = None + agent_instance._running = False + agent_instance._mcp_bridge = None + agent_instance._discover_handlers() + + assert len(agent_instance._handlers["input_requested"]) == 1 + assert len(agent_instance._handlers["suspension_expired"]) == 1 + + +# --------------------------------------------------------------------------- +# should_request_input tests +# --------------------------------------------------------------------------- + + +class TestShouldRequestInput: + """should_request_input() returns correct EngagementDecision modes.""" + + def _make_agent_instance(self): + """Create a minimal BaseAgent instance with mock async client.""" + from openintent.agents import AgentConfig, BaseAgent + + instance = BaseAgent.__new__(BaseAgent) + instance._agent_id = "hitl-test-agent" + instance._client = None + instance._running = False + instance._mcp_bridge = None + instance._governance_policy = None + instance._federation_visibility = None + config = AgentConfig() + config.auto_heartbeat = False + instance._config = config + instance._discover_handlers() + + mock_client = AsyncMock() + mock_client.get_intent.return_value = MagicMock(id="intent-1") + mock_client.log_event = AsyncMock() + instance._async_client = mock_client + return instance + + @pytest.mark.asyncio + async def test_autonomous_mode(self): + """High confidence, low risk, reversible → autonomous.""" + from openintent.models import EngagementSignals + + agent = self._make_agent_instance() + signals = EngagementSignals(confidence=0.95, risk=0.05, reversibility=0.9) + decision = await agent.should_request_input("intent-1", signals=signals) + assert decision.mode == "autonomous" + assert 
decision.should_ask is False + + @pytest.mark.asyncio + async def test_request_input_mode(self): + """Moderate confidence and risk → request_input.""" + from openintent.models import EngagementSignals + + agent = self._make_agent_instance() + signals = EngagementSignals(confidence=0.7, risk=0.3, reversibility=0.7) + decision = await agent.should_request_input("intent-1", signals=signals) + assert decision.mode == "request_input" + assert decision.should_ask is True + + @pytest.mark.asyncio + async def test_require_input_mode(self): + """Low confidence → require_input.""" + from openintent.models import EngagementSignals + + agent = self._make_agent_instance() + signals = EngagementSignals(confidence=0.3, risk=0.4, reversibility=0.6) + decision = await agent.should_request_input("intent-1", signals=signals) + assert decision.mode == "require_input" + assert decision.should_ask is True + + @pytest.mark.asyncio + async def test_defer_mode_high_risk(self): + """Very high risk → defer.""" + from openintent.models import EngagementSignals + + agent = self._make_agent_instance() + signals = EngagementSignals(confidence=0.9, risk=0.9, reversibility=0.5) + decision = await agent.should_request_input("intent-1", signals=signals) + assert decision.mode == "defer" + assert decision.should_ask is False + + @pytest.mark.asyncio + async def test_defer_mode_irreversible(self): + """Irreversible action → defer.""" + from openintent.models import EngagementSignals + + agent = self._make_agent_instance() + signals = EngagementSignals(confidence=0.9, risk=0.5, reversibility=0.05) + decision = await agent.should_request_input("intent-1", signals=signals) + assert decision.mode == "defer" + + @pytest.mark.asyncio + async def test_kwargs_shorthand(self): + """Keyword shorthand works without EngagementSignals object.""" + agent = self._make_agent_instance() + decision = await agent.should_request_input( + "intent-1", confidence=0.95, risk=0.05, reversibility=0.9 + ) + assert decision.mode 
== "autonomous" + + @pytest.mark.asyncio + async def test_decision_has_signals(self): + """Decision object carries the EngagementSignals used.""" + from openintent.models import EngagementSignals + + agent = self._make_agent_instance() + signals = EngagementSignals(confidence=0.9, risk=0.1, reversibility=0.8) + decision = await agent.should_request_input("intent-1", signals=signals) + assert decision.signals is not None + assert decision.signals.confidence == 0.9 + + @pytest.mark.asyncio + async def test_engagement_decision_event_emitted(self): + """should_request_input() emits an engagement.decision event.""" + from openintent.models import EngagementSignals + + agent = self._make_agent_instance() + signals = EngagementSignals(confidence=0.9, risk=0.1, reversibility=0.8) + await agent.should_request_input("intent-1", signals=signals) + agent._async_client.log_event.assert_called_once() + call_args = agent._async_client.log_event.call_args + assert call_args[0][1].value == "engagement.decision" + + @pytest.mark.asyncio + async def test_on_engagement_decision_hook_fired(self): + """@on_engagement_decision handlers are called after should_request_input.""" + from openintent.agents import AgentConfig, BaseAgent, on_engagement_decision + from openintent.models import EngagementSignals + + received = [] + + class HookAgent(BaseAgent): + @on_engagement_decision + async def on_decision(self, intent, decision): + received.append(decision.mode) + + instance = HookAgent.__new__(HookAgent) + instance._agent_id = "hook-agent" + instance._client = None + instance._running = False + instance._mcp_bridge = None + instance._governance_policy = None + instance._federation_visibility = None + config = AgentConfig() + config.auto_heartbeat = False + instance._config = config + instance._discover_handlers() + + mock_client = AsyncMock() + mock_client.get_intent.return_value = MagicMock(id="intent-99") + mock_client.log_event = AsyncMock() + instance._async_client = mock_client + + 
signals = EngagementSignals(confidence=0.95, risk=0.05, reversibility=0.9) + await instance.should_request_input("intent-99", signals=signals) + assert received == ["autonomous"] + + +# --------------------------------------------------------------------------- +# Server endpoint tests +# --------------------------------------------------------------------------- + + +class TestSuspendRespondEndpoint: + """POST /api/v1/intents/{id}/suspend/respond endpoint tests.""" + + @pytest.fixture + def client(self, tmp_path): + from fastapi.testclient import TestClient + + from openintent.server.app import create_app + from openintent.server.config import ServerConfig + + db_path = str(tmp_path / "test.db") + config = ServerConfig( + database_url=f"sqlite:///{db_path}", + api_keys=["test-key"], + ) + app = create_app(config) + with TestClient(app) as tc: + return tc + + def _create_intent(self, client, title="HITL Test Intent"): + resp = client.post( + "/api/v1/intents", + json={"title": title}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code in (200, 201) + return resp.json() + + def _set_status(self, client, intent_id, status, version): + resp = client.post( + f"/api/v1/intents/{intent_id}/status", + json={"status": status}, + headers={"X-API-Key": "test-key", "If-Match": str(version)}, + ) + return resp + + def test_respond_to_suspended_intent(self, client): + """Happy path: respond to a suspended intent.""" + intent = self._create_intent(client) + intent_id = intent["id"] + + # Transition to suspended_awaiting_input + set_resp = self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + assert set_resp.status_code == 200 + + # Respond + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={ + "suspension_id": "susp-test", + "value": "approve", + "responded_by": "alice", + }, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["resolution"] == 
"responded" + assert data["value"] == "approve" + assert data["responded_by"] == "alice" + + def test_respond_transitions_to_active(self, client): + """After respond, intent transitions back to active.""" + intent = self._create_intent(client) + intent_id = intent["id"] + + self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + + client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "s1", "value": "yes"}, + headers={"X-API-Key": "test-key"}, + ) + + # Fetch intent - should be active + get_resp = client.get( + f"/api/v1/intents/{intent_id}", + headers={"X-API-Key": "test-key"}, + ) + assert get_resp.status_code == 200 + assert get_resp.json()["status"] == "active" + + def test_respond_to_non_suspended_intent_fails(self, client): + """Responding to a non-suspended intent returns 409.""" + intent = self._create_intent(client) + intent_id = intent["id"] + + # Intent is in draft status, not suspended + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "s1", "value": "yes"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 409 + + def test_respond_to_missing_intent_fails(self, client): + """Responding to non-existent intent returns 404.""" + resp = client.post( + "/api/v1/intents/nonexistent-id/suspend/respond", + json={"suspension_id": "s1", "value": "yes"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 404 + + def test_respond_with_complex_value(self, client): + """Response value can be a complex object.""" + intent = self._create_intent(client) + intent_id = intent["id"] + self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + + complex_value = {"decision": "approve", "reason": "looks good", "amount": 500} + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "s1", "value": complex_value}, + headers={"X-API-Key": "test-key"}, + ) + 
assert resp.status_code == 200 + assert resp.json()["value"] == complex_value + + def test_suspended_status_can_be_set(self, client): + """Setting status to suspended_awaiting_input succeeds.""" + intent = self._create_intent(client) + resp = self._set_status( + client, intent["id"], "suspended_awaiting_input", intent["version"] + ) + assert resp.status_code == 200 + assert resp.json()["status"] == "suspended_awaiting_input" + + def test_respond_without_api_key_fails(self, client): + """Unauthenticated respond request returns 401 or 403.""" + intent = self._create_intent(client) + intent_id = intent["id"] + self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "s1", "value": "yes"}, + ) + assert resp.status_code in (401, 403) + + def test_respond_response_has_timestamp(self, client): + """Respond endpoint returns a responded_at timestamp.""" + intent = self._create_intent(client) + intent_id = intent["id"] + self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "s1", "value": "ok"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 200 + assert resp.json()["responded_at"] is not None + + # ------- helpers for suspension-with-choices tests ------- + + def _suspend_with_choices(self, client, choices, response_type="choice"): + """Create an intent, set suspension state with choices, suspend it.""" + intent = self._create_intent(client) + intent_id = intent["id"] + + status_resp = self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + new_version = status_resp.json()["version"] + + susp_data = { + "id": "susp-choices", + "question": "Pick one", + "response_type": response_type, + "choices": choices, + "fallback_policy": "fail", + "context": {}, + } + 
patch_resp = client.post( + f"/api/v1/intents/{intent_id}/state", + json={ + "patches": [{"op": "set", "path": "/_suspension", "value": susp_data}] + }, + headers={"X-API-Key": "test-key", "If-Match": str(new_version)}, + ) + assert patch_resp.status_code == 200, f"State patch failed: {patch_resp.json()}" + return intent_id + + # ------- Structured choice validation ------- + + def test_valid_choice_accepted(self, client): + """Responding with a valid choice value succeeds.""" + choices = [ + {"value": "approve", "label": "Approve"}, + {"value": "deny", "label": "Deny", "description": "Reject the request"}, + ] + intent_id = self._suspend_with_choices(client, choices) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-choices", "value": "approve"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["value"] == "approve" + assert data["choice_label"] == "Approve" + + def test_invalid_choice_rejected(self, client): + """Responding with an invalid choice value returns 422.""" + choices = [ + {"value": "approve", "label": "Approve"}, + {"value": "deny", "label": "Deny"}, + ] + intent_id = self._suspend_with_choices(client, choices) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-choices", "value": "maybe"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 422 + detail = resp.json()["detail"] + assert detail["error"] == "invalid_choice" + assert "maybe" in detail["message"] + assert len(detail["valid_choices"]) == 2 + + def test_choice_description_in_response(self, client): + """Response includes the matching choice's description.""" + choices = [ + {"value": "approve", "label": "Approve", "description": "Issue the refund"}, + {"value": "deny", "label": "Deny", "description": "Reject the refund"}, + ] + intent_id = self._suspend_with_choices(client, choices) + + resp = client.post( + 
f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-choices", "value": "deny"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["choice_label"] == "Deny" + assert data["choice_description"] == "Reject the refund" + + def test_confirm_type_yes_no(self, client): + """Confirm response_type allows yes/no values.""" + choices = [ + {"value": "yes", "label": "Yes"}, + {"value": "no", "label": "No"}, + ] + intent_id = self._suspend_with_choices(client, choices, response_type="confirm") + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-choices", "value": "yes"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 200 + assert resp.json()["value"] == "yes" + + def test_text_response_type_accepts_freeform(self, client): + """Text response_type accepts any value (no choice validation).""" + intent_id = self._suspend_with_choices(client, [], response_type="text") + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-choices", "value": "some freeform text"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 200 + assert resp.json()["value"] == "some freeform text" + + def test_suspension_id_mismatch_rejected(self, client): + """Mismatched suspension_id returns 409.""" + choices = [{"value": "ok", "label": "OK"}] + intent_id = self._suspend_with_choices(client, choices) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "wrong-id", "value": "ok"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 409 + assert "mismatch" in resp.json()["detail"] + + def test_no_choices_allows_any_value(self, client): + """If suspension has no choices defined, any value is accepted.""" + intent = self._create_intent(client) + intent_id = intent["id"] + self._set_status( + client, intent_id, 
"suspended_awaiting_input", intent["version"] + ) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-any", "value": "anything"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 200 + + def test_empty_suspension_id_rejected(self, client): + """Empty suspension_id returns 422.""" + intent = self._create_intent(client) + intent_id = intent["id"] + self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "", "value": "yes"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 422 + assert "required" in resp.json()["detail"].lower() + + def test_missing_suspension_id_rejected(self, client): + """Missing suspension_id returns 422.""" + intent = self._create_intent(client) + intent_id = intent["id"] + self._set_status( + client, intent_id, "suspended_awaiting_input", intent["version"] + ) + + resp = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"value": "yes"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp.status_code == 422 + + def test_confirm_without_choices_validates_yes_no(self, client): + """Confirm type without explicit choices still validates yes/no.""" + intent_id = self._suspend_with_choices(client, [], response_type="confirm") + + resp_valid = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-choices", "value": "yes"}, + headers={"X-API-Key": "test-key"}, + ) + assert resp_valid.status_code == 200 + + def test_confirm_without_choices_rejects_invalid(self, client): + """Confirm type without explicit choices rejects non-yes/no.""" + intent_id = self._suspend_with_choices(client, [], response_type="confirm") + + resp_invalid = client.post( + f"/api/v1/intents/{intent_id}/suspend/respond", + json={"suspension_id": "susp-choices", "value": "maybe"}, + 
headers={"X-API-Key": "test-key"}, + ) + assert resp_invalid.status_code == 422 + + +# --------------------------------------------------------------------------- +# Model tests for ResponseType, SuspensionChoice +# --------------------------------------------------------------------------- + + +class TestResponseType: + """ResponseType enum tests.""" + + def test_values(self): + from openintent.models import ResponseType + + assert ResponseType.CHOICE == "choice" + assert ResponseType.CONFIRM == "confirm" + assert ResponseType.TEXT == "text" + assert ResponseType.FORM == "form" + + def test_is_string_enum(self): + from openintent.models import ResponseType + + assert isinstance(ResponseType.CHOICE, str) + + +class TestSuspensionChoice: + """SuspensionChoice model tests.""" + + def test_construction(self): + from openintent.models import SuspensionChoice + + c = SuspensionChoice(value="approve", label="Approve") + assert c.value == "approve" + assert c.label == "Approve" + assert c.description == "" + assert c.style == "default" + assert c.metadata == {} + + def test_full_construction(self): + from openintent.models import SuspensionChoice + + c = SuspensionChoice( + value="deny", + label="Deny", + description="Reject the request", + style="danger", + metadata={"reason_required": True}, + ) + assert c.description == "Reject the request" + assert c.style == "danger" + assert c.metadata["reason_required"] is True + + def test_to_dict_minimal(self): + from openintent.models import SuspensionChoice + + c = SuspensionChoice(value="ok", label="OK") + d = c.to_dict() + assert d == {"value": "ok", "label": "OK"} + assert "style" not in d + assert "description" not in d + + def test_to_dict_full(self): + from openintent.models import SuspensionChoice + + c = SuspensionChoice( + value="approve", label="Approve", description="Go ahead", style="primary" + ) + d = c.to_dict() + assert d["description"] == "Go ahead" + assert d["style"] == "primary" + + def 
test_from_dict_round_trip(self): + from openintent.models import SuspensionChoice + + c = SuspensionChoice( + value="x", label="X", description="desc", style="danger", metadata={"a": 1} + ) + d = c.to_dict() + c2 = SuspensionChoice.from_dict(d) + assert c2.value == c.value + assert c2.label == c.label + assert c2.description == c.description + assert c2.style == c.style + + +class TestSuspensionRecordChoices: + """SuspensionRecord with choices / response_type.""" + + def test_default_response_type(self): + from openintent.models import SuspensionRecord + + rec = SuspensionRecord(id="s1", question="Q") + assert rec.response_type == "choice" + assert rec.choices == [] + + def test_choices_in_to_dict(self): + from openintent.models import SuspensionChoice, SuspensionRecord + + rec = SuspensionRecord( + id="s1", + question="Pick one", + response_type="choice", + choices=[ + SuspensionChoice(value="a", label="Alpha"), + SuspensionChoice(value="b", label="Beta"), + ], + ) + d = rec.to_dict() + assert d["response_type"] == "choice" + assert len(d["choices"]) == 2 + assert d["choices"][0]["value"] == "a" + assert d["choices"][1]["label"] == "Beta" + + def test_from_dict_with_choices(self): + from openintent.models import SuspensionRecord + + d = { + "id": "s2", + "question": "Continue?", + "response_type": "confirm", + "choices": [ + {"value": "yes", "label": "Yes"}, + {"value": "no", "label": "No"}, + ], + } + rec = SuspensionRecord.from_dict(d) + assert rec.response_type == "confirm" + assert len(rec.choices) == 2 + assert rec.choices[0].value == "yes" + assert rec.choices[1].label == "No" + + def test_valid_values_with_choices(self): + from openintent.models import SuspensionChoice, SuspensionRecord + + rec = SuspensionRecord( + id="s3", + question="Q", + response_type="choice", + choices=[ + SuspensionChoice(value="x", label="X"), + SuspensionChoice(value="y", label="Y"), + ], + ) + assert rec.valid_values() == ["x", "y"] + + def test_valid_values_text_type(self): + 
from openintent.models import SuspensionRecord + + rec = SuspensionRecord(id="s4", question="Q", response_type="text") + assert rec.valid_values() is None + + def test_valid_values_form_type(self): + from openintent.models import SuspensionRecord + + rec = SuspensionRecord(id="s5", question="Q", response_type="form") + assert rec.valid_values() is None + + +class TestHITLExports: + """Verify all HITL symbols are exported from the package.""" + + def test_models_exported(self): + import openintent + + assert hasattr(openintent, "ResponseType") + assert hasattr(openintent, "SuspensionChoice") + assert hasattr(openintent, "SuspensionRecord") + assert hasattr(openintent, "EngagementSignals") + assert hasattr(openintent, "EngagementDecision") + assert hasattr(openintent, "InputResponse") + + def test_exceptions_exported(self): + import openintent + + assert hasattr(openintent, "InputTimeoutError") + assert hasattr(openintent, "InputCancelledError") + + def test_decorators_exported(self): + import openintent + + assert hasattr(openintent, "on_input_requested") + assert hasattr(openintent, "on_input_received") + assert hasattr(openintent, "on_suspension_expired") + assert hasattr(openintent, "on_engagement_decision") + + def test_version_is_0_17_0(self): + import openintent + + assert openintent.__version__ == "0.17.0" + + +# =========================================================================== +# RFC-0026: Suspension Container Interaction & Human Retry +# =========================================================================== + + +class TestHumanRetryPolicyConstruction: + """HumanRetryPolicy dataclass — construction and defaults.""" + + def test_defaults(self): + from openintent.models import HumanRetryPolicy + + p = HumanRetryPolicy() + assert p.max_attempts == 3 + assert p.interval_seconds == 3600 + assert p.strategy == "fixed" + assert p.escalation_ladder == [] + assert p.final_fallback_policy == "fail" + + def test_custom_values(self): + from 
openintent.models import HumanRetryPolicy + + p = HumanRetryPolicy( + max_attempts=5, + interval_seconds=900, + strategy="exponential", + final_fallback_policy="complete_with_fallback", + ) + assert p.max_attempts == 5 + assert p.interval_seconds == 900 + assert p.strategy == "exponential" + assert p.final_fallback_policy == "complete_with_fallback" + + def test_to_dict_no_ladder(self): + from openintent.models import HumanRetryPolicy + + p = HumanRetryPolicy(max_attempts=2, interval_seconds=600) + d = p.to_dict() + assert d["max_attempts"] == 2 + assert d["interval_seconds"] == 600 + assert "escalation_ladder" not in d + + def test_from_dict_round_trip(self): + from openintent.models import HumanRetryPolicy + + raw = { + "max_attempts": 4, + "interval_seconds": 1800, + "strategy": "linear", + "final_fallback_policy": "complete_with_fallback", + } + p = HumanRetryPolicy.from_dict(raw) + assert p.max_attempts == 4 + assert p.interval_seconds == 1800 + assert p.strategy == "linear" + assert p.final_fallback_policy == "complete_with_fallback" + + def test_from_dict_defaults_on_empty(self): + from openintent.models import HumanRetryPolicy + + p = HumanRetryPolicy.from_dict({}) + assert p.max_attempts == 3 + assert p.interval_seconds == 3600 + assert p.strategy == "fixed" + assert p.final_fallback_policy == "fail" + + +class TestEscalationStep: + """EscalationStep dataclass — construction and serialization.""" + + def test_construction(self): + from openintent.models import EscalationStep + + s = EscalationStep( + attempt=2, channel_hint="pagerduty", notify_to="on-call-team" + ) + assert s.attempt == 2 + assert s.channel_hint == "pagerduty" + assert s.notify_to == "on-call-team" + assert s.after_attempt == 2 + assert s.channel == "pagerduty" + assert s.notify == "on-call-team" + + def test_to_dict(self): + from openintent.models import EscalationStep + + s = EscalationStep(attempt=3, channel_hint="slack", notify_to="#ops") + d = s.to_dict() + assert d == {"attempt": 3, 
"channel_hint": "slack", "notify_to": "#ops"} + + def test_from_dict_round_trip(self): + from openintent.models import EscalationStep + + raw = { + "attempt": 2, + "channel_hint": "email", + "notify_to": "manager@example.com", + } + s = EscalationStep.from_dict(raw) + assert s.attempt == 2 + assert s.channel_hint == "email" + assert s.notify_to == "manager@example.com" + + def test_from_dict_legacy_field_names(self): + """from_dict accepts legacy after_attempt/channel/notify for backwards compat.""" + from openintent.models import EscalationStep + + raw = {"after_attempt": 2, "channel": "email", "notify": "manager@example.com"} + s = EscalationStep.from_dict(raw) + assert s.attempt == 2 + assert s.channel_hint == "email" + assert s.notify_to == "manager@example.com" + + +class TestHumanRetryPolicyWithLadder: + """HumanRetryPolicy with an escalation_ladder.""" + + def test_with_ladder(self): + from openintent.models import EscalationStep, HumanRetryPolicy + + p = HumanRetryPolicy( + max_attempts=3, + interval_seconds=300, + escalation_ladder=[ + EscalationStep(attempt=2, channel_hint="pagerduty", notify_to="ops"), + ], + ) + d = p.to_dict() + assert "escalation_ladder" in d + assert d["escalation_ladder"][0]["attempt"] == 2 + assert d["escalation_ladder"][0]["channel_hint"] == "pagerduty" + + def test_from_dict_with_ladder(self): + from openintent.models import HumanRetryPolicy + + raw = { + "max_attempts": 3, + "interval_seconds": 300, + "strategy": "fixed", + "final_fallback_policy": "fail", + "escalation_ladder": [ + {"attempt": 2, "channel_hint": "slack", "notify_to": "#ops"}, + ], + } + p = HumanRetryPolicy.from_dict(raw) + assert len(p.escalation_ladder) == 1 + assert p.escalation_ladder[0].attempt == 2 + assert p.escalation_ladder[0].channel_hint == "slack" + + +class TestSuspensionRecordRetryPolicy: + """SuspensionRecord.retry_policy field (RFC-0026).""" + + def test_retry_policy_none_by_default(self): + from openintent.models import SuspensionRecord + + s = 
SuspensionRecord(id="x", question="q?") + assert s.retry_policy is None + + def test_retry_policy_set(self): + from openintent.models import HumanRetryPolicy, SuspensionRecord + + p = HumanRetryPolicy(max_attempts=2, interval_seconds=120) + s = SuspensionRecord(id="x", question="q?", retry_policy=p) + assert s.retry_policy is p + + def test_to_dict_includes_retry_policy(self): + from openintent.models import HumanRetryPolicy, SuspensionRecord + + p = HumanRetryPolicy(max_attempts=2) + s = SuspensionRecord(id="abc", question="Approve?", retry_policy=p) + d = s.to_dict() + assert "retry_policy" in d + assert d["retry_policy"]["max_attempts"] == 2 + + def test_to_dict_no_retry_policy_omits_key(self): + from openintent.models import SuspensionRecord + + s = SuspensionRecord(id="abc", question="Approve?") + d = s.to_dict() + assert "retry_policy" not in d + + def test_from_dict_with_retry_policy(self): + from openintent.models import SuspensionRecord + + raw = { + "id": "abc", + "question": "Approve?", + "fallback_policy": "fail", + "retry_policy": { + "max_attempts": 3, + "interval_seconds": 600, + "strategy": "fixed", + "final_fallback_policy": "complete_with_fallback", + }, + } + s = SuspensionRecord.from_dict(raw) + assert s.retry_policy is not None + assert s.retry_policy.max_attempts == 3 + assert s.retry_policy.final_fallback_policy == "complete_with_fallback" + + def test_from_dict_without_retry_policy(self): + from openintent.models import SuspensionRecord + + raw = {"id": "abc", "question": "Approve?", "fallback_policy": "fail"} + s = SuspensionRecord.from_dict(raw) + assert s.retry_policy is None + + +class TestEventTypeRFC0026: + """RFC-0026 EventType constants.""" + + def test_renotified_event(self): + from openintent.models import EventType + + assert EventType.INTENT_SUSPENSION_RENOTIFIED == "intent.suspension_renotified" + + def test_escalated_event(self): + from openintent.models import EventType + + assert EventType.INTENT_SUSPENSION_ESCALATED == 
"intent.suspension_escalated" + + def test_portfolio_member_suspended(self): + from openintent.models import EventType + + assert EventType.PORTFOLIO_MEMBER_SUSPENDED == "portfolio.member_suspended" + + def test_portfolio_member_resumed(self): + from openintent.models import EventType + + assert EventType.PORTFOLIO_MEMBER_RESUMED == "portfolio.member_resumed" + + def test_rfc0026_events_in_enum(self): + from openintent.models import EventType + + values = {e.value for e in EventType} + assert "intent.suspension_renotified" in values + assert "intent.suspension_escalated" in values + assert "portfolio.member_suspended" in values + assert "portfolio.member_resumed" in values + + +class TestRequestInputRetryPolicy: + """request_input() accepts retry_policy parameter (RFC-0026).""" + + def test_request_input_signature_accepts_retry_policy(self): + import inspect + + from openintent.agents import BaseAgent + + sig = inspect.signature(BaseAgent.request_input) + assert "retry_policy" in sig.parameters + + def test_retry_policy_default_is_none(self): + import inspect + + from openintent.agents import BaseAgent + + sig = inspect.signature(BaseAgent.request_input) + p = sig.parameters["retry_policy"] + assert p.default is None + + +class TestBaseAgentDefaultHumanRetryPolicy: + """BaseAgent.default_human_retry_policy class attribute (RFC-0026).""" + + def test_default_is_none(self): + from openintent.agents import BaseAgent + + assert BaseAgent.default_human_retry_policy is None + + def test_can_set_on_subclass(self): + from openintent.agents import BaseAgent + from openintent.models import HumanRetryPolicy + + class MyAgent(BaseAgent): + default_human_retry_policy = HumanRetryPolicy( + max_attempts=4, interval_seconds=600 + ) + + assert MyAgent.default_human_retry_policy is not None + assert MyAgent.default_human_retry_policy.max_attempts == 4 + + def test_subclass_policy_does_not_affect_base(self): + from openintent.agents import BaseAgent + from openintent.models import 
HumanRetryPolicy + + class MyAgent(BaseAgent): + default_human_retry_policy = HumanRetryPolicy(max_attempts=2) + + assert BaseAgent.default_human_retry_policy is None + + +class TestRFC0026PackageExports: + """RFC-0026 symbols are exported from the openintent top-level package.""" + + def test_human_retry_policy_exported(self): + import openintent + + assert hasattr(openintent, "HumanRetryPolicy") + + def test_escalation_step_exported(self): + import openintent + + assert hasattr(openintent, "EscalationStep") + + def test_upstream_intent_suspended_error_exported(self): + import openintent + + assert hasattr(openintent, "UpstreamIntentSuspendedError") + + def test_human_retry_policy_instantiable_from_package(self): + import openintent + + p = openintent.HumanRetryPolicy(max_attempts=2, interval_seconds=300) + assert p.max_attempts == 2 + + def test_event_types_renotified_exported(self): + import openintent + + assert ( + openintent.EventType.INTENT_SUSPENSION_RENOTIFIED + == "intent.suspension_renotified" + ) + + def test_event_types_escalated_exported(self): + import openintent + + assert ( + openintent.EventType.INTENT_SUSPENSION_ESCALATED + == "intent.suspension_escalated" + ) + + +class TestRenotificationHandlerInvocation: + """RFC-0026: re-notification fires @on_input_requested with attempt data in suspension.context.""" + + def test_suspension_context_attempt_key_structure(self): + """Verify _attempt/_max_attempts context keys match RFC-0026 spec (not _renotify dict).""" + from openintent.models import HumanRetryPolicy, SuspensionRecord + + p = HumanRetryPolicy(max_attempts=3, interval_seconds=60) + s = SuspensionRecord( + id="x", question="q?", retry_policy=p, context={"foo": "bar"} + ) + + import dataclasses + + renotify_context = dict(s.context) + renotify_context["_attempt"] = 2 + renotify_context["_max_attempts"] = 3 + renotify_suspension = dataclasses.replace(s, context=renotify_context) + + assert renotify_suspension.context["_attempt"] == 2 + assert 
renotify_suspension.context["_max_attempts"] == 3 + assert renotify_suspension.context["foo"] == "bar" + + def test_escalation_channel_hint_applied_to_suspension(self): + """Escalation step channel_hint is applied on re-notification suspension.""" + from openintent.models import EscalationStep, HumanRetryPolicy, SuspensionRecord + + p = HumanRetryPolicy( + max_attempts=3, + interval_seconds=60, + escalation_ladder=[ + EscalationStep( + attempt=2, channel_hint="pagerduty", notify_to="on-call" + ), + ], + ) + s = SuspensionRecord(id="x", question="q?", retry_policy=p) + + import dataclasses + + step = p.escalation_ladder[0] + renotify_context = dict(s.context) + renotify_context["_attempt"] = 2 + renotify_context["_max_attempts"] = 3 + renotify_context["_notify_to"] = step.notify_to + renotify_suspension = dataclasses.replace( + s, + context=renotify_context, + channel_hint=step.channel_hint, + ) + + assert renotify_suspension.channel_hint == "pagerduty" + assert renotify_suspension.context["_notify_to"] == "on-call" + assert renotify_suspension.context["_attempt"] == 2 + + def test_original_suspension_context_unchanged(self): + """The original suspension.context should not be mutated during re-notification.""" + from openintent.models import HumanRetryPolicy, SuspensionRecord + + original_ctx = {"original_key": "original_value"} + p = HumanRetryPolicy(max_attempts=2, interval_seconds=10) + s = SuspensionRecord( + id="x", question="q?", retry_policy=p, context=original_ctx + ) + + import dataclasses + + renotify_context = dict(s.context) + renotify_context["_attempt"] = 2 + renotify_context["_max_attempts"] = 2 + _renotify_suspension = dataclasses.replace(s, context=renotify_context) + + assert s.context == {"original_key": "original_value"} + assert "_attempt" not in s.context + + def test_handler_receives_same_signature_on_renotify(self): + """Handlers receive (intent, suspension) — same signature for first call and re-notifies.""" + from openintent.models import 
HumanRetryPolicy, SuspensionRecord + + received_args = [] + + async def my_handler(intent, suspension_record): + received_args.append((intent, suspension_record)) + + p = HumanRetryPolicy(max_attempts=3, interval_seconds=60) + s = SuspensionRecord(id="x", question="q?", retry_policy=p) + + import asyncio + import dataclasses + + renotify_suspension = dataclasses.replace( + s, context={"_attempt": 2, "_max_attempts": 3} + ) + + asyncio.run(my_handler("mock_intent", renotify_suspension)) + + assert len(received_args) == 1 + _intent, susp = received_args[0] + assert susp.context["_attempt"] == 2 + + def test_handler_can_read_attempt_from_context_rfc0026_example(self): + """Handlers can read _attempt from context per RFC-0026 example code.""" + from openintent.models import HumanRetryPolicy, SuspensionRecord + + p = HumanRetryPolicy(max_attempts=3, interval_seconds=60) + s = SuspensionRecord(id="x", question="q?", retry_policy=p) + + import dataclasses + + renotify_suspension = dataclasses.replace( + s, context={"_attempt": 2, "_max_attempts": 3} + ) + + attempt = renotify_suspension.context.get("_attempt", 1) + max_att = renotify_suspension.context.get("_max_attempts", 1) + assert attempt == 2 + assert max_att == 3 + + +class TestPlatformLevelCascade: + """RFC-0026 §5.3: three-level retry policy cascade: call-site > agent > platform.""" + + def test_server_config_suspension_field(self): + """ServerConfig supports suspension_default_retry_policy field.""" + from openintent.server.config import ServerConfig + + cfg = ServerConfig( + suspension_default_retry_policy={ + "max_attempts": 3, + "interval_seconds": 1800, + "strategy": "linear", + "escalation_ladder": [], + "final_fallback_policy": "fail", + } + ) + assert cfg.suspension_default_retry_policy["max_attempts"] == 3 + assert cfg.suspension_default_retry_policy["interval_seconds"] == 1800 + + def test_server_config_suspension_default_none(self): + """ServerConfig.suspension_default_retry_policy is None by 
default.""" + from openintent.server.config import ServerConfig + + cfg = ServerConfig() + assert cfg.suspension_default_retry_policy is None + + def test_human_retry_policy_from_dict_roundtrip(self): + """HumanRetryPolicy.from_dict can deserialise a ServerConfig policy dict.""" + from openintent.models import HumanRetryPolicy + + raw = { + "max_attempts": 4, + "interval_seconds": 900, + "strategy": "linear", + "escalation_ladder": [ + { + "after_attempt": 3, + "channel": "pagerduty", + "notify": "ops@example.com", + }, + ], + "final_fallback_policy": "fail", + } + policy = HumanRetryPolicy.from_dict(raw) + assert policy.max_attempts == 4 + assert policy.interval_seconds == 900 + assert len(policy.escalation_ladder) == 1 + assert policy.escalation_ladder[0].channel == "pagerduty" + + def test_expires_at_safeguard_interval_zero(self): + """When interval_seconds=0 and max_attempts=1, timeout_seconds is used for expiry.""" + from datetime import datetime, timedelta + + from openintent.models import HumanRetryPolicy + + p = HumanRetryPolicy(max_attempts=1, interval_seconds=0) + timeout_seconds = 300 + now = datetime.utcnow() + total_seconds = p.interval_seconds * p.max_attempts + if total_seconds > 0: + expires_at = now + timedelta(seconds=total_seconds) + elif timeout_seconds is not None: + expires_at = now + timedelta(seconds=timeout_seconds) + else: + expires_at = None + + assert expires_at is not None + delta = (expires_at - now).total_seconds() + assert abs(delta - 300) < 2 + + def test_renotification_event_payload_fields(self): + """intent.suspension_renotified payload uses RFC-0026 field names.""" + payload = { + "suspension_id": "susp-123", + "attempt": 2, + "max_attempts": 3, + "channel_hint": "email", + "notify_to": None, + "next_attempt_at": "2026-03-24T11:00:00Z", + } + assert "channel_hint" in payload + assert "notify_to" in payload + assert "next_attempt_at" in payload + assert "channel" not in payload + + def test_escalation_event_payload_fields(self): + 
"""intent.suspension_escalated payload uses RFC-0026 field names.""" + payload = { + "suspension_id": "susp-123", + "attempt": 3, + "escalated_to": "supervisor@example.com", + "channel_hint": "pagerduty", + } + assert "escalated_to" in payload + assert "channel_hint" in payload + assert "notify" not in payload + + +class TestMergeRetryPolicies: + """RFC-0026: _merge_retry_policies field-level merge logic.""" + + def test_all_none_returns_none(self): + from openintent.agents import _merge_retry_policies + + result = _merge_retry_policies( + call_site=None, agent_default=None, platform_default=None + ) + assert result is None + + def test_single_policy_returned_as_is(self): + from openintent.agents import _merge_retry_policies + from openintent.models import HumanRetryPolicy + + p = HumanRetryPolicy(max_attempts=5) + result = _merge_retry_policies( + call_site=p, agent_default=None, platform_default=None + ) + assert result is p + + def test_call_site_overrides_platform_max_attempts(self): + from openintent.agents import _merge_retry_policies + from openintent.models import HumanRetryPolicy + + platform = HumanRetryPolicy(max_attempts=3, interval_seconds=3600) + call = HumanRetryPolicy(max_attempts=5) + result = _merge_retry_policies( + call_site=call, agent_default=None, platform_default=platform + ) + assert result is not None + assert result.max_attempts == 5 + assert result.interval_seconds == 3600 + + def test_agent_inherits_platform_interval(self): + from openintent.agents import _merge_retry_policies + from openintent.models import HumanRetryPolicy + + platform = HumanRetryPolicy(max_attempts=3, interval_seconds=1800) + agent = HumanRetryPolicy(max_attempts=2) + result = _merge_retry_policies( + call_site=None, agent_default=agent, platform_default=platform + ) + assert result is not None + assert result.max_attempts == 2 + assert result.interval_seconds == 1800 + + def test_call_site_escalation_ladder_overrides_lower_levels(self): + from openintent.agents 
import _merge_retry_policies + from openintent.models import EscalationStep, HumanRetryPolicy + + platform = HumanRetryPolicy( + escalation_ladder=[EscalationStep(attempt=2, channel_hint="email")] + ) + call = HumanRetryPolicy( + escalation_ladder=[EscalationStep(attempt=3, channel_hint="pagerduty")] + ) + result = _merge_retry_policies( + call_site=call, agent_default=None, platform_default=platform + ) + assert result is not None + assert len(result.escalation_ladder) == 1 + assert result.escalation_ladder[0].attempt == 3 + assert result.escalation_ladder[0].channel_hint == "pagerduty" + + def test_platform_only_returns_platform(self): + from openintent.agents import _merge_retry_policies + from openintent.models import HumanRetryPolicy + + platform = HumanRetryPolicy(max_attempts=4, interval_seconds=900) + result = _merge_retry_policies( + call_site=None, agent_default=None, platform_default=platform + ) + assert result is not None + assert result.max_attempts == 4 + assert result.interval_seconds == 900 diff --git a/tests/test_workflow_io.py b/tests/test_workflow_io.py new file mode 100644 index 0000000..71484ee --- /dev/null +++ b/tests/test_workflow_io.py @@ -0,0 +1,1591 @@ +"""Tests for RFC-0024: Workflow I/O Contracts. + +Covers: +- Error types (MissingOutputError, OutputTypeMismatchError, UnresolvableInputError, + InputWiringError) — construction, attributes, str representation. +- YAML parsing of ``outputs`` and ``inputs`` on PhaseConfig. +- Parse-time I/O wiring validation (_validate_io_wiring) including invalid + references, missing depends_on, and unknown upstream output keys. +- resolve_task_inputs — upstream phase refs, $trigger.*, $initial_state.*, + and UnresolvableInputError when keys are absent. +- validate_task_outputs — MissingOutputError for absent required keys, + OutputTypeMismatchError for all primitive types and named/enum types, + optional fields, and no-op when outputs schema is absent. 
+- validate_claim_inputs — delegates to resolve_task_inputs; raises on + unresolvable refs. +- _check_value_type — all six primitive types, named struct type (top-level + key presence), enum type (value membership), unknown named type (accepted). +- Incremental adoption — phases without outputs/inputs are unaffected. +- Package exports — all RFC-0024 symbols exported from openintent top-level. +""" + +import pytest + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_spec(yaml_text: str, tmp_path): + """Write *yaml_text* to a temp file, parse, and return WorkflowSpec.""" + from openintent.workflow import WorkflowSpec + + f = tmp_path / "wf.yaml" + f.write_text(yaml_text) + return WorkflowSpec.from_yaml(str(f)) + + +_BASE = """\ +openintent: "1.0" +info: + name: "IO Test Workflow" +""" + + +# --------------------------------------------------------------------------- +# Error type construction +# --------------------------------------------------------------------------- + + +class TestMissingOutputError: + def test_construction(self): + from openintent.workflow import MissingOutputError + + e = MissingOutputError( + task_id="t1", phase_name="fetch", missing_keys=["revenue"] + ) + assert e.task_id == "t1" + assert e.phase_name == "fetch" + assert e.missing_keys == ["revenue"] + + def test_multiple_missing_keys(self): + from openintent.workflow import MissingOutputError + + e = MissingOutputError( + task_id="t2", phase_name="analyze", missing_keys=["a", "b", "c"] + ) + assert len(e.missing_keys) == 3 + assert "b" in e.missing_keys + + def test_is_workflow_error(self): + from openintent.workflow import MissingOutputError, WorkflowError + + e = MissingOutputError(task_id="", phase_name="p", missing_keys=["x"]) + assert isinstance(e, WorkflowError) + + def test_message_contains_keys(self): + from openintent.workflow import 
MissingOutputError + + e = MissingOutputError(task_id="t", phase_name="p", missing_keys=["revenue"]) + assert "revenue" in str(e) + + def test_empty_task_id(self): + from openintent.workflow import MissingOutputError + + e = MissingOutputError(task_id="", phase_name="p", missing_keys=["k"]) + assert e.task_id == "" + + def test_export_from_package(self): + import openintent + + assert hasattr(openintent, "MissingOutputError") + + +class TestOutputTypeMismatchError: + def test_construction(self): + from openintent.workflow import OutputTypeMismatchError + + e = OutputTypeMismatchError( + task_id="t1", + phase_name="fetch", + key="revenue", + expected_type="number", + actual_type="str", + ) + assert e.task_id == "t1" + assert e.phase_name == "fetch" + assert e.key == "revenue" + assert e.expected_type == "number" + assert e.actual_type == "str" + + def test_is_workflow_error(self): + from openintent.workflow import OutputTypeMismatchError, WorkflowError + + e = OutputTypeMismatchError( + task_id="", + phase_name="p", + key="k", + expected_type="string", + actual_type="int", + ) + assert isinstance(e, WorkflowError) + + def test_message_contains_key_and_types(self): + from openintent.workflow import OutputTypeMismatchError + + e = OutputTypeMismatchError( + task_id="", + phase_name="p", + key="revenue", + expected_type="number", + actual_type="str", + ) + msg = str(e) + assert "revenue" in msg + + def test_export_from_package(self): + import openintent + + assert hasattr(openintent, "OutputTypeMismatchError") + + +class TestUnresolvableInputError: + def test_construction(self): + from openintent.workflow import UnresolvableInputError + + e = UnresolvableInputError( + task_id="t1", + phase_name="analyze", + unresolvable_refs=["fetch.revenue"], + ) + assert e.task_id == "t1" + assert e.phase_name == "analyze" + assert e.unresolvable_refs == ["fetch.revenue"] + + def test_multiple_refs(self): + from openintent.workflow import UnresolvableInputError + + e = 
UnresolvableInputError( + task_id="", phase_name="p", unresolvable_refs=["a.x", "b.y"] + ) + assert len(e.unresolvable_refs) == 2 + + def test_is_workflow_error(self): + from openintent.workflow import UnresolvableInputError, WorkflowError + + e = UnresolvableInputError(task_id="", phase_name="p", unresolvable_refs=[]) + assert isinstance(e, WorkflowError) + + def test_export_from_package(self): + import openintent + + assert hasattr(openintent, "UnresolvableInputError") + + +class TestInputWiringError: + def test_construction(self): + from openintent.workflow import InputWiringError + + e = InputWiringError(phase_name="analyze", invalid_refs=["revenue: bad_ref"]) + assert e.phase_name == "analyze" + assert e.invalid_refs == ["revenue: bad_ref"] + + def test_is_workflow_validation_error(self): + from openintent.workflow import InputWiringError, WorkflowValidationError + + e = InputWiringError(phase_name="p", invalid_refs=[]) + assert isinstance(e, WorkflowValidationError) + + def test_suggestion_optional(self): + from openintent.workflow import InputWiringError + + e = InputWiringError(phase_name="p", invalid_refs=[], suggestion="Fix it") + assert e.suggestion == "Fix it" + + def test_export_from_package(self): + import openintent + + assert hasattr(openintent, "InputWiringError") + + +# --------------------------------------------------------------------------- +# YAML parsing of outputs and inputs +# --------------------------------------------------------------------------- + + +class TestWorkflowIOParsing: + def test_outputs_dict_form(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + label: string +""", + tmp_path, + ) + phase = spec.phases[0] + assert phase.outputs == {"revenue": "number", "label": "string"} + + def test_outputs_legacy_list_form(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + - revenue 
+ - label +""", + tmp_path, + ) + phase = spec.phases[0] + assert phase.outputs == {"revenue": "any", "label": "any"} + + def test_outputs_optional_flag(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + notes: + type: string + required: false +""", + tmp_path, + ) + phase = spec.phases[0] + assert phase.outputs["revenue"] == "number" + assert phase.outputs["notes"] == {"type": "string", "required": False} + + def test_inputs_dict_form(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + rev: fetch.revenue +""", + tmp_path, + ) + phase = next(p for p in spec.phases if p.name == "analyze") + assert phase.inputs == {"rev": "fetch.revenue"} + + def test_trigger_input_reference(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + inputs: + quarter: $trigger.quarter +""", + tmp_path, + ) + phase = spec.phases[0] + assert phase.inputs == {"quarter": "$trigger.quarter"} + + def test_initial_state_input_reference(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + inputs: + config: $initial_state.config +""", + tmp_path, + ) + phase = spec.phases[0] + assert phase.inputs == {"config": "$initial_state.config"} + + def test_types_block_stored(self, tmp_path): + spec = _make_spec( + _BASE + + """ +types: + FinancialSummary: + revenue: number + expenses: number + +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + summary: FinancialSummary +""", + tmp_path, + ) + assert "FinancialSummary" in spec.types + assert spec.phases[0].outputs == {"summary": "FinancialSummary"} + + def test_no_outputs_defaults_empty(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + simple: + title: "Simple" + 
assign: agent +""", + tmp_path, + ) + assert spec.phases[0].outputs == {} + + def test_no_inputs_defaults_empty(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + simple: + title: "Simple" + assign: agent +""", + tmp_path, + ) + assert spec.phases[0].inputs == {} + + +# --------------------------------------------------------------------------- +# Parse-time I/O wiring validation (_validate_io_wiring) +# --------------------------------------------------------------------------- + + +class TestValidateIOWiring: + def test_valid_wiring_passes(self, tmp_path): + _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + rev: fetch.revenue +""", + tmp_path, + ) + + def test_reference_to_nonexistent_phase_raises(self, tmp_path): + from openintent.workflow import InputWiringError + + with pytest.raises(InputWiringError): + _make_spec( + _BASE + + """ +workflow: + analyze: + title: "Analyze" + assign: agent + inputs: + rev: ghost.revenue +""", + tmp_path, + ) + + def test_reference_to_non_dependency_raises(self, tmp_path): + from openintent.workflow import InputWiringError + + with pytest.raises(InputWiringError): + _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + + analyze: + title: "Analyze" + assign: agent + inputs: + rev: fetch.revenue +""", + tmp_path, + ) + + def test_reference_to_undeclared_output_key_raises(self, tmp_path): + from openintent.workflow import InputWiringError + + with pytest.raises(InputWiringError): + _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + x: fetch.nonexistent_key +""", + tmp_path, + ) + + def test_trigger_reference_skips_validation(self, tmp_path): + _make_spec( + _BASE + + """ +workflow: 
+ fetch: + title: "Fetch" + assign: agent + inputs: + quarter: $trigger.quarter +""", + tmp_path, + ) + + def test_initial_state_reference_skips_validation(self, tmp_path): + _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + inputs: + config: $initial_state.config +""", + tmp_path, + ) + + def test_upstream_without_outputs_declared_skips_key_check(self, tmp_path): + """If upstream has no outputs block, skip key validation (incremental adoption).""" + _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + rev: fetch.any_key_is_ok +""", + tmp_path, + ) + + def test_invalid_syntax_no_dot_raises(self, tmp_path): + from openintent.workflow import InputWiringError + + with pytest.raises(InputWiringError): + _make_spec( + _BASE + + """ +workflow: + analyze: + title: "Analyze" + assign: agent + inputs: + rev: just_a_bare_string +""", + tmp_path, + ) + + def test_multiple_inputs_one_bad_raises(self, tmp_path): + from openintent.workflow import InputWiringError + + with pytest.raises(InputWiringError): + _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + expenses: number + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + rev: fetch.revenue + bad: ghost.whatever +""", + tmp_path, + ) + + +# --------------------------------------------------------------------------- +# resolve_task_inputs +# --------------------------------------------------------------------------- + + +class TestResolveTaskInputs: + def _spec_with_io(self, tmp_path): + return _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + expenses: number + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + rev: fetch.revenue + exp: fetch.expenses +""", + tmp_path, + ) + + def 
test_resolves_from_upstream(self, tmp_path): + spec = self._spec_with_io(tmp_path) + result = spec.resolve_task_inputs( + phase_name="analyze", + upstream_outputs={"fetch": {"revenue": 1000, "expenses": 200}}, + ) + assert result == {"rev": 1000, "exp": 200} + + def test_no_inputs_returns_empty(self, tmp_path): + spec = self._spec_with_io(tmp_path) + result = spec.resolve_task_inputs( + phase_name="fetch", + upstream_outputs={}, + ) + assert result == {} + + def test_trigger_reference_resolved(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + inputs: + quarter: $trigger.quarter +""", + tmp_path, + ) + result = spec.resolve_task_inputs( + phase_name="fetch", + upstream_outputs={}, + trigger_payload={"quarter": "Q1-2026"}, + ) + assert result == {"quarter": "Q1-2026"} + + def test_initial_state_reference_resolved(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + inputs: + cfg: $initial_state.config +""", + tmp_path, + ) + result = spec.resolve_task_inputs( + phase_name="fetch", + upstream_outputs={}, + initial_state={"config": {"timeout": 30}}, + ) + assert result == {"cfg": {"timeout": 30}} + + def test_missing_upstream_output_raises(self, tmp_path): + from openintent.workflow import UnresolvableInputError + + spec = self._spec_with_io(tmp_path) + with pytest.raises(UnresolvableInputError) as exc_info: + spec.resolve_task_inputs( + phase_name="analyze", + upstream_outputs={"fetch": {"revenue": 1000}}, + ) + err = exc_info.value + assert err.phase_name == "analyze" + assert any("expenses" in r for r in err.unresolvable_refs) + + def test_missing_trigger_key_raises(self, tmp_path): + from openintent.workflow import UnresolvableInputError + + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + inputs: + quarter: $trigger.quarter +""", + tmp_path, + ) + with pytest.raises(UnresolvableInputError): + 
spec.resolve_task_inputs( + phase_name="fetch", + upstream_outputs={}, + trigger_payload={}, + ) + + def test_missing_initial_state_key_raises(self, tmp_path): + from openintent.workflow import UnresolvableInputError + + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + inputs: + cfg: $initial_state.config +""", + tmp_path, + ) + with pytest.raises(UnresolvableInputError): + spec.resolve_task_inputs( + phase_name="fetch", + upstream_outputs={}, + initial_state={}, + ) + + def test_unknown_phase_raises_key_error(self, tmp_path): + spec = self._spec_with_io(tmp_path) + with pytest.raises(KeyError): + spec.resolve_task_inputs( + phase_name="nonexistent", + upstream_outputs={}, + ) + + def test_task_id_propagated_to_error(self, tmp_path): + from openintent.workflow import UnresolvableInputError + + spec = self._spec_with_io(tmp_path) + with pytest.raises(UnresolvableInputError) as exc_info: + spec.resolve_task_inputs( + phase_name="analyze", + upstream_outputs={}, + task_id="task-abc-123", + ) + assert exc_info.value.task_id == "task-abc-123" + + def test_multiple_unresolvable_collected(self, tmp_path): + from openintent.workflow import UnresolvableInputError + + spec = self._spec_with_io(tmp_path) + with pytest.raises(UnresolvableInputError) as exc_info: + spec.resolve_task_inputs( + phase_name="analyze", + upstream_outputs={}, + ) + assert len(exc_info.value.unresolvable_refs) == 2 + + +# --------------------------------------------------------------------------- +# validate_task_outputs +# --------------------------------------------------------------------------- + + +class TestValidateTaskOutputs: + def _spec_with_outputs(self, tmp_path, outputs_yaml: str): + return _make_spec( + _BASE + + f""" +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + {outputs_yaml} +""", + tmp_path, + ) + + def test_valid_output_passes(self, tmp_path): + spec = self._spec_with_outputs(tmp_path, "revenue: number") + 
spec.validate_task_outputs("fetch", {"revenue": 1000}) + + def test_missing_required_key_raises(self, tmp_path): + from openintent.workflow import MissingOutputError + + spec = self._spec_with_outputs(tmp_path, "revenue: number") + with pytest.raises(MissingOutputError) as exc_info: + spec.validate_task_outputs("fetch", {}) + assert "revenue" in exc_info.value.missing_keys + + def test_multiple_missing_keys_raises(self, tmp_path): + from openintent.workflow import MissingOutputError + + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + expenses: number +""", + tmp_path, + ) + with pytest.raises(MissingOutputError) as exc_info: + spec.validate_task_outputs("fetch", {}) + assert len(exc_info.value.missing_keys) == 2 + + def test_wrong_type_raises(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = self._spec_with_outputs(tmp_path, "revenue: number") + with pytest.raises(OutputTypeMismatchError) as exc_info: + spec.validate_task_outputs("fetch", {"revenue": "not-a-number"}) + assert exc_info.value.key == "revenue" + assert exc_info.value.expected_type == "number" + + def test_optional_field_absent_passes(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + notes: + type: string + required: false +""", + tmp_path, + ) + spec.validate_task_outputs("fetch", {"revenue": 100}) + + def test_optional_field_present_wrong_type_raises(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + notes: + type: string + required: false +""", + tmp_path, + ) + with pytest.raises(OutputTypeMismatchError): + spec.validate_task_outputs("fetch", {"revenue": 100, "notes": 42}) + + def test_no_outputs_schema_is_noop(self, tmp_path): + spec = _make_spec( + _BASE + + 
""" +workflow: + simple: + title: "Simple" + assign: agent +""", + tmp_path, + ) + spec.validate_task_outputs("simple", {}) + spec.validate_task_outputs("simple", {"anything": "is fine"}) + + def test_extra_keys_ignored(self, tmp_path): + spec = self._spec_with_outputs(tmp_path, "revenue: number") + spec.validate_task_outputs("fetch", {"revenue": 500, "bonus": "extra"}) + + def test_unknown_phase_raises_key_error(self, tmp_path): + spec = self._spec_with_outputs(tmp_path, "revenue: number") + with pytest.raises(KeyError): + spec.validate_task_outputs("nonexistent", {"revenue": 1}) + + def test_task_id_in_error(self, tmp_path): + from openintent.workflow import MissingOutputError + + spec = self._spec_with_outputs(tmp_path, "revenue: number") + with pytest.raises(MissingOutputError) as exc_info: + spec.validate_task_outputs("fetch", {}, task_id="task-xyz") + assert exc_info.value.task_id == "task-xyz" + + +# --------------------------------------------------------------------------- +# _check_value_type — primitive types +# --------------------------------------------------------------------------- + + +class TestCheckValueType: + def _spec(self, tmp_path): + return _make_spec( + _BASE + + """ +workflow: + p: + title: "P" + assign: a +""", + tmp_path, + ) + + def _check(self, spec, expected_type, value, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec.validate_task_outputs.__func__ # just to confirm it's on the class + try: + spec._check_value_type( + task_id="", + phase_name="p", + key="k", + expected_type=expected_type, + value=value, + ) + except OutputTypeMismatchError: + raise + + def test_string_accepts_str(self, tmp_path): + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "string", "hello") + + def test_string_rejects_int(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = self._spec(tmp_path) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", 
"string", 42) + + def test_number_accepts_int(self, tmp_path): + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "number", 42) + + def test_number_accepts_float(self, tmp_path): + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "number", 3.14) + + def test_number_rejects_string(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = self._spec(tmp_path) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", "number", "42") + + def test_boolean_accepts_true(self, tmp_path): + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "boolean", True) + + def test_boolean_rejects_int_one(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = self._spec(tmp_path) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", "boolean", 1) + + def test_object_accepts_dict(self, tmp_path): + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "object", {"a": 1}) + + def test_object_rejects_list(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = self._spec(tmp_path) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", "object", []) + + def test_array_accepts_list(self, tmp_path): + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "array", [1, 2, 3]) + + def test_array_rejects_dict(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = self._spec(tmp_path) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", "array", {"a": 1}) + + def test_any_accepts_anything(self, tmp_path): + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "any", "anything") + spec._check_value_type("", "p", "k", "any", 42) + spec._check_value_type("", "p", "k", "any", None) + + def test_unknown_named_type_accepts_without_validation(self, tmp_path): + """Named 
type not in types block is silently accepted (incremental adoption).""" + spec = self._spec(tmp_path) + spec._check_value_type("", "p", "k", "UnknownType", {"anything": True}) + + def test_named_struct_type_accepts_valid_keys(self, tmp_path): + spec = _make_spec( + _BASE + + """ +types: + Summary: + revenue: number + expenses: number + +workflow: + p: + title: "P" + assign: a + outputs: + result: Summary +""", + tmp_path, + ) + spec._check_value_type( + "", "p", "k", "Summary", {"revenue": 100, "expenses": 50} + ) + + def test_named_struct_type_rejects_non_dict(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = _make_spec( + _BASE + + """ +types: + Summary: + revenue: number + +workflow: + p: + title: "P" + assign: a +""", + tmp_path, + ) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", "Summary", "not-a-dict") + + def test_named_struct_missing_key_raises(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = _make_spec( + _BASE + + """ +types: + Summary: + revenue: number + expenses: number + +workflow: + p: + title: "P" + assign: a +""", + tmp_path, + ) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", "Summary", {"revenue": 100}) + + def test_enum_type_accepts_valid_value(self, tmp_path): + spec = _make_spec( + _BASE + + """ +types: + RiskLevel: + enum: [low, medium, high] + +workflow: + p: + title: "P" + assign: a +""", + tmp_path, + ) + spec._check_value_type("", "p", "k", "RiskLevel", "medium") + + def test_enum_type_rejects_invalid_value(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = _make_spec( + _BASE + + """ +types: + RiskLevel: + enum: [low, medium, high] + +workflow: + p: + title: "P" + assign: a +""", + tmp_path, + ) + with pytest.raises(OutputTypeMismatchError): + spec._check_value_type("", "p", "k", "RiskLevel", "critical") + + +# 
--------------------------------------------------------------------------- +# validate_claim_inputs +# --------------------------------------------------------------------------- + + +class TestValidateClaimInputs: + def test_valid_claim_passes(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + rev: fetch.revenue +""", + tmp_path, + ) + spec.validate_claim_inputs( + phase_name="analyze", + upstream_outputs={"fetch": {"revenue": 999}}, + ) + + def test_unresolvable_raises(self, tmp_path): + from openintent.workflow import UnresolvableInputError + + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + rev: fetch.revenue +""", + tmp_path, + ) + with pytest.raises(UnresolvableInputError): + spec.validate_claim_inputs( + phase_name="analyze", + upstream_outputs={}, + ) + + def test_phase_without_inputs_always_valid(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + simple: + title: "Simple" + assign: agent +""", + tmp_path, + ) + spec.validate_claim_inputs(phase_name="simple", upstream_outputs={}) + + +# --------------------------------------------------------------------------- +# End-to-end output validation via validate_task_outputs with types block +# --------------------------------------------------------------------------- + + +class TestOutputValidationWithTypesBlock: + def test_named_type_valid(self, tmp_path): + spec = _make_spec( + _BASE + + """ +types: + FinancialSummary: + revenue: number + expenses: number + +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + summary: FinancialSummary +""", + tmp_path, + ) + spec.validate_task_outputs( + "fetch", {"summary": {"revenue": 1000, "expenses": 200}} + ) + + def 
test_named_type_missing_field_raises(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = _make_spec( + _BASE + + """ +types: + FinancialSummary: + revenue: number + expenses: number + +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + summary: FinancialSummary +""", + tmp_path, + ) + with pytest.raises(OutputTypeMismatchError): + spec.validate_task_outputs("fetch", {"summary": {"revenue": 1000}}) + + def test_enum_output_valid(self, tmp_path): + spec = _make_spec( + _BASE + + """ +types: + Status: + enum: [pending, approved, rejected] + +workflow: + review: + title: "Review" + assign: agent + outputs: + status: Status +""", + tmp_path, + ) + spec.validate_task_outputs("review", {"status": "approved"}) + + def test_enum_output_invalid_raises(self, tmp_path): + from openintent.workflow import OutputTypeMismatchError + + spec = _make_spec( + _BASE + + """ +types: + Status: + enum: [pending, approved, rejected] + +workflow: + review: + title: "Review" + assign: agent + outputs: + status: Status +""", + tmp_path, + ) + with pytest.raises(OutputTypeMismatchError): + spec.validate_task_outputs("review", {"status": "unknown"}) + + def test_mixed_required_and_optional(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + outputs: + revenue: number + expenses: number + notes: + type: string + required: false + confidence: + type: number + required: false +""", + tmp_path, + ) + spec.validate_task_outputs("fetch", {"revenue": 1000, "expenses": 200}) + spec.validate_task_outputs( + "fetch", + {"revenue": 1000, "expenses": 200, "notes": "good", "confidence": 0.95}, + ) + + +# --------------------------------------------------------------------------- +# Incremental adoption — workflows without I/O contracts are unaffected +# --------------------------------------------------------------------------- + + +class TestIncrementalAdoption: + def 
test_workflow_without_io_still_valid(self, tmp_path): + spec = _make_spec( + _BASE + + """ +workflow: + research: + title: "Research" + assign: researcher + + summarize: + title: "Summarize" + assign: summarizer + depends_on: [research] +""", + tmp_path, + ) + assert len(spec.phases) == 2 + for phase in spec.phases: + assert phase.inputs == {} + assert phase.outputs == {} + + def test_partial_io_contract_still_validates(self, tmp_path): + """Only phases that declare outputs need to satisfy them.""" + spec = _make_spec( + _BASE + + """ +workflow: + raw: + title: "Raw" + assign: agent + + typed: + title: "Typed" + assign: agent + depends_on: [raw] + outputs: + result: string +""", + tmp_path, + ) + spec.validate_task_outputs("raw", {}) + spec.validate_task_outputs("typed", {"result": "done"}) + + def test_upstream_without_outputs_unblocks_downstream(self, tmp_path): + """Upstream with no declared outputs can still be referenced in inputs.""" + spec = _make_spec( + _BASE + + """ +workflow: + fetch: + title: "Fetch" + assign: agent + + analyze: + title: "Analyze" + assign: agent + depends_on: [fetch] + inputs: + data: fetch.anything +""", + tmp_path, + ) + result = spec.resolve_task_inputs( + "analyze", upstream_outputs={"fetch": {"anything": [1, 2, 3]}} + ) + assert result == {"data": [1, 2, 3]} + + +# --------------------------------------------------------------------------- +# Package export surface +# --------------------------------------------------------------------------- + + +class TestRFC0024Exports: + def test_missing_output_error_exported(self): + import openintent + + assert hasattr(openintent, "MissingOutputError") + + def test_output_type_mismatch_error_exported(self): + import openintent + + assert hasattr(openintent, "OutputTypeMismatchError") + + def test_unresolvable_input_error_exported(self): + import openintent + + assert hasattr(openintent, "UnresolvableInputError") + + def test_input_wiring_error_exported(self): + import openintent + + assert 
hasattr(openintent, "InputWiringError") + + def test_errors_are_instantiable_from_package(self): + import openintent + + e = openintent.MissingOutputError( + task_id="t", phase_name="p", missing_keys=["k"] + ) + assert isinstance(e, openintent.WorkflowError) + + def test_unresolvable_is_instantiable_from_package(self): + import openintent + + e = openintent.UnresolvableInputError( + task_id="t", phase_name="p", unresolvable_refs=[] + ) + assert isinstance(e, openintent.WorkflowError) + + def test_workflow_spec_still_exported(self): + import openintent + + assert hasattr(openintent, "WorkflowSpec") + + def test_phase_config_still_exported(self): + import openintent + + assert hasattr(openintent, "PhaseConfig") + + +# =========================================================================== +# RFC-0026: UpstreamIntentSuspendedError +# =========================================================================== + + +class TestUpstreamIntentSuspendedError: + """UpstreamIntentSuspendedError — construction and attributes.""" + + def test_construction_basic(self): + from openintent.workflow import UpstreamIntentSuspendedError + + e = UpstreamIntentSuspendedError( + task_id="task_01", + phase_name="run_analysis", + suspended_intent_id="intent_abc", + ) + assert e.task_id == "task_01" + assert e.phase_name == "run_analysis" + assert e.suspended_intent_id == "intent_abc" + assert e.expected_resume_at is None + + def test_construction_with_resume_estimate(self): + from openintent.workflow import UpstreamIntentSuspendedError + + e = UpstreamIntentSuspendedError( + task_id="task_02", + phase_name="generate_report", + suspended_intent_id="intent_xyz", + expected_resume_at="2026-03-24T15:00:00Z", + ) + assert e.expected_resume_at == "2026-03-24T15:00:00Z" + + def test_message_contains_intent_id(self): + from openintent.workflow import UpstreamIntentSuspendedError + + e = UpstreamIntentSuspendedError( + task_id="t", phase_name="p", suspended_intent_id="intent_abc" + ) + assert 
"intent_abc" in str(e) + + def test_message_contains_resume_estimate(self): + from openintent.workflow import UpstreamIntentSuspendedError + + e = UpstreamIntentSuspendedError( + task_id="t", + phase_name="p", + suspended_intent_id="intent_abc", + expected_resume_at="2026-03-24T15:00:00Z", + ) + assert "2026-03-24T15:00:00Z" in str(e) + + def test_is_workflow_error(self): + from openintent.workflow import UpstreamIntentSuspendedError, WorkflowError + + e = UpstreamIntentSuspendedError( + task_id="t", phase_name="p", suspended_intent_id="i" + ) + assert isinstance(e, WorkflowError) + + def test_is_exception(self): + from openintent.workflow import UpstreamIntentSuspendedError + + e = UpstreamIntentSuspendedError( + task_id="t", phase_name="p", suspended_intent_id="i" + ) + assert isinstance(e, Exception) + + def test_can_be_caught_as_workflow_error(self): + from openintent.workflow import UpstreamIntentSuspendedError, WorkflowError + + with pytest.raises(WorkflowError): + raise UpstreamIntentSuspendedError( + task_id="t", phase_name="p", suspended_intent_id="i" + ) + + def test_exported_from_openintent_package(self): + import openintent + + assert hasattr(openintent, "UpstreamIntentSuspendedError") + + def test_instantiable_from_package(self): + import openintent + + e = openintent.UpstreamIntentSuspendedError( + task_id="t", phase_name="p", suspended_intent_id="i" + ) + assert isinstance(e, openintent.WorkflowError) + + +class TestValidateClaimInputsUpstreamSuspension: + """validate_claim_inputs() raises UpstreamIntentSuspendedError when upstream is suspended (RFC-0026).""" + + _WORKFLOW = """\ +openintent: "1.0" +info: + name: "Suspension Test" +workflow: + fetch: + title: "Fetch" + assign: agent-a + outputs: + data: string + process: + title: "Process" + assign: agent-b + depends_on: [fetch] + inputs: + processed_data: fetch.data + outputs: + result: string +""" + + def test_raises_when_upstream_suspended(self, tmp_path): + from openintent.workflow import 
UpstreamIntentSuspendedError, WorkflowSpec + + spec = WorkflowSpec.from_string(self._WORKFLOW) + with pytest.raises(UpstreamIntentSuspendedError) as exc_info: + spec.validate_claim_inputs( + phase_name="process", + upstream_outputs={"fetch": {"data": "hello"}}, + task_id="task_process", + upstream_intents_status={ + "fetch": { + "status": "suspended_awaiting_input", + "intent_id": "intent_fetch_001", + "expected_resume_at": None, + } + }, + ) + e = exc_info.value + assert e.task_id == "task_process" + assert e.phase_name == "process" + assert e.suspended_intent_id == "intent_fetch_001" + assert e.expected_resume_at is None + + def test_raises_with_resume_estimate(self, tmp_path): + from openintent.workflow import UpstreamIntentSuspendedError, WorkflowSpec + + spec = WorkflowSpec.from_string(self._WORKFLOW) + with pytest.raises(UpstreamIntentSuspendedError) as exc_info: + spec.validate_claim_inputs( + phase_name="process", + upstream_outputs={"fetch": {"data": "hello"}}, + task_id="task_process", + upstream_intents_status={ + "fetch": { + "status": "suspended_awaiting_input", + "intent_id": "intent_fetch_001", + "expected_resume_at": "2026-03-24T15:00:00Z", + } + }, + ) + assert exc_info.value.expected_resume_at == "2026-03-24T15:00:00Z" + + def test_does_not_raise_when_upstream_active(self, tmp_path): + from openintent.workflow import WorkflowSpec + + spec = WorkflowSpec.from_string(self._WORKFLOW) + # Should not raise — upstream is active, outputs available + result = spec.validate_claim_inputs( + phase_name="process", + upstream_outputs={"fetch": {"data": "hello"}}, + task_id="task_process", + upstream_intents_status={ + "fetch": { + "status": "active", + "intent_id": "intent_fetch_001", + "expected_resume_at": None, + } + }, + ) + assert result is None + + def test_does_not_raise_when_status_map_absent(self, tmp_path): + from openintent.workflow import WorkflowSpec + + spec = WorkflowSpec.from_string(self._WORKFLOW) + # Backwards compat: if no 
upstream_intents_status provided, behaves as before + result = spec.validate_claim_inputs( + phase_name="process", + upstream_outputs={"fetch": {"data": "hello"}}, + task_id="task_process", + ) + assert result is None + + def test_trigger_refs_ignored(self, tmp_path): + """$trigger.* references should not trigger suspension check.""" + from openintent.workflow import WorkflowSpec + + workflow_yaml = """\ +openintent: "1.0" +info: + name: "Trigger Test" +workflow: + process: + title: "Process" + assign: agent-b + inputs: + quarter: $trigger.quarter +""" + spec = WorkflowSpec.from_string(workflow_yaml) + # Should not raise even if upstream_intents_status maps $trigger somehow + result = spec.validate_claim_inputs( + phase_name="process", + upstream_outputs={}, + trigger_payload={"quarter": "Q1"}, + task_id="task_process", + upstream_intents_status={ + "trigger": { + "status": "suspended_awaiting_input", + "intent_id": "intent_trigger", + "expected_resume_at": None, + } + }, + ) + assert result is None