diff --git a/docs/markform-apis.md b/docs/markform-apis.md index c2244c5c..05940ca3 100644 --- a/docs/markform-apis.md +++ b/docs/markform-apis.md @@ -148,6 +148,15 @@ Each patch has an `op` and `fieldId`. | `skip_field` | optional | `{ "op": "skip_field", "fieldId": "notes", "reason": "Not applicable" }` | | `abort_field` | any | `{ "op": "abort_field", "fieldId": "data", "reason": "Unable to find" }` | +**Agent fill contract:** +- For AI fill loops, use `skip_field` / `abort_field` operations for missing data. +- `%SKIP%` / `%ABORT%` in scalar `set_string` / `set_url` / `set_date` values are + tolerated and coerced to `skip_field` / `abort_field` (with a warning), but this is + compatibility behavior, not preferred usage. +- Sentinels in list-item `set_*_list` values remain rejected. +- Sentinel values remain valid in canonical serialized form content and round-trip + (`parse -> serialize -> parse`) as field-state metadata. + ### Checkbox Values For `set_checkboxes`, values depend on the checkbox mode: @@ -215,6 +224,10 @@ const result = await fillForm({ | `additionalTools` | `Record` | `undefined` | Custom tools for agent | | `recordFill` | `boolean` | (required) | Collect detailed FillRecord with timeline and stats | +**Prompt contract note:** model-visible harness prompts sanitize sentinel literals and +omit YAML frontmatter from the embedded form markdown. +On-disk form serialization is unchanged. + ### Parallel Execution When a form uses `parallel` attributes on groups, you can enable concurrent execution: diff --git a/docs/markform-reference.md b/docs/markform-reference.md index 8d3431e9..c84aec55 100644 --- a/docs/markform-reference.md +++ b/docs/markform-reference.md @@ -859,6 +859,11 @@ When designing a form, match each piece of data to the most specific field kind: - Use `checkboxMode="explicit"` when every item needs a yes/no answer - Add `role="user"` for human-provided inputs, `role="agent"` for AI-researched data - Include `` blocks to guide agents on format and sources +- In agent instructions, tell models to use `skip_field` / `abort_field` operations. + Do not instruct `%SKIP%` / `%ABORT%` literal tokens for patch values. +- Runtime compatibility: scalar `set_string` / `set_url` / `set_date` sentinel literals + are tolerated and coerced to `skip_field` / `abort_field`, but list-item sentinels are + rejected. - Organize related fields into `` blocks ## Best Practices diff --git a/docs/project/specs/active/plan-2026-02-25-skip-sentinel-handling.md b/docs/project/specs/active/plan-2026-02-25-skip-sentinel-handling.md new file mode 100644 index 00000000..73c2eb19 --- /dev/null +++ b/docs/project/specs/active/plan-2026-02-25-skip-sentinel-handling.md @@ -0,0 +1,266 @@ +--- +title: Skip Sentinel and Prompt Context Hygiene in LLM Fill Loops +description: Reduce skip/reject loops and prompt ambiguity by sanitizing sentinels and removing YAML front matter from model-visible form context +author: Codex (GPT-5) +--- +# Feature: Skip Sentinel and Prompt Context Hygiene in LLM Fill Loops + +**Date:** 2026-02-25 (last updated 2026-02-26) + +**Author:** Codex (GPT-5) + +**Status:** Implemented (PR #169) + +## Overview + +Markform currently has a confusing split: +- Form serialization and docs use sentinel strings like `%SKIP%` and `%ABORT%` +- Patch application requires explicit meta operations (`skip_field`, `abort_field`) + +In LLM fill loops, contradictory instructions and raw serialized form state can cause +models to emit `%SKIP%` in `set_*` patch values, which are then rejected. +This creates avoidable retry loops. + +This plan assumes we control the downstream form authoring surface and can update forms +and role instructions directly. + +Separately, the embedded literal form currently includes YAML front matter in +model-visible context. +We already inject role/form instructions into prompt body sections, so front matter +content can be redundant or confusing. +This plan verifies prompt body sufficiency and removes front matter from embedded form +context shown to agents. + +## Goals + +- Eliminate most `%SKIP%`/`%ABORT%` sentinel-in-value rejection loops caused by prompt + text +- Establish one clear AI-fill contract: `skip_field`/`abort_field` operations only +- Preserve current canonical file format +- Preserve full sentinel round-trip behavior in form content + (`parse -> state -> serialize`) +- Ensure agent prompt body contains all required instructions without relying on YAML + front matter +- Remove YAML front matter from model-visible embedded literal form context +- Keep required-field protection intact (`skip_field` must still fail on required + fields) + +## Non-Goals + +- Changing persisted markdown/YAML/JSON serialization format +- Removing sentinel parsing support from the engine +- Relaxing required-field rules +- Adding broad/ambiguous patch canonicalization (for example list-item sentinel rewrite) +- Changing persisted front matter on disk + +## Background + +Observed behavior from QA runs: +- Prompts can simultaneously instruct `skip_field` and `%SKIP% (reason)` +- Models copy `%SKIP%` literals into `set_*` values +- `applyPatches()` rejects these values by design, causing extra turns and noise + +This is amplified for complex forms because: +- Multiple instruction surfaces exist (form docs, role instructions, field docs, + retries) +- Large context payloads increase chance of copying literal sentinel tokens + +Related context issue: +- `buildContextPrompt()` embeds `serializeForm(form)` as markdown, including YAML front + matter +- Models may over-index on frontmatter metadata even when explicit body instructions + already exist + +Current wire-level evidence in checked-in goldens: +- `packages/markform/examples/simple/simple-with-skips.session.yaml:1864-1874` shows + YAML front matter inside model-visible context prompt +- `packages/markform/examples/simple/simple-with-skips.session.yaml:1907-1910` and + `2038-2041` show `%SKIP%` literals in model-visible context prompt + +## Design + +### Approach + +Use a single up-front design: + +1. **Authoring alignment (A)**: remove `%SKIP%`/`%ABORT%` guidance from form/role/field + instructions. +2. **Harness sanitization (B)**: never show raw sentinel literals in model-facing + prompt/context text. +3. **Prompt-content contract**: make body instructions self-sufficient, then remove YAML + front matter from model-visible embedded form markdown. + +Reject broad `Approach C` canonicalization for now because it can hide instruction bugs. +Adopt a narrow compatibility coercion for unambiguous scalar `set_string` / `set_url` / +`set_date` sentinel values to `skip_field` / `abort_field`. + +This intentionally keeps a dual-surface model: +- **Form content surface** (markdown/state): sentinels remain valid for round-trip and + interoperability. +- **Agent patch surface** (fill operations): agents should use explicit operations, not + embedded sentinels. +- **Prompt-display surface** (model-visible context): include instructions and current + field state, but omit YAML front matter once body-complete guidance is confirmed. + +Risk profile: +- Low risk to harness behavior because changes are isolated to prompt construction and + model-visible text transformations. +- Narrow patch-application change only for scalar sentinel compatibility coercion. +- Wire/session goldens will make every prompt-level byte change explicit in review. + +### Decision Matrix + +| Approach | Complex-Form Fit | Risk | Recommendation | +| --- | --- | --- | --- | +| A: form-content hygiene only | Medium (necessary but misses serialized-context leakage) | Low | Adopt with B | +| B: harness prompt sanitization | High (eliminates sentinel leakage to model) | Low | **Adopt** | +| C: patch canonicalization | Low for this strategy (unneeded semantic rewrite) | Medium | Do not adopt now | + +### File and Line-Level Implementation Map + +| File | Line-level touch points | Planned change | +| --- | --- | --- | +| `packages/markform/src/harness/liveAgent.ts` | `461-513` (`buildSystemPrompt`) | Apply sentinel-text sanitizer to form-level docs, role instructions, and field instruction snippets before joining sections. | +| `packages/markform/src/harness/liveAgent.ts` | `526-640` (`buildContextPrompt`) | Sanitize previous rejection messages (`542-555`), sanitize serialized-form payload, and replace `serializeForm(form)` insertion (`567`) with stripped-frontmatter + sanitized content. | +| `packages/markform/src/harness/liveAgent.ts` | `888-899` (`buildMockWireFormat`) | Keep wire generation path unchanged except inheriting new prompt-builder behavior, so golden sessions capture exact prompt diffs. | +| `packages/markform/src/harness/liveAgent.ts` | new helpers near context-building section | Add small pure helpers used only for prompt display: `sanitizeSentinelLiteralsForPrompt(text)` and `stripYamlFrontmatterForPrompt(markdown)`. | +| `packages/markform/src/harness/prompts.ts` | `20-79`, `254-257` | Confirm body instructions remain self-sufficient after frontmatter removal; adjust wording only if audit finds missing guidance. | +| `packages/markform/src/engine/apply.ts` | `129-230`, `268-338`, `383-397`, `485` | Add scalar sentinel coercion (`set_string` / `set_url` / `set_date` -> `skip_field` / `abort_field`) with warning; keep list-item sentinel rejection and required-field skip rejection as invariants. | +| `packages/markform/src/engine/serialize.ts` | `337-340`, `1077-1080`, `1505` | No behavior change; keep sentinel round-trip behavior in form serialization. | +| `packages/markform/tests/unit/harness/liveAgent.test.ts` | file add/expand test blocks | Add focused tests around `buildMockWireFormat()` output for model-visible system/context prompts. | +| `packages/markform/tests/golden/helpers.ts` | `136-153` | Keep wire/context capture unchanged; this is the primary mechanism that snapshots prompt text at session level. | +| `packages/markform/scripts/regen-golden-sessions.ts` | `56-73` (`SESSIONS`) | Add one minimal prompt-hygiene scenario to generate a compact, reviewable wire/session golden. | +| `packages/markform/tests/golden/validation.test.ts` | `39-100` (`MUTATIONS`) | Add mutations specifically for frontmatter leakage and sentinel-literal leakage in prompt text. | + +### Components + +- `packages/markform/src/harness/liveAgent.ts` + - Sanitize sentinel literals in: + - composed system prompt sections + - serialized form markdown inserted into context prompt + - prior rejection/error text shown back to model + - Remove YAML front matter from embedded literal form markdown in context prompt +- `packages/markform/src/harness/prompts.ts` and related harness helpers + - Keep patch guidance centered on `skip_field`/`abort_field` + - Ensure body instructions cover all agent-required behavior currently implied by + front matter +- Downstream form definitions (controlled authoring surface) + - Remove `%SKIP%`/`%ABORT%` textual instructions from role/form docs + +### API Changes + +No patch API semantic changes. + +Optional implementation choice: +- internal-only harness setting to disable sanitization for debugging +- default production behavior remains sanitization enabled + +## Implementation Plan + +### Task List (Implementation-Ready) + +- [x] Add prompt-only sanitizer helpers in `liveAgent.ts`: + - `%SKIP% (reason)` -> `(skipped: reason)` + - `%SKIP%` -> `(skipped)` + - `%ABORT% (reason)` -> `(aborted: reason)` + - `%ABORT%` -> `(aborted)` +- [x] Add frontmatter-strip helper for model-visible context markdown only (do not alter + persisted form serialization). +- [x] Apply helpers in `buildSystemPrompt()` and `buildContextPrompt()` at the exact + callsites in the line map above. +- [x] Audit prompt-body sufficiency: + - verify role/form/field instruction text appears in system prompt even when context + prompt no longer includes frontmatter block. +- [x] Keep serialization behavior unchanged (`serialize.ts`) and add narrow scalar + sentinel compatibility coercion in `apply.ts`. +- [x] Add/extend unit tests in `tests/unit/harness/liveAgent.test.ts` to assert: + - model-visible prompts contain no `%SKIP%`/`%ABORT%` literals + - context prompt omits leading YAML front matter + - role instructions still appear in system prompt +- [x] Add a minimal prompt-hygiene golden scenario: + - new tiny form fixture containing + - frontmatter with `role_instructions` + - at least one skipped/aborted value in serialized form state + - regenerate session so wire request prompt is captured in git +- [x] Extend golden sensitivity tests (`validation.test.ts`) with prompt-hygiene + mutations: + - inject `%SKIP%` literal into wire request prompt + - inject YAML frontmatter marker `---` at the start of embedded markdown block + - verify mutations are detected +- [x] Update docs to state: + - sentinels are valid serialization artifacts + - agents must use `skip_field`/`abort_field` + - YAML frontmatter is excluded from model-visible context prompt contract + +## Testing Strategy + +- Unit tests for prompt sanitization behavior and replacement formatting +- Unit tests for `liveAgent` prompt builders ensuring no `%SKIP%`/`%ABORT%` leak into + model-visible prompt text +- Unit tests for prompt builders ensuring YAML front matter is absent from embedded form + markdown +- Regression tests verifying `applyPatches()` coerces scalar embedded sentinel values to + meta operations, while list-item sentinels are still rejected +- Regression tests verifying sentinel round-trip still works for literal form responses +- Regression tests showing equivalent fill behavior with and without prompt-side front + matter display +- Golden/e2e fill-session test that exercises: + - serialized form context containing skipped/aborted responses + - agent turns producing valid `skip_field`/`abort_field` patches + - context prompt without YAML front matter + - final `parse -> serialize -> parse` equivalence for sentinel-bearing responses +- Golden wire-level assertions must verify: + - `turns[].wire.request.system` contains explicit skip operation guidance + - `turns[].wire.request.prompt` omits frontmatter block and sentinel literals + - `turns[].context.context_prompt` matches the same transformed prompt contract +- Reproduction fixture test from downstream-like contradictory instruction setup: + - Verify sentinel-in-value rejection is eliminated in first N turns after authoring + + sanitization changes + +### Golden Workflow (Explicit Before/After Diff) + +Implementation should intentionally rely on git-visible golden diffs instead of staged +rollout: + +1. Add/refresh minimal session fixture and run + `pnpm --filter markform test:golden:regen` +2. Implement prompt-hygiene code changes. +3. Re-run `pnpm --filter markform test:golden:regen`. +4. Review `git diff packages/markform/examples/**/*.session.yaml`: + - before/after prompt text change is explicit in wire/context sections + - frontmatter removal and sentinel sanitization are visible as textual diffs +5. Run `pnpm --filter markform test:golden` and full test suite. + +## Acceptance Criteria + +- Model-visible system/context prompts contain no `%SKIP%`/`%ABORT%` literals by + default. +- Model-visible embedded form markdown omits YAML front matter. +- Prompt body still contains all required role/form guidance after front matter removal. +- Serialization/export behavior and existing sentinel-friendly file format remain + unchanged. +- Literal sentinel form responses still round-trip correctly (including reason text). +- Golden/e2e fill-session test passes with sentinel-bearing forms. +- Reproduction runs no longer show sentinel-in-`set_*` rejection loops. +- Required-field skip errors continue to fire correctly. +- Scalar sentinel `set_*` compatibility inputs (`set_string` / `set_url` / `set_date`) + are coerced to `skip_field` / `abort_field` with warnings. +- Golden session diff for the prompt-hygiene fixture clearly shows: + - removed frontmatter in embedded prompt markdown + - sanitized sentinel display text + - no unintended harness behavior drift outside prompt text and expected hashes + +## Resolved Decisions + +- Use one provider-agnostic sanitizer transform in harness prompt builders. +- Omit frontmatter unconditionally in model-visible embedded form markdown. +- Keep a dedicated prompt-hygiene fixture directory (`examples/prompt-hygiene/`) for + explicit golden diffs. + +## References + +- `packages/markform/src/harness/liveAgent.ts` +- `packages/markform/src/engine/apply.ts` +- `packages/markform/src/engine/serialize.ts` +- `packages/markform/src/engine/parseSentinels.ts` +- `docs/project/specs/active/plan-2026-02-10-show-skip-reason-in-view.md` diff --git a/packages/markform/examples/prompt-hygiene/prompt-hygiene-mock-filled.form.md b/packages/markform/examples/prompt-hygiene/prompt-hygiene-mock-filled.form.md new file mode 100644 index 00000000..1ada34a1 --- /dev/null +++ b/packages/markform/examples/prompt-hygiene/prompt-hygiene-mock-filled.form.md @@ -0,0 +1,38 @@ +--- +markform: + spec: MF/0.1 + title: Prompt Hygiene Fixture + roles: + - agent + role_instructions: + agent: If unavailable, use %SKIP% (No evidence) or %ABORT% (Hard failure). +--- + + + + +Fallback guidance: use %SKIP:No evidence% when data is unavailable. + + + + + +```value +Acme Corp +``` + + + +```value +%SKIP% (Not provided at this stage) +``` + + + +Escalate with %ABORT(timeout)% if blocked. + + + + + + diff --git a/packages/markform/examples/prompt-hygiene/prompt-hygiene.form.md b/packages/markform/examples/prompt-hygiene/prompt-hygiene.form.md new file mode 100644 index 00000000..7dc9cb6d --- /dev/null +++ b/packages/markform/examples/prompt-hygiene/prompt-hygiene.form.md @@ -0,0 +1,34 @@ +--- +markform: + spec: MF/0.1 + title: Prompt Hygiene Fixture + roles: + - agent + role_instructions: + agent: If unavailable, use %SKIP% (No evidence) or %ABORT% (Hard failure). +--- + + + + +Fallback guidance: use %SKIP:No evidence% when data is unavailable. + + + + + + + +```value +%SKIP% (Not provided at this stage) +``` + + + +Escalate with %ABORT(timeout)% if blocked. + + + + + + diff --git a/packages/markform/examples/prompt-hygiene/prompt-hygiene.session.yaml b/packages/markform/examples/prompt-hygiene/prompt-hygiene.session.yaml new file mode 100644 index 00000000..885eea9e --- /dev/null +++ b/packages/markform/examples/prompt-hygiene/prompt-hygiene.session.yaml @@ -0,0 +1,301 @@ +session_version: 0.1.0 +mode: mock +form: + path: prompt-hygiene.form.md +mock: + completed_mock: prompt-hygiene-mock-filled.form.md +harness: + max_turns: 100 + max_patches_per_turn: 20 + max_issues_per_turn: 10 + target_roles: + - '*' + fill_mode: continue +turns: + - turn: 1 + inspect: + issues: + - ref: company_name + scope: field + reason: required_missing + message: Required field "Company Name" is empty + severity: required + priority: 1 + apply: + patches: + - op: set_string + field_id: company_name + value: Acme Corp + after: + required_issue_count: 0 + markdown_sha256: e8a80a0b8aa047fe9efe0083ca8c9f87f59ced7ef6ad066343ff42781b560fa8 + answered_field_count: 1 + skipped_field_count: 1 + context: + system_prompt: |- + # Form Instructions + + Research and fill the form fields using all available tools. Focus on accuracy over completeness. + + ## Guidelines + 1. Address required fields first (severity: "required"), then optional fields (severity: "recommended") + 2. NEVER fabricate or guess information - only use data you can verify + 3. If you cannot find verifiable information, use skip_field with a reason + + ## Patch Format Examples + + Use the fill_form tool with patches in these formats: + + | Type | Example | + |------|---------| + | string | `{ op: "set_string", fieldId: "name", value: "Acme Corp" }` | + | number | `{ op: "set_number", fieldId: "age", value: 32 }` | + | string_list | `{ op: "set_string_list", fieldId: "tags", value: ["ai", "ml"] }` | + | url | `{ op: "set_url", fieldId: "website", value: "https://example.com" }` | + | url_list | `{ op: "set_url_list", fieldId: "sources", value: ["https://a.com", "https://b.com"] }` | + | date | `{ op: "set_date", fieldId: "event_date", value: "2024-06-15" }` | + | year | `{ op: "set_year", fieldId: "founded", value: 2024 }` | + | single_select | `{ op: "set_single_select", fieldId: "priority", value: "high" }` | + | multi_select | `{ op: "set_multi_select", fieldId: "categories", value: ["frontend", "backend"] }` | + | checkboxes | `{ op: "set_checkboxes", fieldId: "tasks", value: { "task1": "done", "task2": "todo" } }` | + | table | `{ op: "set_table", fieldId: "team", value: [{ "name": "Alice", "role": "Engineer" }] }` | + + ## Incremental Operations (for collections) + + Use these to add/remove items without replacing the entire collection: + + | Type | Example | + |------|---------| + | append_table | `{ op: "append_table", fieldId: "team", value: [{ "name": "Bob", "role": "PM" }] }` | + | append_string_list | `{ op: "append_string_list", fieldId: "tags", value: ["new_tag"] }` | + | append_url_list | `{ op: "append_url_list", fieldId: "sources", value: ["https://new.com"] }` | + | delete_table | `{ op: "delete_table", fieldId: "team", value: 0 }` (0-based row index) | + | delete_string_list | `{ op: "delete_string_list", fieldId: "tags", value: 0 }` (0-based item index) | + | delete_url_list | `{ op: "delete_url_list", fieldId: "sources", value: 0 }` (0-based item index) | + + ## Important: checkboxes vs multi_select + + These two types look similar but have DIFFERENT value formats: + + - **multi_select** → array of option IDs: `["opt1", "opt2"]` + - **checkboxes** → object mapping IDs to states: `{ "opt1": "done", "opt2": "todo" }` + + **Checkbox states by mode:** + - Mode "simple": `"done"` or `"todo"` + - Mode "multi": `"done"`, `"todo"`, `"incomplete"`, `"active"`, or `"na"` + - Mode "explicit": `"yes"` or `"no"` (if unknown, use abort_field) + + **WRONG:** `{ op: "set_checkboxes", value: ["task1", "task2"] }` + **RIGHT:** `{ op: "set_checkboxes", value: { "task1": "done", "task2": "done" } }` + + ## Skipping Fields + + If you cannot find verifiable information: + `{ op: "skip_field", fieldId: "...", reason: "Could not find verified data" }` + + + # Form Instructions + Fallback guidance: use (skipped: No evidence) when data is unavailable. + + # Instructions for agent role + If unavailable, use (skipped: No evidence) or (aborted: Hard failure). + context_prompt: |- + # Current Form State + + Below is the complete form with all currently filled values. + Fields marked with `[ ]` or empty values still need to be filled. + + ```markdown + + + + + Fallback guidance: use (skipped: No evidence) when data is unavailable. + + + + + + + + ```value + (skipped: Not provided at this stage) + ``` + + + + Escalate with (aborted: timeout) if blocked. + + + + + + + + ``` + + # Current Form Issues + + You need to address 1 issue. Here are the current issues: + + - **company_name** (field): Required field "Company Name" is empty + Severity: required, Priority: P1 + Type: string + Set: { op: "set_string", fieldId: "company_name", value: "text here" } + This field is required. + + # General Instructions + + Use the fill_form tool to submit patches for the fields above. + For table fields, each row is an object with column ID keys. + wire: + request: + system: |- + # Form Instructions + + Research and fill the form fields using all available tools. Focus on accuracy over completeness. + + ## Guidelines + 1. Address required fields first (severity: "required"), then optional fields (severity: "recommended") + 2. NEVER fabricate or guess information - only use data you can verify + 3. If you cannot find verifiable information, use skip_field with a reason + + ## Patch Format Examples + + Use the fill_form tool with patches in these formats: + + | Type | Example | + |------|---------| + | string | `{ op: "set_string", fieldId: "name", value: "Acme Corp" }` | + | number | `{ op: "set_number", fieldId: "age", value: 32 }` | + | string_list | `{ op: "set_string_list", fieldId: "tags", value: ["ai", "ml"] }` | + | url | `{ op: "set_url", fieldId: "website", value: "https://example.com" }` | + | url_list | `{ op: "set_url_list", fieldId: "sources", value: ["https://a.com", "https://b.com"] }` | + | date | `{ op: "set_date", fieldId: "event_date", value: "2024-06-15" }` | + | year | `{ op: "set_year", fieldId: "founded", value: 2024 }` | + | single_select | `{ op: "set_single_select", fieldId: "priority", value: "high" }` | + | multi_select | `{ op: "set_multi_select", fieldId: "categories", value: ["frontend", "backend"] }` | + | checkboxes | `{ op: "set_checkboxes", fieldId: "tasks", value: { "task1": "done", "task2": "todo" } }` | + | table | `{ op: "set_table", fieldId: "team", value: [{ "name": "Alice", "role": "Engineer" }] }` | + + ## Incremental Operations (for collections) + + Use these to add/remove items without replacing the entire collection: + + | Type | Example | + |------|---------| + | append_table | `{ op: "append_table", fieldId: "team", value: [{ "name": "Bob", "role": "PM" }] }` | + | append_string_list | `{ op: "append_string_list", fieldId: "tags", value: ["new_tag"] }` | + | append_url_list | `{ op: "append_url_list", fieldId: "sources", value: ["https://new.com"] }` | + | delete_table | `{ op: "delete_table", fieldId: "team", value: 0 }` (0-based row index) | + | delete_string_list | `{ op: "delete_string_list", fieldId: "tags", value: 0 }` (0-based item index) | + | delete_url_list | `{ op: "delete_url_list", fieldId: "sources", value: 0 }` (0-based item index) | + + ## Important: checkboxes vs multi_select + + These two types look similar but have DIFFERENT value formats: + + - **multi_select** → array of option IDs: `["opt1", "opt2"]` + - **checkboxes** → object mapping IDs to states: `{ "opt1": "done", "opt2": "todo" }` + + **Checkbox states by mode:** + - Mode "simple": `"done"` or `"todo"` + - Mode "multi": `"done"`, `"todo"`, `"incomplete"`, `"active"`, or `"na"` + - Mode "explicit": `"yes"` or `"no"` (if unknown, use abort_field) + + **WRONG:** `{ op: "set_checkboxes", value: ["task1", "task2"] }` + **RIGHT:** `{ op: "set_checkboxes", value: { "task1": "done", "task2": "done" } }` + + ## Skipping Fields + + If you cannot find verifiable information: + `{ op: "skip_field", fieldId: "...", reason: "Could not find verified data" }` + + + # Form Instructions + Fallback guidance: use (skipped: No evidence) when data is unavailable. + + # Instructions for agent role + If unavailable, use (skipped: No evidence) or (aborted: Hard failure). + prompt: |- + # Current Form State + + Below is the complete form with all currently filled values. + Fields marked with `[ ]` or empty values still need to be filled. + + ```markdown + + + + + Fallback guidance: use (skipped: No evidence) when data is unavailable. + + + + + + + + ```value + (skipped: Not provided at this stage) + ``` + + + + Escalate with (aborted: timeout) if blocked. + + + + + + + + ``` + + # Current Form Issues + + You need to address 1 issue. Here are the current issues: + + - **company_name** (field): Required field "Company Name" is empty + Severity: required, Priority: P1 + Type: string + Set: { op: "set_string", fieldId: "company_name", value: "text here" } + This field is required. + + # General Instructions + + Use the fill_form tool to submit patches for the fields above. + For table fields, each row is an object with column ID keys. + tools: + fill_form: + description: Fill form fields by submitting patches. Each patch sets a value for one field. Use the field IDs from the issues list. Return patches for all issues you can address. + input_schema: + $defs: + patch: + description: A patch operation (see tool description for full schema) + properties: + patches: + description: Array of patches to apply to the form + items: + $ref: '#/$defs/patch' + type: array + required: + - patches + type: object + response: + steps: + - tool_calls: + - tool_name: fill_form + input: + patches: + - field_id: company_name + op: set_string + value: Acme Corp + tool_results: [] + text: null + usage: + input_tokens: 0 + output_tokens: 0 +final: + expect_complete: true + expected_completed_form: prompt-hygiene-mock-filled.form.md diff --git a/packages/markform/examples/rejection-test/rejection-test.session.yaml b/packages/markform/examples/rejection-test/rejection-test.session.yaml index e2e0de2b..e0421947 100644 --- a/packages/markform/examples/rejection-test/rejection-test.session.yaml +++ b/packages/markform/examples/rejection-test/rejection-test.session.yaml @@ -124,17 +124,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Rejection Test Form - description: Tests type mismatch rejection and recovery behavior - roles: - - agent - role_instructions: - user: Fill in the fields you have direct knowledge of. - agent: Complete the remaining fields based on the provided context. - --- @@ -260,17 +249,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Rejection Test Form - description: Tests type mismatch rejection and recovery behavior - roles: - - agent - role_instructions: - user: Fill in the fields you have direct knowledge of. - agent: Complete the remaining fields based on the provided context. - --- @@ -461,17 +439,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Rejection Test Form - description: Tests type mismatch rejection and recovery behavior - roles: - - agent - role_instructions: - user: Fill in the fields you have direct knowledge of. - agent: Complete the remaining fields based on the provided context. - --- @@ -603,17 +570,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Rejection Test Form - description: Tests type mismatch rejection and recovery behavior - roles: - - agent - role_instructions: - user: Fill in the fields you have direct knowledge of. - agent: Complete the remaining fields based on the provided context. - --- diff --git a/packages/markform/examples/simple/simple-with-skips.session.yaml b/packages/markform/examples/simple/simple-with-skips.session.yaml index 010bc8c5..ea888ebf 100644 --- a/packages/markform/examples/simple/simple-with-skips.session.yaml +++ b/packages/markform/examples/simple/simple-with-skips.session.yaml @@ -204,17 +204,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -543,17 +532,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -1058,17 +1036,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -1420,17 +1387,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -1861,17 +1817,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -1906,7 +1851,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2037,7 +1982,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2051,7 +1996,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2061,13 +2006,13 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2077,7 +2022,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2087,7 +2032,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2196,17 +2141,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -2241,7 +2175,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2372,7 +2306,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2386,7 +2320,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2396,13 +2330,13 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2412,7 +2346,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2422,7 +2356,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` diff --git a/packages/markform/examples/simple/simple.session.yaml b/packages/markform/examples/simple/simple.session.yaml index 5bf9647b..88f6f89b 100644 --- a/packages/markform/examples/simple/simple.session.yaml +++ b/packages/markform/examples/simple/simple.session.yaml @@ -205,17 +205,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -544,17 +533,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -1059,17 +1037,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -1422,17 +1389,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -1868,17 +1824,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -2047,7 +1992,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2071,7 +2016,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2087,7 +2032,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2097,7 +2042,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2206,17 +2151,6 @@ turns: Fields marked with `[ ]` or empty values still need to be filled. ```markdown - --- - markform: - spec: MF/0.1 - title: Simple Test Form - description: Fully interactive demo - no LLM required. Demonstrates all Markform field types. - run_mode: interactive - roles: - - user - role_instructions: - user: Fill in all fields in this form. - --- @@ -2385,7 +2319,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2409,7 +2343,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2425,7 +2359,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` @@ -2435,7 +2369,7 @@ turns: ```value - %SKIP% (No value in mock form) + (skipped: No value in mock form) ``` diff --git a/packages/markform/scripts/regen-golden-sessions.ts b/packages/markform/scripts/regen-golden-sessions.ts index 00c8b8f2..b66973c6 100644 --- a/packages/markform/scripts/regen-golden-sessions.ts +++ b/packages/markform/scripts/regen-golden-sessions.ts @@ -64,6 +64,11 @@ const SESSIONS: SessionConfig[] = [ mockSource: 'simple/simple-skipped-filled.form.md', sessionFile: 'simple/simple-with-skips.session.yaml', }, + { + form: 'prompt-hygiene/prompt-hygiene.form.md', + mockSource: 'prompt-hygiene/prompt-hygiene-mock-filled.form.md', + sessionFile: 'prompt-hygiene/prompt-hygiene.session.yaml', + }, { form: 'rejection-test/rejection-test.form.md', mockSource: 'rejection-test/rejection-test-mock-filled.form.md', diff --git a/packages/markform/src/engine/apply.ts b/packages/markform/src/engine/apply.ts index 02f44884..c992fa17 100644 --- a/packages/markform/src/engine/apply.ts +++ b/packages/markform/src/engine/apply.ts @@ -115,6 +115,56 @@ function createWarning( return { patchIndex: index, fieldId, message, coercion }; } +/** + * Coerce scalar set_* sentinel values to explicit meta operations. + * + * This preserves "accept but do not encourage" behavior: + * callers should emit skip_field/abort_field directly, but scalar + * sentinel literals are tolerated for compatibility. + */ +function coerceScalarSentinelToMetaPatch( + patch: Patch, + field: Field, + index: number, +): NormalizationResult | null { + if (patch.op !== 'set_string' && patch.op !== 'set_url' && patch.op !== 'set_date') { + return null; + } + if (typeof patch.value !== 'string') { + return null; + } + + const sentinel = detectSentinel(patch.value); + if (!sentinel) { + return null; + } + + const coercedPatch: Patch = + sentinel.type === 'skip' + ? { + op: 'skip_field', + fieldId: patch.fieldId, + role: field.role, + ...(sentinel.reason && { reason: sentinel.reason }), + } + : { + op: 'abort_field', + fieldId: patch.fieldId, + role: field.role, + ...(sentinel.reason && { reason: sentinel.reason }), + }; + + return { + patch: coercedPatch, + warning: createWarning( + index, + field.id, + 'sentinel_to_meta_op', + `Coerced ${patch.op} sentinel literal to ${coercedPatch.op}`, + ), + }; +} + /** * Normalize a patch, coercing common type mismatches with warnings. * @@ -137,6 +187,11 @@ function normalizePatch(form: ParsedForm, patch: Patch, index: number): Normaliz return { patch }; // Let validation handle missing field } + const sentinelCoercion = coerceScalarSentinelToMetaPatch(patch, field, index); + if (sentinelCoercion) { + return sentinelCoercion; + } + // Coerce single string → string_list if (patch.op === 'set_string_list' && field.kind === 'string_list') { if (typeof patch.value === 'string') { @@ -311,8 +366,7 @@ function validatePatch(form: ParsedForm, patch: Patch, index: number): PatchErro return typeMismatchError(index, patch.op, field); } - // Check for embedded sentinels in string values (issue #119) - // Agents should use skip_field/abort_field operations instead of embedding sentinels + // Safety net: reject any embedded sentinels that were not normalized. if (patch.op === 'set_string' || patch.op === 'set_url' || patch.op === 'set_date') { const sentinel = detectSentinel(patch.value); if (sentinel) { diff --git a/packages/markform/src/engine/coreTypes.ts b/packages/markform/src/engine/coreTypes.ts index b80007b2..fb71c57c 100644 --- a/packages/markform/src/engine/coreTypes.ts +++ b/packages/markform/src/engine/coreTypes.ts @@ -843,7 +843,8 @@ export type PatchCoercionType = | 'url_to_list' | 'option_to_array' | 'boolean_to_checkbox' - | 'array_to_checkboxes'; + | 'array_to_checkboxes' + | 'sentinel_to_meta_op'; /** Warning for coerced patches */ export interface PatchWarning { @@ -1862,6 +1863,7 @@ export const PatchCoercionTypeSchema = z.enum([ 'option_to_array', 'boolean_to_checkbox', 'array_to_checkboxes', + 'sentinel_to_meta_op', ]); export const PatchWarningSchema = z.object({ diff --git a/packages/markform/src/harness/liveAgent.ts b/packages/markform/src/harness/liveAgent.ts index 19da2989..d1d7937b 100644 --- a/packages/markform/src/harness/liveAgent.ts +++ b/packages/markform/src/harness/liveAgent.ts @@ -12,6 +12,7 @@ import { openai } from '@ai-sdk/openai'; import { anthropic } from '@ai-sdk/anthropic'; import { google } from '@ai-sdk/google'; import { xai } from '@ai-sdk/xai'; +import YAML from 'yaml'; import type { DocumentationBlock, @@ -449,6 +450,100 @@ function getDocBlocks(docs: DocumentationBlock[], ref: string, tag: string): Doc return docs.filter((d) => d.ref === ref && d.tag === tag); } +/** + * Replace sentinel literals in prompt-visible text so agents don't copy `%SKIP%`/`%ABORT%` + * into set_* patch values. This is prompt-only hygiene; canonical form serialization + * remains unchanged. + */ +function sanitizeSentinelLiteralsForPrompt(text: string): string { + const withReasons = ( + input: string, + sentinel: 'SKIP' | 'ABORT', + label: 'skipped' | 'aborted', + ): string => { + // %SKIP% (reason) + let output = input.replace( + new RegExp(`%${sentinel}%\\s*\\(([^)]*)\\)`, 'gi'), + (_match: string, reason: string) => + reason.trim().length > 0 ? `(${label}: ${reason.trim()})` : `(${label})`, + ); + // %SKIP:reason% + output = output.replace( + new RegExp(`%${sentinel}:([^%]+)%`, 'gi'), + (_match: string, reason: string) => + reason.trim().length > 0 ? `(${label}: ${reason.trim()})` : `(${label})`, + ); + // %SKIP(reason)% + output = output.replace( + new RegExp(`%${sentinel}\\(([^)]*)\\)%`, 'gi'), + (_match: string, reason: string) => + reason.trim().length > 0 ? `(${label}: ${reason.trim()})` : `(${label})`, + ); + return output; + }; + + let sanitized = withReasons(text, 'SKIP', 'skipped'); + sanitized = withReasons(sanitized, 'ABORT', 'aborted'); + sanitized = sanitized.replace(/%SKIP%/gi, '(skipped)'); + sanitized = sanitized.replace(/%ABORT%/gi, '(aborted)'); + return sanitized; +} + +/** + * Remove leading YAML frontmatter from markdown shown to the model. + * This only affects prompt display; on-disk serialization is untouched. + */ +function stripYamlFrontmatterForPrompt(markdown: string): string { + const source = markdown.startsWith('\uFEFF') ? markdown.slice(1) : markdown; + if (!source.startsWith('---')) { + return source; + } + + const firstNewline = source.indexOf('\n'); + if (firstNewline === -1) { + return source; + } + + const openingLine = source.slice(0, firstNewline).replace(/\r$/, '').trim(); + if (openingLine !== '---') { + return source; + } + + let cursor = firstNewline + 1; + let closingStart = -1; + let bodyStart = source.length; + + while (cursor <= source.length) { + const nextNewline = source.indexOf('\n', cursor); + const lineEnd = nextNewline === -1 ? source.length : nextNewline; + const line = source.slice(cursor, lineEnd).replace(/\r$/, '').trim(); + + if (line === '---') { + closingStart = cursor; + bodyStart = nextNewline === -1 ? source.length : nextNewline + 1; + break; + } + + if (nextNewline === -1) { + break; + } + cursor = nextNewline + 1; + } + + if (closingStart === -1) { + return source; + } + + const yamlSource = source.slice(firstNewline + 1, closingStart); + try { + YAML.parse(yamlSource); + } catch { + return source; + } + + return source.slice(bodyStart); +} + /** * Build a composed system prompt from form instructions. * @@ -509,7 +604,7 @@ function buildSystemPrompt(form: ParsedForm, targetRole: string, issues: Inspect sections.push(...fieldInstructions); } - return sections.join('\n'); + return sanitizeSentinelLiteralsForPrompt(sections.join('\n')); } /** @@ -564,7 +659,7 @@ function buildContextPrompt( lines.push('Fields marked with `[ ]` or empty values still need to be filled.'); lines.push(''); lines.push('```markdown'); - lines.push(serializeForm(form)); + lines.push(stripYamlFrontmatterForPrompt(serializeForm(form))); lines.push('```'); lines.push(''); @@ -636,7 +731,7 @@ function buildContextPrompt( lines.push(GENERAL_INSTRUCTIONS); - return lines.join('\n'); + return sanitizeSentinelLiteralsForPrompt(lines.join('\n')); } /** diff --git a/packages/markform/tests/golden/validation.test.ts b/packages/markform/tests/golden/validation.test.ts index 8d2b4246..ccb0b7e6 100644 --- a/packages/markform/tests/golden/validation.test.ts +++ b/packages/markform/tests/golden/validation.test.ts @@ -73,6 +73,20 @@ const MUTATIONS: Mutation[] = [ find: 'Address required fields first', replace: 'Address optional fields first', }, + { + name: 'sentinel prompt leakage', + description: 'Inject sentinel literal into model-visible prompt text', + find: 'Below is the complete form with all currently filled values.', + replace: + 'Below is the complete form with all currently filled values.\n%SKIP% (Injected leakage)', + }, + { + name: 'frontmatter prompt leakage', + description: 'Re-introduce YAML frontmatter into embedded prompt markdown', + find: '', + replace: + '---\nmarkform:\n spec: MF/0.1\n---\n', + }, { name: 'hash change', description: 'Corrupt the SHA256 hash', @@ -82,7 +96,7 @@ const MUTATIONS: Mutation[] = [ { name: 'tool schema $ref change', description: 'Modify tool schema reference', - find: "$ref: '#/$defs/patch'", + find: /\$ref: ['"]#\/\$defs\/patch['"]/, replace: "$ref: '#/$defs/patches'", }, { diff --git a/packages/markform/tests/integration/programmaticFill.test.ts b/packages/markform/tests/integration/programmaticFill.test.ts index ec96c3c0..9433180c 100644 --- a/packages/markform/tests/integration/programmaticFill.test.ts +++ b/packages/markform/tests/integration/programmaticFill.test.ts @@ -16,6 +16,7 @@ import { batchExecutionId, } from '../../src/harness/programmaticFill.js'; import { createMockAgent } from '../../src/harness/mockAgent.js'; +import type { Agent } from '../../src/harness/harnessTypes.js'; // ============================================================================= // Test Fixtures @@ -242,6 +243,165 @@ describe('programmatic fill API - integration tests', () => { expect(result.status.message).toContain('not found'); } }); + + it('preserves sentinel-backed skipped responses through harness round-trip', async () => { + const formWithSentinel = `--- +markform: + spec: MF/0.1 + title: Harness Sentinel Roundtrip + roles: + - agent +--- + + + + + +\`\`\`value +%SKIP% (From literal sentinel) +\`\`\` + + + +`; + + const agent: Agent = { + fillFormTool() { + return Promise.resolve({ + patches: [{ op: 'set_string', fieldId: 'company_name', value: 'Acme Corp' }], + }); + }, + }; + + const result = await fillForm({ + form: formWithSentinel, + model: 'mock/model', + enableWebSearch: false, + maxRetries: 0, + captureWireFormat: false, + recordFill: false, + targetRoles: ['agent'], + _testAgent: agent, + }); + + expect(result.status.ok).toBe(true); + const notes = result.form.responsesByFieldId.notes; + expect(notes?.state).toBe('skipped'); + expect(notes?.reason).toBe('From literal sentinel'); + + const reparsed = parseForm(result.markdown); + const reparsedNotes = reparsed.responsesByFieldId.notes; + expect(reparsedNotes?.state).toBe('skipped'); + expect(reparsedNotes?.reason).toBe('From literal sentinel'); + }); + + it('coerces scalar set_* sentinel patch values in harness loop', async () => { + const formWithOptionalNotes = `--- +markform: + spec: MF/0.1 + title: Scalar Sentinel Patch Coercion + roles: + - agent +--- + + + + + + + +`; + + const sentinelPatchAgent: Agent = { + fillFormTool() { + return Promise.resolve({ + patches: [ + { op: 'set_string', fieldId: 'company_name', value: 'Acme Corp' }, + { op: 'set_string', fieldId: 'notes', value: '%SKIP% (No evidence found)' }, + ], + }); + }, + }; + + const result = await fillForm({ + form: formWithOptionalNotes, + model: 'mock/model', + enableWebSearch: false, + maxRetries: 0, + captureWireFormat: false, + recordFill: false, + targetRoles: ['agent'], + _testAgent: sentinelPatchAgent, + }); + + expect(result.status.ok).toBe(true); + expect(result.form.responsesByFieldId.notes).toMatchObject({ + state: 'skipped', + reason: 'No evidence found', + }); + }); + + it('keeps required-field skip rejection in harness loop', async () => { + const formWithRequiredField = `--- +markform: + spec: MF/0.1 + title: Required Skip Rejection + roles: + - agent +--- + + + + + + +`; + + const rejectingAgent: Agent = { + fillFormTool() { + return Promise.resolve({ + patches: [ + { + op: 'skip_field', + fieldId: 'company_name', + role: 'agent', + reason: 'Cannot verify', + }, + ], + }); + }, + }; + + const rejectionMessages: string[] = []; + const result = await fillForm({ + form: formWithRequiredField, + model: 'mock/model', + enableWebSearch: false, + maxRetries: 0, + maxTurnsTotal: 2, + maxPatchesPerTurn: 1, + maxIssuesPerTurn: 1, + captureWireFormat: false, + recordFill: false, + targetRoles: ['agent'], + _testAgent: rejectingAgent, + callbacks: { + onTurnComplete: ({ rejectedPatches }) => { + rejectionMessages.push(...rejectedPatches.map((patch) => patch.message)); + }, + }, + }); + + expect(result.status.ok).toBe(false); + if (!result.status.ok) { + expect(result.status.reason).toBe('max_turns'); + } + expect( + rejectionMessages.some((message) => + message.includes('Cannot skip required field "company_name"'), + ), + ).toBe(true); + }); }); describe('progress tracking', () => { diff --git a/packages/markform/tests/unit/engine/apply.test.ts b/packages/markform/tests/unit/engine/apply.test.ts index cf056f42..3f3f98e9 100644 --- a/packages/markform/tests/unit/engine/apply.test.ts +++ b/packages/markform/tests/unit/engine/apply.test.ts @@ -1868,18 +1868,17 @@ markform: }); // ============================================================================= - // Issue #119: Reject embedded sentinels in string values + // Issue #119: Embedded sentinel handling in set_* patch values // ============================================================================= // - // Bug: fill_form accepts patches that embed %SKIP% as part of a string value - // (e.g., "%SKIP% (reason)") even on required fields. This causes markform serve - // to later fail validation because the final value contains the skip sentinel. - // - // Fix: Values containing %SKIP% or %ABORT% sentinels should be rejected during - // patch application, not just during final validation. - describe('embedded sentinel rejection (issue #119)', () => { + // Contract: + // - Scalar set_* values (%SKIP% / %ABORT%) are tolerated by coercing to + // skip_field / abort_field with a warning. + // - Agents should still prefer explicit skip_field / abort_field operations. + // - List-item sentinels remain rejected as ambiguous. + describe('embedded sentinel handling (issue #119)', () => { describe('set_string with embedded sentinels', () => { - it('rejects %SKIP% embedded in string value on required field', () => { + it('coerces %SKIP% embedded in string value on optional field', () => { const markdown = `--- markform: spec: MF/0.1 @@ -1888,24 +1887,36 @@ markform: {% form id="test" %} {% group id="g1" %} -{% field kind="string" id="name" label="Name" required=true %}{% /field %} +{% field kind="string" id="notes" label="Notes" %}{% /field %} {% /group %} {% /form %} `; const form = parseForm(markdown); const patches: Patch[] = [ - { op: 'set_string', fieldId: 'name', value: '%SKIP% (Not available)' }, + { op: 'set_string', fieldId: 'notes', value: '%SKIP% (reason text here)' }, ]; const result = applyPatches(form, patches); - expect(result.applyStatus).toBe('rejected'); - expect(result.rejectedPatches[0]?.message).toContain('%SKIP%'); - expect(result.rejectedPatches[0]?.message).toContain('skip_field'); + expect(result.applyStatus).toBe('applied'); + expect(result.rejectedPatches).toHaveLength(0); + expect(result.appliedPatches).toHaveLength(1); + expect(result.appliedPatches[0]).toMatchObject({ + op: 'skip_field', + fieldId: 'notes', + role: 'agent', + reason: 'reason text here', + }); + expect(form.responsesByFieldId.notes).toMatchObject({ + state: 'skipped', + reason: 'reason text here', + }); + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]?.coercion).toBe('sentinel_to_meta_op'); }); - it('rejects %SKIP% embedded in string value on optional field', () => { + it('keeps required-field protection when coercing %SKIP%', () => { const markdown = `--- markform: spec: MF/0.1 @@ -1914,23 +1925,23 @@ markform: {% form id="test" %} {% group id="g1" %} -{% field kind="string" id="notes" label="Notes" %}{% /field %} +{% field kind="string" id="name" label="Name" required=true %}{% /field %} {% /group %} {% /form %} `; const form = parseForm(markdown); const patches: Patch[] = [ - { op: 'set_string', fieldId: 'notes', value: '%SKIP% (reason text here)' }, + { op: 'set_string', fieldId: 'name', value: '%SKIP% (Not available)' }, ]; const result = applyPatches(form, patches); expect(result.applyStatus).toBe('rejected'); - expect(result.rejectedPatches[0]?.message).toContain('%SKIP%'); + expect(result.rejectedPatches[0]?.message).toContain('Cannot skip required field "name"'); }); - it('rejects exact %SKIP% value (should use skip_field instead)', () => { + it('coerces exact %SKIP% value (without reason) to skipped state', () => { const markdown = `--- markform: spec: MF/0.1 @@ -1949,11 +1960,16 @@ markform: const result = applyPatches(form, patches); - expect(result.applyStatus).toBe('rejected'); - expect(result.rejectedPatches[0]?.message).toContain('skip_field'); + expect(result.applyStatus).toBe('applied'); + expect(result.appliedPatches[0]).toMatchObject({ + op: 'skip_field', + fieldId: 'notes', + role: 'agent', + }); + expect(form.responsesByFieldId.notes?.state).toBe('skipped'); }); - it('rejects %ABORT% embedded in string value', () => { + it('coerces %ABORT% embedded in string value', () => { const markdown = `--- markform: spec: MF/0.1 @@ -1974,12 +1990,20 @@ markform: const result = applyPatches(form, patches); - expect(result.applyStatus).toBe('rejected'); - expect(result.rejectedPatches[0]?.message).toContain('%ABORT%'); - expect(result.rejectedPatches[0]?.message).toContain('abort_field'); + expect(result.applyStatus).toBe('applied'); + expect(result.appliedPatches[0]).toMatchObject({ + op: 'abort_field', + fieldId: 'name', + role: 'agent', + reason: 'Data unavailable', + }); + expect(form.responsesByFieldId.name).toMatchObject({ + state: 'aborted', + reason: 'Data unavailable', + }); }); - it('rejects case-insensitive %skip% sentinel', () => { + it('coerces case-insensitive %skip% sentinel', () => { const markdown = `--- markform: spec: MF/0.1 @@ -2000,7 +2024,11 @@ markform: const result = applyPatches(form, patches); - expect(result.applyStatus).toBe('rejected'); + expect(result.applyStatus).toBe('applied'); + expect(form.responsesByFieldId.notes).toMatchObject({ + state: 'skipped', + reason: 'lowercase', + }); }); it('allows normal strings containing "SKIP" as substring', () => { @@ -2028,7 +2056,7 @@ markform: }); describe('set_url with embedded sentinels', () => { - it('rejects %SKIP% in URL value', () => { + it('coerces %SKIP% in URL value', () => { const markdown = `--- markform: spec: MF/0.1 @@ -2049,13 +2077,18 @@ markform: const result = applyPatches(form, patches); - expect(result.applyStatus).toBe('rejected'); - expect(result.rejectedPatches[0]?.message).toContain('%SKIP%'); + expect(result.applyStatus).toBe('applied'); + expect(result.appliedPatches[0]).toMatchObject({ + op: 'skip_field', + fieldId: 'website', + role: 'agent', + reason: 'no website', + }); }); }); describe('set_date with embedded sentinels', () => { - it('rejects %SKIP% in date value', () => { + it('coerces %SKIP% in date value', () => { const markdown = `--- markform: spec: MF/0.1 @@ -2076,8 +2109,13 @@ markform: const result = applyPatches(form, patches); - expect(result.applyStatus).toBe('rejected'); - expect(result.rejectedPatches[0]?.message).toContain('%SKIP%'); + expect(result.applyStatus).toBe('applied'); + expect(result.appliedPatches[0]).toMatchObject({ + op: 'skip_field', + fieldId: 'birthday', + role: 'agent', + reason: 'unknown', + }); }); }); diff --git a/packages/markform/tests/unit/harness/liveAgent.test.ts b/packages/markform/tests/unit/harness/liveAgent.test.ts index ec3f2dd0..f4bef388 100644 --- a/packages/markform/tests/unit/harness/liveAgent.test.ts +++ b/packages/markform/tests/unit/harness/liveAgent.test.ts @@ -7,10 +7,13 @@ import { describe, expect, it } from 'vitest'; import { + buildMockWireFormat, createLiveAgent, wrapToolsWithCallbacks, type LiveAgentConfig, } from '../../../src/harness/liveAgent.js'; +import { parseForm } from '../../../src/engine/parse.js'; +import type { InspectIssue, PatchRejection } from '../../../src/engine/coreTypes.js'; import type { Tool } from 'ai'; // Mock a minimal LanguageModel - we only need it for constructor @@ -275,3 +278,85 @@ describe('wrapToolsWithCallbacks', () => { }); }); }); + +describe('buildMockWireFormat prompt hygiene', () => { + const PROMPT_HYGIENE_FORM = `--- +markform: + spec: MF/0.1 + title: Prompt Hygiene Test + roles: + - agent + role_instructions: + agent: Use %SKIP% (No evidence) or %ABORT% (Tooling failure) if needed. +--- + + + + +When unavailable, write %SKIP:No evidence%. + + + + + +\`\`\`value +%SKIP% (No value in fixture) +\`\`\` + + + +Fallback: %ABORT(Tool timeout)%. + + + + + +`; + + const issues: InspectIssue[] = [ + { + ref: 'notes', + scope: 'field', + reason: 'optional_unanswered', + message: 'Optional field not yet addressed', + severity: 'recommended', + priority: 1, + }, + ]; + + it('sanitizes sentinel literals in model-visible system/context prompts', () => { + const form = parseForm(PROMPT_HYGIENE_FORM); + const previousRejections: PatchRejection[] = [ + { + patchIndex: 0, + message: 'Value contains %SKIP% sentinel. Try %ABORT:No source% instead.', + }, + ]; + + const wire = buildMockWireFormat(form, issues, [], 10, 'agent', previousRejections); + + expect(wire.request.system).toContain('(skipped: No evidence)'); + expect(wire.request.system).toContain('(aborted: Tooling failure)'); + expect(wire.request.system).toContain('(aborted: Tool timeout)'); + expect(wire.request.system).not.toContain('%SKIP%'); + expect(wire.request.system).not.toContain('%ABORT%'); + + expect(wire.request.prompt).toContain('(aborted: No source)'); + expect(wire.request.prompt).toContain('(skipped: No value in fixture)'); + expect(wire.request.prompt).not.toContain('%SKIP%'); + expect(wire.request.prompt).not.toContain('%ABORT%'); + }); + + it('removes YAML frontmatter from embedded form markdown shown to the model', () => { + const form = parseForm(PROMPT_HYGIENE_FORM); + const wire = buildMockWireFormat(form, issues, [], 10, 'agent'); + + expect(wire.request.prompt).toContain('```markdown'); + expect(wire.request.prompt).toContain( + '', + ); + expect(wire.request.prompt).not.toContain('markform:'); + expect(wire.request.prompt).not.toContain('role_instructions:'); + expect(wire.request.prompt).not.toContain('---\nmarkform:'); + }); +});