diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index a3e8d006..d9d384b1 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "PACT", "source": "./pact-plugin", "description": "Orchestration harness that turns Claude Code into a coordinated team of specialist AI agents", - "version": "4.2.12", + "version": "4.2.13", "author": { "name": "Synaptic-Labs-AI" }, diff --git a/README.md b/README.md index 17fe6303..02b40592 100644 --- a/README.md +++ b/README.md @@ -505,7 +505,7 @@ When installed as a plugin, PACT lives in your plugin cache: │ └── cache/ │ └── pact-plugin/ │ └── PACT/ -│ └── 4.2.12/ # Plugin version +│ └── 4.2.13/ # Plugin version │ ├── agents/ │ ├── commands/ │ ├── skills/ diff --git a/pact-plugin/.claude-plugin/plugin.json b/pact-plugin/.claude-plugin/plugin.json index 62690430..a83cb86e 100644 --- a/pact-plugin/.claude-plugin/plugin.json +++ b/pact-plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "PACT", - "version": "4.2.12", + "version": "4.2.13", "description": "Orchestration harness that turns Claude Code into a coordinated team of specialist AI agents", "author": { "name": "Synaptic-Labs-AI", diff --git a/pact-plugin/README.md b/pact-plugin/README.md index 007cae17..4fe2d7dc 100644 --- a/pact-plugin/README.md +++ b/pact-plugin/README.md @@ -1,6 +1,6 @@ # PACT — Orchestration Harness for Claude Code -> **Version**: 4.2.12 +> **Version**: 4.2.13 Turn a single Claude Code session into a managed team of specialist AI agents that prepare, design, build, and test your code systematically. diff --git a/pact-plugin/commands/scan-pending-tasks.md b/pact-plugin/commands/scan-pending-tasks.md index ea2565ac..b7aa9366 100644 --- a/pact-plugin/commands/scan-pending-tasks.md +++ b/pact-plugin/commands/scan-pending-tasks.md @@ -32,7 +32,7 @@ Cron-fire body — silent read; emit nothing unless a real artifact is on disk f ```bash SJ="{plugin_root}/hooks/shared/session_journal.py" SD='{session_dir}' - ARMED_AT=$(python3 "$SJ" read-last --type scan_armed --session-dir "$SD" | python3 -c 'import json,sys; e=json.load(sys.stdin); print(e["armed_at"] if e else "")') + ARMED_AT=$(python3 "$SJ" read-last --type scan_armed --session-dir "$SD" | python3 -c 'import json,sys,datetime; e=json.load(sys.stdin); print(int(datetime.datetime.strptime(e["ts"],"%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=datetime.timezone.utc).timestamp()) if e else "")' 2>/dev/null) if [ -n "$ARMED_AT" ]; then delta=$(( $(date +%s) - ARMED_AT )) if [ $delta -ge 0 ] && [ $delta -lt 300 ]; then exit 0; fi @@ -41,7 +41,39 @@ Cron-fire body — silent read; emit nothing unless a real artifact is on disk f Fail-open: `read-last` returns literal `null` on missing journal / no events / corrupt JSONL. The `python3 -c` extraction yields empty string in those cases; `[ -n "$ARMED_AT" ]` is false; the gate falls through to Step 1. - Negative-delta guard: `[ $delta -ge 0 ]` forces future-dated `armed_at` (clock skew / adversarial write) to fall through. Without it, negative deltas would always pass `-lt 300` — the gate would become a kill-switch. + Stderr suppression: the `2>/dev/null` redirect on the inner `python3 -c` extractor silences Python tracebacks (TypeError / ValueError / KeyError) that would otherwise surface in the cron-fire LLM-turn output when the journal contains a malformed `ts` (writer-bug, schema-drift, or corruption — e.g., `ts=42`, `ts=null`, `ts=""`, unparseable string). The fail-open contract is preserved unchanged: the extractor's stdout is still empty on failure, `[ -n "$ARMED_AT" ]` is still false, and the gate still falls through to Step 1. Trade-off: stderr-clean cron-fire turns (no traceback noise to the user) vs lost operator diagnostic visibility under journal corruption (a malformed `ts` no longer surfaces as a visible Python traceback; the only signal is the silent fall-through). Acceptable because the fail-open behavior is intentional and the journal-shape contract is pinned at write time by `session_journal.py`'s `_validate_event_schema`. A future editor MUST NOT redirect stdout (`>/dev/null` or `&>/dev/null`) — that would defeat the fail-open guard which depends on the extractor emitting empty-string on failure. + + Negative-delta guard: `[ $delta -ge 0 ]` forces a future-dated `scan_armed.ts` (clock skew / adversarial write) to fall through. Without it, negative deltas would always pass `-lt 300` — the gate would become a kill-switch. + +0.5. **Self-correcting teardown check**. Read the latest `teardown_request`, `scan_armed`, and `scan_disarmed` event timestamps; if a `teardown_request` event landed AFTER the current arm AND has not yet been processed via `scan_disarmed`, invoke `Skill("PACT:stop-pending-scan")` and return without continuing to Step 1. This is the **self-correcting fallback path** that catches orchestrator non-compliance with the `_TEARDOWN_DIRECTIVE` `additionalContext` channel — the orchestrator persona's scope-boundary clause excludes inbound `additionalContext` from MUST-binding (see memory `a7bcd37f`), so the cron-fire body itself enforces teardown via the trusted in-band channel + journal source-of-truth pair. Bounds compliance latency to ≤1 cron interval (5min nominal, up to ~6min with typical jitter, 15min worst-case) regardless of `additionalContext`-directive handling. Charter cross-reference: [§Cron-Fire Mechanism Teardown trigger sites](../protocols/pact-communication-charter.md#cron-fire-mechanism). + + ```bash + SJ="{plugin_root}/hooks/shared/session_journal.py" + SD='{session_dir}' + LATEST_TEARDOWN_REQUEST=$(python3 "$SJ" read-last --type teardown_request --session-dir "$SD" | python3 -c 'import json,sys,datetime; e=json.load(sys.stdin); print(int(datetime.datetime.strptime(e["ts"],"%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=datetime.timezone.utc).timestamp()) if e else "")' 2>/dev/null) + LATEST_SCAN_ARMED=$(python3 "$SJ" read-last --type scan_armed --session-dir "$SD" | python3 -c 'import json,sys,datetime; e=json.load(sys.stdin); print(int(datetime.datetime.strptime(e["ts"],"%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=datetime.timezone.utc).timestamp()) if e else "")' 2>/dev/null) + LATEST_SCAN_DISARMED=$(python3 "$SJ" read-last --type scan_disarmed --session-dir "$SD" | python3 -c 'import json,sys,datetime; e=json.load(sys.stdin); print(int(datetime.datetime.strptime(e["ts"],"%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=datetime.timezone.utc).timestamp()) if e else "")' 2>/dev/null) + if [ -n "$LATEST_TEARDOWN_REQUEST" ] && [ -n "$LATEST_SCAN_ARMED" ] && \ + [ "$LATEST_TEARDOWN_REQUEST" -gt "$LATEST_SCAN_ARMED" ] && \ + { [ -z "$LATEST_SCAN_DISARMED" ] || \ + [ "$LATEST_TEARDOWN_REQUEST" -gt "$LATEST_SCAN_DISARMED" ]; }; then + exit 0 + fi + ``` + + When the bash exits 0 here, the scan body's LLM-side action is: invoke `Skill("PACT:stop-pending-scan")` and return without executing Steps 1+. The `stop-pending-scan` body is idempotent (CronList no-op on absent; `scan_disarmed` writes are benign; latest-event semantics dominate). Precedent: `commands/wrap-up.md:98` invokes the same skill from a sibling command. Multiple consecutive cron-fires hitting this branch before `stop-pending-scan` completes EACH write would result in multiple `scan_disarmed` events — benign; the latest dominates. + + Fail-open: `read-last` returns literal `null` on missing journal / no events / corrupt JSONL. The `python3 -c` extractors yield empty string in those cases; `[ -n "$VAR" ]` is false; the gate falls through to Step 1. + + Stderr suppression: each of the 3 inline `python3 -c` extractors above carries `2>/dev/null` for the same trade-off documented in the Step 0 `## Stderr suppression` paragraph above (stderr-clean cron-fire turns vs lost operator diagnostic visibility under journal corruption; fail-open contract preserved because the guards consume stdout only). A future editor MUST NOT redirect stdout (`>/dev/null` or `&>/dev/null`) on these extractors — that would defeat the fail-open `[ -n "$VAR" ]` guards which depend on the extractors emitting empty-string on failure. + + Uniform strptime conversion: `scan_armed.ts`, `scan_disarmed.ts`, and `teardown_request.ts` are all stamped as ISO-8601 UTC strings by `session_journal.make_event` (format literal `"%Y-%m-%dT%H:%M:%SZ"`). All three extractors use `python3 strptime(...).replace(tzinfo=utc).timestamp()` to convert ISO→int-epoch inline, making operands integer-comparable. Direct lexical comparison of `ts` strings would coincidentally match epoch ordering under the canonical fixed-shape format, but breaks silently under format drift (sub-second fractions, mixed TZ suffixes, or any future format relaxation) — strptime conversion is the architecturally correct path and is pinned by `test_python_consumer_parses_ts_via_strptime_not_string_compare`. + + A future editor MUST NOT add `set -e`, `set -o pipefail`, or an `ERR` trap to this block — empty-operand `-gt` would abort the cron-fire turn, breaking fail-open and reintroducing the compliance gap Option D exists to close. + + A future editor MUST NOT wrap the `python3 -c` extractors in `try/except` to silence parse errors — empty-string-on-failure is the fail-open contract; swallowing exceptions with a default value silently breaks it. + + A future editor MUST NOT switch the extractor to `fromisoformat` without verifying Python version baseline (3.11+ required for `Z`-suffix handling); the explicit-format `strptime` is portable to 3.7+. If a future Python baseline bump to 3.11+ makes `fromisoformat` viable, coordinate the switch across the 2 coupled sites (`session_journal.py` `make_event` SSOT + the uniform-strptime extractor pattern in Step 0 / Step 0.5 / `wake_inbox_drain.py:685-694`) AND update `test_python_consumer_parses_ts_via_strptime_not_string_compare`'s canonical literal. 1. `TaskList` — enumerate tasks. Filter to: `owner == any teammate` AND `status == "in_progress"` AND `metadata.intentional_wait.reason == "awaiting_lead_completion"`. (These are the tasks where a teammate has submitted teachback or handoff and is idle awaiting acceptance.) 2. For each candidate, raw-read `~/.claude/tasks/{team_name}/{id}.json` via filesystem read (NOT `TaskGet` — TaskGet does not surface `metadata.teachback_submit` or `metadata.handoff`). Inspect `metadata.teachback_submit` (for teachback gate tasks) and `metadata.handoff` (for primary-work tasks). @@ -61,7 +93,7 @@ The five anti-hallucination guardrails are LOAD-BEARING. Each guardrail prevents ### No-Narration -> **No-Narration**: The scan emits NO user-facing prose narrating what it found, considered, skipped, or did. The only outputs are: (a) `SendMessage` to the teammate as part of the acceptance two-call pair, (b) `TaskUpdate(status="completed")`, or (c) nothing. The scan never emits "Scanning… found 0 pending tasks", "Skipping task #N because…", "Race window detected, will retry next fire", or similar status-narrating text. +> **No-Narration**: The scan emits NO user-facing prose narrating what it found, considered, skipped, or did. The only outputs are: (a) `SendMessage` to the teammate as part of the acceptance two-call pair, (b) `TaskUpdate(status="completed")`, (c) `Skill("PACT:stop-pending-scan")` invocation when Step 0.5 self-correcting teardown fires, or (d) nothing. The scan never emits "Scanning… found 0 pending tasks", "Skipping task #N because…", "Race window detected, will retry next fire", or similar status-narrating text. **Audit**: No-Narration prevents the cron-fire noise failure mode. A 5-minute cron firing 12 times per hour produces 12 LLM turns per hour during active teammate work. If each fire emits a "Scanning…" prose line, the user's transcript fills with 12 useless status lines per hour. Worse, the prose-emit pattern primes the editing LLM to treat the cron fire as a conversation turn requiring response — re-opening the cascade failure mode the scan exists to prevent. An editing LLM tempted to "add a brief status line for observability" is re-introducing the failure mode. Observability happens via `CronList` (cron is registered), `TaskList` (tasks transition status), and journal events (HANDOFF acceptance is journaled) — NOT via per-fire prose. diff --git a/pact-plugin/commands/start-pending-scan.md b/pact-plugin/commands/start-pending-scan.md index a89bc346..2b0691b1 100644 --- a/pact-plugin/commands/start-pending-scan.md +++ b/pact-plugin/commands/start-pending-scan.md @@ -43,13 +43,12 @@ Single procedure — the command IS the operation. No Arm/Teardown sub-section. set -e trap 'rc=$?; echo "[JOURNAL WRITE FAILED] start-pending-scan.md (bash line $LINENO): \"${BASH_COMMAND%%$'\''\n'\''*}\" exit=$rc" >&2; exit $rc' ERR SJ="{plugin_root}/hooks/shared/session_journal.py" - ARMED_AT=$(date +%s) - python3 "$SJ" write --type scan_armed --session-dir '{session_dir}' --stdin <&2; exit $rc' ERR SJ="{plugin_root}/hooks/shared/session_journal.py" - DISARMED_AT=$(date +%s) - python3 "$SJ" write --type scan_disarmed --session-dir '{session_dir}' --stdin < NoReturn: import errno import os import re + from datetime import datetime, timezone from typing import Any from pathlib import Path @@ -167,6 +171,13 @@ def _emit_load_failure_advisory(stage: str, error: BaseException) -> NoReturn: "hookSpecificOutput": {"hookEventName": "UserPromptSubmit"}, }) +# Byte-coupled with session_journal.make_event's ts format literal and +# with the strptime literal in scan-pending-tasks.md Step 0 / Step 0.5. +# All three sites parse the auto-stamped `ts` ISO-8601 UTC string into +# an integer epoch via this literal; drift between sites silently breaks +# the cross-component comparison contract. +_TS_FMT = "%Y-%m-%dT%H:%M:%SZ" + # Bound stdin payload at 1 MB. UserPromptSubmit payloads carry the user's # prompt text + session metadata; a 1 MB cap is generous and serves as # defense-in-depth against parser amplification. @@ -507,9 +518,11 @@ def _decide_and_emit(input_data: dict) -> None: 5. Producer-side idempotency: read `scan_armed` and `scan_disarmed` events from this session's journal. Suppress only when scan_armed is present with a well-typed - armed_at AND (scan_disarmed is absent OR scan_armed.armed_at - > scan_disarmed.disarmed_at). Any journal-read failure or - malformed event falls through to step 6 (fail-conservative). + ISO-string `ts` (parsed via strptime) AND (scan_disarmed + is absent OR scan_armed.ts parsed-epoch > scan_disarmed.ts + parsed-epoch). Any journal-read failure or malformed `ts` + (non-str, empty, or unparseable) falls through to step 6 + (fail-conservative). 6. Fallback: count_active_tasks(team_name) >= 1 → emit Arm. Otherwise → suppressOutput. @@ -648,12 +661,28 @@ def _decide_and_emit(input_data: dict) -> None: # disarm suppresses. # # Event-presence is the primary predicate; timestamp comparison is - # gated on both fields being well-typed int values. A malformed - # event (missing armed_at, or wrong type) is treated as if absent - # and falls through to the existing emit behavior — over-emit is - # benign under the skill body's CronList exact-suffix-match - # idempotency; under-emit could miss a teammate's completion- - # authority signal. + # gated on both events carrying a well-typed ISO-string `ts` that + # parses successfully via strptime against `_TS_FMT`. A malformed + # event (missing ts, non-str, empty, or unparseable) is treated as + # if absent and falls through to the existing emit behavior — + # over-emit is benign under the skill body's CronList exact-suffix- + # match idempotency; under-emit could miss a teammate's completion- + # authority signal. Layered defense (COMPOSITE invariant): the + # malformed-ts fall-through is pinned by the composite of three + # layers — (1) the per-side `isinstance(ts, str) and ts` guard, + # (2) the inner `try: strptime / except (TypeError, ValueError)`, + # and (3) the OUTER `except Exception:` below. For the documented + # malformed-ts cases (ts=42, ts=False, ts=None, ts='', unparseable + # str), stripping any 1 or 2 layers leaves the fall-through intact; + # ONLY the cumulative strip of all 3 layers breaks it. The outer + # catch is the cheapest single layer to break the invariant under + # cumulative strip; the inner str-guard + inner try/except are + # honest defense-in-depth — short-circuiting cleanly when ts is + # recognizably malformed (sparing the outer catch) AND coverage + # for future unknown failure modes (e.g., a future ts shape that + # strptime accepts but yields a nonsense epoch). See the + # CUMULATIVE 3-row counter-test recipe in test_wake_inbox_drain.py + # Q2 retargeted test docstrings (post-F1+D1 reframe). # # Outer-except rationale: the producer-side check has a strict # fail-conservative contract — any unexpected failure must fall @@ -682,17 +711,37 @@ def _decide_and_emit(input_data: dict) -> None: armed = read_last_event("scan_armed") disarmed = read_last_event("scan_disarmed") if armed is not None: - armed_at = armed.get("armed_at") - if isinstance(armed_at, int) and not isinstance(armed_at, bool): + armed_ts = armed.get("ts") + armed_epoch: int | None = None + if isinstance(armed_ts, str) and armed_ts: + try: + armed_epoch = int( + datetime.strptime(armed_ts, _TS_FMT) + .replace(tzinfo=timezone.utc) + .timestamp() + ) + except (TypeError, ValueError): + # Fail-conservative: malformed scan_armed.ts falls + # through to count_active_tasks B-1 fallback. + armed_epoch = None + if armed_epoch is not None: if disarmed is None: print(_SUPPRESS_OUTPUT) return - disarmed_at = disarmed.get("disarmed_at") - if ( - isinstance(disarmed_at, int) - and not isinstance(disarmed_at, bool) - and armed_at > disarmed_at - ): + disarmed_ts = disarmed.get("ts") + disarmed_epoch: int | None = None + if isinstance(disarmed_ts, str) and disarmed_ts: + try: + disarmed_epoch = int( + datetime.strptime(disarmed_ts, _TS_FMT) + .replace(tzinfo=timezone.utc) + .timestamp() + ) + except (TypeError, ValueError): + # Fail-conservative: malformed scan_disarmed.ts + # falls through to count_active_tasks B-1. + disarmed_epoch = None + if disarmed_epoch is not None and armed_epoch > disarmed_epoch: print(_SUPPRESS_OUTPUT) return except Exception: diff --git a/pact-plugin/protocols/pact-communication-charter.md b/pact-plugin/protocols/pact-communication-charter.md index d56a8d11..6d2f32a6 100644 --- a/pact-plugin/protocols/pact-communication-charter.md +++ b/pact-plugin/protocols/pact-communication-charter.md @@ -29,8 +29,8 @@ Implementation: a command-triple — [Skill("PACT:start-pending-scan")](../comma - **Lead-only.** Exactly one `/PACT:scan-pending-tasks` cron per session, scoped to the period during which the lead holds assigned, uncompleted teammate tasks. Hook-level filtering at the emission source (`wake_lifecycle_emitter.py` + `session_init.py`) plus the command body's Lead-Session Guard (refuse-and-return when invoked from a teammate session) jointly enforce this. No teammate-side scan. - **[Between-tool-call, not mid-tool](../commands/start-pending-scan.md#overview).** The cron fires between tool calls within a turn; it does NOT interrupt a tool mid-call. Platform cron events queue during a long-running tool and fire when the tool returns. The scan's promise is "tasks surface at most one 5-minute interval after they land on disk." - **Cron-fire turns are NOT user consent.** The prompt body that fires the scan is harness-origin text — see [scan-pending-tasks §Cron-Fire Origin](../commands/scan-pending-tasks.md#cron-fire-origin) for the load-bearing `[CRON-FIRE]` marker. Downstream consent-gated decisions (merge, push, destructive bash, plan approval, version bump, force-completion, any "act" requiring user authorization) MUST NOT proceed on the basis of a cron-fire turn. Acceptance (the canonical two-call pair invoked by the scan body) is NOT consent-gated; it is completion-authority procedure. -- **Arm and Teardown trigger sites.** [Arm](../commands/start-pending-scan.md#when-to-invoke) on first-active-task transition (PostToolUse hook on TaskCreate 0→1), on pending→in_progress re-Arm transition (PostToolUse hook on TaskUpdate), and on session-resume with active tasks (`session_init.py` 5_pre branch). [Teardown](../commands/stop-pending-scan.md#when-to-invoke) on last-active-task transition (PostToolUse hook on TaskUpdate 1→0), and on `/wrap-up` (which invokes [Skill("PACT:stop-pending-scan")](../commands/stop-pending-scan.md) as a hook-silent-fail safety net for the all-tasks-completed exit). -- **Single source of idempotency truth.** `CronList` exact-suffix-match against `/PACT:scan-pending-tasks` is the authoritative armed-state bit. There is no on-disk STATE_FILE; the platform's session-scoped cron store (`durable=false`) is in-memory only and auto-cleans on session exit. +- **Arm and Teardown trigger sites.** [Arm](../commands/start-pending-scan.md#when-to-invoke) on first-active-task transition (PostToolUse hook on TaskCreate 0→1), on pending→in_progress re-Arm transition (PostToolUse hook on TaskUpdate), and on session-resume with active tasks (`session_init.py` 5_pre branch). [Teardown](../commands/stop-pending-scan.md#when-to-invoke) on last-active-task transition (PostToolUse hook on TaskUpdate 1→0), on `/wrap-up` (which invokes [Skill("PACT:stop-pending-scan")](../commands/stop-pending-scan.md) as a hook-silent-fail safety net for the all-tasks-completed exit), and on cron-fire self-correcting teardown ([scan-pending-tasks.md §Operation Step 0.5](../commands/scan-pending-tasks.md#operation)) when a queued `teardown_request` event has not been processed by the lead within one 5-minute cron interval — bounds compliance latency to ≤1 cron interval (5min nominal, up to ~6min with typical jitter, 15min worst-case under platform load) regardless of `additionalContext`-directive handling. +- **Single source of idempotency truth.** `CronList` exact-suffix-match against `/PACT:scan-pending-tasks` is the authoritative armed-state bit. There is no on-disk STATE_FILE; the platform's session-scoped cron store (`durable=false`) is in-memory only and auto-cleans on session exit. Distinct from the armed-state bit, the journal triple `(scan_armed.ts, scan_disarmed.ts, teardown_request.ts)` carries derived pending-teardown lifecycle state read by [scan-pending-tasks.md §Operation Step 0.5](../commands/scan-pending-tasks.md#operation) — all three timestamps share the auto-stamped ISO-8601 UTC `ts` field from `session_journal.make_event` and are parsed uniformly via `strptime` to int-epoch before comparison. These are orthogonal questions ("is the cron going to fire again?" vs "is a pending teardown unprocessed?") and the two sources do not conflict. - **7-day platform expiry.** Recurring cron entries auto-expire after 7 days (one final fire, then auto-delete). PACT sessions usually do not span 7+ days; the limitation is documented and deferred to a v2 refresh-on-aging mechanism. ### Scan Discipline @@ -38,7 +38,7 @@ Implementation: a command-triple — [Skill("PACT:start-pending-scan")](../comma Five anti-hallucination guardrails live in [scan-pending-tasks.md §Guardrails](../commands/scan-pending-tasks.md#guardrails) and govern the cron-fired scan body. Each guardrail prevents a specific cascade failure mode; each appears VERBATIM in the skill body (paraphrase during PR review is silent regression, audit-anchored). The five guardrails are the architectural replacement for the prior session-level "no-act-on-Monitor-events" discipline — they convert that session-level safety pin into per-fire structural properties. - **Read-Filesystem-Only.** The scan reads task JSON from `~/.claude/tasks/{team_name}/{id}.json` via filesystem read. It does NOT read teammate `SendMessage` prose, does NOT infer task state from prior conversation context, does NOT compose acceptance based on imagined teammate output. The filesystem is the single source of truth. -- **No-Narration.** The scan emits NO user-facing prose narrating what it found, considered, skipped, or did. The only outputs are: (a) `SendMessage` to the teammate as part of the acceptance two-call pair, (b) `TaskUpdate(status="completed")`, or (c) nothing. +- **No-Narration.** The scan emits NO user-facing prose narrating what it found, considered, skipped, or did. The only outputs are: (a) `SendMessage` to the teammate as part of the acceptance two-call pair, (b) `TaskUpdate(status="completed")`, (c) `Skill("PACT:stop-pending-scan")` invocation when Step 0.5 self-correcting teardown fires, or (d) nothing. - **Raw-Read-Metadata.** The scan reads `metadata.teachback_submit` and `metadata.handoff` via RAW filesystem read of the task JSON file, NOT via `TaskGet`. `TaskGet` returns a metadata-stripped summary that does not include the canonical artifact fields. - **Race-Window-Skip.** If the raw filesystem read returns null or empty `metadata.teachback_submit` / `metadata.handoff` for a task in `awaiting_lead_completion` status, the scan SKIPS the task and emits nothing. The race window is the lag between the teammate's `TaskUpdate(metadata=...)` call landing in the in-memory store and the platform's filesystem flush. Empirically observed at 20+ seconds in adversarial conditions. The scan does NOT reject on null read; the next 5-minute cron fire re-evaluates. - **Emit-Nothing-If-Empty.** When the scan finds no candidate tasks, or all candidates are skipped per Race-Window-Skip, the scan emits NOTHING. Return to idle. Empty scans are the common case during active teammate work and must remain silent. diff --git a/pact-plugin/tests/runbooks/pending-scan-dogfood.md b/pact-plugin/tests/runbooks/pending-scan-dogfood.md index 29a56384..3fa4f7ba 100644 --- a/pact-plugin/tests/runbooks/pending-scan-dogfood.md +++ b/pact-plugin/tests/runbooks/pending-scan-dogfood.md @@ -167,9 +167,60 @@ grep -o '/PACT:scan-pending-tasks' \ **Acceptance**: At least one occurrence per file (per-file `grep -o ... | wc -l` returns `>= 1` for each of the 3 files). +## 12. Step 0.5 Self-Correcting Teardown — Empirical Verification + +This step empirically verifies the Option D self-correcting fallback path that catches orchestrator non-compliance with the `_TEARDOWN_DIRECTIVE` `additionalContext` channel. See architecture doc `docs/architecture/819-cron-self-teardown.md` and memory `a7bcd37f` for the architectural tension this section dogfoods. + +### 12a. Simulate Orchestrator Non-Compliance + +**Action**: With the cron armed (after Step 1) and a teammate task active, trigger a 1→0 transition (have the teammate complete the last active task). The `wake_lifecycle_emitter.py` PostToolUse hook will write a `teardown_request` event to the journal AND emit the `_TEARDOWN_DIRECTIVE` `additionalContext`. **As the orchestrator, do NOT act on the directive.** Continue with whatever non-teardown action would naturally follow (e.g., continue conversation with the user, run an unrelated tool call). + +**Expected**: +- The journal contains a `teardown_request` event with `ts` AFTER the latest `scan_armed.ts`. +- `CronList` still contains the `/PACT:scan-pending-tasks` entry (no Teardown has been invoked). +- No `scan_disarmed` event has been written. + +**Acceptance**: `python3 pact-plugin/hooks/shared/session_journal.py read-last --type teardown_request --session-dir ` returns a populated event with ISO `ts`. `CronList | grep -o '/PACT:scan-pending-tasks' | wc -l` returns `1`. + +### 12b. Wait for Next Cron-Fire (Journal-Event-Anchored) + +**Action**: Wait until the `scan_disarmed` event appears in the journal, typically within ~6 minutes (allow up to ~15 minutes worst-case under platform load). The next cron-fire of `/PACT:scan-pending-tasks` will execute Step 0.5. + +**Expected**: +- Step 0's warmup-grace check passes (more than 300s have elapsed since the latest `scan_armed`). +- Step 0.5's bash block computes: `LATEST_TEARDOWN_REQUEST > LATEST_SCAN_ARMED` is TRUE; `LATEST_SCAN_DISARMED` is empty (no prior disarm in this arm cycle); the condition fires; the bash exits 0. +- The scan-body LLM-side action: invoke `Skill("PACT:stop-pending-scan")` and return without continuing to Steps 1+. +- `stop-pending-scan` body executes: `CronList` lookup finds the match; `CronDelete` succeeds; `scan_disarmed` event is written. + +**Acceptance**: Once the `scan_disarmed` event appears in the journal (typically within ~6 minutes of 12a), `CronList | grep -o '/PACT:scan-pending-tasks' | wc -l` returns `0` AND `python3 pact-plugin/hooks/shared/session_journal.py read-last --type scan_disarmed --session-dir ` returns a populated event with `ts` greater than the `teardown_request.ts` (compared via strptime → epoch). + +### 12c. No-Narration Discipline Across Self-Teardown Fire + +**Action**: Observe the lead session transcript across the Step 0.5 fire from 12b. + +**Expected**: No prose lines like "Detected pending teardown, invoking stop…", "Self-correcting…", or any narrative output. The only user-visible output across the fire turn is the `Skill("PACT:stop-pending-scan")` invocation itself (which the scan body discipline allows per the No-Narration allowed-outputs list item (c) per §No-Narration in `scan-pending-tasks.md` and the §Scan Discipline mirror in `pact-communication-charter.md`). + +**Acceptance**: No status-narrating prose in the transcript across the self-teardown fire turn. + +### 12d. Idempotency Across Repeat Fires (Edge Case) + +**Action**: Re-arm the cron (spawn a new teammate task to trigger the Arm hook). Have the teammate complete to trigger another 1→0 transition (writes a new `teardown_request`). Wait for the next Step 0.5 fire and observe. + +**Expected**: Step 0.5 fires again (the new `teardown_request.ts` > new `scan_armed.ts`, both compared via strptime → epoch). The cycle is independent of the prior 12a-12c cycle; latest-event semantics select the most recent triple. No interference from prior cycle's events. + +**Acceptance**: Re-arm cycle's self-teardown fires correctly; `scan_disarmed` event written; cron deleted. + +### 12e. Strptime Format Coupling Verification + +**Action**: Inspect the ISO format literal in the Step 0.5 bash block. Compare against `session_journal.py` `make_event` line 325. + +**Expected**: Both contain the identical literal `"%Y-%m-%dT%H:%M:%SZ"`. Under #821's ts-unification, all three pending-scan events (scan_armed, scan_disarmed, teardown_request) carry only the auto-stamped `ts` field, and the Step 0 / Step 0.5 bash extractors parse all three via strptime against this literal. Any drift between this literal and make_event's format string would silently break the uniform-strptime parsing. + +**Acceptance**: `grep -c '%Y-%m-%dT%H:%M:%SZ' pact-plugin/commands/scan-pending-tasks.md` returns `>=2` (Step 0.5 bash extractor + audit prose both reference the literal by design). `grep -c '%Y-%m-%dT%H:%M:%SZ' pact-plugin/hooks/shared/session_journal.py` returns `>=1` (`make_event` and any callers contain it). The literal must match BYTE-IDENTICAL across both files. + ## Pass Criteria -All 11 steps pass independently. A single step failure fails the runbook and must be triaged before merging the next PACT release. +All 12 steps pass independently. A single step failure fails the runbook and must be triaged before merging the next PACT release. ## Failure Triage diff --git a/pact-plugin/tests/test_pending_scan_coupling_invariant.py b/pact-plugin/tests/test_pending_scan_coupling_invariant.py index 76de82fb..8284a079 100644 --- a/pact-plugin/tests/test_pending_scan_coupling_invariant.py +++ b/pact-plugin/tests/test_pending_scan_coupling_invariant.py @@ -31,10 +31,38 @@ - Mutating BOTH in lockstep to a new equal pair: test PASSES (invariant preserved); pinned per-side tests would also need updating. - Mutating BOTH but to a NON-equal pair (asymmetric drift): test FAILS. + +Strptime-not-string-compare coupling (post-#821 ts unification): + +Under #821's ts-unification (folded into PR #820), the three pending-scan +lifecycle events (`scan_armed`, `scan_disarmed`, `teardown_request`) all +carry only the auto-stamped ISO-8601 `ts` field; the prior integer-epoch +`armed_at`/`disarmed_at` fields are deleted. All three consumer sites +(scan-pending-tasks.md Step 0, Step 0.5, and wake_inbox_drain.py:685+ +producer-side idempotency check) parse `ts` uniformly via strptime to +int-epoch before comparing. + +The 4-site byte-identity coupling chain collapses to 2 sites: + + - session_journal.py `make_event` (upstream SSOT — stamps `ts` via + `strftime(\"%Y-%m-%dT%H:%M:%SZ\")`) + - the uniform-strptime extractor pattern across Step 0 / Step 0.5 / + wake_inbox_drain.py:685+ (consumers — parse `ts` via the same literal) + +The remaining structural defense in this file is the strptime-not-string- +compare invariant: the Python consumer at wake_inbox_drain.py:685+ MUST +parse `scan_armed.ts` and `scan_disarmed.ts` via strptime — direct +lexical string comparison would coincidentally match epoch ordering under +the canonical `%Y-%m-%dT%H:%M:%SZ` format but silently break under any +future format drift (sub-second fractions, mixed TZ suffixes, or a +fromisoformat switch on a Python 3.11+ baseline). This file's pin is +counterpart to scan-pending-tasks.md:66 audit prose's "direct lexical +comparison ... breaks silently under format drift" forecloseure. """ from __future__ import annotations +import ast import re from pathlib import Path @@ -43,6 +71,11 @@ ROOT = Path(__file__).resolve().parent.parent SCAN_MD = ROOT / "commands" / "scan-pending-tasks.md" START_MD = ROOT / "commands" / "start-pending-scan.md" +SESSION_JOURNAL_PY = ROOT / "hooks" / "shared" / "session_journal.py" +TEST_SELF_TEARDOWN_PY = ROOT / "tests" / "test_scan_pending_tasks_self_teardown.py" +TEST_COMMAND_STRUCTURE_PY = ROOT / "tests" / "test_scan_pending_tasks_command_structure.py" + +ISO_FORMAT_LITERAL_CANONICAL = "%Y-%m-%dT%H:%M:%SZ" def _extract_grace_seconds_from_step_0(scan_md_text: str) -> int: @@ -158,3 +191,387 @@ def test_cron_interval_is_positive_integer(): f"cron interval must be > 0 minutes; got {cron_minutes}. " f"`*/0` is invalid cron syntax." ) + + +def test_python_consumer_parses_ts_via_strptime_not_string_compare(): + """Code-shape invariant: the wake_inbox_drain.py producer-side + idempotency check MUST parse `scan_armed.ts` and `scan_disarmed.ts` + via strptime AND thread the result through `int(...timestamp())` + to produce an `_epoch` binding before comparing — direct lexical + string comparison of the ISO-8601 `ts` strings would coincidentally + match epoch ordering under the canonical `%Y-%m-%dT%H:%M:%SZ` + format but silently break under any format drift (sub-second + fractions, mixed TZ suffixes, or any future format relaxation + including a 3.11+ `fromisoformat` switch). + + This pin guards against the future-editor 'simplification': + + armed_ts = armed.get(\"ts\") + disarmed_ts = disarmed.get(\"ts\") + if armed_ts > disarmed_ts: # direct lex compare — silently broken under drift + print(_SUPPRESS_OUTPUT) + + The architecturally-correct shape (which this test pins) is: + + armed_ts = armed.get(\"ts\") + armed_epoch = int(datetime.strptime(armed_ts, _TS_FMT) + .replace(tzinfo=timezone.utc).timestamp()) + ... symmetric for disarmed ... + if armed_epoch > disarmed_epoch: + + USE-based AST Call-node pin (per secretary memory `fa044ba5`, + promoted to form-(c) in commit-l). Earlier iterations of this pin + walked a regression ladder: + + - Original presence-based pin: `count("strptime") >= 2` inside + the span. Phantom-greens any decoy `_ = datetime.strptime(...)` + because the decoy preserves the call count while the + comparator falls back to direct lex compare. + + - form-(b) result-binding regex (commit-i/j, superseded): + `\\w+_epoch\\s*=\\s*int\\s*\\(\\s*\\n?\\s*datetime\\.strptime\\([^)]*_TS_FMT` + against the span. Closed the bare-decoy vector but phantom- + greens any Row-4 mutant of the shape: + + armed_epoch = int(datetime.strptime(armed_ts, _TS_FMT) + .replace(tzinfo=timezone.utc).timestamp()) + armed_epoch = armed_ts # OVERWRITE — value discarded + ... + if armed_epoch > disarmed_epoch: # silent lex compare + + The regex matches the decoy result-binding line, satisfies + `>= 2`, and never inspects subsequent re-assignment of the + same `_epoch` Name. Empirically verified phantom-green + against the producer-side span via /tmp probe during the + commit-l fold (form-(b) regex returned 2 matches on a Row-4 + mutant where the comparator does string compare). + + - form-(c) AST Call-node pin (commit-l, current): parse the + whole module via `ast.parse`, locate the FunctionDef whose + body contains a `read_last_event()` Call to + BOTH `scan_armed` and `scan_disarmed`, find the Try node + whose body wraps both reads, then collect EVERY Assign / + AnnAssign within that Try whose target Name matches + `\\w+_epoch`. For each such assignment, classify the RHS: + + GOOD: `int(, _TS_FMT)>)` + — walks chained `.replace(...).timestamp()` attribute + Calls back to the strptime Call, asserts the function + is `datetime.strptime` and one arg is the Name + `_TS_FMT`. + + BAD : anything else, EXCEPT the fail-conservative literal + `_epoch = None` reset (allowed; the comparator + gates on `is not None` so a None reset is correct). + + The test PASSES iff: + (a) BOTH `armed_epoch` and `disarmed_epoch` have at least + one GOOD assignment inside the span. + (b) NO assignment to ANY `\\w+_epoch` target inside the + span has a BAD RHS that is not literal None. + + Condition (b) is what closes Row-4: a decoy GOOD assignment + followed by `armed_epoch = armed_ts` (Name RHS, not None + and not int(strptime(...))) triggers the BAD-overwrite + rejection. The hostile mutant cannot smuggle a string + comparator behind a decoy without leaving a BAD assignment + the AST walk catches. + + Strictly more general than the rejected hybrid regex + alternative (form-(b) + forbid `\\w+_epoch\\s*=\\s*\\w+_ts\\b`), + which only catches overwrites whose RHS Name has a `_ts` + suffix. Empirically falsified against a Row-4 mutant whose + overwrite used the arbitrary Name `armed_raw` (phantom-green + under hybrid, FAIL under form-(c)). + + Span localization (architect §10.3): the AST walk restricts to + assignments inside the Try whose body Calls `read_last_event` for + BOTH `scan_armed` and `scan_disarmed`. Docstrings / comments / + other code at module scope cannot satisfy the pin even if they + quote a result-binding shape verbatim (this docstring contains + `armed_epoch = int(datetime.strptime(...))` as illustrative + example — it is module-level prose, not inside the producer Try + AST, so does not contribute to the count). + + Counter-test-by-revert ladder (CUMULATIVE strip, each row + incorporates the prior; empirical verification recipe in + commit-l). Cardinality per row references what form-(c) detects. + + Row 1 (replace strptime CALLS with direct lex compare, no + decoys — naive future-editor 'simplification'): FAIL + because no GOOD `_epoch` assignments exist inside the span; + condition (a) violated. + + Row 2 (CUMULATIVE: lex compare AND add decoy + `_ = datetime.strptime(...,_TS_FMT)` references): FAIL + because `_` does not match `\\w+_epoch` — the decoy does not + even register with the AST walk. Condition (a) still + violated; presence-based pin would PHANTOM-GREEN here. + + Row 3 (CUMULATIVE: rename decoy LHS to `_epoch = + datetime.strptime(...,_TS_FMT)` WITHOUT the `int(...)` + wrapper): FAIL because the RHS is a bare strptime Call, not + the required `int(...)` outer Call; classified BAD, condition + (b) violated. + + Row 4 (CUMULATIVE: full GOOD decoy + `_epoch = int(datetime.strptime(...,_TS_FMT).timestamp())` + followed by overwrite `_epoch = _ts` and lex compare): + FAIL under form-(c) because the overwrite is classified BAD + (RHS is a Name, not None and not int(strptime(...))) and + condition (b) catches it. PHANTOM-GREEN under the prior + form-(b) regex — closing this is the commit-l contribution. + + Row 5 (residual, NOT closed by form-(c)): a mutant that + produces a GOOD assignment AND uses the resulting `_epoch` + name in the comparator, BUT the strptime call's first arg + is a SHADOW variable rebound to a different string before + the call: + + armed_ts = "1970-01-01T00:00:00Z" # shadow + armed_epoch = int(datetime.strptime(armed_ts, _TS_FMT) + .replace(tzinfo=timezone.utc).timestamp()) + + The AST walk classifies this GOOD because it does not + data-flow the arg back to `armed.get("ts")`. Closing Row 5 + requires full data-flow analysis (taint from `armed.get("ts")` + through the strptime arg) — out of scope for a structural + AST pin. F4-future-extension candidate if a Row-5-shape + mutant is observed in the wild; deferred as a residual. + + Span localization anchor stability (parallel to Row 5): the + span localization walk above keys on two string literals — the + function name `read_last_event` and the Constant arg + `"scan_armed"` (the first walk that finds the producer + FunctionDef; the inner Try walk also gates on both `"scan_armed"` + and `"scan_disarmed"`). If a future refactor renames either the + journal-read helper (e.g., `read_last_event` → + `read_latest_event_of_type`) or the lifecycle event-type strings, + the AST walk fails LOUD via the two asserts below — message text + explicitly names the expected function/Constant pair so the + future-editor's grep target is the assert message itself. + Alternative anchors considered: (i) a `_decide_and_emit`-style + FunctionDef-name anchor (more stable across journal-API rename + but couples to producer function name, which the same refactor + might also rename); (ii) a `# Producer-side deterministic Python + check` comment-block anchor (least stable — comment text drifts + freely). Current anchor is most-specific-to-producer-semantics + but co-fragile with journal-API name. Trade-off accepted: the + failure mode is fail-loud not silent, parallel to the Row-5 + decision. Closing this residual would require either a fallback + chain of alternative anchors (added complexity, additional code + paths to maintain) or coupling the test to a less-specific anchor + (loses Row-2/3/4 phantom-green resolution). Empirically verified + fail-loud-on-rename via cp-revert-restore probe. Deferred as a + residual; future-extension candidate if a journal-API rename + actually lands. + + What this test pins: the COMPOSITE invariant that EVERY `_epoch` + assignment inside the producer-side Try produces the load-bearing + int+strptime+_TS_FMT shape (or the fail-conservative None reset) + — the comparator pipeline cannot be silently rerouted around the + binding without leaving an AST violation. + + This test REPLACES the prior 4-site byte-identity coupling pin + (`test_iso_format_literal_byte_identical_across_step_0_5_coupling_sites`, + retired per architect §3.4 Q4 when #821's ts-unification reduced + the 4-site chain to 2 sites). Per-site presence pins + (`test_step_0_5_audit_prose_forbids_set_e_and_fromisoformat` below + and the structural pin in test_scan_pending_tasks_command_structure.py) + cover the format-literal presence side. The Python consumer code- + shape invariant — that's this test's contribution. + """ + src = (ROOT / "hooks" / "wake_inbox_drain.py").read_text(encoding="utf-8") + tree = ast.parse(src) + + # Span localization via AST: find the FunctionDef containing a + # `read_last_event("scan_armed")` Call, then the Try inside it whose + # body Calls `read_last_event` for both `scan_armed` and `scan_disarmed`. + # AST inspection (not source-string match) is robust against quote + # style, formatting, and unrelated future try/except blocks. + def _try_reads(node): + names = set() + for sub in ast.walk(node): + if (isinstance(sub, ast.Call) + and isinstance(sub.func, ast.Name) + and sub.func.id == "read_last_event" + and len(sub.args) == 1 + and isinstance(sub.args[0], ast.Constant)): + names.add(sub.args[0].value) + return names + + fn_node = None + for fn in ast.walk(tree): + if isinstance(fn, ast.FunctionDef) and "scan_armed" in _try_reads(fn): + fn_node = fn + break + assert fn_node is not None, ( + "wake_inbox_drain.py must contain a function whose body Calls " + "`read_last_event(\"scan_armed\")` — the producer-side idempotency " + "check anchor. The form-(c) AST pin cannot locate the span without it." + ) + + try_node = None + for node in ast.walk(fn_node): + if isinstance(node, ast.Try) and {"scan_armed", "scan_disarmed"}.issubset(_try_reads(node)): + if try_node is None or node.lineno < try_node.lineno: + try_node = node + assert try_node is not None, ( + "wake_inbox_drain.py producer-side function must wrap the " + "`read_last_event(\"scan_armed\")` / `read_last_event(\"scan_disarmed\")` " + "calls in a single Try block — the outer fail-conservative catch is " + "the load-bearing producer-side guard, and the form-(c) AST pin uses " + "this Try as the span." + ) + + # Classify the RHS of an `_epoch = ...` assignment. GOOD iff the + # RHS is `int(, _TS_FMT)>)`. The + # walk descends through `.replace(...).timestamp()` attribute-Call chains + # back to the strptime Call. + def _is_good_epoch_rhs(value): + if not (isinstance(value, ast.Call) + and isinstance(value.func, ast.Name) + and value.func.id == "int" + and len(value.args) == 1): + return False + cur = value.args[0] + while isinstance(cur, ast.Call) and isinstance(cur.func, ast.Attribute): + if (cur.func.attr == "strptime" + and isinstance(cur.func.value, ast.Name) + and cur.func.value.id == "datetime"): + return any(isinstance(a, ast.Name) and a.id == "_TS_FMT" + for a in cur.args) + cur = cur.func.value + return False + + # Collect every (Ann)Assign in the Try whose target is `\w+_epoch`. + epoch_name_re = re.compile(r"^\w*_epoch$") + epoch_assigns = [] # list of (lineno, target_id, is_good, is_none_reset) + for node in ast.walk(try_node): + if isinstance(node, ast.Assign): + for tgt in node.targets: + if isinstance(tgt, ast.Name) and epoch_name_re.match(tgt.id): + is_none = isinstance(node.value, ast.Constant) and node.value.value is None + epoch_assigns.append((node.lineno, tgt.id, _is_good_epoch_rhs(node.value), is_none)) + elif isinstance(node, ast.AnnAssign): + if (isinstance(node.target, ast.Name) + and epoch_name_re.match(node.target.id) + and node.value is not None): + is_none = isinstance(node.value, ast.Constant) and node.value.value is None + epoch_assigns.append((node.lineno, node.target.id, _is_good_epoch_rhs(node.value), is_none)) + + # Condition (b): no BAD non-None RHS in the span. Catches Row-4 mutants + # where a GOOD decoy is followed by `_epoch = _ts` overwrite. + violations = [(ln, nm) for ln, nm, good, none_reset in epoch_assigns + if not good and not none_reset] + assert not violations, ( + "wake_inbox_drain.py producer-side Try (the block wrapping " + "`read_last_event(\"scan_armed\")` / `read_last_event(\"scan_disarmed\")`) " + "contains `_epoch = ...` assignments whose RHS is neither " + "`int(datetime.strptime(, _TS_FMT)...)` nor the fail-conservative " + "literal `None` reset. This is the Row-4 phantom-green vector: a " + "decoy result-binding shape can satisfy a regex pin while the actual " + "comparator falls back to direct lex compare on the raw ts strings — " + "silently broken under any future ts format drift. Violations " + f"(lineno, target): {violations!r}. Restore the int(strptime(...)) " + "shape, or use literal `None` for the fail-conservative path." + ) + + # Condition (a): both armed_epoch and disarmed_epoch must have at least + # one GOOD assignment in the span (both sides of the comparator wired + # through strptime). + good_names = {nm for _, nm, good, _ in epoch_assigns if good} + missing = {"armed_epoch", "disarmed_epoch"} - good_names + assert not missing, ( + "wake_inbox_drain.py producer-side Try must contain at least one " + "`_epoch = int(datetime.strptime(, _TS_FMT)...)` " + "assignment for EACH of `armed_epoch` and `disarmed_epoch` — both " + "sides of the `armed_epoch > disarmed_epoch` comparator must be " + f"wired through strptime. Missing GOOD bindings for: {sorted(missing)}. " + f"Found GOOD bindings for: {sorted(good_names)}." + ) + + +def test_step_0_5_audit_prose_forbids_set_e_and_fromisoformat(): + """Step 0.5's audit prose contains a verbatim future-editor warning + forbidding two specific 'harmonizations' that would silently break + the self-correcting teardown: + + 1. `set -e` or `ERR` trap inside the Step 0.5 bash block: + empty-operand `-gt` would abort the cron-fire turn (because + bash's `-gt` with an empty string operand exits non-zero in + `set -e` mode), breaking fail-open semantics and reintroducing + the compliance gap Option D exists to close. + + 2. Switching the extractor from explicit-format `strptime` to + `fromisoformat` without verifying Python version baseline: + `fromisoformat` only handles the `Z` suffix in Python 3.11+. + A 3.10-or-earlier baseline would raise ValueError on + `make_event`'s `%Y-%m-%dT%H:%M:%SZ` shape, the extractor + would crash, the variable would yield empty, and Step 0.5 + would silently fail-open. + + Both warnings are load-bearing PROSE that encodes a DESIGN decision. + The coder's HANDOFF uncertainty[0] flagged this verbatim: 'Step 0.5's + verbatim audit prose contains a future-editor warning that forbids + set -e AND fromisoformat switch — these are architecturally + load-bearing for fail-open semantics + Python 3.7+ portability, + and I have preserved them verbatim, but future reviewers should + verify the prose has not drifted in any subsequent edit pass.' + + This is a documentation-in-code test (per the documentation-in-code- + tests pattern): it pins the future-editor warning so any subsequent + 'cleanup' that strips the warning fails loudly. Without this test, + a well-meaning future editor could remove the warning during a + 'simplify Step 0.5 prose' pass, then later add `set -e` for + 'consistency with orchestrate.md' — silently re-opening the + compliance gap. + + The test pins the structural shape of the warning (both forbiddens + must be present in the Step 0.5 audit prose), not the exact wording + — minor copy-edits are allowed; removal of either forbidden is not. + + Counter-test-by-revert: stripping either forbidden token from the + audit prose causes this test to fail with a specific message + identifying which forbidden is missing. Restoring the token via + `git checkout` makes the test pass again. Empirically validated at + fix/819-cron-self-teardown HEAD. + """ + scan_text = SCAN_MD.read_text(encoding="utf-8") + op_start = scan_text.find("\n## Operation") + assert op_start >= 0, "scan-pending-tasks.md missing §Operation" + step_0_5_pos = scan_text.find("\n0.5. ", op_start) + assert step_0_5_pos >= 0, "scan-pending-tasks.md §Operation missing Step 0.5" + # Bound Step 0.5 to the next numbered step. + step_1_pos = scan_text.find("\n1. ", step_0_5_pos) + step_0_5_body = scan_text[step_0_5_pos:step_1_pos] if step_1_pos > 0 else scan_text[step_0_5_pos:] + + # Forbidden #1: set -e / ERR trap warning must remain in the audit prose. + has_set_e_warning = "set -e" in step_0_5_body and "MUST NOT" in step_0_5_body + assert has_set_e_warning, ( + "Step 0.5 audit prose must contain a future-editor warning " + "FORBIDDING `set -e` (or ERR trap) inside the Step 0.5 bash block. " + "Without `set -e`, an empty-operand `-gt` comparison evaluates to " + "false (fail-open). WITH `set -e`, an empty-operand `-gt` aborts " + "the cron-fire turn — breaking fail-open and reintroducing the " + "compliance gap Option D exists to close. The warning prose is " + "load-bearing for future-editor hardening; removing it permits " + "a 'consistency with orchestrate.md' edit to silently break the " + "fix. Restore the verbatim warning per coder HANDOFF (PR #819 " + "commit cf5e8ac9)." + ) + + # Forbidden #2: fromisoformat switch warning must remain in the audit prose. + has_fromisoformat_warning = "fromisoformat" in step_0_5_body + assert has_fromisoformat_warning, ( + "Step 0.5 audit prose must contain a future-editor warning " + "FORBIDDING the `fromisoformat` switch without Python 3.11+ " + "baseline verification. `fromisoformat` only handles the `Z` " + "suffix in 3.11+; an earlier baseline raises ValueError on " + "make_event's `%Y-%m-%dT%H:%M:%SZ` shape, the extractor crashes, " + "the variable yields empty, and Step 0.5 silently fail-opens — " + "defeating the compliance fix. The explicit-format `strptime` is " + "portable to Python 3.7+; the switch warning protects portability. " + "Restore the verbatim warning per coder HANDOFF (PR #819 commit " + "cf5e8ac9)." + ) diff --git a/pact-plugin/tests/test_scan_pending_tasks_command_structure.py b/pact-plugin/tests/test_scan_pending_tasks_command_structure.py index 767b0f14..373176fd 100644 --- a/pact-plugin/tests/test_scan_pending_tasks_command_structure.py +++ b/pact-plugin/tests/test_scan_pending_tasks_command_structure.py @@ -349,6 +349,48 @@ def test_warmup_grace_step_0_present_in_operation(cmd_text): ) +def test_step_0_5_self_teardown_present_in_operation(cmd_text): + """§Operation must contain a Step 0.5 self-correcting teardown + check, anchored at literal "0.5. " numbering per architecture spec. + The block must reference `teardown_request`, `scan_armed`, and + `scan_disarmed` event types by name; must include the uniform- + strptime parsing of each event's auto-stamped `ts` field via the + format literal '%Y-%m-%dT%H:%M:%SZ' (matching + session_journal.make_event under #821's ts-unification); + must invoke Skill("PACT:stop-pending-scan") on fire (LLM-side prose). + """ + op_start = cmd_text.find("\n## Operation") + op_end = cmd_text.find("\n## ", op_start + 1) + op_section = cmd_text[op_start:op_end] if op_end > 0 else cmd_text[op_start:] + step_0_5_pos = op_section.find("\n0.5. ") + assert step_0_5_pos >= 0, ( + "§Operation must contain a Step 0.5 numbered marker (`\\n0.5. `) — " + "the self-correcting teardown check is anchored at Step 0.5 " + "numbering per architecture spec." + ) + step_1_pos = op_section.find("\n1. ", step_0_5_pos) + step_0_5_body = op_section[step_0_5_pos:step_1_pos] if step_1_pos > 0 else op_section[step_0_5_pos:] + for token in ("teardown_request", "scan_armed", "scan_disarmed"): + assert token in step_0_5_body, ( + f"Step 0.5 must reference `{token}` event type by name." + ) + assert "%Y-%m-%dT%H:%M:%SZ" in step_0_5_body, ( + "Step 0.5 must contain the ISO format literal `%Y-%m-%dT%H:%M:%SZ` " + "matching session_journal.make_event's strftime call — under " + "#821's ts-unification, all three pending-scan events " + "(scan_armed, scan_disarmed, teardown_request) carry only the " + "auto-stamped `ts` field, and the Step 0.5 bash extractors " + "parse all three via strptime against this literal. Drift " + "between this literal and make_event's format string breaks " + "the comparison silently." + ) + assert 'Skill("PACT:stop-pending-scan")' in step_0_5_body, ( + "Step 0.5 LLM-side prose must invoke Skill(\"PACT:stop-pending-scan\") " + "on bash exit-0 — the bash itself cannot invoke the skill; the " + "LLM-side action is the precedent established by wrap-up.md:98." + ) + + # ---------- Forbidden-token absence ---------- @pytest.mark.parametrize("forbidden_slug", [ diff --git a/pact-plugin/tests/test_scan_pending_tasks_self_teardown.py b/pact-plugin/tests/test_scan_pending_tasks_self_teardown.py new file mode 100644 index 00000000..03fcc52e --- /dev/null +++ b/pact-plugin/tests/test_scan_pending_tasks_self_teardown.py @@ -0,0 +1,582 @@ +""" +End-to-end timing test for the self-correcting teardown check in +scan-pending-tasks.md Step 0.5. Extracts the bash block verbatim from +the .md file (drift-proof SSOT) and exercises it against synthetic +journal files. + +What is verified: +- When the most recent `teardown_request.ts` (ISO-8601 UTC, parsed via + strptime to epoch) is AFTER the most recent `scan_armed.ts` AND no + `scan_disarmed.ts` after the teardown_request, Step 0.5 exits 0 + (fire — LLM-side invokes Skill("PACT:stop-pending-scan")). +- When a `scan_disarmed` event has already been written after the + teardown_request, Step 0.5 falls through (already serviced). +- When no `teardown_request` event exists, Step 0.5 falls through + (no pending teardown). +- Re-arm cycle (scan_armed → teardown_request → scan_disarmed → + scan_armed → teardown_request): the LATEST triple drives the + decision; Step 0.5 fires on the latest teardown_request. +- Fail-open contract: empty session dir / no scan_armed event causes + empty extractor output and gate falls through. +- Uniform-strptime parsing round-trips correctly across the canonical + `%Y-%m-%dT%H:%M:%SZ` format literal (coupling pair with + session_journal.py:325 make_event format) — under #821's + ts-unification, all three pending-scan event types + (teardown_request.ts, scan_armed.ts, scan_disarmed.ts) carry only + the auto-stamped `ts` field and are parsed uniformly via strptime + → int epoch for `-gt` comparison. + +The bash block is extracted from `commands/scan-pending-tasks.md` +Step 0.5 via a section-bounded search (`\\n0.5. ` to `\\n1. `) and +then the FIRST fenced ```bash``` block within that section. The shell +snippet uses `exit 0` to short-circuit on fire; we discriminate +between fire and fall-through by appending a sentinel `echo` after +the snippet — sentinel present in stdout means the snippet fell +through (no fire); sentinel absent means the snippet exited early +(fire path taken — LLM-side action would follow). +""" + +from __future__ import annotations + +import datetime +import json +import re +import subprocess +import time +from pathlib import Path + +import pytest + + +ROOT = Path(__file__).resolve().parent.parent +SCAN_MD = ROOT / "commands" / "scan-pending-tasks.md" +PLUGIN_ROOT = ROOT # `pact-plugin/` +SJ_PATH = PLUGIN_ROOT / "hooks" / "shared" / "session_journal.py" + + +# Coupling pair partner: this literal MUST equal session_journal.py:325 +# `make_event` ts format. Any drift between the two silently breaks the +# uniform-strptime parsing in Step 0.5 (the read returns a string the +# bash strptime call cannot match, yielding empty output and gate +# fall-through under the fail-open contract). +ISO_FORMAT_LITERAL = "%Y-%m-%dT%H:%M:%SZ" + + +def _extract_step_0_5_bash_block(scan_md_text: str) -> str: + """Extract the fenced ```bash``` block from §Operation Step 0.5. + + Returns the bash content with the markdown fence stripped. The + indentation prefix of each line (3 spaces, since the bash block is + nested under the numbered Step 0.5 list item) is also stripped. + """ + op_start = scan_md_text.find("\n## Operation") + assert op_start >= 0, "scan-pending-tasks.md missing §Operation section" + step_0_5_pos = scan_md_text.find("\n0.5. ", op_start) + assert step_0_5_pos >= 0, ( + "scan-pending-tasks.md §Operation missing Step 0.5 (`0.5. ` marker)" + ) + # Bound Step 0.5 to next numbered step (Step 1). + step_1_pos = scan_md_text.find("\n1. ", step_0_5_pos) + step_0_5_body = ( + scan_md_text[step_0_5_pos:step_1_pos] if step_1_pos > 0 + else scan_md_text[step_0_5_pos:] + ) + match = re.search(r"```bash\n(.*?)```", step_0_5_body, re.DOTALL) + assert match is not None, ( + "Step 0.5 must contain a fenced ```bash``` block — the extractor " + "is the SSOT and drift-proof contract between Step 0.5's prose " + "and this test." + ) + raw = match.group(1) + lines = raw.splitlines() + dedented = [ln[3:] if ln.startswith(" ") else ln for ln in lines] + return "\n".join(dedented) + + +@pytest.fixture(scope="module") +def step_0_5_bash_template() -> str: + """The Step 0.5 bash block, with `{plugin_root}` and `{session_dir}` + template tokens preserved verbatim.""" + return _extract_step_0_5_bash_block(SCAN_MD.read_text(encoding="utf-8")) + + +def _render_step_0_5(template: str, plugin_root: Path, session_dir: Path) -> str: + """Render the Step 0.5 bash by substituting `{plugin_root}` and + `{session_dir}`. These tokens are platform-rendered at fire time; + we mimic that substitution in the test harness.""" + return template.replace("{plugin_root}", str(plugin_root)).replace( + "{session_dir}", str(session_dir) + ) + + +def _iso_ts(epoch_seconds: int) -> str: + """Render an epoch as ISO-8601 UTC matching make_event's format. + + The format literal is byte-coupled to session_journal.py:325 and to + the strptime literal in scan-pending-tasks.md Step 0.5. See + ISO_FORMAT_LITERAL above. + """ + return datetime.datetime.fromtimestamp( + epoch_seconds, tz=datetime.timezone.utc + ).strftime(ISO_FORMAT_LITERAL) + + +def _write_journal(session_dir: Path, event_type: str, payload: dict) -> Path: + """Append a single JSONL event to the session journal. Uses a + correctly-formatted `%Y-%m-%dT%H:%M:%SZ` `ts` matching make_event; + callers may override `ts` in `payload` for ISO-timestamp tests. + + This helper is intentionally independent from the one in + test_scan_pending_tasks_warmup_grace.py — its `ts` value is + deliberately distinct (matches make_event format vs. the older + test's `+00:00` shape) so the Step 0.5 uniform-strptime parsing + is exercised against the canonical on-disk shape. + """ + session_dir.mkdir(parents=True, exist_ok=True) + journal = session_dir / "session-journal.jsonl" + record = { + "v": 1, + "type": event_type, + "ts": _iso_ts(int(time.time())), + } + record.update(payload) + with journal.open("a", encoding="utf-8") as f: + f.write(json.dumps(record) + "\n") + return journal + + +def _write_teardown_request(session_dir: Path, epoch: int) -> Path: + """Convenience: write a `teardown_request` event with `ts` stamped + to the supplied epoch in canonical ISO-8601 UTC format. This + matches the on-disk shape produced by + `teardown_request_emitter.py` and `wake_inbox_drain.py` (both rely + on `session_journal.make_event` for the `ts` field).""" + return _write_journal(session_dir, "teardown_request", { + "task_id": "fake-1", + "team_name": "fake-team", + "ts": _iso_ts(epoch), + }) + + +def _run_step_0_5(bash_body: str) -> subprocess.CompletedProcess: + """Run the Step 0.5 bash block with a sentinel echo appended. + + Discriminator: if the snippet `exit 0`-fires, stdout will NOT + contain SENTINEL. If the snippet falls through, stdout WILL + contain SENTINEL. + """ + sentinel = "STEP_0_5_FELL_THROUGH" + script = bash_body + f'\necho "{sentinel}"\n' + return subprocess.run( + ["bash", "-c", script], + capture_output=True, + text=True, + timeout=10, + ) + + +SENTINEL = "STEP_0_5_FELL_THROUGH" + + +def test_step_0_5_fires_when_teardown_request_after_arm( + tmp_path, step_0_5_bash_template +): + """Pending-teardown happy path: a `teardown_request` was written + AFTER the latest `scan_armed.ts` and BEFORE any `scan_disarmed`. + Step 0.5 must fire (sentinel absent). + """ + session_dir = tmp_path / "session" + now = int(time.time()) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - 200)}) + _write_teardown_request(session_dir, now - 100) + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL not in result.stdout, ( + f"Step 0.5 should have FIRED on teardown_request(now-100) > " + f"scan_armed(now-200) with no scan_disarmed. Sentinel " + f"{SENTINEL!r} present in stdout indicates the gate fell " + f"through. stdout={result.stdout!r}" + ) + + +def test_step_0_5_does_not_fire_when_disarm_after_teardown_request( + tmp_path, step_0_5_bash_template +): + """The teardown has already been serviced: `scan_disarmed.ts` > + `teardown_request.ts`. Step 0.5 must fall through (sentinel + present) — stop-pending-scan has already run. + """ + session_dir = tmp_path / "session" + now = int(time.time()) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - 300)}) + _write_teardown_request(session_dir, now - 200) + _write_journal(session_dir, "scan_disarmed", {"ts": _iso_ts(now - 100)}) + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL in result.stdout, ( + f"Step 0.5 should have FALLEN THROUGH (already serviced) — " + f"scan_disarmed(now-100) > teardown_request(now-200). Sentinel " + f"{SENTINEL!r} absent in stdout indicates Step 0.5 fired " + f"spuriously. stdout={result.stdout!r}" + ) + + +def test_step_0_5_does_not_fire_when_no_teardown_request( + tmp_path, step_0_5_bash_template +): + """No teardown_request event in journal: Step 0.5 must fall through + (sentinel present). `[ -n "$LATEST_TEARDOWN_REQUEST" ]` guard + closes the gate when the extractor yields empty string. + """ + session_dir = tmp_path / "session" + now = int(time.time()) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - 100)}) + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL in result.stdout, ( + f"Step 0.5 should have FALLEN THROUGH (no teardown_request) — " + f"empty LATEST_TEARDOWN_REQUEST should close the `-n` guard. " + f"Sentinel {SENTINEL!r} absent in stdout indicates the gate " + f"fired spuriously. stdout={result.stdout!r}" + ) + + +def test_step_0_5_fires_on_re_arm_cycle(tmp_path, step_0_5_bash_template): + """Re-arm cycle: an older (scan_armed, teardown_request, + scan_disarmed) triple was serviced; then a new scan_armed + landed; then a new teardown_request. Step 0.5 must fire on the + LATEST teardown_request (latest-vs-latest semantics in the + `read-last` reader contract). + + Sequence: + now-1000: scan_armed (old) + now-900: teardown_request (old) + now-800: scan_disarmed (old — serviced) + now-700: scan_armed (new) + now-100: teardown_request (new — unserviced) + """ + session_dir = tmp_path / "session" + now = int(time.time()) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - 1000)}) + _write_teardown_request(session_dir, now - 900) + _write_journal(session_dir, "scan_disarmed", {"ts": _iso_ts(now - 800)}) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - 700)}) + _write_teardown_request(session_dir, now - 100) + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL not in result.stdout, ( + f"Step 0.5 should have FIRED on the LATEST triple: " + f"teardown_request(now-100) > scan_armed(now-700) > " + f"scan_disarmed(now-800). Sentinel {SENTINEL!r} present in " + f"stdout indicates the gate is reading stale events — the " + f"reader is no longer returning the most-recent event. " + f"stdout={result.stdout!r}" + ) + + +def test_step_0_5_falls_through_when_no_journal(tmp_path, step_0_5_bash_template): + """Fail-open contract: empty session dir (no journal file). + `read-last` returns null on missing journal → all three + extractors yield empty string → `-n` guards close → gate falls + through. + """ + session_dir = tmp_path / "session" + session_dir.mkdir(parents=True, exist_ok=True) + # No journal file written. + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL in result.stdout, ( + f"Step 0.5 should fall through on missing journal file " + f"(fail-open contract). Sentinel {SENTINEL!r} absent in " + f"stdout indicates the gate fired spuriously on empty " + f"extractors. stdout={result.stdout!r}" + ) + # Stderr-cleanliness invariant: the `[ -n "$VAR" ]` guards are what + # make the empty-extractor case fall through cleanly. If a future + # editor strips a guard, the bash `-gt` test would receive an empty + # operand and emit `[: : integer expected` to stderr — the gate + # would STILL fall through (because the failed `-gt` evaluates to + # false), so the sentinel-in-stdout assertion would not catch the + # regression. Pinning stderr cleanliness catches the guard-strip + # case at the structural-invariant level. + assert "integer expected" not in result.stderr, ( + f"Step 0.5 fall-through path must be stderr-clean — the " + f"`[ -n \"$VAR\" ]` guards close BEFORE the integer `-gt` " + f"comparison reaches empty operands. Presence of `integer " + f"expected` in stderr indicates a guard was stripped and the " + f"`-gt` test ran with an empty operand. stderr={result.stderr!r}" + ) + + +def test_step_0_5_falls_through_when_only_teardown_request_no_arm( + tmp_path, step_0_5_bash_template +): + """Defensive: a teardown_request exists but no scan_armed event. + `[ -n "$LATEST_SCAN_ARMED" ]` guard closes the gate. Step 0.5 + must fall through (the cron should not have been armed without a + corresponding scan_armed event; if it is, falling through is the + conservative choice). + """ + session_dir = tmp_path / "session" + now = int(time.time()) + _write_teardown_request(session_dir, now - 100) + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL in result.stdout, ( + f"Step 0.5 should fall through on missing scan_armed event " + f"(defensive guard against ill-formed journal state). " + f"Sentinel {SENTINEL!r} absent in stdout indicates the gate " + f"fired without an arm anchor. stdout={result.stdout!r}" + ) + # Stderr-cleanliness invariant: see test_step_0_5_falls_through_ + # when_no_journal for the full rationale. This test exercises the + # `[ -n "$LATEST_SCAN_ARMED" ]` guard specifically — stripping it + # would let the `-gt` comparison receive empty `$LATEST_SCAN_ARMED` + # and emit `integer expected` to stderr, without changing the + # sentinel-in-stdout outcome. + assert "integer expected" not in result.stderr, ( + f"Step 0.5 fall-through path must be stderr-clean — the " + f"`[ -n \"$LATEST_SCAN_ARMED\" ]` guard closes BEFORE the " + f"integer `-gt` comparison reaches an empty operand. Presence " + f"of `integer expected` in stderr indicates the guard was " + f"stripped. stderr={result.stderr!r}" + ) + + +def test_step_0_5_strptime_round_trip_uniform_across_three_sources( + tmp_path, step_0_5_bash_template +): + """Uniform-strptime coupling contract: under #821's ts-unification, + all three Step 0.5 extractors (`teardown_request.ts`, + `scan_armed.ts`, `scan_disarmed.ts`) parse the auto-stamped + ISO-8601 UTC `ts` field via the same strptime literal + `%Y-%m-%dT%H:%M:%SZ` and yield int epochs for `-gt` comparison. + This pins the byte-identity contract between + session_journal.make_event's format string and the Step 0.5 + extractor literal — any drift in either makes the comparison + silently fail. + + Verifies the round-trip across all three sources: arrange a journal + with deterministic anchors where the latest `scan_disarmed` is + strictly newer than the latest `teardown_request` (which is + strictly newer than `scan_armed`). Step 0.5 must FALL THROUGH + because `scan_disarmed.ts > teardown_request.ts` — i.e., the + teardown is already serviced. This exercises strptime on all three + extractors (all three must parse without raising for the gate + decision to be correct). + + Counter-test-by-revert: changing the format literal in either the + .md or session_journal.py without updating the other would cause + strptime to raise (visible in stderr) or return wrong epoch + (sentinel position would flip — the gate would fire instead of + falling through). + """ + session_dir = tmp_path / "session" + # Use a deterministic anchor epoch to make the round-trip explicit + # and reproducible across test runs. Three timestamps spanning a + # 2-second window exercise all three extractors with byte-distinct + # ts strings. + anchor_epoch = int(datetime.datetime( + 2026, 5, 21, 8, 54, 27, tzinfo=datetime.timezone.utc + ).timestamp()) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(anchor_epoch)}) + _write_teardown_request(session_dir, anchor_epoch + 1) + _write_journal(session_dir, "scan_disarmed", {"ts": _iso_ts(anchor_epoch + 2)}) + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL in result.stdout, ( + f"Step 0.5 should have FALLEN THROUGH — scan_disarmed.ts " + f"(anchor+2s) > teardown_request.ts (anchor+1s) > " + f"scan_armed.ts (anchor). All three extractors must parse the " + f"auto-stamped `ts` field uniformly via strptime to produce " + f"int epochs that satisfy the already-serviced branch. " + f"Sentinel {SENTINEL!r} absent in " + f"stdout indicates one of the strptime conversions failed " + f"(format literal mismatch with session_journal.make_event) " + f"or returned a wrong epoch. stdout={result.stdout!r} " + f"stderr={result.stderr!r}" + ) + + +def test_step_0_5_double_fire_before_disarm_is_idempotent( + tmp_path, step_0_5_bash_template +): + """Double-disarm idempotency contract: multiple consecutive cron-fires + hitting Step 0.5 BEFORE `stop-pending-scan` has written `scan_disarmed` + must each fire (exit 0) deterministically. The architecture spec's + audit prose (scan-pending-tasks.md Step 0.5 line 62) claims: + + Multiple consecutive cron-fires hitting this branch before + `stop-pending-scan` completes write multiple `scan_disarmed` + events — benign; the latest dominates. + + The doc-only claim covers the post-disarm-write side (latest dominates), + but doesn't pin the PRE-disarm side: between the LLM-side action firing + `Skill("PACT:stop-pending-scan")` and that skill actually completing + its `scan_disarmed` write, additional cron fires can land in the same + (scan_armed, teardown_request) window with NO `scan_disarmed` event + yet written. Each such fire must deterministically choose to fire + Step 0.5 (not flap to fall-through). + + This test exercises two consecutive Step 0.5 evaluations on the + SAME journal state (no events added between them; the bash is purely + a function of the latest-event triple). Both must fire and produce + byte-identical sentinel-absent output. The test would catch: + + - Any subtle non-determinism in the bash (e.g., a future edit that + introduces a `read-last`-and-mutate pattern would diverge across + calls). + - Any side effect from one invocation that affects the next + (e.g., a future edit that touches a marker file before exit). + - Drift in the latest-event read semantics that would yield + different results across consecutive same-input reads. + + Counter-test-by-revert: inserting a side-effect (e.g., `touch + /tmp/step-0-5-already-fired; [ -e /tmp/step-0-5-already-fired ] && + exit 1`) into Step 0.5's bash would cause the second invocation to + fail-through (sentinel present). This test catches that exact + regression pattern. + + This is the documentation-in-code partner to the audit-prose claim; + making the property falsifiable. Per + feedback_documentation_in_code_tests memory. + """ + session_dir = tmp_path / "session" + now = int(time.time()) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - 200)}) + _write_teardown_request(session_dir, now - 100) + # NOTE: No scan_disarmed event written yet — Step 0.5 must fire each time. + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + + # First fire — should fire Step 0.5 (sentinel absent). + result1 = _run_step_0_5(bash_body) + assert result1.returncode == 0, ( + f"First Step 0.5 invocation exit code expected 0, got " + f"{result1.returncode}. stderr={result1.stderr!r}" + ) + assert SENTINEL not in result1.stdout, ( + f"First Step 0.5 invocation should have FIRED. " + f"stdout={result1.stdout!r}" + ) + + # Second fire — same journal state, no scan_disarmed yet — must + # ALSO fire Step 0.5 (sentinel absent). + result2 = _run_step_0_5(bash_body) + assert result2.returncode == 0, ( + f"Second Step 0.5 invocation exit code expected 0, got " + f"{result2.returncode}. stderr={result2.stderr!r}" + ) + assert SENTINEL not in result2.stdout, ( + f"Second Step 0.5 invocation on UNCHANGED journal state should " + f"have FIRED again (idempotent decision on same input). Sentinel " + f"{SENTINEL!r} present in stdout indicates Step 0.5 is no longer " + f"a pure function of latest-event-triple state — a regression " + f"that would break the double-disarm-is-benign architectural " + f"claim (scan-pending-tasks.md Step 0.5 audit prose line ~62). " + f"stdout={result2.stdout!r}" + ) + + # Determinism check: both invocations must produce byte-identical + # stdout/stderr/returncode given identical inputs. A divergence here + # would surface non-deterministic state-mutation introduced by a + # future edit. + assert result1.stdout == result2.stdout, ( + f"Step 0.5 must produce deterministic output on identical inputs. " + f"First stdout={result1.stdout!r}; second stdout={result2.stdout!r}" + ) + assert result1.returncode == result2.returncode, ( + f"Step 0.5 must produce deterministic exit codes on identical " + f"inputs. First rc={result1.returncode}; second rc={result2.returncode}" + ) + + +def test_step_0_5_falls_through_on_same_second_teardown_request_and_scan_armed( + tmp_path, step_0_5_bash_template +): + """Pin the `-gt` (strict greater-than) semantic for the same-second + edge case: when `teardown_request.ts` and `scan_armed.ts` land at + the same epoch second, `LATEST_TEARDOWN_REQUEST -gt + LATEST_SCAN_ARMED` must evaluate FALSE (because `N -gt N` is + false in bash) and Step 0.5 must fall through. + + Architect rationale (§9.1): equality is conservative — a teardown + that landed in the same wall-clock second as the arm is treated + as part of the arm cycle rather than a pending-teardown signal; + if the teardown is real, the next cron-fire (5+ minutes later) + will catch it via a later `teardown_request` write. + + Counter-test-by-revert: mutating bash `-gt` to `-ge` in the Step + 0.5 block flips this test (sentinel would be absent — the gate + would fire on the same-second case, violating the conservative + contract). Restoring `-gt` makes the test pass again. This pins + the operator choice against a future-editor 'consistency with + Step 0's `[ $delta -ge 0 ]`' temptation. + """ + session_dir = tmp_path / "session" + # Use a deterministic anchor epoch for byte-stable reproduction + # across test runs; the test does not depend on real wall-clock + # time. + anchor_epoch = int(datetime.datetime( + 2026, 5, 22, 2, 0, 0, tzinfo=datetime.timezone.utc + ).timestamp()) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(anchor_epoch)}) + _write_teardown_request(session_dir, anchor_epoch) # same second + + bash_body = _render_step_0_5(step_0_5_bash_template, PLUGIN_ROOT, session_dir) + result = _run_step_0_5(bash_body) + + assert result.returncode == 0, ( + f"Step 0.5 exit code expected 0, got {result.returncode}. " + f"stderr={result.stderr!r}" + ) + assert SENTINEL in result.stdout, ( + f"Step 0.5 should have FALLEN THROUGH on same-second equality " + f"(teardown_request(anchor) == scan_armed(anchor)). `-gt` is " + f"strict; equality must NOT fire the gate. Sentinel {SENTINEL!r} " + f"absent in stdout indicates the operator was loosened to `-ge` " + f"or equivalent, violating the conservative-equality contract. " + f"stdout={result.stdout!r} stderr={result.stderr!r}" + ) diff --git a/pact-plugin/tests/test_scan_pending_tasks_warmup_grace.py b/pact-plugin/tests/test_scan_pending_tasks_warmup_grace.py index d18a7c43..717bcfd0 100644 --- a/pact-plugin/tests/test_scan_pending_tasks_warmup_grace.py +++ b/pact-plugin/tests/test_scan_pending_tasks_warmup_grace.py @@ -4,9 +4,9 @@ SSOT) and exercises it against synthetic journal files. What is verified: -- When the most recent `scan_armed` event's `armed_at` is within the - warmup-grace window (now - 30s), the bash block exits 0 and short- - circuits — no further stdout. +- When the most recent `scan_armed` event's `ts` (ISO-8601 UTC, parsed + via strptime to epoch) is within the warmup-grace window (now - 30s), + the bash block exits 0 and short-circuits — no further stdout. - When the most recent `scan_armed` event is outside the grace window (now - 300s), the bash block falls through Step 0 — the gate exits 0 without short-circuiting. @@ -30,6 +30,7 @@ from __future__ import annotations +import datetime import json import os import re @@ -99,15 +100,33 @@ def _render_step_0(template: str, plugin_root: Path, session_dir: Path) -> str: ) +# Coupling pair partner: this literal MUST equal session_journal.py:325 +# `make_event` ts format. Any drift between the two silently breaks the +# strptime conversion in Step 0 (the read returns a string the bash +# integer comparison cannot parse). +ISO_FORMAT_LITERAL = "%Y-%m-%dT%H:%M:%SZ" + + +def _iso_ts(epoch_seconds: int) -> str: + """Render an epoch as ISO-8601 UTC matching make_event's format + literal (`%Y-%m-%dT%H:%M:%SZ`).""" + return datetime.datetime.fromtimestamp( + epoch_seconds, tz=datetime.timezone.utc + ).strftime(ISO_FORMAT_LITERAL) + + def _write_journal(session_dir: Path, event_type: str, payload: dict) -> Path: """Append a single JSONL event to the session journal, matching - the on-disk shape session_journal.py produces.""" + the on-disk shape session_journal.py produces. The default `ts` + uses make_event's canonical `%Y-%m-%dT%H:%M:%SZ` format; callers + may override `ts` in `payload` to control the timestamp the + Step 0 strptime-based extractor will consume.""" session_dir.mkdir(parents=True, exist_ok=True) journal = session_dir / "session-journal.jsonl" record = { "v": 1, "type": event_type, - "ts": "2026-05-15T00:00:00+00:00", + "ts": _iso_ts(int(time.time())), } record.update(payload) with journal.open("a", encoding="utf-8") as f: @@ -136,9 +155,9 @@ def _run_step_0(bash_body: str) -> subprocess.CompletedProcess: def test_step_0_skips_within_grace_window(tmp_path, step_0_bash_template): - """When `armed_at = now - (WARMUP_GRACE_SECONDS - 150)s` (well inside - the warmup-grace window), Step 0 must exit 0 without falling through. - Discriminator: sentinel NOT in stdout. + """When `scan_armed.ts = now - (WARMUP_GRACE_SECONDS - 150)s` (well + inside the warmup-grace window), Step 0 must exit 0 without falling + through. Discriminator: sentinel NOT in stdout. Threshold is parametrically derived from WARMUP_GRACE_SECONDS so a future bump of the constant retains the in/out semantic relationship @@ -150,11 +169,11 @@ def test_step_0_skips_within_grace_window(tmp_path, step_0_bash_template): """ session_dir = tmp_path / "session" elapsed = WARMUP_GRACE_SECONDS - 150 - armed_at = int(time.time()) - elapsed + armed_epoch = int(time.time()) - elapsed assert elapsed < WARMUP_GRACE_SECONDS, ( f"Test fixture invariant: elapsed={elapsed} < WARMUP_GRACE_SECONDS={WARMUP_GRACE_SECONDS}" ) - _write_journal(session_dir, "scan_armed", {"armed_at": armed_at}) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(armed_epoch)}) bash_body = _render_step_0(step_0_bash_template, PLUGIN_ROOT, session_dir) result = _run_step_0(bash_body) @@ -164,7 +183,7 @@ def test_step_0_skips_within_grace_window(tmp_path, step_0_bash_template): f"stderr={result.stderr!r}" ) assert SENTINEL not in result.stdout, ( - f"Step 0 should have SHORT-CIRCUITED on armed_at=now-{elapsed} " + f"Step 0 should have SHORT-CIRCUITED on scan_armed.ts=now-{elapsed} " f"(elapsed={elapsed} < WARMUP_GRACE_SECONDS={WARMUP_GRACE_SECONDS}). " f"Sentinel {SENTINEL!r} present in stdout indicates Step 0 fell " f"through. stdout={result.stdout!r}" @@ -172,9 +191,9 @@ def test_step_0_skips_within_grace_window(tmp_path, step_0_bash_template): def test_step_0_falls_through_outside_grace_window(tmp_path, step_0_bash_template): - """When `armed_at = now - (WARMUP_GRACE_SECONDS + 120)s` (well outside - the warmup-grace window), Step 0 must fall through. Discriminator: - sentinel IN stdout. + """When `scan_armed.ts = now - (WARMUP_GRACE_SECONDS + 120)s` (well + outside the warmup-grace window), Step 0 must fall through. + Discriminator: sentinel IN stdout. Threshold is parametrically derived from WARMUP_GRACE_SECONDS for the same reason as the inside-window test. @@ -185,11 +204,11 @@ def test_step_0_falls_through_outside_grace_window(tmp_path, step_0_bash_templat """ session_dir = tmp_path / "session" elapsed = WARMUP_GRACE_SECONDS + 120 - armed_at = int(time.time()) - elapsed + armed_epoch = int(time.time()) - elapsed assert elapsed > WARMUP_GRACE_SECONDS, ( f"Test fixture invariant: elapsed={elapsed} > WARMUP_GRACE_SECONDS={WARMUP_GRACE_SECONDS}" ) - _write_journal(session_dir, "scan_armed", {"armed_at": armed_at}) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(armed_epoch)}) bash_body = _render_step_0(step_0_bash_template, PLUGIN_ROOT, session_dir) result = _run_step_0(bash_body) @@ -199,7 +218,7 @@ def test_step_0_falls_through_outside_grace_window(tmp_path, step_0_bash_templat f"stderr={result.stderr!r}" ) assert SENTINEL in result.stdout, ( - f"Step 0 should have FALLEN THROUGH on armed_at=now-{elapsed} " + f"Step 0 should have FALLEN THROUGH on scan_armed.ts=now-{elapsed} " f"(elapsed={elapsed} > WARMUP_GRACE_SECONDS={WARMUP_GRACE_SECONDS}). " f"Sentinel {SENTINEL!r} absent in stdout indicates Step 0 " f"short-circuited. stdout={result.stdout!r}" @@ -254,22 +273,22 @@ def test_step_0_falls_through_when_no_journal(tmp_path, step_0_bash_template): def test_step_0_falls_through_on_negative_delta(tmp_path, step_0_bash_template): - """When `armed_at` is in the future (clock skew, journal corruption, - or adversarial write), the delta `(now - armed_at)` is negative. - Without the `[ $delta -ge 0 ]` guard, `negative -lt 300` would - always be true and the gate would short-circuit indefinitely — + """When `scan_armed.ts` is in the future (clock skew, journal + corruption, or adversarial write), the delta `(now - armed_epoch)` + is negative. Without the `[ $delta -ge 0 ]` guard, `negative -lt 300` + would always be true and the gate would short-circuit indefinitely — becoming a kill-switch for the scan. With the guard, the gate falls through to the scan body on every negative-delta fire, preserving fail-open semantics. Counter-test-by-revert: removing `[ $delta -ge 0 ] &&` from the Step 0 bash flips this test (Step 0 would short-circuit on the - future-armed_at fixture and the sentinel would be absent from + future-ts fixture and the sentinel would be absent from stdout). """ session_dir = tmp_path / "session" - armed_at = int(time.time()) + 100 - _write_journal(session_dir, "scan_armed", {"armed_at": armed_at}) + armed_epoch = int(time.time()) + 100 + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(armed_epoch)}) bash_body = _render_step_0(step_0_bash_template, PLUGIN_ROOT, session_dir) result = _run_step_0(bash_body) @@ -279,7 +298,7 @@ def test_step_0_falls_through_on_negative_delta(tmp_path, step_0_bash_template): f"stderr={result.stderr!r}" ) assert SENTINEL in result.stdout, ( - f"Step 0 should fall through on future-dated armed_at (delta < 0). " + f"Step 0 should fall through on future-dated scan_armed.ts (delta < 0). " f"Sentinel {SENTINEL!r} absent in stdout indicates Step 0 short-" f"circuited on the negative-delta edge case — the kill-switch " f"failure mode the `-ge 0` guard prevents. stdout={result.stdout!r}" @@ -288,27 +307,27 @@ def test_step_0_falls_through_on_negative_delta(tmp_path, step_0_bash_template): def test_step_0_uses_latest_when_multiple_scan_armed_events(tmp_path, step_0_bash_template): """When the journal contains multiple `scan_armed` events (e.g., the - session re-armed after a stop/start cycle, leaving an old armed_at - plus a recent armed_at), Step 0 MUST consume the MOST RECENT one. + session re-armed after a stop/start cycle, leaving an old ts plus a + recent ts), Step 0 MUST consume the MOST RECENT one. `session_journal.py:_read_last_event_at` is implemented as a reverse scan returning the first match — i.e., the latest event by file order. This test pins that contract for the scan_armed type: - appending an old (out-of-grace) armed_at FOLLOWED BY a recent - (in-grace) armed_at must yield a SKIP. If the reader ever regressed - to a forward scan (returning the FIRST match), the elapsed value - would be the old one and Step 0 would fall through — silently - degrading the warmup-grace guarantee. + appending an old (out-of-grace) ts FOLLOWED BY a recent (in-grace) + ts must yield a SKIP. If the reader ever regressed to a forward + scan (returning the FIRST match), the elapsed value would be the + old one and Step 0 would fall through — silently degrading the + warmup-grace guarantee. Counter-test-by-revert: changing the implementation of `_read_last_event_at` from reverse-scan to forward-scan flips this - test (the old armed_at would be used, elapsed > grace, sentinel + test (the old ts would be used, elapsed > grace, sentinel appears in stdout). """ session_dir = tmp_path / "session" # Append two scan_armed events in journal order: - # 1. Old armed_at (well outside grace — would force fall-through if read). - # 2. Recent armed_at (well inside grace — should force SKIP). + # 1. Old ts (well outside grace — would force fall-through if read). + # 2. Recent ts (well inside grace — should force SKIP). old_elapsed = WARMUP_GRACE_SECONDS + 1000 recent_elapsed = WARMUP_GRACE_SECONDS - 150 assert old_elapsed > WARMUP_GRACE_SECONDS, ( @@ -320,8 +339,8 @@ def test_step_0_uses_latest_when_multiple_scan_armed_events(tmp_path, step_0_bas f"WARMUP_GRACE_SECONDS={WARMUP_GRACE_SECONDS}" ) now = int(time.time()) - _write_journal(session_dir, "scan_armed", {"armed_at": now - old_elapsed}) - _write_journal(session_dir, "scan_armed", {"armed_at": now - recent_elapsed}) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - old_elapsed)}) + _write_journal(session_dir, "scan_armed", {"ts": _iso_ts(now - recent_elapsed)}) bash_body = _render_step_0(step_0_bash_template, PLUGIN_ROOT, session_dir) result = _run_step_0(bash_body) @@ -334,7 +353,7 @@ def test_step_0_uses_latest_when_multiple_scan_armed_events(tmp_path, step_0_bas f"Step 0 should have SHORT-CIRCUITED on the LATEST scan_armed " f"event (now-{recent_elapsed}, inside grace), not the older one " f"(now-{old_elapsed}, outside grace). Sentinel {SENTINEL!r} " - f"present in stdout indicates the gate used the OLD armed_at — " + f"present in stdout indicates the gate used the OLD ts — " f"the reader is no longer returning the most-recent event, " f"silently degrading the warmup-grace contract. stdout=" f"{result.stdout!r}" diff --git a/pact-plugin/tests/test_session_journal.py b/pact-plugin/tests/test_session_journal.py index 4b596486..0b0e6ade 100644 --- a/pact-plugin/tests/test_session_journal.py +++ b/pact-plugin/tests/test_session_journal.py @@ -3255,19 +3255,21 @@ class TestValidateEventSchemaPerType: "reason": "empty_team_config_fail_conservative", }, # `scan_armed` is emitted by commands/start-pending-scan.md Step 5 - # after CronCreate arms the pending-scan cron; `armed_at` carries - # the bash `$(date +%s)` epoch-seconds at arm time. Read by + # after CronCreate arms the pending-scan cron. No type-specific + # required fields — the arm time is carried by the auto-stamped + # `ts` ISO-8601 string (set by make_event), parsed via strptime at # commands/scan-pending-tasks.md Step 0 to bound the warmup-grace # skip window. - "scan_armed": {"armed_at": 1715731200}, + "scan_armed": {}, # `scan_disarmed` is emitted by commands/stop-pending-scan.md after - # CronDelete tears down the pending-scan cron; `disarmed_at` carries - # the bash `$(date +%s)` epoch-seconds at teardown time. Paired - # writer to scan_armed; together they drive the event-model - # lifecycle consumed by hooks/wake_inbox_drain.py's producer-side - # idempotency check (suppress only when scan_armed is strictly more - # recent than scan_disarmed). - "scan_disarmed": {"disarmed_at": 1715734800}, + # CronDelete tears down the pending-scan cron. No type-specific + # required fields — the teardown time is carried by the auto- + # stamped `ts` (parsed via strptime). Paired writer to scan_armed; + # together they drive the event-model lifecycle consumed by + # hooks/wake_inbox_drain.py's producer-side idempotency check + # (suppress only when scan_armed.ts is strictly more recent than + # scan_disarmed.ts). + "scan_disarmed": {}, # `teardown_request` is the falsifiable trace for the 1->0 active- # task transition that retires the pending-scan cron. Written by # hooks/teardown_request_emitter.py (Tier-1, lead-session @@ -3327,7 +3329,7 @@ def test_happy_path_all_required_fields_present(self, event_type): @pytest.mark.parametrize( "event_type", - [t for t, f in _SAMPLES.items() if f], # Skip session_end (no required fields). + [t for t, f in _SAMPLES.items() if f], # Skip empty-sample types (no required fields). ) def test_missing_single_required_field_rejected(self, event_type): """Validation rejects when ANY single required field is missing. @@ -3684,6 +3686,86 @@ def test_cli_write_rejects_empty_string_required_field( ) in result.stderr assert not journal_file.exists() or journal_file.read_text() == "" + def test_legacy_scan_armed_with_armed_at_extra_field_validates_bc_safe(self): + """BC-safety migration contract for #821 ts-unification (DELETE + path per architect Q1 binding). + + #821 deleted `armed_at` from _REQUIRED_FIELDS_BY_TYPE["scan_armed"] + (the schema entry is now `{}`) and the writer at + commands/start-pending-scan.md Step 5 no longer emits the + `armed_at` field. The DELETE-vs-DEPRECATE binding decision rests + on the lenient-validator contract: an OLD journal event from + before #821 carrying BOTH the auto-stamped `ts` ISO string AND + the now-deprecated `armed_at: int` extra field MUST still pass + `_validate_event_schema` so post-#821 readers can continue + consuming legacy journals without rewrite. + + Symmetric pin for `scan_disarmed.disarmed_at`. + + Counter-test-by-revert: if a future refactor changes + `_validate_event_schema` from open-schema (extras pass silently) + to closed-schema (extras rejected), this test FAILS — and the + failure message names the BC-safety contract that broke. + + This test SUPPLEMENTS the side-effect coverage at + TestReadLastEventTailStreamed lines ~884/919 (which write + `make_event("scan_armed", armed_at=N)` as a unique-identifier + marker for tail-window position-recovery tests) by EXPLICITLY + pinning the migration contract rather than relying on it as an + unintended-consequence pass. + + References: + - architect #821 §3.1 Q1 (schema entry `{}` not REMOVE, + relies on lenient validator for BC). + - PREPARE #821 §4 (lenient-validator empirical probe) + §7 + (live-journal byte-identity evidence). + - The producer-side idempotency check at + hooks/wake_inbox_drain.py:~696-734 reads only `ts`, not + `armed_at` — a legacy event's extra `armed_at` is + ignored. + """ + from shared.session_journal import _validate_event_schema + + legacy_scan_armed_event = { + "v": 1, + "type": "scan_armed", + "ts": "2026-05-18T23:37:44Z", + "armed_at": 1779147464, # pre-#821 integer-epoch field + } + ok, reason = _validate_event_schema(legacy_scan_armed_event) + assert ok is True, ( + f"Legacy scan_armed event with `armed_at` extra field MUST " + f"validate (BC-safety migration contract per architect " + f"#821 §3.1 Q1). Got ok={ok}, reason={reason!r}. A False " + f"result here indicates _validate_event_schema has shifted " + f"from open-schema (extras pass silently) to closed-schema " + f"(extras rejected) — the #821 DELETE-vs-DEPRECATE binding " + f"decision rests on lenient-validator behavior; this " + f"regression would break readers consuming legacy journals." + ) + assert reason == "ok", ( + f"BC-safety: lenient validator must return 'ok' for " + f"legacy event with `armed_at` extra; got {reason!r}." + ) + + legacy_scan_disarmed_event = { + "v": 1, + "type": "scan_disarmed", + "ts": "2026-05-19T07:34:35Z", + "disarmed_at": 1779176075, # pre-#821 integer-epoch field + } + ok, reason = _validate_event_schema(legacy_scan_disarmed_event) + assert ok is True, ( + f"Legacy scan_disarmed event with `disarmed_at` extra " + f"field MUST validate (BC-safety migration contract per " + f"architect #821 §3.1 Q1, symmetric to scan_armed). Got " + f"ok={ok}, reason={reason!r}." + ) + assert reason == "ok", ( + f"BC-safety: lenient validator must return 'ok' for " + f"legacy event with `disarmed_at` extra; got {reason!r}." + ) + # --------------------------------------------------------------------------- # Per-type optional-field validation (BR-M2) diff --git a/pact-plugin/tests/test_wake_inbox_drain.py b/pact-plugin/tests/test_wake_inbox_drain.py index d0d63575..4e35e575 100644 --- a/pact-plugin/tests/test_wake_inbox_drain.py +++ b/pact-plugin/tests/test_wake_inbox_drain.py @@ -12,6 +12,7 @@ tests/runbooks/wake-lifecycle-arm-starvation.md. """ +import datetime import json import os import subprocess @@ -23,6 +24,21 @@ HOOK_DIR = Path(__file__).resolve().parent.parent / "hooks" DRAIN = HOOK_DIR / "wake_inbox_drain.py" +# Byte-coupled with session_journal.make_event's ts format and with the +# `_TS_FMT` literal in wake_inbox_drain.py producer-side idempotency +# check. Used by `_iso_ts` to render integer-epoch helper arguments as +# canonical ISO strings the consumer's strptime can parse. +ISO_FORMAT_LITERAL = "%Y-%m-%dT%H:%M:%SZ" + + +def _iso_ts(epoch_seconds: int) -> str: + """Render an integer-epoch as the canonical ISO-8601 UTC string + that wake_inbox_drain.py's strptime literal parses + (`%Y-%m-%dT%H:%M:%SZ`).""" + return datetime.datetime.fromtimestamp( + epoch_seconds, tz=datetime.timezone.utc + ).strftime(ISO_FORMAT_LITERAL) + def _run_drain(stdin_payload, env_extra=None): env = {k: v for k, v in os.environ.items() if not k.startswith("CLAUDE_")} @@ -97,12 +113,23 @@ def _write_marker(home, team_name, filename, payload): return target -def _write_scan_armed_event(home, session_id, project_dir, armed_at=1715731200): +_DEFAULT_SCAN_ARMED_TS = _iso_ts(1715731200) +_DEFAULT_SCAN_DISARMED_TS = _iso_ts(1715734800) + + +def _write_scan_armed_event(home, session_id, project_dir, ts=_DEFAULT_SCAN_ARMED_TS): """Append a `scan_armed` event to the session's journal — mirrors the write performed by commands/start-pending-scan.md Step 5 (the CLI form `python3 session_journal.py write --type scan_armed ...`). The test writes the JSONL line directly to keep the test self-contained and not depend on the CLI subprocess. + + `ts` is the auto-stamped ISO-8601 UTC timestamp matching + `session_journal.make_event`'s format literal; the producer-side + idempotency check in `wake_inbox_drain.py:684+` parses it via + strptime to int epoch. Callers may override `ts` to control the + comparison branch under test (use `_iso_ts(epoch)` for integer- + epoch readability). """ slug = Path(project_dir).name sess_dir = home / ".claude" / "pact-sessions" / slug / session_id @@ -111,18 +138,21 @@ def _write_scan_armed_event(home, session_id, project_dir, armed_at=1715731200): event = { "v": 1, "type": "scan_armed", - "ts": "2026-05-15T00:00:00Z", - "armed_at": armed_at, + "ts": ts, } with journal.open("a", encoding="utf-8") as f: f.write(json.dumps(event) + "\n") return journal -def _write_scan_disarmed_event(home, session_id, project_dir, disarmed_at=1715734800): +def _write_scan_disarmed_event(home, session_id, project_dir, ts=_DEFAULT_SCAN_DISARMED_TS): """Append a `scan_disarmed` event to the session's journal — mirrors the write performed by commands/stop-pending-scan.md Step 5 (paired writer to scan_armed). Symmetric with `_write_scan_armed_event`. + + `ts` is the auto-stamped ISO-8601 UTC timestamp; the producer-side + idempotency check parses it via strptime to int epoch. Callers may + override `ts` to control the comparison branch under test. """ slug = Path(project_dir).name sess_dir = home / ".claude" / "pact-sessions" / slug / session_id @@ -131,8 +161,7 @@ def _write_scan_disarmed_event(home, session_id, project_dir, disarmed_at=171573 event = { "v": 1, "type": "scan_disarmed", - "ts": "2026-05-15T00:01:00Z", - "disarmed_at": disarmed_at, + "ts": ts, } with journal.open("a", encoding="utf-8") as f: f.write(json.dumps(event) + "\n") @@ -639,8 +668,8 @@ def test_fallback_emits_when_armed_then_disarmed(tmp_path): home, team, "T5", status="in_progress", owner=teammate_owner, ) # scan_armed at t=100, scan_disarmed at t=200 (more recent). - _write_scan_armed_event(home, lead_sid, pdir, armed_at=100) - _write_scan_disarmed_event(home, lead_sid, pdir, disarmed_at=200) + _write_scan_armed_event(home, lead_sid, pdir, ts=_iso_ts(100)) + _write_scan_disarmed_event(home, lead_sid, pdir, ts=_iso_ts(200)) rc, out_str, err = _run_drain( json.dumps({ @@ -681,7 +710,7 @@ def test_fallback_suppressed_when_armed_no_disarm(tmp_path): _write_task( home, team, "T6", status="in_progress", owner=teammate_owner, ) - _write_scan_armed_event(home, lead_sid, pdir, armed_at=100) + _write_scan_armed_event(home, lead_sid, pdir, ts=_iso_ts(100)) # No scan_disarmed event. out = _drain_out({ @@ -728,9 +757,9 @@ def test_fallback_suppressed_when_armed_disarmed_rearmed(tmp_path): # Event-order on disk: arm(t=100), disarm(t=200), arm(t=300). # read_last_event returns the LAST event of each type by reverse # scan, so scan_armed=300, scan_disarmed=200. 300 > 200 → suppress. - _write_scan_armed_event(home, lead_sid, pdir, armed_at=100) - _write_scan_disarmed_event(home, lead_sid, pdir, disarmed_at=200) - _write_scan_armed_event(home, lead_sid, pdir, armed_at=300) + _write_scan_armed_event(home, lead_sid, pdir, ts=_iso_ts(100)) + _write_scan_disarmed_event(home, lead_sid, pdir, ts=_iso_ts(200)) + _write_scan_armed_event(home, lead_sid, pdir, ts=_iso_ts(300)) out = _drain_out({ "session_id": lead_sid, "cwd": pdir, @@ -746,13 +775,14 @@ def test_fallback_suppressed_when_armed_disarmed_rearmed(tmp_path): # ─── Strict-greater equality-boundary test ───────────────────────────── -def test_fallback_emits_when_armed_at_equals_disarmed_at(tmp_path): - """Strict-greater equality-boundary invariant. When scan_armed.armed_at - is exactly equal to scan_disarmed.disarmed_at (same-second arm-then- - disarm via `$(date +%s)`), the comparator `armed_at > disarmed_at` - evaluates False and falls through to the count_active_tasks fallback - — fail-conservative emit. Same-second timestamp collisions are - realistic at second-resolution epoch sources. +def test_fallback_emits_when_armed_ts_equals_disarmed_ts(tmp_path): + """Strict-greater equality-boundary invariant. When scan_armed.ts is + exactly equal to scan_disarmed.ts (same-second arm-then-disarm at + second-resolution `%Y-%m-%dT%H:%M:%SZ` precision), the comparator + `armed_epoch > disarmed_epoch` evaluates False and the hook falls + through to the count_active_tasks fallback — fail-conservative + emit. Same-second timestamp collisions are realistic at second- + resolution epoch sources. Counter-test-by-revert: mutating the operator from `>` to `>=` at wake_inbox_drain.py flips this test (the equality case would @@ -777,9 +807,11 @@ def test_fallback_emits_when_armed_at_equals_disarmed_at(tmp_path): _write_task( home, team, "T8", status="in_progress", owner=teammate_owner, ) - # Same-second collision: armed_at == disarmed_at == 200. - _write_scan_armed_event(home, lead_sid, pdir, armed_at=200) - _write_scan_disarmed_event(home, lead_sid, pdir, disarmed_at=200) + # Same-second collision: scan_armed.ts == scan_disarmed.ts (both + # render to the same ISO string at second resolution). + same_second_ts = _iso_ts(200) + _write_scan_armed_event(home, lead_sid, pdir, ts=same_second_ts) + _write_scan_disarmed_event(home, lead_sid, pdir, ts=same_second_ts) rc, out_str, err = _run_drain( json.dumps({ @@ -792,25 +824,29 @@ def test_fallback_emits_when_armed_at_equals_disarmed_at(tmp_path): assert rc == 0, f"non-zero exit; stderr={err}" arm_count = out_str.count("Active teammate work detected") assert arm_count == 1, ( - f"Same-second armed_at == disarmed_at must fall through to " - f"count_active_tasks fallback (fail-conservative emit); got " - f"{arm_count} Arm directives in stdout={out_str!r}. " + f"Same-second scan_armed.ts == scan_disarmed.ts must fall " + f"through to count_active_tasks fallback (fail-conservative " + f"emit); got {arm_count} Arm directives in stdout={out_str!r}. " f"A 0 count here indicates the strict-greater operator was " f"weakened to `>=` — equality now suppresses incorrectly." ) -# ─── Bool-vs-int discrimination parametric tests ─────────────────────── +# ─── Malformed-ts discrimination parametric tests ────────────────────── def _write_event_with_value(home, session_id, project_dir, event_type, field, value): """Append a journal event with a raw field value. Bypasses session_journal.py's write-side schema validator, which would reject bool-in-int fields per _REQUIRED_FIELDS_BY_TYPE. The hook-layer - bool-discrimination guards in wake_inbox_drain.py are defense-in- + fail-conservative guards in wake_inbox_drain.py are defense-in- depth against this exact path: a malformed event on disk (corrupted journal, out-of-band writer, future schema drift). This helper simulates that disk state directly. + + The `field`/`value` pair OVERRIDES any default field in the event + dict (including `ts`) — the caller's assignment is the last + statement in the dict literal so caller-supplied values win. """ slug = Path(project_dir).name sess_dir = home / ".claude" / "pact-sessions" / slug / session_id @@ -827,31 +863,78 @@ def _write_event_with_value(home, session_id, project_dir, event_type, field, va return journal -def test_fallback_emits_when_armed_at_is_bool(tmp_path): - """Bool-vs-int discrimination invariant for armed_at. Python's - `True`/`False` are instances of `int` (since `bool` subclasses - `int`), so a naive `isinstance(armed_at, int)` check would let bool - values flow through the timestamp comparison as 1 or 0. The - `not isinstance(armed_at, bool)` clause rejects bool values as - malformed; the hook falls through to count_active_tasks (fail- - conservative emit). - - Counter-test-by-revert: removing `not isinstance(armed_at, bool)` - from wake_inbox_drain.py flips this test — the bool True (==int 1) - would pass the int check; with disarmed absent the hook would - short-circuit-suppress; this test would see 0 Arm directives - instead of 1. - - Defense-in-depth: the write-side schema validator at - session_journal.py _validate_event_schema also rejects bool-in-int - (pinned by test_session_journal.py). This test pins the - INDEPENDENT hook-layer guard, which defends against journal - corruption / out-of-band writes that bypass the validator. +def test_fallback_emits_when_armed_ts_is_malformed_falls_through_to_count_path(tmp_path): + """Fail-conservative-on-malformed-ts COMPOSITE invariant for scan_armed. + + Pre-#821 failure mode (bool-vs-int): Python `bool` subclassed `int`, + so a naive `isinstance(armed_at, int)` check would let bool values + flow through the timestamp comparison as 1 or 0; the + `not isinstance(armed_at, bool)` clause rejected them as malformed. + + Post-#821 failure mode (ts-string-malformation): the consumer reads + `scan_armed.ts` (auto-stamped ISO string) and parses via strptime. + A writer-bug or journal-corruption that lands `ts=42` / `ts=True` / + `ts=None` / `ts=""` / unparseable string must NOT cause the + producer-side idempotency check to suppress incorrectly. + + Layered defense (composite invariant — what this test pins): + + 1. Outer `try: ... except Exception:` (wake_inbox_drain.py ~696/733): + catch-all for the producer-side block; on ANY raise inside the + block (including TypeError from strptime(42, FMT)), execution + falls through to count_active_tasks (fail-conservative emit). + THIS is the load-bearer that the documented `ts=42` scenario + exercises directly — even with the inner str-guard and inner + try/except both stripped, the outer catch still produces the + correct fall-through behavior. + + 2. `isinstance(armed_ts, str) and armed_ts` str-guard + + inner `try/except (TypeError, ValueError)` around strptime: + defense-in-depth REDUNDANCY layers that short-circuit cleanly + (no raise → no outer catch needed) when ts is recognizably + malformed. Good engineering for future failure modes not yet + anticipated by the current test suite (e.g., a future ts shape + that strptime accepts but yields a nonsense epoch); NOT + independent load-bearers for the documented `ts=42` scenario. + + Counter-test-by-revert (CUMULATIVE strip, empirical, verified by + review-phase F1 probe). Each row strips LAYERS ON TOP OF the prior + row's mutation — these are NOT 3 independent strips. + + Row 1 (strip only the str-guard): test STILL passes. The inner + try/except catches the strptime TypeError raised on ts=42. + + Row 2 (CUMULATIVE: strip the str-guard AND the inner try/except): + test STILL passes. The outer `except Exception:` catches the + propagating TypeError. + + Row 3 (CUMULATIVE: strip the str-guard AND the inner try/except + AND narrow the outer `except Exception:` to `except ImportError:`): + test FAILS. With all three layers stripped, the malformed-ts + TypeError now propagates past the producer-side block to + main()'s top-level catch which prints _SUPPRESS_OUTPUT + (under-emit, 0 Arm directives observed). + + What this test pins: the COMPOSITE invariant that the malformed-ts + fall-through must hold. The outer catch is the cheapest single + layer to remove that breaks the invariant (under Row 3's + cumulative mutation), but Row 1 + Row 2 demonstrate that the + str-guard and inner try/except are honest defense-in-depth — + they short-circuit cleanly when ts is recognizably malformed, + sparing the outer catch, AND they are robust against future + hypothetical refactors that might widen / narrow / replace the + outer catch. + + Distinct from the symmetric ..._disarmed_ts_... test: the armed- + side malformation hits the arm-presence guard (`if armed is not + None`) and the inner armed-ts parse block never executes the + happy path; the disarmed-side malformation hits the inner-branch + comparator (the arm-presence guard has already validated armed). """ home = tmp_path / "home"; home.mkdir() lead_sid = "lead-sid" pdir = "/tmp/p" - team = "team-armed-bool" + team = "team-armed-malformed-ts" teammate_owner = "backend-coder" _write_session_context( home, lead_sid, pdir, team, @@ -864,14 +947,19 @@ def test_fallback_emits_when_armed_at_is_bool(tmp_path): _write_task( home, team, "T9", status="in_progress", owner=teammate_owner, ) - # armed_at as Python bool True → JSON true → loaded back as - # Python bool True (isinstance(True, int) is True; isinstance(True, - # bool) is True). With the bool-guard intact, this event is treated - # as malformed and the hook falls through to the fallback. - _write_event_with_value(home, lead_sid, pdir, "scan_armed", "armed_at", True) - # No scan_disarmed event; without the bool-guard, the bool True - # would short-circuit-suppress at the `disarmed is None → suppress` - # branch (since `isinstance(True, int)` is True). + # armed.ts as integer 42 (not str) → isinstance(armed_ts, str) is + # False → armed_epoch stays None → hook falls through to + # count_active_tasks. The composite defense (str-guard + inner + # try/except + outer `except Exception:`) means stripping any 1 + # or 2 layers leaves the fall-through intact; ONLY the cumulative + # 3-layer strip breaks it — see composite-invariant docstring + # above for the empirical CUMULATIVE counter-test-by-revert recipe. + _write_event_with_value(home, lead_sid, pdir, "scan_armed", "ts", 42) + # No scan_disarmed event. The composite invariant pinned: the + # malformed-ts fall-through must hold. Cumulative strip of all + # 3 layers (str-guard + inner try/except + outer except Exception) + # would propagate the TypeError to main()'s top-level catch which + # prints _SUPPRESS_OUTPUT (under-emit, 0 Arm directives). rc, out_str, err = _run_drain( json.dumps({ @@ -884,42 +972,96 @@ def test_fallback_emits_when_armed_at_is_bool(tmp_path): assert rc == 0, f"non-zero exit; stderr={err}" arm_count = out_str.count("Active teammate work detected") assert arm_count == 1, ( - f"armed_at=bool(True) must be rejected by the bool-guard and " - f"fall through to count_active_tasks (fail-conservative emit); " - f"got {arm_count} Arm directives in stdout={out_str!r}. " - f"A 0 count here indicates `not isinstance(armed_at, bool)` " - f"was removed — bool True flowed through the int check and " - f"suppressed Arm incorrectly." - ) - - -def test_fallback_emits_when_disarmed_at_is_bool(tmp_path): - """Bool-vs-int discrimination invariant for disarmed_at. Symmetric - pin to test_fallback_emits_when_armed_at_is_bool but targeted at - the disarmed_at guard at the inner-branch comparison - `isinstance(disarmed_at, int) and not isinstance(disarmed_at, bool) - and armed_at > disarmed_at`. - - Fixture shape: armed_at is a well-typed int (so the outer arm- - presence branch is entered) AND disarmed_at is a bool. The inner - bool-guard on disarmed_at rejects it; the comparison is skipped; - the hook falls through to the fallback emit path. - - Counter-test-by-revert: removing `not isinstance(disarmed_at, - bool)` from wake_inbox_drain.py flips this test. The bool False - (==int 0) would pass the int check; the comparison - `armed_at(100) > disarmed_at(False==0)` is True; the hook - short-circuit-suppresses. Without the bool-guard, this test sees - 0 Arm directives instead of 1. - - Distinct from the armed_at test: the disarmed_at guard is at a - SEPARATE site (the inner branch); removing one guard does not - affect the other. Two tests pin two sites independently. + f"scan_armed.ts=int(42) must fall through to " + f"count_active_tasks (fail-conservative emit) via the " + f"composite-layered defense (str-guard + inner strptime " + f"try/except + outer `except Exception:`); got {arm_count} " + f"Arm directives in stdout={out_str!r}. A 0 count here " + f"indicates ALL THREE layers were stripped cumulatively — " + f"the malformed-ts TypeError propagated past the producer-" + f"side block to main()'s top-level catch which prints " + f"_SUPPRESS_OUTPUT, causing under-emit incorrectly. See the " + f"composite-invariant docstring above for the CUMULATIVE " + f"counter-test-by-revert recipe (each layer individually is " + f"defense-in-depth; only the 3-layer cumulative strip breaks " + f"the invariant)." + ) + + +def test_fallback_emits_when_disarmed_ts_is_malformed_falls_through_to_count_path(tmp_path): + """Fail-conservative-on-malformed-ts COMPOSITE invariant for scan_disarmed. + Symmetric pin to + test_fallback_emits_when_armed_ts_is_malformed_falls_through_to_count_path + but targeted at the INNER-branch malformed-ts handling on + `disarmed_ts` parsing. + + Pre-#821 failure mode (bool-vs-int): Python bool False (==int 0) + passing through `isinstance(disarmed_at, int)` would suppress when + `armed_at > 0`; the `not isinstance(disarmed_at, bool)` clause + rejected it. + + Post-#821 failure mode (ts-string-malformation): the consumer + parses `scan_disarmed.ts` via strptime; the disarmed-side defense + is layered identically to the armed-side: + + 1. Outer `try: ... except Exception:` (wake_inbox_drain.py + catching the producer-side block): the load-bearer that + catches a propagating strptime TypeError when disarmed.ts is + non-str (e.g., bool False). THIS is the layer the documented + `disarmed_ts=False` scenario exercises directly. + + 2. Inner `isinstance(disarmed_ts, str) and disarmed_ts` guard + + `try/except (TypeError, ValueError)` around strptime: defense- + in-depth REDUNDANCY that short-circuits cleanly on + recognizably-malformed disarmed_ts, sparing the outer catch. + NOT independently load-bearing for the documented scenario. + + Fixture shape: armed_ts is a well-typed ISO string (so the + arm-presence guard `if armed is not None` AND the armed-side + str-guard both pass), AND disarmed_ts is a non-str (`False`). The + disarmed-side composite defense causes the comparator to be + skipped (whether via disarmed-side inner short-circuit or outer + catch); the hook falls through to fail-conservative emit. + + Distinct from the armed_ts test: the disarmed_ts handling lives at + a SEPARATE site (the inner-branch comparator of the producer-side + block, AFTER the arm-presence guard has validated armed) — even + though the same OUTER `except Exception:` catches both sides, the + symmetric pin pair pins the structural symmetry of the armed-side + and disarmed-side parsing patterns. + + Counter-test-by-revert (CUMULATIVE strip, empirical, verified by + review-phase F1 probe). Each row strips LAYERS ON TOP OF the prior + row's mutation — these are NOT 3 independent strips. + + Row 1 (strip only the disarmed-side str-guard): test STILL + passes. The disarmed-side inner try/except catches the + strptime TypeError raised on ts=False. + + Row 2 (CUMULATIVE: strip the disarmed-side str-guard AND the + disarmed-side inner try/except): test STILL passes. The outer + `except Exception:` catches the propagating TypeError. + + Row 3 (CUMULATIVE: strip the disarmed-side str-guard AND the + disarmed-side inner try/except AND narrow the outer + `except Exception:` to `except ImportError:`): test FAILS. + With all three layers stripped, the malformed disarmed-ts + TypeError now propagates past the producer-side block to + main()'s top-level catch which prints _SUPPRESS_OUTPUT + (under-emit, 0 Arm directives observed). + + What this test pins: the COMPOSITE invariant that the disarmed- + side malformed-ts fall-through must hold. The outer catch is the + cheapest single layer to remove that breaks the invariant (under + Row 3's cumulative mutation), but the disarmed-side str-guard + and inner try/except are honest defense-in-depth — they short- + circuit cleanly when disarmed_ts is recognizably malformed, + sparing the outer catch. """ home = tmp_path / "home"; home.mkdir() lead_sid = "lead-sid" pdir = "/tmp/p" - team = "team-disarmed-bool" + team = "team-disarmed-malformed-ts" teammate_owner = "backend-coder" _write_session_context( home, lead_sid, pdir, team, @@ -932,11 +1074,280 @@ def test_fallback_emits_when_disarmed_at_is_bool(tmp_path): _write_task( home, team, "T10", status="in_progress", owner=teammate_owner, ) - # armed_at is a well-typed int; disarmed_at is bool False. - # Without the bool-guard, isinstance(False, int) is True and - # armed_at(100) > disarmed_at(0==False) suppresses incorrectly. - _write_scan_armed_event(home, lead_sid, pdir, armed_at=100) - _write_event_with_value(home, lead_sid, pdir, "scan_disarmed", "disarmed_at", False) + # scan_armed.ts is well-typed ISO; scan_disarmed.ts is bool False. + # The composite defense (disarmed-side str-guard + disarmed-side + # inner try/except + outer `except Exception:`) means stripping + # any 1 or 2 layers leaves the fall-through intact; ONLY the + # cumulative 3-layer strip breaks it — see composite-invariant + # docstring above for the empirical CUMULATIVE counter-test-by- + # revert recipe. + _write_scan_armed_event(home, lead_sid, pdir, ts=_iso_ts(100)) + _write_event_with_value(home, lead_sid, pdir, "scan_disarmed", "ts", False) + + rc, out_str, err = _run_drain( + json.dumps({ + "session_id": lead_sid, "cwd": pdir, + "hook_event_name": "UserPromptSubmit", + "prompt": "go", + }), + env_extra={"HOME": str(home), "CLAUDE_PROJECT_DIR": pdir}, + ) + assert rc == 0, f"non-zero exit; stderr={err}" + arm_count = out_str.count("Active teammate work detected") + assert arm_count == 1, ( + f"scan_disarmed.ts=bool(False) must fall through to the " + f"fallback emit (fail-conservative) via the composite-layered " + f"defense (disarmed-side str-guard + disarmed-side inner " + f"strptime try/except + outer `except Exception:`); got " + f"{arm_count} Arm directives in stdout={out_str!r}. A 0 count " + f"here indicates ALL THREE layers were stripped cumulatively " + f"— the malformed disarmed-ts TypeError propagated past the " + f"producer-side block to main()'s top-level catch which " + f"prints _SUPPRESS_OUTPUT, causing under-emit incorrectly. " + f"See the composite-invariant docstring above for the " + f"CUMULATIVE counter-test-by-revert recipe (each layer " + f"individually is defense-in-depth; only the 3-layer " + f"cumulative strip breaks the invariant)." + ) + + +# ─── Format-drift fall-through coverage (F6 from PR #820 peer-review) ── + + +@pytest.mark.parametrize( + "drifted_ts_armed", + [ + # Sub-second-fraction: strptime against `_TS_FMT = '%Y-%m-%dT%H:%M:%SZ'` + # raises ValueError on the trailing `.123` — fall-through expected. + "2026-05-15T00:00:00.123Z", + # Sub-second-fraction (3-digit milliseconds variant). + "2026-05-15T00:00:00.999Z", + # Mixed TZ: explicit `+00:00` offset suffix instead of `Z`. strptime + # against `Z`-anchored `_TS_FMT` raises ValueError on the `+00:00`. + "2026-05-15T00:00:00+00:00", + # Mixed TZ: non-zero offset. Same fail-conservative behavior. + "2026-05-15T00:00:00-05:00", + # Future-relaxation candidate: `fromisoformat`-shape with no tz suffix + # at all. strptime against Z-anchored format raises ValueError. + "2026-05-15T00:00:00", + # Trailing whitespace inside the string (a writer-bug variant that + # `isinstance(ts, str) and ts` passes but strptime rejects). + # EMPIRICAL: `datetime.strptime('2026-05-15T00:00:00Z ', + # '%Y-%m-%dT%H:%M:%SZ')` raises `ValueError: unconverted data + # remains: ` on Python 3.9.6 / 3.12.7 / 3.13.5 / 3.14.5 + # (verified 2026-05-24). This case exercises the inner + # try/except (TypeError, ValueError) fall-through path as the + # docstring claims — NOT a valid-parse-no-disarm path. Pin + # blocks future re-investigation of an earlier phantom claim + # that strptime silently ignores trailing whitespace. + "2026-05-15T00:00:00Z ", + ], + ids=[ + "subsecond_3digit", + "subsecond_999", + "mixed_tz_plus0000", + "mixed_tz_minus0500", + "no_tz_suffix", + "trailing_whitespace", + ], +) +def test_fallback_emits_when_armed_ts_format_drifts_falls_through_to_count_path( + tmp_path, drifted_ts_armed +): + """Format-drift fail-conservative COMPOSITE invariant for scan_armed.ts. + + F6 finding from PR #820 peer-review (test-engineer Task #45): the + Q3 audit-prose ban on `fromisoformat` switching (and the architect + §3.3 binding decision against direct lex compare) BOTH rest on a + behavioral claim — that any `ts` shape that doesn't match the + canonical `%Y-%m-%dT%H:%M:%SZ` literal MUST cause the producer- + side idempotency check to fall through to count_active_tasks + (fail-conservative emit), NOT to crash, suppress incorrectly, or + silently misorder events. The format-drift behavior is the + architectural-correctness defense the strptime-not-lex-compare + pin (test_python_consumer_parses_ts_via_strptime_not_string_compare) + relies on — but until F6, no test EXERCISED the fall-through + behavior under format drift; only the audit-prose ban and the + strptime-presence pin defended the same surface structurally. + + Fixture: write a `scan_armed` event with a drifted-format `ts` on + disk (parametrized across 6 representative drift shapes covering + sub-second fractions, mixed TZ suffixes, no-TZ-suffix, and + trailing-whitespace bug). No `scan_disarmed` event. The hook must + fall through to count_active_tasks (1 in-progress teammate task + → 1 Arm directive emit). + + Layered defense (COMPOSITE invariant — what this test pins): + + 1. Outer `try: ... except Exception:` (wake_inbox_drain.py + ~696/733): catches ANY raise from the producer-side block; + strptime's ValueError on drifted `ts` propagates here in the + worst case. + 2. Inner `try/except (TypeError, ValueError)` around strptime + (wake_inbox_drain.py ~717/720): catches ValueError directly, + sets `armed_epoch = None`, falls through cleanly without + escalating to the outer catch. This is the layer most + exercised by format-drift inputs (the str-guard at + `isinstance(armed_ts, str) and armed_ts` passes — all 6 + parametrized drift shapes ARE non-empty strings — so + the strptime call IS reached and raises ValueError, caught + by the inner try/except). + 3. `isinstance(armed_ts, str) and armed_ts` str-guard + (wake_inbox_drain.py ~716): defense-in-depth for non-str + malformed-ts (covered by the sibling _is_malformed_ test); + not the load-bearing layer for format-drift since drifted + shapes are str-typed. + + Counter-test-by-revert (CUMULATIVE strip, empirical, verified + during F6 fold). Each row strips LAYERS ON TOP OF the prior + row — these are NOT independent strips. The cumulative framing + matches the discipline established for the Q2 retargeted-test + docstrings in commit-g (per secretary `0d19dfbd`). + + Row 1 (strip ONLY the inner `try/except (TypeError, ValueError)`): + test STILL passes for all 6 parametrized drift shapes — + strptime's ValueError propagates past the inner catch but + the outer `except Exception:` still catches it, and + fall-through to count_active_tasks still occurs. + + Row 2 (CUMULATIVE: strip the inner try/except AND narrow the + outer `except Exception:` to `except ImportError:`): test + FAILS for all 6 parametrized drift shapes — the ValueError + now propagates past both layers to main()'s top-level + catch which prints _SUPPRESS_OUTPUT (under-emit, 0 Arm + directives observed). + + What this test pins: the COMPOSITE invariant that format-drifted + `ts` falls through to count_active_tasks. This DEFENDS the + behavioral claim that the Q3 audit-prose ban rests on — + `fromisoformat` switching (or any format relaxation that introduces + drift between writer and reader) is fail-conservative, not + silently-broken. Pairs with the F4 strptime-not-lex-compare pin + in test_pending_scan_coupling_invariant.py: F4 pins the STRUCTURAL + shape (consumer uses strptime CALL with result-binding); F6 pins + the BEHAVIORAL outcome (drifted format → fall-through). + Together they form the test-coverage defense for the + architectural decision to use strptime rather than lex compare. + + Symmetric pin for scan_disarmed.ts in + test_fallback_emits_when_disarmed_ts_format_drifts_falls_through_to_count_path. + """ + home = tmp_path / "home"; home.mkdir() + lead_sid = "lead-sid" + pdir = "/tmp/p" + team = f"team-armed-format-drift" + teammate_owner = "backend-coder" + _write_session_context( + home, lead_sid, pdir, team, + members=[ + {"name": teammate_owner, "agentId": "agent-bc"}, + {"name": "lead", "agentId": "agent-lead"}, + ], + lead_agent_id="agent-lead", + ) + _write_task( + home, team, "T-fd", status="in_progress", owner=teammate_owner, + ) + # armed.ts is a str (passes isinstance check) but does NOT match the + # canonical _TS_FMT → strptime raises ValueError → inner + # try/except catches → armed_epoch stays None → hook falls + # through to count_active_tasks (fail-conservative emit). + _write_event_with_value( + home, lead_sid, pdir, "scan_armed", "ts", drifted_ts_armed, + ) + # No scan_disarmed event. + + rc, out_str, err = _run_drain( + json.dumps({ + "session_id": lead_sid, "cwd": pdir, + "hook_event_name": "UserPromptSubmit", + "prompt": "go", + }), + env_extra={"HOME": str(home), "CLAUDE_PROJECT_DIR": pdir}, + ) + assert rc == 0, f"non-zero exit; stderr={err}" + arm_count = out_str.count("Active teammate work detected") + assert arm_count == 1, ( + f"scan_armed.ts={drifted_ts_armed!r} (format-drift shape) MUST " + f"fall through to count_active_tasks (fail-conservative emit) " + f"via the composite-layered defense (inner strptime " + f"try/except + outer `except Exception:`); got {arm_count} Arm " + f"directives in stdout={out_str!r}. A 0 count here indicates " + f"the inner try/except AND the outer except Exception were " + f"BOTH stripped cumulatively — the strptime ValueError " + f"propagated to main()'s top-level catch which prints " + f"_SUPPRESS_OUTPUT, causing under-emit incorrectly. This is " + f"the BEHAVIORAL counterpart to the F4 strptime-not-lex-compare " + f"structural pin in test_pending_scan_coupling_invariant.py; " + f"the Q3 audit-prose ban on fromisoformat (and the architect " + f"§3.3 binding against direct lex compare) rests on this " + f"format-drift fall-through behavior being load-bearing." + ) + + +@pytest.mark.parametrize( + "drifted_ts_disarmed", + [ + "2026-05-15T00:00:00.123Z", + "2026-05-15T00:00:00.999Z", + "2026-05-15T00:00:00+00:00", + "2026-05-15T00:00:00-05:00", + "2026-05-15T00:00:00", + "2026-05-15T00:00:00Z ", + ], + ids=[ + "subsecond_3digit", + "subsecond_999", + "mixed_tz_plus0000", + "mixed_tz_minus0500", + "no_tz_suffix", + "trailing_whitespace", + ], +) +def test_fallback_emits_when_disarmed_ts_format_drifts_falls_through_to_count_path( + tmp_path, drifted_ts_disarmed +): + """Format-drift fail-conservative COMPOSITE invariant for + scan_disarmed.ts. Symmetric pin to + test_fallback_emits_when_armed_ts_format_drifts_falls_through_to_count_path + targeting the INNER-branch comparator's disarmed-side strptime + parse. + + Fixture: armed.ts is well-typed canonical ISO; disarmed.ts is a + drifted-format str. The arm-presence guard passes; the inner + armed-side strptime succeeds; armed_epoch is well-typed. Then + the inner disarmed-side strptime raises ValueError on the + drifted disarmed.ts; the disarmed-side inner try/except catches; + disarmed_epoch stays None; the + `disarmed_epoch is not None and armed_epoch > disarmed_epoch` + comparison is skipped; the hook falls through to + count_active_tasks. + + Counter-test-by-revert cumulative recipe is symmetric to the + armed-side test — see armed-side docstring for the layered + defense and the empirical cumulative-strip rows. + """ + home = tmp_path / "home"; home.mkdir() + lead_sid = "lead-sid" + pdir = "/tmp/p" + team = "team-disarmed-format-drift" + teammate_owner = "backend-coder" + _write_session_context( + home, lead_sid, pdir, team, + members=[ + {"name": teammate_owner, "agentId": "agent-bc"}, + {"name": "lead", "agentId": "agent-lead"}, + ], + lead_agent_id="agent-lead", + ) + _write_task( + home, team, "T-fd", status="in_progress", owner=teammate_owner, + ) + # armed.ts is well-typed canonical; disarmed.ts is format-drift. + _write_scan_armed_event(home, lead_sid, pdir, ts=_iso_ts(100)) + _write_event_with_value( + home, lead_sid, pdir, "scan_disarmed", "ts", drifted_ts_disarmed, + ) rc, out_str, err = _run_drain( json.dumps({ @@ -949,12 +1360,18 @@ def test_fallback_emits_when_disarmed_at_is_bool(tmp_path): assert rc == 0, f"non-zero exit; stderr={err}" arm_count = out_str.count("Active teammate work detected") assert arm_count == 1, ( - f"disarmed_at=bool(False) must be rejected by the disarmed-at " - f"bool-guard; the hook should fall through to the fallback " - f"emit; got {arm_count} Arm directives in stdout={out_str!r}. " - f"A 0 count here indicates `not isinstance(disarmed_at, bool)` " - f"was removed — bool False flowed through the int check, " - f"armed_at(100) > 0 suppressed Arm incorrectly." + f"scan_disarmed.ts={drifted_ts_disarmed!r} (format-drift shape) " + f"MUST fall through to count_active_tasks (fail-conservative " + f"emit) via the composite-layered defense (disarmed-side inner " + f"strptime try/except + outer `except Exception:`); got " + f"{arm_count} Arm directives in stdout={out_str!r}. A 0 count " + f"here indicates the inner disarmed-side try/except AND the " + f"outer except Exception were BOTH stripped cumulatively — " + f"the strptime ValueError propagated to main()'s top-level " + f"catch which prints _SUPPRESS_OUTPUT, causing under-emit " + f"incorrectly. Pairs with the F4 strptime-not-lex-compare " + f"structural pin; F6 BEHAVIORAL defense for the format-drift " + f"fall-through invariant." ) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..4d1fd947 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[project] +name = "pact-plugin" +version = "4.2.13" +requires-python = ">=3.7" +description = "Orchestration harness that turns Claude Code into a coordinated team of specialist AI agents"