diff --git a/CHANGELOG.md b/CHANGELOG.md index 95e309cf..0720f1b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,14 @@ signal. Per-file coverage is not enforced; the aggregate floor only rises in step with what's already proven on `main`. +- **Adoption kit rendering externalized.** Codex and Claude Code + `--agent-instructions` skill bundles now render from packaged + `adoption-kits/` files instead of Python string constants. Downstream repos + can provide `.agents-shipgate/adoption-kit.yaml` or + `--agent-instructions-kit ` for local overrides, and generated skill + directories now carry `.agents-shipgate-kit.json` sidecars for managed + migrations. + - **v0.20 — third-party adapter entry-point discovery (E4 from round-3 review).** Opens the same extension surface for adapters (input loaders) that M5 already opened for check plugins. Discovery is gated by the existing diff --git a/README.md b/README.md index a6aea80c..63afc7e5 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,16 @@ The skill lives at `.agents/skills/agents-shipgate/`, can be invoked with `$agents-shipgate`, and teaches Codex the bootstrap, scan, report-reading, advisory CI, and finding-triage workflows. +To customize generated skill content in a downstream repo without rebuilding +`agents-shipgate`, add `.agents-shipgate/adoption-kit.yaml` with repo-local +overrides, or pass it explicitly: + +```bash +agents-shipgate init --workspace . --write \ + --agent-instructions=codex-skill \ + --agent-instructions-kit .agents-shipgate/adoption-kit.yaml +``` + ## Who this is for - **Agent builders** — review MCP, OpenAPI, and SDK tool definitions before merging changes that expand the tool surface. diff --git a/STABILITY.md b/STABILITY.md index eaf9ab86..93807bef 100644 --- a/STABILITY.md +++ b/STABILITY.md @@ -329,6 +329,11 @@ tests on every CI run, not by convention: call to resolve the bundled trigger catalog. 
- **`fixtures.py`** — one `importlib.resources.files('agents_shipgate')` call to resolve the bundled fixture directory. + - **`cli/discovery/agent_instructions/adoption_kit.py`** — one + `importlib.resources.files('agents_shipgate')` call to resolve bundled + first-party adoption-kit files from the installed wheel. Downstream + customization is explicit repo-local file reading through + `--agent-instructions-kit`, never dynamic imports or network fetches. - **`cli/self_check.py`** — one `__import__(module_name)` call validates that supplied modules import cleanly. Runs only under `agents-shipgate self-check`, never during scan. diff --git a/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json b/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json new file mode 100644 index 00000000..8c654786 --- /dev/null +++ b/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json @@ -0,0 +1,37 @@ +{ + "schema_version": 1, + "target": "claude-code-skill", + "prior_render_sha256": {}, + "bootstrap_legacy_sha256": { + "SKILL.md": [ + "b17c53d9905f46b196be38e98cf71e53da6779e3a4f426ecff14f2b0f238aba9" + ], + "prompts/add-shipgate-to-repo.md": [ + "1ea69b1d3d418080c76540fff3b20044f70ed6787418eb5e4d3d39e036b34014" + ], + "prompts/decide-shipgate-relevance.md": [ + "8fab0595326b127fb1678828fd9b15c63cbe98f0229aad5bb87d47030e4b9ca6" + ], + "prompts/explain-finding-to-user.md": [ + "18031ed870b3c937a2996173820639ef441afe0a45e8171f16468826cd389829" + ], + "prompts/fix-top-finding.md": [ + "90d36fbe91668fdc64e5e73727ec8285ee62c584d695b866261ef569fea07074" + ], + "prompts/recommend-fixes.md": [ + "162aa2fb96066535425d9cf86a247a6782b8ec7cc661a18b42dbedf394779475" + ], + "prompts/stabilize-strict-mode.md": [ + "bb97c3fbd3b52d5755f6960878f350d484837849c3e536d99aab3fab3e353405" + ], + "prompts/triage-false-positive.md": [ + "8cfbb0d4b6e2c36569d24260384d3a54165f966276112f4b143b4ac234b51ada" + ], + "prompts/upgrade-shipgate-version.md": [ + 
"992122338eba26ae5d8056b9658117d718a6b477b9928c2a438dd449b5effb68" + ], + "ci-recipes/advisory-pr-comment.yml": [ + "c3756c86f52cf00a594b3fe38179b66e0f07dc8c52b98b9e76f4a15939901c77" + ] + } +} diff --git a/adoption-kits/claude-code-skill/SKILL.md b/adoption-kits/claude-code-skill/SKILL.md new file mode 100644 index 00000000..01fd401d --- /dev/null +++ b/adoption-kits/claude-code-skill/SKILL.md @@ -0,0 +1,93 @@ +--- +name: agents-shipgate +description: Use when the user wants to add a local-first, static Tool-Use Readiness release gate for an AI agent's tool surface, run agents-shipgate scans, fix or triage Shipgate findings, add Shipgate to CI, or interpret a shipgate report. Triggers on phrases like "add shipgate", "release readiness for my agent", "tool-use readiness", "scan my agent", "shipgate scan", "shipgate.yaml", "agents-shipgate-reports/report.json", "fix shipgate finding". +--- + +# agents-shipgate skill + +`agents-shipgate` is a local-first, static Tool-Use Readiness release gate for AI agent tool surfaces. It analyzes `shipgate.yaml` plus tool sources (MCP exports, OpenAPI specs, OpenAI Agents SDK Python files, Anthropic Messages API artifacts, Google ADK files, LangChain/LangGraph files, CrewAI files, OpenAI API artifacts, Codex plugin packages and marketplaces, n8n workflow JSON) and emits deterministic findings as Markdown, JSON, and SARIF. + +It does **not** run agents, call tools, invoke LLMs, connect to MCP servers, or send telemetry by default. Static analysis only; audited exceptions are pinned in `tests/test_adapter_static_only.py::ALLOWED_EXCEPTIONS`. + +> The skill name is intentionally `agents-shipgate` (not `shipgate`) so it does not collide with the `/shipgate` slash command shipped at `.claude/commands/shipgate.md` — Claude Code lets a skill with the same name preempt a command, which would bypass the bootstrap flow the slash command is meant to deliver. 
+ +## When to use this skill + +- The user asks to add Tool-Use Readiness or pre-merge checks to an agent project. +- The repo already has `shipgate.yaml` or `agents-shipgate-reports/report.json`. +- The user asks to fix, triage, suppress, or explain a Shipgate finding. +- The user wants to add Shipgate to CI (GitHub Actions, GitLab CI, CircleCI). + +## When NOT to use this skill + +- Generic linting / type checking — use the project's existing tooling. +- Runtime monitoring, evals, or behavioral testing — Shipgate is static-only. +- LLM output quality assessment — out of scope. +- Editing `agents-shipgate`'s own check implementations — that's upstream-repo work, not user-repo work. + +## How to act + +Pick the matching task and follow the linked recipe verbatim. Recipes are bundled inside this skill so behavior is pinned to the installed version and works offline. Each prompt is self-contained: install commands, exit codes, and `AGENTS_SHIPGATE_AGENT_MODE=1` error handling are in the prompt itself. 
+ +| Task | Recipe | +|---|---| +| Decide whether Shipgate should run at all (apply `docs/triggers.json` against the PR) | [`prompts/decide-shipgate-relevance.md`](prompts/decide-shipgate-relevance.md) | +| Bootstrap a repo (install, init, scan, report) | [`prompts/add-shipgate-to-repo.md`](prompts/add-shipgate-to-repo.md) | +| Add Shipgate to CI for the first time (advisory, PR comment) | See "First-time CI setup" below; copy [`ci-recipes/advisory-pr-comment.yml`](ci-recipes/advisory-pr-comment.yml) | +| Fix the highest-severity finding | [`prompts/fix-top-finding.md`](prompts/fix-top-finding.md) | +| Recommend fixes across all active findings | [`prompts/recommend-fixes.md`](prompts/recommend-fixes.md) | +| Explain a single finding in user-facing prose (3–5 sentences for a PR comment / chat reply) | [`prompts/explain-finding-to-user.md`](prompts/explain-finding-to-user.md); pair with `agents-shipgate explain-finding --from agents-shipgate-reports/report.json --json` | +| Triage a suspected false positive | [`prompts/triage-false-positive.md`](prompts/triage-false-positive.md) | +| Promote advisory CI to strict CI (assumes advisory is already running) | [`prompts/stabilize-strict-mode.md`](prompts/stabilize-strict-mode.md) | +| Upgrade agents-shipgate version | [`prompts/upgrade-shipgate-version.md`](prompts/upgrade-shipgate-version.md) | + +Always: + +1. Set `AGENTS_SHIPGATE_AGENT_MODE=1` so errors emit a `next_action` JSON line on stderr. +2. Parse `agents-shipgate-reports/report.json` (stable contract), not the markdown. +3. Confirm with the user before any command that writes files (`init --write`, `baseline save`). + +## First-time CI setup (advisory) + +If the user has no Shipgate CI yet, default to **advisory** mode — never strict, never with a baseline. The promotion path comes later, only after findings have been reviewed. + +1. 
Confirm the repo has `shipgate.yaml` and a clean local scan (`agents-shipgate scan -c shipgate.yaml --ci-mode advisory` exits `0`). If not, run the bootstrap recipe first. +2. Create `.github/workflows/agents-shipgate.yml` from [`ci-recipes/advisory-pr-comment.yml`](ci-recipes/advisory-pr-comment.yml). It runs on every pull request, posts a summary comment, uploads the report as an artifact, and never fails the job. +3. Confirm `permissions: pull-requests: write` is acceptable to the user before committing — required for the PR comment. +4. Push and open a test PR. Verify the agents-shipgate comment appears. +5. **Stop here.** Promotion to strict mode is a separate task — only run [`prompts/stabilize-strict-mode.md`](prompts/stabilize-strict-mode.md) after the user has reviewed the advisory output and decided which findings they accept. + +For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbucket, pre-commit) refer to https://github.com/ThreeMoonsLab/agents-shipgate/tree/main/examples or `docs/integrations.md` in the upstream repo. Always start in advisory mode. + +## Stable contracts (rely on these) + +- **CLI surface** is frozen for `0.x` — see https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md. +- **Installed CLI contract**: when available, run `agents-shipgate contract --json` to verify local schema versions, `release_decision.decision`, and manual-review signal fields. Older installs should use [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) or upgrade before automating against the local contract command. +- **Report JSON**: `report_schema_version: "0.20"`. Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. 
To filter findings by source reliability, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`; it reads `findings[].provenance_kind` (v0.15+) as a reviewer triage signal only, independent of `confidence` and never as a gate input. Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.20.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.20.json). +- **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.6.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.6.json) (latest; v0.6 adds the top-level `evidence_matrix` compact review section AND `ReleaseDecisionItem.{source, policy_evidence_source}` for reviewer-grade dual-source provenance over the v0.5 baseline). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v06). Use the packet for reviewer-shaped output; use the report for finding details. +- **Single source of truth for the contract**: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). When the schema bumps, that file updates first. +- **Exit codes**: `0` pass, `2` config error, `3` parse error, `4` other error, `20` strict-mode gate failure. +- **Check IDs** (e.g. `SHIP-POLICY-APPROVAL-MISSING`) are stable; new ones may be added but existing ones will not be renamed or repurposed. 
+ +## Boundaries (do not violate) + +- Do not claim a finding is fixed without re-running `agents-shipgate scan` and showing the diff in counts. +- Do not silently suppress findings — `checks.ignore` requires a `reason` and the manifest validator rejects empty reasons. +- Do not commit `agents-shipgate-reports/` — it's regenerated each run; add it to `.gitignore`. +- Do not run `agents-shipgate baseline save` until the user has reviewed the initial findings; baselining ratchets in noise. +- Do not enable strict CI as the first CI step. Always start advisory. +- Do not modify checks in `agents-shipgate`'s own source — that's upstream repo work. + +## If something errors out + +Set `AGENTS_SHIPGATE_AGENT_MODE=1` and re-run. The CLI appends a JSON line to stderr with `{error, message, next_action}`. Follow the `next_action`. The error kinds emitted by the current CLI: + +| Error kind | Fix | +|---|---| +| `config_error` | Manifest is missing, malformed, or fails validation. Common cause: no `shipgate.yaml` yet — run `agents-shipgate init --workspace . --write`. | +| `config_already_exists` | `init --write` was run with an existing `shipgate.yaml`. Edit the file in place or remove it before re-running. | +| `input_parse_error` | A file referenced from the manifest (`tool_sources[].path`, baseline, policy pack) is missing, malformed, or resolves outside the manifest directory. Correct the path. | +| `unknown_check_id` | The check ID passed to `explain` does not exist. Run `agents-shipgate list-checks --json` to enumerate. | +| `other_error` / `internal_error` | Unexpected failure. Re-run with `--verbose` and include the output if filing an issue. | + +For deeper troubleshooting see https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/troubleshooting.md. 
diff --git a/adoption-kits/claude-code-skill/ci-recipes/advisory-pr-comment.yml b/adoption-kits/claude-code-skill/ci-recipes/advisory-pr-comment.yml new file mode 100644 index 00000000..82d6eb37 --- /dev/null +++ b/adoption-kits/claude-code-skill/ci-recipes/advisory-pr-comment.yml @@ -0,0 +1,26 @@ +# Advisory PR comment. +# Recommended starting point — runs the scanner on every PR, posts a summary +# comment, uploads the report as an artifact, and never fails the job. +name: Agents Shipgate (advisory) + +on: + pull_request: + +permissions: + contents: read + pull-requests: write + +jobs: + shipgate: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: ThreeMoonsLab/agents-shipgate@v{{ shipgate_version }} + with: + ci_mode: advisory + diff_base: target + pr_comment: 'true' + shipgate_version: '{{ shipgate_version }}' diff --git a/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md b/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md new file mode 100644 index 00000000..afa92503 --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md @@ -0,0 +1,137 @@ +# Prompt · Add Agents Shipgate to a repo + +You are working in a repo that may contain an AI agent — likely one of: an MCP server tool list (`*mcp*.json` or `.agents-shipgate/*.json`), an OpenAPI spec the agent calls, a Codex plugin package (`.codex-plugin/plugin.json`) or marketplace (`.agents/plugins/marketplace.json`), a Python file with `@function_tool` / `@tool` decorators (OpenAI Agents SDK, LangChain, CrewAI), a Google ADK agent in `agent.py`, an Anthropic Messages API artifact set under `prompts/`/`tools/anthropic-tools.json`/`policies/anthropic-policy.yaml`, or an OpenAI API artifact set under `prompts/`/`tools/openai-tools.json`/`openai-config.json`. 
+ +Your job is to drive the canonical 4-call flow end-to-end in one tool-using +turn, which adds a local-first, static Tool-Use Readiness release gate. + +## Your task + +1. **Install the tool:** + ```bash + pipx install agents-shipgate + ``` + If `pipx` is unavailable, use `python -m pip install agents-shipgate` and verify with `agents-shipgate --version`. + +2. **Sanity-check the install** before touching the user's code: + ```bash + agents-shipgate self-check --json + ``` + Confirm `"ready": true`. If not, surface the failure to the user. + + When available, verify the installed CLI contract locally: + ```bash + agents-shipgate contract --json + ``` + Read `report_schema_version`, `packet_schema_version`, `gating_signal`, and + `manual_review_signals[]`; prefer these local values over stale docs. If the + command is not recognized on an older install, continue after `self-check` + using [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) + and upgrade before relying on local contract verification in automation. + +3. **Detect:** + ```bash + agents-shipgate detect --workspace . --json + ``` + Read the response: `is_agent_project`, `frameworks[]` (per-framework score + evidence + candidate files), `agent_name_candidates[]`, `suggested_sources[]` (MCP/OpenAPI files matched by glob). + + **Stop only when ALL of these hold:** `is_agent_project: false`, `suggested_sources` is empty, `codex_plugin_candidates` is empty, no `shipgate.yaml` already exists in the workspace, AND the user did not explicitly request a scan. Otherwise proceed — MCP/OpenAPI tool-surface repos and Codex plugin package repos register as `is_agent_project: false` because they have no Python framework imports, but they are valid Shipgate targets. MCP/OpenAPI hits surface as `suggested_sources`; Codex plugin hits surface as `codex_plugin_candidates`. + +4. 
**Generate a starter manifest + GitHub Actions workflow:** + ```bash + agents-shipgate init --workspace . --write --ci --json + ``` + The `--json` form returns: + - `manifest_status`: `"written"` | `"skipped_existing"` | `"not_attempted"` + - `workflow.status` (with `--ci`): `"written"` | `"skipped_existing_target"` | `"skipped_cross_reference"` + - `placeholders[]` — entries the template intentionally left as `CHANGE_ME` because no high-confidence signal was available + - `auto_detected.agent_name` — the value the manifest carries (`null` when the template fell back to `CHANGE_ME`) + + `--ci` writes `.github/workflows/agents-shipgate.yml` orthogonally to `--write`. Each gets its own overwrite-refusal check; existing workflows that already call `ThreeMoonsLab/agents-shipgate` skip with a distinct `cross_reference_path`. + +5. **Replace placeholders.** Walk `placeholders[]` from the JSON output. On a fresh workspace the template typically leaves two: + - `agent.name: CHANGE_ME` — replace with the agent's actual role (no strong `Agent(name="…")` literal was found in the source). + - `agent.declared_purpose[]: CHANGE_ME` — replace with a one-line description of what the agent should do (auto-init can't infer this; the schema requires a non-empty value). + + Read the agent's prompt or main file to derive both. Skipping this leaves an invalid adoption artifact — the manifest validates but downstream consumers see meaningless defaults. + +6. **Run the scan with patch suggestions:** + ```bash + agents-shipgate scan -c shipgate.yaml --suggest-patches --format json --ci-mode advisory + ``` + The report lands at `agents-shipgate-reports/report.json`. The Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. 
+ + **Read these first for release gating (v0.8+):** + - `release_decision.decision` ∈ `{"blocked", "review_required", "insufficient_evidence", "passed"}` — baseline-aware. This is the gating signal. `insufficient_evidence` (v0.14+) fires when evidence coverage is degraded past threshold; treat unknown future values as `review_required`. + - `release_decision.{reason, blockers, review_items, fail_policy.would_fail_ci}` + + **Read these for release review (v0.9+):** + - `capability_facts[]`, `declared_intentions[]`, `misalignments[]`, `release_consequence`, `suggested_scenarios[]` + + **Per-finding fields:** + - `check_id`, `severity`, `category`, `tool_name`, `recommendation`, `suppressed` + - `autofix_safe`, `requires_human_review`, `suggested_patch_kind`, `docs_url` (v0.7+) + - `patches[]` (only with `--suggest-patches`) — each has `kind` ∈ `{set_pointer, append_pointer, remove_pointer, manual}` plus `confidence` + `target_file` + etc. for non-manual kinds. + + **Top-level:** `manifest_dir` (absolute path of the manifest's directory — used by `apply-patches` for the containment check). `summary.{status, critical_count, high_count, medium_count}` is preserved for v0.7 callers and is baseline-blind — do not gate on `summary.status` for new consumers. Full contract: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). + +7. **Apply the safe patches:** + ```bash + agents-shipgate apply-patches --from agents-shipgate-reports/report.json --confidence high --apply --json + ``` + Default `--confidence high` only mutates patches whose `confidence` field is `"high"`. Today that's the 3 stale-manifest removals. Scope-coverage appends ship at `medium` and require explicit `--confidence medium` to apply. ManualPatches are never auto-applied. 
+ + **Decision tree** for walking the report: + ``` + for finding in active_findings: + if finding.suggested_patch_kind in ("manual", "none"): + surface_to_user(finding) # Surface; do NOT auto-apply. + continue + if finding.autofix_safe is True: + plan_to_apply(finding) # Will be applied at --confidence high. + continue + surface_for_medium_review(finding) # Medium-confidence — opt-in only. + ``` + + Trace findings (`SHIP-API-TRACE-{APPROVAL,CONFIRMATION}-MISSING`) are permanent ManualPatch by policy. Implement the runtime gate; never edit the trace recording — that patches the evidence, not the agent. See [`docs/autofix-policy.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/autofix-policy.md) for the full classification. + +8. **Add `agents-shipgate-reports/` to `.gitignore`** if it isn't already. The reports are scan artifacts, not source. + +9. **Report back to the user**: + - `release_decision.decision` and `release_decision.reason` (the gating signal — baseline-aware, v0.8+) + - Blocker / review-item counts (`len(release_decision.blockers)` / `len(release_decision.review_items)`) + - The path to the Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output + - The top 3 active critical/high findings (use `report.json`, not stdout) + - Which patches were applied (count from `apply-patches --json` output's `files`) + - Any check IDs the user should investigate first — link to `docs_url` from the finding for full rationale, or use `agents-shipgate explain --json` for the same content via CLI + +## What to do if the scan errors out + +Set `AGENTS_SHIPGATE_AGENT_MODE=1` and re-run. The CLI will append a JSON line to stderr with `{error, message, next_action}`. Follow the `next_action`. + +Common errors and fixes: + +| Error | Fix | +|---|---| +| `Config file not found: shipgate.yaml` | Run `agents-shipgate init --workspace . --write` first | +| `Input path '...' 
resolves outside manifest directory` | The declared `tool_sources[].path` is outside the manifest dir. Move the spec inside the tree, symlink it, or copy it | +| `Invalid shipgate.yaml: ... Did you mean X?` | A field is at the wrong nesting level; move it as suggested | +| `Containment violation` (apply-patches exit 5) | A patch's `target_file` resolved outside `report.manifest_dir`. Re-run scan to refresh; never patch arbitrary system files | + +## What NOT to do + +- Do **not** commit `agents-shipgate-reports/` — it's regenerated each run. +- Do **not** run `agents-shipgate baseline save` until the user has reviewed the initial findings. Baselining ratchets in noise that strict CI will silently ignore. The right time to baseline is **after** the user has decided which findings they accept. +- Do **not** suppress findings without a real `reason` — the manifest validator rejects empty reasons, and the `reason` field is the audit trail when someone asks "why is this OK?" +- Do **not** use `risk_overrides.tools.{tool}.remove_tags` to silence a finding without checking whether the heuristic is actually wrong. Prefer `checks.ignore` with a reason. +- Do **not** edit a trace recording to flip `approved` or `confirmed` — implement the runtime gate instead. 
+ +## Verification before reporting success + +- `agents-shipgate-reports/report.json` exists and parses as JSON +- `report.json` carries `report_schema_version: "0.11"` (or higher) and a non-empty `manifest_dir` +- `report.json` carries a non-null `release_decision.decision` — this is the field to surface to the user +- `shipgate.yaml` has no `CHANGE_ME` values (comments containing the literal `CHANGE_ME` are informational and OK) +- `.gitignore` contains `agents-shipgate-reports/` (or equivalent) +- If `--ci` ran with `workflow.status: "written"`: `.github/workflows/agents-shipgate.yml` exists and references `ThreeMoonsLab/agents-shipgate@v…` +- The user knows the top 3 findings and at least one suggested next step diff --git a/adoption-kits/claude-code-skill/prompts/decide-shipgate-relevance.md b/adoption-kits/claude-code-skill/prompts/decide-shipgate-relevance.md new file mode 100644 index 00000000..6c1cab70 --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/decide-shipgate-relevance.md @@ -0,0 +1,152 @@ +# Prompt · Decide whether Agents Shipgate is relevant + +You are working in a repo or reviewing a PR and need to decide whether +to propose Agents Shipgate as the next step. The other prompts in +[`prompts/`](https://github.com/ThreeMoonsLab/agents-shipgate/tree/main/prompts) +assume relevance is already established — this one runs **before** that +decision and tells you yes or no with a rationale. + +The decision is fully data-driven: it does not depend on prose-reading. +[`docs/triggers.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/triggers.json) is the machine-readable +mirror of the AGENTS.md trigger table; you fetch (or read) it and apply +the rules to the changed file list. + +## Your task + +1. 
**Identify the changed file set.** Repo-relative, forward slashes: + - PR context: `git diff --name-only origin/main...HEAD` + - Working tree: `git status --short` (uncommitted) + - User-pasted diff: parse `diff --git a/ b/` headers + +2. **Fetch the trigger catalog.** Either: + - **Local repo** (already adopted Shipgate): read `docs/triggers.json` directly. + - **Remote** (target repo without Shipgate): fetch + `https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/triggers.json`. + - The catalog has `schema_version: "0.1"` and is stable for `0.x`. + +3. **Apply the rules.** Two equivalent options: + + **Option A — read the JSON yourself.** Walk `rules[]`. For each rule, + evaluate `rule.when` against the changed file list **and** the unified + diff body — several rules use `diff_contains` predicates (e.g. + `@function_tool`) that a path-only listing cannot satisfy. The + predicate vocabulary is documented in `triggers.json` under + `predicate_vocabulary`; the action precedence is in + `action_precedence`. See the decision tree below. + + **Option B — call the bundled evaluator** (when Shipgate is installed). + Use the `--git-diff` flag so paths AND diff body come from git in one + call; piping `git diff --name-only` alone causes `diff_contains` rules + (decorators, framework tokens, Action URL) to silently never fire: + ```bash + agents-shipgate self-check --json # confirm install + python -m agents_shipgate.triggers \ + --git-diff origin/main...HEAD --json + ``` + For uncommitted changes pass `--git-diff` with no revspec — that + runs `git diff HEAD` (covers BOTH staged and unstaged tracked + changes) plus `git ls-files --others --exclude-standard` to add + untracked file paths. Untracked files contribute paths only; their + content is not in `diff_text`, so `diff_contains` rules won't fire + on a brand-new file until you `git add` it (or pass `--diff-text` + manually). 
If your repo already has a manifest, also pass + `--manifest-present` so the `force_run` rule can fire. + The output shape is `{run_shipgate, dry_run_recommended, + matched_rules, stop_conditions_fired, rationale, schema_version}`. + +4. **Emit the decision.** Always reply in this exact JSON shape so + downstream automation can parse you: + + ```json + { + "run_shipgate": true, + "dry_run_recommended": false, + "matched_rules": [ + {"id": "TRIGGER-MCP-EXPORT-CHANGED", "action": "run_shipgate"} + ], + "rationale": "PR adds tools/mcp-export.json; MCP exports are a tool surface." + } + ``` + +5. **Then act.** + - If `run_shipgate: true` and Shipgate is installed: + ```bash + agents-shipgate detect --workspace . --json + ``` + Then follow [`prompts/add-shipgate-to-repo.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/prompts/add-shipgate-to-repo.md) + for the canonical 4-call flow. + - If `run_shipgate: true` and Shipgate is **not** installed: install + it (`pipx install agents-shipgate`) and run `detect`. If the user + prefers a zero-install first step, point them at the GitHub Action + (`ThreeMoonsLab/agents-shipgate@v0.10.0`) instead. + - If `run_shipgate: false` and `dry_run_recommended: true`: propose + a non-mutating scan only — never propose `init --write` based on a + dry-run match alone. Phrase it as "X may have shifted the tool + surface; want me to run a read-only scan against the existing + manifest?" If there's no existing manifest, surface the + `matched_rules` and let the user choose. + - If `run_shipgate: false` and `dry_run_recommended: false`: **do + not propose Shipgate.** Recommend whatever the actual review need + is (lint, type check, unit test, security scan). Mentioning + Shipgate when no rule matched is noise. 
+ +## Decision tree (when reading `triggers.json` by hand) + +``` +For each changed file path AND the unified diff body: + For each rule in triggers.rules: + if rule.when matches → record (rule.id, rule.action) + +stop_fired := every clause in triggers.stop_conditions holds + (requires running detect first; if you haven't, treat as false) + +# Action precedence (highest first), see triggers.json:action_precedence: +if stop_fired: → run = false +elif any action == "force_run": → run = true (manifest present) +elif any action == "skip_shipgate": → run = false (skip beats run) +elif any action == "run_shipgate": → run = true +elif any action == "dry_run": → run = false, dry_run_recommended = true +else: → run = false (no rules matched) +``` + +Why `skip_shipgate` beats `run_shipgate`: a brittle `diff_contains` match +(e.g. `@tool` mentioned in README prose) should not override the explicit +"this is a docs-only PR with no tool surface impact" signal. + +Why `force_run` overrides `skip_shipgate`: an existing `shipgate.yaml` is +the operational opt-in; even a docs-only PR in such a repo gets scanned +because the cost is low (advisory) and tool-adjacent prose changes can +matter. + +## What NOT to do + +- Do **not** propose Shipgate based on filename guesses ("looks like an + AI agent"). The trigger catalog is the source of truth — if no rule + matches, the answer is no. +- Do **not** silently fall back to "yes, run it" when you can't fetch + `triggers.json`. Surface the fetch failure to the user and ask. +- Do **not** invent rule IDs in the output. Every entry in + `matched_rules` must come from `triggers.json`. +- Do **not** treat the **negative control** ("update docs only") as a + reason to propose Shipgate. The `TRIGGER-DOCS-ONLY-NEGATIVE` rule + fires `skip_shipgate` for a reason — and it covers test-only PRs + too, not just `*.md`. +- Do **not** propose `agents-shipgate init --write` on a `dry_run`-only + match. 
`dry_run_recommended: true` justifies a non-mutating `scan` + against an existing manifest, nothing more. +- Do **not** rely on bare `--git-diff` for brand-new untracked files + to fire `diff_contains` rules. The bare flag covers tracked changes + (staged + unstaged) and untracked file *paths*, but not untracked + file *content*. `git add` first, or pass `--diff-text` explicitly. + +## Verification before reporting + +- Output is valid JSON with the keys `run_shipgate`, + `dry_run_recommended`, `matched_rules`, `rationale`. +- Every `matched_rules[].id` exists in the loaded `triggers.json`. +- If `run_shipgate: true`, the next-step command is named. +- If `run_shipgate: false` AND `dry_run_recommended: true`, at most + one Shipgate command appears (a non-mutating `scan` against an + existing manifest; none when no manifest exists) — never `init --write`. +- If `run_shipgate: false` AND `dry_run_recommended: false`, no + Shipgate command appears anywhere in your reply. diff --git a/adoption-kits/claude-code-skill/prompts/explain-finding-to-user.md b/adoption-kits/claude-code-skill/prompts/explain-finding-to-user.md new file mode 100644 index 00000000..13145f6b --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/explain-finding-to-user.md @@ -0,0 +1,89 @@ +# Prompt · Explain a single Agents Shipgate finding to a user + +You need to translate one `report.json` finding into prose for a human +who has never read the Shipgate docs. Companion to `agents-shipgate +explain-finding <fingerprint>`, which gives you the structured payload +you'll quote. + +This is for the moment when an agent has run a scan, identified the top +finding (via `agent_summary.first_recommended_action.why` or by walking +`findings[]`), and now has to summarize it for a PR comment, chat +reply, or commit message. The user shouldn't have to follow a doc link +to understand what's going on. + +## Your task + +1. 
**Get the fingerprint.** If `agent_summary.first_recommended_action.why` names a `check_id` and tool, look up the matching `findings[].fingerprint` in `report.json`. Otherwise pick the highest-severity active finding (`critical > high > medium > low > info`) and read `fingerprint` directly from that entry. + +2. **Run `explain-finding` to get the structured payload.** + ```bash + agents-shipgate explain-finding <fingerprint> \ + --from agents-shipgate-reports/report.json --json + ``` + The output carries: + - `check_id`, `title`, `severity`, `category` — what the check is. + - `tool_name`, `tool_id` — the affected tool (may be null for manifest-level checks). + - `evidence` — the structured evidence the check captured. + - `recommendation` — the check author's verbatim suggested fix. + - `agent_action` — `auto_apply | propose_patch_for_review | escalate_to_human | suppress_with_reason | informational`. + - `metadata` — full `CheckMetadata` (rationale, fires_when, evidence_fields, docs_url) when the check is in the catalog. + - `explanation` — a deterministic 3–5-sentence prose summary you can quote verbatim or rewrite. + +3. **Write the prose for the user.** Three to five sentences, in this order: + 1. **What.** Name the check (`check_id` is fine), the affected tool (`tool_name`), and the severity in one sentence. If the check has no `tool_name`, name what the check examined (e.g. "the manifest", "permissions"). + 2. **Why it matters.** Pull from `metadata.rationale` or `metadata.fires_when`. If neither exists, paraphrase the `recommendation`. Avoid verbatim verbose catalog text — translate "limited automation review" into plain English. + 3. **What you'll do (or want).** Map `agent_action` to a concrete next step: + - `auto_apply`: "I can apply the fix automatically — say yes and I'll run `apply-patches --confidence high --apply`." + - `propose_patch_for_review`: "There's a suggested patch but the confidence is medium/low (or there's a manual sibling). 
Want me to show the diff before applying?" + - `escalate_to_human`: "There's no automatic fix. Here's the recommended remediation: [paraphrase recommendation]. Want me to draft the change for you to review?" + - `suppress_with_reason`: "If you want to accept this risk, I can add a suppression with reason. What should the reason say?" + - `informational`: "No action needed; flagging for awareness." + 4. *(Optional)* **Where to learn more.** If `metadata.docs_url` exists, link it. + 5. *(Optional)* **Suppression status.** If `suppressed` is true, mention that — otherwise omit. + +4. **Cite evidence sparingly.** Only quote a specific evidence value when it makes the explanation concrete (e.g. naming the broken parameter, the file path from `source.ref`). Do not dump the whole `evidence` dict. + +5. **Format for the surface.** PR comments and chat support markdown — use a code span for `check_id` and `tool_name`. Plain text emails should drop the backticks but keep the structure. + +## Example + +Input (from `explain-finding fp_f092940f62fbb012 --from ... --json`): +```json +{ + "check_id": "SHIP-POLICY-APPROVAL-MISSING", + "severity": "critical", + "tool_name": "stripe.create_refund", + "agent_action": "escalate_to_human", + "recommendation": "Declare an approval policy or remove the tool.", + "metadata": { + "rationale": "High-risk actions need explicit approval before promotion.", + "fires_when": "Financial/destructive risk exists without approval policy." + } +} +``` + +Good prose for a PR comment: + +> The Tool-Use Readiness scan flagged a critical issue: `stripe.create_refund` doesn't declare an approval policy in `shipgate.yaml`. High-risk actions like refunds need an explicit human approval gate before they can ship — without one, an agent could trigger a refund on its own without review. There's no automatic fix here. 
The right remediation is to either add `policies.require_approval_for_tools: [stripe.create_refund]` (with a reviewer-visible approval trace) or remove the tool from this release surface. Want me to draft the manifest change for you? + +Bad prose for the same input: + +> Finding `fp_f092940f62fbb012`: `SHIP-POLICY-APPROVAL-MISSING` fired with severity `critical` on `stripe.create_refund`. autofix_safe=false, requires_human_review=true. evidence: risk_tags=[financial_action, destructive]. recommendation: "Declare an approval policy or remove the tool." + +The bad version is true but unreadable — it dumps the JSON instead of translating it. + +## What NOT to do + +- Do **not** quote the structured `explanation` field verbatim if it's robotic. It's a deterministic baseline; rewrite for tone when needed. +- Do **not** fabricate consequences. If neither the check's `rationale` nor its `fires_when` supports a claim (e.g. "could leak customer data"), don't make it. Stay grounded in catalog text. +- Do **not** propose `apply-patches` for `escalate_to_human` findings — the user has to decide on the fix manually. +- Do **not** propose adding a `checks.ignore` entry as the default response. Suppression is a real choice, but it's the last resort and needs an audit-trail-quality reason. Use the [`triage-false-positive.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/prompts/triage-false-positive.md) prompt for that workflow. +- Do **not** include the fingerprint string in the user-facing prose unless they specifically asked for it. Fingerprints are agent-to-agent identifiers, not human-friendly labels. + +## Verification before sending the message + +- The user-facing prose names the affected tool (or what the check examined) at least once. +- The severity is mentioned somewhere (a word like "critical" or "medium-severity" — not just the JSON token). +- The action sentence matches the finding's `agent_action`. If the message says "I'll apply this automatically," `agent_action` must be `auto_apply`. 
+- No raw JSON dumps in the prose — translate, don't quote. +- If `metadata.docs_url` exists, include it (or link text equivalent). diff --git a/adoption-kits/claude-code-skill/prompts/fix-top-finding.md b/adoption-kits/claude-code-skill/prompts/fix-top-finding.md new file mode 100644 index 00000000..e2a45c8f --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/fix-top-finding.md @@ -0,0 +1,72 @@ +# Prompt · Fix the top Agents Shipgate finding + +You are working in a repo with `shipgate.yaml` already in place. Run a scan and fix the highest-severity unsuppressed finding. + +## Your task + +1. **Run a scan and locate the top finding.** + ```bash + agents-shipgate scan -c shipgate.yaml --ci-mode advisory + ``` + Read `agents-shipgate-reports/report.json`. For v0.12+ reports the easy path is `agent_summary.first_recommended_action.why` — for most `blocked`/`review_required` verdicts it names the top finding's `check_id` and `tool_name` directly. Three exceptions to expect: + + - **`insufficient_evidence` verdict** (v0.14+; the scan saw too many low-confidence tools or 4+ source warnings to gate release). There is no specific finding to fix; the action's `why` describes the evidence situation and recommends gathering deeper sources (MCP/OpenAPI inputs, eval traces, additional source files). Follow that guidance instead of looking for a top finding. + - **Evidence-coverage-driven `review_required`** (sub-threshold low-confidence/static evidence; no specific finding to fix). The action's `why` describes the evidence situation and recommends gathering MCP/OpenAPI inputs or eval traces — there is no `check_id` to parse out. If you see "low-confidence evidence" or "static-only" in the why-text, follow that guidance instead of looking for a top finding. + - **`auto_appliable_patches > 0`**. The action proposes `apply-patches`; the why-text names the apply-patches command, not a specific finding. Walk `findings[]` for the actual top entry. 
+ + Fall back to picking the entry with the highest severity (`critical > high > medium > low > info`) and `"suppressed": false` whenever the action doesn't name a finding directly. + +2. **Look up the check definition.** + ```bash + agents-shipgate explain <check_id> --json + ``` + This returns the `CheckMetadata` with `description`, `rationale`, `fires_when`, `evidence_fields`, `recommendation`. + +3. **Diagnose the fix.** There are exactly four legitimate responses to a finding. v0.12+ reports project the routing via `agent_action`: + + | Response | When | `agent_action` (v0.12+) | + |---|---|---| + | **Add the missing policy / scope / annotation** to `shipgate.yaml` | The check is correct; the manifest just hadn't declared the safeguard yet | `propose_patch_for_review` (a `set_pointer`/`append_pointer` patch is attached) or `escalate_to_human` (no patch — you write the entry by hand) | + | **Override the heuristic** via `risk_overrides.tools.{tool}.{tags,remove_tags}` | The risk classification is wrong (e.g. a GET endpoint that picked up the `destructive` tag because of a misleading operationId) | `escalate_to_human` | + | **Suppress the finding** via `checks.ignore` with a `reason` | The check is correct but you've decided to accept the risk explicitly (e.g. "tool deprecated 2026-Q2") | `escalate_to_human` (the future `suppress_with_reason` value is reserved for checks that pre-classify themselves as suppressible) | + | **Fix the underlying tool definition** | The tool spec itself is wrong (missing description, broad scope, free-form action field) | `escalate_to_human` | + +4. **Apply the fix.** Edit either `shipgate.yaml` or the tool source file. Do not delete tools wholesale to silence findings. + +5. **Re-scan and confirm the count went down.** + ```bash + agents-shipgate scan -c shipgate.yaml --ci-mode advisory + ``` + The previously-failing fingerprint should be gone from `report.json` — or, if you chose suppression, still present but marked `"suppressed": true`. + +6. 
**Report back**: + - What was the original finding (check ID, tool, severity) + - Which of the four response types you used + - The diff to `shipgate.yaml` (or other file) you applied + - The new finding count + +## Common fixes by check ID + +| Check | Typical fix | +|---|---| +| `SHIP-POLICY-APPROVAL-MISSING` | Add the tool to `policies.require_approval_for_tools` with a reason | +| `SHIP-POLICY-CONFIRMATION-MISSING` | Add the tool to `policies.require_confirmation_for_tools` | +| `SHIP-SIDEFX-IDEMPOTENCY-MISSING` | Add an `idempotency_key` parameter, set `idempotentHint: true` annotation, or list under `policies.require_idempotency_for_tools` | +| `SHIP-AUTH-MISSING-SCOPE` | Declare the scope on the tool (in OpenAPI security or MCP metadata) and in `permissions.scopes` | +| `SHIP-AUTH-MANIFEST-BROAD-SCOPE` | Replace `*` / `admin` with the specific operation scope(s) | +| `SHIP-DOC-MISSING-DESCRIPTION` | Add a 20+ char description to the tool definition | +| `SHIP-SCHEMA-BROAD-FREE-TEXT` | Constrain the parameter with an enum, structured schema, or narrower fields | +| `SHIP-SCHEMA-MISSING-BOUNDS` | Add `maximum` to the numeric parameter | +| `SHIP-INVENTORY-LOW-CONFIDENCE-PRODUCTION-SURFACE` | Declare the tools through MCP/OpenAPI for higher-confidence inventory; or move target to staging | + +## What NOT to do + +- Do not blanket-suppress an entire check. Suppressions are per-tool unless the check is genuinely irrelevant for this repo. +- Do not write `reason: "false positive"` without explanation. Reviewers should be able to read the reason and understand the decision in 60 seconds. +- Do not edit `agents-shipgate-reports/`. It's regenerated each run. 
+ +## Verification + +- The previously-failing finding's fingerprint is no longer present in `report.json` (or is marked `"suppressed": true` if you suppressed it via `checks.ignore`) +- The fix is committed in a single, focused diff (manifest change + reason) +- If you used `checks.ignore`, the `reason` is concrete (a date, a ticket link, or "tool deprecated; see roadmap") diff --git a/adoption-kits/claude-code-skill/prompts/recommend-fixes.md b/adoption-kits/claude-code-skill/prompts/recommend-fixes.md new file mode 100644 index 00000000..abe986bb --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/recommend-fixes.md @@ -0,0 +1,82 @@ +# Prompt · Recommend fixes for active Agents Shipgate findings + +You are working in a repo with `shipgate.yaml` already in place and want a coordinated remediation pass across **all** active findings — not just the top one. Walk every finding, classify it against the current autofix policy, and surface targeted fix recommendations. Apply only the safe, high-confidence patches (after preview + explicit confirmation); leave the rest for human review with concrete advice. + +## Your task + +1. **Always run a fresh v0.8+ scan with patches.** Do not reuse a stale report — earlier scans may be pre-v0.7 (no remediation fields), pre-v0.8 (no `release_decision`), or may lack `patches[]` (no `--suggest-patches`). Set `AGENTS_SHIPGATE_AGENT_MODE=1` so errors emit a `next_action` JSON line on stderr. + ```bash + AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate scan -c shipgate.yaml \ + --suggest-patches --format json --ci-mode advisory + ``` + Read `agents-shipgate-reports/report.json`. Verify `report_schema_version` is `"0.8"` or higher. Filter `findings[]` to entries with `"suppressed": false`. + +2. **Bucket each active finding into one of four classes.** Read `agent_action` (v0.12+; deterministic projection of patches/autofix/human-review fields) to bucket each active finding directly. 
If `agent_action` is missing (older v0.11 or earlier reports), fall back to the legacy three-field check shown in the right column. The buckets correspond to [`docs/autofix-policy.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/autofix-policy.md): + + | Bucket | `agent_action` (v0.12+) | Legacy fallback (v0.11 or earlier) | Example check IDs | + |---|---|---|---| + | **A. Safe auto-fix** | `auto_apply` | `autofix_safe == true` | `SHIP-MANIFEST-STALE-{SUPPRESSION,POLICY,RISK-OVERRIDE}` when the match is unique | + | **B. Medium-confidence config fix** | `propose_patch_for_review` | `autofix_safe == false` AND `suggested_patch_kind` ∈ `{set_pointer, append_pointer, remove_pointer}` | `SHIP-AUTH-SCOPE-COVERAGE-MISSING` | + | **C. Manual** | `escalate_to_human` (with `suggested_patch_kind == "manual"`) | `suggested_patch_kind == "manual"` | Documentation, schema bounds, owner gaps, ADK/LangChain/CrewAI metadata, and the never-auto-fix trace findings | + | **D. No patch emitted** | `escalate_to_human` (with `suggested_patch_kind == "none"`) | `suggested_patch_kind == "none"` | The generator emitted nothing — but the finding can still be high/critical (e.g. low-confidence inventory). Treat as **human triage**, not informational. | + | (skip) | `informational` | `suppressed == true` | Already-suppressed findings; show counts only. | + + For one-fetch counts read the top-level `agent_summary` block (v0.12+): + `agent_summary.auto_appliable_patches` is the bucket-A count, and + `agent_summary.needs_human_review` is buckets B + C + D combined + (every active finding the user must weigh in on before applying — + medium/low-confidence patches AND escalations). To split bucket B + from bucket C+D you have to walk `findings[].agent_action` — + agent_summary deliberately does not disaggregate them, since the + distinction is an implementation detail of the patch-confidence + policy rather than a release-gate signal. 
Use + `agent_summary.first_recommended_action.command` as your default + suggestion when bucket A is non-empty. + +3. **Build a recommendation card per finding.** For each, present: + - `check_id`, `title`, `severity`, `tool_name`, `confidence` + - The verbatim `recommendation` string (per-finding fix text from the check author) + - `docs_url` as a markdown link (when non-null) + - **Concrete fix step** — branch on patch kind, since the patch shapes differ: + - `set_pointer` / `append_pointer`: show `target_file`, `pointer`, `value`, `confidence`, `rationale` + - `remove_pointer`: show `target_file`, `pointer`, `confidence`, `rationale` + - `manual`: show `instructions` verbatim. `ManualPatch` has only `kind` and `instructions` — do NOT try to read `target_file`/`pointer`/`value`; they don't exist. + - No patches (bucket D): use `evidence` and `source` to make `recommendation` concrete — quote the offending parameter name, the file path from `source.ref`, the manifest key. Generic advice is not acceptable here. + +4. **Present the prioritised plan.** Severity-ordered (critical → high → medium → low → info), grouped by bucket within each severity tier. Show counts per bucket up front. For low/info findings in bucket D, summary-link via `docs_url` rather than full cards — avoid wall-of-text. + +5. **Decision points — ask the user explicitly. Always preview before mutating.** + - **Bucket A (safe auto-fix).** First run a **dry-run** (omit `--apply`): + ```bash + agents-shipgate apply-patches \ + --from agents-shipgate-reports/report.json \ + --confidence high + ``` + Show the user the planned file diffs. Only after explicit confirmation, re-run with `--apply --json`. Never silently apply. + - **Bucket B (medium-confidence config).** Surface the patches with their `pointer` and `value`. Tell the user the opt-in command (`apply-patches --confidence medium`) and that they must read the appended values first — scope strings can encode policy choices. 
Do not apply on the user's behalf in this recipe. + - **Bucket C (manual).** Ask whether to walk through them now or defer. For deep dive on a single finding, cross-link to [`fix-top-finding.md`](fix-top-finding.md). Never edit a trace recording to silence `SHIP-API-TRACE-{APPROVAL,CONFIRMATION}-MISSING` — that patches the evidence, not the agent. Implement the runtime gate instead. + - **Bucket D (no patch).** Ask whether to walk through them — these need diagnosis, not patch application. Cross-link to [`fix-top-finding.md`](fix-top-finding.md); the four-response decision tree (add policy / override / suppress / fix tool spec) applies. + +6. **Re-scan after applying any Bucket A patches.** Show the diff in `summary.{critical_count, high_count, medium_count}`. Confirm the previously-fixed fingerprints are gone from `report.json`. + +7. **Report back**: + - Counts per bucket (A/B/C/D) and per severity + - What was applied (from `apply-patches --apply --json` output's `files`) + - What remains, with one clear next action per remaining bucket + - Any cross-links the user should follow ([`fix-top-finding.md`](fix-top-finding.md), [`triage-false-positive.md`](triage-false-positive.md)) + +## What NOT to do + +- Do **not** run `apply-patches --apply` without showing the dry-run preview first AND getting explicit user confirmation, even when `autofix_safe == true`. +- Do **not** apply `--confidence medium` patches in this recipe. They are opt-in only and require the user to read the appended values. +- Do **not** edit a trace recording to silence `SHIP-API-TRACE-{APPROVAL,CONFIRMATION}-MISSING`. Trace findings are class-four "never auto-fix" per the autofix policy. Implement the runtime approval/confirmation gate. +- Do **not** recommend `checks.ignore` as a fix here. That's the [`triage-false-positive.md`](triage-false-positive.md) workflow's job — cross-link to it. 
+- Do **not** claim a finding is fixed without re-running `agents-shipgate scan` and showing the diff in counts. +- Do **not** invent recommendations not grounded in `recommendation`, `evidence`, `patches[].instructions`, or `docs_url`. Use evidence to make advice concrete; do not replace check-author guidance with a guess. + +## Verification + +- A fresh `report.json` exists, validates as `report_schema_version: "0.8"` (or higher; v0.12+ exposes `agent_action` and `agent_summary`), and was generated with `--suggest-patches`. +- Each presented card cites a concrete location: `target_file` + `pointer` for non-manual patches, `instructions` verbatim for manual patches, file path + parameter name from `evidence`/`source` for bucket D. +- If Bucket A patches were applied: re-scan shows lower active counts AND the previously-failing fingerprints are absent from the new `report.json`. +- If only B/C/D were surfaced: counts are unchanged (expected); the user has a clear list of next actions. diff --git a/adoption-kits/claude-code-skill/prompts/stabilize-strict-mode.md b/adoption-kits/claude-code-skill/prompts/stabilize-strict-mode.md new file mode 100644 index 00000000..990bba68 --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/stabilize-strict-mode.md @@ -0,0 +1,76 @@ +# Prompt · Stabilize Agents Shipgate strict mode + +The user has Agents Shipgate running in **advisory** mode and wants to graduate to **strict** mode (CI fails on findings) without surprising contributors. + +## The pattern + +1. Run a fresh scan and inventory the active findings. +2. Tune `risk_overrides` and `checks.ignore` for genuine false positives, with reasons. +3. Save a baseline of everything that's left. +4. Switch CI to strict mode with the baseline applied — only NEW findings fail. +5. Pick a severity threshold; usually start with `critical`, raise to `[critical, high]` later. + +## Your task + +1. 
**Inventory current findings.** + ```bash + agents-shipgate scan -c shipgate.yaml --ci-mode advisory + ``` + Look at `agents-shipgate-reports/report.json` `summary.critical_count`, `high_count`, `medium_count`. If the active list is small (< 20 unique check IDs), consider just fixing them rather than baselining. + +2. **Tune false positives.** For each unique check ID, decide: + - True positive that should be fixed → use the `fix-top-finding.md` prompt to apply a real fix. + - True positive that the team explicitly accepts (deprecated tool, known limitation) → add to `checks.ignore` with a real `reason`. + - False positive (heuristic misfire) → use `risk_overrides.tools.{tool}.remove_tags` or add tags via `risk_overrides.tools.{tool}.tags`. + +3. **Save the baseline:** + ```bash + agents-shipgate baseline save -c shipgate.yaml \ + --out .agents-shipgate/baseline.json + ``` + +4. **Commit the baseline:** + ```bash + git add .agents-shipgate/baseline.json + git commit -m "Baseline shipgate findings ($N criticals, $M highs)" + ``` + +5. **Update the CI workflow.** Replace the existing advisory step with strict + baseline. Use [`examples/github-actions/03-strict-with-baseline.yml`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/examples/github-actions/03-strict-with-baseline.yml) as the template: + ```yaml + - uses: ThreeMoonsLab/agents-shipgate@v0.10.0 + with: + ci_mode: strict + fail_on: critical + baseline: .agents-shipgate/baseline.json + pr_comment: 'true' + ``` + +6. **Verify the gate fires correctly.** In a throwaway branch, deliberately introduce a new finding (e.g. add a wildcard scope) and confirm CI fails. Revert before merging. 
+ +## When to refresh the baseline + +| Situation | Action | +|---|---| +| Found a false positive after baselining | Add a `checks.ignore` entry; do **not** re-baseline | +| Fixed several findings | Re-baseline so resolved ones disappear: `agents-shipgate baseline save ...` | +| Upgraded shipgate to a version with new checks | New check IDs surface as new findings; fix or suppress, then re-baseline | +| Added new tools that have no policy yet | Each new tool's findings are `new` and will fail; fix or accept, then re-baseline | + +Re-baselining is just running `baseline save` again. Diff the new file vs the old in code review so the team sees what's been accepted. + +## Promotion to `[critical, high]` + +After a sprint or two of strict-on-critical, the active high-severity list usually compresses enough to flip on. Update `fail_on: critical,high` and re-baseline. + +## What NOT to do + +- Do **not** baseline in your first run as a "shortcut to make CI green." That hides the existing risk surface from review. +- Do **not** baseline findings that have a real fix — fix them first, baseline only what you're explicitly accepting. +- Do **not** write `--fail-on critical,high` without a baseline if the repo has many existing high findings; CI will fail on day one and contributors will mute the workflow. 
+ +## Verification + +- `.agents-shipgate/baseline.json` is committed and contains `findings[]` +- CI workflow uses `ci_mode: strict` and `baseline: .agents-shipgate/baseline.json` +- A test PR that adds a deliberate new critical finding fails CI +- A test PR that doesn't change the tool surface passes CI diff --git a/adoption-kits/claude-code-skill/prompts/triage-false-positive.md b/adoption-kits/claude-code-skill/prompts/triage-false-positive.md new file mode 100644 index 00000000..db39be5a --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/triage-false-positive.md @@ -0,0 +1,90 @@ +# Prompt · Triage a suspected Agents Shipgate false positive + +The user thinks a specific finding is wrong. You need to decide whether to override the heuristic, suppress the finding, or convince the user that the check is correct. + +## Your task + +1. **Read the full finding.** From `agents-shipgate-reports/report.json`: + ```json + { + "id": "fp_...", + "check_id": "SHIP-...", + "tool_name": "...", + "severity": "...", + "evidence": { ... }, + "recommendation": "..." + } + ``` + And the check definition: + ```bash + agents-shipgate explain <check_id> --json + ``` + +2. **Read the actual tool definition.** Look up the OpenAPI / MCP / SDK source: + - For OpenAPI: open the spec at the path given in `findings[].source.ref` + - For MCP: open the JSON file + - For SDK: open the `.py` file at the line given in `source.location` + +3. **Apply the decision tree:** + + ``` + Is the heuristic wrong about the tool? + (e.g. "destructive" tag on a GET; "financial_action" tag on a non-financial scope) + → YES: override via risk_overrides.tools.{tool}.remove_tags + → NO: continue + + Is the check fundamentally inapplicable to this tool? + (e.g. SHIP-DOC-MISSING-DESCRIPTION on an internal-only tool slated for removal) + → YES: suppress via checks.ignore with a concrete reason + → NO: continue + + The check is correct. Fix the tool definition. 
+ → use the fix-top-finding.md prompt + ``` + +## Override vs suppress — which to use + +| Use `risk_overrides` when | Use `checks.ignore` when | +|---|---| +| The risk **classification** is wrong | The classification is right but the team accepts the risk | +| You want to remove a tag (e.g. `remove_tags: [destructive]`) | You want to suppress one specific finding | +| The fix benefits all checks that consume that tag | The acceptance is per-check, per-tool | +| Example: a `get_destroy_status` GET picks up `destructive` from substring "destroy" | Example: a documented internal-only tool with no description | + +**Rule of thumb:** if the fix would silence multiple findings naturally, use `risk_overrides`. If you want to acknowledge one specific finding by name, use `checks.ignore`. + +## Required: a concrete `reason` + +Both `checks.ignore` entries and `risk_overrides` entries take a `reason`. Empty reasons fail manifest validation. Good reasons answer "why is this OK?" in a way a future reviewer can verify: + +| Bad reason | Better reason | +|---|---| +| `false positive` | `GET endpoint; "destroy" appears in operationId only because it returns destroy-status` | +| `not applicable` | `Tool deprecated 2026-Q2; deletion tracked in JIRA-1234` | +| `team decision` | `Reviewed by platform-eng 2026-04-10; see ADR-007` | + +## Re-run and confirm + +After editing the manifest: + +```bash +agents-shipgate scan -c shipgate.yaml --ci-mode advisory +``` + +The previously-failing fingerprint should be gone (overridden) or marked `"suppressed": true` (suppressed) in `report.json`. + +## When the heuristic is genuinely buggy + +If you've found a real classifier bug — the kind that affects many users, not just this tool — file an issue tagged `false-positive` at https://github.com/ThreeMoonsLab/agents-shipgate/issues with: + +- The check ID +- A minimal reproduction (manifest fragment + tool source) +- The current behavior vs. 
expected behavior + +The risk classifier in `core/risk_hints.py` improves through reports. + +## Verification + +- The decision (override / suppress / fix) is documented in the manifest with a reason. +- The previously-failing fingerprint is gone or `"suppressed": true` in the next scan. +- The `reason` would be understandable to a reviewer who hasn't seen the finding. diff --git a/adoption-kits/claude-code-skill/prompts/upgrade-shipgate-version.md b/adoption-kits/claude-code-skill/prompts/upgrade-shipgate-version.md new file mode 100644 index 00000000..0fdec8d8 --- /dev/null +++ b/adoption-kits/claude-code-skill/prompts/upgrade-shipgate-version.md @@ -0,0 +1,73 @@ +# Prompt · Upgrade Agents Shipgate version + +Bump the agents-shipgate version pinned in CI and the development environment. + +## Your task + +1. **Read the changelog** for the gap between the current and target version: + - https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/CHANGELOG.md + - Specifically look for entries under "Breaking changes" and "New checks added". + +2. **Update the pin in three places** (in this order): + + a. **`pyproject.toml`** (if the project depends on shipgate as a dev dep): + ```toml + [project.optional-dependencies] + dev = ["agents-shipgate==<new-version>", ...] + ``` + + b. **CI workflow** at `.github/workflows/shipgate.yml`: + ```yaml + - uses: ThreeMoonsLab/agents-shipgate@v<new-version> + with: + shipgate_version: '<new-version>' + ``` + + c. **Pre-commit config** at `.pre-commit-config.yaml` (if present): + ```yaml + repos: + - repo: https://github.com/ThreeMoonsLab/agents-shipgate + rev: v<new-version> + ``` + +3. **Run a local scan** with the new version: + ```bash + pipx upgrade agents-shipgate + agents-shipgate --version # confirm the new version is in PATH + agents-shipgate scan -c shipgate.yaml --ci-mode advisory + ``` + +4. 
**Compare the new finding count to the baseline.** If `report.json` shows new finding fingerprints (any with `"baseline_status": "new"`): + - These are usually new checks added in the upgrade. Read the changelog "New checks added" section. + - For each new check ID, decide: fix, override, or suppress (see [`triage-false-positive.md`](triage-false-positive.md)). + +5. **Re-baseline if the new findings are accepted:** + ```bash + agents-shipgate baseline save -c shipgate.yaml \ + --out .agents-shipgate/baseline.json + ``` + +6. **Commit** the version bumps + the new baseline (if regenerated) in one PR. Title: `Upgrade agents-shipgate v<old-version> → v<new-version>`. + +## Stability guarantees + +Per [`STABILITY.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md), within `0.x`: + +- Existing check IDs do not change names or fingerprint algorithms. +- Existing CLI flags do not break. +- The JSON report's stable fields persist. + +So a `0.2.x → 0.3.x` upgrade should not silently break existing suppressions or baselines. If it does, that's a stability bug — file an issue. + +## What may legitimately change + +- Risk-classifier keyword sets (false-positive tuning). Use `risk_overrides` to pin specific behavior. +- New checks fire (additive). Triage with [`triage-false-positive.md`](triage-false-positive.md). +- Markdown report layout (parse `report.json` instead). 
+ +## Verification + +- `agents-shipgate --version` reflects the new version +- CI workflow uses the new version +- A scan completes without error +- The baseline file (if used) is up to date diff --git a/adoption-kits/codex-skill/.agents-shipgate-kit-metadata.json b/adoption-kits/codex-skill/.agents-shipgate-kit-metadata.json new file mode 100644 index 00000000..9b85f06b --- /dev/null +++ b/adoption-kits/codex-skill/.agents-shipgate-kit-metadata.json @@ -0,0 +1,22 @@ +{ + "schema_version": 1, + "target": "codex-skill", + "prior_render_sha256": {}, + "bootstrap_legacy_sha256": { + "SKILL.md": [ + "59ec0a31f9747acf569f731561236ff4ef6d8734b614edfa04ea6ff10043f21a" + ], + "references/recipes.md": [ + "df5110bfa05eeabd9b918d8902b5c054fa547d1155be61ef6e7d7d63378bf210" + ], + "references/report-reading.md": [ + "75a655059f3d45db365c744b0ff82d1c9d69c3638acacf640fd667ae87260d05" + ], + "assets/advisory-pr-comment.yml": [ + "d4005102df70a627d3883334e827c4bc7527a35a2278573699e18a43afed3bcb" + ], + "agents/openai.yaml": [ + "4d94a724336e5d36a2769630495f341007580e4dee306bc42a1aeca1af9e867b" + ] + } +} diff --git a/adoption-kits/codex-skill/SKILL.md b/adoption-kits/codex-skill/SKILL.md new file mode 100644 index 00000000..d2fe4f41 --- /dev/null +++ b/adoption-kits/codex-skill/SKILL.md @@ -0,0 +1,37 @@ +--- +name: agents-shipgate +description: Use when the user wants to add or run Agents Shipgate as a local-first, static Tool-Use Readiness release gate for an AI agent's tool surface; review or prepare a tool-using agent for release; scan MCP, OpenAPI, OpenAI Agents SDK, Anthropic, Google ADK, LangChain/LangGraph, CrewAI, OpenAI API, Codex plugin, or n8n tool artifacts; add advisory CI; or interpret, fix, triage, suppress, or explain a Shipgate finding. +--- + +# Agents Shipgate + +Agents Shipgate is a local-first, static Tool-Use Readiness release gate for AI agent tool surfaces. 
It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and Release Evidence Packets. + +Use this skill when a task touches agent tools, MCP exports, OpenAPI specs, prompts that constrain tool use, permissions/scopes, approval or confirmation policies, `shipgate.yaml`, Shipgate CI, or `agents-shipgate-reports/report.json`. + +Do not use it for general linting, runtime monitoring, evals, model-output quality, or runtime guardrail enforcement. Shipgate is static-only: no agent execution, no tool calls, no LLM calls, no MCP server connections, and no telemetry by default. + +## Workflow + +1. For relevance decisions, bootstrap, scanning, CI setup, finding fixes, false-positive triage, strict-mode promotion, or version upgrades, read `references/recipes.md`. +2. For reading `report.json`, summarizing release decisions, or deciding what may be auto-applied, read `references/report-reading.md`. +3. Set `AGENTS_SHIPGATE_AGENT_MODE=1` before running Shipgate commands so errors include structured `next_action` JSON. +4. Default first-time CI to advisory mode. Do not enable release-blocking CI or save a baseline until a human has reviewed current findings. +5. Always parse `agents-shipgate-reports/report.json`, not Markdown. Use `release_decision.decision` as the release signal. +6. Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. +7. Ensure `.gitignore` covers `agents-shipgate-reports/` before committing. + +## Fast Paths + +- First adoption: run `agents-shipgate detect --workspace . --json`, then follow `references/recipes.md`. +- Existing manifest: run `agents-shipgate scan -c shipgate.yaml --suggest-patches --format json`. +- First GitHub CI: copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. 
+- Explain one finding: run `agents-shipgate explain-finding --from agents-shipgate-reports/report.json --json`. +- Triage heuristic findings: run `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`. + +## Boundaries + +- Do not claim a finding is fixed without re-running `agents-shipgate scan` and reporting the new decision/counts. +- Do not silently suppress findings. Suppressions require a non-empty `reason`. +- Do not commit generated reports. +- Do not edit the upstream `agents-shipgate` check implementation unless the user is working in the Agents Shipgate repo itself. diff --git a/adoption-kits/codex-skill/agents/openai.yaml b/adoption-kits/codex-skill/agents/openai.yaml new file mode 100644 index 00000000..36ccfb2f --- /dev/null +++ b/adoption-kits/codex-skill/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Agents Shipgate" + short_description: "Run Tool-Use Readiness release gates" + default_prompt: "Use $agents-shipgate to add a Tool-Use Readiness release gate to this agent repo." + +policy: + allow_implicit_invocation: true diff --git a/adoption-kits/codex-skill/assets/advisory-pr-comment.yml b/adoption-kits/codex-skill/assets/advisory-pr-comment.yml new file mode 100644 index 00000000..89f5408d --- /dev/null +++ b/adoption-kits/codex-skill/assets/advisory-pr-comment.yml @@ -0,0 +1,26 @@ +# Advisory PR comment. +# Recommended starting point: runs the scanner on every PR, posts a summary +# comment, uploads the report as an artifact, and never fails the job. 
+name: Agents Shipgate (advisory) + +on: + pull_request: + +permissions: + contents: read + pull-requests: write + +jobs: + shipgate: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: ThreeMoonsLab/agents-shipgate@v{{ shipgate_version }} + with: + ci_mode: advisory + diff_base: target + pr_comment: 'true' + shipgate_version: '{{ shipgate_version }}' diff --git a/adoption-kits/codex-skill/references/recipes.md b/adoption-kits/codex-skill/references/recipes.md new file mode 100644 index 00000000..4391584c --- /dev/null +++ b/adoption-kits/codex-skill/references/recipes.md @@ -0,0 +1,108 @@ +# Agents Shipgate Recipes + +Use these recipes after the `agents-shipgate` skill triggers. + +## Decide Relevance + +Run: + +```bash +AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json +``` + +Proceed when any of these are true: + +- `is_agent_project: true` +- `suggested_sources` is non-empty +- `codex_plugin_candidates` is non-empty +- `shipgate.yaml` already exists +- the user explicitly asked for a Shipgate scan or Tool-Use Readiness gate + +Stop only when all signals are absent and the user did not explicitly request Shipgate. + +## Bootstrap A Repo + +Run: + +```bash +AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json +AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate contract --json +AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate init --workspace . --write --ci --json +AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate scan -c shipgate.yaml --suggest-patches --format json +AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \ + --from agents-shipgate-reports/report.json \ + --confidence high --apply +``` + +If `init` reports placeholders, replace `CHANGE_ME` values from repo context before scanning. If `shipgate.yaml` already exists, edit it rather than overwriting it. + +## First-Time CI + +Use advisory mode only. 
Copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. + +Do not switch to release-blocking behavior in the same task. Strict promotion requires human review, suppressions with reasons, and optionally a saved baseline. + +## Fix Top Finding + +1. Read `agents-shipgate-reports/report.json`. +2. Pick the first blocker, then highest-severity review item. +3. If `findings[].agent_action == "auto_apply"` and a high-confidence patch exists, apply it with `apply-patches --confidence high --apply`. +4. For policy/evidence gaps, propose the exact human decision needed. Do not fabricate approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. +5. Re-run scan and report the new `release_decision.decision`, blocker count, and review item count. + +## Recommend Fixes + +Group active findings by action: + +- `auto_apply`: safe mechanical patches. +- `propose_patch_for_review`: show patch, leave final decision to user. +- `escalate_to_human`: policy/evidence decision. +- `suppress_with_reason`: only when the user confirms the finding is intentionally accepted. +- `informational`: summarize, no gate action. + +## Explain A Finding + +Run: + +```bash +agents-shipgate explain-finding \ + --from agents-shipgate-reports/report.json --json +``` + +Use the returned deterministic `explanation` for PR comments or chat replies. Keep it to 3-5 sentences and include the tool name, release risk, and next action. + +## Triage False Positives + +Prefer fixing the manifest or policy evidence over suppression. 
Suppress only with a specific reason: + +```yaml +checks: + ignore: + - check_id: SHIP-CHECK-ID + tool: tool.name + reason: specific accepted-risk rationale +``` + +## Promote Advisory To Strict + +Only after humans review advisory output: + +```bash +agents-shipgate baseline save -c shipgate.yaml --out .agents-shipgate/baseline.json +agents-shipgate scan -c shipgate.yaml \ + --baseline .agents-shipgate/baseline.json \ + --ci-mode strict --fail-on critical,high +``` + +The promoted gate should fail only on new findings above the selected threshold. + +## Upgrade Shipgate + +Update the GitHub Action tag and `shipgate_version` together. Re-run: + +```bash +agents-shipgate contract --json +agents-shipgate scan -c shipgate.yaml --suggest-patches --format json +``` + +If schema or decision fields changed, use `docs/agent-contract-current.md` from the installed version or upstream repo. diff --git a/adoption-kits/codex-skill/references/report-reading.md b/adoption-kits/codex-skill/references/report-reading.md new file mode 100644 index 00000000..237875f0 --- /dev/null +++ b/adoption-kits/codex-skill/references/report-reading.md @@ -0,0 +1,52 @@ +# Reading Agents Shipgate Reports + +Always read `agents-shipgate-reports/report.json`. Do not scrape Markdown. + +## Order + +1. `release_decision.decision`: `blocked`, `review_required`, `insufficient_evidence`, or `passed`. +2. `release_decision.blockers[]`: items blocking release. +3. `release_decision.review_items[]`: accepted debt or human-review items. +4. `agent_summary`: one-fetch summary with `headline`, counts, safe patches, human-review needs, and `first_recommended_action`. +5. `findings[]`: detailed evidence, source, severity, and remediation. + +## Per-Finding Action + +Prefer `findings[].agent_action` when present: + +- `auto_apply`: safe to apply only when a high-confidence patch exists. +- `propose_patch_for_review`: show patch and ask for review. +- `escalate_to_human`: policy/evidence decision. 
+- `suppress_with_reason`: suppress only after explicit user confirmation. +- `informational`: summarize only. + +Do not synthesize an action from lower-level fields when `agent_action` exists. + +## Manual-Review Boundary + +Never auto-assert these categories: + +- approval policy +- confirmation policy +- idempotency evidence +- broad-scope permission decisions +- prohibited-action policy decisions +- runtime trace evidence + +For those, summarize the risk and the exact decision a human needs to make. + +## Summary Template + +Report back with: + +```text +Decision: +Blockers: +Review items: +Safe patches applied: +Needs human review: +Top findings: +1. +``` + +If `privacy_audit` is present, mention that default report redaction ran. If `insufficient_evidence` appears, treat it as review-required unless the user has stricter release policy. diff --git a/docs/agents/use-with-claude-code.md b/docs/agents/use-with-claude-code.md index 6453691c..55660fbc 100644 --- a/docs/agents/use-with-claude-code.md +++ b/docs/agents/use-with-claude-code.md @@ -44,6 +44,20 @@ cp /path/to/agents-shipgate/.claude/commands/shipgate.md .claude/commands/shipga cp -r /path/to/agents-shipgate/skills/agents-shipgate .claude/skills/agents-shipgate ``` +The `agents-shipgate init --agent-instructions=claude-code-skill` renderer can +also use repo-local overrides without rebuilding the package: + +```yaml +schema_version: 1 +targets: + claude-code-skill: + overrides_dir: .agents-shipgate/adoption-kit/claude-code-skill +``` + +Files in that directory are relative to `.claude/skills/agents-shipgate/`. +The default config path is `.agents-shipgate/adoption-kit.yaml`; override it +with `--agent-instructions-kit <path>`. + ## Verify Open Claude Code in the project. 
Two checks: diff --git a/docs/agents/use-with-codex.md b/docs/agents/use-with-codex.md index f8f7bcd1..3b280f18 100644 --- a/docs/agents/use-with-codex.md +++ b/docs/agents/use-with-codex.md @@ -30,6 +30,24 @@ agents-shipgate init --workspace . --write --agent-instructions=all The `codex-skill` target writes `.agents/skills/agents-shipgate/`. It is idempotent and safe to rerun; user-edited skill files are not overwritten. +## Customize Generated Skill Content + +The installed package ships the default skill content offline. A downstream +repo can override selected files without patching the wheel by adding +`.agents-shipgate/adoption-kit.yaml`: + +```yaml +schema_version: 1 +targets: + codex-skill: + overrides_dir: .agents-shipgate/adoption-kit/codex-skill +``` + +Files under the override directory are relative to +`.agents/skills/agents-shipgate/`, for example `SKILL.md`, +`references/recipes.md`, or `assets/advisory-pr-comment.yml`. Pass a +different config path with `--agent-instructions-kit <path>`. 
+ ## Verify Open Codex in the project and run two checks: diff --git a/pyproject.toml b/pyproject.toml index 3d7a33cc..da993975 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ packages = ["src/agents_shipgate"] "AGENTS.md" = "agents_shipgate/_meta/AGENTS.md" "STABILITY.md" = "agents_shipgate/_meta/STABILITY.md" "docs/triggers.json" = "agents_shipgate/_meta/triggers.json" +"adoption-kits" = "agents_shipgate/_adoption_kits" [tool.hatch.build.targets.sdist] exclude = [ diff --git a/src/agents_shipgate/cli/_register_init.py b/src/agents_shipgate/cli/_register_init.py index 11aeee33..a9b81fa1 100644 --- a/src/agents_shipgate/cli/_register_init.py +++ b/src/agents_shipgate/cli/_register_init.py @@ -18,6 +18,10 @@ apply_agent_instructions, parse_selector, ) +from agents_shipgate.cli.discovery.agent_instructions.adoption_kit import ( + AdoptionKitError, + load_adoption_kit_config, +) from agents_shipgate.cli.discovery.agent_instructions.targets import SPECS as _AI_SPECS from agents_shipgate.cli.discovery.placeholders import collect_placeholders from agents_shipgate.schemas.diagnostics import NextAction @@ -82,6 +86,16 @@ def init( "decisions; this flag only emits advisory guidance." ), ), + agent_instructions_kit: Path | None = typer.Option( + None, + "--agent-instructions-kit", + help=( + "Optional repo-local adoption-kit YAML config for file-tree " + "agent-instruction targets. Relative paths resolve under " + "--workspace. When omitted, init auto-discovers " + ".agents-shipgate/adoption-kit.yaml." + ), + ), ) -> None: """Draft a starter shipgate.yaml from a workspace. @@ -197,6 +211,35 @@ def init( "Inspect the template, then re-run with --write to commit it." 
) + kit_config = None + if agent_instructions_kit is not None or requested_targets is not None: + try: + kit_config = load_adoption_kit_config( + workspace_resolved, + agent_instructions_kit, + ) + except AdoptionKitError as exc: + path = str(exc.path or agent_instructions_kit or workspace_resolved) + typer.echo(str(exc), err=True) + _emit_agent_mode_error( + "config_error", + path=path, + message=str(exc), + next_action=f"Edit {path}", + next_actions=[ + NextAction( + kind="edit", + path=path, + why=str(exc), + expects=( + "Adoption-kit config uses schema_version: 1 " + "and each overrides_dir resolves under the workspace." + ), + ).model_dump(mode="json") + ], + ) + raise typer.Exit(2) from exc + # Manifest action — orthogonal to --ci. Track outcome instead of # exiting immediately so --ci can still run when the manifest exists. manifest_status = "not_attempted" @@ -235,7 +278,10 @@ def init( agent_instructions_targets: list[object] = [] if requested_targets is not None: ai_result = apply_agent_instructions( - workspace_resolved, requested_targets, write=write + workspace_resolved, + requested_targets, + write=write, + kit_config=kit_config, ) agent_instructions_outcome = ai_result.to_json() agent_instructions_exit = ai_result.exit_code diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/adoption_kit.py b/src/agents_shipgate/cli/discovery/agent_instructions/adoption_kit.py new file mode 100644 index 00000000..1297898d --- /dev/null +++ b/src/agents_shipgate/cli/discovery/agent_instructions/adoption_kit.py @@ -0,0 +1,494 @@ +"""File-backed adoption-kit rendering for repo-scoped skill bundles.""" + +from __future__ import annotations + +import hashlib +import json +import re +from dataclasses import dataclass +from importlib.resources import files +from pathlib import Path +from typing import Any, Literal + +import yaml + +from agents_shipgate import __version__ + +DEFAULT_CONFIG_RELATIVE_PATH = ".agents-shipgate/adoption-kit.yaml" 
+KIT_METADATA_FILENAME = ".agents-shipgate-kit-metadata.json" +SIDECAR_FILENAME = ".agents-shipgate-kit.json" +KIT_CONFIG_SCHEMA_VERSION = 1 +KIT_SIDECAR_SCHEMA_VERSION = 1 + +KitSource = Literal["bundled", "local_override", "bundled_plus_local_override"] + +_TEMPLATE_RE = re.compile(r"\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}") + + +class AdoptionKitError(ValueError): + """Raised when an adoption-kit config or override tree is invalid.""" + + def __init__(self, message: str, *, path: Path | None = None) -> None: + super().__init__(message) + self.path = path + + +@dataclass(frozen=True) +class KitTarget: + name: str + target_root: str + bundled_dir: str + + +KIT_TARGETS: dict[str, KitTarget] = { + "codex-skill": KitTarget( + name="codex-skill", + target_root=".agents/skills/agents-shipgate", + bundled_dir="codex-skill", + ), + "claude-code-skill": KitTarget( + name="claude-code-skill", + target_root=".claude/skills/agents-shipgate", + bundled_dir="claude-code-skill", + ), +} + + +@dataclass(frozen=True) +class AdoptionKitConfig: + """Validated downstream override configuration.""" + + path: Path + source_id: str + target_overrides: dict[str, Path] + + +@dataclass(frozen=True) +class RenderedAdoptionKit: + """Rendered target tree plus provenance for JSON output and sidecars.""" + + target: str + files: dict[str, str] + root_files: dict[str, str] + kit_source: KitSource + kit_source_id: str + + +@dataclass(frozen=True) +class KitSidecar: + target: str + kit_source: str + kit_source_id: str + writer_version: str + file_hashes: dict[str, str] + + +def load_adoption_kit_config( + workspace: Path, config_path: Path | None = None +) -> AdoptionKitConfig | None: + """Load the optional repo-local adoption-kit override config. + + Relative ``config_path`` values are resolved against ``workspace`` so + ``--workspace other-repo --agent-instructions-kit .agents-shipgate/...`` + stays repo-local. 
+ """ + + workspace = workspace.resolve() + if config_path is None: + raw_path = workspace / DEFAULT_CONFIG_RELATIVE_PATH + if not raw_path.exists(): + return None + else: + raw_path = config_path if config_path.is_absolute() else workspace / config_path + symlink = first_symlink_in_chain(raw_path, workspace) + if symlink is not None: + raise AdoptionKitError( + f"{symlink} is a symlink; refusing to read adoption-kit config.", + path=raw_path, + ) + path = raw_path.resolve() + _ensure_under_workspace(path, workspace) + if not path.is_file(): + raise AdoptionKitError(f"Adoption-kit config not found: {path}", path=path) + + try: + raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {} + except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc: + raise AdoptionKitError( + f"Could not read adoption-kit config {path}: {exc}", + path=path, + ) from exc + if not isinstance(raw, dict): + raise AdoptionKitError( + f"Adoption-kit config {path} must be a YAML mapping.", + path=path, + ) + if raw.get("schema_version") != KIT_CONFIG_SCHEMA_VERSION: + raise AdoptionKitError( + "Adoption-kit config " + f"{path} must set schema_version: {KIT_CONFIG_SCHEMA_VERSION}.", + path=path, + ) + targets_raw = raw.get("targets") or {} + if not isinstance(targets_raw, dict): + raise AdoptionKitError( + f"Adoption-kit config {path} field 'targets' must be a mapping.", + path=path, + ) + + target_overrides: dict[str, Path] = {} + for target, target_raw in targets_raw.items(): + if target not in KIT_TARGETS: + raise AdoptionKitError( + f"Adoption-kit config {path} has unknown target {target!r}.", + path=path, + ) + if not isinstance(target_raw, dict): + raise AdoptionKitError( + f"Adoption-kit config {path} target {target!r} must be a mapping.", + path=path, + ) + overrides_value = target_raw.get("overrides_dir") + if not isinstance(overrides_value, str) or not overrides_value.strip(): + raise AdoptionKitError( + f"Adoption-kit config {path} target {target!r} must set overrides_dir.", + 
path=path, + ) + overrides_path = workspace / overrides_value + symlink = first_symlink_in_chain(overrides_path, workspace) + if symlink is not None: + raise AdoptionKitError( + f"{symlink} is a symlink; refusing to read adoption-kit overrides.", + path=path, + ) + resolved_overrides = overrides_path.resolve() + _ensure_under_workspace(resolved_overrides, workspace) + if not resolved_overrides.is_dir(): + raise AdoptionKitError( + f"Adoption-kit overrides_dir not found for {target!r}: " + f"{resolved_overrides}", + path=path, + ) + target_overrides[target] = resolved_overrides + + return AdoptionKitConfig( + path=path, + source_id=path.relative_to(workspace).as_posix(), + target_overrides=target_overrides, + ) + + +def render_adoption_kit( + target: str, config: AdoptionKitConfig | None = None +) -> RenderedAdoptionKit: + """Render bundled content plus any validated local overrides.""" + + spec = _target(target) + bundled_files = _read_bundled_files(spec) + override_root = config.target_overrides.get(target) if config else None + override_files = _read_override_files(override_root) if override_root else {} + merged_files = {**bundled_files, **override_files} + root_files = { + rel: _render_template(text) + for rel, text in sorted(merged_files.items(), key=lambda item: item[0]) + } + files = { + f"{spec.target_root}/{rel}": text + for rel, text in root_files.items() + } + if not override_files: + kit_source: KitSource = "bundled" + kit_source_id = f"bundled:{spec.bundled_dir}" + elif set(bundled_files).issubset(override_files): + kit_source = "local_override" + kit_source_id = f"local:{config.source_id}:{target}" if config else f"local:{target}" + else: + kit_source = "bundled_plus_local_override" + kit_source_id = ( + f"bundled:{spec.bundled_dir}+local:{config.source_id}:{target}" + if config + else f"bundled:{spec.bundled_dir}+local:{target}" + ) + return RenderedAdoptionKit( + target=target, + files=files, + root_files=root_files, + kit_source=kit_source, + 
kit_source_id=kit_source_id, + ) + + +def render_bundle_text( + target: str, config: AdoptionKitConfig | None = None +) -> str: + """Return a human-readable dry-run rendering of a full kit target.""" + + chunks: list[str] = [] + for path, text in render_adoption_kit(target, config).files.items(): + chunks.append(f"--- {path} ---\n{text.rstrip()}\n") + return "\n".join(chunks) + + +def prior_render_hashes(target: str) -> dict[str, tuple[str, ...]]: + """Return import-compatible prior hashes keyed by workspace-relative path.""" + + return _metadata_hashes(target, "prior_render_sha256") + + +def bootstrap_legacy_hashes(target: str) -> dict[str, tuple[str, ...]]: + """Return pre-sidecar bootstrap hashes keyed by workspace-relative path.""" + + return _metadata_hashes(target, "bootstrap_legacy_sha256") + + +def build_sidecar(rendered: RenderedAdoptionKit) -> dict[str, object]: + """Build the sidecar JSON payload for a rendered kit target.""" + + return { + "schema_version": KIT_SIDECAR_SCHEMA_VERSION, + "target": rendered.target, + "kit_source": rendered.kit_source, + "kit_source_id": rendered.kit_source_id, + "writer_version": __version__, + "file_hashes": _root_file_hashes(rendered.root_files), + } + + +def parse_sidecar(path: Path) -> KitSidecar | None: + """Parse a sidecar if present; return ``None`` for missing/invalid.""" + + if not path.is_file(): + return None + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except (OSError, UnicodeDecodeError, json.JSONDecodeError): + return None + if not isinstance(raw, dict): + return None + if raw.get("schema_version") != KIT_SIDECAR_SCHEMA_VERSION: + return None + target = raw.get("target") + kit_source = raw.get("kit_source") + kit_source_id = raw.get("kit_source_id") + writer_version = raw.get("writer_version") + file_hashes = raw.get("file_hashes") + if not all( + isinstance(value, str) + for value in (target, kit_source, kit_source_id, writer_version) + ): + return None + if not isinstance(file_hashes, 
dict) or not all( + isinstance(key, str) and isinstance(value, str) + for key, value in file_hashes.items() + ): + return None + return KitSidecar( + target=str(target), + kit_source=str(kit_source), + kit_source_id=str(kit_source_id), + writer_version=str(writer_version), + file_hashes=dict(file_hashes), + ) + + +def sidecar_text(rendered: RenderedAdoptionKit) -> str: + return json.dumps(build_sidecar(rendered), indent=2, sort_keys=True) + "\n" + + +def root_relative_path(target: str, workspace_relative_path: str) -> str: + """Convert a rendered workspace-relative path to a kit-root path.""" + + spec = _target(target) + prefix = f"{spec.target_root}/" + if not workspace_relative_path.startswith(prefix): + raise ValueError( + f"{workspace_relative_path!r} is not under target root {spec.target_root!r}" + ) + return workspace_relative_path.removeprefix(prefix) + + +def first_symlink_in_chain(path: Path, workspace: Path) -> Path | None: + """Return the first existing symlink under ``workspace``. + + Paths outside ``workspace`` return ``None`` here; callers do their own + containment checks and should report those as containment errors, not + symlink errors. + """ + + workspace_real = workspace.resolve() + try: + relative_parts = path.relative_to(workspace_real).parts + except ValueError: + try: + relative_parts = path.resolve().relative_to(workspace_real).parts + except ValueError: + return None + cur = workspace_real + for part in relative_parts: + cur = cur / part + if cur.is_symlink(): + return cur + if not cur.exists(): + return None + return None + + +def _target(target: str) -> KitTarget: + try: + return KIT_TARGETS[target] + except KeyError as exc: + raise AdoptionKitError(f"Unknown adoption-kit target {target!r}.") from exc + + +def _read_bundled_files(spec: KitTarget) -> dict[str, str]: + root = _bundled_target_root(spec) + if root is None: + raise AdoptionKitError( + f"Bundled adoption kit {spec.bundled_dir!r} is not available." 
+ ) + return _read_tree(root) + + +def _bundled_target_root(spec: KitTarget) -> Any | None: + try: + bundled = files("agents_shipgate") / "_adoption_kits" / spec.bundled_dir + if bundled.is_dir(): + return bundled + except (ModuleNotFoundError, FileNotFoundError): + pass + + here = Path(__file__).resolve().parent + for parent in [here, *here.parents]: + candidate = parent / "adoption-kits" / spec.bundled_dir + if candidate.is_dir(): + return candidate + return None + + +def _read_tree(root: Any) -> dict[str, str]: + output: dict[str, str] = {} + + def walk(node: Any, prefix: str = "") -> None: + for child in sorted(node.iterdir(), key=lambda item: item.name): + rel = f"{prefix}{child.name}" + if child.name == KIT_METADATA_FILENAME: + continue + if child.is_dir(): + walk(child, f"{rel}/") + elif child.is_file(): + output[rel] = child.read_text(encoding="utf-8") + + walk(root) + return output + + +def _read_override_files(root: Path) -> dict[str, str]: + output: dict[str, str] = {} + for path in sorted(root.rglob("*")): + if path.is_dir(): + continue + if path.name == SIDECAR_FILENAME: + raise AdoptionKitError( + f"Adoption-kit overrides may not define {SIDECAR_FILENAME}: {path}", + path=path, + ) + if first_symlink_in_chain(path, root) is not None or path.is_symlink(): + raise AdoptionKitError( + f"{path} is a symlink; refusing to read adoption-kit override.", + path=path, + ) + resolved = path.resolve() + _ensure_under_workspace(resolved, root.resolve()) + try: + rel = resolved.relative_to(root.resolve()).as_posix() + except ValueError as exc: # pragma: no cover - guarded above + raise AdoptionKitError( + f"Adoption-kit override escapes its override root: {path}", + path=path, + ) from exc + try: + output[rel] = path.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError) as exc: + raise AdoptionKitError( + f"Could not read adoption-kit override {path}: {exc}", + path=path, + ) from exc + return output + + +def _metadata_hashes(target: str, field: str) 
-> dict[str, tuple[str, ...]]: + spec = _target(target) + raw = _read_metadata(spec).get(field) or {} + if not isinstance(raw, dict): + return {} + output: dict[str, tuple[str, ...]] = {} + for root_rel, hashes in raw.items(): + if not isinstance(root_rel, str) or not isinstance(hashes, list): + continue + safe_hashes = tuple(value for value in hashes if isinstance(value, str)) + if safe_hashes: + output[f"{spec.target_root}/{root_rel}"] = safe_hashes + return output + + +def _read_metadata(spec: KitTarget) -> dict[str, Any]: + root = _bundled_target_root(spec) + if root is None: + return {} + metadata = root / KIT_METADATA_FILENAME + if not metadata.is_file(): + return {} + try: + raw = json.loads(metadata.read_text(encoding="utf-8")) + except (OSError, UnicodeDecodeError, json.JSONDecodeError): + return {} + return raw if isinstance(raw, dict) else {} + + +def _render_template(text: str) -> str: + context = {"shipgate_version": __version__} + + def replace(match: re.Match[str]) -> str: + return context.get(match.group(1), match.group(0)) + + return _TEMPLATE_RE.sub(replace, text) + + +def _root_file_hashes(files: dict[str, str]) -> dict[str, str]: + return { + rel: hashlib.sha256(content.encode("utf-8")).hexdigest() + for rel, content in sorted(files.items()) + } + + +def _ensure_under_workspace(path: Path, workspace: Path) -> None: + try: + path.relative_to(workspace) + except ValueError as exc: + raise AdoptionKitError( + f"Adoption-kit path {path} resolves outside workspace {workspace}.", + path=path, + ) from exc + + +__all__ = [ + "AdoptionKitConfig", + "AdoptionKitError", + "DEFAULT_CONFIG_RELATIVE_PATH", + "KIT_CONFIG_SCHEMA_VERSION", + "KIT_SIDECAR_SCHEMA_VERSION", + "KIT_TARGETS", + "KitSource", + "SIDECAR_FILENAME", + "RenderedAdoptionKit", + "bootstrap_legacy_hashes", + "build_sidecar", + "first_symlink_in_chain", + "load_adoption_kit_config", + "parse_sidecar", + "prior_render_hashes", + "render_adoption_kit", + "render_bundle_text", + 
"root_relative_path", + "sidecar_text", +] diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/apply.py b/src/agents_shipgate/cli/discovery/agent_instructions/apply.py index c8181ebe..f704942d 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/apply.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/apply.py @@ -18,7 +18,7 @@ CLI; refused to downgrade. - ``skipped_ambiguous`` — multiple/mismatched markers; refused to guess. - ``skipped_user_modified`` — cursor MDC file content does not match any - shipped render, or a Codex skill file was edited by the user; refused to + shipped render, or a skill-bundle file was edited by the user; refused to overwrite. - ``skipped_symlink`` — the host path is a symlink; refused to follow it outside the workspace. @@ -37,30 +37,33 @@ from __future__ import annotations import hashlib -from collections.abc import Callable, Iterable +from collections.abc import Iterable from dataclasses import dataclass from pathlib import Path +from agents_shipgate.cli.discovery.agent_instructions.adoption_kit import ( + SIDECAR_FILENAME, + AdoptionKitConfig, + KitSource, + bootstrap_legacy_hashes, + build_sidecar, + parse_sidecar, + render_adoption_kit, + root_relative_path, + sidecar_text, +) from agents_shipgate.cli.discovery.agent_instructions.managed_block import ( UpsertStatus, upsert, ) -from agents_shipgate.cli.discovery.agent_instructions.renderers import ( - claude_code_skill as claude_code_skill_renderer, -) -from agents_shipgate.cli.discovery.agent_instructions.renderers import ( - codex_skill as codex_skill_renderer, -) from agents_shipgate.cli.discovery.agent_instructions.renderers import ( cursor as cursor_renderer, ) from agents_shipgate.cli.discovery.agent_instructions.renderers import ( render_agents_md, render_claude_code_skill_bundle_text, - render_claude_code_skill_files, render_claude_md, render_codex_skill_bundle_text, - render_codex_skill_files, render_cursor_file, render_pr_template, ) 
@@ -128,6 +131,7 @@ class TargetOutcome: message: str = "" rendered: str | None = None # populated only on dry-run files: list[dict[str, str]] | None = None # populated for file-tree targets + kit_source: KitSource | None = None # populated for adoption-kit file-tree targets def to_json(self) -> dict[str, object]: payload: dict[str, object] = { @@ -142,6 +146,8 @@ def to_json(self) -> dict[str, object]: payload["rendered"] = self.rendered if self.files is not None: payload["files"] = list(self.files) + if self.kit_source is not None: + payload["kit_source"] = self.kit_source return payload @property @@ -172,15 +178,15 @@ def to_json(self) -> dict[str, object]: # --- rendering ------------------------------------------------------------- -def _rendered_inner(name: str) -> str: +def _rendered_inner(name: str, kit_config: AdoptionKitConfig | None = None) -> str: if name == "agents-md": return render_agents_md() if name == "claude-md": return render_claude_md() if name == "codex-skill": - return render_codex_skill_bundle_text() + return render_codex_skill_bundle_text(kit_config) if name == "claude-code-skill": - return render_claude_code_skill_bundle_text() + return render_claude_code_skill_bundle_text(kit_config) if name == "pr-template": return render_pr_template() if name == "cursor": @@ -188,7 +194,12 @@ def _rendered_inner(name: str) -> str: raise ValueError(f"unknown target {name!r}") # pragma: no cover - guarded by selector -def render_targets(workspace: Path, requested: Iterable[str]) -> list[TargetOutcome]: +def render_targets( + workspace: Path, + requested: Iterable[str], + *, + kit_config: AdoptionKitConfig | None = None, +) -> list[TargetOutcome]: """Pure rendering pass for dry-run output. 
Does not read existing files.""" workspace = workspace.resolve() outcomes: list[TargetOutcome] = [] @@ -198,17 +209,23 @@ def render_targets(workspace: Path, requested: Iterable[str]) -> list[TargetOutc # symlink at the host path and report a path outside the workspace, # which would mislead callers in the dry-run JSON. path = workspace / spec.relative_path + rendered_kit = ( + render_adoption_kit(name, kit_config) + if spec.is_file_tree + else None + ) outcomes.append( TargetOutcome( name=name, path=str(path), status="would_render", - rendered=_rendered_inner(name), + rendered=_rendered_inner(name, kit_config), files=( - _file_payload(workspace, _FILE_TREE_RENDERERS[name]()) + _file_payload(workspace, rendered_kit.files) if spec.is_file_tree else None ), + kit_source=rendered_kit.kit_source if rendered_kit else None, ) ) return outcomes @@ -407,29 +424,19 @@ def _apply_cursor(path: Path, workspace: Path) -> TargetOutcome: ) -_FILE_TREE_RENDERERS: dict[str, Callable[[], dict[str, str]]] = { - "codex-skill": render_codex_skill_files, - "claude-code-skill": render_claude_code_skill_files, -} - -_FILE_TREE_MODULES = { - "codex-skill": codex_skill_renderer, - "claude-code-skill": claude_code_skill_renderer, -} - - def _apply_file_tree( name: str, path: Path, workspace: Path, - render_fn: Callable[[], dict[str, str]], - prior_sha: dict[str, tuple[str, ...]], + kit_config: AdoptionKitConfig | None, ) -> TargetOutcome: - rendered_files = render_fn() + rendered = render_adoption_kit(name, kit_config) + rendered_files = rendered.files target_paths = {rel: workspace / rel for rel in rendered_files} files = _file_payload(workspace, rendered_files) + sidecar_path = path / SIDECAR_FILENAME - for target in target_paths.values(): + for target in [*target_paths.values(), sidecar_path]: symlink = _first_symlink_in_chain(target, workspace) if symlink is not None: return TargetOutcome( @@ -437,6 +444,7 @@ def _apply_file_tree( path=str(path), status="skipped_symlink", files=files, + 
kit_source=rendered.kit_source, message=( f"{symlink} is a symlink; refusing to follow it. " "Replace the symlink with a regular file or directory before " @@ -447,6 +455,17 @@ def _apply_file_tree( existing: list[str] = [] missing: list[str] = [] prior_version: list[str] = [] + sidecar = parse_sidecar(sidecar_path) + if sidecar is not None and sidecar.target != name: + sidecar = None + expected_sidecar = build_sidecar(rendered) + expected_hashes = expected_sidecar["file_hashes"] + sidecar_current = ( + sidecar is not None + and sidecar.file_hashes == expected_hashes + and sidecar.kit_source_id == rendered.kit_source_id + ) + legacy_sha = bootstrap_legacy_hashes(name) for rel, content in rendered_files.items(): target = target_paths[rel] if not target.exists(): @@ -459,13 +478,16 @@ def _apply_file_tree( path=str(path), status="skipped_user_modified", files=files, + kit_source=rendered.kit_source, message=f"{target} exists but is not a regular file; refusing to overwrite.", ) current = target.read_text(encoding="utf-8") if current == content: continue current_sha = hashlib.sha256(current.encode("utf-8")).hexdigest() - if current_sha in prior_sha.get(rel, ()): + root_rel = root_relative_path(name, rel) + sidecar_hash = sidecar.file_hashes.get(root_rel) if sidecar else None + if current_sha == sidecar_hash or current_sha in legacy_sha.get(rel, ()): prior_version.append(rel) continue return TargetOutcome( @@ -473,6 +495,7 @@ def _apply_file_tree( path=str(path), status="skipped_user_modified", files=files, + kit_source=rendered.kit_source, message=( f"{target} does not match a shipped Agents Shipgate {name} file; " "refusing to overwrite user edits. 
Edit the file manually or remove " @@ -484,6 +507,9 @@ def _apply_file_tree( target = target_paths[rel] target.parent.mkdir(parents=True, exist_ok=True) target.write_text(content, encoding="utf-8") + sidecar_path.parent.mkdir(parents=True, exist_ok=True) + if not sidecar_current: + sidecar_path.write_text(sidecar_text(rendered), encoding="utf-8") if not existing: status = "created_file_tree" @@ -497,6 +523,9 @@ def _apply_file_tree( elif missing: status = "updated" message = f"Repaired missing {name} skill file(s) under {path}" + elif not sidecar_current: + status = "migrated" + message = f"Recorded managed {name} skill bundle metadata at {path}" else: status = "unchanged" message = f"{name} skill bundle already current at {path}" @@ -505,6 +534,7 @@ def _apply_file_tree( path=str(path), status=status, files=files, + kit_source=rendered.kit_source, message=message, ) @@ -517,7 +547,11 @@ def _file_payload(workspace: Path, files: dict[str, str]) -> list[dict[str, str] def apply_agent_instructions( - workspace: Path, requested: Iterable[str], *, write: bool + workspace: Path, + requested: Iterable[str], + *, + write: bool, + kit_config: AdoptionKitConfig | None = None, ) -> ApplyResult: """Apply the per-target decision tree against ``workspace``. 
@@ -530,7 +564,7 @@ def apply_agent_instructions( if not write: return ApplyResult( requested=requested_list, - targets=render_targets(workspace, requested_list), + targets=render_targets(workspace, requested_list, kit_config=kit_config), ) outcomes: list[TargetOutcome] = [] @@ -574,11 +608,7 @@ def apply_agent_instructions( if spec.is_file_tree: outcomes.append( - _apply_file_tree( - name, path, workspace, - _FILE_TREE_RENDERERS[name], - _FILE_TREE_MODULES[name].PRIOR_RENDER_SHA256, - ) + _apply_file_tree(name, path, workspace, kit_config) ) elif name == "cursor": outcomes.append(_apply_cursor(path, workspace)) diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/__init__.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/__init__.py index 71ca6c76..f318d5b8 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/__init__.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/__init__.py @@ -5,8 +5,10 @@ returns the full file body since we own the whole file; the Codex skill renderer returns a repo-scoped file tree. -Content is sourced from ``docs/target-repo-agent-snippets.md``. A snapshot -test enforces parity so the doc and renderers cannot drift independently. +Managed-block content is sourced from ``docs/target-repo-agent-snippets.md``. +File-tree skill bundles are sourced from packaged ``adoption-kits/`` files. +Snapshot tests enforce parity so authored content and renderers cannot drift +independently. 
""" from __future__ import annotations diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_code_skill.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_code_skill.py index 469e2550..221ddb9b 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_code_skill.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_code_skill.py @@ -1,980 +1,22 @@ -"""Render the repo-scoped Claude Code skill bundle. - -The canonical checked-in copy lives under ``skills/agents-shipgate``. -This renderer deliberately keeps a hard-coded copy so the installed wheel can -generate the skill without relying on repository files being present. -Snapshot tests keep the two copies in sync. -""" +"""Render the repo-scoped Claude Code skill bundle from packaged kit files.""" from __future__ import annotations -from agents_shipgate import __version__ - - -def render_files() -> dict[str, str]: - """Return relative file path -> UTF-8 text for the Claude Code skill bundle.""" - return { - ".claude/skills/agents-shipgate/SKILL.md": _SKILL_MD, - ".claude/skills/agents-shipgate/prompts/add-shipgate-to-repo.md": _ADD_SHIPGATE_MD, - ".claude/skills/agents-shipgate/prompts/decide-shipgate-relevance.md": _DECIDE_RELEVANCE_MD, - ".claude/skills/agents-shipgate/prompts/explain-finding-to-user.md": _EXPLAIN_FINDING_MD, - ".claude/skills/agents-shipgate/prompts/fix-top-finding.md": _FIX_TOP_FINDING_MD, - ".claude/skills/agents-shipgate/prompts/recommend-fixes.md": _RECOMMEND_FIXES_MD, - ".claude/skills/agents-shipgate/prompts/stabilize-strict-mode.md": _STABILIZE_STRICT_MD, - ".claude/skills/agents-shipgate/prompts/triage-false-positive.md": _TRIAGE_FP_MD, - ".claude/skills/agents-shipgate/prompts/upgrade-shipgate-version.md": _UPGRADE_VERSION_MD, - ".claude/skills/agents-shipgate/ci-recipes/advisory-pr-comment.yml": _ADVISORY_CI_YML, - } - - -def render_bundle_text() -> str: - """Return a human-readable 
dry-run rendering of the full bundle.""" - chunks: list[str] = [] - for path, text in render_files().items(): - chunks.append(f"--- {path} ---\n{text.rstrip()}\n") - return "\n".join(chunks) - - -# SHA-256 hashes of every prior render, keyed by bundle-relative file path. -# When a rendered file changes after the first shipped Claude Code skill -# release, move that file's previous current-render hash into this dict so -# `init --agent-instructions=claude-code-skill --write` can safely migrate -# v(N-1) files. Leave the dict empty while there is no prior shipped Claude -# Code skill bundle. -PRIOR_RENDER_SHA256: dict[str, tuple[str, ...]] = { - ".claude/skills/agents-shipgate/SKILL.md": ( - "b17c53d9905f46b196be38e98cf71e53da6779e3a4f426ecff14f2b0f238aba9", - ), -} - -_ACTION_VERSION = __version__ - - -_SKILL_MD = """\ ---- -name: agents-shipgate -description: Use when the user wants to add a local-first, static Tool-Use Readiness release gate for an AI agent's tool surface, run agents-shipgate scans, fix or triage Shipgate findings, add Shipgate to CI, or interpret a shipgate report. Triggers on phrases like "add shipgate", "release readiness for my agent", "tool-use readiness", "scan my agent", "shipgate scan", "shipgate.yaml", "agents-shipgate-reports/report.json", "fix shipgate finding". ---- - -# agents-shipgate skill - -`agents-shipgate` is a local-first, static Tool-Use Readiness release gate for AI agent tool surfaces. It analyzes `shipgate.yaml` plus tool sources (MCP exports, OpenAPI specs, OpenAI Agents SDK Python files, Anthropic Messages API artifacts, Google ADK files, LangChain/LangGraph files, CrewAI files, OpenAI API artifacts, Codex plugin packages and marketplaces, n8n workflow JSON) and emits deterministic findings as Markdown, JSON, and SARIF. - -It does **not** run agents, call tools, invoke LLMs, connect to MCP servers, or send telemetry by default. 
Static analysis only; audited exceptions are pinned in `tests/test_adapter_static_only.py::ALLOWED_EXCEPTIONS`. - -> The skill name is intentionally `agents-shipgate` (not `shipgate`) so it does not collide with the `/shipgate` slash command shipped at `.claude/commands/shipgate.md` — Claude Code lets a skill with the same name preempt a command, which would bypass the bootstrap flow the slash command is meant to deliver. - -## When to use this skill - -- The user asks to add Tool-Use Readiness or pre-merge checks to an agent project. -- The repo already has `shipgate.yaml` or `agents-shipgate-reports/report.json`. -- The user asks to fix, triage, suppress, or explain a Shipgate finding. -- The user wants to add Shipgate to CI (GitHub Actions, GitLab CI, CircleCI). - -## When NOT to use this skill - -- Generic linting / type checking — use the project's existing tooling. -- Runtime monitoring, evals, or behavioral testing — Shipgate is static-only. -- LLM output quality assessment — out of scope. -- Editing `agents-shipgate`'s own check implementations — that's upstream-repo work, not user-repo work. - -## How to act - -Pick the matching task and follow the linked recipe verbatim. Recipes are bundled inside this skill so behavior is pinned to the installed version and works offline. Each prompt is self-contained: install commands, exit codes, and `AGENTS_SHIPGATE_AGENT_MODE=1` error handling are in the prompt itself. 
- -| Task | Recipe | -|---|---| -| Decide whether Shipgate should run at all (apply `docs/triggers.json` against the PR) | [`prompts/decide-shipgate-relevance.md`](prompts/decide-shipgate-relevance.md) | -| Bootstrap a repo (install, init, scan, report) | [`prompts/add-shipgate-to-repo.md`](prompts/add-shipgate-to-repo.md) | -| Add Shipgate to CI for the first time (advisory, PR comment) | See "First-time CI setup" below; copy [`ci-recipes/advisory-pr-comment.yml`](ci-recipes/advisory-pr-comment.yml) | -| Fix the highest-severity finding | [`prompts/fix-top-finding.md`](prompts/fix-top-finding.md) | -| Recommend fixes across all active findings | [`prompts/recommend-fixes.md`](prompts/recommend-fixes.md) | -| Explain a single finding in user-facing prose (3–5 sentences for a PR comment / chat reply) | [`prompts/explain-finding-to-user.md`](prompts/explain-finding-to-user.md); pair with `agents-shipgate explain-finding --from agents-shipgate-reports/report.json --json` | -| Triage a suspected false positive | [`prompts/triage-false-positive.md`](prompts/triage-false-positive.md) | -| Promote advisory CI to strict CI (assumes advisory is already running) | [`prompts/stabilize-strict-mode.md`](prompts/stabilize-strict-mode.md) | -| Upgrade agents-shipgate version | [`prompts/upgrade-shipgate-version.md`](prompts/upgrade-shipgate-version.md) | - -Always: - -1. Set `AGENTS_SHIPGATE_AGENT_MODE=1` so errors emit a `next_action` JSON line on stderr. -2. Parse `agents-shipgate-reports/report.json` (stable contract), not the markdown. -3. Confirm with the user before any command that writes files (`init --write`, `baseline save`). - -## First-time CI setup (advisory) - -If the user has no Shipgate CI yet, default to **advisory** mode — never strict, never with a baseline. The promotion path comes later, only after findings have been reviewed. - -1. 
Confirm the repo has `shipgate.yaml` and a clean local scan (`agents-shipgate scan -c shipgate.yaml --ci-mode advisory` exits `0`). If not, run the bootstrap recipe first. -2. Create `.github/workflows/agents-shipgate.yml` from [`ci-recipes/advisory-pr-comment.yml`](ci-recipes/advisory-pr-comment.yml). It runs on every pull request, posts a summary comment, uploads the report as an artifact, and never fails the job. -3. Confirm `permissions: pull-requests: write` is acceptable to the user before committing — required for the PR comment. -4. Push and open a test PR. Verify the agents-shipgate comment appears. -5. **Stop here.** Promotion to strict mode is a separate task — only run [`prompts/stabilize-strict-mode.md`](prompts/stabilize-strict-mode.md) after the user has reviewed the advisory output and decided which findings they accept. - -For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbucket, pre-commit) refer to https://github.com/ThreeMoonsLab/agents-shipgate/tree/main/examples or `docs/integrations.md` in the upstream repo. Always start in advisory mode. - -## Stable contracts (rely on these) - -- **CLI surface** is frozen for `0.x` — see https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md. -- **Installed CLI contract**: when available, run `agents-shipgate contract --json` to verify local schema versions, `release_decision.decision`, and manual-review signal fields. Older installs should use [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) or upgrade before automating against the local contract command. -- **Report JSON**: `report_schema_version: "0.20"`. Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. 
To filter findings by source reliability, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`; it reads `findings[].provenance_kind` (v0.15+) as a reviewer triage signal only, independent of `confidence` and never as a gate input. Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.20.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.20.json). -- **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.6.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.6.json) (latest; v0.6 adds the top-level `evidence_matrix` compact review section AND `ReleaseDecisionItem.{source, policy_evidence_source}` for reviewer-grade dual-source provenance over the v0.5 baseline). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v06). Use the packet for reviewer-shaped output; use the report for finding details. -- **Single source of truth for the contract**: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). When the schema bumps, that file updates first. -- **Exit codes**: `0` pass, `2` config error, `3` parse error, `4` other error, `20` strict-mode gate failure. -- **Check IDs** (e.g. `SHIP-POLICY-APPROVAL-MISSING`) are stable; new ones may be added but existing ones will not be renamed or repurposed. 
- -## Boundaries (do not violate) - -- Do not claim a finding is fixed without re-running `agents-shipgate scan` and showing the diff in counts. -- Do not silently suppress findings — `checks.ignore` requires a `reason` and the manifest validator rejects empty reasons. -- Do not commit `agents-shipgate-reports/` — it's regenerated each run; add it to `.gitignore`. -- Do not run `agents-shipgate baseline save` until the user has reviewed the initial findings; baselining ratchets in noise. -- Do not enable strict CI as the first CI step. Always start advisory. -- Do not modify checks in `agents-shipgate`'s own source — that's upstream repo work. - -## If something errors out - -Set `AGENTS_SHIPGATE_AGENT_MODE=1` and re-run. The CLI appends a JSON line to stderr with `{error, message, next_action}`. Follow the `next_action`. The error kinds emitted by the current CLI: - -| Error kind | Fix | -|---|---| -| `config_error` | Manifest is missing, malformed, or fails validation. Common cause: no `shipgate.yaml` yet — run `agents-shipgate init --workspace . --write`. | -| `config_already_exists` | `init --write` was run with an existing `shipgate.yaml`. Edit the file in place or remove it before re-running. | -| `input_parse_error` | A file referenced from the manifest (`tool_sources[].path`, baseline, policy pack) is missing, malformed, or resolves outside the manifest directory. Correct the path. | -| `unknown_check_id` | The check ID passed to `explain` does not exist. Run `agents-shipgate list-checks --json` to enumerate. | -| `other_error` / `internal_error` | Unexpected failure. Re-run with `--verbose` and include the output if filing an issue. | - -For deeper troubleshooting see https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/troubleshooting.md. 
-""" - - -_ADD_SHIPGATE_MD = """\ -# Prompt · Add Agents Shipgate to a repo - -You are working in a repo that may contain an AI agent — likely one of: an MCP server tool list (`*mcp*.json` or `.agents-shipgate/*.json`), an OpenAPI spec the agent calls, a Codex plugin package (`.codex-plugin/plugin.json`) or marketplace (`.agents/plugins/marketplace.json`), a Python file with `@function_tool` / `@tool` decorators (OpenAI Agents SDK, LangChain, CrewAI), a Google ADK agent in `agent.py`, an Anthropic Messages API artifact set under `prompts/`/`tools/anthropic-tools.json`/`policies/anthropic-policy.yaml`, or an OpenAI API artifact set under `prompts/`/`tools/openai-tools.json`/`openai-config.json`. - -Your job is to drive the canonical 4-call flow end-to-end in one tool-using -turn, which adds a local-first, static Tool-Use Readiness release gate. - -## Your task - -1. **Install the tool:** - ```bash - pipx install agents-shipgate - ``` - If `pipx` is unavailable, use `python -m pip install agents-shipgate` and verify with `agents-shipgate --version`. - -2. **Sanity-check the install** before touching the user's code: - ```bash - agents-shipgate self-check --json - ``` - Confirm `"ready": true`. If not, surface the failure to the user. - - When available, verify the installed CLI contract locally: - ```bash - agents-shipgate contract --json - ``` - Read `report_schema_version`, `packet_schema_version`, `gating_signal`, and - `manual_review_signals[]`; prefer these local values over stale docs. If the - command is not recognized on an older install, continue after `self-check` - using [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) - and upgrade before relying on local contract verification in automation. - -3. **Detect:** - ```bash - agents-shipgate detect --workspace . 
--json - ``` - Read the response: `is_agent_project`, `frameworks[]` (per-framework score + evidence + candidate files), `agent_name_candidates[]`, `suggested_sources[]` (MCP/OpenAPI files matched by glob). - - **Stop only when ALL of these hold:** `is_agent_project: false`, `suggested_sources` is empty, `codex_plugin_candidates` is empty, no `shipgate.yaml` already exists in the workspace, AND the user did not explicitly request a scan. Otherwise proceed — MCP/OpenAPI tool-surface repos and Codex plugin package repos register as `is_agent_project: false` because they have no Python framework imports, but they are valid Shipgate targets. MCP/OpenAPI hits surface as `suggested_sources`; Codex plugin hits surface as `codex_plugin_candidates`. - -4. **Generate a starter manifest + GitHub Actions workflow:** - ```bash - agents-shipgate init --workspace . --write --ci --json - ``` - The `--json` form returns: - - `manifest_status`: `"written"` | `"skipped_existing"` | `"not_attempted"` - - `workflow.status` (with `--ci`): `"written"` | `"skipped_existing_target"` | `"skipped_cross_reference"` - - `placeholders[]` — entries the template intentionally left as `CHANGE_ME` because no high-confidence signal was available - - `auto_detected.agent_name` — the value the manifest carries (`null` when the template fell back to `CHANGE_ME`) - - `--ci` writes `.github/workflows/agents-shipgate.yml` orthogonally to `--write`. Each gets its own overwrite-refusal check; existing workflows that already call `ThreeMoonsLab/agents-shipgate` skip with a distinct `cross_reference_path`. - -5. **Replace placeholders.** Walk `placeholders[]` from the JSON output. On a fresh workspace the template typically leaves two: - - `agent.name: CHANGE_ME` — replace with the agent's actual role (no strong `Agent(name="…")` literal was found in the source). 
- - `agent.declared_purpose[]: CHANGE_ME` — replace with a one-line description of what the agent should do (auto-init can't infer this; the schema requires a non-empty value). - - Read the agent's prompt or main file to derive both. Skipping this leaves an invalid adoption artifact — the manifest validates but downstream consumers see meaningless defaults. - -6. **Run the scan with patch suggestions:** - ```bash - agents-shipgate scan -c shipgate.yaml --suggest-patches --format json --ci-mode advisory - ``` - The report lands at `agents-shipgate-reports/report.json`. The Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. - - **Read these first for release gating (v0.8+):** - - `release_decision.decision` ∈ `{"blocked", "review_required", "insufficient_evidence", "passed"}` — baseline-aware. This is the gating signal. `insufficient_evidence` (v0.14+) fires when evidence coverage is degraded past threshold; treat unknown future values as `review_required`. - - `release_decision.{reason, blockers, review_items, fail_policy.would_fail_ci}` - - **Read these for release review (v0.9+):** - - `capability_facts[]`, `declared_intentions[]`, `misalignments[]`, `release_consequence`, `suggested_scenarios[]` - - **Per-finding fields:** - - `check_id`, `severity`, `category`, `tool_name`, `recommendation`, `suppressed` - - `autofix_safe`, `requires_human_review`, `suggested_patch_kind`, `docs_url` (v0.7+) - - `patches[]` (only with `--suggest-patches`) — each has `kind` ∈ `{set_pointer, append_pointer, remove_pointer, manual}` plus `confidence` + `target_file` + etc. for non-manual kinds. - - **Top-level:** `manifest_dir` (absolute path of the manifest's directory — used by `apply-patches` for the containment check). 
`summary.{status, critical_count, high_count, medium_count}` is preserved for v0.7 callers and is baseline-blind — do not gate on `summary.status` for new consumers. Full contract: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). - -7. **Apply the safe patches:** - ```bash - agents-shipgate apply-patches --from agents-shipgate-reports/report.json --confidence high --apply --json - ``` - Default `--confidence high` only mutates patches whose `confidence` field is `"high"`. Today that's the 3 stale-manifest removals. Scope-coverage appends ship at `medium` and require explicit `--confidence medium` to apply. ManualPatches are never auto-applied. - - **Decision tree** for walking the report: - ``` - for finding in active_findings: - if finding.suggested_patch_kind in ("manual", "none"): - surface_to_user(finding) # Surface; do NOT auto-apply. - continue - if finding.autofix_safe is True: - plan_to_apply(finding) # Will be applied at --confidence high. - continue - surface_for_medium_review(finding) # Medium-confidence — opt-in only. - ``` - - Trace findings (`SHIP-API-TRACE-{APPROVAL,CONFIRMATION}-MISSING`) are permanent ManualPatch by policy. Implement the runtime gate; never edit the trace recording — that patches the evidence, not the agent. See [`docs/autofix-policy.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/autofix-policy.md) for the full classification. - -8. **Add `agents-shipgate-reports/` to `.gitignore`** if it isn't already. The reports are scan artifacts, not source. - -9. 
**Report back to the user**: - - `release_decision.decision` and `release_decision.reason` (the gating signal — baseline-aware, v0.8+) - - Blocker / review-item counts (`len(release_decision.blockers)` / `len(release_decision.review_items)`) - - The path to the Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output - - The top 3 active critical/high findings (use `report.json`, not stdout) - - Which patches were applied (count from `apply-patches --json` output's `files`) - - Any check IDs the user should investigate first — link to `docs_url` from the finding for full rationale, or use `agents-shipgate explain --json` for the same content via CLI - -## What to do if the scan errors out - -Set `AGENTS_SHIPGATE_AGENT_MODE=1` and re-run. The CLI will append a JSON line to stderr with `{error, message, next_action}`. Follow the `next_action`. - -Common errors and fixes: - -| Error | Fix | -|---|---| -| `Config file not found: shipgate.yaml` | Run `agents-shipgate init --workspace . --write` first | -| `Input path '...' resolves outside manifest directory` | The declared `tool_sources[].path` is outside the manifest dir. Move the spec inside the tree, symlink it, or copy it | -| `Invalid shipgate.yaml: ... Did you mean X?` | A field is at the wrong nesting level; move it as suggested | -| `Containment violation` (apply-patches exit 5) | A patch's `target_file` resolved outside `report.manifest_dir`. Re-run scan to refresh; never patch arbitrary system files | - -## What NOT to do - -- Do **not** commit `agents-shipgate-reports/` — it's regenerated each run. -- Do **not** run `agents-shipgate baseline save` until the user has reviewed the initial findings. Baselining ratchets in noise that strict CI will silently ignore. The right time to baseline is **after** the user has decided which findings they accept. 
-- Do **not** suppress findings without a real `reason` — the manifest validator rejects empty reasons, and the `reason` field is the audit trail when someone asks "why is this OK?" -- Do **not** use `risk_overrides.tools.{tool}.remove_tags` to silence a finding without checking whether the heuristic is actually wrong. Prefer `checks.ignore` with a reason. -- Do **not** edit a trace recording to flip `approved` or `confirmed` — implement the runtime gate instead. - -## Verification before reporting success - -- `agents-shipgate-reports/report.json` exists and parses as JSON -- `report.json` carries `report_schema_version: "0.11"` (or higher) and a non-empty `manifest_dir` -- `report.json` carries a non-null `release_decision.decision` — this is the field to surface to the user -- `shipgate.yaml` has no `CHANGE_ME` values (comments containing the literal `CHANGE_ME` are informational and OK) -- `.gitignore` contains `agents-shipgate-reports/` (or equivalent) -- If `--ci` ran with `workflow.status: "written"`: `.github/workflows/agents-shipgate.yml` exists and references `ThreeMoonsLab/agents-shipgate@v…` -- The user knows the top 3 findings and at least one suggested next step -""" - - -_DECIDE_RELEVANCE_MD = """\ -# Prompt · Decide whether Agents Shipgate is relevant - -You are working in a repo or reviewing a PR and need to decide whether -to propose Agents Shipgate as the next step. The other prompts in -[`prompts/`](https://github.com/ThreeMoonsLab/agents-shipgate/tree/main/prompts) -assume relevance is already established — this one runs **before** that -decision and tells you yes or no with a rationale. - -The decision is fully data-driven: it does not depend on prose-reading. -[`docs/triggers.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/triggers.json) is the machine-readable -mirror of the AGENTS.md trigger table; you fetch (or read) it and apply -the rules to the changed file list. - -## Your task - -1. 
**Identify the changed file set.** Repo-relative, forward slashes: - - PR context: `git diff --name-only origin/main...HEAD` - - Working tree: `git status --short` (uncommitted) - - User-pasted diff: parse `diff --git a/{path} b/{path}` headers - -2. **Fetch the trigger catalog.** Either: - - **Local repo** (already adopted Shipgate): read `docs/triggers.json` directly. - - **Remote** (target repo without Shipgate): fetch - `https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/triggers.json`. - - The catalog has `schema_version: "0.1"` and is stable for `0.x`. - -3. **Apply the rules.** Two equivalent options: - - **Option A — read the JSON yourself.** Walk `rules[]`. For each rule, - evaluate `rule.when` against the changed file list **and** the unified - diff body — several rules use `diff_contains` predicates (e.g. - `@function_tool`) that a path-only listing cannot satisfy. The - predicate vocabulary is documented in `triggers.json` under - `predicate_vocabulary`; the action precedence is in - `action_precedence`. See the decision tree below. - - **Option B — call the bundled evaluator** (when Shipgate is installed). - Use the `--git-diff` flag so paths AND diff body come from git in one - call; piping `git diff --name-only` alone causes `diff_contains` rules - (decorators, framework tokens, Action URL) to silently never fire: - ```bash - agents-shipgate self-check --json # confirm install - python -m agents_shipgate.triggers \\ - --git-diff origin/main...HEAD --json - ``` - For uncommitted changes pass `--git-diff` with no revspec — that - runs `git diff HEAD` (covers BOTH staged and unstaged tracked - changes) plus `git ls-files --others --exclude-standard` to add - untracked file paths. Untracked files contribute paths only; their - content is not in `diff_text`, so `diff_contains` rules won't fire - on a brand-new file until you `git add` it (or pass `--diff-text` - manually).
If your repo already has a manifest, also pass - `--manifest-present` so the `force_run` rule can fire. - The output shape is `{run_shipgate, dry_run_recommended, - matched_rules, stop_conditions_fired, rationale, schema_version}`. - -4. **Emit the decision.** Always reply in this exact JSON shape so - downstream automation can parse you: - - ```json - { - "run_shipgate": true, - "dry_run_recommended": false, - "matched_rules": [ - {"id": "TRIGGER-MCP-EXPORT-CHANGED", "action": "run_shipgate"} - ], - "rationale": "PR adds tools/mcp-export.json; MCP exports are a tool surface." - } - ``` - -5. **Then act.** - - If `run_shipgate: true` and Shipgate is installed: - ```bash - agents-shipgate detect --workspace . --json - ``` - Then follow [`prompts/add-shipgate-to-repo.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/prompts/add-shipgate-to-repo.md) - for the canonical 4-call flow. - - If `run_shipgate: true` and Shipgate is **not** installed: install - it (`pipx install agents-shipgate`) and run `detect`. If the user - prefers a zero-install first step, point them at the GitHub Action - (`ThreeMoonsLab/agents-shipgate@v0.10.0`) instead. - - If `run_shipgate: false` and `dry_run_recommended: true`: propose - a non-mutating scan only — never propose `init --write` based on a - dry-run match alone. Phrase it as "X may have shifted the tool - surface; want me to run a read-only scan against the existing - manifest?" If there's no existing manifest, surface the - `matched_rules` and let the user choose. - - If `run_shipgate: false` and `dry_run_recommended: false`: **do - not propose Shipgate.** Recommend whatever the actual review need - is (lint, type check, unit test, security scan). Mentioning - Shipgate when no rule matched is noise. 
- -## Decision tree (when reading `triggers.json` by hand) - -``` -For each changed file path AND the unified diff body: - For each rule in triggers.rules: - if rule.when matches → record (rule.id, rule.action) - -stop_fired := every clause in triggers.stop_conditions holds - (requires running detect first; if you haven't, treat as false) - -# Action precedence (highest first), see triggers.json:action_precedence: -if stop_fired: → run = false -elif any action == "force_run": → run = true (manifest present) -elif any action == "skip_shipgate": → run = false (skip beats run) -elif any action == "run_shipgate": → run = true -elif any action == "dry_run": → run = false, dry_run_recommended = true -else: → run = false (no rules matched) -``` - -Why `skip_shipgate` beats `run_shipgate`: a brittle `diff_contains` match -(e.g. `@tool` mentioned in README prose) should not override the explicit -"this is a docs-only PR with no tool surface impact" signal. - -Why `force_run` overrides `skip_shipgate`: an existing `shipgate.yaml` is -the operational opt-in; even a docs-only PR in such a repo gets scanned -because the cost is low (advisory) and tool-adjacent prose changes can -matter. - -## What NOT to do - -- Do **not** propose Shipgate based on filename guesses ("looks like an - AI agent"). The trigger catalog is the source of truth — if no rule - matches, the answer is no. -- Do **not** silently fall back to "yes, run it" when you can't fetch - `triggers.json`. Surface the fetch failure to the user and ask. -- Do **not** invent rule IDs in the output. Every entry in - `matched_rules` must come from `triggers.json`. -- Do **not** treat the **negative control** ("update docs only") as a - reason to propose Shipgate. The `TRIGGER-DOCS-ONLY-NEGATIVE` rule - fires `skip_shipgate` for a reason — and it covers test-only PRs - too, not just `*.md`. -- Do **not** propose `agents-shipgate init --write` on a `dry_run`-only - match. 
`dry_run_recommended: true` justifies a non-mutating `scan` - against an existing manifest, nothing more. -- Do **not** rely on bare `--git-diff` for brand-new untracked files - to fire `diff_contains` rules. Bare flag covers tracked changes - (staged + unstaged) and untracked file *paths*, but not untracked - file *content*. `git add` first, or pass `--diff-text` explicitly. - -## Verification before reporting - -- Output is valid JSON with the keys `run_shipgate`, - `dry_run_recommended`, `matched_rules`, `rationale`. -- Every `matched_rules[].id` exists in the loaded `triggers.json`. -- If `run_shipgate: true`, the next-step command is named. -- If `run_shipgate: false` AND `dry_run_recommended: true`, exactly - one Shipgate command appears (a non-mutating `scan` against an - existing manifest) — never `init --write`. -- If `run_shipgate: false` AND `dry_run_recommended: false`, no - Shipgate command appears anywhere in your reply. -""" - - -_EXPLAIN_FINDING_MD = """\ -# Prompt · Explain a single Agents Shipgate finding to a user - -You need to translate one `report.json` finding into prose for a human -who has never read the Shipgate docs. Companion to `agents-shipgate -explain-finding {fingerprint}`, which gives you the structured payload -you'll quote. - -This is for the moment when an agent has run a scan, identified the top -finding (via `agent_summary.first_recommended_action.why` or by walking -`findings[]`), and now has to summarize it for a PR comment, chat -reply, or commit message. The user shouldn't have to follow a doc link -to understand what's going on. - -## Your task - -1. **Get the fingerprint.** Read it from `agent_summary.first_recommended_action.why` if that names a `check_id` and tool, then look up the matching `findings[].fingerprint` in `report.json`. Otherwise pick the highest-severity active finding (`critical > high > medium > low`) and read `fingerprint` directly from that entry. - -2. 
**Run `explain-finding` to get the structured payload.** - ```bash - agents-shipgate explain-finding {fingerprint} \\ - --from agents-shipgate-reports/report.json --json - ``` - The output carries: - - `check_id`, `title`, `severity`, `category` — what the check is. - - `tool_name`, `tool_id` — the affected tool (may be null for manifest-level checks). - - `evidence` — the structured evidence the check captured. - - `recommendation` — the check author's verbatim suggested fix. - - `agent_action` — `auto_apply | propose_patch_for_review | escalate_to_human | suppress_with_reason | informational`. - - `metadata` — full `CheckMetadata` (rationale, fires_when, evidence_fields, docs_url) when the check is in the catalog. - - `explanation` — a deterministic 3–5-sentence prose summary you can quote verbatim or rewrite. - -3. **Write the prose for the user.** Three to five sentences, in this order: - 1. **What.** Name the check (`check_id` is fine), the affected tool (`tool_name`), and the severity in one sentence. If the check has no `tool_name`, name what the check examined (e.g. "the manifest", "permissions"). - 2. **Why it matters.** Pull from `metadata.rationale` or `metadata.fires_when`. If neither exists, paraphrase the `recommendation`. Avoid verbatim verbose catalog text — translate "limited automation review" into plain English. - 3. **What you'll do (or want).** Map `agent_action` to a concrete next step: - - `auto_apply`: "I can apply the fix automatically — say yes and I'll run `apply-patches --confidence high --apply`." - - `propose_patch_for_review`: "There's a suggested patch but the confidence is medium/low (or there's a manual sibling). Want me to show the diff before applying?" - - `escalate_to_human`: "There's no automatic fix. Here's the recommended remediation: [paraphrase recommendation]. Want me to draft the change for you to review?" - - `suppress_with_reason`: "If you want to accept this risk, I can add a suppression with reason. What should the reason say?"
- - `informational`: "No action needed; flagging for awareness." - 4. *(Optional)* **Where to learn more.** If `metadata.docs_url` exists, link it. - 5. *(Optional)* **Suppression status.** If `suppressed` is true, mention that — otherwise omit. - -4. **Cite evidence sparingly.** Only quote a specific evidence value when it makes the explanation concrete (e.g. naming the broken parameter, the file path from `source.ref`). Do not dump the whole `evidence` dict. - -5. **Format for the surface.** PR comments and chat support markdown — use a code span for `check_id` and `tool_name`. Plain text emails should drop the backticks but keep the structure. - -## Example - -Input (from `explain-finding fp_f092940f62fbb012 --from ... --json`): -```json -{ - "check_id": "SHIP-POLICY-APPROVAL-MISSING", - "severity": "critical", - "tool_name": "stripe.create_refund", - "agent_action": "escalate_to_human", - "recommendation": "Declare an approval policy or remove the tool.", - "metadata": { - "rationale": "High-risk actions need explicit approval before promotion.", - "fires_when": "Financial/destructive risk exists without approval policy." - } -} -``` - -Good prose for a PR comment: - -> The Tool-Use Readiness scan flagged a critical issue: `stripe.create_refund` doesn't declare an approval policy in `shipgate.yaml`. High-risk actions like refunds need an explicit human approval gate before they can ship — without one, an agent could trigger a refund on its own without review. There's no automatic fix here. The right remediation is to either add `policies.require_approval_for_tools: [stripe.create_refund]` (with a reviewer-visible approval trace) or remove the tool from this release surface. Want me to draft the manifest change for you? - -Bad prose for the same input: - -> Finding `fp_f092940f62fbb012`: `SHIP-POLICY-APPROVAL-MISSING` fired with severity `critical` on `stripe.create_refund`. autofix_safe=false, requires_human_review=true. 
evidence: risk_tags=[financial_action, destructive]. recommendation: "Declare an approval policy or remove the tool." +from agents_shipgate.cli.discovery.agent_instructions import adoption_kit as _adoption_kit -The bad version is true but unreadable — it dumps the JSON instead of translating it. +TARGET = "claude-code-skill" -## What NOT to do -- Do **not** quote the structured `explanation` field verbatim if it's robotic. It's a deterministic baseline; rewrite for tone when needed. -- Do **not** fabricate consequences. If the check's `rationale` doesn't say "could trigger a refund," don't say it. Stay grounded in catalog text. -- Do **not** propose `apply-patches` for `escalate_to_human` findings — the user has to decide on the fix manually. -- Do **not** propose adding a `checks.ignore` entry as the default response. Suppression is a real choice, but it's the last resort and needs an audit-trail-quality reason. Use the [`triage-false-positive.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/prompts/triage-false-positive.md) prompt for that workflow. -- Do **not** include the fingerprint string in the user-facing prose unless they specifically asked for it. Fingerprints are agent-to-agent identifiers, not human-friendly labels. - -## Verification before sending the message - -- The user-facing prose names the affected tool (or what the check examined) at least once. -- The severity is mentioned somewhere (a word like "critical" or "medium-severity" — not just the JSON token). -- The action sentence matches the finding's `agent_action`. If the message says "I'll apply this automatically," `agent_action` must be `auto_apply`. -- No raw JSON dumps in the prose — translate, don't quote. -- If `metadata.docs_url` exists, include it (or link text equivalent). -""" - - -_FIX_TOP_FINDING_MD = """\ -# Prompt · Fix the top Agents Shipgate finding - -You are working in a repo with `shipgate.yaml` already in place. 
Run a scan and fix the highest-severity unsuppressed finding. - -## Your task - -1. **Run a scan and locate the top finding.** - ```bash - agents-shipgate scan -c shipgate.yaml --ci-mode advisory - ``` - Read `agents-shipgate-reports/report.json`. For v0.12+ reports the easy path is `agent_summary.first_recommended_action.why` — for most `blocked`/`review_required` verdicts it names the top finding's `check_id` and `tool_name` directly. Three exceptions to expect: - - - **`insufficient_evidence` verdict** (v0.14+; the scan saw too many low-confidence tools or 4+ source warnings to gate release). There is no specific finding to fix; the action's `why` describes the evidence situation and recommends gathering deeper sources (MCP/OpenAPI inputs, eval traces, additional source files). Follow that guidance instead of looking for a top finding. - - **Evidence-coverage-driven `review_required`** (sub-threshold low-confidence/static evidence; no specific finding to fix). The action's `why` describes the evidence situation and recommends gathering MCP/OpenAPI inputs or eval traces — there is no `check_id` to parse out. If you see "low-confidence evidence" or "static-only" in the why-text, follow that guidance instead of looking for a top finding. - - **`auto_appliable_patches > 0`**. The action proposes `apply-patches`; the why-text names the apply-patches command, not a specific finding. Walk `findings[]` for the actual top entry. - - Fall back to picking the entry with the highest severity (`critical > high > medium > low > info`) and `"suppressed": false` whenever the action doesn't name a finding directly. - -2. **Look up the check definition.** - ```bash - agents-shipgate explain {check_id} --json - ``` - This returns the `CheckMetadata` with `description`, `rationale`, `fires_when`, `evidence_fields`, `recommendation`. - -3. **Diagnose the fix.** There are exactly four legitimate responses to a finding.
v0.12+ reports project the routing via `agent_action`: - - | Response | When | `agent_action` (v0.12+) | - |---|---|---| - | **Add the missing policy / scope / annotation** to `shipgate.yaml` | The check is correct; the manifest just hadn't declared the safeguard yet | `propose_patch_for_review` (a `set_pointer`/`append_pointer` patch is attached) or `escalate_to_human` (no patch — you write the entry by hand) | - | **Override the heuristic** via `risk_overrides.tools.{tool}.{tags,remove_tags}` | The risk classification is wrong (e.g. a GET endpoint that picked up the `destructive` tag because of a misleading operationId) | `escalate_to_human` | - | **Suppress the finding** via `checks.ignore` with a `reason` | The check is correct but you've decided to accept the risk explicitly (e.g. "tool deprecated 2026-Q2") | `escalate_to_human` (the future `suppress_with_reason` value is reserved for checks that pre-classify themselves as suppressible) | - | **Fix the underlying tool definition** | The tool spec itself is wrong (missing description, broad scope, free-form action field) | `escalate_to_human` | - -4. **Apply the fix.** Edit either `shipgate.yaml` or the tool source file. Do not delete tools wholesale to silence findings. - -5. **Re-scan and confirm the count went down.** - ```bash - agents-shipgate scan -c shipgate.yaml --ci-mode advisory - ``` - The previously-failing fingerprint should be gone from `report.json`. - -6. 
**Report back**: - - What was the original finding (check ID, tool, severity) - - Which of the four response types you used - - The diff to `shipgate.yaml` (or other file) you applied - - The new finding count - -## Common fixes by check ID - -| Check | Typical fix | -|---|---| -| `SHIP-POLICY-APPROVAL-MISSING` | Add the tool to `policies.require_approval_for_tools` with a reason | -| `SHIP-POLICY-CONFIRMATION-MISSING` | Add the tool to `policies.require_confirmation_for_tools` | -| `SHIP-SIDEFX-IDEMPOTENCY-MISSING` | Add an `idempotency_key` parameter, set `idempotentHint: true` annotation, or list under `policies.require_idempotency_for_tools` | -| `SHIP-AUTH-MISSING-SCOPE` | Declare the scope on the tool (in OpenAPI security or MCP metadata) and in `permissions.scopes` | -| `SHIP-AUTH-MANIFEST-BROAD-SCOPE` | Replace `*` / `admin` with the specific operation scope(s) | -| `SHIP-DOC-MISSING-DESCRIPTION` | Add a 20+ char description to the tool definition | -| `SHIP-SCHEMA-BROAD-FREE-TEXT` | Constrain the parameter with an enum, structured schema, or narrower fields | -| `SHIP-SCHEMA-MISSING-BOUNDS` | Add `maximum` to the numeric parameter | -| `SHIP-INVENTORY-LOW-CONFIDENCE-PRODUCTION-SURFACE` | Declare the tools through MCP/OpenAPI for higher-confidence inventory; or move target to staging | - -## What NOT to do - -- Do not blanket-suppress an entire check. Suppressions are per-tool unless the check is genuinely irrelevant for this repo. -- Do not write `reason: "false positive"` without explanation. Reviewers should be able to read the reason and understand the decision in 60 seconds. -- Do not edit `agents-shipgate-reports/`. It's regenerated each run. 
- -## Verification - -- The previously-failing finding's fingerprint is no longer present in `report.json` -- The fix is committed in a single, focused diff (manifest change + reason) -- If you used `checks.ignore`, the `reason` is concrete (a date, a ticket link, or "tool deprecated; see roadmap") -""" - - -_RECOMMEND_FIXES_MD = """\ -# Prompt · Recommend fixes for active Agents Shipgate findings - -You are working in a repo with `shipgate.yaml` already in place and want a coordinated remediation pass across **all** active findings — not just the top one. Walk every finding, classify it against the current autofix policy, and surface targeted fix recommendations. Apply only the safe, high-confidence patches (after preview + explicit confirmation); leave the rest for human review with concrete advice. - -## Your task - -1. **Always run a fresh v0.8+ scan with patches.** Do not reuse a stale report — earlier scans may be pre-v0.7 (no remediation fields), pre-v0.8 (no `release_decision`), or may lack `patches[]` (no `--suggest-patches`). Set `AGENTS_SHIPGATE_AGENT_MODE=1` so errors emit a `next_action` JSON line on stderr. - ```bash - AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate scan -c shipgate.yaml \\ - --suggest-patches --format json --ci-mode advisory - ``` - Read `agents-shipgate-reports/report.json`. Verify `report_schema_version` is `"0.8"` or higher. Filter `findings[]` to entries with `"suppressed": false`. - -2. **Bucket each active finding into one of four classes.** Read `agent_action` (v0.12+; deterministic projection of patches/autofix/human-review fields) to bucket each active finding directly. If `agent_action` is missing (older v0.11 or earlier reports), fall back to the legacy three-field check shown in the right column. 
The buckets correspond to [`docs/autofix-policy.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/autofix-policy.md): - - | Bucket | `agent_action` (v0.12+) | Legacy fallback (v0.11 or earlier) | Example check IDs | - |---|---|---|---| - | **A. Safe auto-fix** | `auto_apply` | `autofix_safe == true` | `SHIP-MANIFEST-STALE-{SUPPRESSION,POLICY,RISK-OVERRIDE}` when the match is unique | - | **B. Medium-confidence config fix** | `propose_patch_for_review` | `autofix_safe == false` AND `suggested_patch_kind` ∈ `{set_pointer, append_pointer, remove_pointer}` | `SHIP-AUTH-SCOPE-COVERAGE-MISSING` | - | **C. Manual** | `escalate_to_human` (with `suggested_patch_kind == "manual"`) | `suggested_patch_kind == "manual"` | Documentation, schema bounds, owner gaps, ADK/LangChain/CrewAI metadata, and the never-auto-fix trace findings | - | **D. No patch emitted** | `escalate_to_human` (with `suggested_patch_kind == "none"`) | `suggested_patch_kind == "none"` | The generator emitted nothing — but the finding can still be high/critical (e.g. low-confidence inventory). Treat as **human triage**, not informational. | - | (skip) | `informational` | `suppressed == true` | Already-suppressed findings; show counts only. | - - For one-fetch counts read the top-level `agent_summary` block (v0.12+): - `agent_summary.auto_appliable_patches` is the bucket-A count, and - `agent_summary.needs_human_review` is buckets B + C + D combined - (every active finding the user must weigh in on before applying — - medium/low-confidence patches AND escalations). To split bucket B - from bucket C+D you have to walk `findings[].agent_action` — - agent_summary deliberately does not disaggregate them, since the - distinction is an implementation detail of the patch-confidence - policy rather than a release-gate signal. Use - `agent_summary.first_recommended_action.command` as your default - suggestion when bucket A is non-empty. - -3. 
**Build a recommendation card per finding.** For each, present: - - `check_id`, `title`, `severity`, `tool_name`, `confidence` - - The verbatim `recommendation` string (per-finding fix text from the check author) - - `docs_url` as a markdown link (when non-null) - - **Concrete fix step** — branch on patch kind, since the patch shapes differ: - - `set_pointer` / `append_pointer`: show `target_file`, `pointer`, `value`, `confidence`, `rationale` - - `remove_pointer`: show `target_file`, `pointer`, `confidence`, `rationale` - - `manual`: show `instructions` verbatim. `ManualPatch` has only `kind` and `instructions` — do NOT try to read `target_file`/`pointer`/`value`; they don't exist. - - No patches (bucket D): use `evidence` and `source` to make `recommendation` concrete — quote the offending parameter name, the file path from `source.ref`, the manifest key. Generic advice is not acceptable here. - -4. **Present the prioritised plan.** Severity-ordered (critical → high → medium → low → info), grouped by bucket within each severity tier. Show counts per bucket up front. For low/info findings in bucket D, summary-link via `docs_url` rather than full cards — avoid wall-of-text. - -5. **Decision points — ask the user explicitly. Always preview before mutating.** - - **Bucket A (safe auto-fix).** First run a **dry-run** (omit `--apply`): - ```bash - agents-shipgate apply-patches \\ - --from agents-shipgate-reports/report.json \\ - --confidence high - ``` - Show the user the planned file diffs. Only after explicit confirmation, re-run with `--apply --json`. Never silently apply. - - **Bucket B (medium-confidence config).** Surface the patches with their `pointer` and `value`. Tell the user the opt-in command (`apply-patches --confidence medium`) and that they must read the appended values first — scope strings can encode policy choices. Do not apply on the user's behalf in this recipe. - - **Bucket C (manual).** Ask whether to walk through them now or defer. 
For deep dive on a single finding, cross-link to [`fix-top-finding.md`](fix-top-finding.md). Never edit a trace recording to silence `SHIP-API-TRACE-{APPROVAL,CONFIRMATION}-MISSING` — that patches the evidence, not the agent. Implement the runtime gate instead. - - **Bucket D (no patch).** Ask whether to walk through them — these need diagnosis, not patch application. Cross-link to [`fix-top-finding.md`](fix-top-finding.md); the four-response decision tree (add policy / override / suppress / fix tool spec) applies. - -6. **Re-scan after applying any Bucket A patches.** Show the diff in `summary.{critical_count, high_count, medium_count}`. Confirm the previously-fixed fingerprints are gone from `report.json`. - -7. **Report back**: - - Counts per bucket (A/B/C/D) and per severity - - What was applied (from `apply-patches --apply --json` output's `files`) - - What remains, with one clear next action per remaining bucket - - Any cross-links the user should follow ([`fix-top-finding.md`](fix-top-finding.md), [`triage-false-positive.md`](triage-false-positive.md)) - -## What NOT to do - -- Do **not** run `apply-patches --apply` without showing the dry-run preview first AND getting explicit user confirmation, even when `autofix_safe == true`. -- Do **not** apply `--confidence medium` patches in this recipe. They are opt-in only and require the user to read the appended values. -- Do **not** edit a trace recording to silence `SHIP-API-TRACE-{APPROVAL,CONFIRMATION}-MISSING`. Trace findings are class-four "never auto-fix" per the autofix policy. Implement the runtime approval/confirmation gate. -- Do **not** recommend `checks.ignore` as a fix here. That's the [`triage-false-positive.md`](triage-false-positive.md) workflow's job — cross-link to it. -- Do **not** claim a finding is fixed without re-running `agents-shipgate scan` and showing the diff in counts. 
-- Do **not** invent recommendations not grounded in `recommendation`, `evidence`, `patches[].instructions`, or `docs_url`. Use evidence to make advice concrete; do not replace check-author guidance with a guess. - -## Verification - -- A fresh `report.json` exists, validates as `report_schema_version: "0.8"` (or higher; v0.12+ exposes `agent_action` and `agent_summary`), and was generated with `--suggest-patches`. -- Each presented card cites a concrete location: `target_file` + `pointer` for non-manual patches, `instructions` verbatim for manual patches, file path + parameter name from `evidence`/`source` for bucket D. -- If Bucket A patches were applied: re-scan shows lower active counts AND the previously-failing fingerprints are absent from the new `report.json`. -- If only B/C/D were surfaced: counts are unchanged (expected); the user has a clear list of next actions. -""" - - -_STABILIZE_STRICT_MD = """\ -# Prompt · Stabilize Agents Shipgate strict mode - -The user has Agents Shipgate running in **advisory** mode and wants to graduate to **strict** mode (CI fails on findings) without surprising contributors. - -## The pattern - -1. Run a fresh scan and inventory the active findings. -2. Tune `risk_overrides` and `checks.ignore` for genuine false positives, with reasons. -3. Save a baseline of everything that's left. -4. Switch CI to strict mode with the baseline applied — only NEW findings fail. -5. Pick a severity threshold; usually start with `critical`, raise to `[critical, high]` later. - -## Your task - -1. **Inventory current findings.** - ```bash - agents-shipgate scan -c shipgate.yaml --ci-mode advisory - ``` - Look at `agents-shipgate-reports/report.json` `summary.critical_count`, `high_count`, `medium_count`. If the active list is small (< 20 unique check IDs), consider just fixing them rather than baselining. - -2. 
**Tune false positives.** For each unique check ID, decide: - - True positive that should be fixed → use the `fix-top-finding.md` prompt to apply a real fix. - - True positive that the team explicitly accepts (deprecated tool, known limitation) → add to `checks.ignore` with a real `reason`. - - False positive (heuristic misfire) → use `risk_overrides.tools.{tool}.remove_tags` or add tags via `risk_overrides.tools.{tool}.tags`. - -3. **Save the baseline:** - ```bash - agents-shipgate baseline save -c shipgate.yaml \\ - --out .agents-shipgate/baseline.json - ``` - -4. **Commit the baseline:** - ```bash - git add .agents-shipgate/baseline.json - git commit -m "Baseline shipgate findings ($N criticals, $M highs)" - ``` - -5. **Update the CI workflow.** Replace the existing advisory step with strict + baseline. Use [`examples/github-actions/03-strict-with-baseline.yml`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/examples/github-actions/03-strict-with-baseline.yml) as the template: - ```yaml - - uses: ThreeMoonsLab/agents-shipgate@v0.10.0 - with: - ci_mode: strict - fail_on: critical - baseline: .agents-shipgate/baseline.json - pr_comment: 'true' - ``` - -6. **Verify the gate fires correctly.** In a throwaway branch, deliberately introduce a new finding (e.g. add a wildcard scope) and confirm CI fails. Revert before merging. - -## When to refresh the baseline - -| Situation | Action | -|---|---| -| Found a false positive after baselining | Add a `checks.ignore` entry; do **not** re-baseline | -| Fixed several findings | Re-baseline so resolved ones disappear: `agents-shipgate baseline save ...` | -| Upgraded shipgate to a version with new checks | New check IDs surface as new findings; fix or suppress, then re-baseline | -| Added new tools that have no policy yet | Each new tool's findings are `new` and will fail; fix or accept, then re-baseline | - -Re-baselining is just running `baseline save` again. 
Diff the new file vs the old in code review so the team sees what's been accepted. - -## Promotion to `[critical, high]` - -After a sprint or two of strict-on-critical, the active high-severity list usually compresses enough to flip on. Update `fail_on: critical,high` and re-baseline. - -## What NOT to do - -- Do **not** baseline in your first run as a "shortcut to make CI green." That hides the existing risk surface from review. -- Do **not** baseline findings that have a real fix — fix them first, baseline only what you're explicitly accepting. -- Do **not** write `--fail-on critical,high` without a baseline if the repo has many existing high findings; CI will fail on day one and contributors will mute the workflow. - -## Verification - -- `.agents-shipgate/baseline.json` is committed and contains `findings[]` -- CI workflow uses `ci_mode: strict` and `baseline: .agents-shipgate/baseline.json` -- A test PR that adds a deliberate new critical finding fails CI -- A test PR that doesn't change the tool surface passes CI -""" - - -_TRIAGE_FP_MD = """\ -# Prompt · Triage a suspected Agents Shipgate false positive - -The user thinks a specific finding is wrong. You need to decide whether to override the heuristic, suppress the finding, or convince the user that the check is correct. - -## Your task - -1. **Read the full finding.** From `agents-shipgate-reports/report.json`: - ```json - { - "id": "fp_...", - "check_id": "SHIP-...", - "tool_name": "...", - "severity": "...", - "evidence": { ... }, - "recommendation": "..." - } - ``` - And the check definition: - ```bash - agents-shipgate explain {check_id} --json - ``` - -2. **Read the actual tool definition.** Look up the OpenAPI / MCP / SDK source: - - For OpenAPI: open the spec at the path given in `findings[].source.ref` - - For MCP: open the JSON file - - For SDK: open the `.py` file at the line given in `source.location` - -3. **Apply the decision tree:** - - ``` - Is the heuristic wrong about the tool? - (e.g.
"destructive" tag on a GET; "financial_action" tag on a non-financial scope) - → YES: override via risk_overrides.tools.{tool}.remove_tags - → NO: continue - - Is the check fundamentally inapplicable to this tool? - (e.g. SHIP-DOC-MISSING-DESCRIPTION on an internal-only tool slated for removal) - → YES: suppress via checks.ignore with a concrete reason - → NO: continue - - The check is correct. Fix the tool definition. - → use the fix-top-finding.md prompt - ``` - -## Override vs suppress — which to use - -| Use `risk_overrides` when | Use `checks.ignore` when | -|---|---| -| The risk **classification** is wrong | The classification is right but the team accepts the risk | -| You want to remove a tag (e.g. `remove_tags: [destructive]`) | You want to suppress one specific finding | -| The fix benefits all checks that consume that tag | The acceptance is per-check, per-tool | -| Example: a `get_records` GET picks up `destructive` from substring "destroy" | Example: a documented internal-only tool with no description | - -**Rule of thumb:** if the fix would silence multiple findings naturally, use `risk_overrides`. If you want to acknowledge one specific finding by name, use `checks.ignore`. - -## Required: a concrete `reason` - -Both `checks.ignore` entries and `risk_overrides` entries take a `reason`. Empty reasons fail manifest validation. Good reasons answer "why is this OK?" 
in a way a future reviewer can verify: - -| Bad reason | Better reason | -|---|---| -| `false positive` | `GET endpoint; "destroy" appears in operationId only because it returns destroy-status` | -| `not applicable` | `Tool deprecated 2026-Q2; deletion tracked in JIRA-1234` | -| `team decision` | `Reviewed by platform-eng 2026-04-10; see ADR-007` | - -## Re-run and confirm - -After editing the manifest: - -```bash -agents-shipgate scan -c shipgate.yaml --ci-mode advisory -``` - -The previously-failing fingerprint should be gone (overridden) or marked `"suppressed": true` (suppressed) in `report.json`. - -## When the heuristic is genuinely buggy - -If you've found a real classifier bug — the kind that affects many users, not just this tool — file an issue tagged `false-positive` at https://github.com/ThreeMoonsLab/agents-shipgate/issues with: - -- The check ID -- A minimal reproduction (manifest fragment + tool source) -- The current behavior vs. expected behavior - -The risk classifier in `core/risk_hints.py` improves through reports. - -## Verification - -- The decision (override / suppress / fix) is documented in the manifest with a reason. -- The previously-failing fingerprint is gone or `"suppressed": true` in the next scan. -- The `reason` would be understandable to a reviewer who hasn't seen the finding. -""" - - -_UPGRADE_VERSION_MD = """\ -# Prompt · Upgrade Agents Shipgate version - -Bump the agents-shipgate version pinned in CI and the development environment. - -## Your task - -1. **Read the changelog** for the gap between the current and target version: - - https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/CHANGELOG.md - - Specifically look for entries under "Breaking changes" and "New checks added". - -2. **Update the pin in three places** (in this order): - - a. **`pyproject.toml`** (if the project depends on shipgate as a dev dep): - ```toml - [project.optional-dependencies] - dev = ["agents-shipgate==", ...] - ``` - - b. 
**CI workflow** at `.github/workflows/shipgate.yml`: - ```yaml - - uses: ThreeMoonsLab/agents-shipgate@v - with: - shipgate_version: '' - ``` - - c. **Pre-commit config** at `.pre-commit-config.yaml` (if present): - ```yaml - repos: - - repo: https://github.com/ThreeMoonsLab/agents-shipgate - rev: v - ``` - -3. **Run a local scan** with the new version: - ```bash - pipx upgrade agents-shipgate - agents-shipgate --version # confirm the new version is in PATH - agents-shipgate scan -c shipgate.yaml --ci-mode advisory - ``` - -4. **Compare the new finding count to the baseline.** If `report.json` shows new finding fingerprints (any with `"baseline_status": "new"`): - - These are usually new checks added in the upgrade. Read the changelog "New checks added" section. - - For each new check ID, decide: fix, override, or suppress (see [`triage-false-positive.md`](triage-false-positive.md)). - -5. **Re-baseline if the new findings are accepted:** - ```bash - agents-shipgate baseline save -c shipgate.yaml \\ - --out .agents-shipgate/baseline.json - ``` - -6. **Commit** the version bumps + the new baseline (if regenerated) in one PR. Title: `Upgrade agents-shipgate v → v`. - -## Stability guarantees - -Per [`STABILITY.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md), within `0.x`: - -- Existing check IDs do not change names or fingerprint algorithms. -- Existing CLI flags do not break. -- The JSON report's stable fields persist. - -So a `0.2.x → 0.3.x` upgrade should not silently break existing suppressions or baselines. If it does, that's a stability bug — file an issue. - -## What may legitimately change - -- Risk-classifier keyword sets (false-positive tuning). Use `risk_overrides` to pin specific behavior. -- New checks fire (additive). Triage with the prompts above. -- Markdown report layout (parse `report.json` instead). 
- -## Verification +def render_files(config: _adoption_kit.AdoptionKitConfig | None = None) -> dict[str, str]: + """Return relative file path -> UTF-8 text for the Claude Code skill bundle.""" -- `agents-shipgate --version` reflects the new version -- CI workflow uses the new version -- A scan completes without error -- The baseline file (if used) is up to date -""" + return _adoption_kit.render_adoption_kit(TARGET, config).files -_ADVISORY_CI_YML = f"""\ -# Advisory PR comment. -# Recommended starting point — runs the scanner on every PR, posts a summary -# comment, uploads the report as an artifact, and never fails the job. -name: Agents Shipgate (advisory) +def render_bundle_text(config: _adoption_kit.AdoptionKitConfig | None = None) -> str: + """Return a human-readable dry-run rendering of the full bundle.""" -on: - pull_request: + return _adoption_kit.render_bundle_text(TARGET, config) -permissions: - contents: read - pull-requests: write -jobs: - shipgate: - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - uses: ThreeMoonsLab/agents-shipgate@v{_ACTION_VERSION} - with: - ci_mode: advisory - diff_base: target - pr_comment: 'true' - shipgate_version: '{_ACTION_VERSION}' -""" +PRIOR_RENDER_SHA256: dict[str, tuple[str, ...]] = _adoption_kit.prior_render_hashes(TARGET) diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/codex_skill.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/codex_skill.py index f4a1aa65..c65b6f9b 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/codex_skill.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/codex_skill.py @@ -1,289 +1,22 @@ -"""Render the repo-scoped Codex skill bundle. - -The canonical checked-in copy lives under ``.agents/skills/agents-shipgate``. 
-This renderer deliberately keeps a hard-coded copy so the installed wheel can -generate the skill without relying on repository files being present. -Snapshot tests keep the two copies in sync. -""" +"""Render the repo-scoped Codex skill bundle from packaged kit files.""" from __future__ import annotations -from agents_shipgate import __version__ - - -def render_files() -> dict[str, str]: - """Return relative file path -> UTF-8 text for the Codex skill bundle.""" - return { - ".agents/skills/agents-shipgate/SKILL.md": _SKILL_MD, - ".agents/skills/agents-shipgate/references/recipes.md": _RECIPES_MD, - ".agents/skills/agents-shipgate/references/report-reading.md": _REPORT_READING_MD, - ".agents/skills/agents-shipgate/assets/advisory-pr-comment.yml": _ADVISORY_CI_YML, - ".agents/skills/agents-shipgate/agents/openai.yaml": _OPENAI_YAML, - } - - -def render_bundle_text() -> str: - """Return a human-readable dry-run rendering of the full bundle.""" - chunks: list[str] = [] - for path, text in render_files().items(): - chunks.append(f"--- {path} ---\n{text.rstrip()}\n") - return "\n".join(chunks) - - -# SHA-256 hashes of every prior render, keyed by bundle-relative file path. -# When a rendered file changes after the first shipped Codex skill release, -# move that file's previous current-render hash into this dict so `init -# --agent-instructions=codex-skill --write` can safely migrate v(N-1) files. -# Leave the dict empty while there is no prior shipped Codex skill bundle. 
-PRIOR_RENDER_SHA256: dict[str, tuple[str, ...]] = { - ".agents/skills/agents-shipgate/SKILL.md": ( - "59ec0a31f9747acf569f731561236ff4ef6d8734b614edfa04ea6ff10043f21a", - ), -} - -_ACTION_VERSION = __version__ - - -_SKILL_MD = """--- -name: agents-shipgate -description: Use when the user wants to add or run Agents Shipgate as a local-first, static Tool-Use Readiness release gate for an AI agent's tool surface; review or prepare a tool-using agent for release; scan MCP, OpenAPI, OpenAI Agents SDK, Anthropic, Google ADK, LangChain/LangGraph, CrewAI, OpenAI API, Codex plugin, or n8n tool artifacts; add advisory CI; or interpret, fix, triage, suppress, or explain a Shipgate finding. ---- - -# Agents Shipgate - -Agents Shipgate is a local-first, static Tool-Use Readiness release gate for AI agent tool surfaces. It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and Release Evidence Packets. - -Use this skill when a task touches agent tools, MCP exports, OpenAPI specs, prompts that constrain tool use, permissions/scopes, approval or confirmation policies, `shipgate.yaml`, Shipgate CI, or `agents-shipgate-reports/report.json`. - -Do not use it for general linting, runtime monitoring, evals, model-output quality, or runtime guardrail enforcement. Shipgate is static-only: no agent execution, no tool calls, no LLM calls, no MCP server connections, and no telemetry by default. - -## Workflow - -1. For relevance decisions, bootstrap, scanning, CI setup, finding fixes, false-positive triage, strict-mode promotion, or version upgrades, read `references/recipes.md`. -2. For reading `report.json`, summarizing release decisions, or deciding what may be auto-applied, read `references/report-reading.md`. -3. Set `AGENTS_SHIPGATE_AGENT_MODE=1` before running Shipgate commands so errors include structured `next_action` JSON. -4. Default first-time CI to advisory mode. 
Do not enable release-blocking CI or save a baseline until a human has reviewed current findings. -5. Always parse `agents-shipgate-reports/report.json`, not Markdown. Use `release_decision.decision` as the release signal. -6. Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. -7. Ensure `.gitignore` covers `agents-shipgate-reports/` before committing. - -## Fast Paths - -- First adoption: run `agents-shipgate detect --workspace . --json`, then follow `references/recipes.md`. -- Existing manifest: run `agents-shipgate scan -c shipgate.yaml --suggest-patches --format json`. -- First GitHub CI: copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. -- Explain one finding: run `agents-shipgate explain-finding --from agents-shipgate-reports/report.json --json`. -- Triage heuristic findings: run `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`. - -## Boundaries - -- Do not claim a finding is fixed without re-running `agents-shipgate scan` and reporting the new decision/counts. -- Do not silently suppress findings. Suppressions require a non-empty `reason`. -- Do not commit generated reports. -- Do not edit the upstream `agents-shipgate` check implementation unless the user is working in the Agents Shipgate repo itself. -""" - - -_RECIPES_MD = """# Agents Shipgate Recipes - -Use these recipes after the `agents-shipgate` skill triggers. - -## Decide Relevance - -Run: - -```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . 
--json -``` - -Proceed when any of these are true: - -- `is_agent_project: true` -- `suggested_sources` is non-empty -- `codex_plugin_candidates` is non-empty -- `shipgate.yaml` already exists -- the user explicitly asked for a Shipgate scan or Tool-Use Readiness gate - -Stop only when all signals are absent and the user did not explicitly request Shipgate. - -## Bootstrap A Repo - -Run: - -```bash -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate detect --workspace . --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate contract --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate init --workspace . --write --ci --json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate scan -c shipgate.yaml --suggest-patches --format json -AGENTS_SHIPGATE_AGENT_MODE=1 agents-shipgate apply-patches \\ - --from agents-shipgate-reports/report.json \\ - --confidence high --apply -``` - -If `init` reports placeholders, replace `CHANGE_ME` values from repo context before scanning. If `shipgate.yaml` already exists, edit it rather than overwriting it. - -## First-Time CI - -Use advisory mode only. Copy `assets/advisory-pr-comment.yml` to `.github/workflows/agents-shipgate.yml`. - -Do not switch to release-blocking behavior in the same task. Strict promotion requires human review, suppressions with reasons, and optionally a saved baseline. +from agents_shipgate.cli.discovery.agent_instructions import adoption_kit as _adoption_kit -## Fix Top Finding +TARGET = "codex-skill" -1. Read `agents-shipgate-reports/report.json`. -2. Pick the first blocker, then highest-severity review item. -3. If `findings[].agent_action == "auto_apply"` and a high-confidence patch exists, apply it with `apply-patches --confidence high --apply`. -4. For policy/evidence gaps, propose the exact human decision needed. Do not fabricate approval, confirmation, idempotency, broad-scope, prohibited-action, or runtime-trace evidence. -5. 
Re-run scan and report the new `release_decision.decision`, blocker count, and review item count. -## Recommend Fixes - -Group active findings by action: - -- `auto_apply`: safe mechanical patches. -- `propose_patch_for_review`: show patch, leave final decision to user. -- `escalate_to_human`: policy/evidence decision. -- `suppress_with_reason`: only when the user confirms the finding is intentionally accepted. -- `informational`: summarize, no gate action. - -## Explain A Finding - -Run: - -```bash -agents-shipgate explain-finding \\ - --from agents-shipgate-reports/report.json --json -``` - -Use the returned deterministic `explanation` for PR comments or chat replies. Keep it to 3-5 sentences and include the tool name, release risk, and next action. - -## Triage False Positives - -Prefer fixing the manifest or policy evidence over suppression. Suppress only with a specific reason: - -```yaml -checks: - ignore: - - check_id: SHIP-CHECK-ID - tool: tool.name - reason: specific accepted-risk rationale -``` - -## Promote Advisory To Strict - -Only after humans review advisory output: - -```bash -agents-shipgate baseline save -c shipgate.yaml --out .agents-shipgate/baseline.json -agents-shipgate scan -c shipgate.yaml \\ - --baseline .agents-shipgate/baseline.json \\ - --ci-mode strict --fail-on critical,high -``` - -The promoted gate should fail only on new findings above the selected threshold. - -## Upgrade Shipgate - -Update the GitHub Action tag and `shipgate_version` together. Re-run: - -```bash -agents-shipgate contract --json -agents-shipgate scan -c shipgate.yaml --suggest-patches --format json -``` - -If schema or decision fields changed, use `docs/agent-contract-current.md` from the installed version or upstream repo. -""" - - -_REPORT_READING_MD = """# Reading Agents Shipgate Reports - -Always read `agents-shipgate-reports/report.json`. Do not scrape Markdown. - -## Order - -1. 
`release_decision.decision`: `blocked`, `review_required`, `insufficient_evidence`, or `passed`. -2. `release_decision.blockers[]`: items blocking release. -3. `release_decision.review_items[]`: accepted debt or human-review items. -4. `agent_summary`: one-fetch summary with `headline`, counts, safe patches, human-review needs, and `first_recommended_action`. -5. `findings[]`: detailed evidence, source, severity, and remediation. - -## Per-Finding Action - -Prefer `findings[].agent_action` when present: - -- `auto_apply`: safe to apply only when a high-confidence patch exists. -- `propose_patch_for_review`: show patch and ask for review. -- `escalate_to_human`: policy/evidence decision. -- `suppress_with_reason`: suppress only after explicit user confirmation. -- `informational`: summarize only. - -Do not synthesize an action from lower-level fields when `agent_action` exists. - -## Manual-Review Boundary - -Never auto-assert these categories: - -- approval policy -- confirmation policy -- idempotency evidence -- broad-scope permission decisions -- prohibited-action policy decisions -- runtime trace evidence - -For those, summarize the risk and the exact decision a human needs to make. - -## Summary Template - -Report back with: - -```text -Decision: -Blockers: -Review items: -Safe patches applied: -Needs human review: -Top findings: -1. -``` - -If `privacy_audit` is present, mention that default report redaction ran. If `insufficient_evidence` appears, treat it as review-required unless the user has stricter release policy. -""" - - -_ADVISORY_CI_YML = f"""# Advisory PR comment. -# Recommended starting point: runs the scanner on every PR, posts a summary -# comment, uploads the report as an artifact, and never fails the job. 
-name: Agents Shipgate (advisory) +def render_files(config: _adoption_kit.AdoptionKitConfig | None = None) -> dict[str, str]: + """Return relative file path -> UTF-8 text for the Codex skill bundle.""" -on: - pull_request: + return _adoption_kit.render_adoption_kit(TARGET, config).files -permissions: - contents: read - pull-requests: write -jobs: - shipgate: - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - uses: ThreeMoonsLab/agents-shipgate@v{_ACTION_VERSION} - with: - ci_mode: advisory - diff_base: target - pr_comment: 'true' - shipgate_version: '{_ACTION_VERSION}' -""" +def render_bundle_text(config: _adoption_kit.AdoptionKitConfig | None = None) -> str: + """Return a human-readable dry-run rendering of the full bundle.""" + return _adoption_kit.render_bundle_text(TARGET, config) -_OPENAI_YAML = """interface: - display_name: "Agents Shipgate" - short_description: "Run Tool-Use Readiness release gates" - default_prompt: "Use $agents-shipgate to add a Tool-Use Readiness release gate to this agent repo." -policy: - allow_implicit_invocation: true -""" +PRIOR_RENDER_SHA256: dict[str, tuple[str, ...]] = _adoption_kit.prior_render_hashes(TARGET) diff --git a/tests/test_adapter_static_only.py b/tests/test_adapter_static_only.py index 9bce98de..7861a03c 100644 --- a/tests/test_adapter_static_only.py +++ b/tests/test_adapter_static_only.py @@ -342,6 +342,29 @@ class AllowedException: "'agents_shipgate' string (snippet pinning enforces this)." ), ), + AllowedException( + relative_path="cli/discovery/agent_instructions/adoption_kit.py", + surface="import:importlib.resources.files", + line=9, + snippet="from importlib.resources import files", + rationale=( + "adoption_kit.py reads bundled first-party adoption-kit files " + "from the installed wheel. From-import line pinned alongside " + "the call site for the same literal-anchor review as triggers.py." 
+ ), + ), + AllowedException( + relative_path="cli/discovery/agent_instructions/adoption_kit.py", + surface="attr_call:importlib.resources.files", + line=354, + snippet="files('agents_shipgate')", + rationale=( + "Resolves bundled adoption-kits/* content inside the " + "agents-shipgate wheel. Anchor is the literal 'agents_shipgate' " + "string (snippet pinning enforces this); downstream customization " + "uses explicit repo-local override files, not dynamic imports." + ), + ), ) diff --git a/tests/test_agent_instructions_apply.py b/tests/test_agent_instructions_apply.py index db8d056e..1954497f 100644 --- a/tests/test_agent_instructions_apply.py +++ b/tests/test_agent_instructions_apply.py @@ -7,6 +7,7 @@ from __future__ import annotations import hashlib +import json from pathlib import Path import pytest @@ -18,17 +19,15 @@ apply_agent_instructions, parse_selector, ) +from agents_shipgate.cli.discovery.agent_instructions.adoption_kit import ( + SIDECAR_FILENAME, + load_adoption_kit_config, +) from agents_shipgate.cli.discovery.agent_instructions.apply import ( PR_TEMPLATE_DIR, PR_TEMPLATE_LOWER, PR_TEMPLATE_UPPER, ) -from agents_shipgate.cli.discovery.agent_instructions.renderers import ( - claude_code_skill as claude_code_skill_module, -) -from agents_shipgate.cli.discovery.agent_instructions.renderers import ( - codex_skill as codex_skill_module, -) from agents_shipgate.cli.discovery.agent_instructions.renderers import ( cursor as cursor_module, ) @@ -59,6 +58,28 @@ def _filesystem_is_case_sensitive(path: Path) -> bool: reason="Test asserts case-insensitive samefile collapsing.", ) + +def _write_sidecar( + root: Path, + *, + target: str, + file_hashes: dict[str, str], +) -> None: + (root / SIDECAR_FILENAME).write_text( + json.dumps( + { + "schema_version": 1, + "target": target, + "kit_source": "bundled", + "kit_source_id": f"test:{target}", + "writer_version": "0.0.0-test", + "file_hashes": file_hashes, + } + ) + + "\n", + encoding="utf-8", + ) + # --- selector 
parsing ------------------------------------------------------ @@ -116,6 +137,8 @@ def test_apply_write_fresh_workspace_creates_all_targets(tmp_path: Path) -> None assert (tmp_path / "AGENTS.md").exists() assert (tmp_path / "CLAUDE.md").exists() assert (tmp_path / ".agents/skills/agents-shipgate/SKILL.md").exists() + assert (tmp_path / ".agents/skills/agents-shipgate" / SIDECAR_FILENAME).exists() + assert (tmp_path / ".claude/skills/agents-shipgate" / SIDECAR_FILENAME).exists() assert (tmp_path / ".cursor/rules/agents-shipgate.mdc").exists() assert (tmp_path / PR_TEMPLATE_LOWER).exists() # AGENTS.md preamble + block. @@ -163,19 +186,68 @@ def test_codex_skill_repairs_missing_file(tmp_path: Path) -> None: assert missing.exists() -def test_codex_skill_reports_migrate_and_repair_when_prior_file_and_missing_file( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch +def test_codex_skill_records_sidecar_for_pre_sidecar_current_tree( + tmp_path: Path, +) -> None: + for rel, content in render_codex_skill_files().items(): + target = tmp_path / rel + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + sidecar = tmp_path / ".agents/skills/agents-shipgate" / SIDECAR_FILENAME + assert not sidecar.exists() + + result = apply_agent_instructions(tmp_path, ["codex-skill"], write=True) + + [outcome] = result.targets + assert outcome.status == "migrated" + assert sidecar.exists() + + +def test_codex_skill_local_override_migrates_sidecar_managed_tree( + tmp_path: Path, ) -> None: apply_agent_instructions(tmp_path, ["codex-skill"], write=True) - skill = tmp_path / ".agents/skills/agents-shipgate/SKILL.md" - missing = tmp_path / ".agents/skills/agents-shipgate/references/recipes.md" + override_root = tmp_path / ".agents-shipgate/adoption-kit/codex-skill" + override_root.mkdir(parents=True) + override_root.joinpath("SKILL.md").write_text( + "# Custom Agents Shipgate Skill\n", + encoding="utf-8", + ) + config_path = tmp_path / 
".agents-shipgate/adoption-kit.yaml" + config_path.write_text( + "schema_version: 1\n" + "targets:\n" + " codex-skill:\n" + " overrides_dir: .agents-shipgate/adoption-kit/codex-skill\n", + encoding="utf-8", + ) + kit_config = load_adoption_kit_config(tmp_path) + + result = apply_agent_instructions( + tmp_path, + ["codex-skill"], + write=True, + kit_config=kit_config, + ) + + [outcome] = result.targets + assert outcome.status == "migrated" + assert outcome.kit_source == "bundled_plus_local_override" + assert (tmp_path / ".agents/skills/agents-shipgate/SKILL.md").read_text( + encoding="utf-8" + ) == "# Custom Agents Shipgate Skill\n" + + +def test_codex_skill_reports_migrate_and_repair_from_sidecar( + tmp_path: Path, +) -> None: + apply_agent_instructions(tmp_path, ["codex-skill"], write=True) + root = tmp_path / ".agents/skills/agents-shipgate" + skill = root / "SKILL.md" + missing = root / "references/recipes.md" prior_text = "# prior shipped skill\n" prior_sha = hashlib.sha256(prior_text.encode("utf-8")).hexdigest() - monkeypatch.setattr( - codex_skill_module, - "PRIOR_RENDER_SHA256", - {".agents/skills/agents-shipgate/SKILL.md": (prior_sha,)}, - ) + _write_sidecar(root, target="codex-skill", file_hashes={"SKILL.md": prior_sha}) skill.write_text(prior_text, encoding="utf-8") missing.unlink() @@ -226,18 +298,19 @@ def test_claude_code_skill_repairs_missing_file(tmp_path: Path) -> None: assert missing.exists() -def test_claude_code_skill_reports_migrate_and_repair( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch +def test_claude_code_skill_reports_migrate_and_repair_from_sidecar( + tmp_path: Path, ) -> None: apply_agent_instructions(tmp_path, ["claude-code-skill"], write=True) - skill = tmp_path / ".claude/skills/agents-shipgate/SKILL.md" - missing = tmp_path / ".claude/skills/agents-shipgate/prompts/fix-top-finding.md" + root = tmp_path / ".claude/skills/agents-shipgate" + skill = root / "SKILL.md" + missing = root / "prompts/fix-top-finding.md" prior_text = "# 
prior shipped skill\n" prior_sha = hashlib.sha256(prior_text.encode("utf-8")).hexdigest() - monkeypatch.setattr( - claude_code_skill_module, - "PRIOR_RENDER_SHA256", - {".claude/skills/agents-shipgate/SKILL.md": (prior_sha,)}, + _write_sidecar( + root, + target="claude-code-skill", + file_hashes={"SKILL.md": prior_sha}, ) skill.write_text(prior_text, encoding="utf-8") diff --git a/tests/test_agent_instructions_renderers.py b/tests/test_agent_instructions_renderers.py index 48133c94..45146b03 100644 --- a/tests/test_agent_instructions_renderers.py +++ b/tests/test_agent_instructions_renderers.py @@ -7,6 +7,7 @@ from __future__ import annotations +import ast import hashlib import re from pathlib import Path @@ -144,6 +145,27 @@ def test_codex_skill_source_matches_renderer() -> None: assert (REPO_ROOT / rel).read_text(encoding="utf-8") == content +def test_skill_renderers_do_not_embed_long_content_constants() -> None: + """Skill bundle prose lives in adoption-kit files, not Python constants.""" + + renderer_paths = ( + REPO_ROOT + / "src/agents_shipgate/cli/discovery/agent_instructions/renderers/codex_skill.py", + REPO_ROOT + / "src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_code_skill.py", + ) + for path in renderer_paths: + tree = ast.parse(path.read_text(encoding="utf-8")) + long_strings = [ + node.value + for node in ast.walk(tree) + if isinstance(node, ast.Constant) + and isinstance(node.value, str) + and len(node.value) > 500 + ] + assert not long_strings, f"{path} embeds generated content in Python" + + def test_codex_skill_benchmark_variant_uses_renderer(tmp_path: Path) -> None: """The Codex adoption-harness overlay must use the same skill files.""" variant = REPO_ROOT / "benchmark/setup-variants/25-codex-skill" diff --git a/tests/test_init_agent_instructions.py b/tests/test_init_agent_instructions.py index 07f85854..f3b39cfa 100644 --- a/tests/test_init_agent_instructions.py +++ b/tests/test_init_agent_instructions.py @@ -12,6 +12,7 @@ 
import shutil from pathlib import Path +import pytest from typer.testing import CliRunner from agents_shipgate.cli.discovery.agent_instructions import TARGETS @@ -67,6 +68,15 @@ def test_dry_run_all_targets_json_has_rendered_content(tmp_path: Path) -> None: assert statuses == {name: "would_render" for name in TARGETS} for entry in ai["targets"]: assert entry["rendered"] + kit_sources = { + entry["name"]: entry.get("kit_source") + for entry in ai["targets"] + if entry["name"] in {"codex-skill", "claude-code-skill"} + } + assert kit_sources == { + "codex-skill": "bundled", + "claude-code-skill": "bundled", + } # No filesystem changes. for name in TARGETS: assert not (workspace / SPECS[name].relative_path).exists() @@ -105,6 +115,150 @@ def test_dry_run_none_selector_emits_empty_targets_list(tmp_path: Path) -> None: } +def test_explicit_agent_instructions_kit_reports_local_source( + tmp_path: Path, +) -> None: + workspace = _seed_workspace(tmp_path, "simple_langchain_agent") + override_root = workspace / ".agents-shipgate/adoption-kit/codex-skill" + override_root.mkdir(parents=True) + override_root.joinpath("SKILL.md").write_text( + "# Custom Codex Skill\n", + encoding="utf-8", + ) + kit_path = workspace / ".agents-shipgate/custom-kit.yaml" + kit_path.write_text( + "schema_version: 1\n" + "targets:\n" + " codex-skill:\n" + " overrides_dir: .agents-shipgate/adoption-kit/codex-skill\n", + encoding="utf-8", + ) + + result = runner.invoke( + app, + [ + "init", + "--workspace", + str(workspace), + "--agent-instructions=codex-skill", + "--agent-instructions-kit", + str(kit_path.relative_to(workspace)), + "--json", + ], + ) + + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + [target] = payload["agent_instructions"]["targets"] + assert target["kit_source"] == "bundled_plus_local_override" + rendered_skill = next( + file["content"] + for file in target["files"] + if file["path"].endswith("/SKILL.md") + ) + assert rendered_skill == "# Custom 
Codex Skill\n" + + +def test_auto_discovered_agent_instructions_kit_is_used_on_write( + tmp_path: Path, +) -> None: + workspace = _seed_workspace(tmp_path, "simple_langchain_agent") + override_root = workspace / ".agents-shipgate/adoption-kit/codex-skill" + override_root.mkdir(parents=True) + override_root.joinpath("references").mkdir() + override_root.joinpath("references/report-reading.md").write_text( + "# Custom Report Reader\n", + encoding="utf-8", + ) + (workspace / ".agents-shipgate/adoption-kit.yaml").write_text( + "schema_version: 1\n" + "targets:\n" + " codex-skill:\n" + " overrides_dir: .agents-shipgate/adoption-kit/codex-skill\n", + encoding="utf-8", + ) + + result = runner.invoke( + app, + [ + "init", + "--workspace", + str(workspace), + "--write", + "--agent-instructions=codex-skill", + "--json", + ], + ) + + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + [target] = payload["agent_instructions"]["targets"] + assert target["kit_source"] == "bundled_plus_local_override" + assert ( + workspace + / ".agents/skills/agents-shipgate/references/report-reading.md" + ).read_text(encoding="utf-8") == "# Custom Report Reader\n" + + +def test_invalid_agent_instructions_kit_fails_before_write( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + workspace = _seed_workspace(tmp_path, "simple_langchain_agent") + kit_path = workspace / ".agents-shipgate/adoption-kit.yaml" + kit_path.parent.mkdir(parents=True) + kit_path.write_text("schema_version: 2\n", encoding="utf-8") + monkeypatch.setenv("AGENTS_SHIPGATE_AGENT_MODE", "1") + + result = runner.invoke( + app, + [ + "init", + "--workspace", + str(workspace), + "--write", + "--agent-instructions=codex-skill", + ], + ) + + assert result.exit_code == 2 + assert not (workspace / "shipgate.yaml").exists() + assert '"error": "config_error"' in result.output + assert str(kit_path) in result.output + + +def 
test_agent_instructions_kit_absolute_override_outside_workspace_error( + tmp_path: Path, +) -> None: + workspace = _seed_workspace(tmp_path, "simple_langchain_agent") + outside = tmp_path / "outside-overrides" + outside.mkdir() + kit_path = workspace / ".agents-shipgate/adoption-kit.yaml" + kit_path.parent.mkdir(parents=True) + kit_path.write_text( + "schema_version: 1\n" + "targets:\n" + " codex-skill:\n" + f" overrides_dir: {outside}\n", + encoding="utf-8", + ) + + result = runner.invoke( + app, + [ + "init", + "--workspace", + str(workspace), + "--write", + "--agent-instructions=codex-skill", + ], + ) + + assert result.exit_code == 2 + assert "resolves outside workspace" in result.output + assert "is a symlink" not in result.output + + def test_invalid_selector_exits_two_with_human_error(tmp_path: Path) -> None: workspace = _seed_workspace(tmp_path, "simple_langchain_agent") result = runner.invoke( @@ -119,7 +273,7 @@ def test_invalid_selector_exits_two_with_human_error(tmp_path: Path) -> None: def test_invalid_selector_emits_structured_error_under_agent_mode( tmp_path: Path, - monkeypatch, + monkeypatch: pytest.MonkeyPatch, ) -> None: workspace = _seed_workspace(tmp_path, "simple_langchain_agent") monkeypatch.setenv("AGENTS_SHIPGATE_AGENT_MODE", "1") @@ -309,7 +463,7 @@ def test_write_cursor_skipped_when_user_modified_exits_two(tmp_path: Path) -> No def test_skipped_target_emits_structured_stderr_under_agent_mode( - tmp_path: Path, monkeypatch + tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: """Hand-edited cursor + AGENTS_SHIPGATE_AGENT_MODE=1 produces a structured next_action JSON line on stderr so coding-agent callers can route to a fix diff --git a/tests/test_packaging.py b/tests/test_packaging.py new file mode 100644 index 00000000..44f50fd7 --- /dev/null +++ b/tests/test_packaging.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import subprocess +import sys +import zipfile +from pathlib import Path + +import pytest + +REPO_ROOT = 
def test_wheel_includes_adoption_kits(tmp_path: Path) -> None:
    """Build a wheel from the repository and check the adoption kits ship in it.

    Skipped when the ``build`` module cannot be imported. The wheel is built
    into a ``dist`` directory under ``tmp_path`` with ``python -m build``
    run from ``REPO_ROOT``; the archive must then contain the bundled skill
    files and the Codex kit metadata under ``agents_shipgate/_adoption_kits/``.
    """
    pytest.importorskip("build", reason="python-build not installed")
    out_dir = tmp_path / "dist"
    # check=False on purpose: with capture_output=True a CalledProcessError
    # would hide the build log, so the exit code is asserted explicitly and
    # the captured streams are attached to the failure message.
    completed = subprocess.run(
        [sys.executable, "-m", "build", "--wheel", "--outdir", str(out_dir)],
        cwd=REPO_ROOT,
        check=False,
        capture_output=True,
        text=True,
    )
    assert completed.returncode == 0, (
        f"wheel build failed with exit code {completed.returncode}\n"
        f"stdout:\n{completed.stdout}\n"
        f"stderr:\n{completed.stderr}"
    )

    wheels = sorted(out_dir.glob("*.whl"))
    assert len(wheels) == 1, (
        f"expected exactly one wheel in {out_dir}, found {wheels}"
    )
    wheel = wheels[0]
    with zipfile.ZipFile(wheel) as archive:
        names = set(archive.namelist())

    expected = {
        "agents_shipgate/_adoption_kits/codex-skill/SKILL.md",
        "agents_shipgate/_adoption_kits/claude-code-skill/SKILL.md",
        "agents_shipgate/_adoption_kits/codex-skill/.agents-shipgate-kit-metadata.json",
    }
    # Report every absent member at once instead of stopping at the first.
    missing = sorted(expected - names)
    assert not missing, (
        f"wheel {wheel.name} is missing adoption-kit files: {missing}"
    )