diff --git a/docs/adoption-harness-automated.md b/docs/adoption-harness-automated.md index 371dc4e2..4e7de5dd 100644 --- a/docs/adoption-harness-automated.md +++ b/docs/adoption-harness-automated.md @@ -151,7 +151,7 @@ for real Cursor runs. |---|---| | Agent ignores Shipgate on `10-agents-md` (tool-PR prompt) | Strengthen wording in `docs/target-repo-agent-snippets.md` AGENTS.md block; the renderer in `src/agents_shipgate/cli/discovery/agent_instructions/renderers/` lifts from there. | | Scan invoked without `--ci-mode advisory` | Make advisory the default in the snippet example; consider `init --write` defaulting workflow to advisory. | -| Agent parses Markdown report not JSON | Add `agent_summary` excerpt to the snippet; have `src/agents_shipgate/cli/scan.py` print "Parse the JSON report at …" hint in agent mode. | +| Agent parses Markdown report not JSON | Add `agent_summary` excerpt to the snippet; have `src/agents_shipgate/cli/scan/` print "Parse the JSON report at …" hint in agent mode. | | `CHANGE_ME` left in `shipgate.yaml` | CLI fix in `src/agents_shipgate/cli/_register_init.py`. Add diagnostic in `src/agents_shipgate/cli/diagnostics.py`. | | `agents-shipgate-reports/` committed | `init --write` patches `.gitignore` if not already covered. | | Auto-asserted approval / confirmation / idempotency | **Detector blocker → docs fix.** Strengthen warning in target snippets. **No manifest schema change in P0.2.** | diff --git a/docs/architecture.md b/docs/architecture.md index 27857d5e..a5837148 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -126,7 +126,7 @@ report/{markdown,json,sarif} formatters write to agents-shipgate-reports/ packet/builder.build_packet Release Evidence Packet (v0.6) including the evidence_matrix lens ↓ -cli/scan.py:run_scan entry-point orchestrator. Composed of +cli/scan/orchestrator.py:run_scan entry-point orchestrator. 
Composed of nine sequential phase helpers (_prepare_scan → _load_inputs → _build_tools_and_agent → diff --git a/docs/decks/architecture-overview/README.md b/docs/decks/architecture-overview/README.md index 8a4bc9c8..f7491f1c 100644 --- a/docs/decks/architecture-overview/README.md +++ b/docs/decks/architecture-overview/README.md @@ -7,7 +7,7 @@ The deck is grounded in: - `docs/architecture.md` - `docs/trust-model.md` - `STABILITY.md` -- `src/agents_shipgate/cli/scan.py` +- `src/agents_shipgate/cli/scan/orchestrator.py` ## Files diff --git a/docs/decks/architecture-overview/src/build.mjs b/docs/decks/architecture-overview/src/build.mjs index f1bab15f..e7ac3f2c 100644 --- a/docs/decks/architecture-overview/src/build.mjs +++ b/docs/decks/architecture-overview/src/build.mjs @@ -290,7 +290,7 @@ async function main() { "The important boundary is before checks: all inputs become tools, artifacts, warnings, and one ScanContext.", grid({ name: "scan-grid", width: fill, height: fill, columns: [fr(1.05), fr(0.95)], columnGap: 52 }, [ codePanel("scan-code", [ - "src/agents_shipgate/cli/scan.py", + "src/agents_shipgate/cli/scan/orchestrator.py", "manifest = load_manifest(config_path)", "loaded_sources = _load_sources(...)", "api_source, api_artifacts = load_openai_api_artifacts(...)", @@ -307,7 +307,7 @@ async function main() { compactItem("scan-output", "Reports are generated before exit policy", "JSON/Markdown/SARIF exist even when strict mode fails.", C.green), ]), ]), - "Source: src/agents_shipgate/cli/scan.py", + "Source: src/agents_shipgate/cli/scan/orchestrator.py", ); // 6. Checks and findings @@ -361,7 +361,7 @@ async function main() { compactItem("exit-20", "20", "strict-mode gate failure", C.red), ])), ]), - "Source: STABILITY.md and src/agents_shipgate/cli/scan.py", + "Source: STABILITY.md and src/agents_shipgate/cli/scan/orchestrator.py", ); // 8. 
Trust and extension diff --git a/src/agents_shipgate/checks/baseline_integrity.py b/src/agents_shipgate/checks/baseline_integrity.py index b4d4b2ce..390b358c 100644 --- a/src/agents_shipgate/checks/baseline_integrity.py +++ b/src/agents_shipgate/checks/baseline_integrity.py @@ -8,7 +8,7 @@ hitting the report. Unlike checks in ``BUILTIN_CHECKS``, this module is not invoked by -``run_checks``. It is called directly from :mod:`agents_shipgate.cli.scan` +``run_checks``. It is called directly from :mod:`agents_shipgate.cli.scan.sanitization` after ``apply_baseline`` because the integrity check needs the loaded baseline and audit log paths, not the tool context. diff --git a/src/agents_shipgate/cli/_helpers.py b/src/agents_shipgate/cli/_helpers.py index b89dd9e2..2ddac2ad 100644 --- a/src/agents_shipgate/cli/_helpers.py +++ b/src/agents_shipgate/cli/_helpers.py @@ -14,9 +14,9 @@ diagnose_missing_manifest, ) from agents_shipgate.cli.discovery import discover_manifest_paths -from agents_shipgate.cli.scan import run_scan +from agents_shipgate.cli.scan.orchestrator import run_scan from agents_shipgate.core.errors import AgentsShipgateError, ConfigError, InputParseError -from agents_shipgate.core.findings import SEVERITY_ORDER +from agents_shipgate.core.findings.constants import SEVERITY_ORDER logger = logging.getLogger(__name__) diff --git a/src/agents_shipgate/cli/_register_baseline.py b/src/agents_shipgate/cli/_register_baseline.py index 8df58f4a..b3bdc8b2 100644 --- a/src/agents_shipgate/cli/_register_baseline.py +++ b/src/agents_shipgate/cli/_register_baseline.py @@ -6,7 +6,8 @@ import typer from agents_shipgate.checks.baseline_integrity import has_hash_mismatch -from agents_shipgate.cli.scan import _resolve_audit_log_path, run_scan +from agents_shipgate.cli.scan.orchestrator import run_scan +from agents_shipgate.cli.scan.path_helpers import _resolve_audit_log_path from agents_shipgate.config.loader import load_manifest from agents_shipgate.core.baseline import 
verify_baseline, write_baseline from agents_shipgate.core.errors import AgentsShipgateError, ConfigError, InputParseError diff --git a/src/agents_shipgate/cli/_register_doctor.py b/src/agents_shipgate/cli/_register_doctor.py index 8a32ca0d..281f9be6 100644 --- a/src/agents_shipgate/cli/_register_doctor.py +++ b/src/agents_shipgate/cli/_register_doctor.py @@ -12,7 +12,7 @@ top_next_actions, ) from agents_shipgate.cli.discovery.placeholders import collect_placeholders -from agents_shipgate.cli.scan import inspect_sources +from agents_shipgate.cli.scan.inspect import inspect_sources from agents_shipgate.core.errors import ConfigError, InputParseError from agents_shipgate.core.logging import configure_logging from agents_shipgate.schemas.diagnostics import NextAction diff --git a/src/agents_shipgate/cli/_register_scan.py b/src/agents_shipgate/cli/_register_scan.py index 0bc1f9aa..3fe2bc8c 100644 --- a/src/agents_shipgate/cli/_register_scan.py +++ b/src/agents_shipgate/cli/_register_scan.py @@ -17,7 +17,7 @@ ) from agents_shipgate.cli.agent_mode import emit_agent_mode_error as _emit_agent_mode_error from agents_shipgate.cli.diagnostics import top_next_actions -from agents_shipgate.cli.scan import run_scan +from agents_shipgate.cli.scan.orchestrator import run_scan from agents_shipgate.core.errors import AgentsShipgateError, ConfigError, InputParseError from agents_shipgate.core.logging import configure_logging from agents_shipgate.schemas.diagnostics import NextAction diff --git a/src/agents_shipgate/cli/fixture.py b/src/agents_shipgate/cli/fixture.py index e8da3d35..3bfcfcbb 100644 --- a/src/agents_shipgate/cli/fixture.py +++ b/src/agents_shipgate/cli/fixture.py @@ -11,7 +11,7 @@ import typer -from agents_shipgate.cli.scan import run_scan +from agents_shipgate.cli.scan.orchestrator import run_scan from agents_shipgate.core.errors import AgentsShipgateError, ConfigError, InputParseError from agents_shipgate.fixtures import ( FixtureNotFoundError, diff --git 
a/src/agents_shipgate/cli/scan.py b/src/agents_shipgate/cli/scan.py deleted file mode 100644 index c55d2faf..00000000 --- a/src/agents_shipgate/cli/scan.py +++ /dev/null @@ -1,2318 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -import logging -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Any - -from agents_shipgate.checks.baseline_integrity import ( - build_findings as build_integrity_findings, -) -from agents_shipgate.checks.registry import check_catalog, run_checks -from agents_shipgate.ci.github_summary import write_github_step_summary -from agents_shipgate.cli.discovery.placeholders import collect_placeholders -from agents_shipgate.config.loader import load_manifest, load_manifest_with_positions -from agents_shipgate.core.artifact_models import ( - AnthropicArtifacts, - CodexPluginArtifacts, - CrewAiArtifacts, - GoogleAdkArtifacts, - LangChainArtifacts, - N8nArtifacts, - OpenAIApiArtifacts, - ValidationArtifacts, -) -from agents_shipgate.core.artifacts import ArtifactBag -from agents_shipgate.core.baseline import ( - apply_baseline, - baseline_resolved_fingerprints, - load_baseline, - verify_baseline, -) -from agents_shipgate.core.baseline_audit import DEFAULT_AUDIT_LOG_PATH -from agents_shipgate.core.context import ScanContext -from agents_shipgate.core.domain import ( - Agent, - LoadedToolSource, - Tool, -) -from agents_shipgate.core.dynamic_defaults import dynamic_check_defaults -from agents_shipgate.core.errors import ConfigError, InputParseError -from agents_shipgate.core.findings import ( - annotate_remediation, - apply_severity_overrides, - apply_suppressions, - assign_finding_ids, - build_report, - build_reviewer_summary, - dedupe_findings, - finding_fingerprint, - tool_inventory, -) -from agents_shipgate.core.privacy import ( - RedactionStats, - build_privacy_audit, - redact_data, - sanitize_findings, - sanitize_model, - sanitize_tools, -) -from agents_shipgate.core.risk_hints 
import enrich_tools_with_risk_hints -from agents_shipgate.core.severity_overrides import resolve_severity_overrides -from agents_shipgate.inputs.policy_packs import load_policy_packs, run_policy_pack_rules -from agents_shipgate.inputs.protocol import ( - REGISTRY, - LoadedAdapterResult, - ToolSourceAdapter, -) -from agents_shipgate.packet.builder import build_packet -from agents_shipgate.packet.html import write_packet_html -from agents_shipgate.packet.json_packet import write_packet_json -from agents_shipgate.packet.markdown import write_packet_markdown -from agents_shipgate.packet.pdf import ( - PdfRendererUnavailable, - is_pdf_available, - render_packet_pdf, -) -from agents_shipgate.report.action_surface_diff import ( - action_reference_from_scan_reference, - attach_action_surface_finding_summary, - build_action_surface_facts, - compute_action_surface_diff, - enrich_action_surface_diff_with_source, - evaluate_action_surface_policies, -) -from agents_shipgate.report.capability_diff import apply_capability_diff -from agents_shipgate.report.json_report import report_json_payload, write_json_report -from agents_shipgate.report.markdown import write_markdown_report -from agents_shipgate.report.sarif import write_sarif_report -from agents_shipgate.report.tool_surface_diff import ( - ToolSurfaceDiffReference, - _stable_hash, - build_tool_surface_facts, - compute_tool_surface_diff, - disabled_tool_surface_diff, - enrich_tool_surface_diff_with_source, - load_tool_surface_diff_reference, - reference_from_baseline, -) -from agents_shipgate.schemas.codex_plugin import CodexPluginSurface -from agents_shipgate.schemas.common import parse_severity -from agents_shipgate.schemas.manifest import ( - AgentsShipgateManifest, - ToolSourceConfig, -) -from agents_shipgate.schemas.report import ( - BaselineSummary, - LoadedPolicyPack, - PolicyAudit, - ReadinessReport, -) -from agents_shipgate.schemas.surfaces import ( - ActionFact, - ActionSurfaceFacts, - ActionSurfaceHashes, - 
ToolSurfaceFacts, -) - -PACKET_FORMAT_NAMES = {"md", "json", "html", "pdf"} -"""Allowed values for ``--packet-format`` and ``output.packet.formats``.""" - -logger = logging.getLogger(__name__) - - -# ----------------------------------------------------------------------------- -# Phase-result dataclasses (v0.19 R-3 architecture review item E3). -# -# ``run_scan`` was a single 619-line function that mixed nine sequential -# concerns: manifest preparation, input loading, tool/agent building, diff -# loading, check execution + severity resolution, output planning, privacy -# sanitization, report building, and file writing. Decomposing into named -# phase helpers — each with an explicit input/output dataclass — makes the -# pipeline visible at the call site and lets the most fragile phase -# (sanitization) be reasoned about in isolation. -# -# Hard contracts preserved (verified by tests/test_scan.py + -# tests/test_patches_model.py + tests/test_source_provenance.py): -# -# - Public ``run_scan`` signature unchanged. -# - ``_run_id`` inputs byte-identical to pre-decomp; the order of -# operations inside each phase is preserved. -# - Sanitization (Phase 7) runs BEFORE any file is written. Every -# write-path receives only ``public_*`` values from the -# ``_SanitizedSurfaces`` bundle, never the raw values. -# - Existing helpers exported for direct test access (``_load_sources``, -# ``_flatten_and_deduplicate_tools``, ``_run_id``, -# ``_build_agent``) keep their signatures. -# ----------------------------------------------------------------------------- - - -@dataclass(frozen=True) -class _ResolvedManifest: - """Phase 1 output: manifest after CLI overrides applied.""" - - manifest: AgentsShipgateManifest - manifest_positions: Any - base_dir: Path - - -@dataclass(frozen=True) -class _LoadedInputs: - """Phase 2 output: source loading + artifact extraction + warnings. 
- - Warning buckets are kept separate so Phase 3 (``_build_tools_and_agent``) - can interleave ``duplicate_warnings`` from - ``_flatten_and_deduplicate_tools`` between ``source_only_warnings`` and - ``artifact_warnings``, preserving the pre-decomp deterministic order: - - source → duplicate → artifact → placeholder → policy_pack → dedup - - Collapsing them into a single ``warnings`` list here (the P3 bug that - this split fixes) would push duplicate warnings to the end, changing - ``report.source_warnings`` order for fixtures with both duplicate-tool - names and artifact/policy-pack warnings. - """ - - loaded_sources: list[LoadedToolSource] - artifact_bag: ArtifactBag - policy_packs: Any # LoadedPolicyPacks - source_only_warnings: list[str] # per-source warnings, no dedup yet - artifact_warnings_list: list[str] # from _artifact_warnings(artifact_bag) - placeholder_warnings: list[str] # from _manifest_placeholder_warnings - policy_pack_warnings: list[str] # from policy_packs.warnings - # v0.20: third-party adapter provenance from - # ``discover_third_party_adapters``. Both valid and invalid records - # appear here; ``loaded_adapters[].validation_status == "valid"`` - # distinguishes them. Empty list when --no-plugins is set or no - # third-party adapters are installed. 
- loaded_adapters: list[dict[str, Any]] - adk: GoogleAdkArtifacts | None - langchain: LangChainArtifacts | None - crewai: CrewAiArtifacts | None - n8n: N8nArtifacts | None - api: OpenAIApiArtifacts | None - anthropic: AnthropicArtifacts | None - codex_plugin: CodexPluginArtifacts | None - validation: ValidationArtifacts | None - - -@dataclass(frozen=True) -class _ToolsAndAgent: - """Phase 3 output: flattened/deduped/enriched tools + Agent + final warnings.""" - - tools: list[Tool] - agent: Agent - warnings: list[str] # deduplicated source warnings - - -@dataclass(frozen=True) -class _DiffReferences: - """Phase 4 output: optional baseline + diff_from references.""" - - baseline_file: Any # BaselineFile | None - baseline_display_path: str | None - diff_reference: ToolSurfaceDiffReference | None - diff_reference_error: str | None - - -@dataclass(frozen=True) -class _ChecksDecision: - """Phase 5 output: action surface + checks + severity + remediation.""" - - action_surface_facts: ActionSurfaceFacts - action_surface_diff: Any # ActionSurfaceDiff (internal/semantic) - findings: list[Any] # list[Finding] - legacy_fingerprints: list[str] - override_resolution: Any # SeverityOverrideResolution - loaded_plugins: list[dict[str, str | None]] - context: ScanContext - - -@dataclass(frozen=True) -class _OutputPlan: - """Phase 6 output: file paths + packet format set + privacy stats. - - ``privacy_stats`` is intentionally mutable — the sanitization phase - accumulates redaction counts into it. The dataclass is ``frozen`` only - in the sense that the field bindings don't change; the contained - ``RedactionStats`` mutates in place. - """ - - out_dir: Path - generated_paths: dict[str, Path] - packet_format_set: set[str] - output_surfaces: list[str] - privacy_stats: RedactionStats - generated_report_refs: Any - - -@dataclass -class _SanitizedSurfaces: - """Phase 7 output: every ``public_*`` value flowing into report/packet. 
- - Not frozen — the baseline-integrity branch appends to ``findings`` - in place and refreshes derivative fields. After Phase 7 returns, - every value here has been passed through ``redact_data`` / - ``sanitize_*`` exactly once. Phase 8+ (``build_report`` / - ``build_packet`` / ``_write_*``) MUST NOT re-redact and MUST NOT - touch any raw (non-``public_*``) value. - """ - - manifest: AgentsShipgateManifest - manifest_dir: str - project: Any - environment: Any - agent: Agent - tools: list[Tool] - findings: list[Any] - source_warnings: list[str] - api_artifacts: OpenAIApiArtifacts | None - anthropic_artifacts: AnthropicArtifacts | None - validation_artifacts: ValidationArtifacts | None - api_surface: Any - anthropic_surface: Any - frameworks_surface: Any - codex_plugin_surface: CodexPluginSurface | None - policy_audit: PolicyAudit - loaded_policy_packs: list[Any] - loaded_plugins: Any - loaded_adapters: Any # v0.20: list[dict[str, Any]]; sanitized via redact_data - diff_reference: ToolSurfaceDiffReference | None - action_surface_facts: ActionSurfaceFacts - action_surface_diff: Any - tool_surface_facts: Any - tool_surface_diff: Any - baseline_summary: Any - privacy_audit: Any - - -# ----------------------------------------------------------------------------- -# Phase helpers. Each takes explicit kwargs and returns a phase-result -# dataclass. Order of operations inside each helper matches the pre-decomp -# code one-for-one so ``_run_id`` and finding fingerprints stay -# byte-identical. -# ----------------------------------------------------------------------------- - - -def _prepare_scan( - *, - config_path: Path, - ci_mode: str | None, - fail_on: list[str] | None, - output_dir: Path | None, - formats: list[str] | None, - packet_enabled: bool | None, - packet_formats: list[str] | None, - baseline_mode: str, -) -> _ResolvedManifest: - """Phase 1: load manifest with positions; apply CLI overrides. - - CLI overrides take precedence over manifest values. 
Raises - ``ConfigError`` (exit 2) for invalid packet formats or unsupported - baseline modes — both fail before any source loading happens. - """ - raw_manifest, manifest_positions = load_manifest_with_positions(config_path) - manifest = raw_manifest.model_copy(deep=True) - if ci_mode: - manifest.ci.mode = ci_mode - if fail_on is not None: - manifest.ci.fail_on = [parse_severity(item) for item in fail_on] - if output_dir: - manifest.output.directory = str(output_dir) - if formats: - manifest.output.formats = formats - if packet_enabled is not None: - manifest.output.packet.enabled = packet_enabled - if packet_formats is not None: - invalid = [f for f in packet_formats if f not in PACKET_FORMAT_NAMES] - if invalid: - raise ConfigError( - "--packet-format values must be one of " - f"{sorted(PACKET_FORMAT_NAMES)}; got {invalid}" - ) - manifest.output.packet.formats = packet_formats - if baseline_mode != "new-findings": - raise ConfigError("--baseline-mode supports only new-findings") - return _ResolvedManifest( - manifest=manifest, - manifest_positions=manifest_positions, - base_dir=config_path.resolve().parent, - ) - - -def _load_inputs( - *, - manifest: AgentsShipgateManifest, - base_dir: Path, - config_path: Path, - policy_pack_paths: list[Path] | None, - verbose: bool, - plugins_enabled: bool | None = None, -) -> _LoadedInputs: - """Phase 2: dispatch every adapter through ``REGISTRY``, extract - typed artifacts from the ``ArtifactBag``, aggregate source warnings - (including CHANGE_ME placeholder warnings from the manifest text), - load policy packs. - - v0.20: also discovers third-party adapters from the - ``agents_shipgate.adapters`` entry-point group BEFORE - ``_load_sources`` runs, so the dispatcher resolves any - user-installed plugin source_types alongside built-ins. Discovery - is gated by ``plugins_enabled`` (mirroring the plugin-check flow - in ``checks/registry.py``). 
- """ - from agents_shipgate.inputs.protocol import discover_third_party_adapters - - # v0.20 (PR #111 review fix P1 #1+#2): build a per-scan registry - # clone so third-party discovery NEVER mutates the global - # ``REGISTRY``. Without this, a later ``--no-plugins`` scan would - # still see adapters registered by an earlier scan, and the - # collision check on scan-two would misclassify stable third- - # party adapters as ``source_type_collision`` (the global already - # has them from scan-one). The clone captures any monkeypatch - # state at this exact moment, so existing tests that - # ``monkeypatch.setitem(REGISTRY._adapters, …)`` still work. - scan_registry = REGISTRY.clone() - loaded_adapters: list[dict[str, Any]] = [] - discovery_records = discover_third_party_adapters( - scan_registry, - plugins_enabled=plugins_enabled, - loaded_adapters=loaded_adapters, - ) - # v0.20 (PR #111 review follow-up #2): map of source_type → valid - # LoadedAdapter record. Used by ``_load_sources`` to route - # third-party adapter ``load()`` calls through - # ``run_validated_adapter`` so runtime exceptions land in - # ``loaded_adapters[].runtime_errors`` instead of crashing the - # scan. Invalid records (validation_status != "valid") are - # excluded: they never registered on ``scan_registry`` and so the - # dispatcher will never reach them. 
- third_party_records: dict[str, Any] = { - record.adapter.source_type: record - for record in discovery_records - if record.adapter is not None - } - loaded_sources, artifact_bag = _load_sources( - manifest, - base_dir, - verbose=verbose, - registry=scan_registry, - third_party_records=third_party_records, - plugins_enabled=plugins_enabled, - ) - logger.debug( - "loaded sources", - extra={ - "agents_shipgate_source_count": len(loaded_sources), - "agents_shipgate_sources": [ - {"id": source.source_id, "type": source.source_type, "tools": len(source.tools)} - for source in loaded_sources - ], - }, - ) - # Keep warning buckets separate so Phase 3 can re-assemble them in the - # pre-decomp order: source → duplicate → artifact → placeholder → - # policy_pack → dedup. See _LoadedInputs docstring for the P3 rationale. - source_only_warnings: list[str] = [ - warning for loaded in loaded_sources for warning in loaded.warnings - ] - artifact_warnings_list: list[str] = _artifact_warnings(artifact_bag) - # Unresolved CHANGE_ME placeholders in the manifest mean the run is - # operating on stub data. Surface them as source warnings so the - # existing ``source_warning_count > 0`` branch in - # release_decision.evidence_coverage routes the gate to - # ``review_required`` and the packet §10 "Not proven" section - # mentions the placeholder verbatim. 
- placeholder_warnings: list[str] = _manifest_placeholder_warnings(config_path) - policy_packs = load_policy_packs( - manifest=manifest, - base_dir=base_dir, - cli_policy_packs=policy_pack_paths, - ) - policy_pack_warnings: list[str] = list(policy_packs.warnings) - return _LoadedInputs( - loaded_sources=loaded_sources, - artifact_bag=artifact_bag, - policy_packs=policy_packs, - source_only_warnings=source_only_warnings, - artifact_warnings_list=artifact_warnings_list, - placeholder_warnings=placeholder_warnings, - policy_pack_warnings=policy_pack_warnings, - loaded_adapters=loaded_adapters, - adk=artifact_bag.get("google_adk", GoogleAdkArtifacts), - langchain=artifact_bag.get("langchain", LangChainArtifacts), - crewai=artifact_bag.get("crewai", CrewAiArtifacts), - n8n=artifact_bag.get("n8n", N8nArtifacts), - api=artifact_bag.get("openai_api", OpenAIApiArtifacts), - anthropic=artifact_bag.get("anthropic_api", AnthropicArtifacts), - codex_plugin=artifact_bag.get("codex_plugin", CodexPluginArtifacts), - validation=artifact_bag.get("validation", ValidationArtifacts), - ) - - -def _build_tools_and_agent( - *, - manifest: AgentsShipgateManifest, - inputs: _LoadedInputs, -) -> _ToolsAndAgent: - """Phase 3: flatten/dedup tools with source priority, enrich with - manifest-derived risk hints, build the ``Agent`` object, finalize - the source-warnings list (dedup after appending the duplicate-tool - warnings from ``_flatten_and_deduplicate_tools``). - """ - tools, duplicate_warnings = _flatten_and_deduplicate_tools(inputs.loaded_sources) - # Assemble in pre-decomp order: source → duplicate → artifact → - # placeholder → policy_pack. Duplicate warnings MUST come immediately - # after per-source warnings (before artifact / placeholder / policy_pack) - # so ``report.source_warnings`` is byte-identical to pre-v0.19 output. - # (P3 fix: _LoadedInputs now carries separate buckets instead of a - # pre-assembled list so this interleaving is possible.) 
- warnings: list[str] = list(inputs.source_only_warnings) - warnings.extend(duplicate_warnings) - warnings.extend(inputs.artifact_warnings_list) - warnings.extend(inputs.placeholder_warnings) - warnings.extend(inputs.policy_pack_warnings) - # Some adapters expose the same warnings through both LoadedToolSource - # and the artifact bag; keep report warning output stable and unique. - warnings = list(dict.fromkeys(warnings)) - tools = enrich_tools_with_risk_hints(manifest, tools) - logger.debug( - "risk hints generated", - extra={ - "agents_shipgate_tools": [ - { - "name": tool.name, - "risk_hints": [ - {"tag": hint.tag, "confidence": hint.confidence, "source": hint.source} - for hint in tool.risk_hints - ], - } - for tool in tools - ] - }, - ) - agent = _build_agent( - manifest, tools, inputs.api, inputs.anthropic, inputs.adk - ) - return _ToolsAndAgent(tools=tools, agent=agent, warnings=warnings) - - -def _load_diff_references( - *, - baseline_path: Path | None, - diff_from_path: Path | None, - base_dir: Path, -) -> _DiffReferences: - """Phase 4: load optional baseline JSON + tool-surface diff reference. - - ``--diff-from`` wins over baseline-derived reference when both are - supplied. ``InputParseError`` from either path is caught and returned - as a string so the downstream diff is rendered as ``enabled=False`` - with a reviewer-visible note rather than aborting the scan. 
- """ - baseline_file = load_baseline(baseline_path) if baseline_path else None - baseline_display_path = ( - _relative_display_path(baseline_path, base_dir) if baseline_path else None - ) - diff_reference: ToolSurfaceDiffReference | None = None - diff_reference_error: str | None = None - try: - if diff_from_path: - diff_reference = load_tool_surface_diff_reference( - diff_from_path, - display_path=_relative_display_path(diff_from_path, base_dir), - ) - elif baseline_file: - diff_reference = reference_from_baseline( - baseline_file, - display_path=baseline_display_path, - ) - except InputParseError as exc: - diff_reference_error = str(exc) - return _DiffReferences( - baseline_file=baseline_file, - baseline_display_path=baseline_display_path, - diff_reference=diff_reference, - diff_reference_error=diff_reference_error, - ) - - -def _run_checks_and_decide( - *, - manifest: AgentsShipgateManifest, - manifest_positions: Any, - config_path: Path, - tools_and_agent: _ToolsAndAgent, - inputs: _LoadedInputs, - diffs: _DiffReferences, - plugins_enabled: bool | None, - suggest_patches: bool, -) -> _ChecksDecision: - """Phase 5: build internal action-surface facts, run all checks - (built-in + plugin + policy-pack + action-surface policies), - resolve severity overrides via the dynamic-default aggregator, - apply suppressions + optional patches, annotate remediation - metadata, snapshot ``legacy_fingerprints`` for pre-v0.18 baseline - compatibility. - - The INTERNAL ``action_surface_diff`` returned here is semantic - only — provenance enrichment happens later on the PUBLIC diff - derived from sanitized tools. Mutating ``reason`` here would leak - ``path:line`` into ``Finding.evidence``, churning fingerprints. 
- """ - action_surface_facts = build_action_surface_facts( - manifest, - agent_id=tools_and_agent.agent.id, - tools=tools_and_agent.tools, - ) - action_reference = action_reference_from_scan_reference(diffs.diff_reference) - action_surface_diff = compute_action_surface_diff( - action_surface_facts, - action_reference.facts if action_reference else None, - reference=action_reference, - ) - if diffs.diff_reference_error: - action_surface_diff.enabled = False - action_surface_diff.notes = [diffs.diff_reference_error] - context = ScanContext( - manifest=manifest, - agent=tools_and_agent.agent, - tools=tools_and_agent.tools, - config_path=config_path.resolve(), - framework_artifacts=inputs.artifact_bag, - action_surface_facts=action_surface_facts, - manifest_positions=manifest_positions, - ) - loaded_plugins: list[dict[str, str | None]] = [] - findings = run_checks( - context, - plugins_enabled=plugins_enabled, - loaded_plugins=loaded_plugins, - extra_known_check_ids={ - resolved.rule.id for resolved in inputs.policy_packs.rules - }, - ) - findings.extend(run_policy_pack_rules(context, inputs.policy_packs)) - findings.extend( - evaluate_action_surface_policies( - manifest, - action_surface_facts, - action_surface_diff, - agent_id=tools_and_agent.agent.id, - tools=tools_and_agent.tools, - ) - ) - findings = dedupe_findings(findings) - # v0.17 (M1) + v0.18 (PR #1): centralized aggregator covers every - # catalog check with ``dynamic_default=True``. See - # ``core/dynamic_defaults.py`` and ``severity_overrides.py`` for the - # tier-crossing / floor-enforcement contract. 
- catalog = check_catalog(plugins_enabled=plugins_enabled) - effective_dynamic_defaults = dynamic_check_defaults( - manifest, inputs.policy_packs, catalog=catalog - ) - override_resolution = resolve_severity_overrides( - overrides=manifest.severity_override_entries(), - acknowledgements=manifest.acknowledge_overrides(), - catalog=catalog, - extra_known_check_defaults=effective_dynamic_defaults, - ) - apply_severity_overrides(findings, override_resolution.override_by_check_id) - apply_suppressions(findings, manifest.checks.ignore) - if suggest_patches: - _attach_patches( - findings, - manifest, - config_path, - plugins_enabled=plugins_enabled, - ) - # v0.7: annotate every finding (regardless of --suggest-patches) with - # the four remediation fields. When patches are present they're - # derived from those; otherwise the per-check CheckMetadata seeds - # the values. - annotate_remediation( - findings, - _check_metadata_lookup(plugins_enabled=plugins_enabled), - ) - legacy_fingerprints = [finding_fingerprint(finding) for finding in findings] - logger.debug( - "checks completed", - extra={ - "agents_shipgate_finding_count": len(findings), - "agents_shipgate_suppressed_count": sum( - 1 for finding in findings if finding.suppressed - ), - }, - ) - return _ChecksDecision( - action_surface_facts=action_surface_facts, - action_surface_diff=action_surface_diff, - findings=findings, - legacy_fingerprints=legacy_fingerprints, - override_resolution=override_resolution, - loaded_plugins=loaded_plugins, - context=context, - ) - - -def _plan_outputs( - *, - manifest: AgentsShipgateManifest, - base_dir: Path, -) -> _OutputPlan: - """Phase 6: resolve output dir + planned file paths + packet format - set (filtering PDF if weasyprint is missing). Initialize the - ``RedactionStats`` accumulator and the already-redacted - ``generated_reports`` map needed by ``build_report`` downstream. 
- """ - out_dir = (base_dir / manifest.output.directory).resolve() - packet_cfg = manifest.output.packet - packet_format_set, packet_pdf_skipped = _resolve_packet_format_set(packet_cfg) - if packet_pdf_skipped: - # PDF availability is an *output renderer* concern, not a source - # loader concern. Routing it through `warnings` would inflate - # `evidence_coverage.source_warning_count` and add a noise - # residual to the packet's §10, telling reviewers to rerun the - # scan after fixing source warnings even when no source loader - # had a problem. Log it instead — same channel as runtime - # WeasyPrint failures in `_write_packet`. - logger.warning( - "packet.pdf requested but weasyprint is not installed; " - "install with `pipx install 'agents-shipgate[pdf]'` to " - "enable. Skipping PDF for this run." - ) - generated_paths = _planned_generated_paths( - out_dir, - manifest.output.formats, - packet_enabled=packet_cfg.enabled, - packet_formats=packet_format_set, - ) - privacy_stats = RedactionStats() - generated_report_refs = redact_data( - { - key: _relative_display_path(path, base_dir) - for key, path in generated_paths.items() - }, - stats=privacy_stats, - path="generated_reports", - ) - output_surfaces = list(generated_paths) - if os.environ.get("GITHUB_STEP_SUMMARY"): - output_surfaces.append("github_step_summary") - return _OutputPlan( - out_dir=out_dir, - generated_paths=generated_paths, - packet_format_set=packet_format_set, - output_surfaces=output_surfaces, - privacy_stats=privacy_stats, - generated_report_refs=generated_report_refs, - ) - - -def _sanitize_for_output( - *, - manifest: AgentsShipgateManifest, - config_path: Path, - baseline_path: Path | None, - inputs: _LoadedInputs, - tools_and_agent: _ToolsAndAgent, - diffs: _DiffReferences, - decision: _ChecksDecision, - plan: _OutputPlan, - plugins_enabled: bool | None, -) -> _SanitizedSurfaces: - """Phase 7: privacy redaction of every value that flows into a - report or packet — STABILITY contract: runs 
BEFORE any file is - written. Also: assign public finding IDs (redacted-evidence - fingerprints), apply baseline (with legacy-fingerprint - compatibility), run baseline-integrity checks, build public - tool/action surface facts + diffs (enriched with provenance from - the *public* tool source index, never from the raw one), build the - final privacy audit envelope. - - Returns a single ``_SanitizedSurfaces`` bundle. Nothing in later - phases re-redacts, and ``build_report`` / ``build_packet`` see only - these values. - """ - privacy_stats = plan.privacy_stats - - public_manifest = sanitize_model( - manifest, AgentsShipgateManifest, stats=privacy_stats, path="manifest" - ) - public_manifest_dir = redact_data( - str(config_path.resolve().parent), - stats=privacy_stats, - path="manifest_dir", - ) - public_api_artifacts = ( - sanitize_model( - inputs.api, - OpenAIApiArtifacts, - stats=privacy_stats, - path="api_artifacts", - ) - if inputs.api - else None - ) - public_anthropic_artifacts = ( - sanitize_model( - inputs.anthropic, - AnthropicArtifacts, - stats=privacy_stats, - path="anthropic_artifacts", - ) - if inputs.anthropic - else None - ) - public_validation_artifacts = ( - sanitize_model( - inputs.validation, - ValidationArtifacts, - stats=privacy_stats, - path="validation_artifacts", - ) - if inputs.validation - else None - ) - public_tools = sanitize_tools(tools_and_agent.tools, stats=privacy_stats) - public_findings = sanitize_findings(decision.findings, stats=privacy_stats) - assign_finding_ids(public_findings) - - public_project = redact_data( - public_manifest.project.model_dump(exclude_none=True), - stats=privacy_stats, - path="project", - ) - public_agent = sanitize_model( - tools_and_agent.agent, Agent, stats=privacy_stats, path="agent" - ) - public_environment = redact_data( - public_manifest.environment.model_dump(exclude_none=True), - stats=privacy_stats, - path="environment", - ) - public_source_warnings = redact_data( - tools_and_agent.warnings, - 
stats=privacy_stats, - path="source_warnings[]", - ) - public_api_surface = redact_data( - public_api_artifacts.surface_summary() if public_api_artifacts else None, - stats=privacy_stats, - path="api_surface", - ) - public_anthropic_surface = redact_data( - public_anthropic_artifacts.surface_summary() - if public_anthropic_artifacts - else None, - stats=privacy_stats, - path="anthropic_surface", - ) - public_frameworks_surface = redact_data( - _frameworks_surface( - inputs.adk, - inputs.langchain, - inputs.crewai, - inputs.n8n, - ), - stats=privacy_stats, - path="frameworks", - ) - public_codex_plugin_surface = _sanitize_codex_plugin_surface( - inputs.codex_plugin.surface_summary() if inputs.codex_plugin else None, - stats=privacy_stats, - ) - public_policy_audit = sanitize_model( - decision.override_resolution.audit, - PolicyAudit, - stats=privacy_stats, - path="policy_audit", - ) - public_loaded_policy_packs = [ - sanitize_model( - pack, - LoadedPolicyPack, - stats=privacy_stats, - path="loaded_policy_packs[]", - ) - for pack in inputs.policy_packs.loaded - ] - public_loaded_plugins = redact_data( - decision.loaded_plugins, - stats=privacy_stats, - path="loaded_plugins[]", - ) - # v0.20: third-party adapter provenance. Same redaction shape as - # loaded_plugins[] — entry-point ``value`` strings and distribution - # metadata are first-party and don't carry secrets, but the audit - # envelope flows through redact_data for forward-compat with future - # adapter-emitted fields. 
- public_loaded_adapters = redact_data( - inputs.loaded_adapters, - stats=privacy_stats, - path="loaded_adapters[]", - ) - - public_diff_reference = _sanitize_diff_reference( - diffs.diff_reference, - stats=privacy_stats, - ) - public_action_surface_facts = _build_public_action_surface_facts( - raw_facts=decision.action_surface_facts, - manifest=public_manifest, - agent_id=public_agent.id, - tools=public_tools, - stats=privacy_stats, - ) - public_action_reference = action_reference_from_scan_reference(public_diff_reference) - public_action_surface_diff = compute_action_surface_diff( - public_action_surface_facts, - public_action_reference.facts if public_action_reference else None, - reference=public_action_reference, - ) - if diffs.diff_reference_error: - public_action_surface_diff.enabled = False - public_action_surface_diff.notes = redact_data( - [diffs.diff_reference_error], - stats=privacy_stats, - path="action_surface_diff.notes", - ) - # v0.19 reviewer-grade provenance: enrich the PUBLIC action-surface - # diff rows from ``public_tools`` (already sanitized) so the - # rendered ``report.json`` and packet §3B carry tool source - # citations on every reason field. - enrich_action_surface_diff_with_source( - public_action_surface_diff, _tool_source_index(public_tools) - ) - - baseline_summary = None - if diffs.baseline_file and diffs.baseline_display_path: - baseline_summary = apply_baseline( - public_findings, - diffs.baseline_file, - display_path=diffs.baseline_display_path, - legacy_fingerprints=decision.legacy_fingerprints, - ) - baseline_summary = sanitize_model( - baseline_summary, - BaselineSummary, - stats=privacy_stats, - path="baseline", - ) - # v0.5 baseline-integrity (M2). Run this after public finding - # fingerprints are assigned so integrity output does not depend on - # raw secret-bearing finding IDs. 
- integrity_mode = manifest.baseline.integrity_mode - if integrity_mode != "off" and baseline_path is not None: - audit_log_path = _resolve_audit_log_path(manifest, baseline_path) - try: - static_issues = verify_baseline(baseline_path, audit_log_path) - except InputParseError as exc: - logger.warning( - "baseline integrity verification failed", - extra={ - "agents_shipgate_baseline_path": str(baseline_path), - "agents_shipgate_error": str(exc), - }, - ) - static_issues = [] - warning = f"Baseline integrity check skipped: {exc}" - public_source_warnings.append( - redact_data( - warning, - stats=privacy_stats, - path="source_warnings[]", - ) - ) - stale_issues = baseline_resolved_fingerprints( - public_findings, - diffs.baseline_file, - legacy_fingerprints=decision.legacy_fingerprints, - ) - baseline_privacy_hint = None - if stale_issues and privacy_stats.occurrence_count: - baseline_privacy_hint = ( - "If these stale baseline entries appeared immediately after " - "upgrading to report schema v0.18, review and regenerate the " - "baseline. Secret-bearing public fingerprints are now computed " - "from redacted evidence." 
- ) - for issue in stale_issues: - issue.evidence["v0_18_privacy_migration_hint"] = ( - baseline_privacy_hint - ) - integrity_findings = build_integrity_findings( - static_issues + stale_issues, - context=decision.context, - integrity_mode=integrity_mode, - ) - if baseline_privacy_hint: - for finding in integrity_findings: - if finding.check_id == "SHIP-BASELINE-ENTRY-STALE": - finding.recommendation = ( - f"{finding.recommendation} {baseline_privacy_hint}" - ) - if integrity_findings: - public_findings.extend( - sanitize_findings(integrity_findings, stats=privacy_stats) - ) - assign_finding_ids(public_findings) - annotate_remediation( - public_findings, - _check_metadata_lookup(plugins_enabled=plugins_enabled), - ) - attach_action_surface_finding_summary(public_action_surface_diff, public_findings) - - public_tool_surface_facts = sanitize_model( - build_tool_surface_facts( - public_manifest, - public_tools, - public_findings, - public_api_artifacts, - public_anthropic_artifacts, - ), - ToolSurfaceFacts, - stats=privacy_stats, - path="tool_surface_facts", - ) - if diffs.diff_reference_error: - public_tool_surface_diff = disabled_tool_surface_diff( - redact_data( - diffs.diff_reference_error, - stats=privacy_stats, - path="tool_surface_diff.notes", - ) - ) - else: - public_tool_surface_diff = compute_tool_surface_diff( - public_tool_surface_facts, - public_diff_reference.facts if public_diff_reference else None, - public_findings, - reference=public_diff_reference, - ) - # v0.19 reviewer-grade provenance: enrich tool-surface diff - # controls (and any other reason-bearing rows) with the public - # tool path:line citation so the rendered report.json and packet - # §3A carry source info on every change-row reason. 
- enrich_tool_surface_diff_with_source( - public_tool_surface_diff, _tool_source_index(public_tools) - ) - privacy_audit = build_privacy_audit( - privacy_stats, - output_surfaces=plan.output_surfaces, - notes=[ - "Default-on best-effort pattern/key redaction ran before public artifacts were written.", - "Redaction audit paths contain counts and secret kinds only; raw values and raw hashes are not emitted.", - *( - [ - "Baseline matching accepts legacy pre-v0.18 raw secret fingerprints for compatibility; re-save reviewed baselines to migrate to redacted public fingerprints." - ] - if diffs.baseline_file and privacy_stats.occurrence_count - else [] - ), - ], - ) - return _SanitizedSurfaces( - manifest=public_manifest, - manifest_dir=public_manifest_dir, - project=public_project, - environment=public_environment, - agent=public_agent, - tools=public_tools, - findings=public_findings, - source_warnings=public_source_warnings, - api_artifacts=public_api_artifacts, - anthropic_artifacts=public_anthropic_artifacts, - validation_artifacts=public_validation_artifacts, - api_surface=public_api_surface, - anthropic_surface=public_anthropic_surface, - frameworks_surface=public_frameworks_surface, - codex_plugin_surface=public_codex_plugin_surface, - policy_audit=public_policy_audit, - loaded_policy_packs=public_loaded_policy_packs, - loaded_plugins=public_loaded_plugins, - loaded_adapters=public_loaded_adapters, - diff_reference=public_diff_reference, - action_surface_facts=public_action_surface_facts, - action_surface_diff=public_action_surface_diff, - tool_surface_facts=public_tool_surface_facts, - tool_surface_diff=public_tool_surface_diff, - baseline_summary=baseline_summary, - privacy_audit=privacy_audit, - ) - - -def _build_final_report( - *, - manifest: AgentsShipgateManifest, - sanitized: _SanitizedSurfaces, - plan: _OutputPlan, -) -> tuple[ReadinessReport, Any]: - """Phase 8: hash the run_id, build the ``ReadinessReport`` from the - fully sanitized surfaces, run 
capability-diff enrichment, and - project the JSON payload that packet building consumes. - - The ``_run_id`` inputs are exactly what they were pre-decomp — - STABILITY contract requires byte-identical hashes for the same - workspace. - """ - report = build_report( - run_id=_run_id( - manifest, - sanitized.tools, - sanitized.findings, - project=sanitized.project, - agent_name=sanitized.agent.name, - environment=sanitized.environment, - api_surface=sanitized.api_surface, - anthropic_surface=sanitized.anthropic_surface, - frameworks=sanitized.frameworks_surface, - codex_plugin_surface=sanitized.codex_plugin_surface, - action_surface_facts=sanitized.action_surface_facts, - ), - manifest=sanitized.manifest, - project=sanitized.project, - manifest_dir=sanitized.manifest_dir, - agent=sanitized.agent.model_dump(exclude_none=True), - environment=sanitized.environment, - tools=sanitized.tools, - findings=sanitized.findings, - generated_reports=plan.generated_report_refs, - ci_mode=sanitized.manifest.ci.mode, - fail_on=sanitized.manifest.ci.fail_on, - new_findings_only=sanitized.baseline_summary is not None, - loaded_policy_packs=sanitized.loaded_policy_packs, - loaded_plugins=sanitized.loaded_plugins, - loaded_adapters=sanitized.loaded_adapters, - source_warnings=sanitized.source_warnings, - api_surface=sanitized.api_surface, - anthropic_surface=sanitized.anthropic_surface, - frameworks=sanitized.frameworks_surface, - codex_plugin_surface=sanitized.codex_plugin_surface, - baseline=sanitized.baseline_summary, - tool_surface_facts=sanitized.tool_surface_facts, - tool_surface_diff=sanitized.tool_surface_diff, - action_surface_facts=sanitized.action_surface_facts, - action_surface_diff=sanitized.action_surface_diff, - # v0.17 (M1): top-of-report policy audit. Always emitted (may - # be an empty envelope) so consumers can rely on the field - # existing in v0.17 reports. 
- policy_audit=sanitized.policy_audit, - privacy_audit=sanitized.privacy_audit, - ) - apply_capability_diff(report, sanitized.tools) - # v0.20: reviewer_summary is built HERE — after apply_capability_diff - # has populated misalignments / release_consequence / suggested_scenarios. - # Building it inside build_report() would project from incomplete state - # (capability_misalignments would always be 0). Pure projection, no I/O. - report.reviewer_summary = build_reviewer_summary( - findings=sanitized.findings, - report=report, - ) - public_report_payload = report_json_payload(report) - return report, public_report_payload - - -def _write_outputs( - *, - report: ReadinessReport, - public_report_payload: Any, - sanitized: _SanitizedSurfaces, - plan: _OutputPlan, - manifest: AgentsShipgateManifest, - config_path: Path, - packet_generated_at: str | None, -) -> None: - """Phase 9: write report (md/json/sarif) + packet (md/json/html/pdf). - - Both writes consume only sanitized values; the raw manifest is - passed to ``build_packet`` for non-output internal use (packet - builder reads manifest defaults like ``output.packet.formats`` but - never serializes raw manifest content into the packet). 
- """ - _write_reports(report, plan.generated_paths, manifest.output.formats) - if manifest.output.packet.enabled and plan.packet_format_set: - assert report.release_decision is not None - packet = build_packet( - manifest=manifest, - agent=report.agent, - project=report.project, - environment=report.environment, - run_id=report.run_id, - tools=sanitized.tools, - findings=sanitized.findings, - release_decision=report.release_decision, - api_artifacts=sanitized.api_artifacts, - anthropic_artifacts=sanitized.anthropic_artifacts, - source_warnings=sanitized.source_warnings, - validation_artifacts=sanitized.validation_artifacts, - tool_surface_diff=report.tool_surface_diff, - action_surface_diff=report.action_surface_diff, - report_payload=public_report_payload, - generated_at=packet_generated_at, - config_ref=config_path.resolve().name, - ) - _write_packet(packet, plan.generated_paths, plan.packet_format_set) - - -# ----------------------------------------------------------------------------- -# Public entry point. -# ----------------------------------------------------------------------------- - - -def run_scan( - *, - config_path: Path, - output_dir: Path | None = None, - formats: list[str] | None = None, - ci_mode: str | None = None, - fail_on: list[str] | None = None, - baseline_path: Path | None = None, - diff_from_path: Path | None = None, - baseline_mode: str = "new-findings", - deep_import: bool = False, - policy_pack_paths: list[Path] | None = None, - plugins_enabled: bool | None = None, - verbose: bool = False, - suggest_patches: bool = False, - packet_enabled: bool | None = None, - packet_formats: list[str] | None = None, - packet_generated_at: str | None = None, -) -> tuple[ReadinessReport, int]: - """Run a full scan pipeline. Returns ``(report, exit_code)``. - - Orchestrates nine sequential phases (see the phase helpers above). - Public signature, exit-code contract, and ``_run_id`` hash inputs - are stable across the v0.19 R-3 decomposition refactor. 
- """ - if deep_import: - raise ConfigError("Deep import is intentionally deferred and is not supported.") - - resolved = _prepare_scan( - config_path=config_path, - ci_mode=ci_mode, - fail_on=fail_on, - output_dir=output_dir, - formats=formats, - packet_enabled=packet_enabled, - packet_formats=packet_formats, - baseline_mode=baseline_mode, - ) - inputs = _load_inputs( - manifest=resolved.manifest, - base_dir=resolved.base_dir, - config_path=config_path, - policy_pack_paths=policy_pack_paths, - verbose=verbose, - plugins_enabled=plugins_enabled, - ) - tools_and_agent = _build_tools_and_agent( - manifest=resolved.manifest, - inputs=inputs, - ) - diffs = _load_diff_references( - baseline_path=baseline_path, - diff_from_path=diff_from_path, - base_dir=resolved.base_dir, - ) - decision = _run_checks_and_decide( - manifest=resolved.manifest, - manifest_positions=resolved.manifest_positions, - config_path=config_path, - tools_and_agent=tools_and_agent, - inputs=inputs, - diffs=diffs, - plugins_enabled=plugins_enabled, - suggest_patches=suggest_patches, - ) - plan = _plan_outputs( - manifest=resolved.manifest, - base_dir=resolved.base_dir, - ) - sanitized = _sanitize_for_output( - manifest=resolved.manifest, - config_path=config_path, - baseline_path=baseline_path, - inputs=inputs, - tools_and_agent=tools_and_agent, - diffs=diffs, - decision=decision, - plan=plan, - plugins_enabled=plugins_enabled, - ) - report, public_report_payload = _build_final_report( - manifest=resolved.manifest, - sanitized=sanitized, - plan=plan, - ) - _write_outputs( - report=report, - public_report_payload=public_report_payload, - sanitized=sanitized, - plan=plan, - manifest=resolved.manifest, - config_path=config_path, - packet_generated_at=packet_generated_at, - ) - write_github_step_summary(report) - assert report.release_decision is not None # build_report always populates it - return report, report.release_decision.fail_policy.exit_code - - - - -def inspect_sources( - *, - config_path: 
Path, - verbose: bool = False, - plugins_enabled: bool | None = None, -) -> dict[str, object]: - """``doctor``'s manifest-introspection entry point. - - v0.20 (PR #111 review fix follow-up #3): mirrors ``_load_inputs``'s - per-scan registry clone + adapter discovery so ``doctor`` can - inspect manifests that reference third-party source types. Before - this fix, the global ``REGISTRY`` was builtin-only (by design, - after the per-scan-registry refactor), so a manifest with - ``tool_sources[].type: demo_source`` scanned fine but ``doctor`` - raised ``ConfigError: No adapter registered``. - """ - - from agents_shipgate.inputs.protocol import discover_third_party_adapters - - manifest = load_manifest(config_path) - base_dir = config_path.resolve().parent - unresolved_sources = _resolve_source_paths(manifest, base_dir, config_path) - if unresolved_sources: - # Drop unresolved-required sources from the manifest before loading - # so doctor returns a structured payload with `unresolved_sources` - # instead of raising InputParseError. scan() does not use this path - # — its `_load_sources` call is unchanged and still raises. - unresolved_ids = {entry["id"] for entry in unresolved_sources} - manifest = manifest.model_copy( - update={ - "tool_sources": [ - src for src in manifest.tool_sources - if src.id not in unresolved_ids - ] - } - ) - # v0.20 (PR #111 review follow-up #3): build a per-scan registry - # for ``doctor`` so it sees the same adapter set as ``scan``. The - # global ``REGISTRY`` is builtin-only by design after the - # per-scan-clone refactor; without this discovery step, - # third-party source types would be unresolvable here. 
- scan_registry = REGISTRY.clone() - loaded_adapters: list[dict[str, Any]] = [] - discovery_records = discover_third_party_adapters( - scan_registry, - plugins_enabled=plugins_enabled, - loaded_adapters=loaded_adapters, - ) - third_party_records: dict[str, Any] = { - record.adapter.source_type: record - for record in discovery_records - if record.adapter is not None - } - loaded_sources, artifact_bag = _load_sources( - manifest, - base_dir, - verbose=verbose, - registry=scan_registry, - third_party_records=third_party_records, - plugins_enabled=plugins_enabled, - ) - adk_artifacts = artifact_bag.get("google_adk", GoogleAdkArtifacts) - langchain_artifacts = artifact_bag.get("langchain", LangChainArtifacts) - crewai_artifacts = artifact_bag.get("crewai", CrewAiArtifacts) - n8n_artifacts = artifact_bag.get("n8n", N8nArtifacts) - api_artifacts = artifact_bag.get("openai_api", OpenAIApiArtifacts) - anthropic_artifacts = artifact_bag.get("anthropic_api", AnthropicArtifacts) - codex_plugin_artifacts = artifact_bag.get("codex_plugin", CodexPluginArtifacts) - tools, duplicate_warnings = _flatten_and_deduplicate_tools(loaded_sources) - warnings = [warning for loaded in loaded_sources for warning in loaded.warnings] - warnings.extend(duplicate_warnings) - warnings.extend(_artifact_warnings(artifact_bag)) - policy_packs = load_policy_packs(manifest, base_dir) - warnings.extend(policy_packs.warnings) - # Some adapters expose the same warnings through both LoadedToolSource - # and the artifact bag; keep doctor warning output stable and unique. 
- warnings = list(dict.fromkeys(warnings)) - payload = { - "project": manifest.project.name, - "agent": manifest.agent.name, - "config": str(config_path), - "total_tools": len(tools), - "sources": [ - { - "id": source.source_id, - "type": source.source_type, - "tool_count": len(source.tools), - "sample_tool": source.tools[0].name if source.tools else None, - "warnings": source.warnings, - } - for source in loaded_sources - ], - "api_surface": api_artifacts.surface_summary() if api_artifacts else None, - "anthropic_surface": ( - anthropic_artifacts.surface_summary() if anthropic_artifacts else None - ), - "frameworks": _frameworks_surface( - adk_artifacts, - langchain_artifacts, - crewai_artifacts, - n8n_artifacts, - ), - "codex_plugin_surface": ( - codex_plugin_artifacts.surface_summary().model_dump(mode="json") - if codex_plugin_artifacts - else None - ), - "policy_packs": [pack.model_dump(mode="json") for pack in policy_packs.loaded], - # v0.20 (PR #111 review follow-up #3): surface third-party - # adapter discovery results in the doctor payload so the - # operator can confirm which extensions were loaded (or why - # they were skipped) without running a full scan. 
- "loaded_adapters": loaded_adapters, - "baseline": _default_baseline_status(base_dir), - "warnings": warnings, - "unresolved_sources": unresolved_sources, - "manifest_summary": { - "environment_target": manifest.environment.target, - "has_permissions": bool( - manifest.permissions.scopes or manifest.permissions.credential_mode - ), - "has_policies": bool( - manifest.policies.require_approval_for_tools - or manifest.policies.require_confirmation_for_tools - or manifest.policies.require_idempotency_for_tools - ), - "scope_count": len(manifest.permissions.scopes), - }, - } - return redact_data(payload, stats=RedactionStats(), path="$") - - -def _resolve_source_paths( - manifest, base_dir: Path, config_path: Path -) -> list[dict[str, object]]: - """Return required tool_sources whose declared path is unusable. - - Two failure modes are flagged so doctor can surface them as a - ``SHIP-DIAG-MISSING-SOURCE-FILE`` diagnostic instead of crashing in - a downstream loader: - - - ``reason="missing"`` — the file does not exist. - - ``reason="outside_manifest_dir"`` — the file exists but escapes the - manifest's containment boundary (loaders mirror this check and - would raise ``InputParseError``). - - Optional sources are not reported here — the existing - ``_load_sources`` flow handles them with a warning. Returned entries - carry the source id, the declared path string, the 1-indexed line - number in the manifest text where the path appears (best-effort), - and the failure reason. 
- """ - unresolved: list[dict[str, object]] = [] - try: - manifest_text = config_path.read_text(encoding="utf-8") - except (OSError, UnicodeDecodeError): - manifest_text = "" - text_lines = manifest_text.splitlines() - base_resolved = base_dir.resolve() - for source in manifest.tool_sources: - if source.optional: - continue - if source.path is None: - continue - raw_path = Path(source.path) - candidate = ( - raw_path if raw_path.is_absolute() else base_resolved / raw_path - ).resolve() - if not candidate.exists(): - reason = "missing" - else: - try: - candidate.relative_to(base_resolved) - except ValueError: - reason = "outside_manifest_dir" - else: - continue - line_no: int | None = None - needle = f"path: {source.path}" - for index, line in enumerate(text_lines, start=1): - if needle in line: - line_no = index - break - unresolved.append( - { - "id": source.id, - "declared_path": source.path, - "line": line_no, - "reason": reason, - } - ) - return unresolved - - -def _load_sources( - manifest: AgentsShipgateManifest, - base_dir: Path, - *, - verbose: bool, - registry: Any = None, - third_party_records: dict[str, Any] | None = None, - plugins_enabled: bool | None = None, -) -> tuple[list[LoadedToolSource], ArtifactBag]: - """Dispatch every adapter through the supplied ``registry``. - - Returns ``(loaded_sources, artifact_bag)``. ``artifact_bag`` is a - typed ``ArtifactBag`` with per-scan adapter artifacts keyed by - ``source_type``. Per-source adapters (mcp, openapi, - openai_agents_sdk) never populate artifacts. - - Ordering is deterministic and matches the legacy run_scan order: - - 1. per-source loaders in tool_sources declared order - 2. per-scan adapters in registry iteration order: - google_adk → langchain → crewai → n8n → openai_api - → anthropic_api → codex_plugin → validation - - Per-scan adapters are invoked unconditionally in pass 2, in - canonical order — NOT in tool_sources declared order. 
This matches - today's run_scan exactly: framework loaders fire once per scan in - fixed order, and the manifest-only loaders (openai_api, - anthropic_api) and codex_plugin trail them. - Per-scan source types appearing in tool_sources are ignored by - pass 1 — they would be redundant; framework loaders already iterate - every matching entry internally via the manifest. - - v0.20 (PR #111 review fix): ``registry`` is the per-scan registry - built by ``_load_inputs`` (``REGISTRY.clone()`` plus any - third-party adapters validated in this scan). Defaults to the - module-global ``REGISTRY`` only for callers that bypass - ``_load_inputs`` (notably the legacy tests in - ``tests/test_adapter_registry.py``). New code should always pass - a per-scan registry. - - v0.20 (PR #111 review fix follow-up #2): ``third_party_records`` - maps each validated third-party ``source_type`` to its - ``LoadedAdapter`` record (from ``discover_third_party_adapters``). - When set, the dispatcher routes those adapters through - ``run_validated_adapter`` so any exception during their - ``load()`` call is captured into - ``loaded_adapters[].runtime_errors`` and the scan continues in - lenient mode (or trips ``--strict-plugins`` exit 4 in strict - mode). Built-in adapters keep the direct call shape — a built-in - raising means the scanner itself is broken and must abort loudly. - - ``plugins_enabled`` is forwarded into ``AdapterRegistry.require`` so - unknown third-party source-type errors reflect explicit CLI - overrides such as ``--no-plugins`` instead of only inspecting the - environment. - """ - if registry is None: - registry = REGISTRY - if third_party_records is None: - third_party_records = {} - per_source_loaded: list[LoadedToolSource] = [] - per_scan_loaded: list[LoadedToolSource] = [] - bag = ArtifactBag() - - # Pass 1 — per-source adapters only, in tool_sources declared - # order. Per-scan source types (langchain, crewai, etc.) 
are - # skipped here; pass 2 invokes them in canonical registry order - # regardless of where they appear in tool_sources. This protects - # the dedup tie-break in _flatten_and_deduplicate_tools from - # changing based on user-facing tool_sources ordering. - for source in manifest.tool_sources: - adapter = registry.require(source.type, plugins_enabled=plugins_enabled) - if adapter.scope != "per_source": - continue - third_party_record = third_party_records.get(source.type) - result = _invoke_per_source_adapter( - adapter, - source, - base_dir, - manifest, - verbose=verbose, - third_party_record=third_party_record, - ) - if result is None: - # Third-party adapter raised at runtime; the wrapper - # captured the failure into runtime_errors and we skip - # absorbing the (None) result. - continue - _absorb(result, source.type, per_source_loaded, bag, adapter) - - # Pass 2 — every per-scan adapter fires once, in registry order. - # Covers framework adapters (always check their manifest section - # internally and may emit zero LoadedToolSource entries when not - # configured) and manifest-only adapters (openai_api, - # anthropic_api, n8n). - for adapter in registry.per_scan_adapters(): - third_party_record = third_party_records.get(adapter.source_type) - if third_party_record is not None: - from agents_shipgate.inputs.adapter_validation import ( - run_validated_adapter, - ) - - result = run_validated_adapter( - third_party_record, - source=None, - base_dir=base_dir, - manifest=manifest, - ) - if result is None: - continue - else: - result = adapter.load(None, base_dir, manifest) - _absorb(result, adapter.source_type, per_scan_loaded, bag, adapter) - - return per_source_loaded + per_scan_loaded, bag - - -def _tool_source_index( - tools: list[Tool], -) -> dict[str, tuple[str | None, int | None]]: - """Build a tool-name → ``(source_path, source_start_line)`` map for - surface-diff enrichment. 
- - Used by ``enrich_action_surface_diff_with_source`` and - ``enrich_tool_surface_diff_with_source`` to append - ``(source: path:line)`` to change-row ``reason`` strings, and by - the packet builder to suffix §3A / §3B highlights. Empty when the - tool list is empty so callers can rely on a boolean test. - """ - return { - tool.name: (tool.source_path, tool.source_start_line) - for tool in tools - } - - -def _artifact_warnings(artifact_bag: ArtifactBag) -> list[str]: - warnings: list[str] = [] - for artifact in artifact_bag.raw().values(): - artifact_warnings = getattr(artifact, "warnings", None) - if isinstance(artifact_warnings, list): - warnings.extend(str(warning) for warning in artifact_warnings) - return warnings - - -def _manifest_placeholder_warnings(config_path: Path) -> list[str]: - """Return source-warning strings for each ``CHANGE_ME`` placeholder - surviving in the manifest text. - - Doctor already surfaces these as ``SHIP-DIAG-CHANGE-ME-PLACEHOLDERS`` - diagnostics; the same fact also needs to flow into the scan so the - existing ``source_warning_count > 0 → review_required`` branch in - release_decision.evidence_coverage trips. Read failures (missing - file, non-UTF8 content) yield no warnings — the manifest loader runs - immediately before and will have already raised a structured error - in that case. - """ - try: - manifest_text = config_path.read_text(encoding="utf-8") - except (OSError, UnicodeDecodeError): - return [] - placeholders = collect_placeholders(manifest_text) - name = config_path.name - return [ - f"{name}:{entry['line']} — CHANGE_ME placeholder at " - f"{entry.get('path', '')!r}; replace before treating this " - "report as evidence." 
- for entry in placeholders - ] - - -def _absorb( - result: LoadedAdapterResult, - source_type: str, - sink: list[LoadedToolSource], - bag: ArtifactBag, - adapter: ToolSourceAdapter, -) -> None: - sink.extend(result.tool_sources) - if result.artifact is not None: - if adapter.artifact_class is not None and not isinstance( - result.artifact, adapter.artifact_class - ): - raise TypeError( - f"Adapter {adapter.source_type!r} declared " - f"artifact_class={adapter.artifact_class.__name__} but " - f"returned {type(result.artifact).__name__}" - ) - bag.set(source_type, result.artifact) - if result.warnings: - sink.append( - LoadedToolSource( - source_id=f"adapter:{source_type}", - source_type=source_type, - warnings=list(result.warnings), - ) - ) - - -def _invoke_per_source_adapter( - adapter: ToolSourceAdapter, - source: ToolSourceConfig, - base_dir: Path, - manifest: AgentsShipgateManifest, - *, - verbose: bool, - third_party_record: Any = None, -) -> LoadedAdapterResult | None: - """Invoke a per_source adapter and return its result. - - For **built-in** adapters: catch ``InputParseError`` only when the - source is marked ``optional`` (returning a warning-only stub); - any other exception propagates. A built-in raising means the - scanner is broken and must abort loudly. - - For **third-party** adapters (``third_party_record`` is the - matching ``LoadedAdapter``): route through - ``run_validated_adapter``, which captures ALL exceptions into the - record's ``runtime_errors`` list and returns ``None``. Returning - ``None`` signals the caller to skip ``_absorb`` for this source — - the scan continues in lenient mode and ``--strict-plugins`` sees - the runtime error on exit. 
- """ - - if third_party_record is not None: - from agents_shipgate.inputs.adapter_validation import ( - run_validated_adapter, - ) - - return run_validated_adapter( - third_party_record, - source=source, - base_dir=base_dir, - manifest=manifest, - ) - try: - return adapter.load(source, base_dir, manifest) - except InputParseError: - if source.optional: - warning = f"Optional source {source.id} failed to load" - if verbose: - warning = ( - f"{warning}; continuing because the source is marked optional" - ) - return LoadedAdapterResult( - tool_sources=[ - LoadedToolSource( - source_id=source.id, - source_type=source.type, - warnings=[warning], - ) - ], - ) - raise - - -def _flatten_and_deduplicate_tools( - loaded_sources: list[LoadedToolSource], -) -> tuple[list[Tool], list[str]]: - by_id: dict[str, Tool] = {} - warnings: list[str] = [] - for loaded in loaded_sources: - for tool in loaded.tools: - existing = by_id.get(tool.id) - if not existing: - by_id[tool.id] = tool - continue - if _source_priority(tool) > _source_priority(existing): - kept, dropped = tool, existing - else: - kept, dropped = existing, tool - by_id[tool.id] = _merge_duplicate_tool_metadata(kept, dropped) - warnings.append( - "Duplicate tool name " - f"{tool.name!r}; kept {kept.source_type} source {kept.source_id!r} " - f"and merged metadata from {dropped.source_type} source {dropped.source_id!r}." - ) - return list(by_id.values()), warnings - - -def _source_priority(tool: Tool) -> int: - # Anthropic and OpenAI artifacts are equally authoritative; on duplicate - # tool names across them the first-loaded entry wins (OpenAI is loaded - # first in run_scan), and a `Duplicate tool name` warning surfaces. 
- return { - "openai_api": 40, - "anthropic_api": 40, - "openapi": 30, - "google_adk_inventory": 25, - "langchain_inventory": 25, - "crewai_inventory": 25, - "codex_plugin_mcp_inventory": 25, - "n8n_inventory": 25, - "mcp": 20, - "google_adk_function": 10, - "langchain_function": 10, - "langchain_structured_tool": 10, - "crewai_function": 10, - "crewai_class_tool": 10, - "n8n_ai_tool": 10, - "n8n_workflow_tool": 10, - "n8n_code_tool": 10, - "n8n_http_tool": 10, - "n8n_mcp_client_tool": 10, - "sdk_function": 10, - "google_adk_config": 5, - "crewai_prebuilt_tool": 5, - }.get(tool.source_type, 0) - - -def _merge_duplicate_tool_metadata(kept: Tool, dropped: Tool) -> Tool: - merged = kept.model_copy(deep=True) - merged.annotations = {**dropped.annotations, **merged.annotations} - seen_hints = {_risk_hint_key(hint) for hint in merged.risk_hints} - for hint in dropped.risk_hints: - key = _risk_hint_key(hint) - if key in seen_hints: - continue - merged.risk_hints.append(hint.model_copy(deep=True)) - seen_hints.add(key) - merged.auth = merged.auth.model_copy(deep=True) - merged.auth.scopes = _merge_string_values(merged.auth.scopes, dropped.auth.scopes) - if not merged.auth.type: - merged.auth.type = dropped.auth.type - if not merged.auth.credential_mode: - merged.auth.credential_mode = dropped.auth.credential_mode - if not merged.auth.source and dropped.auth.source: - merged.auth.source = dropped.auth.source - if merged.owner is None: - merged.owner = dropped.owner - return merged - - -def _risk_hint_key(hint) -> tuple[str, str, str, str]: - evidence = json.dumps(hint.evidence, sort_keys=True, default=str) - return hint.tag, hint.source, hint.confidence, evidence - - -def _merge_string_values(primary: list[str], secondary: list[str]) -> list[str]: - merged: list[str] = [] - for value in [*primary, *secondary]: - if value not in merged: - merged.append(value) - return merged - - -def _build_agent( - manifest, - tools: list[Tool], - api_artifacts: OpenAIApiArtifacts | None = 
None, - anthropic_artifacts: AnthropicArtifacts | None = None, - adk_artifacts: GoogleAdkArtifacts | None = None, -) -> Agent: - sdk = manifest.agent.sdk - instructions_preview = manifest.agent.instructions_preview - instruction_source = "config" if instructions_preview else "dynamic_unknown" - instruction_confidence = "high" if instructions_preview else "medium" - if not instructions_preview and api_artifacts and api_artifacts.prompt_text: - instructions_preview = api_artifacts.prompt_text[:500] - instruction_source = "openai_api_prompt_files" - instruction_confidence = "high" - if ( - not instructions_preview - and anthropic_artifacts - and anthropic_artifacts.prompt_text - ): - instructions_preview = anthropic_artifacts.prompt_text[:500] - instruction_source = "anthropic_prompt_files" - instruction_confidence = "high" - if not instructions_preview and adk_artifacts: - adk_instruction = _first_adk_instruction_preview(adk_artifacts) - if adk_instruction: - instructions_preview = adk_instruction[:500] - instruction_source = "google_adk_static" - instruction_confidence = "medium" - return Agent( - id=f"agent:{manifest.project.name}/{manifest.agent.name}", - name=manifest.agent.name, - source=sdk.model_dump(exclude_none=True) if sdk else {"source": "manifest"}, - instructions={ - "value_preview": instructions_preview, - "source": instruction_source, - "confidence": instruction_confidence, - }, - declared_purpose=manifest.agent.declared_purpose, - prohibited_actions=manifest.agent.prohibited_actions, - tools=[tool.name for tool in tools], - guardrails={ - "input": "unknown", - "output": "unknown", - "tool": "unknown", - "source": "unknown", - }, - extraction={ - "method": "config_assisted", - "confidence": "medium", - "missing_fields": ["runtime_traces"], - "dynamic_fields": [], - }, - ) - - -def _first_adk_instruction_preview(adk_artifacts: GoogleAdkArtifacts) -> str | None: - for agent in adk_artifacts.agents: - value = agent.get("instruction_preview") - if 
isinstance(value, str) and value.strip(): - return value - return None - - -def _planned_generated_paths( - out_dir: Path, - formats: list[str], - *, - packet_enabled: bool = False, - packet_formats: set[str] | None = None, -) -> dict[str, Path]: - paths: dict[str, Path] = {} - if "markdown" in formats: - paths["markdown"] = out_dir / "report.md" - if "json" in formats: - paths["json"] = out_dir / "report.json" - if "sarif" in formats: - paths["sarif"] = out_dir / "report.sarif" - if packet_enabled and packet_formats: - if "md" in packet_formats: - paths["packet_md"] = out_dir / "packet.md" - if "json" in packet_formats: - paths["packet_json"] = out_dir / "packet.json" - if "html" in packet_formats: - paths["packet_html"] = out_dir / "packet.html" - if "pdf" in packet_formats: - paths["packet_pdf"] = out_dir / "packet.pdf" - return paths - - -def _write_reports( - report: ReadinessReport, paths: dict[str, Path], formats: list[str] -) -> None: - if "markdown" in formats and "markdown" in paths: - write_markdown_report(report, paths["markdown"]) - if "json" in formats and "json" in paths: - write_json_report(report, paths["json"]) - if "sarif" in formats and "sarif" in paths: - write_sarif_report(report, paths["sarif"]) - - -def _write_packet(packet, paths: dict[str, Path], packet_formats: set[str]) -> None: - if "md" in packet_formats and "packet_md" in paths: - write_packet_markdown(packet, paths["packet_md"]) - if "json" in packet_formats and "packet_json" in paths: - write_packet_json(packet, paths["packet_json"]) - if "html" in packet_formats and "packet_html" in paths: - write_packet_html(packet, paths["packet_html"]) - if "pdf" in packet_formats and "packet_pdf" in paths: - try: - render_packet_pdf(packet, paths["packet_pdf"]) - except PdfRendererUnavailable as exc: - logger.warning("packet.pdf skipped: %s", exc) - - -def _resolve_packet_format_set(packet_cfg) -> tuple[set[str], bool]: - """Resolve the writeable packet formats after probing weasyprint. 
- - Returns ``(formats, pdf_skipped)``: ``formats`` is the set of - format names that should actually be emitted; ``pdf_skipped`` is - ``True`` iff the user requested PDF but weasyprint is unavailable - on this install (so the caller can record a single warning). - """ - - requested = {fmt for fmt in packet_cfg.formats if fmt in PACKET_FORMAT_NAMES} - if not packet_cfg.enabled: - return set(), False - if "pdf" in requested and not is_pdf_available(): - return requested - {"pdf"}, True - return requested, False - - -def _relative_display_path(path: Path, base_dir: Path) -> str: - resolved = path.resolve() - base = base_dir.resolve() - rel = os.path.relpath(resolved, base) - if rel == ".." or rel.startswith(f"..{os.sep}"): - return str(resolved) - return rel - - -def _resolve_audit_log_path( - manifest: AgentsShipgateManifest, - baseline_path: Path, -) -> Path: - """Resolve the baseline audit log path. - - Resolution order: - 1. ``manifest.baseline.audit_log`` if set (relative paths resolved - against the baseline file's directory). - 2. Otherwise ``/baseline-audit.log`` — - co-located with the baseline JSON. This matches the default that - ``write_baseline`` uses, so save/verify see the same file - without configuration. - """ - override = manifest.baseline.audit_log - if override: - candidate = Path(override) - if not candidate.is_absolute(): - candidate = baseline_path.parent / candidate - return candidate - return baseline_path.parent / DEFAULT_AUDIT_LOG_PATH.name - - -def _check_metadata_lookup( - *, plugins_enabled: bool | None -) -> dict: - """Build a {check_id: CheckMetadata} lookup honoring the scan's - actual plugin setting. Used by ``annotate_remediation`` so the - serialized report's per-finding remediation fields reflect the - catalog the scan was run against. 
- - Avoids the late-stage plugin-loading hazard: by passing the lookup - *into* annotation, we never call ``check_catalog()`` at write time - where ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` could re-load plugins - even for ``--no-plugins`` scans. - """ - from agents_shipgate.checks.registry import check_catalog - - return { - check.id: check - for check in check_catalog(plugins_enabled=plugins_enabled) - } - - -def _attach_patches( - findings: list, - manifest, - config_path: Path, - *, - plugins_enabled: bool | None, -) -> None: - """Attach Patch objects to unsuppressed findings (per v0.6 plan §3). - - Suppressed findings are intentionally skipped — apply-patches must - not mutate entries the user marked ignored. - - Coverage rule: every active finding gets ≥ 1 patch (non-manual when - a generator exists, ManualPatch otherwise). Findings without - --suggest-patches keep ``patches=None`` (per C4) and are filtered - out of the JSON by ``report_json_payload``. - - Per the v0.7 PR 3 review: ``plugins_enabled`` is forwarded into - ``check_catalog`` so the recommendation lookup honors the scan's - explicit ``--no-plugins`` flag even when ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` - is set in the environment. Without this, the patch-attachment path - would load third-party plugin entry points before - ``annotate_remediation`` ran with its plugin-safe lookup. 
- """ - from agents_shipgate.checks.patches import ( - PatchContext, - generate_patches_for_finding, - ) - from agents_shipgate.checks.registry import check_catalog - - recommendation_lookup = { - check.id: check.recommendation - for check in check_catalog(plugins_enabled=plugins_enabled) - if check.recommendation - } - context = PatchContext( - manifest=manifest, - manifest_path=config_path, - recommendation_lookup=recommendation_lookup, - ) - for finding in findings: - if finding.suppressed: - continue - finding.patches = generate_patches_for_finding(context, finding) - - -def _run_id( - manifest, - tools: list[Tool], - findings, - project: dict[str, object] | None = None, - agent_name: str | None = None, - environment: dict[str, object] | None = None, - api_surface: dict[str, object] | None = None, - anthropic_surface: dict[str, object] | None = None, - frameworks: dict[str, object] | None = None, - codex_plugin_surface: CodexPluginSurface | None = None, - action_surface_facts: ActionSurfaceFacts | None = None, -) -> str: - payload = { - "project": project - if project is not None - else manifest.project.model_dump(mode="json", exclude_none=False), - "agent_name": agent_name if agent_name is not None else manifest.agent.name, - "environment": environment - if environment is not None - else manifest.environment.model_dump(mode="json", exclude_none=False), - "tool_inventory": tool_inventory(tools), - "findings": [ - finding.model_dump( - mode="json", - # Exclude derived-enrichment fields (per C11 + v0.7 - # review finding 2): patches and the four remediation - # fields are computed AFTER the input surface is - # known, so they MUST NOT enter the run_id hash. Two - # scans of the same workspace must produce the same - # run_id whether `--suggest-patches` is set or not, and - # whether v0.7 metadata is present or not. 
- exclude={ - "id": True, - "baseline_status": True, - "patches": True, - "autofix_safe": True, - "requires_human_review": True, - "suggested_patch_kind": True, - "docs_url": True, - "blocks_release": True, - # v0.12 derived enrichment: same exclusion rule as - # the v0.7 remediation fields above. agent_action is - # a deterministic projection of those fields, so - # excluding them already implies it should be - # excluded — but make it explicit so a future - # contributor doesn't have to trace the projection. - "agent_action": True, - # v0.11 provenance fields are excluded so YAML line - # drift cannot churn run_id; the legacy - # type/ref/location strings stay in the hash so - # existing run_ids remain stable. - "source": { - "path": True, - "start_line": True, - "end_line": True, - "start_column": True, - "pointer": True, - }, - # v0.19 reviewer-grade provenance: the secondary - # manifest pointer ``policy_evidence_source`` is - # excluded in its entirety. The whole field is - # additive (older scans never emitted it) and - # YAML line drift on the manifest must not churn - # run_id — same rationale as the v0.11 exclusion - # above. 
- "policy_evidence_source": True, - }, - exclude_none=False, - ) - for finding in findings - ], - "api_surface": api_surface, - "anthropic_surface": anthropic_surface, - "frameworks": frameworks or {}, - "codex_plugin_surface": ( - codex_plugin_surface.model_dump(mode="json") if codex_plugin_surface else None - ), - "action_surface_facts": ( - action_surface_facts.model_dump(mode="json") - if action_surface_facts is not None - else None - ), - } - digest = hashlib.sha256( - json.dumps(payload, sort_keys=True, default=str).encode("utf-8") - ).hexdigest()[:16] - return f"agents_shipgate_{digest}" - - -def _frameworks_surface( - adk_artifacts: GoogleAdkArtifacts | None, - langchain_artifacts: LangChainArtifacts | None = None, - crewai_artifacts: CrewAiArtifacts | None = None, - n8n_artifacts: N8nArtifacts | None = None, -) -> dict[str, object]: - surface: dict[str, object] = {} - if adk_artifacts: - surface["google_adk"] = adk_artifacts.surface_summary() - if langchain_artifacts: - surface["langchain"] = langchain_artifacts.surface_summary() - if crewai_artifacts: - surface["crewai"] = crewai_artifacts.surface_summary() - if n8n_artifacts: - surface["n8n"] = n8n_artifacts.surface_summary() - return surface - - -def _build_public_action_surface_facts( - *, - raw_facts: ActionSurfaceFacts, - manifest: AgentsShipgateManifest, - agent_id: str, - tools: list[Tool], - stats: RedactionStats, -) -> ActionSurfaceFacts: - try: - return sanitize_model( - build_action_surface_facts( - manifest, - agent_id=agent_id, - tools=tools, - ), - ActionSurfaceFacts, - stats=stats, - path="action_surface_facts", - ) - except ConfigError: - logger.debug( - "redacted action surface collapsed distinct raw action ids; " - "falling back to a sanitized raw snapshot with public-only " - "ordinal disambiguators" - ) - return _sanitize_existing_action_surface_facts( - raw_facts, - stats=stats, - path="action_surface_facts", - ) - - -def _sanitize_existing_action_surface_facts( - facts: 
ActionSurfaceFacts, - *, - stats: RedactionStats, - path: str, -) -> ActionSurfaceFacts: - public_facts = sanitize_model( - facts, - ActionSurfaceFacts, - stats=stats, - path=path, - ) - _disambiguate_public_action_ids(public_facts) - return public_facts - - -def _disambiguate_public_action_ids(facts: ActionSurfaceFacts) -> None: - seen: dict[str, int] = {} - for action in facts.actions: - count = seen.get(action.action_id, 0) + 1 - seen[action.action_id] = count - if count > 1: - action.action_id = f"{action.action_id}#{count}" - _refresh_public_action_hashes(action) - - -def _refresh_public_action_hashes(action: ActionFact) -> None: - schema_hash = _stable_hash( - { - "input_fields": action.input_fields, - "required_input_fields": action.required_input_fields, - } - ) - policy_hash = _stable_hash( - { - "approval": action.approval_policy.model_dump(mode="json"), - "safeguards": action.safeguards.model_dump(mode="json"), - "evidence": action.evidence.model_dump(mode="json"), - } - ) - risk_hash = _stable_hash( - { - "effect": action.effect, - "risk_tags": action.risk_tags, - "required_scopes": action.required_scopes, - } - ) - action.input_schema_hash = schema_hash - action.hashes = ActionSurfaceHashes( - identity_hash=_stable_hash(action.action_id), - schema_hash=schema_hash, - policy_hash=policy_hash, - risk_hash=risk_hash, - ) - - -def _sanitize_codex_plugin_surface( - surface: CodexPluginSurface | None, - *, - stats: RedactionStats, -) -> CodexPluginSurface | None: - if surface is None: - return None - return sanitize_model( - surface, - CodexPluginSurface, - stats=stats, - path="codex_plugin_surface", - ) - - -def _sanitize_diff_reference( - reference: ToolSurfaceDiffReference | None, - *, - stats: RedactionStats, -) -> ToolSurfaceDiffReference | None: - if reference is None: - return None - facts = ( - sanitize_model( - reference.facts, - ToolSurfaceFacts, - stats=stats, - path="tool_surface_diff.base.facts", - ) - if reference.facts is not None - else None 
- ) - action_facts = ( - _sanitize_existing_action_surface_facts( - reference.action_facts, - stats=stats, - path="action_surface_diff.base.facts", - ) - if reference.action_facts is not None - else None - ) - findings = ( - [ - item.__class__.model_validate( - redact_data( - item.model_dump(mode="python"), - stats=stats, - path="tool_surface_diff.base.findings[]", - ) - ) - for item in reference.findings - ] - if reference.findings is not None - else None - ) - return ToolSurfaceDiffReference( - kind=reference.kind, - facts=facts, - path=redact_data(reference.path, stats=stats, path="tool_surface_diff.base.path"), - report_schema_version=reference.report_schema_version, - baseline_schema_version=reference.baseline_schema_version, - action_facts=action_facts, - findings=findings, - notes=tuple( - redact_data( - list(reference.notes), - stats=stats, - path="tool_surface_diff.base.notes[]", - ) - ), - action_notes=tuple( - redact_data( - list(reference.action_notes), - stats=stats, - path="action_surface_diff.base.notes[]", - ) - ), - ) - - -def _default_baseline_status(base_dir: Path) -> dict[str, object]: - path = base_dir / ".agents-shipgate" / "baseline.json" - return { - "default_path": _relative_display_path(path, base_dir), - "present": path.exists(), - } diff --git a/src/agents_shipgate/cli/scan/__init__.py b/src/agents_shipgate/cli/scan/__init__.py new file mode 100644 index 00000000..f5ac3343 --- /dev/null +++ b/src/agents_shipgate/cli/scan/__init__.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +from .agent_builder import _build_agent, _first_adk_instruction_preview +from .decision import _run_checks_and_decide +from .diffs import _load_diff_references +from .final_report import _build_final_report +from .inputs import _load_inputs +from .inspect import inspect_sources +from .models import ( + _ChecksDecision, + _DiffReferences, + _LoadedInputs, + _OutputPlan, + _ResolvedManifest, + _SanitizedSurfaces, + _ToolsAndAgent, +) +from .orchestrator 
import run_scan +from .output_helpers import ( + PACKET_FORMAT_NAMES, + _planned_generated_paths, + _resolve_packet_format_set, + _write_packet, + _write_reports, +) +from .output_planning import _plan_outputs +from .patching import _attach_patches, _check_metadata_lookup +from .path_helpers import ( + _default_baseline_status, + _relative_display_path, + _resolve_audit_log_path, +) +from .prepare import _prepare_scan +from .run_identity import _run_id +from .sanitization import _sanitize_for_output +from .source_loading import ( + _absorb, + _artifact_warnings, + _flatten_and_deduplicate_tools, + _invoke_per_source_adapter, + _load_sources, + _merge_duplicate_tool_metadata, + _merge_string_values, + _risk_hint_key, + _source_priority, + _tool_source_index, +) +from .surface_redaction import ( + _build_public_action_surface_facts, + _disambiguate_public_action_ids, + _frameworks_surface, + _refresh_public_action_hashes, + _sanitize_codex_plugin_surface, + _sanitize_diff_reference, + _sanitize_existing_action_surface_facts, +) +from .tools_agent import _build_tools_and_agent +from .validation import _manifest_placeholder_warnings, _resolve_source_paths +from .writing import _write_outputs + +__all__ = [ + "PACKET_FORMAT_NAMES", + "_ChecksDecision", + "_DiffReferences", + "_LoadedInputs", + "_OutputPlan", + "_ResolvedManifest", + "_SanitizedSurfaces", + "_ToolsAndAgent", + "_absorb", + "_artifact_warnings", + "_attach_patches", + "_build_agent", + "_build_final_report", + "_build_public_action_surface_facts", + "_build_tools_and_agent", + "_check_metadata_lookup", + "_default_baseline_status", + "_disambiguate_public_action_ids", + "_first_adk_instruction_preview", + "_flatten_and_deduplicate_tools", + "_frameworks_surface", + "_invoke_per_source_adapter", + "_load_diff_references", + "_load_inputs", + "_load_sources", + "_manifest_placeholder_warnings", + "_merge_duplicate_tool_metadata", + "_merge_string_values", + "_plan_outputs", + "_planned_generated_paths", + 
"_prepare_scan", + "_refresh_public_action_hashes", + "_relative_display_path", + "_resolve_audit_log_path", + "_resolve_packet_format_set", + "_resolve_source_paths", + "_risk_hint_key", + "_run_checks_and_decide", + "_run_id", + "_sanitize_codex_plugin_surface", + "_sanitize_diff_reference", + "_sanitize_existing_action_surface_facts", + "_sanitize_for_output", + "_source_priority", + "_tool_source_index", + "_write_outputs", + "_write_packet", + "_write_reports", + "inspect_sources", + "run_scan", +] diff --git a/src/agents_shipgate/cli/scan/agent_builder.py b/src/agents_shipgate/cli/scan/agent_builder.py new file mode 100644 index 00000000..0ce30c22 --- /dev/null +++ b/src/agents_shipgate/cli/scan/agent_builder.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from agents_shipgate.core.artifact_models import ( + AnthropicArtifacts, + GoogleAdkArtifacts, + OpenAIApiArtifacts, +) +from agents_shipgate.core.domain import Agent, Tool + + +def _build_agent( + manifest, + tools: list[Tool], + api_artifacts: OpenAIApiArtifacts | None = None, + anthropic_artifacts: AnthropicArtifacts | None = None, + adk_artifacts: GoogleAdkArtifacts | None = None, +) -> Agent: + sdk = manifest.agent.sdk + instructions_preview = manifest.agent.instructions_preview + instruction_source = "config" if instructions_preview else "dynamic_unknown" + instruction_confidence = "high" if instructions_preview else "medium" + if not instructions_preview and api_artifacts and api_artifacts.prompt_text: + instructions_preview = api_artifacts.prompt_text[:500] + instruction_source = "openai_api_prompt_files" + instruction_confidence = "high" + if ( + not instructions_preview + and anthropic_artifacts + and anthropic_artifacts.prompt_text + ): + instructions_preview = anthropic_artifacts.prompt_text[:500] + instruction_source = "anthropic_prompt_files" + instruction_confidence = "high" + if not instructions_preview and adk_artifacts: + adk_instruction = 
_first_adk_instruction_preview(adk_artifacts) + if adk_instruction: + instructions_preview = adk_instruction[:500] + instruction_source = "google_adk_static" + instruction_confidence = "medium" + return Agent( + id=f"agent:{manifest.project.name}/{manifest.agent.name}", + name=manifest.agent.name, + source=sdk.model_dump(exclude_none=True) if sdk else {"source": "manifest"}, + instructions={ + "value_preview": instructions_preview, + "source": instruction_source, + "confidence": instruction_confidence, + }, + declared_purpose=manifest.agent.declared_purpose, + prohibited_actions=manifest.agent.prohibited_actions, + tools=[tool.name for tool in tools], + guardrails={ + "input": "unknown", + "output": "unknown", + "tool": "unknown", + "source": "unknown", + }, + extraction={ + "method": "config_assisted", + "confidence": "medium", + "missing_fields": ["runtime_traces"], + "dynamic_fields": [], + }, + ) + + +def _first_adk_instruction_preview(adk_artifacts: GoogleAdkArtifacts) -> str | None: + for agent in adk_artifacts.agents: + value = agent.get("instruction_preview") + if isinstance(value, str) and value.strip(): + return value + return None diff --git a/src/agents_shipgate/cli/scan/decision.py b/src/agents_shipgate/cli/scan/decision.py new file mode 100644 index 00000000..a0dcf9ab --- /dev/null +++ b/src/agents_shipgate/cli/scan/decision.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any + +from agents_shipgate.checks.registry import check_catalog, run_checks +from agents_shipgate.core.context import ScanContext +from agents_shipgate.core.dynamic_defaults import dynamic_check_defaults +from agents_shipgate.core.findings.identity import dedupe_findings, finding_fingerprint +from agents_shipgate.core.findings.mutations import apply_severity_overrides, apply_suppressions +from agents_shipgate.core.findings.remediation import annotate_remediation +from agents_shipgate.core.severity_overrides 
import resolve_severity_overrides +from agents_shipgate.inputs.policy_packs import run_policy_pack_rules +from agents_shipgate.report.action_surface_diff import ( + action_reference_from_scan_reference, + build_action_surface_facts, + compute_action_surface_diff, + evaluate_action_surface_policies, +) +from agents_shipgate.schemas.manifest import AgentsShipgateManifest + +from .models import _ChecksDecision, _DiffReferences, _LoadedInputs, _ToolsAndAgent +from .patching import _attach_patches, _check_metadata_lookup + +logger = logging.getLogger(__name__) + +def _run_checks_and_decide( + *, + manifest: AgentsShipgateManifest, + manifest_positions: Any, + config_path: Path, + tools_and_agent: _ToolsAndAgent, + inputs: _LoadedInputs, + diffs: _DiffReferences, + plugins_enabled: bool | None, + suggest_patches: bool, +) -> _ChecksDecision: + """Phase 5: build internal action-surface facts, run all checks + (built-in + plugin + policy-pack + action-surface policies), + resolve severity overrides via the dynamic-default aggregator, + apply suppressions + optional patches, annotate remediation + metadata, snapshot ``legacy_fingerprints`` for pre-v0.18 baseline + compatibility. + + The INTERNAL ``action_surface_diff`` returned here is semantic + only — provenance enrichment happens later on the PUBLIC diff + derived from sanitized tools. Mutating ``reason`` here would leak + ``path:line`` into ``Finding.evidence``, churning fingerprints. 
+ """ + action_surface_facts = build_action_surface_facts( + manifest, + agent_id=tools_and_agent.agent.id, + tools=tools_and_agent.tools, + ) + action_reference = action_reference_from_scan_reference(diffs.diff_reference) + action_surface_diff = compute_action_surface_diff( + action_surface_facts, + action_reference.facts if action_reference else None, + reference=action_reference, + ) + if diffs.diff_reference_error: + action_surface_diff.enabled = False + action_surface_diff.notes = [diffs.diff_reference_error] + context = ScanContext( + manifest=manifest, + agent=tools_and_agent.agent, + tools=tools_and_agent.tools, + config_path=config_path.resolve(), + framework_artifacts=inputs.artifact_bag, + action_surface_facts=action_surface_facts, + manifest_positions=manifest_positions, + ) + loaded_plugins: list[dict[str, str | None]] = [] + findings = run_checks( + context, + plugins_enabled=plugins_enabled, + loaded_plugins=loaded_plugins, + extra_known_check_ids={ + resolved.rule.id for resolved in inputs.policy_packs.rules + }, + ) + findings.extend(run_policy_pack_rules(context, inputs.policy_packs)) + findings.extend( + evaluate_action_surface_policies( + manifest, + action_surface_facts, + action_surface_diff, + agent_id=tools_and_agent.agent.id, + tools=tools_and_agent.tools, + ) + ) + findings = dedupe_findings(findings) + # v0.17 (M1) + v0.18 (PR #1): centralized aggregator covers every + # catalog check with ``dynamic_default=True``. See + # ``core/dynamic_defaults.py`` and ``severity_overrides.py`` for the + # tier-crossing / floor-enforcement contract. 
+ catalog = check_catalog(plugins_enabled=plugins_enabled) + effective_dynamic_defaults = dynamic_check_defaults( + manifest, inputs.policy_packs, catalog=catalog + ) + override_resolution = resolve_severity_overrides( + overrides=manifest.severity_override_entries(), + acknowledgements=manifest.acknowledge_overrides(), + catalog=catalog, + extra_known_check_defaults=effective_dynamic_defaults, + ) + apply_severity_overrides(findings, override_resolution.override_by_check_id) + apply_suppressions(findings, manifest.checks.ignore) + if suggest_patches: + _attach_patches( + findings, + manifest, + config_path, + plugins_enabled=plugins_enabled, + ) + # v0.7: annotate every finding (regardless of --suggest-patches) with + # the four remediation fields. When patches are present they're + # derived from those; otherwise the per-check CheckMetadata seeds + # the values. + annotate_remediation( + findings, + _check_metadata_lookup(plugins_enabled=plugins_enabled), + ) + legacy_fingerprints = [finding_fingerprint(finding) for finding in findings] + logger.debug( + "checks completed", + extra={ + "agents_shipgate_finding_count": len(findings), + "agents_shipgate_suppressed_count": sum( + 1 for finding in findings if finding.suppressed + ), + }, + ) + return _ChecksDecision( + action_surface_facts=action_surface_facts, + action_surface_diff=action_surface_diff, + findings=findings, + legacy_fingerprints=legacy_fingerprints, + override_resolution=override_resolution, + loaded_plugins=loaded_plugins, + context=context, + ) diff --git a/src/agents_shipgate/cli/scan/diffs.py b/src/agents_shipgate/cli/scan/diffs.py new file mode 100644 index 00000000..f85a4d0b --- /dev/null +++ b/src/agents_shipgate/cli/scan/diffs.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from pathlib import Path + +from agents_shipgate.core.baseline import load_baseline +from agents_shipgate.core.errors import InputParseError +from agents_shipgate.report.tool_surface_diff import ( + 
ToolSurfaceDiffReference, + load_tool_surface_diff_reference, + reference_from_baseline, +) + +from .models import _DiffReferences +from .path_helpers import _relative_display_path + + +def _load_diff_references( + *, + baseline_path: Path | None, + diff_from_path: Path | None, + base_dir: Path, +) -> _DiffReferences: + """Phase 4: load optional baseline JSON + tool-surface diff reference. + + ``--diff-from`` wins over baseline-derived reference when both are + supplied. ``InputParseError`` from either path is caught and returned + as a string so the downstream diff is rendered as ``enabled=False`` + with a reviewer-visible note rather than aborting the scan. + """ + baseline_file = load_baseline(baseline_path) if baseline_path else None + baseline_display_path = ( + _relative_display_path(baseline_path, base_dir) if baseline_path else None + ) + diff_reference: ToolSurfaceDiffReference | None = None + diff_reference_error: str | None = None + try: + if diff_from_path: + diff_reference = load_tool_surface_diff_reference( + diff_from_path, + display_path=_relative_display_path(diff_from_path, base_dir), + ) + elif baseline_file: + diff_reference = reference_from_baseline( + baseline_file, + display_path=baseline_display_path, + ) + except InputParseError as exc: + diff_reference_error = str(exc) + return _DiffReferences( + baseline_file=baseline_file, + baseline_display_path=baseline_display_path, + diff_reference=diff_reference, + diff_reference_error=diff_reference_error, + ) diff --git a/src/agents_shipgate/cli/scan/final_report.py b/src/agents_shipgate/cli/scan/final_report.py new file mode 100644 index 00000000..51f82568 --- /dev/null +++ b/src/agents_shipgate/cli/scan/final_report.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from typing import Any + +from agents_shipgate.core.findings.report_builder import build_report +from agents_shipgate.core.findings.reviewer_summary import build_reviewer_summary +from agents_shipgate.report.capability_diff 
import apply_capability_diff +from agents_shipgate.report.json_report import report_json_payload +from agents_shipgate.schemas.manifest import AgentsShipgateManifest +from agents_shipgate.schemas.report import ReadinessReport + +from .models import _OutputPlan, _SanitizedSurfaces +from .run_identity import _run_id + + +def _build_final_report( + *, + manifest: AgentsShipgateManifest, + sanitized: _SanitizedSurfaces, + plan: _OutputPlan, +) -> tuple[ReadinessReport, Any]: + """Phase 8: hash the run_id, build the ``ReadinessReport`` from the + fully sanitized surfaces, run capability-diff enrichment, and + project the JSON payload that packet building consumes. + + The ``_run_id`` inputs are exactly what they were pre-decomp — + STABILITY contract requires byte-identical hashes for the same + workspace. + """ + report = build_report( + run_id=_run_id( + manifest, + sanitized.tools, + sanitized.findings, + project=sanitized.project, + agent_name=sanitized.agent.name, + environment=sanitized.environment, + api_surface=sanitized.api_surface, + anthropic_surface=sanitized.anthropic_surface, + frameworks=sanitized.frameworks_surface, + codex_plugin_surface=sanitized.codex_plugin_surface, + action_surface_facts=sanitized.action_surface_facts, + ), + manifest=sanitized.manifest, + project=sanitized.project, + manifest_dir=sanitized.manifest_dir, + agent=sanitized.agent.model_dump(exclude_none=True), + environment=sanitized.environment, + tools=sanitized.tools, + findings=sanitized.findings, + generated_reports=plan.generated_report_refs, + ci_mode=sanitized.manifest.ci.mode, + fail_on=sanitized.manifest.ci.fail_on, + new_findings_only=sanitized.baseline_summary is not None, + loaded_policy_packs=sanitized.loaded_policy_packs, + loaded_plugins=sanitized.loaded_plugins, + loaded_adapters=sanitized.loaded_adapters, + source_warnings=sanitized.source_warnings, + api_surface=sanitized.api_surface, + anthropic_surface=sanitized.anthropic_surface, + 
frameworks=sanitized.frameworks_surface, + codex_plugin_surface=sanitized.codex_plugin_surface, + baseline=sanitized.baseline_summary, + tool_surface_facts=sanitized.tool_surface_facts, + tool_surface_diff=sanitized.tool_surface_diff, + action_surface_facts=sanitized.action_surface_facts, + action_surface_diff=sanitized.action_surface_diff, + # v0.17 (M1): top-of-report policy audit. Always emitted (may + # be an empty envelope) so consumers can rely on the field + # existing in v0.17 reports. + policy_audit=sanitized.policy_audit, + privacy_audit=sanitized.privacy_audit, + ) + apply_capability_diff(report, sanitized.tools) + # v0.20: reviewer_summary is built HERE — after apply_capability_diff + # has populated misalignments / release_consequence / suggested_scenarios. + # Building it inside build_report() would project from incomplete state + # (capability_misalignments would always be 0). Pure projection, no I/O. + report.reviewer_summary = build_reviewer_summary( + findings=sanitized.findings, + report=report, + ) + public_report_payload = report_json_payload(report) + return report, public_report_payload diff --git a/src/agents_shipgate/cli/scan/inputs.py b/src/agents_shipgate/cli/scan/inputs.py new file mode 100644 index 00000000..a3690136 --- /dev/null +++ b/src/agents_shipgate/cli/scan/inputs.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any + +from agents_shipgate.core.artifact_models import ( + AnthropicArtifacts, + CodexPluginArtifacts, + CrewAiArtifacts, + GoogleAdkArtifacts, + LangChainArtifacts, + N8nArtifacts, + OpenAIApiArtifacts, + ValidationArtifacts, +) +from agents_shipgate.inputs.policy_packs import load_policy_packs +from agents_shipgate.inputs.protocol import REGISTRY +from agents_shipgate.schemas.manifest import AgentsShipgateManifest + +from .models import _LoadedInputs +from .source_loading import _artifact_warnings, _load_sources +from .validation import 
_manifest_placeholder_warnings + +logger = logging.getLogger(__name__) + +def _load_inputs( + *, + manifest: AgentsShipgateManifest, + base_dir: Path, + config_path: Path, + policy_pack_paths: list[Path] | None, + verbose: bool, + plugins_enabled: bool | None = None, +) -> _LoadedInputs: + """Phase 2: dispatch every adapter through ``REGISTRY``, extract + typed artifacts from the ``ArtifactBag``, aggregate source warnings + (including CHANGE_ME placeholder warnings from the manifest text), + load policy packs. + + v0.20: also discovers third-party adapters from the + ``agents_shipgate.adapters`` entry-point group BEFORE + ``_load_sources`` runs, so the dispatcher resolves any + user-installed plugin source_types alongside built-ins. Discovery + is gated by ``plugins_enabled`` (mirroring the plugin-check flow + in ``checks/registry.py``). + """ + from agents_shipgate.inputs.protocol import discover_third_party_adapters + + # v0.20 (PR #111 review fix P1 #1+#2): build a per-scan registry + # clone so third-party discovery NEVER mutates the global + # ``REGISTRY``. Without this, a later ``--no-plugins`` scan would + # still see adapters registered by an earlier scan, and the + # collision check on scan-two would misclassify stable third- + # party adapters as ``source_type_collision`` (the global already + # has them from scan-one). The clone captures any monkeypatch + # state at this exact moment, so existing tests that + # ``monkeypatch.setitem(REGISTRY._adapters, …)`` still work. + scan_registry = REGISTRY.clone() + loaded_adapters: list[dict[str, Any]] = [] + discovery_records = discover_third_party_adapters( + scan_registry, + plugins_enabled=plugins_enabled, + loaded_adapters=loaded_adapters, + ) + # v0.20 (PR #111 review follow-up #2): map of source_type → valid + # LoadedAdapter record. 
Used by ``_load_sources`` to route + # third-party adapter ``load()`` calls through + # ``run_validated_adapter`` so runtime exceptions land in + # ``loaded_adapters[].runtime_errors`` instead of crashing the + # scan. Invalid records (validation_status != "valid") are + # excluded: they never registered on ``scan_registry`` and so the + # dispatcher will never reach them. + third_party_records: dict[str, Any] = { + record.adapter.source_type: record + for record in discovery_records + if record.adapter is not None + } + loaded_sources, artifact_bag = _load_sources( + manifest, + base_dir, + verbose=verbose, + registry=scan_registry, + third_party_records=third_party_records, + plugins_enabled=plugins_enabled, + ) + logger.debug( + "loaded sources", + extra={ + "agents_shipgate_source_count": len(loaded_sources), + "agents_shipgate_sources": [ + {"id": source.source_id, "type": source.source_type, "tools": len(source.tools)} + for source in loaded_sources + ], + }, + ) + # Keep warning buckets separate so Phase 3 can re-assemble them in the + # pre-decomp order: source → duplicate → artifact → placeholder → + # policy_pack → dedup. See _LoadedInputs docstring for the P3 rationale. + source_only_warnings: list[str] = [ + warning for loaded in loaded_sources for warning in loaded.warnings + ] + artifact_warnings_list: list[str] = _artifact_warnings(artifact_bag) + # Unresolved CHANGE_ME placeholders in the manifest mean the run is + # operating on stub data. Surface them as source warnings so the + # existing ``source_warning_count > 0`` branch in + # release_decision.evidence_coverage routes the gate to + # ``review_required`` and the packet §10 "Not proven" section + # mentions the placeholder verbatim. 
+ placeholder_warnings: list[str] = _manifest_placeholder_warnings(config_path) + policy_packs = load_policy_packs( + manifest=manifest, + base_dir=base_dir, + cli_policy_packs=policy_pack_paths, + ) + policy_pack_warnings: list[str] = list(policy_packs.warnings) + return _LoadedInputs( + loaded_sources=loaded_sources, + artifact_bag=artifact_bag, + policy_packs=policy_packs, + source_only_warnings=source_only_warnings, + artifact_warnings_list=artifact_warnings_list, + placeholder_warnings=placeholder_warnings, + policy_pack_warnings=policy_pack_warnings, + loaded_adapters=loaded_adapters, + adk=artifact_bag.get("google_adk", GoogleAdkArtifacts), + langchain=artifact_bag.get("langchain", LangChainArtifacts), + crewai=artifact_bag.get("crewai", CrewAiArtifacts), + n8n=artifact_bag.get("n8n", N8nArtifacts), + api=artifact_bag.get("openai_api", OpenAIApiArtifacts), + anthropic=artifact_bag.get("anthropic_api", AnthropicArtifacts), + codex_plugin=artifact_bag.get("codex_plugin", CodexPluginArtifacts), + validation=artifact_bag.get("validation", ValidationArtifacts), + ) diff --git a/src/agents_shipgate/cli/scan/inspect.py b/src/agents_shipgate/cli/scan/inspect.py new file mode 100644 index 00000000..50837dfc --- /dev/null +++ b/src/agents_shipgate/cli/scan/inspect.py @@ -0,0 +1,159 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from agents_shipgate.config.loader import load_manifest +from agents_shipgate.core.artifact_models import ( + AnthropicArtifacts, + CodexPluginArtifacts, + CrewAiArtifacts, + GoogleAdkArtifacts, + LangChainArtifacts, + N8nArtifacts, + OpenAIApiArtifacts, +) +from agents_shipgate.core.privacy import RedactionStats, redact_data +from agents_shipgate.inputs.policy_packs import load_policy_packs +from agents_shipgate.inputs.protocol import REGISTRY + +from .path_helpers import _default_baseline_status +from .source_loading import ( + _artifact_warnings, + _flatten_and_deduplicate_tools, + 
_load_sources, +) +from .surface_redaction import _frameworks_surface +from .validation import _resolve_source_paths + + +def inspect_sources( + *, + config_path: Path, + verbose: bool = False, + plugins_enabled: bool | None = None, +) -> dict[str, object]: + """``doctor``'s manifest-introspection entry point. + + v0.20 (PR #111 review fix follow-up #3): mirrors ``_load_inputs``'s + per-scan registry clone + adapter discovery so ``doctor`` can + inspect manifests that reference third-party source types. Before + this fix, the global ``REGISTRY`` was builtin-only (by design, + after the per-scan-registry refactor), so a manifest with + ``tool_sources[].type: demo_source`` scanned fine but ``doctor`` + raised ``ConfigError: No adapter registered``. + """ + + from agents_shipgate.inputs.protocol import discover_third_party_adapters + + manifest = load_manifest(config_path) + base_dir = config_path.resolve().parent + unresolved_sources = _resolve_source_paths(manifest, base_dir, config_path) + if unresolved_sources: + # Drop unresolved-required sources from the manifest before loading + # so doctor returns a structured payload with `unresolved_sources` + # instead of raising InputParseError. scan() does not use this path + # — its `_load_sources` call is unchanged and still raises. + unresolved_ids = {entry["id"] for entry in unresolved_sources} + manifest = manifest.model_copy( + update={ + "tool_sources": [ + src for src in manifest.tool_sources + if src.id not in unresolved_ids + ] + } + ) + # v0.20 (PR #111 review follow-up #3): build a per-scan registry + # for ``doctor`` so it sees the same adapter set as ``scan``. The + # global ``REGISTRY`` is builtin-only by design after the + # per-scan-clone refactor; without this discovery step, + # third-party source types would be unresolvable here. 
+ scan_registry = REGISTRY.clone() + loaded_adapters: list[dict[str, Any]] = [] + discovery_records = discover_third_party_adapters( + scan_registry, + plugins_enabled=plugins_enabled, + loaded_adapters=loaded_adapters, + ) + third_party_records: dict[str, Any] = { + record.adapter.source_type: record + for record in discovery_records + if record.adapter is not None + } + loaded_sources, artifact_bag = _load_sources( + manifest, + base_dir, + verbose=verbose, + registry=scan_registry, + third_party_records=third_party_records, + plugins_enabled=plugins_enabled, + ) + adk_artifacts = artifact_bag.get("google_adk", GoogleAdkArtifacts) + langchain_artifacts = artifact_bag.get("langchain", LangChainArtifacts) + crewai_artifacts = artifact_bag.get("crewai", CrewAiArtifacts) + n8n_artifacts = artifact_bag.get("n8n", N8nArtifacts) + api_artifacts = artifact_bag.get("openai_api", OpenAIApiArtifacts) + anthropic_artifacts = artifact_bag.get("anthropic_api", AnthropicArtifacts) + codex_plugin_artifacts = artifact_bag.get("codex_plugin", CodexPluginArtifacts) + tools, duplicate_warnings = _flatten_and_deduplicate_tools(loaded_sources) + warnings = [warning for loaded in loaded_sources for warning in loaded.warnings] + warnings.extend(duplicate_warnings) + warnings.extend(_artifact_warnings(artifact_bag)) + policy_packs = load_policy_packs(manifest, base_dir) + warnings.extend(policy_packs.warnings) + # Some adapters expose the same warnings through both LoadedToolSource + # and the artifact bag; keep doctor warning output stable and unique. 
+ warnings = list(dict.fromkeys(warnings)) + payload = { + "project": manifest.project.name, + "agent": manifest.agent.name, + "config": str(config_path), + "total_tools": len(tools), + "sources": [ + { + "id": source.source_id, + "type": source.source_type, + "tool_count": len(source.tools), + "sample_tool": source.tools[0].name if source.tools else None, + "warnings": source.warnings, + } + for source in loaded_sources + ], + "api_surface": api_artifacts.surface_summary() if api_artifacts else None, + "anthropic_surface": ( + anthropic_artifacts.surface_summary() if anthropic_artifacts else None + ), + "frameworks": _frameworks_surface( + adk_artifacts, + langchain_artifacts, + crewai_artifacts, + n8n_artifacts, + ), + "codex_plugin_surface": ( + codex_plugin_artifacts.surface_summary().model_dump(mode="json") + if codex_plugin_artifacts + else None + ), + "policy_packs": [pack.model_dump(mode="json") for pack in policy_packs.loaded], + # v0.20 (PR #111 review follow-up #3): surface third-party + # adapter discovery results in the doctor payload so the + # operator can confirm which extensions were loaded (or why + # they were skipped) without running a full scan. 
+ "loaded_adapters": loaded_adapters, + "baseline": _default_baseline_status(base_dir), + "warnings": warnings, + "unresolved_sources": unresolved_sources, + "manifest_summary": { + "environment_target": manifest.environment.target, + "has_permissions": bool( + manifest.permissions.scopes or manifest.permissions.credential_mode + ), + "has_policies": bool( + manifest.policies.require_approval_for_tools + or manifest.policies.require_confirmation_for_tools + or manifest.policies.require_idempotency_for_tools + ), + "scope_count": len(manifest.permissions.scopes), + }, + } + return redact_data(payload, stats=RedactionStats(), path="$") diff --git a/src/agents_shipgate/cli/scan/models.py b/src/agents_shipgate/cli/scan/models.py new file mode 100644 index 00000000..fa0fa1f5 --- /dev/null +++ b/src/agents_shipgate/cli/scan/models.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from agents_shipgate.core.artifact_models import ( + AnthropicArtifacts, + CodexPluginArtifacts, + CrewAiArtifacts, + GoogleAdkArtifacts, + LangChainArtifacts, + N8nArtifacts, + OpenAIApiArtifacts, + ValidationArtifacts, +) +from agents_shipgate.core.artifacts import ArtifactBag +from agents_shipgate.core.context import ScanContext +from agents_shipgate.core.domain import Agent, LoadedToolSource, Tool +from agents_shipgate.core.privacy import RedactionStats +from agents_shipgate.report.tool_surface_diff import ToolSurfaceDiffReference +from agents_shipgate.schemas.codex_plugin import CodexPluginSurface +from agents_shipgate.schemas.manifest import AgentsShipgateManifest +from agents_shipgate.schemas.report import PolicyAudit +from agents_shipgate.schemas.surfaces import ActionSurfaceFacts + + +@dataclass(frozen=True) +class _ResolvedManifest: + """Phase 1 output: manifest after CLI overrides applied.""" + + manifest: AgentsShipgateManifest + manifest_positions: Any + base_dir: Path + + 
+@dataclass(frozen=True) +class _LoadedInputs: + """Phase 2 output: source loading + artifact extraction + warnings. + + Warning buckets are kept separate so Phase 3 (``_build_tools_and_agent``) + can interleave ``duplicate_warnings`` from + ``_flatten_and_deduplicate_tools`` between ``source_only_warnings`` and + ``artifact_warnings``, preserving the pre-decomp deterministic order: + + source → duplicate → artifact → placeholder → policy_pack → dedup + + Collapsing them into a single ``warnings`` list here (the P3 bug that + this split fixes) would push duplicate warnings to the end, changing + ``report.source_warnings`` order for fixtures with both duplicate-tool + names and artifact/policy-pack warnings. + """ + + loaded_sources: list[LoadedToolSource] + artifact_bag: ArtifactBag + policy_packs: Any # LoadedPolicyPacks + source_only_warnings: list[str] # per-source warnings, no dedup yet + artifact_warnings_list: list[str] # from _artifact_warnings(artifact_bag) + placeholder_warnings: list[str] # from _manifest_placeholder_warnings + policy_pack_warnings: list[str] # from policy_packs.warnings + # v0.20: third-party adapter provenance from + # ``discover_third_party_adapters``. Both valid and invalid records + # appear here; ``loaded_adapters[].validation_status == "valid"`` + # distinguishes them. Empty list when --no-plugins is set or no + # third-party adapters are installed. 
+ loaded_adapters: list[dict[str, Any]] + adk: GoogleAdkArtifacts | None + langchain: LangChainArtifacts | None + crewai: CrewAiArtifacts | None + n8n: N8nArtifacts | None + api: OpenAIApiArtifacts | None + anthropic: AnthropicArtifacts | None + codex_plugin: CodexPluginArtifacts | None + validation: ValidationArtifacts | None + + +@dataclass(frozen=True) +class _ToolsAndAgent: + """Phase 3 output: flattened/deduped/enriched tools + Agent + final warnings.""" + + tools: list[Tool] + agent: Agent + warnings: list[str] # deduplicated source warnings + + +@dataclass(frozen=True) +class _DiffReferences: + """Phase 4 output: optional baseline + diff_from references.""" + + baseline_file: Any # BaselineFile | None + baseline_display_path: str | None + diff_reference: ToolSurfaceDiffReference | None + diff_reference_error: str | None + + +@dataclass(frozen=True) +class _ChecksDecision: + """Phase 5 output: action surface + checks + severity + remediation.""" + + action_surface_facts: ActionSurfaceFacts + action_surface_diff: Any # ActionSurfaceDiff (internal/semantic) + findings: list[Any] # list[Finding] + legacy_fingerprints: list[str] + override_resolution: Any # SeverityOverrideResolution + loaded_plugins: list[dict[str, str | None]] + context: ScanContext + + +@dataclass(frozen=True) +class _OutputPlan: + """Phase 6 output: file paths + packet format set + privacy stats. + + ``privacy_stats`` is intentionally mutable — the sanitization phase + accumulates redaction counts into it. The dataclass is ``frozen`` only + in the sense that the field bindings don't change; the contained + ``RedactionStats`` mutates in place. + """ + + out_dir: Path + generated_paths: dict[str, Path] + packet_format_set: set[str] + output_surfaces: list[str] + privacy_stats: RedactionStats + generated_report_refs: Any + + +@dataclass +class _SanitizedSurfaces: + """Phase 7 output: every ``public_*`` value flowing into report/packet. 
+ + Not frozen — the baseline-integrity branch appends to ``findings`` + in place and refreshes derivative fields. After Phase 7 returns, + every value here has been passed through ``redact_data`` / + ``sanitize_*`` exactly once. Phase 8+ (``build_report`` / + ``build_packet`` / ``_write_*``) MUST NOT re-redact and MUST NOT + touch any raw (non-``public_*``) value. + """ + + manifest: AgentsShipgateManifest + manifest_dir: str + project: Any + environment: Any + agent: Agent + tools: list[Tool] + findings: list[Any] + source_warnings: list[str] + api_artifacts: OpenAIApiArtifacts | None + anthropic_artifacts: AnthropicArtifacts | None + validation_artifacts: ValidationArtifacts | None + api_surface: Any + anthropic_surface: Any + frameworks_surface: Any + codex_plugin_surface: CodexPluginSurface | None + policy_audit: PolicyAudit + loaded_policy_packs: list[Any] + loaded_plugins: Any + loaded_adapters: Any # v0.20: list[dict[str, Any]]; sanitized via redact_data + diff_reference: ToolSurfaceDiffReference | None + action_surface_facts: ActionSurfaceFacts + action_surface_diff: Any + tool_surface_facts: Any + tool_surface_diff: Any + baseline_summary: Any + privacy_audit: Any diff --git a/src/agents_shipgate/cli/scan/orchestrator.py b/src/agents_shipgate/cli/scan/orchestrator.py new file mode 100644 index 00000000..1d0ddde5 --- /dev/null +++ b/src/agents_shipgate/cli/scan/orchestrator.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +from pathlib import Path + +from agents_shipgate.ci.github_summary import write_github_step_summary +from agents_shipgate.core.errors import ConfigError +from agents_shipgate.schemas.report import ReadinessReport + +from .decision import _run_checks_and_decide +from .diffs import _load_diff_references +from .final_report import _build_final_report +from .inputs import _load_inputs +from .output_planning import _plan_outputs +from .prepare import _prepare_scan +from .sanitization import _sanitize_for_output +from .tools_agent 
import _build_tools_and_agent +from .writing import _write_outputs + + +def run_scan( + *, + config_path: Path, + output_dir: Path | None = None, + formats: list[str] | None = None, + ci_mode: str | None = None, + fail_on: list[str] | None = None, + baseline_path: Path | None = None, + diff_from_path: Path | None = None, + baseline_mode: str = "new-findings", + deep_import: bool = False, + policy_pack_paths: list[Path] | None = None, + plugins_enabled: bool | None = None, + verbose: bool = False, + suggest_patches: bool = False, + packet_enabled: bool | None = None, + packet_formats: list[str] | None = None, + packet_generated_at: str | None = None, +) -> tuple[ReadinessReport, int]: + """Run a full scan pipeline. Returns ``(report, exit_code)``. + + Orchestrates nine sequential phases (see the phase helpers above). + Public signature, exit-code contract, and ``_run_id`` hash inputs + are stable across the v0.19 R-3 decomposition refactor. + """ + if deep_import: + raise ConfigError("Deep import is intentionally deferred and is not supported.") + + resolved = _prepare_scan( + config_path=config_path, + ci_mode=ci_mode, + fail_on=fail_on, + output_dir=output_dir, + formats=formats, + packet_enabled=packet_enabled, + packet_formats=packet_formats, + baseline_mode=baseline_mode, + ) + inputs = _load_inputs( + manifest=resolved.manifest, + base_dir=resolved.base_dir, + config_path=config_path, + policy_pack_paths=policy_pack_paths, + verbose=verbose, + plugins_enabled=plugins_enabled, + ) + tools_and_agent = _build_tools_and_agent( + manifest=resolved.manifest, + inputs=inputs, + ) + diffs = _load_diff_references( + baseline_path=baseline_path, + diff_from_path=diff_from_path, + base_dir=resolved.base_dir, + ) + decision = _run_checks_and_decide( + manifest=resolved.manifest, + manifest_positions=resolved.manifest_positions, + config_path=config_path, + tools_and_agent=tools_and_agent, + inputs=inputs, + diffs=diffs, + plugins_enabled=plugins_enabled, + 
suggest_patches=suggest_patches, + ) + plan = _plan_outputs( + manifest=resolved.manifest, + base_dir=resolved.base_dir, + ) + sanitized = _sanitize_for_output( + manifest=resolved.manifest, + config_path=config_path, + baseline_path=baseline_path, + inputs=inputs, + tools_and_agent=tools_and_agent, + diffs=diffs, + decision=decision, + plan=plan, + plugins_enabled=plugins_enabled, + ) + report, public_report_payload = _build_final_report( + manifest=resolved.manifest, + sanitized=sanitized, + plan=plan, + ) + _write_outputs( + report=report, + public_report_payload=public_report_payload, + sanitized=sanitized, + plan=plan, + manifest=resolved.manifest, + config_path=config_path, + packet_generated_at=packet_generated_at, + ) + write_github_step_summary(report) + assert report.release_decision is not None # build_report always populates it + return report, report.release_decision.fail_policy.exit_code diff --git a/src/agents_shipgate/cli/scan/output_helpers.py b/src/agents_shipgate/cli/scan/output_helpers.py new file mode 100644 index 00000000..e67dd057 --- /dev/null +++ b/src/agents_shipgate/cli/scan/output_helpers.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import logging +from pathlib import Path + +from agents_shipgate.packet.html import write_packet_html +from agents_shipgate.packet.json_packet import write_packet_json +from agents_shipgate.packet.markdown import write_packet_markdown +from agents_shipgate.packet.pdf import ( + PdfRendererUnavailable, + is_pdf_available, + render_packet_pdf, +) +from agents_shipgate.report.json_report import write_json_report +from agents_shipgate.report.markdown import write_markdown_report +from agents_shipgate.report.sarif import write_sarif_report +from agents_shipgate.schemas.report import ReadinessReport + +PACKET_FORMAT_NAMES = {"md", "json", "html", "pdf"} +"""Allowed values for ``--packet-format`` and ``output.packet.formats``.""" + +logger = logging.getLogger(__name__) + +def _planned_generated_paths( 
+ out_dir: Path, + formats: list[str], + *, + packet_enabled: bool = False, + packet_formats: set[str] | None = None, +) -> dict[str, Path]: + paths: dict[str, Path] = {} + if "markdown" in formats: + paths["markdown"] = out_dir / "report.md" + if "json" in formats: + paths["json"] = out_dir / "report.json" + if "sarif" in formats: + paths["sarif"] = out_dir / "report.sarif" + if packet_enabled and packet_formats: + if "md" in packet_formats: + paths["packet_md"] = out_dir / "packet.md" + if "json" in packet_formats: + paths["packet_json"] = out_dir / "packet.json" + if "html" in packet_formats: + paths["packet_html"] = out_dir / "packet.html" + if "pdf" in packet_formats: + paths["packet_pdf"] = out_dir / "packet.pdf" + return paths + + +def _write_reports( + report: ReadinessReport, paths: dict[str, Path], formats: list[str] +) -> None: + if "markdown" in formats and "markdown" in paths: + write_markdown_report(report, paths["markdown"]) + if "json" in formats and "json" in paths: + write_json_report(report, paths["json"]) + if "sarif" in formats and "sarif" in paths: + write_sarif_report(report, paths["sarif"]) + + +def _write_packet(packet, paths: dict[str, Path], packet_formats: set[str]) -> None: + if "md" in packet_formats and "packet_md" in paths: + write_packet_markdown(packet, paths["packet_md"]) + if "json" in packet_formats and "packet_json" in paths: + write_packet_json(packet, paths["packet_json"]) + if "html" in packet_formats and "packet_html" in paths: + write_packet_html(packet, paths["packet_html"]) + if "pdf" in packet_formats and "packet_pdf" in paths: + try: + render_packet_pdf(packet, paths["packet_pdf"]) + except PdfRendererUnavailable as exc: + logger.warning("packet.pdf skipped: %s", exc) + + +def _resolve_packet_format_set(packet_cfg) -> tuple[set[str], bool]: + """Resolve the writeable packet formats after probing weasyprint. 
+ + Returns ``(formats, pdf_skipped)``: ``formats`` is the set of + format names that should actually be emitted; ``pdf_skipped`` is + ``True`` iff the user requested PDF but weasyprint is unavailable + on this install (so the caller can record a single warning). + """ + + requested = {fmt for fmt in packet_cfg.formats if fmt in PACKET_FORMAT_NAMES} + if not packet_cfg.enabled: + return set(), False + if "pdf" in requested and not is_pdf_available(): + return requested - {"pdf"}, True + return requested, False diff --git a/src/agents_shipgate/cli/scan/output_planning.py b/src/agents_shipgate/cli/scan/output_planning.py new file mode 100644 index 00000000..bc0643ba --- /dev/null +++ b/src/agents_shipgate/cli/scan/output_planning.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import logging +import os +from pathlib import Path + +from agents_shipgate.core.privacy import RedactionStats, redact_data +from agents_shipgate.schemas.manifest import AgentsShipgateManifest + +from .models import _OutputPlan +from .output_helpers import _planned_generated_paths, _resolve_packet_format_set +from .path_helpers import _relative_display_path + +logger = logging.getLogger(__name__) + +def _plan_outputs( + *, + manifest: AgentsShipgateManifest, + base_dir: Path, +) -> _OutputPlan: + """Phase 6: resolve output dir + planned file paths + packet format + set (filtering PDF if weasyprint is missing). Initialize the + ``RedactionStats`` accumulator and the already-redacted + ``generated_reports`` map needed by ``build_report`` downstream. + """ + out_dir = (base_dir / manifest.output.directory).resolve() + packet_cfg = manifest.output.packet + packet_format_set, packet_pdf_skipped = _resolve_packet_format_set(packet_cfg) + if packet_pdf_skipped: + # PDF availability is an *output renderer* concern, not a source + # loader concern. 
Routing it through `warnings` would inflate + # `evidence_coverage.source_warning_count` and add a noise + # residual to the packet's §10, telling reviewers to rerun the + # scan after fixing source warnings even when no source loader + # had a problem. Log it instead — same channel as runtime + # WeasyPrint failures in `_write_packet`. + logger.warning( + "packet.pdf requested but weasyprint is not installed; " + "install with `pipx install 'agents-shipgate[pdf]'` to " + "enable. Skipping PDF for this run." + ) + generated_paths = _planned_generated_paths( + out_dir, + manifest.output.formats, + packet_enabled=packet_cfg.enabled, + packet_formats=packet_format_set, + ) + privacy_stats = RedactionStats() + generated_report_refs = redact_data( + { + key: _relative_display_path(path, base_dir) + for key, path in generated_paths.items() + }, + stats=privacy_stats, + path="generated_reports", + ) + output_surfaces = list(generated_paths) + if os.environ.get("GITHUB_STEP_SUMMARY"): + output_surfaces.append("github_step_summary") + return _OutputPlan( + out_dir=out_dir, + generated_paths=generated_paths, + packet_format_set=packet_format_set, + output_surfaces=output_surfaces, + privacy_stats=privacy_stats, + generated_report_refs=generated_report_refs, + ) diff --git a/src/agents_shipgate/cli/scan/patching.py b/src/agents_shipgate/cli/scan/patching.py new file mode 100644 index 00000000..63431a7f --- /dev/null +++ b/src/agents_shipgate/cli/scan/patching.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from pathlib import Path + + +def _check_metadata_lookup( + *, plugins_enabled: bool | None +) -> dict: + """Build a {check_id: CheckMetadata} lookup honoring the scan's + actual plugin setting. Used by ``annotate_remediation`` so the + serialized report's per-finding remediation fields reflect the + catalog the scan was run against. 
+ + Avoids the late-stage plugin-loading hazard: by passing the lookup + *into* annotation, we never call ``check_catalog()`` at write time + where ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` could re-load plugins + even for ``--no-plugins`` scans. + """ + from agents_shipgate.checks.registry import check_catalog + + return { + check.id: check + for check in check_catalog(plugins_enabled=plugins_enabled) + } + + +def _attach_patches( + findings: list, + manifest, + config_path: Path, + *, + plugins_enabled: bool | None, +) -> None: + """Attach Patch objects to unsuppressed findings (per v0.6 plan §3). + + Suppressed findings are intentionally skipped — apply-patches must + not mutate entries the user marked ignored. + + Coverage rule: every active finding gets ≥ 1 patch (non-manual when + a generator exists, ManualPatch otherwise). Findings without + --suggest-patches keep ``patches=None`` (per C4) and are filtered + out of the JSON by ``report_json_payload``. + + Per the v0.7 PR 3 review: ``plugins_enabled`` is forwarded into + ``check_catalog`` so the recommendation lookup honors the scan's + explicit ``--no-plugins`` flag even when ``AGENTS_SHIPGATE_ENABLE_PLUGINS=1`` + is set in the environment. Without this, the patch-attachment path + would load third-party plugin entry points before + ``annotate_remediation`` ran with its plugin-safe lookup. 
+ """ + from agents_shipgate.checks.patches import ( + PatchContext, + generate_patches_for_finding, + ) + from agents_shipgate.checks.registry import check_catalog + + recommendation_lookup = { + check.id: check.recommendation + for check in check_catalog(plugins_enabled=plugins_enabled) + if check.recommendation + } + context = PatchContext( + manifest=manifest, + manifest_path=config_path, + recommendation_lookup=recommendation_lookup, + ) + for finding in findings: + if finding.suppressed: + continue + finding.patches = generate_patches_for_finding(context, finding) diff --git a/src/agents_shipgate/cli/scan/path_helpers.py b/src/agents_shipgate/cli/scan/path_helpers.py new file mode 100644 index 00000000..1e7af8fb --- /dev/null +++ b/src/agents_shipgate/cli/scan/path_helpers.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import os +from pathlib import Path + +from agents_shipgate.core.baseline_audit import DEFAULT_AUDIT_LOG_PATH +from agents_shipgate.schemas.manifest import AgentsShipgateManifest + + +def _relative_display_path(path: Path, base_dir: Path) -> str: + resolved = path.resolve() + base = base_dir.resolve() + rel = os.path.relpath(resolved, base) + if rel == ".." or rel.startswith(f"..{os.sep}"): + return str(resolved) + return rel + + +def _resolve_audit_log_path( + manifest: AgentsShipgateManifest, + baseline_path: Path, +) -> Path: + """Resolve the baseline audit log path. + + Resolution order: + 1. ``manifest.baseline.audit_log`` if set (relative paths resolved + against the baseline file's directory). + 2. Otherwise ``<baseline_dir>/baseline-audit.log`` — + co-located with the baseline JSON. This matches the default that + ``write_baseline`` uses, so save/verify see the same file + without configuration. 
+ """ + override = manifest.baseline.audit_log + if override: + candidate = Path(override) + if not candidate.is_absolute(): + candidate = baseline_path.parent / candidate + return candidate + return baseline_path.parent / DEFAULT_AUDIT_LOG_PATH.name + + +def _default_baseline_status(base_dir: Path) -> dict[str, object]: + path = base_dir / ".agents-shipgate" / "baseline.json" + return { + "default_path": _relative_display_path(path, base_dir), + "present": path.exists(), + } diff --git a/src/agents_shipgate/cli/scan/prepare.py b/src/agents_shipgate/cli/scan/prepare.py new file mode 100644 index 00000000..00cffbad --- /dev/null +++ b/src/agents_shipgate/cli/scan/prepare.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from pathlib import Path + +from agents_shipgate.config.loader import load_manifest_with_positions +from agents_shipgate.core.errors import ConfigError +from agents_shipgate.schemas.common import parse_severity + +from .models import _ResolvedManifest +from .output_helpers import PACKET_FORMAT_NAMES + + +def _prepare_scan( + *, + config_path: Path, + ci_mode: str | None, + fail_on: list[str] | None, + output_dir: Path | None, + formats: list[str] | None, + packet_enabled: bool | None, + packet_formats: list[str] | None, + baseline_mode: str, +) -> _ResolvedManifest: + """Phase 1: load manifest with positions; apply CLI overrides. + + CLI overrides take precedence over manifest values. Raises + ``ConfigError`` (exit 2) for invalid packet formats or unsupported + baseline modes — both fail before any source loading happens. 
+ """ + raw_manifest, manifest_positions = load_manifest_with_positions(config_path) + manifest = raw_manifest.model_copy(deep=True) + if ci_mode: + manifest.ci.mode = ci_mode + if fail_on is not None: + manifest.ci.fail_on = [parse_severity(item) for item in fail_on] + if output_dir: + manifest.output.directory = str(output_dir) + if formats: + manifest.output.formats = formats + if packet_enabled is not None: + manifest.output.packet.enabled = packet_enabled + if packet_formats is not None: + invalid = [f for f in packet_formats if f not in PACKET_FORMAT_NAMES] + if invalid: + raise ConfigError( + "--packet-format values must be one of " + f"{sorted(PACKET_FORMAT_NAMES)}; got {invalid}" + ) + manifest.output.packet.formats = packet_formats + if baseline_mode != "new-findings": + raise ConfigError("--baseline-mode supports only new-findings") + return _ResolvedManifest( + manifest=manifest, + manifest_positions=manifest_positions, + base_dir=config_path.resolve().parent, + ) diff --git a/src/agents_shipgate/cli/scan/run_identity.py b/src/agents_shipgate/cli/scan/run_identity.py new file mode 100644 index 00000000..20c951fd --- /dev/null +++ b/src/agents_shipgate/cli/scan/run_identity.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import hashlib +import json + +from agents_shipgate.core.domain import Tool +from agents_shipgate.core.findings.summaries import tool_inventory +from agents_shipgate.schemas.codex_plugin import CodexPluginSurface +from agents_shipgate.schemas.surfaces import ActionSurfaceFacts + + +def _run_id( + manifest, + tools: list[Tool], + findings, + project: dict[str, object] | None = None, + agent_name: str | None = None, + environment: dict[str, object] | None = None, + api_surface: dict[str, object] | None = None, + anthropic_surface: dict[str, object] | None = None, + frameworks: dict[str, object] | None = None, + codex_plugin_surface: CodexPluginSurface | None = None, + action_surface_facts: ActionSurfaceFacts | None = None, +) -> 
str: + payload = { + "project": project + if project is not None + else manifest.project.model_dump(mode="json", exclude_none=False), + "agent_name": agent_name if agent_name is not None else manifest.agent.name, + "environment": environment + if environment is not None + else manifest.environment.model_dump(mode="json", exclude_none=False), + "tool_inventory": tool_inventory(tools), + "findings": [ + finding.model_dump( + mode="json", + # Exclude derived-enrichment fields (per C11 + v0.7 + # review finding 2): patches and the four remediation + # fields are computed AFTER the input surface is + # known, so they MUST NOT enter the run_id hash. Two + # scans of the same workspace must produce the same + # run_id whether `--suggest-patches` is set or not, and + # whether v0.7 metadata is present or not. + exclude={ + "id": True, + "baseline_status": True, + "patches": True, + "autofix_safe": True, + "requires_human_review": True, + "suggested_patch_kind": True, + "docs_url": True, + "blocks_release": True, + # v0.12 derived enrichment: same exclusion rule as + # the v0.7 remediation fields above. agent_action is + # a deterministic projection of those fields, so + # excluding them already implies it should be + # excluded — but make it explicit so a future + # contributor doesn't have to trace the projection. + "agent_action": True, + # v0.11 provenance fields are excluded so YAML line + # drift cannot churn run_id; the legacy + # type/ref/location strings stay in the hash so + # existing run_ids remain stable. + "source": { + "path": True, + "start_line": True, + "end_line": True, + "start_column": True, + "pointer": True, + }, + # v0.19 reviewer-grade provenance: the secondary + # manifest pointer ``policy_evidence_source`` is + # excluded in its entirety. The whole field is + # additive (older scans never emitted it) and + # YAML line drift on the manifest must not churn + # run_id — same rationale as the v0.11 exclusion + # above. 
+ "policy_evidence_source": True, + }, + exclude_none=False, + ) + for finding in findings + ], + "api_surface": api_surface, + "anthropic_surface": anthropic_surface, + "frameworks": frameworks or {}, + "codex_plugin_surface": ( + codex_plugin_surface.model_dump(mode="json") if codex_plugin_surface else None + ), + "action_surface_facts": ( + action_surface_facts.model_dump(mode="json") + if action_surface_facts is not None + else None + ), + } + digest = hashlib.sha256( + json.dumps(payload, sort_keys=True, default=str).encode("utf-8") + ).hexdigest()[:16] + return f"agents_shipgate_{digest}" diff --git a/src/agents_shipgate/cli/scan/sanitization.py b/src/agents_shipgate/cli/scan/sanitization.py new file mode 100644 index 00000000..64f82826 --- /dev/null +++ b/src/agents_shipgate/cli/scan/sanitization.py @@ -0,0 +1,471 @@ +from __future__ import annotations + +import logging +from pathlib import Path + +from agents_shipgate.checks.baseline_integrity import build_findings as build_integrity_findings +from agents_shipgate.core.artifact_models import ( + AnthropicArtifacts, + OpenAIApiArtifacts, + ValidationArtifacts, +) +from agents_shipgate.core.baseline import ( + apply_baseline, + baseline_resolved_fingerprints, + verify_baseline, +) +from agents_shipgate.core.domain import Agent +from agents_shipgate.core.errors import InputParseError +from agents_shipgate.core.findings.identity import assign_finding_ids +from agents_shipgate.core.findings.remediation import annotate_remediation +from agents_shipgate.core.privacy import ( + build_privacy_audit, + redact_data, + sanitize_findings, + sanitize_model, + sanitize_tools, +) +from agents_shipgate.report.action_surface_diff import ( + action_reference_from_scan_reference, + attach_action_surface_finding_summary, + compute_action_surface_diff, + enrich_action_surface_diff_with_source, +) +from agents_shipgate.report.tool_surface_diff import ( + build_tool_surface_facts, + compute_tool_surface_diff, + 
disabled_tool_surface_diff, + enrich_tool_surface_diff_with_source, +) +from agents_shipgate.schemas.manifest import AgentsShipgateManifest +from agents_shipgate.schemas.report import BaselineSummary, LoadedPolicyPack, PolicyAudit +from agents_shipgate.schemas.surfaces import ToolSurfaceFacts + +from .models import ( + _ChecksDecision, + _DiffReferences, + _LoadedInputs, + _OutputPlan, + _SanitizedSurfaces, + _ToolsAndAgent, +) +from .patching import _check_metadata_lookup +from .path_helpers import _resolve_audit_log_path +from .source_loading import _tool_source_index +from .surface_redaction import ( + _build_public_action_surface_facts, + _frameworks_surface, + _sanitize_codex_plugin_surface, + _sanitize_diff_reference, +) + +logger = logging.getLogger(__name__) + +def _sanitize_for_output( + *, + manifest: AgentsShipgateManifest, + config_path: Path, + baseline_path: Path | None, + inputs: _LoadedInputs, + tools_and_agent: _ToolsAndAgent, + diffs: _DiffReferences, + decision: _ChecksDecision, + plan: _OutputPlan, + plugins_enabled: bool | None, +) -> _SanitizedSurfaces: + """Phase 7: privacy redaction of every value that flows into a + report or packet — STABILITY contract: runs BEFORE any file is + written. Also: assign public finding IDs (redacted-evidence + fingerprints), apply baseline (with legacy-fingerprint + compatibility), run baseline-integrity checks, build public + tool/action surface facts + diffs (enriched with provenance from + the *public* tool source index, never from the raw one), build the + final privacy audit envelope. + + Returns a single ``_SanitizedSurfaces`` bundle. Nothing in later + phases re-redacts, and ``build_report`` / ``build_packet`` see only + these values. 
+ """ + privacy_stats = plan.privacy_stats + + public_manifest = sanitize_model( + manifest, AgentsShipgateManifest, stats=privacy_stats, path="manifest" + ) + public_manifest_dir = redact_data( + str(config_path.resolve().parent), + stats=privacy_stats, + path="manifest_dir", + ) + public_api_artifacts = ( + sanitize_model( + inputs.api, + OpenAIApiArtifacts, + stats=privacy_stats, + path="api_artifacts", + ) + if inputs.api + else None + ) + public_anthropic_artifacts = ( + sanitize_model( + inputs.anthropic, + AnthropicArtifacts, + stats=privacy_stats, + path="anthropic_artifacts", + ) + if inputs.anthropic + else None + ) + public_validation_artifacts = ( + sanitize_model( + inputs.validation, + ValidationArtifacts, + stats=privacy_stats, + path="validation_artifacts", + ) + if inputs.validation + else None + ) + public_tools = sanitize_tools(tools_and_agent.tools, stats=privacy_stats) + public_findings = sanitize_findings(decision.findings, stats=privacy_stats) + assign_finding_ids(public_findings) + + public_project = redact_data( + public_manifest.project.model_dump(exclude_none=True), + stats=privacy_stats, + path="project", + ) + public_agent = sanitize_model( + tools_and_agent.agent, Agent, stats=privacy_stats, path="agent" + ) + public_environment = redact_data( + public_manifest.environment.model_dump(exclude_none=True), + stats=privacy_stats, + path="environment", + ) + public_source_warnings = redact_data( + tools_and_agent.warnings, + stats=privacy_stats, + path="source_warnings[]", + ) + public_api_surface = redact_data( + public_api_artifacts.surface_summary() if public_api_artifacts else None, + stats=privacy_stats, + path="api_surface", + ) + public_anthropic_surface = redact_data( + public_anthropic_artifacts.surface_summary() + if public_anthropic_artifacts + else None, + stats=privacy_stats, + path="anthropic_surface", + ) + public_frameworks_surface = redact_data( + _frameworks_surface( + inputs.adk, + inputs.langchain, + inputs.crewai, + 
inputs.n8n, + ), + stats=privacy_stats, + path="frameworks", + ) + public_codex_plugin_surface = _sanitize_codex_plugin_surface( + inputs.codex_plugin.surface_summary() if inputs.codex_plugin else None, + stats=privacy_stats, + ) + public_policy_audit = sanitize_model( + decision.override_resolution.audit, + PolicyAudit, + stats=privacy_stats, + path="policy_audit", + ) + public_loaded_policy_packs = [ + sanitize_model( + pack, + LoadedPolicyPack, + stats=privacy_stats, + path="loaded_policy_packs[]", + ) + for pack in inputs.policy_packs.loaded + ] + public_loaded_plugins = redact_data( + decision.loaded_plugins, + stats=privacy_stats, + path="loaded_plugins[]", + ) + # v0.20: third-party adapter provenance. Same redaction shape as + # loaded_plugins[] — entry-point ``value`` strings and distribution + # metadata are first-party and don't carry secrets, but the audit + # envelope flows through redact_data for forward-compat with future + # adapter-emitted fields. + public_loaded_adapters = redact_data( + inputs.loaded_adapters, + stats=privacy_stats, + path="loaded_adapters[]", + ) + + ( + public_diff_reference, + public_action_surface_facts, + public_action_surface_diff, + ) = _public_action_surfaces( + public_manifest=public_manifest, + public_agent_id=public_agent.id, + public_tools=public_tools, + diffs=diffs, + decision=decision, + privacy_stats=privacy_stats, + ) + + baseline_summary = None + if diffs.baseline_file and diffs.baseline_display_path: + baseline_summary = apply_baseline( + public_findings, + diffs.baseline_file, + display_path=diffs.baseline_display_path, + legacy_fingerprints=decision.legacy_fingerprints, + ) + baseline_summary = sanitize_model( + baseline_summary, + BaselineSummary, + stats=privacy_stats, + path="baseline", + ) + _append_baseline_integrity_findings( + manifest=manifest, + baseline_path=baseline_path, + baseline_file=diffs.baseline_file, + decision=decision, + public_findings=public_findings, + 
public_source_warnings=public_source_warnings, + privacy_stats=privacy_stats, + plugins_enabled=plugins_enabled, + ) + attach_action_surface_finding_summary(public_action_surface_diff, public_findings) + + public_tool_surface_facts, public_tool_surface_diff = _public_tool_surfaces( + public_manifest=public_manifest, + public_tools=public_tools, + public_findings=public_findings, + public_api_artifacts=public_api_artifacts, + public_anthropic_artifacts=public_anthropic_artifacts, + public_diff_reference=public_diff_reference, + diffs=diffs, + privacy_stats=privacy_stats, + ) + privacy_audit = build_privacy_audit( + privacy_stats, + output_surfaces=plan.output_surfaces, + notes=[ + "Default-on best-effort pattern/key redaction ran before public artifacts were written.", + "Redaction audit paths contain counts and secret kinds only; raw values and raw hashes are not emitted.", + *( + [ + "Baseline matching accepts legacy pre-v0.18 raw secret fingerprints for compatibility; re-save reviewed baselines to migrate to redacted public fingerprints." 
+ ] + if diffs.baseline_file and privacy_stats.occurrence_count + else [] + ), + ], + ) + return _SanitizedSurfaces( + manifest=public_manifest, + manifest_dir=public_manifest_dir, + project=public_project, + environment=public_environment, + agent=public_agent, + tools=public_tools, + findings=public_findings, + source_warnings=public_source_warnings, + api_artifacts=public_api_artifacts, + anthropic_artifacts=public_anthropic_artifacts, + validation_artifacts=public_validation_artifacts, + api_surface=public_api_surface, + anthropic_surface=public_anthropic_surface, + frameworks_surface=public_frameworks_surface, + codex_plugin_surface=public_codex_plugin_surface, + policy_audit=public_policy_audit, + loaded_policy_packs=public_loaded_policy_packs, + loaded_plugins=public_loaded_plugins, + loaded_adapters=public_loaded_adapters, + diff_reference=public_diff_reference, + action_surface_facts=public_action_surface_facts, + action_surface_diff=public_action_surface_diff, + tool_surface_facts=public_tool_surface_facts, + tool_surface_diff=public_tool_surface_diff, + baseline_summary=baseline_summary, + privacy_audit=privacy_audit, + ) + + +def _public_action_surfaces( + *, + public_manifest: AgentsShipgateManifest, + public_agent_id: str, + public_tools: list, + diffs: _DiffReferences, + decision: _ChecksDecision, + privacy_stats, +): + public_diff_reference = _sanitize_diff_reference( + diffs.diff_reference, + stats=privacy_stats, + ) + public_action_surface_facts = _build_public_action_surface_facts( + raw_facts=decision.action_surface_facts, + manifest=public_manifest, + agent_id=public_agent_id, + tools=public_tools, + stats=privacy_stats, + ) + public_action_reference = action_reference_from_scan_reference(public_diff_reference) + public_action_surface_diff = compute_action_surface_diff( + public_action_surface_facts, + public_action_reference.facts if public_action_reference else None, + reference=public_action_reference, + ) + if diffs.diff_reference_error: + 
public_action_surface_diff.enabled = False + public_action_surface_diff.notes = redact_data( + [diffs.diff_reference_error], + stats=privacy_stats, + path="action_surface_diff.notes", + ) + # v0.19 reviewer-grade provenance: enrich the PUBLIC action-surface + # diff rows from ``public_tools`` (already sanitized) so the + # rendered ``report.json`` and packet §3B carry tool source + # citations on every reason field. + enrich_action_surface_diff_with_source( + public_action_surface_diff, _tool_source_index(public_tools) + ) + return ( + public_diff_reference, + public_action_surface_facts, + public_action_surface_diff, + ) + + +def _public_tool_surfaces( + *, + public_manifest: AgentsShipgateManifest, + public_tools: list, + public_findings: list, + public_api_artifacts: OpenAIApiArtifacts | None, + public_anthropic_artifacts: AnthropicArtifacts | None, + public_diff_reference, + diffs: _DiffReferences, + privacy_stats, +): + public_tool_surface_facts = sanitize_model( + build_tool_surface_facts( + public_manifest, + public_tools, + public_findings, + public_api_artifacts, + public_anthropic_artifacts, + ), + ToolSurfaceFacts, + stats=privacy_stats, + path="tool_surface_facts", + ) + if diffs.diff_reference_error: + public_tool_surface_diff = disabled_tool_surface_diff( + redact_data( + diffs.diff_reference_error, + stats=privacy_stats, + path="tool_surface_diff.notes", + ) + ) + else: + public_tool_surface_diff = compute_tool_surface_diff( + public_tool_surface_facts, + public_diff_reference.facts if public_diff_reference else None, + public_findings, + reference=public_diff_reference, + ) + # v0.19 reviewer-grade provenance: enrich tool-surface diff + # controls (and any other reason-bearing rows) with the public + # tool path:line citation so the rendered report.json and packet + # §3A carry source info on every change-row reason. 
+ enrich_tool_surface_diff_with_source( + public_tool_surface_diff, _tool_source_index(public_tools) + ) + return public_tool_surface_facts, public_tool_surface_diff + + +def _append_baseline_integrity_findings( + *, + manifest: AgentsShipgateManifest, + baseline_path: Path | None, + baseline_file, + decision: _ChecksDecision, + public_findings: list, + public_source_warnings: list[str], + privacy_stats, + plugins_enabled: bool | None, +) -> None: + """Append public baseline-integrity findings after baseline matching. + + Runs after public finding fingerprints are assigned so integrity output + does not depend on raw secret-bearing finding IDs. Mutates + ``public_findings`` and ``public_source_warnings`` in place, matching the + original Phase 7 ordering. + """ + integrity_mode = manifest.baseline.integrity_mode + if integrity_mode == "off" or baseline_path is None: + return + + audit_log_path = _resolve_audit_log_path(manifest, baseline_path) + try: + static_issues = verify_baseline(baseline_path, audit_log_path) + except InputParseError as exc: + logger.warning( + "baseline integrity verification failed", + extra={ + "agents_shipgate_baseline_path": str(baseline_path), + "agents_shipgate_error": str(exc), + }, + ) + static_issues = [] + warning = f"Baseline integrity check skipped: {exc}" + public_source_warnings.append( + redact_data( + warning, + stats=privacy_stats, + path="source_warnings[]", + ) + ) + stale_issues = baseline_resolved_fingerprints( + public_findings, + baseline_file, + legacy_fingerprints=decision.legacy_fingerprints, + ) + baseline_privacy_hint = None + if stale_issues and privacy_stats.occurrence_count: + baseline_privacy_hint = ( + "If these stale baseline entries appeared immediately after " + "upgrading to report schema v0.18, review and regenerate the " + "baseline. Secret-bearing public fingerprints are now computed " + "from redacted evidence." 
+ ) + for issue in stale_issues: + issue.evidence["v0_18_privacy_migration_hint"] = baseline_privacy_hint + integrity_findings = build_integrity_findings( + static_issues + stale_issues, + context=decision.context, + integrity_mode=integrity_mode, + ) + if baseline_privacy_hint: + for finding in integrity_findings: + if finding.check_id == "SHIP-BASELINE-ENTRY-STALE": + finding.recommendation = f"{finding.recommendation} {baseline_privacy_hint}" + if not integrity_findings: + return + + public_findings.extend(sanitize_findings(integrity_findings, stats=privacy_stats)) + assign_finding_ids(public_findings) + annotate_remediation( + public_findings, + _check_metadata_lookup(plugins_enabled=plugins_enabled), + ) diff --git a/src/agents_shipgate/cli/scan/source_loading.py b/src/agents_shipgate/cli/scan/source_loading.py new file mode 100644 index 00000000..19b78d16 --- /dev/null +++ b/src/agents_shipgate/cli/scan/source_loading.py @@ -0,0 +1,333 @@ +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Any + +from agents_shipgate.core.artifacts import ArtifactBag +from agents_shipgate.core.domain import LoadedToolSource, Tool +from agents_shipgate.core.errors import InputParseError +from agents_shipgate.inputs.protocol import REGISTRY, LoadedAdapterResult, ToolSourceAdapter +from agents_shipgate.schemas.manifest import AgentsShipgateManifest, ToolSourceConfig + +logger = logging.getLogger(__name__) + + +def _load_sources( + manifest: AgentsShipgateManifest, + base_dir: Path, + *, + verbose: bool, + registry: Any = None, + third_party_records: dict[str, Any] | None = None, + plugins_enabled: bool | None = None, +) -> tuple[list[LoadedToolSource], ArtifactBag]: + """Dispatch every adapter through the supplied ``registry``. + + Returns ``(loaded_sources, artifact_bag)``. ``artifact_bag`` is a + typed ``ArtifactBag`` with per-scan adapter artifacts keyed by + ``source_type``. 
Per-source adapters (mcp, openapi, + openai_agents_sdk) never populate artifacts. + + Ordering is deterministic and matches the legacy run_scan order: + + 1. per-source loaders in tool_sources declared order + 2. per-scan adapters in registry iteration order: + google_adk → langchain → crewai → n8n → openai_api + → anthropic_api → codex_plugin → validation + + Per-scan adapters are invoked unconditionally in pass 2, in + canonical order — NOT in tool_sources declared order. This matches + today's run_scan exactly: framework loaders fire once per scan in + fixed order, and the manifest-only loaders (openai_api, + anthropic_api) and codex_plugin trail them. + Per-scan source types appearing in tool_sources are ignored by + pass 1 — they would be redundant; framework loaders already iterate + every matching entry internally via the manifest. + + v0.20 (PR #111 review fix): ``registry`` is the per-scan registry + built by ``_load_inputs`` (``REGISTRY.clone()`` plus any + third-party adapters validated in this scan). Defaults to the + module-global ``REGISTRY`` only for callers that bypass + ``_load_inputs`` (notably the legacy tests in + ``tests/test_adapter_registry.py``). New code should always pass + a per-scan registry. + + v0.20 (PR #111 review fix follow-up #2): ``third_party_records`` + maps each validated third-party ``source_type`` to its + ``LoadedAdapter`` record (from ``discover_third_party_adapters``). + When set, the dispatcher routes those adapters through + ``run_validated_adapter`` so any exception during their + ``load()`` call is captured into + ``loaded_adapters[].runtime_errors`` and the scan continues in + lenient mode (or trips ``--strict-plugins`` exit 4 in strict + mode). Built-in adapters keep the direct call shape — a built-in + raising means the scanner itself is broken and must abort loudly. 
+ + ``plugins_enabled`` is forwarded into ``AdapterRegistry.require`` so + unknown third-party source-type errors reflect explicit CLI + overrides such as ``--no-plugins`` instead of only inspecting the + environment. + """ + if registry is None: + registry = REGISTRY + if third_party_records is None: + third_party_records = {} + per_source_loaded: list[LoadedToolSource] = [] + per_scan_loaded: list[LoadedToolSource] = [] + bag = ArtifactBag() + + # Pass 1 — per-source adapters only, in tool_sources declared + # order. Per-scan source types (langchain, crewai, etc.) are + # skipped here; pass 2 invokes them in canonical registry order + # regardless of where they appear in tool_sources. This protects + # the dedup tie-break in _flatten_and_deduplicate_tools from + # changing based on user-facing tool_sources ordering. + for source in manifest.tool_sources: + adapter = registry.require(source.type, plugins_enabled=plugins_enabled) + if adapter.scope != "per_source": + continue + third_party_record = third_party_records.get(source.type) + result = _invoke_per_source_adapter( + adapter, + source, + base_dir, + manifest, + verbose=verbose, + third_party_record=third_party_record, + ) + if result is None: + # Third-party adapter raised at runtime; the wrapper + # captured the failure into runtime_errors and we skip + # absorbing the (None) result. + continue + _absorb(result, source.type, per_source_loaded, bag, adapter) + + # Pass 2 — every per-scan adapter fires once, in registry order. + # Covers framework adapters (always check their manifest section + # internally and may emit zero LoadedToolSource entries when not + # configured) and manifest-only adapters (openai_api, + # anthropic_api, n8n). 
+ for adapter in registry.per_scan_adapters(): + third_party_record = third_party_records.get(adapter.source_type) + if third_party_record is not None: + from agents_shipgate.inputs.adapter_validation import ( + run_validated_adapter, + ) + + result = run_validated_adapter( + third_party_record, + source=None, + base_dir=base_dir, + manifest=manifest, + ) + if result is None: + continue + else: + result = adapter.load(None, base_dir, manifest) + _absorb(result, adapter.source_type, per_scan_loaded, bag, adapter) + + return per_source_loaded + per_scan_loaded, bag + + +def _tool_source_index( + tools: list[Tool], +) -> dict[str, tuple[str | None, int | None]]: + """Build a tool-name → ``(source_path, source_start_line)`` map for + surface-diff enrichment. + + Used by ``enrich_action_surface_diff_with_source`` and + ``enrich_tool_surface_diff_with_source`` to append + ``(source: path:line)`` to change-row ``reason`` strings, and by + the packet builder to suffix §3A / §3B highlights. Empty when the + tool list is empty so callers can rely on a boolean test. 
+ """ + return { + tool.name: (tool.source_path, tool.source_start_line) + for tool in tools + } + + +def _artifact_warnings(artifact_bag: ArtifactBag) -> list[str]: + warnings: list[str] = [] + for artifact in artifact_bag.raw().values(): + artifact_warnings = getattr(artifact, "warnings", None) + if isinstance(artifact_warnings, list): + warnings.extend(str(warning) for warning in artifact_warnings) + return warnings + + +def _absorb( + result: LoadedAdapterResult, + source_type: str, + sink: list[LoadedToolSource], + bag: ArtifactBag, + adapter: ToolSourceAdapter, +) -> None: + sink.extend(result.tool_sources) + if result.artifact is not None: + if adapter.artifact_class is not None and not isinstance( + result.artifact, adapter.artifact_class + ): + raise TypeError( + f"Adapter {adapter.source_type!r} declared " + f"artifact_class={adapter.artifact_class.__name__} but " + f"returned {type(result.artifact).__name__}" + ) + bag.set(source_type, result.artifact) + if result.warnings: + sink.append( + LoadedToolSource( + source_id=f"adapter:{source_type}", + source_type=source_type, + warnings=list(result.warnings), + ) + ) + + +def _invoke_per_source_adapter( + adapter: ToolSourceAdapter, + source: ToolSourceConfig, + base_dir: Path, + manifest: AgentsShipgateManifest, + *, + verbose: bool, + third_party_record: Any = None, +) -> LoadedAdapterResult | None: + """Invoke a per_source adapter and return its result. + + For **built-in** adapters: catch ``InputParseError`` only when the + source is marked ``optional`` (returning a warning-only stub); + any other exception propagates. A built-in raising means the + scanner is broken and must abort loudly. + + For **third-party** adapters (``third_party_record`` is the + matching ``LoadedAdapter``): route through + ``run_validated_adapter``, which captures ALL exceptions into the + record's ``runtime_errors`` list and returns ``None``. 
Returning + ``None`` signals the caller to skip ``_absorb`` for this source — + the scan continues in lenient mode and ``--strict-plugins`` sees + the runtime error on exit. + """ + + if third_party_record is not None: + from agents_shipgate.inputs.adapter_validation import ( + run_validated_adapter, + ) + + return run_validated_adapter( + third_party_record, + source=source, + base_dir=base_dir, + manifest=manifest, + ) + try: + return adapter.load(source, base_dir, manifest) + except InputParseError: + if source.optional: + warning = f"Optional source {source.id} failed to load" + if verbose: + warning = ( + f"{warning}; continuing because the source is marked optional" + ) + return LoadedAdapterResult( + tool_sources=[ + LoadedToolSource( + source_id=source.id, + source_type=source.type, + warnings=[warning], + ) + ], + ) + raise + + +def _flatten_and_deduplicate_tools( + loaded_sources: list[LoadedToolSource], +) -> tuple[list[Tool], list[str]]: + by_id: dict[str, Tool] = {} + warnings: list[str] = [] + for loaded in loaded_sources: + for tool in loaded.tools: + existing = by_id.get(tool.id) + if not existing: + by_id[tool.id] = tool + continue + if _source_priority(tool) > _source_priority(existing): + kept, dropped = tool, existing + else: + kept, dropped = existing, tool + by_id[tool.id] = _merge_duplicate_tool_metadata(kept, dropped) + warnings.append( + "Duplicate tool name " + f"{tool.name!r}; kept {kept.source_type} source {kept.source_id!r} " + f"and merged metadata from {dropped.source_type} source {dropped.source_id!r}." + ) + return list(by_id.values()), warnings + + +def _source_priority(tool: Tool) -> int: + # Anthropic and OpenAI artifacts are equally authoritative; on duplicate + # tool names across them the first-loaded entry wins (OpenAI is loaded + # first in run_scan), and a `Duplicate tool name` warning surfaces. 
+ return { + "openai_api": 40, + "anthropic_api": 40, + "openapi": 30, + "google_adk_inventory": 25, + "langchain_inventory": 25, + "crewai_inventory": 25, + "codex_plugin_mcp_inventory": 25, + "n8n_inventory": 25, + "mcp": 20, + "google_adk_function": 10, + "langchain_function": 10, + "langchain_structured_tool": 10, + "crewai_function": 10, + "crewai_class_tool": 10, + "n8n_ai_tool": 10, + "n8n_workflow_tool": 10, + "n8n_code_tool": 10, + "n8n_http_tool": 10, + "n8n_mcp_client_tool": 10, + "sdk_function": 10, + "google_adk_config": 5, + "crewai_prebuilt_tool": 5, + }.get(tool.source_type, 0) + + +def _merge_duplicate_tool_metadata(kept: Tool, dropped: Tool) -> Tool: + merged = kept.model_copy(deep=True) + merged.annotations = {**dropped.annotations, **merged.annotations} + seen_hints = {_risk_hint_key(hint) for hint in merged.risk_hints} + for hint in dropped.risk_hints: + key = _risk_hint_key(hint) + if key in seen_hints: + continue + merged.risk_hints.append(hint.model_copy(deep=True)) + seen_hints.add(key) + merged.auth = merged.auth.model_copy(deep=True) + merged.auth.scopes = _merge_string_values(merged.auth.scopes, dropped.auth.scopes) + if not merged.auth.type: + merged.auth.type = dropped.auth.type + if not merged.auth.credential_mode: + merged.auth.credential_mode = dropped.auth.credential_mode + if not merged.auth.source and dropped.auth.source: + merged.auth.source = dropped.auth.source + if merged.owner is None: + merged.owner = dropped.owner + return merged + + +def _risk_hint_key(hint) -> tuple[str, str, str, str]: + evidence = json.dumps(hint.evidence, sort_keys=True, default=str) + return hint.tag, hint.source, hint.confidence, evidence + + +def _merge_string_values(primary: list[str], secondary: list[str]) -> list[str]: + merged: list[str] = [] + for value in [*primary, *secondary]: + if value not in merged: + merged.append(value) + return merged diff --git a/src/agents_shipgate/cli/scan/surface_redaction.py 
b/src/agents_shipgate/cli/scan/surface_redaction.py new file mode 100644 index 00000000..12dfa5a1 --- /dev/null +++ b/src/agents_shipgate/cli/scan/surface_redaction.py @@ -0,0 +1,210 @@ +from __future__ import annotations + +import logging + +from agents_shipgate.core.artifact_models import ( + CrewAiArtifacts, + GoogleAdkArtifacts, + LangChainArtifacts, + N8nArtifacts, +) +from agents_shipgate.core.domain import Tool +from agents_shipgate.core.errors import ConfigError +from agents_shipgate.core.privacy import RedactionStats, redact_data, sanitize_model +from agents_shipgate.report.action_surface_diff import build_action_surface_facts +from agents_shipgate.report.tool_surface_diff import ToolSurfaceDiffReference, _stable_hash +from agents_shipgate.schemas.codex_plugin import CodexPluginSurface +from agents_shipgate.schemas.manifest import AgentsShipgateManifest +from agents_shipgate.schemas.surfaces import ( + ActionFact, + ActionSurfaceFacts, + ActionSurfaceHashes, + ToolSurfaceFacts, +) + +logger = logging.getLogger(__name__) + +def _frameworks_surface( + adk_artifacts: GoogleAdkArtifacts | None, + langchain_artifacts: LangChainArtifacts | None = None, + crewai_artifacts: CrewAiArtifacts | None = None, + n8n_artifacts: N8nArtifacts | None = None, +) -> dict[str, object]: + surface: dict[str, object] = {} + if adk_artifacts: + surface["google_adk"] = adk_artifacts.surface_summary() + if langchain_artifacts: + surface["langchain"] = langchain_artifacts.surface_summary() + if crewai_artifacts: + surface["crewai"] = crewai_artifacts.surface_summary() + if n8n_artifacts: + surface["n8n"] = n8n_artifacts.surface_summary() + return surface + + +def _build_public_action_surface_facts( + *, + raw_facts: ActionSurfaceFacts, + manifest: AgentsShipgateManifest, + agent_id: str, + tools: list[Tool], + stats: RedactionStats, +) -> ActionSurfaceFacts: + try: + return sanitize_model( + build_action_surface_facts( + manifest, + agent_id=agent_id, + tools=tools, + ), + 
ActionSurfaceFacts, + stats=stats, + path="action_surface_facts", + ) + except ConfigError: + logger.debug( + "redacted action surface collapsed distinct raw action ids; " + "falling back to a sanitized raw snapshot with public-only " + "ordinal disambiguators" + ) + return _sanitize_existing_action_surface_facts( + raw_facts, + stats=stats, + path="action_surface_facts", + ) + + +def _sanitize_existing_action_surface_facts( + facts: ActionSurfaceFacts, + *, + stats: RedactionStats, + path: str, +) -> ActionSurfaceFacts: + public_facts = sanitize_model( + facts, + ActionSurfaceFacts, + stats=stats, + path=path, + ) + _disambiguate_public_action_ids(public_facts) + return public_facts + + +def _disambiguate_public_action_ids(facts: ActionSurfaceFacts) -> None: + seen: dict[str, int] = {} + for action in facts.actions: + count = seen.get(action.action_id, 0) + 1 + seen[action.action_id] = count + if count > 1: + action.action_id = f"{action.action_id}#{count}" + _refresh_public_action_hashes(action) + + +def _refresh_public_action_hashes(action: ActionFact) -> None: + schema_hash = _stable_hash( + { + "input_fields": action.input_fields, + "required_input_fields": action.required_input_fields, + } + ) + policy_hash = _stable_hash( + { + "approval": action.approval_policy.model_dump(mode="json"), + "safeguards": action.safeguards.model_dump(mode="json"), + "evidence": action.evidence.model_dump(mode="json"), + } + ) + risk_hash = _stable_hash( + { + "effect": action.effect, + "risk_tags": action.risk_tags, + "required_scopes": action.required_scopes, + } + ) + action.input_schema_hash = schema_hash + action.hashes = ActionSurfaceHashes( + identity_hash=_stable_hash(action.action_id), + schema_hash=schema_hash, + policy_hash=policy_hash, + risk_hash=risk_hash, + ) + + +def _sanitize_codex_plugin_surface( + surface: CodexPluginSurface | None, + *, + stats: RedactionStats, +) -> CodexPluginSurface | None: + if surface is None: + return None + return sanitize_model( + 
surface, + CodexPluginSurface, + stats=stats, + path="codex_plugin_surface", + ) + + +def _sanitize_diff_reference( + reference: ToolSurfaceDiffReference | None, + *, + stats: RedactionStats, +) -> ToolSurfaceDiffReference | None: + if reference is None: + return None + facts = ( + sanitize_model( + reference.facts, + ToolSurfaceFacts, + stats=stats, + path="tool_surface_diff.base.facts", + ) + if reference.facts is not None + else None + ) + action_facts = ( + _sanitize_existing_action_surface_facts( + reference.action_facts, + stats=stats, + path="action_surface_diff.base.facts", + ) + if reference.action_facts is not None + else None + ) + findings = ( + [ + item.__class__.model_validate( + redact_data( + item.model_dump(mode="python"), + stats=stats, + path="tool_surface_diff.base.findings[]", + ) + ) + for item in reference.findings + ] + if reference.findings is not None + else None + ) + return ToolSurfaceDiffReference( + kind=reference.kind, + facts=facts, + path=redact_data(reference.path, stats=stats, path="tool_surface_diff.base.path"), + report_schema_version=reference.report_schema_version, + baseline_schema_version=reference.baseline_schema_version, + action_facts=action_facts, + findings=findings, + notes=tuple( + redact_data( + list(reference.notes), + stats=stats, + path="tool_surface_diff.base.notes[]", + ) + ), + action_notes=tuple( + redact_data( + list(reference.action_notes), + stats=stats, + path="action_surface_diff.base.notes[]", + ) + ), + ) diff --git a/src/agents_shipgate/cli/scan/tools_agent.py b/src/agents_shipgate/cli/scan/tools_agent.py new file mode 100644 index 00000000..9007dc04 --- /dev/null +++ b/src/agents_shipgate/cli/scan/tools_agent.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import logging + +from agents_shipgate.core.risk_hints import enrich_tools_with_risk_hints +from agents_shipgate.schemas.manifest import AgentsShipgateManifest + +from .agent_builder import _build_agent +from .models import _LoadedInputs, 
_ToolsAndAgent +from .source_loading import _flatten_and_deduplicate_tools + +logger = logging.getLogger(__name__) + +def _build_tools_and_agent( + *, + manifest: AgentsShipgateManifest, + inputs: _LoadedInputs, +) -> _ToolsAndAgent: + """Phase 3: flatten/dedup tools with source priority, enrich with + manifest-derived risk hints, build the ``Agent`` object, finalize + the source-warnings list (dedup after appending the duplicate-tool + warnings from ``_flatten_and_deduplicate_tools``). + """ + tools, duplicate_warnings = _flatten_and_deduplicate_tools(inputs.loaded_sources) + # Assemble in pre-decomp order: source → duplicate → artifact → + # placeholder → policy_pack. Duplicate warnings MUST come immediately + # after per-source warnings (before artifact / placeholder / policy_pack) + # so ``report.source_warnings`` is byte-identical to pre-v0.19 output. + # (P3 fix: _LoadedInputs now carries separate buckets instead of a + # pre-assembled list so this interleaving is possible.) + warnings: list[str] = list(inputs.source_only_warnings) + warnings.extend(duplicate_warnings) + warnings.extend(inputs.artifact_warnings_list) + warnings.extend(inputs.placeholder_warnings) + warnings.extend(inputs.policy_pack_warnings) + # Some adapters expose the same warnings through both LoadedToolSource + # and the artifact bag; keep report warning output stable and unique. 
+ warnings = list(dict.fromkeys(warnings)) + tools = enrich_tools_with_risk_hints(manifest, tools) + logger.debug( + "risk hints generated", + extra={ + "agents_shipgate_tools": [ + { + "name": tool.name, + "risk_hints": [ + {"tag": hint.tag, "confidence": hint.confidence, "source": hint.source} + for hint in tool.risk_hints + ], + } + for tool in tools + ] + }, + ) + agent = _build_agent( + manifest, tools, inputs.api, inputs.anthropic, inputs.adk + ) + return _ToolsAndAgent(tools=tools, agent=agent, warnings=warnings) diff --git a/src/agents_shipgate/cli/scan/validation.py b/src/agents_shipgate/cli/scan/validation.py new file mode 100644 index 00000000..6cd67f60 --- /dev/null +++ b/src/agents_shipgate/cli/scan/validation.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path + +from agents_shipgate.cli.discovery.placeholders import collect_placeholders + + +def _resolve_source_paths( + manifest, base_dir: Path, config_path: Path +) -> list[dict[str, object]]: + """Return required tool_sources whose declared path is unusable. + + Two failure modes are flagged so doctor can surface them as a + ``SHIP-DIAG-MISSING-SOURCE-FILE`` diagnostic instead of crashing in + a downstream loader: + + - ``reason="missing"`` — the file does not exist. + - ``reason="outside_manifest_dir"`` — the file exists but escapes the + manifest's containment boundary (loaders mirror this check and + would raise ``InputParseError``). + + Optional sources are not reported here — the existing + ``_load_sources`` flow handles them with a warning. Returned entries + carry the source id, the declared path string, the 1-indexed line + number in the manifest text where the path appears (best-effort), + and the failure reason. 
+ """ + unresolved: list[dict[str, object]] = [] + try: + manifest_text = config_path.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError): + manifest_text = "" + text_lines = manifest_text.splitlines() + base_resolved = base_dir.resolve() + for source in manifest.tool_sources: + if source.optional: + continue + if source.path is None: + continue + raw_path = Path(source.path) + candidate = ( + raw_path if raw_path.is_absolute() else base_resolved / raw_path + ).resolve() + if not candidate.exists(): + reason = "missing" + else: + try: + candidate.relative_to(base_resolved) + except ValueError: + reason = "outside_manifest_dir" + else: + continue + line_no: int | None = None + needle = f"path: {source.path}" + for index, line in enumerate(text_lines, start=1): + if needle in line: + line_no = index + break + unresolved.append( + { + "id": source.id, + "declared_path": source.path, + "line": line_no, + "reason": reason, + } + ) + return unresolved + + +def _manifest_placeholder_warnings(config_path: Path) -> list[str]: + """Return source-warning strings for each ``CHANGE_ME`` placeholder + surviving in the manifest text. + + Doctor already surfaces these as ``SHIP-DIAG-CHANGE-ME-PLACEHOLDERS`` + diagnostics; the same fact also needs to flow into the scan so the + existing ``source_warning_count > 0 → review_required`` branch in + release_decision.evidence_coverage trips. Read failures (missing + file, non-UTF8 content) yield no warnings — the manifest loader runs + immediately before and will have already raised a structured error + in that case. + """ + try: + manifest_text = config_path.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError): + return [] + placeholders = collect_placeholders(manifest_text) + name = config_path.name + return [ + f"{name}:{entry['line']} — CHANGE_ME placeholder at " + f"{entry.get('path', '')!r}; replace before treating this " + "report as evidence." 
+ for entry in placeholders + ] diff --git a/src/agents_shipgate/cli/scan/writing.py b/src/agents_shipgate/cli/scan/writing.py new file mode 100644 index 00000000..1a8f2277 --- /dev/null +++ b/src/agents_shipgate/cli/scan/writing.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from agents_shipgate.packet.builder import build_packet +from agents_shipgate.schemas.manifest import AgentsShipgateManifest +from agents_shipgate.schemas.report import ReadinessReport + +from .models import _OutputPlan, _SanitizedSurfaces +from .output_helpers import _write_packet, _write_reports + + +def _write_outputs( + *, + report: ReadinessReport, + public_report_payload: Any, + sanitized: _SanitizedSurfaces, + plan: _OutputPlan, + manifest: AgentsShipgateManifest, + config_path: Path, + packet_generated_at: str | None, +) -> None: + """Phase 9: write report (md/json/sarif) + packet (md/json/html/pdf). + + Both writes consume only sanitized values; the raw manifest is + passed to ``build_packet`` for non-output internal use (packet + builder reads manifest defaults like ``output.packet.formats`` but + never serializes raw manifest content into the packet). 
+ """ + _write_reports(report, plan.generated_paths, manifest.output.formats) + if manifest.output.packet.enabled and plan.packet_format_set: + assert report.release_decision is not None + packet = build_packet( + manifest=manifest, + agent=report.agent, + project=report.project, + environment=report.environment, + run_id=report.run_id, + tools=sanitized.tools, + findings=sanitized.findings, + release_decision=report.release_decision, + api_artifacts=sanitized.api_artifacts, + anthropic_artifacts=sanitized.anthropic_artifacts, + source_warnings=sanitized.source_warnings, + validation_artifacts=sanitized.validation_artifacts, + tool_surface_diff=report.tool_surface_diff, + action_surface_diff=report.action_surface_diff, + report_payload=public_report_payload, + generated_at=packet_generated_at, + config_ref=config_path.resolve().name, + ) + _write_packet(packet, plan.generated_paths, plan.packet_format_set) diff --git a/src/agents_shipgate/cli/self_check.py b/src/agents_shipgate/cli/self_check.py index 423472fb..19867bb7 100644 --- a/src/agents_shipgate/cli/self_check.py +++ b/src/agents_shipgate/cli/self_check.py @@ -15,7 +15,7 @@ import typer from agents_shipgate import __version__ -from agents_shipgate.cli.scan import run_scan +from agents_shipgate.cli.scan.orchestrator import run_scan from agents_shipgate.core.errors import AgentsShipgateError, ConfigError, InputParseError from agents_shipgate.fixtures import ( FixturesUnavailableError, diff --git a/src/agents_shipgate/core/findings.py b/src/agents_shipgate/core/findings.py deleted file mode 100644 index 0eab2dcc..00000000 --- a/src/agents_shipgate/core/findings.py +++ /dev/null @@ -1,1471 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -import shlex -from collections import Counter, defaultdict - -from agents_shipgate.ci.release_decision import build_release_decision -from agents_shipgate.core.check_ids import expands_to_check_id -from agents_shipgate.core.domain import Tool -from 
agents_shipgate.core.risk_hints import is_high_risk_tool, risk_tags -from agents_shipgate.schemas.checks import CheckMetadata -from agents_shipgate.schemas.codex_plugin import CodexPluginSurface -from agents_shipgate.schemas.common import ( - AgentAction, - Severity, - confidence_rank, -) -from agents_shipgate.schemas.manifest import ( - AgentsShipgateManifest, - SuppressionConfig, -) -from agents_shipgate.schemas.patches import ManualPatch -from agents_shipgate.schemas.report import ( - AgentSummary, - AgentSummaryAction, - BaselineSummary, - Finding, - LoadedPolicyPack, - PolicyAudit, - PrivacyAudit, - ReadinessReport, - ReleaseDecision, - ReportSummary, - ReviewerSummary, - ReviewerSurfacePointer, - ToolSurfaceSummary, -) -from agents_shipgate.schemas.surfaces import ( - ActionSurfaceDiff, - ActionSurfaceFacts, - ToolSurfaceDiff, - ToolSurfaceFacts, -) - -SEVERITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4} -FINGERPRINT_EXCLUDED_EVIDENCE_KEYS = { - "default_severity", - "observed", - "source_provenance", -} - - -def assign_finding_ids(findings: list[Finding]) -> list[Finding]: - by_fingerprint: dict[str, list[Finding]] = defaultdict(list) - for finding in findings: - finding.fingerprint = finding_fingerprint(finding) - by_fingerprint[finding.fingerprint].append(finding) - used_ids: dict[str, int] = defaultdict(int) - for finding in findings: - assert finding.fingerprint is not None - if len(by_fingerprint[finding.fingerprint]) == 1: - candidate = finding.fingerprint - else: - candidate = f"{finding.fingerprint}_{_collision_discriminator(finding)}" - used_ids[candidate] += 1 - finding.id = ( - candidate - if used_ids[candidate] == 1 - else f"{candidate}_{used_ids[candidate]}" - ) - return findings - - -def dedupe_findings(findings: list[Finding]) -> list[Finding]: - seen: set[tuple[str, str, str, str, str, str]] = set() - deduped: list[Finding] = [] - for finding in findings: - evidence_key = json.dumps( - 
_canonicalize_for_fingerprint(finding.evidence), - sort_keys=True, - default=str, - ) - source_key = json.dumps( - finding.source.model_dump(mode="json") if finding.source else None, - sort_keys=True, - default=str, - ) - key = ( - finding.check_id, - # Title is intentionally part of local de-dupe identity. Some - # checks share structured evidence across distinct user-visible - # targets, and the interpolated title is the only stable context - # that keeps those findings separate before IDs are assigned. - finding.title, - finding.tool_id or "", - finding.tool_name or "", - evidence_key, - source_key, - ) - if key in seen: - continue - seen.add(key) - deduped.append(finding) - return deduped - - -def apply_suppressions( - findings: list[Finding], suppressions: list[SuppressionConfig] -) -> list[Finding]: - for finding in findings: - match = _matching_suppression(finding, suppressions) - if match: - finding.suppressed = True - finding.suppression_reason = match.reason - return findings - - -def apply_severity_overrides( - findings: list[Finding], overrides: dict[str, Severity] -) -> list[Finding]: - for finding in findings: - override = _severity_override_for_check(finding.check_id, overrides) - if override: - # Keep this audit field out of fingerprinting so overrides can be - # applied before or after ID assignment without changing identity. - finding.evidence.setdefault("default_severity", finding.severity) - finding.severity = override - return findings - - -# v0.7: safe-closed default for findings whose check_id isn't in the -# loaded catalog — policy-pack rules, third-party plugins, or any check -# emitted outside the built-in set. The static catalog is silent for -# these, so we default-close: human review required, no auto-fix kind -# claimed. 
-_REMEDIATION_FALLBACK = { - "autofix_safe": False, - "requires_human_review": True, - "suggested_patch_kind": "manual", - "docs_url": None, -} - - -def annotate_remediation( - findings: list[Finding], - check_metadata_lookup: dict[str, CheckMetadata], -) -> list[Finding]: - """Populate the v0.7 per-finding remediation fields in place. - - Strict derivation policy: - - - When ``finding.patches`` is non-empty, the safety bools are derived - from the actual emitted patches: - * ``autofix_safe=True`` iff EVERY patch is non-manual AND has - ``confidence == "high"``. Mixed-state (e.g. one safe + one - manual, one high + one medium) → ``autofix_safe=False``. - * ``requires_human_review`` is the inverse of ``autofix_safe``. - * ``suggested_patch_kind`` = kind of the first non-manual patch, - or ``"manual"`` when all are manual, or ``"none"`` when the - list is empty. - - When ``finding.patches`` is None (scan ran without - ``--suggest-patches``), the safety bools and - ``suggested_patch_kind`` come from the matching ``CheckMetadata`` - entry, with the safe-closed fallback for unknown check IDs. - - ``docs_url`` is always sourced from CheckMetadata (or None for - unknown check IDs). Patches don't carry per-instance doc URLs. - - Caller (`scan.run_scan`) builds the metadata lookup from the - catalog with the scan's actual ``plugins_enabled`` setting, so this - function never triggers plugin loading at serialization time. - """ - for finding in findings: - meta = check_metadata_lookup.get(finding.check_id) - catalog_doc_url = meta.docs_url if meta is not None else None - - # Three states, treated distinctly: - # 1. `patches is None` → scan ran without --suggest-patches. - # Seed from CheckMetadata (or safe-closed fallback for - # unknown check IDs). - # 2. `patches == []` → scan ran WITH --suggest-patches but - # the generator emitted nothing for this finding. 
Treat as - # safe-closed with `suggested_patch_kind="none"` — falling - # back to the catalog would misleadingly report a patch - # kind that the report doesn't actually carry. - # 3. `patches` non-empty → derive from the actual patches - # via the strict rule below. - if finding.patches is None: - if meta is not None: - autofix_safe = meta.autofix_safe - requires_human_review = meta.requires_human_review - suggested_patch_kind = meta.suggested_patch_kind - else: - autofix_safe = bool(_REMEDIATION_FALLBACK["autofix_safe"]) - requires_human_review = bool( - _REMEDIATION_FALLBACK["requires_human_review"] - ) - suggested_patch_kind = str( - _REMEDIATION_FALLBACK["suggested_patch_kind"] - ) - else: - ( - autofix_safe, - requires_human_review, - suggested_patch_kind, - ) = _derive_from_patches(finding.patches) - - # Reviewer-grade escalation: when the catalog flags this check - # as requiring human review regardless of the per-patch state - # (approval/confirmation/idempotency, broad-scope, - # prohibited-action, runtime-trace, HITL evidence), force - # safe-closed values BEFORE assigning. Setting them here keeps - # `finding.autofix_safe`, `finding.requires_human_review`, and - # `finding.agent_action` (derived below) in agreement: the - # existing `auto_apply` early-return in `derive_agent_action` - # tests `finding.autofix_safe`, so flipping it to False naturally - # routes the verdict to `propose_patch_for_review` whenever - # patches are present, and to `escalate_to_human` otherwise. - if meta is not None and meta.requires_human_review_regardless_of_patch: - autofix_safe = False - requires_human_review = True - - finding.autofix_safe = autofix_safe - finding.requires_human_review = requires_human_review - finding.suggested_patch_kind = suggested_patch_kind - finding.docs_url = catalog_doc_url - finding.agent_action = derive_agent_action(finding) - # v0.14: ensure every emitted finding carries a real - # `provenance_kind`. 
Built-in checks set it via the required - # `tool_finding`/`agent_finding` kwarg. Third-party plugin - # checks may still construct `Finding(...)` directly without - # the field; coerce None → "static_declaration" so the wire - # schema's required + non-nullable enum is satisfied. Plugins - # that want a more accurate label should set the field - # themselves; this fallback is the conservative declarative - # label rather than a sentinel. - if finding.provenance_kind is None: - finding.provenance_kind = "static_declaration" - return findings - - -def derive_agent_action(finding: Finding) -> AgentAction: - """Project ``finding`` to a single ``AgentAction`` enum value. - - Deterministic projection of (``blocks_release``, ``patches``, - ``autofix_safe``, ``requires_human_review``). A release-blocking - finding always escalates to a human unless it is suppressed. - Order-invariant: the result depends on the SET of patches, not on - their list ordering. The first - non-manual patch's confidence drives the verdict, mirroring - :func:`_derive_from_patches` (which derives ``suggested_patch_kind`` - from the first non-manual patch). Earlier this function used - ``patches[0]`` directly, so a finding with - ``[ManualPatch, medium SetPointerPatch]`` mapped to - ``escalate_to_human`` while - ``[medium SetPointerPatch, ManualPatch]`` mapped to - ``propose_patch_for_review`` despite identical patch content - (#57 review P2). - - The strategy proposal in ``docs/agent-adoption-strategy.md`` §7 - G10 sketched an algorithm that ordered ``requires_human_review`` - before the medium/low confidence check, but that mapped non-manual - medium-confidence patches to ``escalate_to_human`` even though the - value's defined semantic ("no machine-applicable patch; needs - human judgment") excludes that case. We deviate by checking - confidence on the first non-manual patch BEFORE falling through - to escalate, keeping the value definitions consistent with the - projection. 
- - The ``suppress_with_reason`` value is reserved for future check - classes that explicitly mark themselves as suppressible. The - built-in projection does not emit it. - """ - if finding.suppressed: - return "informational" - if finding.blocks_release: - return "escalate_to_human" - - patches = finding.patches - - # No patch list (no --suggest-patches) or empty patch list: - # nothing machine-applicable. Route on the catalog flags. - if not patches: - if finding.requires_human_review: - return "escalate_to_human" - return "informational" - - # Pick the first non-manual patch (order-invariant: every patch - # generator produces a stable order, but the agent_action verdict - # should depend on the set, not on which manual patch happened to - # land first). All-manual lists fall through to escalate. - non_manual = [p for p in patches if p.kind != "manual"] - if not non_manual: - return "escalate_to_human" - - first = non_manual[0] - first_confidence = getattr(first, "confidence", None) - if first_confidence == "high" and finding.autofix_safe: - return "auto_apply" - - # Any non-manual patch with declared confidence (high, medium, or - # low) is machine-applicable, so the verdict is propose-for-review - # — including high-confidence patches in mixed lists where a - # ManualPatch sibling disqualified `autofix_safe`. The enum's - # `escalate_to_human` definition is "no machine-applicable patch", - # which doesn't fit this case; routing it to escalate would - # contradict the documented semantics (#57 review P3). - if first_confidence in {"high", "medium", "low"}: - return "propose_patch_for_review" - - # Rare: non-manual patch carries no confidence. Conservative escalate. 
- if finding.requires_human_review: - return "escalate_to_human" - return "informational" - - -def build_agent_summary( - *, - findings: list[Finding], - release_decision: ReleaseDecision | None, - json_report_path: str | None = None, -) -> AgentSummary: - """Construct the top-level ``agent_summary`` block. - - Deterministic projection of ``release_decision`` plus the - per-finding ``agent_action`` values. Surfaces the same numbers a - coding agent would otherwise compute by traversing arrays — same - inputs, same output, no agent-side aggregation needed. - - ``json_report_path`` is the actual on-disk path of the emitted JSON - report (from ``ReadinessReport.generated_reports['json']``). It is - threaded in so ``first_recommended_action.command`` can name the - real path the user just wrote — not the default. When the scan ran - without JSON output (no path available), the action falls back to - ``kind: "info"`` with a parameterised hint instead of a command, - so we never emit an apply-patches invocation pointing at a file - that doesn't exist or — worse — at a stale default-path report - from a previous run. - """ - if release_decision is None: - verdict: str = "passed" - blocker_count = 0 - review_item_count = 0 - reason = "No release decision computed." - evidence_recommended = False - else: - verdict = release_decision.decision - blocker_count = len(release_decision.blockers) - review_item_count = len(release_decision.review_items) - reason = (release_decision.reason or "").strip() - # `evidence_coverage.human_review_recommended` is the - # release-decision signal that says "this is review_required - # because the scan saw only low-confidence/static evidence, - # not because any specific finding needs fixing." In that - # case we want to surface the evidence-coverage reason - # (rather than the unhelpful "0 review items flagged" text) - # and route the agent toward gathering better evidence - # (#57 review P2: evidence-only review_required). 
- # - # v0.14 also routes source_warning_count > 0 to review_required - # via an explicit branch in build_release_decision() - # (summarize_findings() doesn't fold source warnings into - # human_review_recommended, so without including them here a - # source-warning-only scan would render as "0 review item(s) - # flagged" with no first_recommended_action — losing the - # release_decision.reason that has the only useful context). - evidence_recommended = bool( - release_decision.evidence_coverage - and ( - release_decision.evidence_coverage.human_review_recommended - or release_decision.evidence_coverage.source_warning_count > 0 - ) - ) - - active_findings = [f for f in findings if not f.suppressed] - auto_appliable = sum( - 1 for f in active_findings if f.agent_action == "auto_apply" - ) - # `needs_human_review` covers every active finding the user has to - # weigh in on before release: full escalations (no machine path) - # PLUS proposed patches that ship at medium/low confidence and - # require an explicit `--apply` after the user reviews the diff. - # Earlier this counted only `escalate_to_human`, which silently - # under-counted propose_patch_for_review findings — release_decision - # already routes both into review_items, so the agent_summary - # number must agree (#57 review P1). - needs_review = sum( - 1 - for f in active_findings - if f.agent_action in {"escalate_to_human", "propose_patch_for_review"} - ) - - # Headline: short, one-sentence statement that names the verdict - # and the action-driven counts. The two populations differ: - # `review_item_count` mirrors `release_decision.review_items` - # (severity-driven; can include medium-severity auto_apply - # findings), while `needs_human_review` counts only findings whose - # `agent_action` requires human input. 
The headline uses - # `needs_human_review` for the "require human review" wording so a - # review_required verdict with only auto-applicable findings reads - # honestly as "auto-applicable; none require human input" instead - # of falsely claiming N findings need review. - # `release_decision.reason` is severity-driven and can contradict - # an action-driven headline (e.g. when only-auto-applicable - # findings are flagged for release review, the reason often reads - # "1 finding requires human review" — the opposite of what - # agent_summary needs to say). We therefore skip the reason append - # in branches where the headline already explains the agent-level - # situation in agent-driven terms; we keep the append in branches - # where the reason adds non-overlapping context (like blocker - # counts). - append_reason = True - if verdict == "blocked": - headline = ( - f"{blocker_count} active finding(s) block release" - + ( - f"; {review_item_count} review item(s) accepted as debt." - if review_item_count - else "." - ) - ) - elif verdict == "review_required": - if needs_review > 0: - head = f"{needs_review} finding(s) require human review" - if auto_appliable > 0: - head += f"; {auto_appliable} also auto-applicable" - headline = head + "." - elif auto_appliable > 0 and evidence_recommended: - # Mixed case: every flagged finding is auto-applicable - # *but* evidence coverage is incomplete (low-confidence - # tools or source warnings tipped review_required). Saying - # "none require human input beyond apply-patches" would - # silently drop the evidence-review requirement that the - # release_decision.reason explicitly calls out. Surface - # both so the agent applies the patches AND asks the - # human to review the evidence gap. 
- evidence_clause = reason or ( - "evidence coverage is incomplete and should be reviewed " - "before shipping" - ) - headline = ( - f"{auto_appliable} auto-applicable finding(s) flagged for " - f"release review; {evidence_clause}" - ) - if not headline.endswith("."): - headline += "." - append_reason = False # already in headline - elif auto_appliable > 0: - headline = ( - f"{auto_appliable} auto-applicable finding(s) flagged for " - "release review; none require human input beyond apply-patches." - ) - # Suppress the severity-driven reason here. release_decision - # likely says something like "N finding(s) require human - # review" — appending it would directly contradict the - # action-driven headline (#57 review P1). - append_reason = False - elif evidence_recommended: - # Evidence-coverage-driven review: no actionable findings, - # but the scan saw only low-confidence/static evidence and - # the release_decision wants a human to weigh in. Surface - # the reason directly — it carries the only useful - # explanation. Falling back to "0 review items flagged" - # would lose the most important context (#57 review P2). - headline = ( - reason - if reason - else "Human review recommended: low-confidence evidence." - ) - append_reason = False # already in headline - else: - # Even rarer fallback: review_required without any of the - # above signals. Surface review_item_count so the - # headline isn't a self-contradiction. - headline = ( - f"{review_item_count} review item(s) flagged for release review." - ) - append_reason = False - if blocker_count: - headline += f" ({blocker_count} blocker(s) detected.)" - elif verdict == "insufficient_evidence": - # No specific finding to surface — by definition the issue is - # evidence quality, not findings. Surface the release_decision - # reason verbatim; it already names the counts and explains why - # the scan can't gate release. Falling through to the "Release - # ready" branch would lie about a degraded scan. 
- headline = ( - reason - if reason - else "Evidence coverage below threshold; scan results not trustworthy enough to gate release." - ) - append_reason = False - if blocker_count: - headline += f" ({blocker_count} blocker(s) detected.)" - else: - headline = ( - "Release ready" - + ( - f" ({review_item_count} review item(s) accepted as debt)." - if review_item_count - else "." - ) - ) - if append_reason and reason and len(headline) + len(reason) + 4 < 240: - headline = f"{headline} {reason}" if reason.endswith(".") else f"{headline} {reason}." - - first_action = _build_first_recommended_action( - verdict=verdict, - auto_appliable=auto_appliable, - needs_review=needs_review, - review_item_count=review_item_count, - active_findings=active_findings, - json_report_path=json_report_path, - evidence_recommended=evidence_recommended, - evidence_reason=( - reason - if (evidence_recommended or verdict == "insufficient_evidence") - else "" - ), - ) - - return AgentSummary( - verdict=verdict, # type: ignore[arg-type] - headline=headline, - blocker_count=blocker_count, - review_item_count=review_item_count, - auto_appliable_patches=auto_appliable, - needs_human_review=needs_review, - first_recommended_action=first_action, - ) - - -def _build_first_recommended_action( - *, - verdict: str, - auto_appliable: int, - needs_review: int, - review_item_count: int, - active_findings: list[Finding], - json_report_path: str | None, - evidence_recommended: bool = False, - evidence_reason: str = "", -) -> AgentSummaryAction | None: - """Deterministic next-step picker for ``agent_summary``. - - Order (highest impact first): - 1. Verdict is insufficient_evidence → emit an info action that - surfaces the evidence reason and recommends gathering deeper - sources (MCP, OpenAPI inputs, eval traces). 
Checked before - auto-apply because applying patches does NOT clear an evidence - verdict — the scan results are not trustworthy enough to gate - release, and running apply-patches first would contradict the - headline. Tell the agent to fix the trust problem before - cleaning up findings. - 2. Auto-applicable patches available → propose ``apply-patches``, - but only as a ``command`` action when we know the actual JSON - report path (so the command never points at the wrong file). - Otherwise emit ``kind: "info"`` with a parameterised hint. - 3. Verdict is blocked → surface the top blocker for review. - 4. Verdict is review_required → walk the top review item. - 5. Verdict is passed → no action (None). - """ - if verdict == "insufficient_evidence": - base = ( - evidence_reason - or "Evidence coverage below threshold; scan results are not " - "trustworthy enough to gate release." - ) - return AgentSummaryAction( - kind="info", - command=None, - why=( - f"{base} Surface this to the user and gather deeper " - "evidence (e.g. MCP/OpenAPI inputs, eval traces, " - "additional source files) before re-running the scan; " - "applying patches does not clear an evidence verdict, " - "so no machine-applicable fix is available." - ), - ) - - if auto_appliable > 0: - why = ( - f"{auto_appliable} finding(s) carry high-confidence patches " - "safe to apply without human review." - ) - if verdict == "review_required" and evidence_recommended: - # The patches are still worth applying (the scan IS - # trustworthy enough to gate at review_required, unlike - # the insufficient_evidence path that outranks auto-apply - # entirely). But the action's why must call out the - # evidence gap so the agent doesn't treat apply-patches - # as the *only* next step — the human still needs to - # review the source warnings / low-confidence tools. - evidence_note = evidence_reason or ( - "Evidence coverage is incomplete (source warnings or " - "low-confidence tools); review before shipping." 
- ) - why = ( - f"{why} Note: {evidence_note} Applying patches does not " - "address the evidence gap." - ) - if json_report_path: - # shlex.quote so paths with spaces (e.g. macOS - # "/Users/.../My Project/agents-shipgate-reports/report.json") - # round-trip through shlex.split unchanged. Without the - # quote, the advertised command splits at the spaces and - # apply-patches receives garbage --from arguments - # (#57 review P2). - quoted_path = shlex.quote(json_report_path) - return AgentSummaryAction( - kind="command", - command=( - f"agents-shipgate apply-patches --from " - f"{quoted_path} --confidence high --apply" - ), - why=why, - ) - # No JSON output on this scan: emit an info action that names - # the canonical pattern so the agent runs apply-patches against - # *their* report, not the default path. The user-facing reports - # path is stable enough (`agents-shipgate-reports/report.json` - # is the default) that we mention it in the why-text, but as - # documentation, not a literal command the agent might dispatch. - return AgentSummaryAction( - kind="info", - command=None, - why=( - f"{why} Re-run the scan with --format json (default path " - "is agents-shipgate-reports/report.json), then: " - "agents-shipgate apply-patches --from " - "--confidence high --apply." - ), - ) - - if verdict == "blocked": - top = _top_active_finding(active_findings) - if top is None: - return None - return AgentSummaryAction( - kind="info", - command=None, - why=( - f"Surface {top.check_id} on {top.tool_name or 'agent'} to " - "the user; release is blocked and no auto-applicable patch " - "is available." - ), - ) - - if verdict == "review_required": - # Evidence-coverage-driven review: no specific finding to walk; - # the release_decision is asking for human attention because - # the scan saw only low-confidence/static evidence. Return an - # info action that names the situation so first_recommended_action - # is non-null and useful in this case (#57 review P2). 
- if ( - evidence_recommended - and needs_review == 0 - and auto_appliable == 0 - ): - base = ( - evidence_reason - or "Static-only scan with low-confidence evidence; " - "human review recommended." - ) - return AgentSummaryAction( - kind="info", - command=None, - why=( - f"{base} Surface this to the user and discuss whether " - "to gather better evidence (e.g. add MCP/OpenAPI " - "inputs, eval traces) or accept the static-only " - "review posture; no machine-applicable fix is " - "available." - ), - ) - - top = _top_active_finding(active_findings) - if top is None: - return None - # Prefer the action-driven count when there are findings that - # need human input. Fall back to the severity-driven - # review_item_count when needs_review is 0 — otherwise the - # text would read "Walk the 0 review item(s)" even though the - # release decision has flagged something for review. - visible = needs_review if needs_review > 0 else review_item_count - return AgentSummaryAction( - kind="info", - command=None, - why=( - f"Walk the {visible} review item(s) starting with " - f"{top.check_id}; release is allowed but the human " - "reviewer should weigh in." - ), - ) - - return None - - -def _top_active_finding(findings: list[Finding]) -> Finding | None: - """Pick the highest-severity active finding (ties broken by check_id).""" - if not findings: - return None - return min( - findings, key=lambda f: (SEVERITY_ORDER.get(f.severity, 99), f.check_id) - ) - - -# --- v0.20: reviewer_summary projection ------------------------------------- -# -# Parallels build_agent_summary. AgentSummary answers "what should an agent -# do next?"; ReviewerSummary answers "what should a reviewer look at first?". -# Both are deterministic projections of the same underlying scan state — no -# extra side effects, no I/O, no LLM calls. - -# Baseline integrity findings are emitted under these three check IDs (M2). -# Kept in sync with checks/registry.py + STABILITY.md "Baseline Integrity". 
-_BASELINE_INTEGRITY_CHECK_IDS: frozenset[str] = frozenset( - { - "SHIP-BASELINE-INTEGRITY-MISMATCH", - "SHIP-BASELINE-ENTRY-EXPIRED", - "SHIP-BASELINE-ENTRY-STALE", - } -) - - -def _tool_surface_changes(report: ReadinessReport) -> int: - """Sum of structural-change counters in tool_surface_diff.summary. - - Returns zero when the diff is disabled (no baseline configured) or - when every counter is zero (no changes). The counters are - pre-computed by compute_tool_surface_diff so this is O(1). - """ - diff = report.tool_surface_diff - if diff is None or not getattr(diff, "enabled", False): - return 0 - summary = diff.summary - return ( - summary.tools_added - + summary.tools_removed - + summary.tools_changed - + summary.new_scopes - + summary.removed_scopes - + summary.new_high_risk_effects - + summary.removed_high_risk_effects - + summary.controls_added - + summary.controls_removed - + summary.metadata_changes - + summary.policy_drift_items - ) - - -def _action_surface_changes(report: ReadinessReport) -> int: - """Sum of structural-change counters in action_surface_diff.summary. - - Returns zero when the diff is disabled or empty. ``blocking_findings`` - is intentionally NOT summed here — that count flows into the release - decision, and is already reflected in ``AgentSummary.blocker_count``. - Reviewer-side activity is the structural delta itself. 
- """ - diff = report.action_surface_diff - if diff is None or not getattr(diff, "enabled", False): - return 0 - summary = diff.summary - return ( - summary.actions_added - + summary.actions_removed - + summary.actions_modified - + summary.scope_expansions - + summary.effect_escalations - + summary.risk_tags_added - + summary.approvals_removed - + summary.safeguards_removed - + summary.input_schema_expansions - ) - - -def _capability_misalignment_count(report: ReadinessReport) -> int: - """Total misalignments surfaced by the capability/intent diff (v0.9).""" - return len(report.misalignments) - - -def _evidence_matrix_gap_count(report: ReadinessReport) -> int: - """Count of evidence-matrix rows whose status is a reviewer-actionable - gap. - - The matrix is normally a packet-only section, but ``build_evidence_matrix`` - accepts the report payload and produces the same shape. We call it - here so the count is available in JSON reports even when the packet - is disabled (``--no-packet`` runs). - - Only ``missing`` rows are counted as gaps. ``not_declared`` means - "this domain is not relevant to this manifest" (e.g., - ``memory_isolation`` on every scan today because the manifest schema - does not model it yet) — these are intentional non-coverage, not - reviewer signals. ``partial``, ``covered``, and ``informational`` - are also not gaps. - - Import is deferred to avoid a top-level cycle: ``packet`` imports - from ``schemas/report.py`` (and indirectly from ``core/findings.py`` - via build helpers), so we import only when this function runs. 
- """ - try: - from agents_shipgate.packet.evidence_matrix import build_evidence_matrix - except ImportError: # pragma: no cover - defensive - return 0 - payload = report.model_dump(mode="json") - section = build_evidence_matrix(payload) - return sum( - 1 - for row in section.rows - if getattr(row, "evidence_present", None) == "missing" - ) - - -def _severity_override_counts( - policy_audit: PolicyAudit | None, -) -> tuple[int, int]: - """Return (total_applied, tier_crossed_subset). - - ``total_applied`` is len(severity_overrides_applied). - ``tier_crossed_subset`` counts the entries flagged ``tier_crossed=True``. - """ - if policy_audit is None: - return 0, 0 - rows = policy_audit.severity_overrides_applied - return len(rows), sum(1 for row in rows if row.tier_crossed) - - -def _privacy_redaction_count(privacy_audit: PrivacyAudit | None) -> int: - """Total redactions across the public output surfaces. - - Reads the pre-computed ``redacted_occurrence_count`` field rather - than re-summing per-path counts — they agree by construction (see - privacy.py) but the pre-computed total is the canonical surface and - keeps the projection cheap. - """ - if privacy_audit is None or not privacy_audit.enabled: - return 0 - return privacy_audit.redacted_occurrence_count - - -def _baseline_integrity_issue_count(findings: list[Finding]) -> int: - """Count of active findings emitted by the three baseline-integrity - checks (SHIP-BASELINE-{INTEGRITY-MISMATCH,ENTRY-EXPIRED,ENTRY-STALE}). - - Suppressed findings are excluded; matched (accepted-debt) findings - are included because tampering and expiry remain reviewer signals - regardless of baseline status. - """ - return sum( - 1 - for f in findings - if not f.suppressed and f.check_id in _BASELINE_INTEGRITY_CHECK_IDS - ) - - -def _reviewer_headline( - *, - verdict: str, - lens_total: int, - audit_total: int, - pointer: ReviewerSurfacePointer | None, -) -> str: - """Deterministic, ≤200-char one-sentence summary. 
- - Sort order in the headline mirrors the priority order in - ``_pick_first_recommended_surface`` so the headline and pointer - cannot disagree about which surface matters most. - """ - if verdict == "blocked": - head = "Release blocked" - elif verdict == "insufficient_evidence": - head = "Evidence coverage below threshold" - elif verdict == "review_required": - head = "Review required" - else: # passed - head = "Release ready" - - if lens_total == 0 and audit_total == 0: - body = "no reviewer signals (lenses + audits all clean)." - else: - body_parts: list[str] = [] - if lens_total > 0: - noun = "change" if lens_total == 1 else "changes" - body_parts.append(f"{lens_total} lens {noun}") - if audit_total > 0: - noun = "event" if audit_total == 1 else "events" - body_parts.append(f"{audit_total} audit {noun}") - body = "; ".join(body_parts) + "." - - headline = f"{head}: {body}" - if pointer is not None: - suffix = f" Start at {pointer.name}." - if len(headline) + len(suffix) <= 200: - headline += suffix - return headline - - -def _pick_first_recommended_surface( - *, - release_decision: ReleaseDecision | None, - action_surface_changes: int, - baseline_integrity_issues: int, - severity_overrides_tier_crossed: int, - severity_overrides_applied: int, - capability_misalignments: int, - tool_surface_changes: int, - privacy_redactions: int, - evidence_matrix_gaps: int, -) -> ReviewerSurfacePointer | None: - """Deterministic priority order. Returns None only when every - counter is zero AND the release decision is ``passed``. - - Priority encodes "which surface gives the highest-leverage reviewer - signal first": - - 1. Release decision when blocked or insufficient_evidence — the - verdict tells the reviewer what to gate on. - 2. Action-surface changes — first-class PR/release delta of what - the agent can do externally. - 3. Baseline integrity issues — tampering or expiry on accepted - debt is high-attention. - 4. 
Tier-crossed severity overrides — explicit downgrade across - a release-critical boundary. - 5. Capability/intent misalignments — declared purpose vs. - observed surface. - 6. Tool surface changes — registry-level PR diff. - 7. Privacy redactions — output sanitation events. - 8. Evidence matrix gaps — coverage holes for review. - 9. Non-tier-crossed severity overrides — same-tier downgrades - and upgrades. Lower priority than the tier-crossed case - because they don't cross a release-critical boundary, but - still a reviewer signal that warrants a glance. Without - this fallthrough, ``severity_overrides_applied > 0`` would - produce a non-zero headline but a ``null`` pointer — - contradicting the contract that ``null`` means a fully - clean scan. - 10. review_required verdict with no specific lens/audit signal — - source warnings (e.g., duplicate tool names) or evidence - gaps can produce ``decision=review_required`` with all - reviewer counters at zero. Without this fallthrough, such - scans emit a non-null verdict but a ``null`` pointer — - contradicting the contract that ``null`` means - ``passed + all-zero``. - - Each branch picks the most informative ``path`` and a single - sentence ``why`` suitable for a PR comment lead. - """ - verdict = (release_decision.decision if release_decision else "passed") - - if verdict == "blocked": - return ReviewerSurfacePointer( - kind="release_decision", - name="release_decision", - path="report.release_decision", - why=( - "Release is blocked; read release_decision.blockers[] for " - "the gating findings." - ), - ) - if verdict == "insufficient_evidence": - return ReviewerSurfacePointer( - kind="release_decision", - name="release_decision", - path="report.release_decision", - why=( - "Evidence coverage is below threshold; read " - "release_decision.reason and evidence_coverage." 
- ), - ) - - if action_surface_changes > 0: - return ReviewerSurfacePointer( - kind="lens", - name="action_surface_diff", - path="report.action_surface_diff", - why=( - "Action-surface diff has structural changes " - "(scope, effect, approval, or safeguard); read this lens " - "first to see what the agent can now do." - ), - ) - - if baseline_integrity_issues > 0: - return ReviewerSurfacePointer( - kind="audit", - name="baseline_integrity", - path="report.findings[]", - why=( - "Baseline integrity findings were emitted; filter " - "findings[] by SHIP-BASELINE-* check_id and inspect the " - "baseline-audit.log alongside the baseline file." - ), - ) - - if severity_overrides_tier_crossed > 0: - return ReviewerSurfacePointer( - kind="audit", - name="policy_audit", - path="report.policy_audit.severity_overrides_applied", - why=( - "One or more severity overrides crossed a tier boundary " - "with explicit acknowledgement; review the entries to " - "confirm the downgrade is still appropriate." - ), - ) - - if capability_misalignments > 0: - return ReviewerSurfacePointer( - kind="lens", - name="capability_intent_diff", - path="report.misalignments", - why=( - "The capability/intent diff surfaced misalignments " - "between declared agent purpose and observed tool " - "surface; read misalignments[] for the specifics." - ), - ) - - if tool_surface_changes > 0: - return ReviewerSurfacePointer( - kind="lens", - name="tool_surface_diff", - path="report.tool_surface_diff", - why=( - "Tool-surface diff has structural changes (tools, " - "scopes, controls, or policies); read this lens to see " - "the registry-level delta." - ), - ) - - if privacy_redactions > 0: - return ReviewerSurfacePointer( - kind="audit", - name="privacy_audit", - path="report.privacy_audit.redacted_paths", - why=( - "Sensitive values were redacted from the public outputs; " - "confirm the redactions match expectations and that no " - "secret should have been there in the first place." 
- ), - ) - - if evidence_matrix_gaps > 0: - return ReviewerSurfacePointer( - kind="evidence_matrix", - name="evidence_matrix", - path="packet.evidence_matrix.rows", - why=( - "Evidence-matrix rows report coverage gaps " - "(missing/not_declared); open the Release Evidence " - "Packet to see which domains lack reviewer-visible " - "evidence." - ), - ) - - # Low-priority fallthrough: same-tier severity overrides and - # upgrades. Without this branch, a manifest with a single - # medium → low override would produce a non-zero - # ``severity_overrides_applied`` counter AND a non-zero - # ``audit_total`` in the headline, but a ``null`` pointer — - # contradicting the contract that ``null`` means a fully clean - # scan. The tier-crossed case above handles the higher-attention - # subset; this branch covers the rest. - if severity_overrides_applied > 0: - return ReviewerSurfacePointer( - kind="audit", - name="policy_audit", - path="report.policy_audit.severity_overrides_applied", - why=( - "Severity overrides are applied (same-tier or upgrade); " - "review the policy_audit entries to confirm the overrides " - "match reviewer intent." - ), - ) - - # Final fallthrough: review_required verdict with no specific - # lens/audit signals. Source warnings (e.g., duplicate tool names, - # unresolvable imports) or an evidence gap can force - # decision=review_required even when findings=0 and all reviewer - # counters are zero. Without this branch, such scans would emit - # first_recommended_surface=null despite a non-passed verdict, - # contradicting the contract that null means passed + all-zero. - if verdict == "review_required": - return ReviewerSurfacePointer( - kind="release_decision", - name="release_decision", - path="report.release_decision", - why=( - "Review is required (source warnings or evidence gap " - "without specific lens/audit signals); read " - "release_decision.reason for details." 
- ), - ) - - return None - - -def build_reviewer_summary( - *, - findings: list[Finding], - report: ReadinessReport, -) -> ReviewerSummary: - """Construct the top-level ``reviewer_summary`` block. - - Deterministic projection of the reviewer lens surfaces and audit - envelopes on ``report``. Parallels ``build_agent_summary`` but for - the audit/lens dimensions: a reviewer who wants headline activity - counts and a recommended starting surface reads this block instead - of opening every lens and audit envelope. - - ``findings`` is passed separately (not derived from - ``report.findings``) so we can run on the same active-findings list - callers have already filtered/annotated — same pattern as - ``build_agent_summary``. - """ - tool_surface_changes = _tool_surface_changes(report) - action_surface_changes = _action_surface_changes(report) - capability_misalignments = _capability_misalignment_count(report) - evidence_matrix_gaps = _evidence_matrix_gap_count(report) - severity_overrides_applied, severity_overrides_tier_crossed = ( - _severity_override_counts(report.policy_audit) - ) - privacy_redactions = _privacy_redaction_count(report.privacy_audit) - baseline_integrity_issues = _baseline_integrity_issue_count(findings) - - lens_total = ( - tool_surface_changes - + action_surface_changes - + capability_misalignments - + evidence_matrix_gaps - ) - audit_total = ( - severity_overrides_applied - + privacy_redactions - + baseline_integrity_issues - ) - - verdict = ( - report.release_decision.decision if report.release_decision else "passed" - ) - - pointer = _pick_first_recommended_surface( - release_decision=report.release_decision, - action_surface_changes=action_surface_changes, - baseline_integrity_issues=baseline_integrity_issues, - severity_overrides_tier_crossed=severity_overrides_tier_crossed, - severity_overrides_applied=severity_overrides_applied, - capability_misalignments=capability_misalignments, - tool_surface_changes=tool_surface_changes, - 
privacy_redactions=privacy_redactions, - evidence_matrix_gaps=evidence_matrix_gaps, - ) - headline = _reviewer_headline( - verdict=verdict, - lens_total=lens_total, - audit_total=audit_total, - pointer=pointer, - ) - - return ReviewerSummary( - verdict=verdict, - headline=headline, - tool_surface_changes=tool_surface_changes, - capability_misalignments=capability_misalignments, - action_surface_changes=action_surface_changes, - evidence_matrix_gaps=evidence_matrix_gaps, - severity_overrides_applied=severity_overrides_applied, - severity_overrides_tier_crossed=severity_overrides_tier_crossed, - privacy_redactions=privacy_redactions, - baseline_integrity_issues=baseline_integrity_issues, - first_recommended_surface=pointer, - ) - - -def _derive_from_patches(patches: list) -> tuple[bool, bool, str]: - """Strict derivation: ``autofix_safe`` is True only when EVERY - emitted patch is non-manual AND high-confidence. Mixed states fall - to safe-closed.""" - if not patches: - return (False, True, "none") - - has_manual = any(isinstance(p, ManualPatch) for p in patches) - non_manual = [p for p in patches if not isinstance(p, ManualPatch)] - all_high_confidence_non_manual = ( - not has_manual - and bool(non_manual) - and all(getattr(p, "confidence", None) == "high" for p in non_manual) - ) - - # Per the plan §2 derivation rule: kind of the FIRST non-manual - # patch takes priority (even when ManualPatches are also present). - # All-manual → "manual". Empty list → "none" (handled above). 
- if non_manual: - suggested_patch_kind = non_manual[0].kind - else: - suggested_patch_kind = "manual" - - autofix_safe = all_high_confidence_non_manual - requires_human_review = not autofix_safe - return (autofix_safe, requires_human_review, suggested_patch_kind) - - -def summarize_findings(findings: list[Finding], tools: list[Tool]) -> ReportSummary: - active = [finding for finding in findings if not finding.suppressed] - counts = Counter(finding.severity for finding in active) - suppressed_count = len(findings) - len(active) - if counts["critical"] > 0: - status = "release_blockers_detected" - elif active: - status = "warnings_detected" - elif any(tool.extraction_confidence != "high" for tool in tools): - status = "human_review_recommended" - else: - status = "no_release_blockers_detected" - return ReportSummary( - status=status, - critical_count=counts["critical"], - high_count=counts["high"], - medium_count=counts["medium"], - low_count=counts["low"], - info_count=counts["info"], - suppressed_count=suppressed_count, - human_review_recommended=counts["critical"] > 0 or counts["high"] > 0 or status == "human_review_recommended", - evidence_coverage="mixed" if _has_mixed_evidence(tools) else "static", - ) - - -def summarize_tool_surface(tools: list[Tool]) -> ToolSurfaceSummary: - sources = Counter(tool.source_type for tool in tools) - return ToolSurfaceSummary( - total_tools=len(tools), - high_risk_tools=sum(1 for tool in tools if is_high_risk_tool(tool)), - sources=dict(sorted(sources.items())), - wildcard_tools=sum(1 for tool in tools if tool.annotations.get("wildcard_tools") is True), - missing_descriptions=sum(1 for tool in tools if not (tool.description or "").strip()), - ) - - -def recommended_actions(findings: list[Finding]) -> list[str]: - active = sorted( - [finding for finding in findings if not finding.suppressed], - key=lambda finding: (SEVERITY_ORDER[finding.severity], finding.check_id), - ) - actions: list[str] = [] - seen: set[str] = set() - for 
finding in active: - if finding.recommendation in seen: - continue - actions.append(finding.recommendation) - seen.add(finding.recommendation) - if len(actions) >= 8: - break - return actions - - -def tool_inventory(tools: list[Tool]) -> list[dict[str, object]]: - # v0.19 reviewer-grade provenance: ``source_path`` / ``source_start_line`` - # are additive optional keys per row. Post-scan renderers - # (scenario YAML, downstream consumers reading ``report.json``) - # use this lookup to cite ``path:line`` for tools touched by a - # finding without re-parsing the artifact. Older consumers ignore - # the new keys; new consumers can require them for high-risk tools. - return [ - { - "name": tool.name, - "source_type": tool.source_type, - "source_ref": tool.source_ref, - "source_path": tool.source_path, - "source_start_line": tool.source_start_line, - "source_pointer": tool.source_pointer, - "risk_tags": risk_tags(tool, min_confidence="medium"), - "risk_tag_confidence": _risk_tag_confidence(tool, min_confidence="medium"), - "auth_scopes": tool.auth.scopes, - "owner": tool.owner, - "confidence": tool.extraction_confidence, - } - for tool in sorted(tools, key=lambda item: item.name) - ] - - -def build_report( - *, - run_id: str, - manifest: AgentsShipgateManifest, - project: dict[str, object] | None = None, - agent: dict[str, object], - environment: dict[str, object], - tools: list[Tool], - findings: list[Finding], - generated_reports: dict[str, str], - ci_mode: str, - fail_on: list[Severity] | None = None, - new_findings_only: bool = False, - loaded_policy_packs: list[LoadedPolicyPack] | None = None, - loaded_plugins: list[dict[str, object]] | None = None, - loaded_adapters: list[dict[str, object]] | None = None, - source_warnings: list[str] | None = None, - api_surface: dict[str, object] | None = None, - anthropic_surface: dict[str, object] | None = None, - frameworks: dict[str, object] | None = None, - codex_plugin_surface: CodexPluginSurface | None = None, - baseline: 
BaselineSummary | None = None, - manifest_dir: str | None = None, - tool_surface_facts: ToolSurfaceFacts | None = None, - tool_surface_diff: ToolSurfaceDiff | None = None, - action_surface_facts: ActionSurfaceFacts | None = None, - action_surface_diff: ActionSurfaceDiff | None = None, - policy_audit: PolicyAudit | None = None, - privacy_audit: PrivacyAudit | None = None, -) -> ReadinessReport: - report = ReadinessReport( - run_id=run_id, - manifest_dir=manifest_dir, - project=project or manifest.project.model_dump(exclude_none=True), - agent=agent, - environment=environment, - summary=summarize_findings(findings, tools), - tool_surface=summarize_tool_surface(tools), - tool_surface_facts=tool_surface_facts or ToolSurfaceFacts(), - tool_surface_diff=tool_surface_diff or ToolSurfaceDiff(), - action_surface_facts=action_surface_facts or ActionSurfaceFacts(), - action_surface_diff=action_surface_diff or ActionSurfaceDiff(), - api_surface=api_surface, - anthropic_surface=anthropic_surface, - frameworks=frameworks or {}, - codex_plugin_surface=codex_plugin_surface, - baseline=baseline, - findings=findings, - recommended_actions=recommended_actions(findings), - generated_reports=generated_reports, - loaded_policy_packs=loaded_policy_packs or [], - loaded_plugins=loaded_plugins or [], - loaded_adapters=loaded_adapters or [], - tool_inventory=tool_inventory(tools), - source_warnings=source_warnings or [], - # v0.17 (M1): policy audit envelope. Always present on emitted - # scans (empty when no overrides applied) so consumers can read - # ``report.policy_audit.severity_overrides_applied`` without a - # null check. - policy_audit=policy_audit or PolicyAudit(), - privacy_audit=privacy_audit, - ) - report.release_decision = build_release_decision( - report=report, - tools=tools, - ci_mode=ci_mode, - fail_on=fail_on, - new_findings_only=new_findings_only, - ) - # v0.12: agent_summary is the deterministic projection of - # release_decision + per-finding agent_action. 
Built last so it - # picks up everything else. The JSON report path is threaded in - # so first_recommended_action.command names the real on-disk - # path the user just wrote (not the default — see #57 review P1.1). - report.agent_summary = build_agent_summary( - findings=findings, - release_decision=report.release_decision, - json_report_path=generated_reports.get("json"), - ) - # v0.20 NOTE: ``report.reviewer_summary`` is NOT built here. It - # depends on ``report.misalignments`` which ``apply_capability_diff`` - # populates AFTER ``build_report`` returns (see cli/scan.py). Building - # it here would project from incomplete state — ``capability_misalignments`` - # would always be 0 even on reports that later carry dozens of - # misalignments. The scan pipeline calls ``build_reviewer_summary`` - # post-capability-diff so the projection sees the final report state. - # Test fixtures that need a populated ``reviewer_summary`` should - # also call ``build_reviewer_summary`` after they finish assembling - # the report. 
- return report - - -def _matching_suppression( - finding: Finding, suppressions: list[SuppressionConfig] -) -> SuppressionConfig | None: - for suppression in suppressions: - if not expands_to_check_id(suppression.check_id, finding.check_id): - continue - if not suppression.tool: - return suppression - possible_tools = { - finding.tool_name, - finding.tool_id, - finding.tool_id.replace("tool:", "") if finding.tool_id else None, - } - if suppression.tool in possible_tools: - return suppression - return None - - -def _severity_override_for_check( - check_id: str, overrides: dict[str, Severity] -) -> Severity | None: - if override := overrides.get(check_id): - return override - for configured_check_id, override in overrides.items(): - if expands_to_check_id(configured_check_id, check_id): - return override - return None - - -def finding_fingerprint(finding: Finding) -> str: - identity = { - "check_id": finding.check_id, - "tool_name": finding.tool_name, - "evidence": _canonicalize_for_fingerprint(finding.evidence), - } - digest = hashlib.sha256( - json.dumps(identity, sort_keys=True, default=str).encode("utf-8") - ).hexdigest()[:16] - return f"fp_{digest}" - - -def _canonicalize_for_fingerprint(value): - if isinstance(value, dict): - return { - key: _canonicalize_for_fingerprint(value[key]) - for key in sorted(value) - if key not in FINGERPRINT_EXCLUDED_EVIDENCE_KEYS - } - if isinstance(value, list): - items = [_canonicalize_for_fingerprint(item) for item in value] - return sorted( - items, - key=lambda item: json.dumps(item, sort_keys=True, default=str), - ) - if isinstance(value, tuple | set): - return _canonicalize_for_fingerprint(list(value)) - return value - - -def _collision_discriminator(finding: Finding) -> str: - identity = { - "agent_id": finding.agent_id, - "category": finding.category, - "check_id": finding.check_id, - "confidence": finding.confidence, - "recommendation": finding.recommendation, - "source": finding.source.model_dump(mode="json") if 
finding.source else None, - "title": finding.title, - "tool_id": finding.tool_id, - "tool_name": finding.tool_name, - } - digest = hashlib.sha256( - json.dumps( - _canonicalize_for_fingerprint(identity), - sort_keys=True, - default=str, - ).encode("utf-8") - ).hexdigest()[:8] - return digest - - -def _risk_tag_confidence(tool: Tool, min_confidence: str) -> dict[str, str]: - threshold = confidence_rank(min_confidence) - by_tag: dict[str, str] = {} - for hint in tool.risk_hints: - if confidence_rank(hint.confidence) < threshold: - continue - current = by_tag.get(hint.tag) - if current is None or confidence_rank(hint.confidence) > confidence_rank(current): - by_tag[hint.tag] = hint.confidence - return dict(sorted(by_tag.items())) - - -def _has_mixed_evidence(tools: list[Tool]) -> bool: - return any( - tool.source_type == "sdk_function" or tool.extraction_confidence != "high" - for tool in tools - ) diff --git a/src/agents_shipgate/core/findings/__init__.py b/src/agents_shipgate/core/findings/__init__.py new file mode 100644 index 00000000..ad946ebf --- /dev/null +++ b/src/agents_shipgate/core/findings/__init__.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from .agent_summary import ( + _build_first_recommended_action, + _top_active_finding, + build_agent_summary, +) +from .constants import FINGERPRINT_EXCLUDED_EVIDENCE_KEYS, SEVERITY_ORDER +from .identity import ( + _canonicalize_for_fingerprint, + _collision_discriminator, + assign_finding_ids, + dedupe_findings, + finding_fingerprint, +) +from .mutations import ( + _matching_suppression, + _severity_override_for_check, + apply_severity_overrides, + apply_suppressions, +) +from .remediation import ( + _REMEDIATION_FALLBACK, + _derive_from_patches, + annotate_remediation, + derive_agent_action, +) +from .report_builder import build_report +from .reviewer_summary import ( + _action_surface_changes, + _baseline_integrity_issue_count, + _capability_misalignment_count, + _evidence_matrix_gap_count, + 
_pick_first_recommended_surface, + _privacy_redaction_count, + _reviewer_headline, + _severity_override_counts, + _tool_surface_changes, + build_reviewer_summary, +) +from .summaries import ( + _has_mixed_evidence, + _risk_tag_confidence, + recommended_actions, + summarize_findings, + summarize_tool_surface, + tool_inventory, +) + +__all__ = [ + "FINGERPRINT_EXCLUDED_EVIDENCE_KEYS", + "SEVERITY_ORDER", + "_REMEDIATION_FALLBACK", + "_action_surface_changes", + "_baseline_integrity_issue_count", + "_build_first_recommended_action", + "_canonicalize_for_fingerprint", + "_capability_misalignment_count", + "_collision_discriminator", + "_derive_from_patches", + "_evidence_matrix_gap_count", + "_has_mixed_evidence", + "_matching_suppression", + "_pick_first_recommended_surface", + "_privacy_redaction_count", + "_reviewer_headline", + "_risk_tag_confidence", + "_severity_override_counts", + "_severity_override_for_check", + "_tool_surface_changes", + "_top_active_finding", + "annotate_remediation", + "apply_severity_overrides", + "apply_suppressions", + "assign_finding_ids", + "build_agent_summary", + "build_report", + "build_reviewer_summary", + "dedupe_findings", + "derive_agent_action", + "finding_fingerprint", + "recommended_actions", + "summarize_findings", + "summarize_tool_surface", + "tool_inventory", +] diff --git a/src/agents_shipgate/core/findings/agent_summary.py b/src/agents_shipgate/core/findings/agent_summary.py new file mode 100644 index 00000000..7877bfa7 --- /dev/null +++ b/src/agents_shipgate/core/findings/agent_summary.py @@ -0,0 +1,403 @@ +from __future__ import annotations + +import shlex + +from agents_shipgate.schemas.report import ( + AgentSummary, + AgentSummaryAction, + Finding, + ReleaseDecision, +) + +from .constants import SEVERITY_ORDER + + +def build_agent_summary( + *, + findings: list[Finding], + release_decision: ReleaseDecision | None, + json_report_path: str | None = None, +) -> AgentSummary: + """Construct the top-level 
``agent_summary`` block. + + Deterministic projection of ``release_decision`` plus the + per-finding ``agent_action`` values. Surfaces the same numbers a + coding agent would otherwise compute by traversing arrays — same + inputs, same output, no agent-side aggregation needed. + + ``json_report_path`` is the actual on-disk path of the emitted JSON + report (from ``ReadinessReport.generated_reports['json']``). It is + threaded in so ``first_recommended_action.command`` can name the + real path the user just wrote — not the default. When the scan ran + without JSON output (no path available), the action falls back to + ``kind: "info"`` with a parameterised hint instead of a command, + so we never emit an apply-patches invocation pointing at a file + that doesn't exist or — worse — at a stale default-path report + from a previous run. + """ + if release_decision is None: + verdict: str = "passed" + blocker_count = 0 + review_item_count = 0 + reason = "No release decision computed." + evidence_recommended = False + else: + verdict = release_decision.decision + blocker_count = len(release_decision.blockers) + review_item_count = len(release_decision.review_items) + reason = (release_decision.reason or "").strip() + # `evidence_coverage.human_review_recommended` is the + # release-decision signal that says "this is review_required + # because the scan saw only low-confidence/static evidence, + # not because any specific finding needs fixing." In that + # case we want to surface the evidence-coverage reason + # (rather than the unhelpful "0 review items flagged" text) + # and route the agent toward gathering better evidence + # (#57 review P2: evidence-only review_required). 
+ # + # v0.14 also routes source_warning_count > 0 to review_required + # via an explicit branch in build_release_decision() + # (summarize_findings() doesn't fold source warnings into + # human_review_recommended, so without including them here a + # source-warning-only scan would render as "0 review item(s) + # flagged" with no first_recommended_action — losing the + # release_decision.reason that has the only useful context). + evidence_recommended = bool( + release_decision.evidence_coverage + and ( + release_decision.evidence_coverage.human_review_recommended + or release_decision.evidence_coverage.source_warning_count > 0 + ) + ) + + active_findings = [f for f in findings if not f.suppressed] + auto_appliable = sum( + 1 for f in active_findings if f.agent_action == "auto_apply" + ) + # `needs_human_review` covers every active finding the user has to + # weigh in on before release: full escalations (no machine path) + # PLUS proposed patches that ship at medium/low confidence and + # require an explicit `--apply` after the user reviews the diff. + # Earlier this counted only `escalate_to_human`, which silently + # under-counted propose_patch_for_review findings — release_decision + # already routes both into review_items, so the agent_summary + # number must agree (#57 review P1). + needs_review = sum( + 1 + for f in active_findings + if f.agent_action in {"escalate_to_human", "propose_patch_for_review"} + ) + + # Headline: short, one-sentence statement that names the verdict + # and the action-driven counts. The two populations differ: + # `review_item_count` mirrors `release_decision.review_items` + # (severity-driven; can include medium-severity auto_apply + # findings), while `needs_human_review` counts only findings whose + # `agent_action` requires human input. 
The headline uses + # `needs_human_review` for the "require human review" wording so a + # review_required verdict with only auto-applicable findings reads + # honestly as "auto-applicable; none require human input" instead + # of falsely claiming N findings need review. + # `release_decision.reason` is severity-driven and can contradict + # an action-driven headline (e.g. when only-auto-applicable + # findings are flagged for release review, the reason often reads + # "1 finding requires human review" — the opposite of what + # agent_summary needs to say). We therefore skip the reason append + # in branches where the headline already explains the agent-level + # situation in agent-driven terms; we keep the append in branches + # where the reason adds non-overlapping context (like blocker + # counts). + append_reason = True + if verdict == "blocked": + headline = ( + f"{blocker_count} active finding(s) block release" + + ( + f"; {review_item_count} review item(s) accepted as debt." + if review_item_count + else "." + ) + ) + elif verdict == "review_required": + if needs_review > 0: + head = f"{needs_review} finding(s) require human review" + if auto_appliable > 0: + head += f"; {auto_appliable} also auto-applicable" + headline = head + "." + elif auto_appliable > 0 and evidence_recommended: + # Mixed case: every flagged finding is auto-applicable + # *but* evidence coverage is incomplete (low-confidence + # tools or source warnings tipped review_required). Saying + # "none require human input beyond apply-patches" would + # silently drop the evidence-review requirement that the + # release_decision.reason explicitly calls out. Surface + # both so the agent applies the patches AND asks the + # human to review the evidence gap. 
+ evidence_clause = reason or ( + "evidence coverage is incomplete and should be reviewed " + "before shipping" + ) + headline = ( + f"{auto_appliable} auto-applicable finding(s) flagged for " + f"release review; {evidence_clause}" + ) + if not headline.endswith("."): + headline += "." + append_reason = False # already in headline + elif auto_appliable > 0: + headline = ( + f"{auto_appliable} auto-applicable finding(s) flagged for " + "release review; none require human input beyond apply-patches." + ) + # Suppress the severity-driven reason here. release_decision + # likely says something like "N finding(s) require human + # review" — appending it would directly contradict the + # action-driven headline (#57 review P1). + append_reason = False + elif evidence_recommended: + # Evidence-coverage-driven review: no actionable findings, + # but the scan saw only low-confidence/static evidence and + # the release_decision wants a human to weigh in. Surface + # the reason directly — it carries the only useful + # explanation. Falling back to "0 review items flagged" + # would lose the most important context (#57 review P2). + headline = ( + reason + if reason + else "Human review recommended: low-confidence evidence." + ) + append_reason = False # already in headline + else: + # Even rarer fallback: review_required without any of the + # above signals. Surface review_item_count so the + # headline isn't a self-contradiction. + headline = ( + f"{review_item_count} review item(s) flagged for release review." + ) + append_reason = False + if blocker_count: + headline += f" ({blocker_count} blocker(s) detected.)" + elif verdict == "insufficient_evidence": + # No specific finding to surface — by definition the issue is + # evidence quality, not findings. Surface the release_decision + # reason verbatim; it already names the counts and explains why + # the scan can't gate release. Falling through to the "Release + # ready" branch would lie about a degraded scan. 
def _build_first_recommended_action(
    *,
    verdict: str,
    auto_appliable: int,
    needs_review: int,
    review_item_count: int,
    active_findings: list[Finding],
    json_report_path: str | None,
    evidence_recommended: bool = False,
    evidence_reason: str = "",
) -> AgentSummaryAction | None:
    """Pick the single next step advertised in ``agent_summary``.

    The branches are evaluated in this order and the first match wins:

    1. ``verdict == "insufficient_evidence"``: an ``info`` action built
       from ``evidence_reason`` (or a generic sentence when it is empty).
       This is checked before auto-apply because, per the emitted text,
       applying patches does not clear an evidence verdict.
    2. ``auto_appliable > 0``: a ``command`` action running
       ``apply-patches`` against ``json_report_path`` when that path is
       truthy; otherwise an ``info`` action that spells out the command
       with a placeholder path. When the verdict is ``review_required``
       and ``evidence_recommended`` is set, the ``why`` text also calls
       out the evidence gap.
    3. ``verdict == "blocked"``: an ``info`` action naming the top active
       finding (``None`` when there are no active findings).
    4. ``verdict == "review_required"``: an evidence-only ``info`` action
       when ``evidence_recommended`` is set and both counters are zero;
       otherwise an ``info`` action that starts the walk at the top
       active finding (``None`` when there are no active findings).
    5. Anything else: ``None``.

    Returns:
        The chosen ``AgentSummaryAction``, or ``None`` when no action
        applies.
    """
    if verdict == "insufficient_evidence":
        base = (
            evidence_reason
            or "Evidence coverage below threshold; scan results are not "
            "trustworthy enough to gate release."
        )
        return AgentSummaryAction(
            kind="info",
            command=None,
            why=(
                f"{base} Surface this to the user and gather deeper "
                "evidence (e.g. MCP/OpenAPI inputs, eval traces, "
                "additional source files) before re-running the scan; "
                "applying patches does not clear an evidence verdict, "
                "so no machine-applicable fix is available."
            ),
        )

    if auto_appliable > 0:
        why = (
            f"{auto_appliable} finding(s) carry high-confidence patches "
            "safe to apply without human review."
        )
        if verdict == "review_required" and evidence_recommended:
            # Patches are still worth applying, but the explanation must
            # mention the evidence gap so apply-patches is not read as
            # the only remaining step.
            evidence_note = evidence_reason or (
                "Evidence coverage is incomplete (source warnings or "
                "low-confidence tools); review before shipping."
            )
            why = (
                f"{why} Note: {evidence_note} Applying patches does not "
                "address the evidence gap."
            )
        if json_report_path:
            # shlex.quote keeps paths containing spaces intact when the
            # advertised command is later split by a shell or shlex.split.
            quoted_path = shlex.quote(json_report_path)
            return AgentSummaryAction(
                kind="command",
                command=(
                    f"agents-shipgate apply-patches --from "
                    f"{quoted_path} --confidence high --apply"
                ),
                why=why,
            )
        # No JSON report path is known for this scan: describe the
        # pattern instead of emitting a dispatchable command. The
        # ``--from`` argument is shown as an explicit placeholder; the
        # previous text left it out entirely, which produced the invalid
        # hint ``apply-patches --from --confidence high --apply``.
        return AgentSummaryAction(
            kind="info",
            command=None,
            why=(
                f"{why} Re-run the scan with --format json (default path "
                "is agents-shipgate-reports/report.json), then: "
                "agents-shipgate apply-patches --from PATH/TO/report.json "
                "--confidence high --apply."
            ),
        )

    if verdict == "blocked":
        top = _top_active_finding(active_findings)
        if top is None:
            return None
        return AgentSummaryAction(
            kind="info",
            command=None,
            why=(
                f"Surface {top.check_id} on {top.tool_name or 'agent'} to "
                "the user; release is blocked and no auto-applicable patch "
                "is available."
            ),
        )

    if verdict == "review_required":
        # Evidence-driven review: there is no specific finding to walk,
        # so return an info action describing the situation rather than
        # leaving first_recommended_action empty.
        if (
            evidence_recommended
            and needs_review == 0
            and auto_appliable == 0
        ):
            base = (
                evidence_reason
                or "Static-only scan with low-confidence evidence; "
                "human review recommended."
            )
            return AgentSummaryAction(
                kind="info",
                command=None,
                why=(
                    f"{base} Surface this to the user and discuss whether "
                    "to gather better evidence (e.g. add MCP/OpenAPI "
                    "inputs, eval traces) or accept the static-only "
                    "review posture; no machine-applicable fix is "
                    "available."
                ),
            )

        top = _top_active_finding(active_findings)
        if top is None:
            return None
        # Prefer the action-driven count; fall back to the release
        # decision's review_item_count so the text never reads
        # "Walk the 0 review item(s)".
        visible = needs_review if needs_review > 0 else review_item_count
        return AgentSummaryAction(
            kind="info",
            command=None,
            why=(
                f"Walk the {visible} review item(s) starting with "
                f"{top.check_id}; release is allowed but the human "
                "reviewer should weigh in."
            ),
        )

    return None
set[tuple[str, str, str, str, str, str]] = set() + deduped: list[Finding] = [] + for finding in findings: + evidence_key = json.dumps( + _canonicalize_for_fingerprint(finding.evidence), + sort_keys=True, + default=str, + ) + source_key = json.dumps( + finding.source.model_dump(mode="json") if finding.source else None, + sort_keys=True, + default=str, + ) + key = ( + finding.check_id, + # Title is intentionally part of local de-dupe identity. Some + # checks share structured evidence across distinct user-visible + # targets, and the interpolated title is the only stable context + # that keeps those findings separate before IDs are assigned. + finding.title, + finding.tool_id or "", + finding.tool_name or "", + evidence_key, + source_key, + ) + if key in seen: + continue + seen.add(key) + deduped.append(finding) + return deduped + + +def finding_fingerprint(finding: Finding) -> str: + identity = { + "check_id": finding.check_id, + "tool_name": finding.tool_name, + "evidence": _canonicalize_for_fingerprint(finding.evidence), + } + digest = hashlib.sha256( + json.dumps(identity, sort_keys=True, default=str).encode("utf-8") + ).hexdigest()[:16] + return f"fp_{digest}" + + +def _canonicalize_for_fingerprint(value): + if isinstance(value, dict): + return { + key: _canonicalize_for_fingerprint(value[key]) + for key in sorted(value) + if key not in FINGERPRINT_EXCLUDED_EVIDENCE_KEYS + } + if isinstance(value, list): + items = [_canonicalize_for_fingerprint(item) for item in value] + return sorted( + items, + key=lambda item: json.dumps(item, sort_keys=True, default=str), + ) + if isinstance(value, tuple | set): + return _canonicalize_for_fingerprint(list(value)) + return value + + +def _collision_discriminator(finding: Finding) -> str: + identity = { + "agent_id": finding.agent_id, + "category": finding.category, + "check_id": finding.check_id, + "confidence": finding.confidence, + "recommendation": finding.recommendation, + "source": finding.source.model_dump(mode="json") 
def apply_suppressions(
    findings: list[Finding], suppressions: list[SuppressionConfig]
) -> list[Finding]:
    """Flag findings covered by a suppression rule, mutating them in place.

    Each finding is looked up with ``_matching_suppression``. When a rule
    is found, the finding's ``suppressed`` flag is set to ``True`` and its
    ``suppression_reason`` is copied from the rule's ``reason``. Findings
    without a matching rule are left untouched.

    Returns:
        The same ``findings`` list object that was passed in.
    """
    for candidate in findings:
        rule = _matching_suppression(candidate, suppressions)
        if not rule:
            continue
        candidate.suppressed = True
        candidate.suppression_reason = rule.reason
    return findings
+ finding.evidence.setdefault("default_severity", finding.severity) + finding.severity = override + return findings + + +def _matching_suppression( + finding: Finding, suppressions: list[SuppressionConfig] +) -> SuppressionConfig | None: + for suppression in suppressions: + if not expands_to_check_id(suppression.check_id, finding.check_id): + continue + if not suppression.tool: + return suppression + possible_tools = { + finding.tool_name, + finding.tool_id, + finding.tool_id.replace("tool:", "") if finding.tool_id else None, + } + if suppression.tool in possible_tools: + return suppression + return None + + +def _severity_override_for_check( + check_id: str, overrides: dict[str, Severity] +) -> Severity | None: + if override := overrides.get(check_id): + return override + for configured_check_id, override in overrides.items(): + if expands_to_check_id(configured_check_id, check_id): + return override + return None diff --git a/src/agents_shipgate/core/findings/remediation.py b/src/agents_shipgate/core/findings/remediation.py new file mode 100644 index 00000000..4de02a05 --- /dev/null +++ b/src/agents_shipgate/core/findings/remediation.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +from agents_shipgate.schemas.checks import CheckMetadata +from agents_shipgate.schemas.common import AgentAction +from agents_shipgate.schemas.patches import ManualPatch +from agents_shipgate.schemas.report import Finding + +# v0.7: safe-closed default for findings whose check_id isn't in the +# loaded catalog — policy-pack rules, third-party plugins, or any check +# emitted outside the built-in set. The static catalog is silent for +# these, so we default-close: human review required, no auto-fix kind +# claimed. 
+_REMEDIATION_FALLBACK = { + "autofix_safe": False, + "requires_human_review": True, + "suggested_patch_kind": "manual", + "docs_url": None, +} + +def annotate_remediation( + findings: list[Finding], + check_metadata_lookup: dict[str, CheckMetadata], +) -> list[Finding]: + """Populate the v0.7 per-finding remediation fields in place. + + Strict derivation policy: + + - When ``finding.patches`` is non-empty, the safety bools are derived + from the actual emitted patches: + * ``autofix_safe=True`` iff EVERY patch is non-manual AND has + ``confidence == "high"``. Mixed-state (e.g. one safe + one + manual, one high + one medium) → ``autofix_safe=False``. + * ``requires_human_review`` is the inverse of ``autofix_safe``. + * ``suggested_patch_kind`` = kind of the first non-manual patch, + or ``"manual"`` when all are manual, or ``"none"`` when the + list is empty. + - When ``finding.patches`` is None (scan ran without + ``--suggest-patches``), the safety bools and + ``suggested_patch_kind`` come from the matching ``CheckMetadata`` + entry, with the safe-closed fallback for unknown check IDs. + - ``docs_url`` is always sourced from CheckMetadata (or None for + unknown check IDs). Patches don't carry per-instance doc URLs. + + Caller (`scan.run_scan`) builds the metadata lookup from the + catalog with the scan's actual ``plugins_enabled`` setting, so this + function never triggers plugin loading at serialization time. + """ + for finding in findings: + meta = check_metadata_lookup.get(finding.check_id) + catalog_doc_url = meta.docs_url if meta is not None else None + + # Three states, treated distinctly: + # 1. `patches is None` → scan ran without --suggest-patches. + # Seed from CheckMetadata (or safe-closed fallback for + # unknown check IDs). + # 2. `patches == []` → scan ran WITH --suggest-patches but + # the generator emitted nothing for this finding. 
Treat as + # safe-closed with `suggested_patch_kind="none"` — falling + # back to the catalog would misleadingly report a patch + # kind that the report doesn't actually carry. + # 3. `patches` non-empty → derive from the actual patches + # via the strict rule below. + if finding.patches is None: + if meta is not None: + autofix_safe = meta.autofix_safe + requires_human_review = meta.requires_human_review + suggested_patch_kind = meta.suggested_patch_kind + else: + autofix_safe = bool(_REMEDIATION_FALLBACK["autofix_safe"]) + requires_human_review = bool( + _REMEDIATION_FALLBACK["requires_human_review"] + ) + suggested_patch_kind = str( + _REMEDIATION_FALLBACK["suggested_patch_kind"] + ) + else: + ( + autofix_safe, + requires_human_review, + suggested_patch_kind, + ) = _derive_from_patches(finding.patches) + + # Reviewer-grade escalation: when the catalog flags this check + # as requiring human review regardless of the per-patch state + # (approval/confirmation/idempotency, broad-scope, + # prohibited-action, runtime-trace, HITL evidence), force + # safe-closed values BEFORE assigning. Setting them here keeps + # `finding.autofix_safe`, `finding.requires_human_review`, and + # `finding.agent_action` (derived below) in agreement: the + # existing `auto_apply` early-return in `derive_agent_action` + # tests `finding.autofix_safe`, so flipping it to False naturally + # routes the verdict to `propose_patch_for_review` whenever + # patches are present, and to `escalate_to_human` otherwise. + if meta is not None and meta.requires_human_review_regardless_of_patch: + autofix_safe = False + requires_human_review = True + + finding.autofix_safe = autofix_safe + finding.requires_human_review = requires_human_review + finding.suggested_patch_kind = suggested_patch_kind + finding.docs_url = catalog_doc_url + finding.agent_action = derive_agent_action(finding) + # v0.14: ensure every emitted finding carries a real + # `provenance_kind`. 
def derive_agent_action(finding: Finding) -> AgentAction:
    """Map ``finding`` onto exactly one ``AgentAction`` value.

    Decision order:

    1. A suppressed finding is ``"informational"``, even when it also
       blocks release.
    2. A release-blocking finding is ``"escalate_to_human"``.
    3. With no patches (``None`` or an empty list) the result follows
       ``requires_human_review``: ``"escalate_to_human"`` when set,
       ``"informational"`` otherwise.
    4. Manual patches (``kind == "manual"``) are ignored wherever they sit
       in the list. If nothing else remains the result is
       ``"escalate_to_human"``.
    5. The first remaining patch decides: ``"auto_apply"`` when its
       confidence is ``"high"`` and ``autofix_safe`` is set;
       ``"propose_patch_for_review"`` for any other high / medium / low
       confidence; and the ``requires_human_review`` fallback from step 3
       when it declares no recognised confidence.

    ``"suppress_with_reason"`` is never produced by this projection.
    """

    def catalog_fallback() -> AgentAction:
        # Shared by the "no patches" and "no declared confidence" paths.
        return (
            "escalate_to_human"
            if finding.requires_human_review
            else "informational"
        )

    if finding.suppressed:
        return "informational"
    if finding.blocks_release:
        return "escalate_to_human"

    candidate_patches = finding.patches
    if not candidate_patches:
        # Nothing machine-applicable was attached to this finding.
        return catalog_fallback()

    applicable = [patch for patch in candidate_patches if patch.kind != "manual"]
    if not applicable:
        # Only manual patches: a human has to do the work.
        return "escalate_to_human"

    lead_confidence = getattr(applicable[0], "confidence", None)
    if lead_confidence == "high" and finding.autofix_safe:
        return "auto_apply"
    if lead_confidence in {"high", "medium", "low"}:
        # A machine-applicable patch exists but is not cleared for
        # unattended use, so it is offered for review.
        return "propose_patch_for_review"

    # The leading non-manual patch carries no recognised confidence.
    return catalog_fallback()
Mixed states fall + to safe-closed.""" + if not patches: + return (False, True, "none") + + has_manual = any(isinstance(p, ManualPatch) for p in patches) + non_manual = [p for p in patches if not isinstance(p, ManualPatch)] + all_high_confidence_non_manual = ( + not has_manual + and bool(non_manual) + and all(getattr(p, "confidence", None) == "high" for p in non_manual) + ) + + # Per the plan §2 derivation rule: kind of the FIRST non-manual + # patch takes priority (even when ManualPatches are also present). + # All-manual → "manual". Empty list → "none" (handled above). + if non_manual: + suggested_patch_kind = non_manual[0].kind + else: + suggested_patch_kind = "manual" + + autofix_safe = all_high_confidence_non_manual + requires_human_review = not autofix_safe + return (autofix_safe, requires_human_review, suggested_patch_kind) diff --git a/src/agents_shipgate/core/findings/report_builder.py b/src/agents_shipgate/core/findings/report_builder.py new file mode 100644 index 00000000..e1432e8f --- /dev/null +++ b/src/agents_shipgate/core/findings/report_builder.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +from agents_shipgate.ci.release_decision import build_release_decision +from agents_shipgate.core.domain import Tool +from agents_shipgate.schemas.codex_plugin import CodexPluginSurface +from agents_shipgate.schemas.common import Severity +from agents_shipgate.schemas.manifest import AgentsShipgateManifest +from agents_shipgate.schemas.report import ( + BaselineSummary, + Finding, + LoadedPolicyPack, + PolicyAudit, + PrivacyAudit, + ReadinessReport, +) +from agents_shipgate.schemas.surfaces import ( + ActionSurfaceDiff, + ActionSurfaceFacts, + ToolSurfaceDiff, + ToolSurfaceFacts, +) + +from .agent_summary import build_agent_summary +from .summaries import ( + recommended_actions, + summarize_findings, + summarize_tool_surface, + tool_inventory, +) + + +def build_report( + *, + run_id: str, + manifest: AgentsShipgateManifest, + project: dict[str, 
object] | None = None, + agent: dict[str, object], + environment: dict[str, object], + tools: list[Tool], + findings: list[Finding], + generated_reports: dict[str, str], + ci_mode: str, + fail_on: list[Severity] | None = None, + new_findings_only: bool = False, + loaded_policy_packs: list[LoadedPolicyPack] | None = None, + loaded_plugins: list[dict[str, object]] | None = None, + loaded_adapters: list[dict[str, object]] | None = None, + source_warnings: list[str] | None = None, + api_surface: dict[str, object] | None = None, + anthropic_surface: dict[str, object] | None = None, + frameworks: dict[str, object] | None = None, + codex_plugin_surface: CodexPluginSurface | None = None, + baseline: BaselineSummary | None = None, + manifest_dir: str | None = None, + tool_surface_facts: ToolSurfaceFacts | None = None, + tool_surface_diff: ToolSurfaceDiff | None = None, + action_surface_facts: ActionSurfaceFacts | None = None, + action_surface_diff: ActionSurfaceDiff | None = None, + policy_audit: PolicyAudit | None = None, + privacy_audit: PrivacyAudit | None = None, +) -> ReadinessReport: + report = ReadinessReport( + run_id=run_id, + manifest_dir=manifest_dir, + project=project or manifest.project.model_dump(exclude_none=True), + agent=agent, + environment=environment, + summary=summarize_findings(findings, tools), + tool_surface=summarize_tool_surface(tools), + tool_surface_facts=tool_surface_facts or ToolSurfaceFacts(), + tool_surface_diff=tool_surface_diff or ToolSurfaceDiff(), + action_surface_facts=action_surface_facts or ActionSurfaceFacts(), + action_surface_diff=action_surface_diff or ActionSurfaceDiff(), + api_surface=api_surface, + anthropic_surface=anthropic_surface, + frameworks=frameworks or {}, + codex_plugin_surface=codex_plugin_surface, + baseline=baseline, + findings=findings, + recommended_actions=recommended_actions(findings), + generated_reports=generated_reports, + loaded_policy_packs=loaded_policy_packs or [], + loaded_plugins=loaded_plugins or [], + 
loaded_adapters=loaded_adapters or [], + tool_inventory=tool_inventory(tools), + source_warnings=source_warnings or [], + # v0.17 (M1): policy audit envelope. Always present on emitted + # scans (empty when no overrides applied) so consumers can read + # ``report.policy_audit.severity_overrides_applied`` without a + # null check. + policy_audit=policy_audit or PolicyAudit(), + privacy_audit=privacy_audit, + ) + report.release_decision = build_release_decision( + report=report, + tools=tools, + ci_mode=ci_mode, + fail_on=fail_on, + new_findings_only=new_findings_only, + ) + # v0.12: agent_summary is the deterministic projection of + # release_decision + per-finding agent_action. Built last so it + # picks up everything else. The JSON report path is threaded in + # so first_recommended_action.command names the real on-disk + # path the user just wrote (not the default — see #57 review P1.1). + report.agent_summary = build_agent_summary( + findings=findings, + release_decision=report.release_decision, + json_report_path=generated_reports.get("json"), + ) + # v0.20 NOTE: ``report.reviewer_summary`` is NOT built here. It + # depends on ``report.misalignments`` which ``apply_capability_diff`` + # populates AFTER ``build_report`` returns (see cli/scan/final_report.py). Building + # it here would project from incomplete state — ``capability_misalignments`` + # would always be 0 even on reports that later carry dozens of + # misalignments. The scan pipeline calls ``build_reviewer_summary`` + # post-capability-diff so the projection sees the final report state. + # Test fixtures that need a populated ``reviewer_summary`` should + # also call ``build_reviewer_summary`` after they finish assembling + # the report. 
+ return report diff --git a/src/agents_shipgate/core/findings/reviewer_summary.py b/src/agents_shipgate/core/findings/reviewer_summary.py new file mode 100644 index 00000000..934a6ea3 --- /dev/null +++ b/src/agents_shipgate/core/findings/reviewer_summary.py @@ -0,0 +1,520 @@ +from __future__ import annotations + +from typing import Any + +from agents_shipgate.schemas.report import ( + Finding, + PolicyAudit, + PrivacyAudit, + ReadinessReport, + ReleaseDecision, + ReviewerSummary, + ReviewerSurfacePointer, +) + +# --- v0.20: reviewer_summary projection ------------------------------------- +# +# Parallels build_agent_summary. AgentSummary answers "what should an agent +# do next?"; ReviewerSummary answers "what should a reviewer look at first?". +# Both are deterministic projections of the same underlying scan state — no +# extra side effects, no I/O, no LLM calls. + +# Baseline integrity findings are emitted under these three check IDs (M2). +# Kept in sync with checks/registry.py + STABILITY.md "Baseline Integrity". +_BASELINE_INTEGRITY_CHECK_IDS: frozenset[str] = frozenset( + { + "SHIP-BASELINE-INTEGRITY-MISMATCH", + "SHIP-BASELINE-ENTRY-EXPIRED", + "SHIP-BASELINE-ENTRY-STALE", + } +) + +def _tool_surface_changes(report: ReadinessReport) -> int: + """Sum of structural-change counters in tool_surface_diff.summary. + + Returns zero when the diff is disabled (no baseline configured) or + when every counter is zero (no changes). The counters are + pre-computed by compute_tool_surface_diff so this is O(1). 
+ """ + diff = report.tool_surface_diff + if diff is None or not getattr(diff, "enabled", False): + return 0 + summary = diff.summary + return ( + summary.tools_added + + summary.tools_removed + + summary.tools_changed + + summary.new_scopes + + summary.removed_scopes + + summary.new_high_risk_effects + + summary.removed_high_risk_effects + + summary.controls_added + + summary.controls_removed + + summary.metadata_changes + + summary.policy_drift_items + ) + + +def _action_surface_changes(report: ReadinessReport) -> int: + """Sum of structural-change counters in action_surface_diff.summary. + + Returns zero when the diff is disabled or empty. ``blocking_findings`` + is intentionally NOT summed here — that count flows into the release + decision, and is already reflected in ``AgentSummary.blocker_count``. + Reviewer-side activity is the structural delta itself. + """ + diff = report.action_surface_diff + if diff is None or not getattr(diff, "enabled", False): + return 0 + summary = diff.summary + return ( + summary.actions_added + + summary.actions_removed + + summary.actions_modified + + summary.scope_expansions + + summary.effect_escalations + + summary.risk_tags_added + + summary.approvals_removed + + summary.safeguards_removed + + summary.input_schema_expansions + ) + + +def _capability_misalignment_count(report: ReadinessReport) -> int: + """Total misalignments surfaced by the capability/intent diff (v0.9).""" + return len(report.misalignments) + + +def _evidence_matrix_gap_count( + report: ReadinessReport, + *, + evidence_matrix_payload: dict[str, Any] | None = None, +) -> int: + """Count of evidence-matrix rows whose status is a reviewer-actionable + gap. + + The matrix is normally a packet-only section, but ``build_evidence_matrix`` + accepts the report payload and produces the same shape. We call it + here so the count is available in JSON reports even when the packet + is disabled (``--no-packet`` runs). + + Only ``missing`` rows are counted as gaps. 
``not_declared`` means + "this domain is not relevant to this manifest" (e.g., + ``memory_isolation`` on every scan today because the manifest schema + does not model it yet) — these are intentional non-coverage, not + reviewer signals. ``partial``, ``covered``, and ``informational`` + are also not gaps. + + Import is deferred to avoid a top-level cycle: ``packet`` imports + from ``schemas/report.py`` (and indirectly from ``core/findings/`` + via build helpers), so we import only when this function runs. + """ + try: + from agents_shipgate.packet.evidence_matrix import build_evidence_matrix + except ImportError: # pragma: no cover - defensive + return 0 + payload = evidence_matrix_payload or _evidence_matrix_payload(report) + section = build_evidence_matrix(payload) + return sum( + 1 + for row in section.rows + if getattr(row, "evidence_present", None) == "missing" + ) + + +def _evidence_matrix_payload(report: ReadinessReport) -> dict[str, Any]: + """Build the narrow report payload needed by build_evidence_matrix. + + Avoids serializing the entire ReadinessReport just to count reviewer + evidence gaps. Keep this in sync with packet.evidence_matrix's field reads. 
+ """ + return { + "findings": _json_payload(report.findings), + "release_decision": _json_payload(report.release_decision), + "tool_inventory": _json_payload(report.tool_inventory), + "tool_surface": _json_payload(report.tool_surface), + "source_warnings": _json_payload(report.source_warnings), + "tool_surface_facts": _json_payload(report.tool_surface_facts), + "api_surface": _json_payload(report.api_surface), + "action_surface_facts": _json_payload(report.action_surface_facts), + "declared_intentions": _json_payload(report.declared_intentions), + "misalignments": _json_payload(report.misalignments), + "capability_facts": _json_payload(report.capability_facts), + "baseline": _json_payload(report.baseline), + "action_surface_diff": _json_payload(report.action_surface_diff), + } + + +def _json_payload(value): + if hasattr(value, "model_dump"): + return value.model_dump(mode="json") + if isinstance(value, list): + return [_json_payload(item) for item in value] + if isinstance(value, dict): + return {key: _json_payload(item) for key, item in value.items()} + return value + + +def _severity_override_counts( + policy_audit: PolicyAudit | None, +) -> tuple[int, int]: + """Return (total_applied, tier_crossed_subset). + + ``total_applied`` is len(severity_overrides_applied). + ``tier_crossed_subset`` counts the entries flagged ``tier_crossed=True``. + """ + if policy_audit is None: + return 0, 0 + rows = policy_audit.severity_overrides_applied + return len(rows), sum(1 for row in rows if row.tier_crossed) + + +def _privacy_redaction_count(privacy_audit: PrivacyAudit | None) -> int: + """Total redactions across the public output surfaces. + + Reads the pre-computed ``redacted_occurrence_count`` field rather + than re-summing per-path counts — they agree by construction (see + privacy.py) but the pre-computed total is the canonical surface and + keeps the projection cheap. 
+ """ + if privacy_audit is None or not privacy_audit.enabled: + return 0 + return privacy_audit.redacted_occurrence_count + + +def _baseline_integrity_issue_count(findings: list[Finding]) -> int: + """Count of active findings emitted by the three baseline-integrity + checks (SHIP-BASELINE-{INTEGRITY-MISMATCH,ENTRY-EXPIRED,ENTRY-STALE}). + + Suppressed findings are excluded; matched (accepted-debt) findings + are included because tampering and expiry remain reviewer signals + regardless of baseline status. + """ + return sum( + 1 + for f in findings + if not f.suppressed and f.check_id in _BASELINE_INTEGRITY_CHECK_IDS + ) + + +def _reviewer_headline( + *, + verdict: str, + lens_total: int, + audit_total: int, + pointer: ReviewerSurfacePointer | None, +) -> str: + """Deterministic, ≤200-char one-sentence summary. + + Sort order in the headline mirrors the priority order in + ``_pick_first_recommended_surface`` so the headline and pointer + cannot disagree about which surface matters most. + """ + if verdict == "blocked": + head = "Release blocked" + elif verdict == "insufficient_evidence": + head = "Evidence coverage below threshold" + elif verdict == "review_required": + head = "Review required" + else: # passed + head = "Release ready" + + if lens_total == 0 and audit_total == 0: + body = "no reviewer signals (lenses + audits all clean)." + else: + body_parts: list[str] = [] + if lens_total > 0: + noun = "change" if lens_total == 1 else "changes" + body_parts.append(f"{lens_total} lens {noun}") + if audit_total > 0: + noun = "event" if audit_total == 1 else "events" + body_parts.append(f"{audit_total} audit {noun}") + body = "; ".join(body_parts) + "." + + headline = f"{head}: {body}" + if pointer is not None: + suffix = f" Start at {pointer.name}." 
+ if len(headline) + len(suffix) <= 200: + headline += suffix + return headline + + +def _pick_first_recommended_surface( + *, + release_decision: ReleaseDecision | None, + action_surface_changes: int, + baseline_integrity_issues: int, + severity_overrides_tier_crossed: int, + severity_overrides_applied: int, + capability_misalignments: int, + tool_surface_changes: int, + privacy_redactions: int, + evidence_matrix_gaps: int, +) -> ReviewerSurfacePointer | None: + """Deterministic priority order. Returns None only when every + counter is zero AND the release decision is ``passed``. + + Priority encodes "which surface gives the highest-leverage reviewer + signal first": + + 1. Release decision when blocked or insufficient_evidence — the + verdict tells the reviewer what to gate on. + 2. Action-surface changes — first-class PR/release delta of what + the agent can do externally. + 3. Baseline integrity issues — tampering or expiry on accepted + debt is high-attention. + 4. Tier-crossed severity overrides — explicit downgrade across + a release-critical boundary. + 5. Capability/intent misalignments — declared purpose vs. + observed surface. + 6. Tool surface changes — registry-level PR diff. + 7. Privacy redactions — output sanitation events. + 8. Evidence matrix gaps — coverage holes for review. + 9. Non-tier-crossed severity overrides — same-tier downgrades + and upgrades. Lower priority than the tier-crossed case + because they don't cross a release-critical boundary, but + still a reviewer signal that warrants a glance. Without + this fallthrough, ``severity_overrides_applied > 0`` would + produce a non-zero headline but a ``null`` pointer — + contradicting the contract that ``null`` means a fully + clean scan. + 10. review_required verdict with no specific lens/audit signal — + source warnings (e.g., duplicate tool names) or evidence + gaps can produce ``decision=review_required`` with all + reviewer counters at zero. 
Without this fallthrough, such + scans emit a non-null verdict but a ``null`` pointer — + contradicting the contract that ``null`` means + ``passed + all-zero``. + + Each branch picks the most informative ``path`` and a single + sentence ``why`` suitable for a PR comment lead. + """ + verdict = (release_decision.decision if release_decision else "passed") + + if verdict == "blocked": + return ReviewerSurfacePointer( + kind="release_decision", + name="release_decision", + path="report.release_decision", + why=( + "Release is blocked; read release_decision.blockers[] for " + "the gating findings." + ), + ) + if verdict == "insufficient_evidence": + return ReviewerSurfacePointer( + kind="release_decision", + name="release_decision", + path="report.release_decision", + why=( + "Evidence coverage is below threshold; read " + "release_decision.reason and evidence_coverage." + ), + ) + + if action_surface_changes > 0: + return ReviewerSurfacePointer( + kind="lens", + name="action_surface_diff", + path="report.action_surface_diff", + why=( + "Action-surface diff has structural changes " + "(scope, effect, approval, or safeguard); read this lens " + "first to see what the agent can now do." + ), + ) + + if baseline_integrity_issues > 0: + return ReviewerSurfacePointer( + kind="audit", + name="baseline_integrity", + path="report.findings[]", + why=( + "Baseline integrity findings were emitted; filter " + "findings[] by SHIP-BASELINE-* check_id and inspect the " + "baseline-audit.log alongside the baseline file." + ), + ) + + if severity_overrides_tier_crossed > 0: + return ReviewerSurfacePointer( + kind="audit", + name="policy_audit", + path="report.policy_audit.severity_overrides_applied", + why=( + "One or more severity overrides crossed a tier boundary " + "with explicit acknowledgement; review the entries to " + "confirm the downgrade is still appropriate." 
+ ), + ) + + if capability_misalignments > 0: + return ReviewerSurfacePointer( + kind="lens", + name="capability_intent_diff", + path="report.misalignments", + why=( + "The capability/intent diff surfaced misalignments " + "between declared agent purpose and observed tool " + "surface; read misalignments[] for the specifics." + ), + ) + + if tool_surface_changes > 0: + return ReviewerSurfacePointer( + kind="lens", + name="tool_surface_diff", + path="report.tool_surface_diff", + why=( + "Tool-surface diff has structural changes (tools, " + "scopes, controls, or policies); read this lens to see " + "the registry-level delta." + ), + ) + + if privacy_redactions > 0: + return ReviewerSurfacePointer( + kind="audit", + name="privacy_audit", + path="report.privacy_audit.redacted_paths", + why=( + "Sensitive values were redacted from the public outputs; " + "confirm the redactions match expectations and that no " + "secret should have been there in the first place." + ), + ) + + if evidence_matrix_gaps > 0: + return ReviewerSurfacePointer( + kind="evidence_matrix", + name="evidence_matrix", + path="packet.evidence_matrix.rows", + why=( + "Evidence-matrix rows report coverage gaps " + "(missing/not_declared); open the Release Evidence " + "Packet to see which domains lack reviewer-visible " + "evidence." + ), + ) + + # Low-priority fallthrough: same-tier severity overrides and + # upgrades. Without this branch, a manifest with a single + # medium → low override would produce a non-zero + # ``severity_overrides_applied`` counter AND a non-zero + # ``audit_total`` in the headline, but a ``null`` pointer — + # contradicting the contract that ``null`` means a fully clean + # scan. The tier-crossed case above handles the higher-attention + # subset; this branch covers the rest. 
+ if severity_overrides_applied > 0: + return ReviewerSurfacePointer( + kind="audit", + name="policy_audit", + path="report.policy_audit.severity_overrides_applied", + why=( + "Severity overrides are applied (same-tier or upgrade); " + "review the policy_audit entries to confirm the overrides " + "match reviewer intent." + ), + ) + + # Final fallthrough: review_required verdict with no specific + # lens/audit signals. Source warnings (e.g., duplicate tool names, + # unresolvable imports) or an evidence gap can force + # decision=review_required even when findings=0 and all reviewer + # counters are zero. Without this branch, such scans would emit + # first_recommended_surface=null despite a non-passed verdict, + # contradicting the contract that null means passed + all-zero. + if verdict == "review_required": + return ReviewerSurfacePointer( + kind="release_decision", + name="release_decision", + path="report.release_decision", + why=( + "Review is required (source warnings or evidence gap " + "without specific lens/audit signals); read " + "release_decision.reason for details." + ), + ) + + return None + + +def build_reviewer_summary( + *, + findings: list[Finding], + report: ReadinessReport, + evidence_matrix_payload: dict[str, Any] | None = None, +) -> ReviewerSummary: + """Construct the top-level ``reviewer_summary`` block. + + Deterministic projection of the reviewer lens surfaces and audit + envelopes on ``report``. Parallels ``build_agent_summary`` but for + the audit/lens dimensions: a reviewer who wants headline activity + counts and a recommended starting surface reads this block instead + of opening every lens and audit envelope. + + ``findings`` is passed separately (not derived from + ``report.findings``) so we can run on the same active-findings list + callers have already filtered/annotated — same pattern as + ``build_agent_summary``. 
+ """ + tool_surface_changes = _tool_surface_changes(report) + action_surface_changes = _action_surface_changes(report) + capability_misalignments = _capability_misalignment_count(report) + evidence_matrix_gaps = _evidence_matrix_gap_count( + report, + evidence_matrix_payload=evidence_matrix_payload, + ) + severity_overrides_applied, severity_overrides_tier_crossed = ( + _severity_override_counts(report.policy_audit) + ) + privacy_redactions = _privacy_redaction_count(report.privacy_audit) + baseline_integrity_issues = _baseline_integrity_issue_count(findings) + + lens_total = ( + tool_surface_changes + + action_surface_changes + + capability_misalignments + + evidence_matrix_gaps + ) + audit_total = ( + severity_overrides_applied + + privacy_redactions + + baseline_integrity_issues + ) + + verdict = ( + report.release_decision.decision if report.release_decision else "passed" + ) + + pointer = _pick_first_recommended_surface( + release_decision=report.release_decision, + action_surface_changes=action_surface_changes, + baseline_integrity_issues=baseline_integrity_issues, + severity_overrides_tier_crossed=severity_overrides_tier_crossed, + severity_overrides_applied=severity_overrides_applied, + capability_misalignments=capability_misalignments, + tool_surface_changes=tool_surface_changes, + privacy_redactions=privacy_redactions, + evidence_matrix_gaps=evidence_matrix_gaps, + ) + headline = _reviewer_headline( + verdict=verdict, + lens_total=lens_total, + audit_total=audit_total, + pointer=pointer, + ) + + return ReviewerSummary( + verdict=verdict, + headline=headline, + tool_surface_changes=tool_surface_changes, + capability_misalignments=capability_misalignments, + action_surface_changes=action_surface_changes, + evidence_matrix_gaps=evidence_matrix_gaps, + severity_overrides_applied=severity_overrides_applied, + severity_overrides_tier_crossed=severity_overrides_tier_crossed, + privacy_redactions=privacy_redactions, + 
baseline_integrity_issues=baseline_integrity_issues, + first_recommended_surface=pointer, + ) diff --git a/src/agents_shipgate/core/findings/summaries.py b/src/agents_shipgate/core/findings/summaries.py new file mode 100644 index 00000000..aa33c9d0 --- /dev/null +++ b/src/agents_shipgate/core/findings/summaries.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from collections import Counter + +from agents_shipgate.core.domain import Tool +from agents_shipgate.core.risk_hints import is_high_risk_tool, risk_tags +from agents_shipgate.schemas.common import confidence_rank +from agents_shipgate.schemas.report import Finding, ReportSummary, ToolSurfaceSummary + +from .constants import SEVERITY_ORDER + + +def summarize_findings(findings: list[Finding], tools: list[Tool]) -> ReportSummary: + active = [finding for finding in findings if not finding.suppressed] + counts = Counter(finding.severity for finding in active) + suppressed_count = len(findings) - len(active) + if counts["critical"] > 0: + status = "release_blockers_detected" + elif active: + status = "warnings_detected" + elif any(tool.extraction_confidence != "high" for tool in tools): + status = "human_review_recommended" + else: + status = "no_release_blockers_detected" + return ReportSummary( + status=status, + critical_count=counts["critical"], + high_count=counts["high"], + medium_count=counts["medium"], + low_count=counts["low"], + info_count=counts["info"], + suppressed_count=suppressed_count, + human_review_recommended=counts["critical"] > 0 or counts["high"] > 0 or status == "human_review_recommended", + evidence_coverage="mixed" if _has_mixed_evidence(tools) else "static", + ) + + +def summarize_tool_surface(tools: list[Tool]) -> ToolSurfaceSummary: + sources = Counter(tool.source_type for tool in tools) + return ToolSurfaceSummary( + total_tools=len(tools), + high_risk_tools=sum(1 for tool in tools if is_high_risk_tool(tool)), + sources=dict(sorted(sources.items())), + wildcard_tools=sum(1 
for tool in tools if tool.annotations.get("wildcard_tools") is True), + missing_descriptions=sum(1 for tool in tools if not (tool.description or "").strip()), + ) + + +def recommended_actions(findings: list[Finding]) -> list[str]: + active = sorted( + [finding for finding in findings if not finding.suppressed], + key=lambda finding: (SEVERITY_ORDER[finding.severity], finding.check_id), + ) + actions: list[str] = [] + seen: set[str] = set() + for finding in active: + if finding.recommendation in seen: + continue + actions.append(finding.recommendation) + seen.add(finding.recommendation) + if len(actions) >= 8: + break + return actions + + +def tool_inventory(tools: list[Tool]) -> list[dict[str, object]]: + # v0.19 reviewer-grade provenance: ``source_path`` / ``source_start_line`` + # are additive optional keys per row. Post-scan renderers + # (scenario YAML, downstream consumers reading ``report.json``) + # use this lookup to cite ``path:line`` for tools touched by a + # finding without re-parsing the artifact. Older consumers ignore + # the new keys; new consumers can require them for high-risk tools. 
+ return [ + { + "name": tool.name, + "source_type": tool.source_type, + "source_ref": tool.source_ref, + "source_path": tool.source_path, + "source_start_line": tool.source_start_line, + "source_pointer": tool.source_pointer, + "risk_tags": risk_tags(tool, min_confidence="medium"), + "risk_tag_confidence": _risk_tag_confidence(tool, min_confidence="medium"), + "auth_scopes": tool.auth.scopes, + "owner": tool.owner, + "confidence": tool.extraction_confidence, + } + for tool in sorted(tools, key=lambda item: item.name) + ] + + +def _risk_tag_confidence(tool: Tool, min_confidence: str) -> dict[str, str]: + threshold = confidence_rank(min_confidence) + by_tag: dict[str, str] = {} + for hint in tool.risk_hints: + if confidence_rank(hint.confidence) < threshold: + continue + current = by_tag.get(hint.tag) + if current is None or confidence_rank(hint.confidence) > confidence_rank(current): + by_tag[hint.tag] = hint.confidence + return dict(sorted(by_tag.items())) + + +def _has_mixed_evidence(tools: list[Tool]) -> bool: + return any( + tool.source_type == "sdk_function" or tool.extraction_confidence != "high" + for tool in tools + ) diff --git a/src/agents_shipgate/core/severity_overrides.py b/src/agents_shipgate/core/severity_overrides.py index fd6b16be..35e73ba8 100644 --- a/src/agents_shipgate/core/severity_overrides.py +++ b/src/agents_shipgate/core/severity_overrides.py @@ -1,9 +1,9 @@ """v0.17 (M1) severity-override validation + audit. -Splits responsibility cleanly from ``core/findings.py``: +Splits responsibility cleanly from ``core/findings/``: - ``findings.apply_severity_overrides`` stays the public mutation point - on the finding list (kept by ``cli/scan.py`` and existing tests). + on the finding list (kept by ``cli/scan/decision.py`` and existing tests). 
- This module owns the *policy validation* — floor enforcement, tier detection, acknowledgement matching, expiry checks — and produces an immutable ``SeverityOverrideResolution`` that the apply step consumes @@ -240,7 +240,7 @@ def resolve_severity_overrides( # SHIP-ACTION-POLICY-VIOLATION bypass — an action policy # declared at ``severity: critical`` makes the effective # default critical, even though the catalog static default - # for that check is high. See ``cli/scan.py`` for the + # for that check is high. See ``cli/scan/decision.py`` for the # call-site that aggregates action-policy declarations. catalog_default_severity = target_metadata.default_severity dynamic_default = extra_defaults.get(check_id) diff --git a/src/agents_shipgate/inputs/adapter_validation.py b/src/agents_shipgate/inputs/adapter_validation.py index 31199ee5..a89d7bd4 100644 --- a/src/agents_shipgate/inputs/adapter_validation.py +++ b/src/agents_shipgate/inputs/adapter_validation.py @@ -202,7 +202,7 @@ def run_validated_adapter( # Smuggling prevention: when the adapter declares a non-None # artifact_class, the returned artifact must be an instance of it. - # The dispatcher (cli/scan.py:_absorb) also enforces this for + # The dispatcher (cli/scan/source_loading.py:_absorb) also enforces this for # built-ins; recording it as a runtime error here gives external # consumers (loaded_adapters[].runtime_errors) visibility before # the dispatcher's TypeError fires. diff --git a/src/agents_shipgate/inputs/openapi.py b/src/agents_shipgate/inputs/openapi.py index 6e5c45e1..cd3a6ab7 100644 --- a/src/agents_shipgate/inputs/openapi.py +++ b/src/agents_shipgate/inputs/openapi.py @@ -131,7 +131,7 @@ def _operation_to_tool( # Note: `source_location` intentionally stays None for OpenAPI tools. # The legacy `path:line` string participates in `run_id` (see - # `cli/scan.py:_run_id`), and v0.10 OpenAPI tools never set it. + # `cli/scan/run_identity.py:_run_id`), and v0.10 OpenAPI tools never set it. 
# Reviewers get the line via the structured `source_start_line` / # `source_pointer` fields; SARIF prefers those over the legacy string. return Tool( diff --git a/src/agents_shipgate/packet/pdf.py b/src/agents_shipgate/packet/pdf.py index a7b37f8b..9016a617 100644 --- a/src/agents_shipgate/packet/pdf.py +++ b/src/agents_shipgate/packet/pdf.py @@ -47,7 +47,7 @@ def render_packet_pdf(packet: EvidencePacket, out_path: Path) -> Path: def is_pdf_available() -> bool: """Probe whether the PDF renderer is importable on this install. - Used by ``cli/scan.py`` during the path-planning phase so + Used by ``cli/scan/output_planning.py`` during the path-planning phase so ``report.generated_reports`` only references files that will actually be written. """ diff --git a/src/agents_shipgate/report/action_surface_diff.py b/src/agents_shipgate/report/action_surface_diff.py index e0c2f561..6a8e4778 100644 --- a/src/agents_shipgate/report/action_surface_diff.py +++ b/src/agents_shipgate/report/action_surface_diff.py @@ -138,7 +138,7 @@ def enrich_action_surface_diff_with_source( renderers read them explicitly). For safety, this function is only called on the PUBLIC diff - (``cli/scan.py``); the internal diff stays semantic so policy + (``cli/scan/sanitization.py``); the internal diff stays semantic so policy findings can be evaluated against unchanged evidence. 
""" if not tool_source_index: diff --git a/src/agents_shipgate/report/markdown.py b/src/agents_shipgate/report/markdown.py index 14dd4525..e08b5c57 100644 --- a/src/agents_shipgate/report/markdown.py +++ b/src/agents_shipgate/report/markdown.py @@ -5,7 +5,7 @@ from pathlib import Path from agents_shipgate.core.disclaimers import HITL_RUNTIME_CONTROL_DISCLAIMER -from agents_shipgate.core.findings import SEVERITY_ORDER +from agents_shipgate.core.findings.constants import SEVERITY_ORDER from agents_shipgate.core.privacy import sanitize_report from agents_shipgate.schemas.report import ( DeclaredIntention, diff --git a/src/agents_shipgate/report/tool_surface_diff.py b/src/agents_shipgate/report/tool_surface_diff.py index c14ade45..2c6a4ff2 100644 --- a/src/agents_shipgate/report/tool_surface_diff.py +++ b/src/agents_shipgate/report/tool_surface_diff.py @@ -14,7 +14,7 @@ ) from agents_shipgate.core.domain import Tool from agents_shipgate.core.errors import InputParseError -from agents_shipgate.core.findings import _canonicalize_for_fingerprint +from agents_shipgate.core.findings.identity import _canonicalize_for_fingerprint from agents_shipgate.core.heuristics import is_broad_scope from agents_shipgate.core.risk_hints import HIGH_RISK_TAGS, risk_tags from agents_shipgate.schemas.baseline import BaselineFile diff --git a/src/agents_shipgate/schemas/report.py b/src/agents_shipgate/schemas/report.py index d886609f..c498eadb 100644 --- a/src/agents_shipgate/schemas/report.py +++ b/src/agents_shipgate/schemas/report.py @@ -49,7 +49,7 @@ class Finding(BaseModel): # pass it explicitly. 
Third-party plugin checks that construct # `Finding(...)` directly without setting this field are coerced # to `"static_declaration"` by `annotate_remediation` in - # core/findings.py so the wire schema's required + non-nullable + # core/findings/remediation.py so the wire schema's required + non-nullable # enum stays satisfied — plugins that want a more specific label # should set the field themselves. Required + non-nullable on the # wire via scripts/generate_schemas.py. @@ -78,7 +78,7 @@ class Finding(BaseModel): # (per C4). patches: list[Patch] | None = None # v0.7 remediation enrichment. Populated by `annotate_remediation` - # in core/findings.py during build_report (regardless of + # in core/findings/remediation.py during build_report (regardless of # --suggest-patches), so any consumer reading `report.json` gets # remediation policy without opting into patches. # diff --git a/tests/test_adapter_registry.py b/tests/test_adapter_registry.py index 9eeb0d95..053d4369 100644 --- a/tests/test_adapter_registry.py +++ b/tests/test_adapter_registry.py @@ -167,7 +167,7 @@ def test_canonical_registration_order(): # --------------------------------------------------------------------------- # Dispatch-loop coverage. The dispatcher under test is -# ``cli/scan.py:_load_sources``. +# ``cli/scan/source_loading.py:_load_sources``. 
# --------------------------------------------------------------------------- diff --git a/tests/test_adapter_static_only.py b/tests/test_adapter_static_only.py index f32ac95c..9bce98de 100644 --- a/tests/test_adapter_static_only.py +++ b/tests/test_adapter_static_only.py @@ -1242,14 +1242,16 @@ def test_scanner_sources_covers_known_files() -> None: sources = _scanner_sources() relative_paths = {str(p.relative_to(SCANNER_DIR)) for p in sources} required = { - "cli/scan.py", + "cli/scan/orchestrator.py", + "cli/scan/source_loading.py", "inputs/protocol.py", "inputs/mcp.py", "checks/registry.py", "checks/plugin_validation.py", "core/domain.py", "core/severity_overrides.py", - "core/findings.py", + "core/findings/report_builder.py", + "core/findings/identity.py", "schemas/report.py", } missing = required - relative_paths diff --git a/tests/test_baseline_integrity.py b/tests/test_baseline_integrity.py index 86016ff3..cf92cb39 100644 --- a/tests/test_baseline_integrity.py +++ b/tests/test_baseline_integrity.py @@ -632,10 +632,6 @@ def patched(path): return manifest monkeypatch.setattr(loader_mod, "load_manifest", patched) - # cli.scan imports `load_manifest` at module level — patch the attribute it uses - import agents_shipgate.cli.scan as scan_mod - - monkeypatch.setattr(scan_mod, "load_manifest", patched) report, _ = run_scan( config_path=SAMPLE, output_dir=tmp_path / "out2", @@ -663,9 +659,6 @@ def patched(path): return manifest monkeypatch.setattr(loader_mod, "load_manifest", patched) - import agents_shipgate.cli.scan as scan_mod - - monkeypatch.setattr(scan_mod, "load_manifest", patched) report, _ = run_scan( config_path=SAMPLE, output_dir=tmp_path / "out2", @@ -697,9 +690,6 @@ def patched(path): return manifest monkeypatch.setattr(loader_mod, "load_manifest", patched) - import agents_shipgate.cli.scan as scan_mod - - monkeypatch.setattr(scan_mod, "load_manifest", patched) report, _ = run_scan( config_path=SAMPLE, output_dir=tmp_path / "out2", diff --git 
a/tests/test_e3_prime_compat.py b/tests/test_e3_prime_compat.py new file mode 100644 index 00000000..6d6c4109 --- /dev/null +++ b/tests/test_e3_prime_compat.py @@ -0,0 +1,55 @@ +from __future__ import annotations + + +def test_cli_scan_package_preserves_public_imports() -> None: + from agents_shipgate.cli.scan import ( + _build_agent, + _flatten_and_deduplicate_tools, + _load_sources, + _resolve_audit_log_path, + _run_id, + inspect_sources, + run_scan, + ) + + assert callable(run_scan) + assert callable(inspect_sources) + assert callable(_load_sources) + assert callable(_flatten_and_deduplicate_tools) + assert callable(_build_agent) + assert callable(_run_id) + assert callable(_resolve_audit_log_path) + + +def test_core_findings_package_preserves_public_imports() -> None: + from agents_shipgate.core.findings import ( + _REMEDIATION_FALLBACK, + SEVERITY_ORDER, + _canonicalize_for_fingerprint, + annotate_remediation, + apply_severity_overrides, + apply_suppressions, + assign_finding_ids, + build_agent_summary, + build_report, + build_reviewer_summary, + dedupe_findings, + derive_agent_action, + finding_fingerprint, + summarize_findings, + ) + + assert SEVERITY_ORDER["critical"] == 0 + assert _REMEDIATION_FALLBACK["suggested_patch_kind"] == "manual" + assert callable(assign_finding_ids) + assert callable(dedupe_findings) + assert callable(apply_severity_overrides) + assert callable(apply_suppressions) + assert callable(annotate_remediation) + assert callable(derive_agent_action) + assert callable(build_agent_summary) + assert callable(build_report) + assert callable(build_reviewer_summary) + assert callable(summarize_findings) + assert callable(finding_fingerprint) + assert callable(_canonicalize_for_fingerprint)