diff --git a/pyproject.toml b/pyproject.toml
index db6521c..e297895 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ cf-sticky-check = "cf_quality.sticky_check:main"
 cf-mirror-check = "cf_quality.mirror_check:main"
 cf-recursion-check = "cf_quality.recursion_check:main"
 cf-exemptions = "cf_quality.exemptions:main"
+cf-no-bon-ref = "cf_quality.no_bon_ref:main"
 cf-repo-config = "cf_quality.repo_config:main"
 cf-import-contract = "cf_quality.import_contract:main"
 cf-gate = "cf_quality.gate_runner:main"
diff --git a/src/cf_quality/gate_runner.py b/src/cf_quality/gate_runner.py
index f791b15..37becc0 100644
--- a/src/cf_quality/gate_runner.py
+++ b/src/cf_quality/gate_runner.py
@@ -352,7 +352,7 @@ def _pytest(layout: Layout, env: Mapping[str, str]) -> GateVerdict:
 
 
 def _stages() -> list[Stage]:
-    """The 12-stage battery (layout is stage 1, resolved before this list runs)."""
+    """The 13-stage battery (layout is stage 1, resolved before this list runs)."""
     return [
         Stage("ruff-check", _ruff_check),
         Stage("ruff-format", _ruff_format),
@@ -361,6 +361,7 @@ def _stages() -> list[Stage]:
         Stage("cf-mirror-check", _mirror_check),
         Stage("cf-recursion-check", _cf_source_scoped("cf-recursion-check")),
         Stage("cf-exemptions", _cf_runner("cf-exemptions")),
+        Stage("cf-no-bon-ref", _cf_runner("cf-no-bon-ref")),
         Stage("cf-import-contract", _cf_runner("cf-import-contract", "--root", ".")),
         Stage("mypy", _mypy),
         Stage("complexipy", _complexipy),
diff --git a/src/cf_quality/no_bon_ref.py b/src/cf_quality/no_bon_ref.py
new file mode 100644
index 0000000..6750427
--- /dev/null
+++ b/src/cf_quality/no_bon_ref.py
@@ -0,0 +1,253 @@
+"""cf-no-bon-ref — the no-ticket-ids law's shippable, consumer-facing sweep.
+
+A Linear ticket id (the ``BON`` + ``-`` shape) is a LOCAL index: meaningless
+to anyone reading the code, the diff, or the git history. The law
+([[no-ticket-ids-in-code]]) bans it from the CODE/CONFIG tree — comments,
+docstrings, CSS, ``.gitignore``, config, test names — and says: describe the
+work, never the ticket. This gauge enforces that on the CONSUMER tree.
+
+It is the missing teeth: the kit's existing ``test_design_doc`` sweep guards
+only the kit's OWN docs, and the Python ``cf-gate`` battery never swept a
+consumer tree for ticket refs at all — so a consumer (mexxa) leaked refs to
+main uncaught.
+
+Jurisdiction — the law governs CODE, not provenance prose. The sweep covers
+the code/config tree and SKIPS:
+
+- version control, caches, vendored trees and any hidden directory
+  (``.git``, ``__pycache__``, ``.venv``, ``venv``, ``node_modules``, ``build``,
+  ``dist``); ``.gitignore`` and other hidden FILES are still swept;
+- ``docs/`` and all markdown/rst (``.md`` / ``.markdown`` / ``.rst``) — ADRs,
+  READMEs, design records and the law-debt ledger legitimately map epic ->
+  ticket; that mapping is documentation's FUNCTION, not a leak. The law
+  governs the CODE tree; this is a declared jurisdiction boundary, not a
+  silent paths-ignore;
+- binary files (any NUL byte) — a ticket id only ever leaks into text.
+
+The reasoned escape: a ref that legitimately lives in a CODE path (generated
+or vendored source carrying an upstream tag) is REGISTERED in
+``no-bon-ref-exemptions.json`` ({frozen_count, entries:[{path, reason}]}),
+mirroring the kit's exemptions ratchet — every blessing carries a reason and
+is printed loudly, and adding one requires bumping ``frozen_count`` (a visible
+decision, never a silent one).
+
+The pattern this gauge hunts is assembled by concatenation so this module's
+own source never carries it (the kit self-sweeps for the same literal).
+
+Exit codes: 0 clean · 1 violations · 2 the gate itself could not run (typed
+:class:`~cf_quality.errors.GateError` on stderr).
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+from collections.abc import Iterator
+from fnmatch import fnmatch
+from pathlib import Path
+from typing import Any
+
+from cf_quality.errors import GateError, GateViolation
+from cf_quality.reporting import print_verdict
+
+#: The ticket-ref shape, built so this source stays self-clean (the prefix and
+#: the dash never sit contiguous here): a ``BON`` prefix, a dash, then digits,
+#: on a word boundary, matched against raw bytes (NUL-free text only).
+_TICKET_RE = re.compile(rb"\bBON" + rb"-[0-9]+")
+
+#: Named directories never swept (vendored / build output). Hidden dirs are
+#: pruned separately; ``docs`` is the declared provenance jurisdiction.
+_SKIP_DIRS = frozenset({"__pycache__", "node_modules", "build", "dist", "venv", "docs"})
+
+#: Prose suffixes — markdown/rst are documentation, not code; a ticket ref in a
+#: README or an ADR is provenance, not a leak. The law governs the code tree.
+_PROSE_SUFFIXES = frozenset({".md", ".markdown", ".rst"})
+
+_EXEMPTIONS_FILE = "no-bon-ref-exemptions.json"
+
+
+def _is_binary(data: bytes) -> bool:
+    """A NUL byte marks a binary file; a ticket id only leaks into text."""
+    return b"\x00" in data
+
+
+def iter_source_files(root: Path) -> Iterator[Path]:
+    """Yield every non-prose file under root, pruning hidden/vendored/provenance dirs."""
+    for dirpath, dirnames, filenames in os.walk(root):
+        dirnames[:] = sorted(d for d in dirnames if not d.startswith(".") and d not in _SKIP_DIRS)
+        for name in sorted(filenames):
+            if Path(name).suffix.lower() in _PROSE_SUFFIXES:
+                continue
+            yield Path(dirpath) / name
+
+
+def _scan_bytes(data: bytes) -> Iterator[tuple[int, list[str]]]:
+    """Yield (1-based line, refs) for every line carrying a ticket ref."""
+    for index, line in enumerate(data.split(b"\n"), start=1):
+        matches = _TICKET_RE.findall(line)
+        if matches:
+            yield index, [m.decode("ascii") for m in matches]
+
+
+def scan_file(path: Path, root: Path) -> list[GateViolation]:
+    """Scan one file; binary or unreadable files yield nothing (not a crash)."""
+    try:
+        data = path.read_bytes()
+    except OSError:
+        return []
+    if _is_binary(data):
+        return []
+    rel = path.relative_to(root).as_posix()
+    return [
+        GateViolation(
+            code="TICKET_REF_IN_SOURCE",
+            message=(
+                f"ticket reference {', '.join(refs)} in source — a ticket id is a "
+                "local index, meaningless in the tree; describe the work, not the ticket"
+            ),
+            path=rel,
+            line=line,
+            context={"refs": refs},
+        )
+        for line, refs in _scan_bytes(data)
+    ]
+
+
+def scan_tree(root: Path) -> list[GateViolation]:
+    """Scan the whole code/config tree; raise GateError when the root is not a directory."""
+    if not root.is_dir():  # a file root makes os.walk yield nothing — a false clean
+        raise GateError(
+            code="GATE_PATH_MISSING",
+            message=f"tree does not exist or is not a directory: {root}",
+            context={"path": str(root)},
+        )
+    violations: list[GateViolation] = []
+    for path in iter_source_files(root):
+        violations.extend(scan_file(path, root))
+    return sorted(violations, key=lambda v: (v.path, v.line or 0))
+
+
+# --- the reasoned, ratcheted exemption registry -----------------------------
+
+
+def _config_error(message: str, context: dict[str, Any]) -> GateError:
+    return GateError(code="GATE_CONFIG_INVALID", message=message, context=context)
+
+
+def _validate_entry(index: int, entry: object) -> dict[str, str]:
+    """Each entry is {path, reason}, both non-empty strings (a reasoned blessing)."""
+    if not isinstance(entry, dict):
+        raise _config_error(f"{_EXEMPTIONS_FILE} entry {index} is not an object", {"index": index})
+    out: dict[str, str] = {}
+    for key in ("path", "reason"):
+        value = entry.get(key)
+        if not (isinstance(value, str) and value.strip()):
+            raise _config_error(
+                f"{_EXEMPTIONS_FILE} entry {index} is missing or has an empty '{key}'",
+                {"index": index, "key": key},
+            )
+        out[key] = value
+    return out
+
+
+def load_exemptions(root: Path) -> tuple[list[dict[str, str]], int] | None:
+    """Parse the registry; None when absent; typed GateError when malformed."""
+    config_path = root / _EXEMPTIONS_FILE
+    if not config_path.is_file():
+        return None
+    try:
+        data = json.loads(config_path.read_text(encoding="utf-8"))
+    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as exc:
+        raise _config_error(
+            f"{_EXEMPTIONS_FILE} is not valid JSON: {exc}", {"path": str(config_path)}
+        ) from exc
+    frozen = data.get("frozen_count") if isinstance(data, dict) else None
+    if not isinstance(frozen, int) or isinstance(frozen, bool) or frozen < 0:
+        raise _config_error(
+            f"{_EXEMPTIONS_FILE} must carry an integer 'frozen_count' >= 0 (the ratchet)",
+            {"path": str(config_path)},
+        )
+    entries = data.get("entries")
+    if not isinstance(entries, list):
+        raise _config_error(
+            f"{_EXEMPTIONS_FILE} must carry an 'entries' list", {"path": str(config_path)}
+        )
+    return [_validate_entry(i, entry) for i, entry in enumerate(entries)], frozen
+
+
+def _ratchet_violation(entry_count: int, frozen: int) -> list[GateViolation]:
+    """Adding an exemption requires bumping frozen_count — a visible decision."""
+    if entry_count > frozen:
+        return [
+            GateViolation(
+                code="EXEMPTION_COUNT_EXCEEDED",
+                message=(
+                    f"{entry_count} exemption entries exceed frozen_count {frozen}: "
+                    "adding an exemption requires bumping frozen_count — visible, never silent"
+                ),
+                path=_EXEMPTIONS_FILE,
+                context={"entries": entry_count, "frozen_count": frozen},
+            )
+        ]
+    return []
+
+
+def _partition(
+    violations: list[GateViolation], entries: list[dict[str, str]]
+) -> tuple[list[GateViolation], list[str]]:
+    """Split findings into still-failing and blessed-by-a-registered-entry (loud)."""
+    failing: list[GateViolation] = []
+    blessed: list[str] = []
+    for violation in violations:
+        match = next((e for e in entries if fnmatch(violation.path, e["path"])), None)
+        if match is None:
+            failing.append(violation)
+        else:
+            blessed.append(
+                f"blessed: {violation.path}:{violation.line} — covered by "
+                f"'{match['path']}' ({match['reason']})"
+            )
+    return failing, blessed
+
+
+def check(root: Path) -> tuple[list[GateViolation], list[str]]:
+    """Sweep the tree, apply the reasoned registry, ratchet it — (violations, notices)."""
+    found = scan_tree(root)
+    config = load_exemptions(root)
+    if config is None:
+        return found, []
+    entries, frozen = config
+    failing, blessed = _partition(found, entries)
+    failing.extend(_ratchet_violation(len(entries), frozen))
+    notices = [
+        f"=== TICKET-REF EXEMPTIONS: {len(entries)} entries / frozen_count {frozen} ===",
+        *blessed,
+    ]
+    return failing, notices
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Console entry point. Exit 0 clean · 1 violations · 2 the gate could not run."""
+    parser = argparse.ArgumentParser(
+        prog="cf-no-bon-ref",
+        description="No ticket id in the code/config tree — describe the work, not the ticket.",
+    )
+    parser.add_argument("--root", default=".", help="repo root to sweep (default: cwd)")
+    args = parser.parse_args(argv)
+    try:
+        violations, notices = check(Path(args.root).resolve())
+    except GateError as exc:
+        return print_verdict("cf-no-bon-ref", [], exc)
+    return print_verdict(
+        "cf-no-bon-ref",
+        violations,
+        notices=notices,
+        clean_summary="cf-no-bon-ref: OK (no ticket references in the code/config tree)",
+        fail_summary=f"cf-no-bon-ref: FAIL ({len(violations)} ticket reference(s))",
+    )
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_gate_runner.py b/tests/test_gate_runner.py
index 2278637..2e937b2 100644
--- a/tests/test_gate_runner.py
+++ b/tests/test_gate_runner.py
@@ -89,6 +89,7 @@ def _clean_cf_responses() -> dict[str, tuple[int, str, str]]:
         "cf-file-budget",
         "cf-recursion-check",
         "cf-exemptions",
+        "cf-no-bon-ref",
         "cf-import-contract",
     )
     responses: dict[str, tuple[int, str, str]] = {
diff --git a/tests/test_no_bon_ref.py b/tests/test_no_bon_ref.py
new file mode 100644
index 0000000..88c5839
--- /dev/null
+++ b/tests/test_no_bon_ref.py
@@ -0,0 +1,191 @@
+"""Tests for cf-no-bon-ref — the consumer-tree ticket-reference sweep.
+
+The law ([[no-ticket-ids-in-code]]): a Linear ticket id is a LOCAL index,
+meaningless to anyone reading the code or the diff. It must never appear in
+the code/config tree — describe the work, not the ticket. This gauge is the
+shippable, consumer-facing enforcement of that law (the kit's existing
+``test_design_doc`` sweep only guards the kit's OWN docs).
+
+Every ticket-ref literal in this file is BUILT BY CONCATENATION so this test
+file itself never carries the contiguous pattern the gauge (and the kit's own
+self-sweep) hunts for. Fixtures are written under ``tmp_path`` — outside any
+swept tree — so the realistic offending content exists only at runtime.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from cf_quality.errors import GateError, GateViolation
+from cf_quality.no_bon_ref import check, main, scan_tree
+
+# The ticket-ref shape the gauge hunts, assembled so this source stays clean.
+_REF = "BON" + "-" + "1828"
+_REF2 = "BON" + "-" + "1829"
+
+
+def _write(root: Path, rel: str, body: str) -> Path:
+    path = root / rel
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(body, encoding="utf-8")
+    return path
+
+
+# --- the sweep: a ticket ref in the code/config tree FAILS -------------------
+
+
+def test_ref_in_python_comment_is_a_violation(tmp_path: Path) -> None:
+    _write(tmp_path, "src/widget.py", f"# touch hardening ({_REF})\nx = 1\n")
+    violations = scan_tree(tmp_path)
+    assert len(violations) == 1
+    v = violations[0]
+    assert v.code == "TICKET_REF_IN_SOURCE"
+    assert v.path == "src/widget.py"
+    assert v.line == 1
+    assert _REF in v.message
+
+
+def test_ref_in_css_and_gitignore_and_config_all_caught(tmp_path: Path) -> None:
+    _write(tmp_path, "src/styles/builder.css", f"/* TOUCH HARDENING ({_REF}) */\n")
+    _write(tmp_path, ".gitignore", f"# Playwright ({_REF} e2e)\ntest-results\n")
+    _write(tmp_path, "playwright.config.js", f"// device matrix ({_REF}).\n")
+    paths = {v.path for v in scan_tree(tmp_path)}
+    assert paths == {"src/styles/builder.css", ".gitignore", "playwright.config.js"}
+
+
+def test_ref_in_test_name_is_caught(tmp_path: Path) -> None:
+    # the law explicitly governs TEST NAMES, so the tests/ tree is swept.
+    _write(tmp_path, "tests/test_more.py", f'"""covers {_REF}."""\nx = 1\n')
+    paths = {v.path for v in scan_tree(tmp_path)}
+    assert "tests/test_more.py" in paths
+
+
+def test_multiple_refs_one_violation_per_line(tmp_path: Path) -> None:
+    _write(tmp_path, "src/a.py", f"# {_REF}\n# {_REF2}\nok = 1\n")
+    violations = scan_tree(tmp_path)
+    assert len(violations) == 2
+    assert {v.line for v in violations} == {1, 2}
+
+
+def test_clean_tree_has_no_violations(tmp_path: Path) -> None:
+    _write(tmp_path, "src/widget.py", "# touch hardening (phone-first)\nx = 1\n")
+    assert scan_tree(tmp_path) == []
+
+
+def test_root_that_is_a_file_is_typed_error(tmp_path: Path) -> None:
+    target = _write(tmp_path, "lonely.py", "x = 1\n")
+    with pytest.raises(GateError) as exc:
+        scan_tree(target)
+    assert exc.value.code == "GATE_PATH_MISSING"
+
+
+# --- jurisdiction: docs/ carry provenance, not banned -----------------------
+
+
+def test_docs_dir_is_out_of_jurisdiction(tmp_path: Path) -> None:
+    # docs/ (ADRs, design, the debt ledger) legitimately map epic -> ticket;
+    # the law governs CODE, not provenance prose.
+    _write(tmp_path, "docs/design/plan.md", f"Epic {_REF} ships the builder.\n")
+    _write(tmp_path, "docs/law-debt.md", f"- {_REF2} backend typed-error debt\n")
+    assert scan_tree(tmp_path) == []
+
+
+def test_markdown_anywhere_is_prose_not_code(tmp_path: Path) -> None:
+    # a README or markdown note is documentation provenance, even outside docs/.
+    _write(tmp_path, "ts/README.md", f"Built under {_REF}.\n")
+    _write(tmp_path, "src/NOTES.md", f"see {_REF2}\n")
+    assert scan_tree(tmp_path) == []
+
+
+def test_vendored_and_vcs_and_caches_are_skipped(tmp_path: Path) -> None:
+    _write(tmp_path, "node_modules/dep/index.js", f"// {_REF}\n")
+    _write(tmp_path, "__pycache__/x.txt", f"{_REF}\n")
+    _write(tmp_path, ".git/COMMIT_EDITMSG", f"{_REF}\n")
+    assert scan_tree(tmp_path) == []
+
+
+def test_binary_files_are_skipped(tmp_path: Path) -> None:
+    (tmp_path / "asset.png").write_bytes(b"\x89PNG\x00\x00" + _REF.encode() + b"\x00")
+    assert scan_tree(tmp_path) == []
+
+
+# --- the reasoned, ratcheted exemption registry -----------------------------
+
+
+def test_registered_exemption_blesses_a_code_path_ref(tmp_path: Path) -> None:
+    _write(tmp_path, "src/generated/schema.py", f"# generated; upstream tag {_REF}\n")
+    _write(
+        tmp_path,
+        "no-bon-ref-exemptions.json",
+        '{"frozen_count": 1, "entries": ['
+        '{"path": "src/generated/*", "reason": "vendored upstream codegen carries its tag"}]}',
+    )
+    violations, notices = check(tmp_path)
+    assert violations == []
+    assert any("src/generated/schema.py" in line for line in notices)
+
+
+def test_exemption_not_matching_still_fails(tmp_path: Path) -> None:
+    _write(tmp_path, "src/hand.py", f"# {_REF}\n")
+    _write(
+        tmp_path,
+        "no-bon-ref-exemptions.json",
+        '{"frozen_count": 1, "entries": [{"path": "src/other/*", "reason": "elsewhere"}]}',
+    )
+    violations, _ = check(tmp_path)
+    assert [v.path for v in violations] == ["src/hand.py"]
+
+
+def test_entries_over_frozen_count_fails_ratchet(tmp_path: Path) -> None:
+    _write(
+        tmp_path,
+        "no-bon-ref-exemptions.json",
+        '{"frozen_count": 0, "entries": [{"path": "src/x/*", "reason": "r"}]}',
+    )
+    violations, _ = check(tmp_path)
+    assert any(v.code == "EXEMPTION_COUNT_EXCEEDED" for v in violations)
+
+
+def test_malformed_exemption_registry_is_typed_error(tmp_path: Path) -> None:
+    _write(tmp_path, "no-bon-ref-exemptions.json", "{ not json")
+    with pytest.raises(GateError) as exc:
+        check(tmp_path)
+    assert exc.value.code == "GATE_CONFIG_INVALID"
+
+
+def test_exemption_entry_missing_reason_is_typed_error(tmp_path: Path) -> None:
+    _write(
+        tmp_path,
+        "no-bon-ref-exemptions.json",
+        '{"frozen_count": 1, "entries": [{"path": "src/x/*"}]}',
+    )
+    with pytest.raises(GateError) as exc:
+        check(tmp_path)
+    assert exc.value.code == "GATE_CONFIG_INVALID"
+
+
+# --- the console entry point ------------------------------------------------
+
+
+def test_main_clean_returns_zero(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
+    _write(tmp_path, "src/ok.py", "x = 1\n")
+    assert main(["--root", str(tmp_path)]) == 0
+
+
+def test_main_violation_returns_one(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
+    _write(tmp_path, "src/bad.py", f"# {_REF}\n")
+    assert main(["--root", str(tmp_path)]) == 1
+
+
+def test_main_config_error_returns_two(tmp_path: Path) -> None:
+    _write(tmp_path, "src/ok.py", "x = 1\n")
+    _write(tmp_path, "no-bon-ref-exemptions.json", "{ not json")
+    assert main(["--root", str(tmp_path)]) == 2
+
+
+def test_isinstance_findings_are_gate_violations(tmp_path: Path) -> None:
+    _write(tmp_path, "src/bad.py", f"# {_REF}\n")
+    violations = scan_tree(tmp_path)
+    assert all(isinstance(v, GateViolation) for v in violations)
diff --git a/ts/README.md b/ts/README.md
index b675660..7230959 100644
--- a/ts/README.md
+++ b/ts/README.md
@@ -16,8 +16,8 @@ Pre-existing offenders are frozen in a baseline (ratchet: shrink-only, never gro
 
 ## 4-step consumer mount
 
-These are exactly the steps SweetCRM Task 1 (BON-1701) follows when mounting
-the kit into a new TypeScript repository.
+These are exactly the steps the first SweetCRM mount follows when wiring the
+kit into a new TypeScript repository.
 
 ### Step 1 — Copy the consumer templates
 