diff --git a/.ails/config.yml b/.ails/config.yml index 2a47614..09cbb12 100644 --- a/.ails/config.yml +++ b/.ails/config.yml @@ -1,9 +1,6 @@ default_agent: claude exclude_dirs: - fixtures - - research - - _archive - - _archived - .venv - docs - specs diff --git a/.gitignore b/.gitignore index 1de2367..5ffc82e 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,10 @@ framework/rules/**/tests/**/.claude/ # Bundled ONNX embedding model — fetched by scripts/fetch_bundled_model.py # (dev-only, not committed; populated on clone and in CI before hatch build) src/reporails_cli/bundled/models/ + +# npm packaging — README.md is copied from the repo root by the prepack +# script in packages/npm/package.json before `npm pack` / `npm publish`. +# Keeping the file gitignored avoids the maintenance drift of two committed +# READMEs while sidestepping the npmjs.com per-version display bug that the +# previous symlink approach hit. +packages/npm/README.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 392f957..8b2c2d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ # Changelog +## 0.5.7 + +### Added + +- [framework/schemas/project.schema.yml]: New `surfaces` and `agents` keys for `.ails/config.yml`. `surfaces.<agent>.<surface>.include` / `.exclude` adjusts which globs each agent surface scans without modifying bundled configs. `agents.<agent>.fallback_filenames` mirrors Codex `project_doc_fallback_filenames` so per-project alternative instruction filenames (e.g. `TEAM_GUIDE.md`) are picked up by the validator. +- [core/config]: `.ails/config.local.yml` (gitignored) layers on top of committed `.ails/config.yml` for personal/CI overrides — object keys merge recursively, array keys extend, scalars replace. +- [interfaces/cli/config_command]: `ails config set` writes `.ails/.gitignore` listing `.gitignore` itself and `config.local.yml` whenever `.ails/config.yml` is created/updated, so layered local config stays out of version control by default. 
+- [framework/rules]: `nested_context` declarations for codex / cursor / copilot / generic agents so per-package `**/AGENTS.md` files in monorepos are surfaced under the agent's on-demand loading model rather than skipped. +- [formatters/text]: Surface classifier distinguishes `main` (root-level instruction file) from `nested` (subdirectory copies). Scorecard shows a separate "Nested" section; nested file paths display the full relative path (`packages/web/CLAUDE.md`) so users can locate them. + +### Changed + +- [framework/schemas]: Added `scope: nested` to the `agent.schema.yml` and `rule.schema.yml` enums. Captures surfaces whose subtree applicability comes from file LOCATION (subdirectory CLAUDE.md / AGENTS.md / GEMINI.md) rather than from in-file frontmatter. Replaces the previous overload of `scope: path_scoped` for these surfaces. +- [core/agent_discovery]: Project root for `ails check <path>` is now `<path>` itself — no walking up. Files outside the targeted subtree are out of scope, regardless of `.git` or `.ails/backbone.yml` location. `engine_helpers._find_project_root` continues to walk up for cache key derivation only and now also recognizes IDE workspace markers (`.vscode/`, `.idea/`, `.github/`) as project-root signals. +- [core/agent_discovery + core/agents]: Filename matching for agent instruction files is now case-sensitive, matching Codex's source (`codex-rs/core/src/agents_md.rs` — `DEFAULT_AGENTS_MD_FILENAME = "AGENTS.md"`, `LOCAL_AGENTS_MD_FILENAME = "AGENTS.override.md"`) and the agents.md spec. A file named `agents.md` (lowercase, no leading dot) is no longer falsely surfaced as a Codex AGENTS.md candidate. +- [framework/rules/cursor]: `cursor.rules` corrected to `scope: path_scoped` (frontmatter-based path filtering); `cursor.bugbot_rules` to `scope: global` (BugBot decides applicability). 
+ +### Fixed + +- [core/classification + core/agent_discovery]: Instruction-file discovery and classification now correctly distinguish `main` files at the user's target from `nested_context` / `child_instruction` files in subdirectories. Per-package CLAUDE.md / AGENTS.md / GEMINI.md files in monorepos are classified as `nested_context` rather than `main`, so size and other `match: {type: main}` rules no longer false-positive on per-package nested files. Bug surfaced against [activepieces/activepieces](https://github.com/activepieces/activepieces). +- [core/registry]: `depends_on` resolves through supersession. When `CODEX:S:0003 supersedes CORE:S:0027`, rules that depend on `CORE:S:0027` (e.g., `CORE:S:0030`, `CORE:G:0006`) are satisfied by `CODEX:S:0003` instead of warning that the dependency is "not loaded". `_apply_supersession` returns a `{superseded_id: successor_id}` map; `_validate_depends_on` consults it before emitting the missing-dependency warning. +- [core/classification]: `_location_matches_mode` distinguishes "loose" leaf patterns (`**/CLAUDE.md`, bare `CLAUDE.md`) from "tight" path-prefixed patterns (`.github/copilot-instructions.md`). Path-prefixed patterns already constrain location via the prefix, so the ancestor-chain check is skipped — fixes false-negative classification of Copilot's `.github/copilot-instructions.md`. +- [tests/unit/test_scan_scope]: `test_codex_fallback_filenames_surface` now creates `.codex/config.toml` in the fixture so codex passes the codex/generic disambiguation deterministically — was HOME-dependent (locally `~/.codex/` let codex through, fresh CI runners without `~/.codex/` dropped codex and the fallback patterns never fired). + ## 0.5.6 ### Added diff --git a/README.md b/README.md index 02ab8dc..91dd201 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Reporails CLI (v0.5.6) +# Reporails CLI (v0.5.7) > **AI Instruction Diagnostics for coding agents. 
Validates the entire agentic instruction system against 92+ rules across six categories. Supports Claude, Codex, Copilot, Cursor, and Gemini.** > diff --git a/docs/configuration.md b/docs/configuration.md index dc50ce8..a3c72d5 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,8 +1,8 @@ --- title: "Configuration" description: "Disabling rules, project / global config, exclude paths" -version: "0.5.6" -last_updated: 2026-05-04 +version: "0.5.7" +last_updated: 2026-05-06 --- # Configuration @@ -96,6 +96,64 @@ For one-off runs, pass `--exclude-dirs` on the command line: ails check --exclude-dirs examples --exclude-dirs third_party ``` +## Per-surface include / exclude + +Each agent has a set of *surfaces* — `main` (the primary instruction file), `nested_context` (subdirectory variants), `rules`, `skills`, `agents`, etc. The `surfaces` key lets you adjust the glob patterns each surface scans, without modifying the bundled framework configs: + +```yaml +# .ails/config.yml +surfaces: + cursor.rules: + exclude: ["**/draft/**"] # drop matches under draft/ from Cursor rules + claude.skills: + include: [".github/skills/**/SKILL.md"] # also scan .github/skills/ for Claude + codex.main: + exclude: ["**/legacy/AGENTS.md"] # drop legacy AGENTS.md from Codex's main candidates +``` + +Keys are `<agent>.<surface>` (e.g. `cursor.rules`, `claude.main`, `codex.nested_context`). Each entry may set: + +- `include`: additional glob patterns to scan **on top of** the agent's bundled patterns. +- `exclude`: glob patterns whose matches are dropped from the surface's results. + +Patterns match relative to the project root (the directory you ran `ails check` from). + +## Codex fallback filenames + +Codex supports `project_doc_fallback_filenames` in its own `~/.codex/config.toml` to recognize alternative instruction filenames (e.g. `TEAM_GUIDE.md`, `.agents.md`). 
Reading that user-home config from the validator is fragile — CI users have different homes — so Reporails reads the same setting from the project's own `.ails/config.yml`: + +```yaml +# .ails/config.yml +agents: + codex: + fallback_filenames: ["TEAM_GUIDE.md", ".agents.md"] +``` + +These filenames are added as `**/<filename>` to Codex's `main` surface — they classify the same way `AGENTS.md` does and pick up the same rules. + +## Local overrides — `.ails/config.local.yml` + +Personal or CI-specific config that should not be committed goes in `.ails/config.local.yml`. The file is layered on top of `.ails/config.yml`: + +- Object keys merge recursively. +- Array keys extend (the local list is appended to the committed list). +- Scalar keys are replaced. + +```yaml +# .ails/config.local.yml — gitignored +surfaces: + claude.main: + exclude: ["**/legacy/CLAUDE.md"] # I personally don't care about legacy/ +``` + +When `ails config set …` writes `.ails/config.yml`, it also writes `.ails/.gitignore` listing `config.local.yml` and `.gitignore` itself — the gitignore is per-machine scaffolding (recreated on the next `ails config set`) and doesn't need to be committed. If you create `.ails/` manually, add the two lines yourself: + +``` +# .ails/.gitignore +.gitignore +config.local.yml +``` + ## Severity overrides Severity is what makes a finding "critical" vs "info". Default severity comes from the rule itself; you can override it per project: diff --git a/framework/rules/claude/config.yml b/framework/rules/claude/config.yml index ceb96e9..8daa5eb 100644 --- a/framework/rules/claude/config.yml +++ b/framework/rules/claude/config.yml @@ -50,10 +50,13 @@ file_types: maintainer: human child_instruction: + # Subdirectory CLAUDE.md files. Path-scoping comes from file LOCATION + # (no frontmatter filter). scope: nested captures this — the file's + # subtree applicability is implicit in where the file lives. 
source: https://code.claude.com/docs/en/memory#how-claude-md-files-load format: freeform - scope: path_scoped - cardinality: collection + scope: nested + cardinality: hierarchical lifecycle: static loading: on_demand scopes: @@ -184,8 +187,8 @@ file_types: maintainer: human hooks: - # Hooks configured in settings.json, skill/agent frontmatter, plugin hooks.json - # 28+ events, 5 types (command/HTTP/mcp_tool/prompt/agent) + # Hooks configured in settings.json, skill/agent frontmatter, plugin hooks/hooks.json + # 27 events, 5 types (command/HTTP/mcp_tool/prompt/agent). Verified 2026-05-06. source: https://code.claude.com/docs/en/hooks format: schema_validated scope: global diff --git a/framework/rules/codex/config.yml b/framework/rules/codex/config.yml index 159cd59..37a7098 100644 --- a/framework/rules/codex/config.yml +++ b/framework/rules/codex/config.yml @@ -4,6 +4,17 @@ # AGENTS.md is a cross-agent standard (agents.md) — its presence alone # does not indicate a Codex project. Detection requires .codex/ markers # (config.toml, rules/, agents/, hooks.json) or AGENTS.override.md. +# +# Synced from .claude/skills/audit-agent/assets/registry/codex/config.yml +# Last verified: 2026-05-06 +# +# Items intentionally NOT included here (cannot be statically measured): +# - fallback_main: project_doc_fallback_filenames is user-configurable in +# ~/.codex/config.toml; the validator cannot statically know what filenames +# a user has declared. +# - plugins: marketplace install path not documented in stable form. +# - scheduled_tasks: Codex Desktop Automations on-disk path not documented. +# - cloud-managed requirements / macOS MDM preferences (not flat files). agent: codex version: "0.5.0" @@ -12,9 +23,13 @@ name: OpenAI Codex file_types: main: + # AGENTS.md walked from project root to cwd, concatenated (chain). + # Per-directory order: AGENTS.override.md > AGENTS.md > project_doc_fallback_filenames. + # Combined byte cap: project_doc_max_bytes (32 KiB default). 
+ source: https://developers.openai.com/codex/guides/agents-md/ format: freeform scope: global - cardinality: singleton + cardinality: chain lifecycle: static loading: session_start scopes: @@ -27,12 +42,16 @@ file_types: patterns: ["~/.codex/AGENTS.md", "~/.codex/AGENTS.override.md"] precedence: user vcs: external + cardinality: singleton maintainer: human override: + # AGENTS.override.md — same walk as main, checked first at each directory level. + # Also recognized at ~/.codex/AGENTS.override.md (declared on `main.user` above). + source: https://developers.openai.com/codex/guides/agents-md/ format: freeform scope: global - cardinality: optional + cardinality: chain lifecycle: mutable loading: session_start scopes: @@ -42,7 +61,30 @@ file_types: vcs: committed maintainer: human + nested_context: + # Per-package AGENTS.md in subdirectories below cwd. Codex's chain walk goes + # project-root → cwd, so files BELOW cwd aren't eagerly loaded — but a user + # who cd's into those subdirectories will pick them up. scope: nested + # captures the subtree-by-location semantic without overloading `global`. + source: https://developers.openai.com/codex/guides/agents-md/ + format: freeform + scope: nested + cardinality: hierarchical + lifecycle: static + loading: on_demand + scopes: + project: + patterns: ["**/AGENTS.md"] + precedence: project + vcs: committed + maintainer: human + skills: + # 6 discovery scopes per Codex docs: CWD .agents/skills, parent ../.agents/skills, + # repo root .agents/skills, user ~/.agents/skills + ~/.codex/skills, admin + # /etc/codex/skills (Linux/macOS) or %ProgramData%\OpenAI\Codex\skills (Windows), + # and system bundled (no on-disk path). 
+ source: https://developers.openai.com/codex/skills/ format: [frontmatter, freeform] scope: task_scoped cardinality: hierarchical @@ -60,16 +102,19 @@ file_types: vcs: external maintainer: human system: - patterns: ["/etc/codex/skills/**/SKILL.md"] + patterns: + - "/etc/codex/skills/**/SKILL.md" + - "C:/ProgramData/OpenAI/Codex/skills/**/SKILL.md" precedence: managed vcs: external maintainer: system skill_metadata: - # Per-skill optional: display info, policy, MCP dependencies + # Per-skill optional: zero or one openai.yaml per skill + source: https://developers.openai.com/codex/skills/ format: schema_validated scope: task_scoped - cardinality: collection + cardinality: optional lifecycle: static loading: on_invocation scopes: @@ -80,6 +125,7 @@ file_types: maintainer: human agents: + source: https://developers.openai.com/codex/subagents format: schema_validated scope: task_scoped cardinality: collection @@ -98,6 +144,9 @@ file_types: maintainer: human rules: + # Starlark .rules — execution control via prefix_rule(). Both project and + # user scopes documented. + source: https://developers.openai.com/codex/rules format: schema_validated scope: global cardinality: collection @@ -109,13 +158,22 @@ file_types: precedence: project vcs: committed maintainer: human + user: + patterns: ["~/.codex/rules/*.rules"] + precedence: user + vcs: external + maintainer: human hooks: + # 6 events: SessionStart, PreToolUse, PostToolUse, PermissionRequest, + # UserPromptSubmit, Stop. Requires [features] codex_hooks = true. + # Hooks can also live inline in [hooks] table inside config.toml. 
+ source: https://developers.openai.com/codex/hooks format: schema_validated scope: global - cardinality: singleton + cardinality: collection lifecycle: static - loading: session_start + loading: on_demand scopes: project: patterns: [".codex/hooks.json"] @@ -129,10 +187,13 @@ file_types: maintainer: human config: - # contains harness settings AND attention-channel instructions/developer_instructions fields + # User-level singleton: ~/.codex/config.toml. + # Project-level chain: .codex/config.toml walked from project root to cwd + # (closest wins). Trusted projects only. + source: https://developers.openai.com/codex/config-advanced/ format: schema_validated scope: global - cardinality: singleton + cardinality: chain lifecycle: static loading: session_start scopes: @@ -146,8 +207,43 @@ file_types: precedence: user vcs: external maintainer: human + cardinality: singleton + + mcp: + # MCP servers in [mcp_servers.<name>] tables. Both user and project config.toml + # accept the section; project scope requires trusted-project status. + source: https://developers.openai.com/codex/mcp + format: schema_validated + scope: global + cardinality: collection + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".codex/config.toml"] + precedence: project + vcs: committed + maintainer: human + user: + patterns: ["~/.codex/config.toml"] + precedence: user + vcs: external + maintainer: human + + enterprise: + # Admin-enforced requirements.toml. Constrains approval policy, sandbox, + # MCP allowlists, hooks, command rules, filesystem permissions. 
+ source: https://developers.openai.com/codex/enterprise/managed-configuration + format: schema_validated + scope: global + cardinality: singleton + lifecycle: static + loading: session_start + scopes: system: - patterns: ["/etc/codex/requirements.toml"] + patterns: + - "/etc/codex/requirements.toml" + - "C:/ProgramData/OpenAI/Codex/requirements.toml" precedence: managed vcs: external maintainer: system diff --git a/framework/rules/copilot/config.yml b/framework/rules/copilot/config.yml index 28f21cf..9f931ab 100644 --- a/framework/rules/copilot/config.yml +++ b/framework/rules/copilot/config.yml @@ -1,5 +1,16 @@ # GitHub Copilot Agent Configuration # Schema: schemas/agent.schema.yml v0.5.0 +# +# Synced from .claude/skills/audit-agent/assets/registry/copilot/config.yml +# Last verified: 2026-05-06 +# +# Items intentionally NOT included here (not measurable on disk): +# - Copilot Memory: cloud-hosted (GitHub), no local file +# - Copilot Spaces: GitHub web UI, no local file +# - Org instructions: GitHub org settings, web-managed +# - GitHub MCP via web UI: not local file +# - Copilot Extensions: cloud-hosted GitHub Apps +# - chat.* / github.copilot.* settings (covered by `config` file_type at .vscode/settings.json) agent: copilot version: "0.5.0" @@ -8,6 +19,9 @@ name: GitHub Copilot file_types: main: + # .github/copilot-instructions.md only — repository-root file. + # AGENTS.md cross-agent read is a separate file_type (`agents_md` below). + source: https://docs.github.com/en/copilot/customizing-copilot/adding-custom-instructions-for-github-copilot required: true format: freeform scope: global @@ -16,12 +30,48 @@ file_types: loading: session_start scopes: project: - patterns: [".github/copilot-instructions.md", "**/AGENTS.md"] + patterns: [".github/copilot-instructions.md"] + precedence: project + vcs: committed + maintainer: human + + agents_md: + # AGENTS.md cross-agent read — directory tree walking, nearest ancestor wins. 
+ # chat.useAgentsMdFile enables it; chat.useNestedAgentsMdFiles (experimental) + # enables recursive subfolder discovery. + source: https://code.visualstudio.com/docs/copilot/customization/custom-instructions + format: freeform + scope: global + cardinality: chain + lifecycle: static + loading: session_start + scopes: + project: + patterns: ["**/AGENTS.md"] + precedence: project + vcs: committed + maintainer: human + + nested_context: + # Per-directory AGENTS.md surfaced when running from a deeper cwd. Path-scoping + # comes from file LOCATION (no frontmatter), captured by scope: nested. + source: https://code.visualstudio.com/docs/copilot/customization/custom-instructions + format: freeform + scope: nested + cardinality: hierarchical + lifecycle: static + loading: on_demand + scopes: + project: + patterns: ["**/AGENTS.md"] precedence: project vcs: committed maintainer: human rules: + # Path-scoped instructions with applyTo / excludeAgent frontmatter. + # VS Code also reads .claude/rules/ for cross-agent compatibility. + source: https://code.visualstudio.com/docs/copilot/customization/custom-instructions format: [frontmatter, freeform] scope: path_scoped cardinality: collection @@ -40,6 +90,8 @@ file_types: maintainer: human skills: + # Custom skill discovery paths configurable via chat.agentSkillsLocations. + source: https://code.visualstudio.com/docs/copilot/customization/agent-skills format: [frontmatter, freeform] scope: task_scoped cardinality: hierarchical @@ -58,6 +110,7 @@ file_types: maintainer: human agents: + source: https://code.visualstudio.com/docs/copilot/customization/custom-agents format: [frontmatter, freeform] scope: task_scoped cardinality: collection @@ -76,6 +129,8 @@ file_types: maintainer: human hooks: + # 6 events on GitHub coding agent, 8 events in VS Code. 
+ source: https://docs.github.com/en/copilot/reference/hooks-configuration format: schema_validated scope: global cardinality: collection @@ -89,6 +144,8 @@ file_types: maintainer: human prompts: + # Reusable prompt templates — manually invoked, not auto-applied. + source: https://code.visualstudio.com/docs/copilot/customization/prompt-files format: [frontmatter, freeform] scope: global cardinality: collection @@ -102,6 +159,7 @@ file_types: maintainer: human mcp: + source: https://docs.github.com/en/copilot/customizing-copilot/extending-copilot-coding-agent-with-mcp format: schema_validated scope: global cardinality: singleton @@ -114,6 +172,21 @@ file_types: vcs: committed maintainer: human + config: + # VS Code settings (chat.* and github.copilot.* namespaces) + source: https://code.visualstudio.com/docs/copilot/reference/copilot-settings + format: schema_validated + scope: global + cardinality: singleton + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".vscode/settings.json"] + precedence: project + vcs: committed + maintainer: human + excludes: - CLAUDE:* - CODEX:* diff --git a/framework/rules/core/config.yml b/framework/rules/core/config.yml index ca8cbc6..6bd19c5 100644 --- a/framework/rules/core/config.yml +++ b/framework/rules/core/config.yml @@ -1,9 +1,12 @@ # Generic Agent Configuration (agents.md convention) # Schema: schemas/agent.schema.yml v0.5.0 # -# Default agent when no --agent flag is specified. +# Default agent when no --agent flag is specified or no specific agent detected. # Targets AGENTS.md — the cross-agent instruction standard. -# See: https://agents.md +# Spec: https://agents.md +# +# Synced from .claude/skills/audit-agent/assets/registry/generic/config.yml +# Last verified: 2026-05-06 agent: generic version: "0.5.0" @@ -12,10 +15,14 @@ core: true file_types: main: + # Primary AGENTS.md per spec. 
cardinality: chain since "the closest one + # takes precedence" implies multiple files at multiple directory levels + # are legitimate (per-package precedence). + source: https://agents.md required: true format: freeform scope: global - cardinality: singleton + cardinality: chain lifecycle: static loading: session_start scopes: @@ -25,7 +32,27 @@ file_types: vcs: committed maintainer: human + nested_context: + # Per-package AGENTS.md in subdirectories below cwd. Per spec: "Place another + # AGENTS.md inside each package. Agents automatically read the nearest file + # in the directory tree, so the closest one takes precedence." + # scope: nested captures the location-based subtree applicability. + source: https://agents.md + format: freeform + scope: nested + cardinality: hierarchical + lifecycle: static + loading: on_demand + scopes: + project: + patterns: ["**/AGENTS.md"] + precedence: project + vcs: committed + maintainer: human + skills: + # Agent Skills standard — the .agents/skills/ convention is shared across + # multiple agents via this directory. source: https://agentskills.io/specification format: [frontmatter, freeform] scope: task_scoped @@ -44,5 +71,7 @@ file_types: vcs: external maintainer: human -# No excludes — generic supports all core rules including cross-agent compatibility -# No severity overrides +# No agent-specific override files. AGENTS.local.md and AGENTS.override.md are +# NOT in the agents.md spec — those are extensions defined by individual agents +# (e.g., Codex defines AGENTS.override.md). +# No excludes — generic supports all core rules including cross-agent compatibility. 
diff --git a/framework/rules/cursor/config.yml b/framework/rules/cursor/config.yml index 2a80eb6..5f2f672 100644 --- a/framework/rules/cursor/config.yml +++ b/framework/rules/cursor/config.yml @@ -1,5 +1,15 @@ # Cursor Agent Configuration # Schema: schemas/agent.schema.yml v0.5.0 +# +# Synced from .claude/skills/audit-agent/assets/registry/cursor/config.yml +# Last verified: 2026-05-06 +# +# Items intentionally NOT included here (not measurable on disk): +# - User Rules (Cursor Settings UI text) +# - Team Rules (Cursor dashboard, cloud-managed) +# - Cursor Automations (cloud-only, no on-disk file) +# - Design Mode (in-app UI) +# - CLI --output-format (CLI flag, not a file) agent: cursor version: "0.5.0" @@ -8,7 +18,10 @@ name: Cursor file_types: main: - # AGENTS.md is Cursor's freeform root instruction (no frontmatter) + # AGENTS.md walked from project root through ancestors of cwd. Per docs: + # "Cursor supports AGENTS.md in the project root and subdirectories. ... + # combined with parent directories, with more specific instructions taking precedence." + source: https://cursor.com/docs/rules format: freeform scope: global cardinality: chain @@ -21,14 +34,34 @@ file_types: vcs: committed maintainer: human + nested_context: + # Subdirectory AGENTS.md files surfaced when running from a deeper cwd. + # Per Cursor docs: nested AGENTS.md combined with parent directories, + # closer files take precedence. scope: nested captures the subtree-by-location. 
+ source: https://cursor.com/docs/rules + format: freeform + scope: nested + cardinality: hierarchical + lifecycle: static + loading: on_demand + scopes: + project: + patterns: ["**/AGENTS.md"] + precedence: project + vcs: committed + maintainer: human + rules: - # .cursor/rules/ with MDC frontmatter — 4 activation modes: - # alwaysApply (global), intelligent (agent decides), globs (path-scoped), manual (@mention) + # .cursor/rules/ with MDC frontmatter — 4 activation modes per file: + # alwaysApply (always loaded), intelligent (agent decides), globs (file-scoped), + # manual (@-mention only). globs frontmatter makes this path_scoped — the + # path filter is in-file, not just at the surface level. + source: https://cursor.com/docs/rules format: [frontmatter, freeform] - scope: global + scope: path_scoped cardinality: collection lifecycle: static - loading: session_start + loading: on_demand scopes: project: patterns: [".cursor/rules/**/*.mdc", ".cursor/rules/**/*.md"] @@ -37,6 +70,8 @@ file_types: maintainer: human legacy_cursorrules: + # Deprecated, replaced by .cursor/rules/ + source: https://cursor.com/docs/rules deprecated: true format: freeform scope: global @@ -51,6 +86,8 @@ file_types: maintainer: human skills: + # Cross-agent skill paths supported (.claude/skills, .codex/skills, .agents/skills) + source: https://cursor.com/docs/skills format: [frontmatter, freeform] scope: task_scoped cardinality: hierarchical @@ -69,6 +106,8 @@ file_types: maintainer: human agents: + # Built-in subagents: Explore, Bash, Browser. Cross-agent paths supported. 
+ source: https://cursor.com/docs/subagents format: [frontmatter, freeform] scope: task_scoped cardinality: collection @@ -87,9 +126,11 @@ file_types: maintainer: human hooks: + # 20+ events; command + prompt hook types + source: https://cursor.com/docs/hooks format: schema_validated scope: global - cardinality: singleton + cardinality: collection lifecycle: static loading: session_start scopes: @@ -113,6 +154,7 @@ file_types: maintainer: system mcp: + source: https://cursor.com/docs/mcp format: schema_validated scope: global cardinality: singleton @@ -130,8 +172,46 @@ file_types: vcs: external maintainer: human + commands: + # Slash commands — freeform markdown + source: https://cursor.com/docs/context/commands + format: freeform + scope: task_scoped + cardinality: collection + lifecycle: static + loading: on_invocation + scopes: + project: + patterns: [".cursor/commands/*.md"] + precedence: project + vcs: committed + maintainer: human + + config: + # Cursor settings — both project (.vscode/settings.json or .cursor/settings.json) + # and user (~/.cursor/settings.json) levels + source: https://cursor.com/docs/cli/reference/configuration + format: schema_validated + scope: global + cardinality: singleton + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".vscode/settings.json", ".cursor/settings.json"] + precedence: project + vcs: committed + maintainer: human + user: + patterns: ["~/.cursor/settings.json"] + precedence: user + vcs: external + maintainer: human + managed_policy: - # MDM on macOS (.mobileconfig), Group Policy on Windows (ADMX/ADML), policy.json on Linux + # MDM on macOS (.mobileconfig), Group Policy on Windows (ADMX/ADML), + # policy.json on Linux + source: https://cursor.com/docs/enterprise format: schema_validated scope: global cardinality: singleton @@ -144,10 +224,43 @@ file_types: vcs: external maintainer: system + plugins: + # .cursor-plugin/ manifest — marketplace + team marketplaces + source: 
https://cursor.com/docs/plugins + format: schema_validated + scope: global + cardinality: collection + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".cursor-plugin/plugin.json"] + precedence: project + vcs: committed + maintainer: human + + bugbot: + # BugBot project-level review instructions + source: https://cursor.com/docs/bugbot + format: freeform + scope: global + cardinality: singleton + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".cursor/BUGBOT.md"] + precedence: project + vcs: committed + maintainer: human + bugbot_rules: - # Auto-generated from PR feedback and reviewer comments, managed by Bugbot + # Auto-generated learned rules under .cursor/rules/ — agent-managed. + # Bugbot decides when each rule applies (auto-promoted/disabled), so the + # path-filter aspect is agent-driven rather than user-authored frontmatter. + source: https://cursor.com/docs/bugbot format: frontmatter - scope: path_scoped + scope: global cardinality: collection lifecycle: mutable loading: on_demand @@ -158,6 +271,20 @@ file_types: vcs: committed maintainer: agent + ignore: + source: https://cursor.com/docs/reference/ignore-file + format: freeform + scope: global + cardinality: singleton + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".cursorignore"] + precedence: project + vcs: committed + maintainer: human + excludes: - CLAUDE:* - CODEX:* diff --git a/framework/rules/gemini/config.yml b/framework/rules/gemini/config.yml index 6d828c2..aa96234 100644 --- a/framework/rules/gemini/config.yml +++ b/framework/rules/gemini/config.yml @@ -1,5 +1,12 @@ # Google Gemini Agent Configuration # Schema: schemas/agent.schema.yml v0.5.0 +# +# Synced from .claude/skills/audit-agent/assets/registry/gemini/config.yml +# Last verified: 2026-05-06 +# +# Items intentionally NOT included here (not measurable on disk): +# - Cloud-managed enterprise policies (Google Cloud admin / Code Assist Enterprise) +# - 
Scheduled tasks (CLI itself doesn't have native scheduling — uses external cron) agent: gemini version: "0.5.0" @@ -8,20 +15,24 @@ name: Google Gemini file_types: main: + # GEMINI.md walked from project root through ancestors of cwd to .git or home. + # All discovered files concatenated. Subdirectory GEMINI.md is "Just-In-Time" + # loaded on-demand (`nested_context`). User-scope file at ~/.gemini/GEMINI.md + # also doubles as the memory surface (agent appends to ## Gemini Added Memories). + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/gemini-md.md required: true format: freeform scope: global - cardinality: singleton + cardinality: chain lifecycle: static loading: session_start scopes: project: - patterns: ["**/GEMINI.md", "**/AGENTS.md"] + patterns: ["**/GEMINI.md"] precedence: project vcs: committed maintainer: human user: - # Also serves as memory surface (agent appends to ## Gemini Added Memories) patterns: ["~/.gemini/GEMINI.md"] precedence: user vcs: external @@ -29,8 +40,12 @@ file_types: lifecycle: mutable nested_context: + # Subdirectory GEMINI.md files — Just-In-Time loaded when Gemini accesses + # files in those directories. scope: nested captures the location-based + # subtree applicability without overloading other scope values. + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/gemini-md.md format: freeform - scope: path_scoped + scope: nested cardinality: hierarchical lifecycle: static loading: on_demand @@ -41,7 +56,24 @@ file_types: vcs: committed maintainer: human + cross_read: + # Cross-agent files (AGENTS.md, CONTEXT.md) when context.fileName is configured. 
+ source: https://github.com/google-gemini/gemini-cli/blob/main/docs/reference/configuration.md + format: freeform + scope: global + cardinality: chain + lifecycle: static + loading: session_start + scopes: + project: + patterns: ["**/AGENTS.md", "**/CONTEXT.md"] + precedence: project + vcs: committed + maintainer: human + skills: + # `.agents/skills/` is the cross-agent alias supported by Gemini. + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/skills.md format: [frontmatter, freeform] scope: task_scoped cardinality: hierarchical @@ -60,6 +92,9 @@ file_types: maintainer: human agents: + # Subagents — built-ins: codebase_investigator, cli_help, generalist_agent, + # browser_agent (experimental). Custom agents invoked via @name syntax. + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/core/subagents.md format: [frontmatter, freeform] scope: task_scoped cardinality: collection @@ -78,6 +113,8 @@ file_types: maintainer: human commands: + # TOML format with prompt + description, supports {{args}}, !{shell}, @{file} + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/custom-commands.md format: schema_validated scope: task_scoped cardinality: collection @@ -96,7 +133,8 @@ file_types: maintainer: human hooks: - # 11 events configured in settings.json hooks key + # 11 events configured under hooks key in settings.json. + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/hooks/index.md format: schema_validated scope: global cardinality: collection @@ -115,6 +153,8 @@ file_types: maintainer: human config: + # Gemini supports both JSON and TOML settings files. 
+ source: https://github.com/google-gemini/gemini-cli/blob/main/docs/reference/configuration.md format: schema_validated scope: global cardinality: singleton @@ -146,16 +186,30 @@ file_types: precedence: managed vcs: external maintainer: system - system_policies: - patterns: - - "/etc/gemini-cli/policies" - - "/Library/Application Support/GeminiCli/policies" - precedence: managed + + mcp: + # MCP servers under mcpServers key in settings.json/settings.toml. + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/reference/configuration.md + format: schema_validated + scope: global + cardinality: collection + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".gemini/settings.json", ".gemini/settings.toml"] + precedence: project + vcs: committed + maintainer: human + user: + patterns: ["~/.gemini/settings.json", "~/.gemini/settings.toml"] + precedence: user vcs: external - maintainer: system - cardinality: collection + maintainer: human extensions: + # Plugin system: bundles MCP servers, commands, skills, hooks, themes + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/extensions/writing-extensions.md format: schema_validated scope: global cardinality: collection @@ -169,6 +223,7 @@ file_types: maintainer: human geminiignore: + source: https://github.com/google-gemini/gemini-cli format: freeform scope: global cardinality: singleton @@ -181,6 +236,21 @@ file_types: vcs: committed maintainer: human + system_prompt: + # Replace default system prompt entirely + source: https://github.com/google-gemini/gemini-cli/blob/main/docs/cli/system-prompt.md + format: freeform + scope: global + cardinality: singleton + lifecycle: static + loading: session_start + scopes: + project: + patterns: [".gemini/system-prompt.md"] + precedence: project + vcs: committed + maintainer: human + excludes: - CLAUDE:* - CODEX:* diff --git a/framework/schemas/agent.schema.yml b/framework/schemas/agent.schema.yml index d30c5e8..af47221 100644 
--- a/framework/schemas/agent.schema.yml +++ b/framework/schemas/agent.schema.yml @@ -1,4 +1,4 @@ -# Agent configuration schema v0.5.0 + # Agent configuration schema v0.5.0 # # Foundational assumption: coding agents are more alike than different. # An instruction file has the same quality properties regardless of which @@ -64,8 +64,13 @@ fields: description: "File content format(s). Shared across scopes." scope: type: enum - values: [global, path_scoped, task_scoped] - description: "Effect scope — what the surface applies to. Distinct from location scope (the scopes key)." + values: [global, path_scoped, task_scoped, nested] + description: | + Effect scope — what the surface applies to. Distinct from location scope (the scopes key). + - global: applies project-wide (e.g. main, override, agents_md, cross_read). + - path_scoped: applies via in-file frontmatter path filter (e.g. Claude rules with paths:, Cursor rules with globs:). + - task_scoped: invocation-time loading (skills, agents, commands). + - nested: applies to a subdirectory subtree by virtue of file location (no frontmatter); subdirectory CLAUDE.md / AGENTS.md / GEMINI.md. 
cardinality: type: enum values: [singleton, optional, collection, hierarchical, chain] @@ -108,7 +113,7 @@ fields: description: "Who maintains the file" # Any shared property can be overridden per scope format: { type: [enum, array], values: [freeform, frontmatter, schema_validated] } - scope: { type: enum, values: [global, path_scoped, task_scoped] } + scope: { type: enum, values: [global, path_scoped, task_scoped, nested] } cardinality: { type: enum, values: [singleton, optional, collection, hierarchical, chain] } lifecycle: { type: enum, values: [static, mutable, transient] } loading: { type: enum, values: [session_start, on_demand, on_invocation] } diff --git a/framework/schemas/project.schema.yml b/framework/schemas/project.schema.yml index a12fcac..27dc6b8 100644 --- a/framework/schemas/project.schema.yml +++ b/framework/schemas/project.schema.yml @@ -93,6 +93,40 @@ fields: default: true description: "Enforce rules backed by community sources only (weight < 0.8)" + agents: + required: false + type: object + description: "Per-agent overrides keyed by agent id (claude, codex, cursor, etc.)." + additionalProperties: + type: object + properties: + fallback_filenames: + type: array + items: { type: string } + description: | + Additional instruction filenames to scan for this agent's `main` + surface. Use to mirror Codex `project_doc_fallback_filenames` (e.g. + ["TEAM_GUIDE.md", ".agents.md"]) without round-tripping through the + agent's own home config. + + surfaces: + required: false + type: object + description: | + Per-surface include/exclude pattern adjustments. Keys are + `<agent>.<file_type>` (e.g. `codex.main`, `cursor.rules`). + additionalProperties: + type: object + properties: + include: + type: array + items: { type: string } + description: "Additional glob patterns to scan for this surface." + exclude: + type: array + items: { type: string } + description: "Glob patterns whose matches are dropped from this surface." 
+ defaults: schema_version: "0.1.0" agent: "claude" @@ -140,3 +174,15 @@ examples: tiers: core: true experimental: false + + with_surface_adjustments: | + schema_version: "0.1.0" + agent: codex + agents: + codex: + fallback_filenames: ["TEAM_GUIDE.md", ".agents.md"] + surfaces: + cursor.rules: + exclude: ["**/draft/**"] + claude.skills: + include: [".github/skills/**/SKILL.md"] diff --git a/framework/schemas/rule.schema.yml b/framework/schemas/rule.schema.yml index e4cfc21..c130352 100644 --- a/framework/schemas/rule.schema.yml +++ b/framework/schemas/rule.schema.yml @@ -93,7 +93,7 @@ fields: description: "Named file type (main, scoped_rule, skill, config, override, memory, mcp, managed)" scope: type: enum - values: [global, path_scoped, task_scoped, user, system] + values: [global, path_scoped, task_scoped, nested, user, system] format: type: [enum, array] values: [freeform, frontmatter, schema_validated] diff --git a/packages/npm/README.md b/packages/npm/README.md deleted file mode 120000 index fe84005..0000000 --- a/packages/npm/README.md +++ /dev/null @@ -1 +0,0 @@ -../../README.md \ No newline at end of file diff --git a/packages/npm/package.json b/packages/npm/package.json index c0577fe..e0ba367 100644 --- a/packages/npm/package.json +++ b/packages/npm/package.json @@ -1,6 +1,6 @@ { "name": "@reporails/cli", - "version": "0.5.6", + "version": "0.5.7", "description": "AI instruction diagnostics for coding agents", "type": "module", "bin": { @@ -14,6 +14,9 @@ "bin/", "README.md" ], + "scripts": { + "prepack": "cp ../../README.md ./README.md" + }, "keywords": [ "ai-instructions", "claude", diff --git a/pyproject.toml b/pyproject.toml index b9d393d..0029a4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "reporails-cli" -version = "0.5.6" +version = "0.5.7" description = "AI instruction diagnostics for coding agents" readme = "README.md" license = "BUSL-1.1" diff --git a/src/reporails_cli/core/agent_discovery.py 
b/src/reporails_cli/core/agent_discovery.py index 2f9412a..f632602 100644 --- a/src/reporails_cli/core/agent_discovery.py +++ b/src/reporails_cli/core/agent_discovery.py @@ -9,7 +9,10 @@ import logging import os from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from reporails_cli.core.results import ProjectConfig logger = logging.getLogger(__name__) @@ -19,13 +22,16 @@ def ci_glob(target: Path, pattern: str) -> list[Path]: - """Case-insensitive glob for root-level filenames, standard glob for nested.""" + """Case-sensitive glob — agent specs treat filename casing as authoritative. + + Name retained from the previous case-insensitive implementation; behavior + is now case-sensitive per the agents.md spec ("Filenames not on this list + are ignored for instruction discovery."). + """ parts = Path(pattern).parts if len(parts) == 1 and "*" not in pattern: - # Root-level exact filename — match case-insensitively - lower = pattern.lower() try: - return [p for p in target.iterdir() if p.name.lower() == lower and not p.is_dir()] + return [p for p in target.iterdir() if p.name == pattern and not p.is_dir()] except OSError: return [] return list(target.glob(pattern)) @@ -68,14 +74,27 @@ def is_excluded(path: Path, target: Path, exclude_dirs: frozenset[str]) -> bool: def walk_glob(root: Path, filename: str, exclude_dirs: frozenset[str]) -> list[Path]: - """Walk directory tree matching a filename, skipping excluded dirs. + """Walk directory tree matching a filename exactly, skipping excluded dirs. Much faster than Path.glob("**/name") because it prunes excluded subtrees during traversal instead of filtering afterwards. Uses os.scandir for efficient directory traversal. + + Match is case-SENSITIVE per agent implementations. 
The OpenAI Codex + source (`codex-rs/core/src/agents_md.rs`) declares: + + pub const DEFAULT_AGENTS_MD_FILENAME: &str = "AGENTS.md"; + pub const LOCAL_AGENTS_MD_FILENAME: &str = "AGENTS.override.md"; + + and looks them up via exact `path.join(filename)` + `std::fs::read_to_string` + — case-sensitive on Linux, exact-case lookup on macOS/Windows. The + agents.md spec is consistent: discovery list is "AGENTS.override.md, + AGENTS.md, TEAM_GUIDE.md, .agents.md. Filenames not on this list are + ignored." A file named `agents.md` (lowercase, no leading dot) is NOT + the same as `AGENTS.md` and must not be matched. Same convention applies + to `CLAUDE.md` and `GEMINI.md`. """ skip = exclude_dirs | _ALWAYS_SKIP - lower_name = filename.lower() results: list[Path] = [] stack = [str(root)] while stack: @@ -87,7 +106,7 @@ def walk_glob(root: Path, filename: str, exclude_dirs: frozenset[str]) -> list[P with scanner: for entry in scanner: name = entry.name - if name.lower() == lower_name: + if name == filename: try: is_match = entry.is_file(follow_symlinks=True) except OSError: @@ -101,50 +120,150 @@ def walk_glob(root: Path, filename: str, exclude_dirs: frozenset[str]) -> list[P return results +def walk_ancestors(start: Path, filename: str, stop: Path) -> list[Path]: + """Walk up from start, collecting filename matches at each ancestor. + + Returns paths in walked order (closest first). Match is case-SENSITIVE + per agent specs (see walk_glob docstring for citation). 
+ """ + results: list[Path] = [] + current = start if start.is_dir() else start.parent + while True: + try: + for entry in os.scandir(current): + if entry.name == filename: + try: + is_match = entry.is_file(follow_symlinks=True) + except OSError: + is_match = entry.is_symlink() + if is_match: + results.append(Path(entry.path)) + break + except OSError: + pass + if current == stop or current == current.parent: + break + current = current.parent + return results + + +def resolve_project_root(target: Path) -> Path: + """Project root for discovery — the directory `ails check` was pointed at. + + Discovery never walks above this directory. Whoever runs `ails check` + chooses the scope — `target` IS the project root, regardless of what + `.git` / `.ails/backbone.yml` / IDE config dirs may exist above it. + + Files outside `target`'s subtree are out of scope. This bounds the scan + strictly to what the user pointed at and avoids leaking files from a + surrounding repo into a fixture/subdirectory check. + + For cache-key derivation and mapper coordination (which need a stable + repo-wide identifier even when running from a subdirectory), see + `engine_helpers._find_project_root` — that function continues to walk up + looking for project markers and is unaffected by this change. + """ + return target if target.is_dir() else target.parent + + +def _is_eager_global(properties: dict[str, str]) -> bool: + """File_type loads at session start with global scope (e.g., main, override). + + These files are loaded by the agent from the cwd ancestor chain — not + via descendant traversal. Validator must mirror that: ancestor walk for + files-at-cwd-and-above, descendant walk for nested per-subdirectory files. + """ + return properties.get("scope") == "global" and properties.get("loading") == "session_start" + + +def _is_nested(properties: dict[str, str]) -> bool: + """File_type whose subtree applicability comes from file LOCATION, not frontmatter. 
+ + `scope: nested` declares: this surface applies to a subdirectory subtree + by virtue of where the file lives (no frontmatter filter). Maps to + nested_context / child_instruction declarations — files below cwd that + the agent loads only when descending into those subdirectories. + """ + return properties.get("scope") == "nested" + + +def _is_external_pattern(pattern: str) -> bool: + """Pattern resolves outside project (~/..., /abs, C:/...).""" + return pattern.startswith("~") or pattern.startswith("/") or (len(pattern) > 1 and pattern[1] == ":") + + +def _run_descendant_recursive(target: Path, pattern: str, nested: bool, exclude_dirs: frozenset[str]) -> list[Path]: + """Descendant walk for **/ patterns. + + `nested` (scope: nested) excludes cwd itself — those files belong to the + eager file_type (main) discovered via ancestor walk. + """ + parts = Path(pattern).parts + filename = parts[-1] if parts else "" + prefix_parts: list[str] = [] + for p in parts: + if "**" in p: + break + prefix_parts.append(p) + walk_root = target / Path(*prefix_parts) if prefix_parts else target + if not walk_root.is_dir(): + return [] + results = walk_glob(walk_root, filename, exclude_dirs) + if nested: + results = [m for m in results if m.parent != target] + return [m for m in results if not is_excluded(m, target, exclude_dirs)] + + def glob_file_type_patterns( target: Path, patterns: list[str], + properties: dict[str, str] | None = None, exclude_dirs: frozenset[str] = frozenset(), ) -> list[Path]: """Glob file_type patterns against target directory. - For recursive (**/) patterns with a literal filename (e.g., **/CLAUDE.md), - uses pruning walk to avoid traversing excluded directory trees. - Falls back to Path.glob for wildcard filenames (e.g., **/*.md). - - External paths (~/... and /absolute/...) are resolved outside the project - directory. These are part of the instruction surface (user-level config, - managed policies, auto-memory) even though they live outside the repo. 
+ Dispatch by file_type properties: + - external (~/..., /abs, C:/...) -> _glob_external + - bare leaf or **/ + eager global -> walk_ancestors from cwd + - .../ (no **/) + global scope -> resolve relative to project_root + - **/ + scope: nested -> walk_glob descendant from cwd, exclude cwd + - everything else -> walk_glob descendant from cwd + + Properties drive the dispatch so a pattern like **/CLAUDE.md can mean + "ancestor walk" under file_types.main (scope: global) and "descendant walk" + under file_types.nested_context (scope: nested) — same regex, different + loading model. """ + props = properties or {} + eager_global = _is_eager_global(props) + nested = _is_nested(props) + project_root: Path | None = None + found: list[Path] = [] for pattern in patterns: - # Skip directory-only patterns if pattern.endswith("/"): continue - - # External paths: ~/... or /absolute/... or C:/... - if pattern.startswith("~") or pattern.startswith("/") or (len(pattern) > 1 and pattern[1] == ":"): + if _is_external_pattern(pattern): _glob_external(pattern, target, found) continue - parts = Path(pattern).parts - filename = parts[-1] if parts else "" - - # Use pruning walk only for recursive patterns with literal filenames - if "**" in pattern and "*" not in filename: - # Extract prefix before ** (e.g., ".claude/skills/**/SKILL.md" -> ".claude/skills") - prefix_parts = [] - for p in parts: - if "**" in p: - break - prefix_parts.append(p) - walk_root = target / Path(*prefix_parts) if prefix_parts else target - if walk_root.is_dir(): - found.extend( - m for m in walk_glob(walk_root, filename, exclude_dirs) if not is_excluded(m, target, exclude_dirs) - ) + filename = Path(pattern).parts[-1] if Path(pattern).parts else "" + is_recursive_leaf = "**" in pattern and "*" not in filename + is_bare_leaf = len(Path(pattern).parts) == 1 and "*" not in pattern + + if eager_global and (is_recursive_leaf or is_bare_leaf): + if project_root is None: + project_root = resolve_project_root(target) + 
found.extend( + m for m in walk_ancestors(target, filename, project_root) if not is_excluded(m, target, exclude_dirs) + ) + elif eager_global and "**" not in pattern: + if project_root is None: + project_root = resolve_project_root(target) + found.extend(m for m in ci_glob(project_root, pattern) if not is_excluded(m, target, exclude_dirs)) + elif is_recursive_leaf: + found.extend(_run_descendant_recursive(target, pattern, nested, exclude_dirs)) else: - # Non-recursive or wildcard filename — use standard glob found.extend(m for m in ci_glob(target, pattern) if not is_excluded(m, target, exclude_dirs)) return found @@ -200,14 +319,85 @@ def load_config_file_types( return None +def _surface_include_patterns(agent_id: str, file_type_name: str, project_config: ProjectConfig | None) -> list[str]: + """Patterns to ADD to a file_type's declared list, sourced from project config. + + Reads `surfaces.<agent>.<file_type>.include` from `.ails/config.yml`. + Special case: for `<agent>.main`, also injects `**/<name>` for each + entry in `agents.<agent>.fallback_filenames` so user-declared alternative + instruction filenames (e.g., Codex `project_doc_fallback_filenames`) are + treated as main candidates. 
+ """ + if project_config is None: + return [] + extra: list[str] = [] + surfaces = getattr(project_config, "surfaces", {}) or {} + surface_key = f"{agent_id}.{file_type_name}" + surface_cfg = surfaces.get(surface_key, {}) + if isinstance(surface_cfg, dict): + include = surface_cfg.get("include", []) + if isinstance(include, list): + extra.extend(str(p) for p in include) + + if file_type_name == "main": + agents_cfg = getattr(project_config, "agents", {}) or {} + agent_cfg = agents_cfg.get(agent_id, {}) + if isinstance(agent_cfg, dict): + fallbacks = agent_cfg.get("fallback_filenames", []) + if isinstance(fallbacks, list): + extra.extend(f"**/{name}" for name in fallbacks if isinstance(name, str)) + return extra + + +def _surface_exclude_patterns(agent_id: str, file_type_name: str, project_config: ProjectConfig | None) -> list[str]: + """Glob patterns whose matches should be DROPPED from a surface's results.""" + if project_config is None: + return [] + surfaces = getattr(project_config, "surfaces", {}) or {} + surface_key = f"{agent_id}.{file_type_name}" + surface_cfg = surfaces.get(surface_key, {}) + if not isinstance(surface_cfg, dict): + return [] + exclude = surface_cfg.get("exclude", []) + if not isinstance(exclude, list): + return [] + return [str(p) for p in exclude] + + +def _matches_any_glob(path: Path, patterns: list[str], target: Path) -> bool: + """Check whether path matches any of the glob patterns relative to target.""" + if not patterns: + return False + try: + rel = path.relative_to(target).as_posix() + except ValueError: + rel = str(path) + for pattern in patterns: + # PurePath.match supports glob-style; **/ wildcards may need normalization + try: + if Path(rel).match(pattern): + return True + # Also try absolute match for patterns that include the full prefix + if path.match(pattern): + return True + except ValueError: + continue + return False + + def discover_from_config( target: Path, agent_id: str, rules_paths: list[Path] | None = None, 
extra_exclude_dirs: frozenset[str] = frozenset(), + project_config: ProjectConfig | None = None, ) -> tuple[list[Path], list[Path], list[Path]] | None: """Discover files using config.yml file_types. + Optionally consults `project_config` (a `ProjectConfig`) for per-surface + include/exclude pattern adjustments and Codex fallback filenames declared + in `.ails/config.yml` (or `.ails/config.local.yml`). + Returns (instruction_files, rule_files, config_files) or None if no config.yml is available for this agent. """ @@ -221,17 +411,27 @@ def discover_from_config( rule_files: list[Path] = [] config_files: list[Path] = [] - for spec in file_types.values(): + for ft_name, spec in file_types.items(): if not isinstance(spec, dict): continue - patterns = _extract_patterns(spec) + patterns = list(_extract_patterns(spec)) properties = _extract_properties(spec) bucket = categorize_file_type(patterns, properties) if bucket == "skip": continue - found = glob_file_type_patterns(target, patterns, extra_exclude_dirs) + # Inject per-surface include patterns from .ails/config.yml + extra_include = _surface_include_patterns(agent_id, ft_name, project_config) + if extra_include: + patterns = patterns + extra_include + + found = glob_file_type_patterns(target, patterns, properties, extra_exclude_dirs) + + # Apply per-surface exclude filters + exclude_globs = _surface_exclude_patterns(agent_id, ft_name, project_config) + if exclude_globs: + found = [p for p in found if not _matches_any_glob(p, exclude_globs, target)] if bucket == "instruction": instruction_files.extend(found) @@ -241,7 +441,42 @@ def discover_from_config( config_files.extend(found) return ( - sorted(set(instruction_files)), - sorted(set(rule_files)), - sorted(set(config_files)), + _dedupe_by_canonical(instruction_files), + _dedupe_by_canonical(rule_files), + _dedupe_by_canonical(config_files), ) + + +def _canonical_path(path: Path) -> Path: + """Return path's canonical (symlink-resolved) form, or the original on error. 
+ + Mirrors the error handling in `applicability.resolve_symlinked_files`: + `Path.resolve(strict=True)` raises `OSError` (broken symlink, errno + `ELOOP`) or `RuntimeError` (Python's symlink-loop guard) on bad + symlinks. Treat unresolvable paths as canonical-to-themselves so they + are still surfaced for downstream error reporting. + """ + try: + return path.resolve(strict=True) + except (OSError, RuntimeError): + return path + + +def _dedupe_by_canonical(paths: list[Path]) -> list[Path]: + """Sort and dedupe paths by their canonical (symlink-resolved) target. + + Two surface paths can refer to the same underlying file when one or + both are symlinks (common pattern: `.claude/skills -> ../.agents/skills`). + Naive `set(paths)` keeps both because path equality compares strings. + Canonicalizing via `Path.resolve(strict=True)` collapses symlinks; the + first surface path encountered for a canonical target wins. + """ + seen_canonical: set[Path] = set() + out: list[Path] = [] + for p in sorted(set(paths)): + canonical = _canonical_path(p) + if canonical in seen_canonical: + continue + seen_canonical.add(canonical) + out.append(p) + return out diff --git a/src/reporails_cli/core/agents.py b/src/reporails_cli/core/agents.py index 0948aab..700bda7 100644 --- a/src/reporails_cli/core/agents.py +++ b/src/reporails_cli/core/agents.py @@ -249,38 +249,43 @@ def _load_project_exclude_dirs(target: Path) -> frozenset[str]: return _DEFAULT_EXCLUDE_DIRS -def _agent_has_marker(target: Path, agent_type: AgentType) -> bool: - """Fast existence check — does this agent likely exist in the project? - - Checks for root-level instruction files or agent-specific directories. - Returns False only when we're certain the agent is absent (a few stat() calls). - Uses os.path.lexists to detect symlinks (even broken/circular ones). - - Root-level files are matched case-insensitively (claude.md == CLAUDE.md) - because repos in the wild use both conventions. 
+def _scan_marker_at(target: Path, agent_type: AgentType) -> bool: + """Check whether agent markers exist at exactly this directory level. + + Root-level files match case-SENSITIVELY per agent specs. The agents.md + spec lists exact filenames ("AGENTS.override.md, AGENTS.md, TEAM_GUIDE.md, + .agents.md. Filenames not on this list are ignored for instruction + discovery.") so wrong-case copies (e.g. `agents.md` lowercase) are not + real instruction files. """ - # Build lowercase index of root files once per call (cheap — root only) try: - root_lower = { - entry.name.lower(): entry.name - for entry in os.scandir(target) - if entry.is_file(follow_symlinks=False) or entry.is_symlink() + root_files = { + entry.name for entry in os.scandir(target) if entry.is_file(follow_symlinks=False) or entry.is_symlink() } except OSError: - root_lower = {} + root_files = set() for pattern in agent_type.instruction_patterns: - # Patterns with path separators or globs — check exact path if "/" in pattern or "*" in pattern: if os.path.lexists(target / pattern): return True - else: - # Root-level file — case-insensitive match - if pattern.lower() in root_lower: - return True + elif pattern in root_files: + return True return any((target / dir_path).is_dir() for _, dir_path in agent_type.directory_patterns) +def _agent_has_marker(target: Path, agent_type: AgentType) -> bool: + """Fast existence check — does this agent likely apply at target? + + Checks the target directory only. Cwd is the project root for discovery + (per resolve_project_root); files in ancestor directories are out of scope. + + Root-level files match case-sensitively (claude.md != CLAUDE.md): the check + delegates to _scan_marker_at, which compares exact filenames. + """ + return _scan_marker_at(target, agent_type) + + def detect_agents( # pylint: disable=too-many-locals target: Path, rules_paths: list[Path] | None = None, @@ -288,6 +293,8 @@ def detect_agents( # pylint: disable=too-many-locals """Detect coding agents in the target directory. 
Uses config.yml file_types from bundled framework for discovery. + Per-surface include/exclude and Codex fallback filenames come from the + project's `.ails/config.yml` (and `.ails/config.local.yml`). Cached per target path. """ cache_key = str(target) @@ -298,6 +305,15 @@ def detect_agents( # pylint: disable=too-many-locals # Load project exclude_dirs early so discovery skips noise directories project_excludes = _load_project_exclude_dirs(target) + # Load project config for per-surface include/exclude + fallback filenames + project_config = None + try: + from reporails_cli.core.config import get_project_config + + project_config = get_project_config(target) + except Exception: + logger.debug("Project config load failed for %s", target, exc_info=True) + detected: list[DetectedAgent] = [] for agent_id, agent_type in get_known_agents().items(): @@ -306,7 +322,9 @@ def detect_agents( # pylint: disable=too-many-locals continue # Config-driven discovery from bundled config.yml - config_result = _discover_from_config(target, agent_id, rules_paths, project_excludes) + config_result = _discover_from_config( + target, agent_id, rules_paths, project_excludes, project_config=project_config + ) if config_result is None: continue diff --git a/src/reporails_cli/core/classification.py b/src/reporails_cli/core/classification.py index 9398a3c..91d7754 100644 --- a/src/reporails_cli/core/classification.py +++ b/src/reporails_cli/core/classification.py @@ -22,14 +22,19 @@ def load_file_types( agent: str, rules_paths: list[Path] | None = None, + project_root: Path | None = None, ) -> list[FileTypeDeclaration]: - """Load file_types from agent config.yml. + """Load file_types from agent config.yml, with optional project overrides. - Searches rules_paths first, then falls back to default config path. + Searches rules_paths first, then falls back to default config path. 
When + `project_root` is provided, reads `.ails/config.yml` (+ `.ails/config.local.yml`) + and merges per-surface `include` patterns plus + `agents.<agent>.fallback_filenames` into the matching FileTypeDeclarations. Args: agent: Agent identifier (e.g., "claude") rules_paths: Optional rules directories to search first + project_root: Optional project root for reading `.ails/config.yml` Returns: List of FileTypeDeclaration, empty if no config found @@ -52,13 +57,68 @@ def load_file_types( file_types_data = data.get("file_types", {}) if not file_types_data: continue - return _parse_file_types(file_types_data) + decls = _parse_file_types(file_types_data) + if project_root is not None: + decls = _apply_project_overrides(decls, agent, project_root) + return decls except (yaml.YAMLError, OSError) as exc: logger.warning("Failed to parse agent file_types %s: %s", config_path, exc) continue return [] +def _apply_project_overrides( + declarations: list[FileTypeDeclaration], + agent: str, + project_root: Path, +) -> list[FileTypeDeclaration]: + """Merge project config overrides into FileTypeDeclarations. + + Adds patterns from `surfaces.<agent>.<file_type>.include` and Codex + `agents.<agent>.fallback_filenames` (for `main`) so classification can + match user-configured fallback instruction files. 
+ """ + try: + from reporails_cli.core.config import get_project_config + except ImportError: + return declarations + + try: + project_config = get_project_config(project_root) + except Exception: + return declarations + + surfaces = getattr(project_config, "surfaces", {}) or {} + agents_cfg = getattr(project_config, "agents", {}) or {} + + out: list[FileTypeDeclaration] = [] + for decl in declarations: + extra: list[str] = [] + surface_cfg = surfaces.get(f"{agent}.{decl.name}", {}) + if isinstance(surface_cfg, dict): + include = surface_cfg.get("include", []) + if isinstance(include, list): + extra.extend(str(p) for p in include) + if decl.name == "main": + agent_cfg = agents_cfg.get(agent, {}) + if isinstance(agent_cfg, dict): + fallbacks = agent_cfg.get("fallback_filenames", []) + if isinstance(fallbacks, list): + extra.extend(f"**/{name}" for name in fallbacks if isinstance(name, str)) + if extra: + out.append( + FileTypeDeclaration( + name=decl.name, + patterns=decl.patterns + tuple(extra), + required=decl.required, + properties=decl.properties, + ) + ) + else: + out.append(decl) + return out + + def _parse_file_types(data: dict[str, object]) -> list[FileTypeDeclaration]: """Parse file_types dict from agent config into FileTypeDeclaration list. @@ -222,6 +282,84 @@ def detect_content_format(text: str) -> list[str]: return sorted(formats) +def _compute_ancestor_chain(scan_root: Path) -> set[Path]: + """Directories from scan_root UP to the project root, inclusive. + + Used by classify_files to distinguish files loaded eagerly by the agent + (in cwd's ancestor chain) from files loaded on-demand (in descendant + subdirectories). Mirrors the agent's actual loading model. 
+ """ + from reporails_cli.core.agent_discovery import resolve_project_root + + root = resolve_project_root(scan_root) + chain: set[Path] = set() + current = scan_root if scan_root.is_dir() else scan_root.parent + while True: + chain.add(current) + if current == root or current == current.parent: + break + current = current.parent + return chain + + +def _is_loose_leaf_pattern(pattern: str) -> bool: + """Pattern that can match a file at ANY directory depth. + + A "loose" pattern is either a bare filename (e.g. `CLAUDE.md`) or starts + with `**/` (e.g. `**/CLAUDE.md`). Such patterns are location-ambiguous — + the same file matches them whether it lives at cwd, an ancestor, or a + descendant. These need ancestor-chain disambiguation to distinguish + `main` (eager) from `nested_context` (on-demand). + + Path-prefixed patterns (e.g. `.github/copilot-instructions.md`, + `.claude/rules/**/*.md`) are NOT loose — the path prefix already + constrains where the file lives, so no further disambiguation is + needed. + """ + if pattern.startswith("**/"): + return True + # Bare leaf with no path separators + return "/" not in pattern and "**" not in pattern + + +def _location_matches_mode( + file_path: Path, + ft: FileTypeDeclaration, + ancestor_chain: set[Path], + matched_pattern: str, +) -> bool: + """Check whether file's location fits the file_type's loading model. + + Eager global file_types (scope=global, loading=session_start) — like + `main`, `override`, `agents_md`, `cross_read` — match only files in cwd's + ancestor chain WHEN the matched pattern is a loose leaf glob (`**/X.md` + or bare `X.md`). For path-prefixed patterns like `.github/X.md` the + pattern itself constrains location, so the ancestor-chain check is + skipped. + + Nested file_types (scope=nested) match only files OUTSIDE the ancestor + chain (descendants of cwd). Subtree applicability comes from file location + (no frontmatter); the agent loads these when descending into subdirs. 
+ + Other file_types (path_scoped rules, skills, agents, configs, etc.) + match anywhere their patterns find them. + """ + scope = ft.properties.get("scope") + loading = ft.properties.get("loading") + parent = file_path.parent + in_ancestor_chain = parent in ancestor_chain + + if scope == "global" and loading == "session_start": + # Only enforce ancestor-chain for loose leaf patterns; path-prefixed + # patterns already pin the file's location via the pattern itself. + if _is_loose_leaf_pattern(matched_pattern): + return in_ancestor_chain + return True + if scope == "nested": + return not in_ancestor_chain + return True + + def classify_files( scan_root: Path, files: list[Path], @@ -229,16 +367,24 @@ def classify_files( ) -> list[ClassifiedFile]: """Classify files against type declarations. First pattern match wins. + File_type semantics drive ancestor-vs-descendant disambiguation: when + two file_types share a pattern (e.g. main and nested_context both use + **/CLAUDE.md), the file's location relative to scan_root's ancestor + chain decides which declaration wins. + For freeform files, content_format is detected from file content. Args: - scan_root: Project root for computing relative paths + scan_root: Project root / cwd-equivalent for relative paths and + ancestor-chain anchoring. 
files: Files to classify file_types: Type declarations from agent config Returns: List of ClassifiedFile for matched files """ + ancestor_chain = _compute_ancestor_chain(scan_root) + classified: list[ClassifiedFile] = [] for file_path in files: try: @@ -247,27 +393,31 @@ def classify_files( rel = str(file_path) for ft in file_types: - if _matches_any_pattern(rel, ft.patterns): - props = dict(ft.properties) - # Detect content_format for freeform files - fmt = props.get("format") - is_freeform = fmt == "freeform" or (isinstance(fmt, list) and "freeform" in fmt) - if is_freeform and "content_format" not in props: - try: - text = file_path.read_text(encoding="utf-8", errors="replace") - cf = detect_content_format(text) - if cf: - props["content_format"] = cf - except OSError: - pass - classified.append( - ClassifiedFile( - path=file_path, - file_type=ft.name, - properties=props, - ) + matched_pattern = _first_matching_pattern(rel, ft.patterns) + if matched_pattern is None: + continue + if not _location_matches_mode(file_path, ft, ancestor_chain, matched_pattern): + continue + props = dict(ft.properties) + # Detect content_format for freeform files + fmt = props.get("format") + is_freeform = fmt == "freeform" or (isinstance(fmt, list) and "freeform" in fmt) + if is_freeform and "content_format" not in props: + try: + text = file_path.read_text(encoding="utf-8", errors="replace") + cf = detect_content_format(text) + if cf: + props["content_format"] = cf + except OSError: + pass + classified.append( + ClassifiedFile( + path=file_path, + file_type=ft.name, + properties=props, ) - break # First match wins + ) + break # First valid match wins return classified @@ -363,13 +513,23 @@ def _matches_any_pattern(rel_path: str, patterns: tuple[str, ...]) -> bool: zero-or-more directory components by generating collapsed variants (e.g. ``a/**/b`` also tries ``a/b``). 
+ return _first_matching_pattern(rel_path, patterns) is not None + + +def _first_matching_pattern(rel_path: str, patterns: tuple[str, ...]) -> str | None: + """Return the first pattern that matches `rel_path`, or None. + + Used by classify_files so downstream location-mode checks can inspect + the specific matched pattern (loose leaf vs path-prefixed) for its + location-disambiguation decision. + """ p = PurePosixPath(rel_path) for pattern in patterns: clean = pattern.removeprefix("./") for variant in _expand_doublestar(clean): if p.match(variant): - return True - return False + return pattern + return None def _expand_doublestar(pattern: str) -> list[str]: diff --git a/src/reporails_cli/core/config.py b/src/reporails_cli/core/config.py index 89824f0..04bb877 100644 --- a/src/reporails_cli/core/config.py +++ b/src/reporails_cli/core/config.py @@ -83,10 +83,53 @@ def get_global_config() -> GlobalConfig: return GlobalConfig() +def _deep_merge_config(base: dict[str, object], overlay: dict[str, object]) -> dict[str, object]: + """Deep-merge `overlay` onto `base` for project config layering. + + Object keys merge recursively. Array keys extend (overlay items not already + in base are appended after base). Scalar keys are replaced by overlay. Used + to layer `.ails/config.local.yml` on top of `.ails/config.yml`. 
+ """ + if not overlay: + return base + if not base: + return dict(overlay) + merged: dict[str, object] = dict(base) + for key, ov in overlay.items(): + existing = merged.get(key) + if isinstance(existing, dict) and isinstance(ov, dict): + merged[key] = _deep_merge_config(existing, ov) + elif isinstance(existing, list) and isinstance(ov, list): + merged[key] = list(existing) + [v for v in ov if v not in existing] + else: + merged[key] = ov + return merged + + +def _load_yaml_dict(config_path: Path) -> dict[str, object] | None: + """Read a YAML file and return its top-level dict, or None on error/missing.""" + if not config_path.exists(): + return None + try: + data = load_yaml_file(config_path) + if not data: + return None + if not isinstance(data, dict): + logger.warning("Config file %s did not parse to a mapping", config_path) + return None + return data + except (yaml.YAMLError, OSError) as exc: + logger.warning("Failed to parse project config %s: %s", config_path, exc) + return None + + def get_project_config(project_root: Path) -> ProjectConfig: - """Load project configuration from .ails/config.yml. + """Load project configuration from .ails/config.yml + .ails/config.local.yml. - Returns default config if file doesn't exist or is malformed. + `.ails/config.local.yml` (gitignored) layers on top of the committed + `.ails/config.yml` for personal/CI-specific overrides. + + Returns default config if neither file exists or both are malformed. 
Args: project_root: Root directory of the project @@ -96,40 +139,51 @@ def get_project_config(project_root: Path) -> ProjectConfig: """ from reporails_cli.core.models import ProjectConfig - config_path = project_root / ".ails" / "config.yml" - if not config_path.exists(): - global_cfg = get_global_config() - return ProjectConfig( - default_agent=global_cfg.default_agent, - recommended=global_cfg.recommended, - ) + base = _load_yaml_dict(project_root / ".ails" / "config.yml") or {} + local = _load_yaml_dict(project_root / ".ails" / "config.local.yml") or {} + data = _deep_merge_config(base, local) - try: - data = load_yaml_file(config_path) - if not data: - msg = f"Project config is empty: {config_path}" - raise ValueError(msg) - has_recommended = "recommended" in data - config = ProjectConfig( - framework_version=data.get("framework_version"), - packages=data.get("packages", []), - disabled_rules=data.get("disabled_rules", []), - overrides=data.get("overrides", {}), - recommended=data.get("recommended", True), - exclude_dirs=data.get("exclude_dirs", []), - default_agent=data.get("default_agent", ""), - ) - # Apply global defaults where project doesn't override - global_cfg = get_global_config() - if not config.default_agent: - config.default_agent = global_cfg.default_agent - if not has_recommended: - config.recommended = global_cfg.recommended - return config - except (yaml.YAMLError, OSError, ValueError) as exc: - logger.warning("Failed to parse project config %s: %s", config_path, exc) + if not data: global_cfg = get_global_config() return ProjectConfig( default_agent=global_cfg.default_agent, recommended=global_cfg.recommended, ) + + has_recommended = "recommended" in data + + def _str_list(key: str) -> list[str]: + val = data.get(key) + return list(val) if isinstance(val, list) else [] + + def _str_dict(key: str) -> dict[str, dict[str, object]]: + val = data.get(key) + return dict(val) if isinstance(val, dict) else {} + + fw = data.get("framework_version") + 
fw_str = fw if isinstance(fw, str) else None + rec = data.get("recommended", True) + rec_bool = bool(rec) if not isinstance(rec, bool) else rec + da = data.get("default_agent", "") + da_str = da if isinstance(da, str) else "" + ovr = data.get("overrides", {}) + ovr_dict: dict[str, dict[str, str]] = ovr if isinstance(ovr, dict) else {} + + config = ProjectConfig( + framework_version=fw_str, + packages=_str_list("packages"), + disabled_rules=_str_list("disabled_rules"), + overrides=ovr_dict, + recommended=rec_bool, + exclude_dirs=_str_list("exclude_dirs"), + default_agent=da_str, + agents=_str_dict("agents"), + surfaces=_str_dict("surfaces"), + ) + # Apply global defaults where project doesn't override + global_cfg = get_global_config() + if not config.default_agent: + config.default_agent = global_cfg.default_agent + if not has_recommended: + config.recommended = global_cfg.recommended + return config diff --git a/src/reporails_cli/core/engine_helpers.py b/src/reporails_cli/core/engine_helpers.py index ac89fa4..26dffcf 100644 --- a/src/reporails_cli/core/engine_helpers.py +++ b/src/reporails_cli/core/engine_helpers.py @@ -15,23 +15,56 @@ Violation, ) +# Project-root marker directories that signal "this is a project". Used by +# _find_project_root for cache-key derivation and mapper coordination — NOT +# by discovery (see agent_discovery.resolve_project_root). +# +# Only IDE-workspace markers and the GitHub root marker are treated as project +# signals. Agent-specific config dirs (.cursor/, .claude/, .codex/, .gemini/, +# .agents/) are NOT project-root indicators — they can legitimately exist in +# subdirectories (per-package agent configs in monorepos), so using them as +# project-root signals would misidentify subprojects as the actual project root. 
+_PROJECT_MARKER_DIRS: frozenset[str] = frozenset( + { + ".vscode", + ".idea", + ".github", + } +) + def _find_project_root(target: Path) -> Path: - """Walk up from target to find project root (nearest backbone > .git > target). + """Walk up from target to find project root for cache/mapper purposes. + + Priority (closer wins; first match returned): + 1. .ails/backbone.yml — Reporails-aware project marker + 2. .git — version control root + 3. An IDE-workspace or GitHub marker directory: .vscode/, .idea/, + .github/ (agent config dirs such as .claude/ are not markers) + + Falls back to `target` if no marker is found anywhere up the tree. Used + for cache key derivation and mapper coordination so that worktrees and + subdirectories of the same repo share one cache namespace. - Nearest backbone wins: if the target itself has a backbone.yml, that IS - the project root — don't walk past it to a parent coordination root. + Discovery does NOT consult this function — see + agent_discovery.resolve_project_root for discovery boundary semantics. """ current = target if target.is_dir() else target.parent first_git = None + first_marker = None while current != current.parent: - if (current / ".git").exists() and first_git is None: - first_git = current backbone = current / ".ails" / "backbone.yml" if backbone.exists(): return current + if (current / ".git").exists() and first_git is None: + first_git = current + if first_marker is None: + for marker in _PROJECT_MARKER_DIRS: + if (current / marker).is_dir(): + first_marker = current + break current = current.parent - return first_git or target + return first_git or first_marker or target _SEVERITY_ORDER = { diff --git a/src/reporails_cli/core/registry.py b/src/reporails_cli/core/registry.py index 4b64981..1c6a51f 100644 --- a/src/reporails_cli/core/registry.py +++ b/src/reporails_cli/core/registry.py @@ -152,14 +152,16 @@ def load_rules( # pylint: disable=too-many-locals # 4c. 
Handle supersession: agent rules that supersede CORE rules # inherit the CORE checks and replace the CORE rule in the set. - _apply_supersession(rules) + superseded_by = _apply_supersession(rules) # 4d. Handle inheritance: rules that inherit checks from a parent # without replacing the parent (both stay active). _apply_inheritance(rules) - # 4e. Validate depends_on: detect circular dependency chains. - _validate_depends_on(rules) + # 4e. Validate depends_on: detect circular dependency chains. Supersession + # redirects are honored — a depends_on pointing at a superseded rule is + # satisfied by its active successor. + _validate_depends_on(rules, superseded_by) # 5. Remove disabled rules (merge from project_root + scan_root configs) config = _load_project_config(project_root) @@ -201,25 +203,31 @@ def _is_other_agent_rule(rule_id: str, agent_prefix: str) -> bool: return agent_prefix not in namespace -def _apply_supersession(rules: dict[str, Rule]) -> None: +def _apply_supersession(rules: dict[str, Rule]) -> dict[str, str]: """Handle rule supersession: agent rules inherit CORE checks and replace CORE rules. When CLAUDE:S:0012 supersedes CORE:S:0038, the CORE checks are inherited (unless explicitly replaced), and the CORE rule is removed from the set. Modifies the rules dict in place. + + Returns a mapping `{superseded_id: successor_id}` so downstream validators + (e.g. `_validate_depends_on`) can resolve `depends_on: [CORE:S:xxxx]` + references that point at a superseded rule. The successor satisfies the + dependency in place of the original. 
""" - superseded_ids: set[str] = set() + superseded_by: dict[str, str] = {} for rule_id, rule in list(rules.items()): if not rule.supersedes or rule.supersedes not in rules: continue parent = rules[rule.supersedes] - superseded_ids.add(rule.supersedes) + superseded_by[rule.supersedes] = rule_id # Inherit parent checks that aren't replaced by the agent rule replaced_ids = {c.replaces for c in rule.checks if c.replaces} inherited = [c for c in parent.checks if c.id not in replaced_ids] rules[rule_id] = rule.model_copy(update={"checks": inherited + list(rule.checks)}) - for sid in superseded_ids: + for sid in superseded_by: del rules[sid] + return superseded_by def _apply_inheritance(rules: dict[str, Rule]) -> None: @@ -238,16 +246,24 @@ def _apply_inheritance(rules: dict[str, Rule]) -> None: rules[rule_id] = rule.model_copy(update={"checks": parent_checks + list(rule.checks)}) -def _validate_depends_on(rules: dict[str, Rule]) -> None: +def _validate_depends_on(rules: dict[str, Rule], superseded_by: dict[str, str] | None = None) -> None: """Validate depends_on references and detect circular dependency chains. Logs warnings for invalid references and circular chains rather than raising errors, so rule loading is resilient to bad metadata. + + `superseded_by` maps removed rule ids to their successor ids; a depends_on + pointing at a superseded rule is satisfied by the successor. """ + redirects = superseded_by or {} for rule_id, rule in rules.items(): for dep_id in rule.depends_on: - if dep_id not in rules: - logger.warning("Rule %s depends_on %s which is not loaded", rule_id, dep_id) + if dep_id in rules: + continue + if dep_id in redirects and redirects[dep_id] in rules: + # Dependency was superseded by an active rule — treat as satisfied. 
+ continue + logger.warning("Rule %s depends_on %s which is not loaded", rule_id, dep_id) _detect_dependency_cycles(rules) diff --git a/src/reporails_cli/core/results.py b/src/reporails_cli/core/results.py index 82cd962..0f0cfa8 100644 --- a/src/reporails_cli/core/results.py +++ b/src/reporails_cli/core/results.py @@ -116,7 +116,7 @@ class GlobalConfig: @dataclass class ProjectConfig: # pylint: disable=too-many-instance-attributes - """Project-level configuration (.ails/config.yml).""" + """Project-level configuration (.ails/config.yml + .ails/config.local.yml).""" framework_version: str | None = None # Pin version packages: list[str] = field(default_factory=list) # Project rule packages @@ -125,6 +125,12 @@ class ProjectConfig: # pylint: disable=too-many-instance-attributes recommended: bool = True # Include recommended rules (opt out with false) exclude_dirs: list[str] = field(default_factory=list) # Directory names to exclude default_agent: str = "" # Default agent when --agent not specified (e.g., "claude") + # Per-agent overrides keyed by agent id. Currently supports `fallback_filenames` + # (additional instruction filenames Codex / others may treat as candidates). + agents: dict[str, dict[str, object]] = field(default_factory=dict) + # Per-surface include/exclude pattern adjustments. Keys are `{agent}.{file_type}`. + # Each entry may have `include: [glob...]` and `exclude: [glob...]`. 
+ surfaces: dict[str, dict[str, object]] = field(default_factory=dict) # ============================================================================= diff --git a/src/reporails_cli/formatters/text/display.py b/src/reporails_cli/formatters/text/display.py index 267ff93..b8daa7c 100644 --- a/src/reporails_cli/formatters/text/display.py +++ b/src/reporails_cli/formatters/text/display.py @@ -172,9 +172,10 @@ def _print_file_card( # ── Group rendering ─────────────────────────────────────────────────── -_GROUP_ORDER = ("main", "agent", "skill", "rule", "config", "memory") +_GROUP_ORDER = ("main", "nested", "agent", "skill", "rule", "config", "memory") _GROUP_LABELS = { "main": "Main", + "nested": "Nested", "agent": "Agents", "skill": "Skills", "rule": "Rules", diff --git a/src/reporails_cli/formatters/text/display_constants.py b/src/reporails_cli/formatters/text/display_constants.py index 25e7db4..10e45c8 100644 --- a/src/reporails_cli/formatters/text/display_constants.py +++ b/src/reporails_cli/formatters/text/display_constants.py @@ -125,11 +125,16 @@ # ── File classification lookup tables ───────────────────────────────── _CONFIG_NAMES = frozenset(("settings.json", ".mcp.json", "config.yml", "settings.local.json")) -_MAIN_NAMES = frozenset(("CLAUDE.MD", "AGENTS.MD", ".CURSORRULES", ".WINDSURFRULES", "COPILOT-INSTRUCTIONS.MD")) +# Case-sensitive — matches agent specs (CLAUDE.md, AGENTS.md uppercase per +# Codex source `DEFAULT_AGENTS_MD_FILENAME = "AGENTS.md"` and the agents.md +# spec). Wrong-case copies (e.g. `agents.md` lowercase in skill assets) are +# not real instruction files. 
+_MAIN_NAMES = frozenset(("CLAUDE.md", "AGENTS.md", ".cursorrules", ".windsurfrules", "copilot-instructions.md")) -_TYPE_ORDER = ["main", "rule", "skill", "agent", "config", "memory", "file"] +_TYPE_ORDER = ["main", "nested", "rule", "skill", "agent", "config", "memory", "file"] _TYPE_PLURALS = { "main": "main", + "nested": "nested", "rule": "rules", "skill": "skills", "agent": "agents", @@ -186,16 +191,31 @@ def _classify_by_name(name: str, parts: tuple[str, ...]) -> str: return "config" if "memory" in parts: return "memory" - if name.upper() in _MAIN_NAMES: - return "main" + # Case-sensitive — matches discovery (walk_glob) and agent specs. + # Wrong-case copies (e.g. `agents.md` lowercase) are not instruction files. + if name in _MAIN_NAMES: + # Files at the project root are `main`; subdirectory copies of the + # same filename are `nested` (per scope: nested in agent.schema.yml). + # `parts` for a relative path like `tests/CLAUDE.md` has length 2; + # a root-level `CLAUDE.md` has length 1. + return "main" if len(parts) <= 1 else "nested" return "" def friendly_name(filepath: str, tag: str) -> str: - """Extract a friendly display name from the tag. Falls back to filename.""" + """Extract a friendly display name from the tag. Falls back to filename. + + For `nested` files (subdirectory copies of CLAUDE.md / AGENTS.md / + GEMINI.md), return the FULL relative path so users can locate the file + — `web/CLAUDE.md` alone is ambiguous when the file actually lives at + `packages/web/CLAUDE.md`. 
+ """ if ":" in tag: return tag.split(":", 1)[1] p = Path(filepath) + if tag == "nested" and not p.is_absolute(): + # Show the full relative path for nested files so the user can find them + return p.as_posix() if p.parent.name and p.parent.name != ".": return f"{p.parent.name}/{p.name}" return p.name diff --git a/src/reporails_cli/formatters/text/scorecard.py b/src/reporails_cli/formatters/text/scorecard.py index a24e77e..b398abe 100644 --- a/src/reporails_cli/formatters/text/scorecard.py +++ b/src/reporails_cli/formatters/text/scorecard.py @@ -71,8 +71,15 @@ def print_score_line(score: float, tw: int) -> None: # ── Surface health ──────────────────────────────────────────────────── -_SURFACE_NAMES = {"main": "Main", "rule": "Rules", "skill": "Skills", "agent": "Agents", "memory": "Memory"} -_SURFACE_ORDER = ["main", "rule", "skill", "agent", "memory"] +_SURFACE_NAMES = { + "main": "Main", + "nested": "Nested", + "rule": "Rules", + "skill": "Skills", + "agent": "Agents", + "memory": "Memory", +} +_SURFACE_ORDER = ["main", "nested", "rule", "skill", "agent", "memory"] @dataclass diff --git a/src/reporails_cli/interfaces/cli/config_command.py b/src/reporails_cli/interfaces/cli/config_command.py index beb4bbc..fdf25ae 100644 --- a/src/reporails_cli/interfaces/cli/config_command.py +++ b/src/reporails_cli/interfaces/cli/config_command.py @@ -69,6 +69,41 @@ def _save_config(path: Path, data: dict[str, Any]) -> None: cp = _project_config_path(path) cp.parent.mkdir(parents=True, exist_ok=True) cp.write_text(yaml.safe_dump(data, default_flow_style=False, sort_keys=True), encoding="utf-8") + _ensure_ails_gitignore(cp.parent) + + +# .gitignore content for .ails/ — ignores itself plus the layered local config. +# The .gitignore is per-machine scaffolding (recreated when `ails config set` runs); +# it does not need to be tracked in version control. 
+_AILS_GITIGNORE_LINES = (".gitignore", "config.local.yml") + + +def _ensure_ails_gitignore(ails_dir: Path) -> None: + """Ensure `.ails/.gitignore` exists and lists `.gitignore` + `config.local.yml`. + + Idempotent: if the file already lists both entries, no change. If it + exists but is missing entries, append the missing ones. + """ + import contextlib + + gitignore = ails_dir / ".gitignore" + if gitignore.exists(): + try: + existing = gitignore.read_text(encoding="utf-8") + except OSError: + return + existing_lines = {line.strip() for line in existing.splitlines()} + missing = [entry for entry in _AILS_GITIGNORE_LINES if entry not in existing_lines] + if not missing: + return + suffix = "" if existing.endswith("\n") else "\n" + with contextlib.suppress(OSError): + gitignore.write_text(existing + suffix + "\n".join(missing) + "\n", encoding="utf-8") + return + body = "# Personal/CI overrides — see docs/configuration.md\n" + body += "\n".join(_AILS_GITIGNORE_LINES) + "\n" + with contextlib.suppress(OSError): + gitignore.write_text(body, encoding="utf-8") def _save_global_config(data: dict[str, Any]) -> None: diff --git a/tests/fixtures/projects/claude_only/.ails/backbone.yml b/tests/fixtures/projects/claude_only/.ails/backbone.yml new file mode 100644 index 0000000..0a70aff --- /dev/null +++ b/tests/fixtures/projects/claude_only/.ails/backbone.yml @@ -0,0 +1 @@ +version: 3 diff --git a/tests/fixtures/projects/codex_only/.ails/backbone.yml b/tests/fixtures/projects/codex_only/.ails/backbone.yml new file mode 100644 index 0000000..0a70aff --- /dev/null +++ b/tests/fixtures/projects/codex_only/.ails/backbone.yml @@ -0,0 +1 @@ +version: 3 diff --git a/tests/fixtures/projects/config_only/.ails/backbone.yml b/tests/fixtures/projects/config_only/.ails/backbone.yml new file mode 100644 index 0000000..0a70aff --- /dev/null +++ b/tests/fixtures/projects/config_only/.ails/backbone.yml @@ -0,0 +1 @@ +version: 3 diff --git 
a/tests/fixtures/projects/copilot_only/.ails/backbone.yml b/tests/fixtures/projects/copilot_only/.ails/backbone.yml new file mode 100644 index 0000000..0a70aff --- /dev/null +++ b/tests/fixtures/projects/copilot_only/.ails/backbone.yml @@ -0,0 +1 @@ +version: 3 diff --git a/tests/fixtures/projects/generic_only/.ails/backbone.yml b/tests/fixtures/projects/generic_only/.ails/backbone.yml new file mode 100644 index 0000000..0a70aff --- /dev/null +++ b/tests/fixtures/projects/generic_only/.ails/backbone.yml @@ -0,0 +1 @@ +version: 3 diff --git a/tests/fixtures/projects/multi_agent/.ails/backbone.yml b/tests/fixtures/projects/multi_agent/.ails/backbone.yml new file mode 100644 index 0000000..0a70aff --- /dev/null +++ b/tests/fixtures/projects/multi_agent/.ails/backbone.yml @@ -0,0 +1 @@ +version: 3 diff --git a/tests/fixtures/projects/nested_claude/.ails/backbone.yml b/tests/fixtures/projects/nested_claude/.ails/backbone.yml new file mode 100644 index 0000000..0a70aff --- /dev/null +++ b/tests/fixtures/projects/nested_claude/.ails/backbone.yml @@ -0,0 +1 @@ +version: 3 diff --git a/tests/unit/test_scan_scope.py b/tests/unit/test_scan_scope.py index 49ed088..fd24f86 100644 --- a/tests/unit/test_scan_scope.py +++ b/tests/unit/test_scan_scope.py @@ -1,7 +1,8 @@ """Tests for scan scope containment. -Verifies that `ails check /foo` only discovers and validates files inside /foo, -even when /foo is nested inside a larger project with .git or backbone markers. +Verifies that `ails check /foo` discovers files matching the agent's actual +loading model: ancestor walk from /foo to project root for eager files +(main, override), descendant walk from /foo for nested per-subdirectory files. 
External instruction surface files (~/..., absolute paths from config patterns) are intentionally included by agent discovery — they are part of the instruction @@ -18,6 +19,7 @@ get_all_instruction_files, get_all_scannable_files, ) +from reporails_cli.core.classification import classify_files, load_file_types from reporails_cli.core.engine_helpers import _find_project_root @@ -88,6 +90,12 @@ def setup_method(self) -> None: clear_agent_cache() def test_child_does_not_see_parent_files(self, tmp_path: Path) -> None: + """Running from child does NOT surface parent files — cwd is project root. + + Per the cwd-as-project-root semantic, `ails check child/` treats `child/` + as the project root. Parent files outside that subtree are out of scope, + regardless of whether a `.git` exists higher up. + """ child = _make_nested_project(tmp_path) agents = detect_agents(child) @@ -97,63 +105,72 @@ def test_child_does_not_see_parent_files(self, tmp_path: Path) -> None: all_files.extend(a.rule_files) all_files.extend(a.config_files) - # External instruction surface files (~/...) 
are by-design outside the repo local_files = [f for f in all_files if not _is_external(f, child)] for f in local_files: assert str(f).startswith(str(child)), f"File outside child scope: {f}" def test_parent_sees_hierarchical_files(self, tmp_path: Path) -> None: - """Config-driven discovery with **/CLAUDE.md finds hierarchical instruction files.""" + """Running from project root: root CLAUDE.md is main, descendant is nested.""" _make_nested_project(tmp_path) agents = detect_agents(tmp_path) all_files: list[Path] = [] for a in agents: all_files.extend(a.instruction_files) + all_files.extend(a.rule_files) paths = {f.as_posix() for f in all_files} - assert any(p.endswith("/CLAUDE.md") and "child" not in p for p in paths), "Should find root CLAUDE.md" - assert any("child/CLAUDE.md" in p for p in paths), "Should find child CLAUDE.md via ** pattern" + assert any(p.endswith("/CLAUDE.md") and "/child/" not in p for p in paths), "Should find root CLAUDE.md" + assert any("/child/CLAUDE.md" in p for p in paths), "Should find child CLAUDE.md via descendant walk" class TestInstructionFilesScope: - """get_all_instruction_files must only return files under the given root.""" + """get_all_instruction_files returns files inside cwd's subtree only (cwd = project root).""" def setup_method(self) -> None: clear_agent_cache() - def test_child_scope(self, tmp_path: Path) -> None: + def test_child_scope_bounded_by_target(self, tmp_path: Path) -> None: + """From child, files outside cwd's subtree are NOT in scope.""" child = _make_nested_project(tmp_path) files = get_all_instruction_files(child) - for f in files: - assert str(f).startswith(str(child)), f"File outside child scope: {f}" + # All non-external files must be under child (cwd is the project root). 
+ local_files = [f for f in files if not _is_external(f, child)] + for f in local_files: + assert str(f).startswith(str(child)), f"File outside child subtree: {f}" - def test_child_finds_its_own_files(self, tmp_path: Path) -> None: + def test_child_finds_own_files_only(self, tmp_path: Path) -> None: + """From child, only child's own files surface — parent files are out of scope.""" child = _make_nested_project(tmp_path) files = get_all_instruction_files(child) - names = {f.name for f in files} + paths = {f.as_posix() for f in files} - assert "CLAUDE.md" in names - assert "child.md" in names + assert (tmp_path / "CLAUDE.md").as_posix() not in paths, ( + "Parent CLAUDE.md must NOT surface — cwd is project root" + ) + assert any("/child/CLAUDE.md" in p for p in paths), "Child CLAUDE.md must surface" + # child.md is in child/.claude/rules/ — path_scoped descendant, surfaces + assert any(p.endswith("/child.md") for p in paths) class TestScannableFilesScope: - """get_all_scannable_files must only return files under the given root.""" + """get_all_scannable_files returns ancestor + descendant files of cwd, bounded by project root.""" def setup_method(self) -> None: clear_agent_cache() - def test_child_scope(self, tmp_path: Path) -> None: + def test_child_scope_bounded_by_project_root(self, tmp_path: Path) -> None: child = _make_nested_project(tmp_path) files = get_all_scannable_files(child) # External instruction surface files (~/...) 
are by-design outside the repo local_files = [f for f in files if not _is_external(f, child)] for f in local_files: - assert str(f).startswith(str(child)), f"File outside child scope: {f}" + assert str(f).startswith(str(tmp_path)), f"File outside project root: {f}" def test_child_does_not_include_parent_rules(self, tmp_path: Path) -> None: + """parent.md lives in tmp_path/.claude/rules/ — path_scoped descendant from child, not in scope.""" child = _make_nested_project(tmp_path) files = get_all_scannable_files(child) names = {f.name for f in files} @@ -162,23 +179,26 @@ def test_child_does_not_include_parent_rules(self, tmp_path: Path) -> None: class TestProjectRootVsScanRoot: - """project_root can differ from scan_root, but scan must stay in scan_root.""" + """Ancestor walk is bounded by project root; descendant walk anchors at scan_root.""" def setup_method(self) -> None: clear_agent_cache() def test_project_root_above_scan_root(self, tmp_path: Path) -> None: - """When project_root is above scan_root, file discovery still scoped to scan_root.""" + """Backbone project root above child: ancestor walk reaches it, files outside it are excluded.""" child = _make_backbone_project(tmp_path) project_root = _find_project_root(child) assert project_root == tmp_path, "project_root should walk up to backbone" - # But file discovery must stay within child (external surface files excluded) + # Ancestor walk reaches tmp_path/CLAUDE.md (the parent main file). External + # surface files are by-design outside; everything else stays within tmp_path. 
files = get_all_scannable_files(child) local_files = [f for f in files if not _is_external(f, child)] for f in local_files: - assert str(f).startswith(str(child)), f"File outside scan scope: {f}" + assert str(f).startswith(str(tmp_path)), f"File outside project root: {f}" + names = {f.name for f in files} + assert "CLAUDE.md" in names def test_project_root_equals_scan_root_when_no_parent(self, tmp_path: Path) -> None: """Standalone project: project_root == scan_root.""" @@ -217,15 +237,256 @@ def test_parent_agents_passed_to_child_scannable(self, tmp_path: Path) -> None: assert len(parent_files) > 0, "Confirms parent agents leak (by design of agents param)" def test_engine_uses_scan_root_agents(self, tmp_path: Path) -> None: - """The engine must detect agents at scan_root, not project_root.""" + """The engine detects agents at scan_root; ancestor walk surfaces parent files within project root.""" child = _make_backbone_project(tmp_path) - # This is what the engine should do (and now does) scan_root = child agents = detect_agents(scan_root) files = get_all_scannable_files(scan_root, agents=agents) - # External instruction surface files (~/...) are by-design outside the repo + # Files must be within the project root (tmp_path = backbone root). + # External instruction surface files (~/...) are by-design outside the repo. 
local_files = [f for f in files if not _is_external(f, child)] for f in local_files: - assert str(f).startswith(str(child)), f"File outside scan scope: {f}" + assert str(f).startswith(str(tmp_path)), f"File outside project root: {f}" + + +def _classify_for_agent(scan_root: Path, files: list[Path], agent: str) -> dict[str, str]: + """Classify files via the agent's config and return {filename → file_type} for inspection.""" + file_types = load_file_types(agent) + classified = classify_files(scan_root, files, file_types) + out: dict[str, str] = {} + for cf in classified: + out[cf.path.as_posix()] = cf.file_type + return out + + +class TestAncestorWalkAndClassification: + """Discovery and classification mirror agent loading model.""" + + def setup_method(self) -> None: + clear_agent_cache() + + def test_target_files_only_no_ancestor_walk(self, tmp_path: Path) -> None: + """Cwd is project root: parent CLAUDE.md files are NOT surfaced.""" + (tmp_path / ".git").mkdir() + (tmp_path / "CLAUDE.md").write_text("# root") + a = tmp_path / "a" + a.mkdir() + (a / "CLAUDE.md").write_text("# a") + b = a / "b" + b.mkdir() + + # Running from `b` — both ancestors are out of scope + files = get_all_instruction_files(b) + names = {f.as_posix() for f in files} + assert (tmp_path / "CLAUDE.md").as_posix() not in names + assert (a / "CLAUDE.md").as_posix() not in names + + def test_files_above_target_are_out_of_scope(self, tmp_path: Path) -> None: + """A file above the target is excluded even if a `.git` lives between them.""" + (tmp_path / "CLAUDE.md").write_text("# outside") + repo = tmp_path / "repo" + repo.mkdir() + (repo / ".git").mkdir() + (repo / "CLAUDE.md").write_text("# repo") + a = repo / "a" + a.mkdir() + (a / "CLAUDE.md").write_text("# a") + + # Running from `a` — neither tmp_path/CLAUDE.md nor repo/CLAUDE.md surfaces + files = get_all_instruction_files(a) + names = {f.as_posix() for f in files} + assert (a / "CLAUDE.md").as_posix() in names + assert (repo / 
"CLAUDE.md").as_posix() not in names + assert (tmp_path / "CLAUDE.md").as_posix() not in names + + def test_nested_classified_not_as_main(self, tmp_path: Path) -> None: + """A descendant CLAUDE.md must classify as child_instruction, not main. + + Regression test for the activepieces bug: nested per-package CLAUDE.md + was being tagged `main`, causing the size rule to fire against it. + """ + (tmp_path / ".git").mkdir() + (tmp_path / "CLAUDE.md").write_text("# root") + sub = tmp_path / "sub" + sub.mkdir() + (sub / "CLAUDE.md").write_text("# sub") + + files = get_all_instruction_files(tmp_path) + types = _classify_for_agent(tmp_path, files, "claude") + + assert types[(tmp_path / "CLAUDE.md").as_posix()] == "main" + assert types[(sub / "CLAUDE.md").as_posix()] == "child_instruction" + + def test_sibling_tree_excluded_from_subdir_run(self, tmp_path: Path) -> None: + """Running from a subdirectory: parent + sibling files are all out of scope.""" + (tmp_path / ".git").mkdir() + (tmp_path / "CLAUDE.md").write_text("# root") + a = tmp_path / "a" + a.mkdir() + (a / "CLAUDE.md").write_text("# a") + sibling = tmp_path / "sibling" + sibling.mkdir() + (sibling / "CLAUDE.md").write_text("# sibling") + b = a / "b" + b.mkdir() + + # Running from `b` — only `b` and below are in scope + files = get_all_instruction_files(b) + names = {f.as_posix() for f in files} + assert (tmp_path / "CLAUDE.md").as_posix() not in names + assert (a / "CLAUDE.md").as_posix() not in names, "Parent CLAUDE.md must NOT surface — cwd is project root" + assert (sibling / "CLAUDE.md").as_posix() not in names + + def test_local_override_at_target(self, tmp_path: Path) -> None: + """CLAUDE.local.md surfaces at cwd; ancestors are out of scope.""" + (tmp_path / ".git").mkdir() + (tmp_path / "CLAUDE.md").write_text("# root") + (tmp_path / "CLAUDE.local.md").write_text("# root local") + a = tmp_path / "a" + a.mkdir() + (a / "CLAUDE.md").write_text("# a") + (a / "CLAUDE.local.md").write_text("# a local") + + # 
Running from `a` — only `a/CLAUDE.local.md` surfaces; tmp_path's is out of scope + files = get_all_instruction_files(a) + names = {f.as_posix() for f in files} + assert (a / "CLAUDE.local.md").as_posix() in names + assert (tmp_path / "CLAUDE.local.md").as_posix() not in names + + def test_activepieces_shape(self, tmp_path: Path) -> None: + """Monorepo regression: root AGENTS.md is main; per-package files are nested. + + Mirrors specs/tmp/activepieces/ shape — the user-reported bug: + running `ails check` at the root tagged every per-package CLAUDE.md + as main, triggering false-positive size violations. + """ + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("# root agents") + # Activepieces has a CLAUDE.md → AGENTS.md symlink at root + (tmp_path / "CLAUDE.md").symlink_to(tmp_path / "AGENTS.md") + packages = tmp_path / "packages" + packages.mkdir() + for sub, leaf in [ + ("shared", "CLAUDE.md"), + ("server", "AGENTS.md"), + ("pieces", "CLAUDE.md"), + ("web", "AGENTS.md"), + ]: + d = packages / sub + d.mkdir() + (d / leaf).write_text(f"# {sub}") + engine = packages / "server" / "engine" + engine.mkdir() + (engine / "CLAUDE.md").write_text("# engine") + + files = get_all_instruction_files(tmp_path) + + # Codex sees AGENTS.md files: root as main, packages/server and packages/web as nested + codex_types = _classify_for_agent(tmp_path, files, "codex") + assert codex_types.get((tmp_path / "AGENTS.md").as_posix()) == "main" + assert codex_types.get((packages / "server" / "AGENTS.md").as_posix()) == "nested_context" + assert codex_types.get((packages / "web" / "AGENTS.md").as_posix()) == "nested_context" + + # Claude sees CLAUDE.md files: nested ones are child_instruction, not main + claude_types = _classify_for_agent(tmp_path, files, "claude") + nested_claude_paths = [ + packages / "shared" / "CLAUDE.md", + packages / "pieces" / "CLAUDE.md", + packages / "server" / "engine" / "CLAUDE.md", + ] + for p in nested_claude_paths: + assert 
claude_types.get(p.as_posix()) == "child_instruction", f"{p} must NOT be classified as main" + + def test_copilot_root_only_pattern(self, tmp_path: Path) -> None: + """`.github/copilot-instructions.md` is project-root-only — resolved from cwd.""" + (tmp_path / ".git").mkdir() + gh = tmp_path / ".github" + gh.mkdir() + (gh / "copilot-instructions.md").write_text("# repo") + # Stray copilot-instructions.md in a subdirectory of cwd + sub_gh = tmp_path / "sub" / ".github" + sub_gh.mkdir(parents=True) + (sub_gh / "copilot-instructions.md").write_text("# stray") + + # Running from tmp_path (the project root) — only tmp_path/.github/ surfaces + files = get_all_scannable_files(tmp_path) + names = {f.as_posix() for f in files} + assert (gh / "copilot-instructions.md").as_posix() in names + assert (sub_gh / "copilot-instructions.md").as_posix() not in names + + +class TestProjectConfigSurfaceAdjustments: + """`.ails/config.yml` surface include/exclude + Codex fallback filenames.""" + + def setup_method(self) -> None: + clear_agent_cache() + + def test_codex_fallback_filenames_surface(self, tmp_path: Path) -> None: + """`agents.codex.fallback_filenames` adds candidate main files for codex. + + `.codex/config.toml` is required to make codex unambiguously detected — + without it, the codex/generic disambiguation drops codex (per the + `_disambiguate_codex_generic` heuristic) and the fallback patterns + attached to the codex agent never fire. The fixture mirrors a real + Codex-using project. 
+ """ + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("# main") + (tmp_path / "TEAM_GUIDE.md").write_text("# fallback") + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "config.toml").write_text("# codex marker\n") + ails = tmp_path / ".ails" + ails.mkdir() + (ails / "config.yml").write_text( + 'schema_version: "0.1.0"\nagents:\n codex:\n fallback_filenames: ["TEAM_GUIDE.md"]\n' + ) + + file_types = load_file_types("codex", project_root=tmp_path) + files = get_all_instruction_files(tmp_path) + types = {cf.path.name: cf.file_type for cf in classify_files(tmp_path, files, file_types)} + assert types.get("AGENTS.md") == "main" + assert types.get("TEAM_GUIDE.md") == "main", "fallback filename must classify as main" + + def test_surface_exclude_drops_files(self, tmp_path: Path) -> None: + """`surfaces...exclude` filters out matching files.""" + (tmp_path / ".git").mkdir() + (tmp_path / "CLAUDE.md").write_text("# main") + legacy = tmp_path / "legacy" + legacy.mkdir() + (legacy / "CLAUDE.md").write_text("# legacy") + ails = tmp_path / ".ails" + ails.mkdir() + (ails / "config.yml").write_text( + 'schema_version: "0.1.0"\nsurfaces:\n claude.child_instruction:\n exclude: ["legacy/**"]\n' + ) + + files = get_all_instruction_files(tmp_path) + names = {f.as_posix() for f in files} + assert (tmp_path / "CLAUDE.md").as_posix() in names + assert (legacy / "CLAUDE.md").as_posix() not in names, "exclude pattern must drop matching files" + + def test_config_local_layered_overrides_committed(self, tmp_path: Path) -> None: + """`.ails/config.local.yml` layers on top of `.ails/config.yml`.""" + (tmp_path / ".git").mkdir() + (tmp_path / "CLAUDE.md").write_text("# main") + keep = tmp_path / "keep" + keep.mkdir() + (keep / "CLAUDE.md").write_text("# keep") + drop = tmp_path / "drop" + drop.mkdir() + (drop / "CLAUDE.md").write_text("# drop") + + ails = tmp_path / ".ails" + ails.mkdir() + # Committed config: no excludes + (ails / 
"config.yml").write_text('schema_version: "0.1.0"\n') + # Local override: exclude drop/ + (ails / "config.local.yml").write_text('surfaces:\n claude.child_instruction:\n exclude: ["drop/**"]\n') + + files = get_all_instruction_files(tmp_path) + names = {f.as_posix() for f in files} + assert (keep / "CLAUDE.md").as_posix() in names + assert (drop / "CLAUDE.md").as_posix() not in names diff --git a/uv.lock b/uv.lock index 12b769b..5153395 100644 --- a/uv.lock +++ b/uv.lock @@ -1934,7 +1934,7 @@ wheels = [ [[package]] name = "reporails-cli" -version = "0.5.6" +version = "0.5.7" source = { editable = "." } dependencies = [ { name = "httpx" },