diff --git a/pyproject.toml b/pyproject.toml index 47d87be..37dd033 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,8 @@ include = [ { path = "LICENSE", format = "wheel" }, { path = "roar/bin/*", format = "sdist" }, { path = "roar/bin/*", format = "wheel" }, + { path = "roar/cli/templates/**/*", format = "sdist" }, + { path = "roar/cli/templates/**/*", format = "wheel" }, { path = "roar_inject.pth", format = "wheel" }, ] diff --git a/roar/cli/commands/init.py b/roar/cli/commands/init.py index 2a2c3fd..8d71377 100644 --- a/roar/cli/commands/init.py +++ b/roar/cli/commands/init.py @@ -203,7 +203,7 @@ def init_project(cwd: Path) -> Path: return roar_dir -@click.command("init") +@click.group("init", invoke_without_command=True) @click.option( "--yes", "-y", @@ -225,8 +225,8 @@ def init_project(cwd: Path) -> Path: type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), help="Initialize roar in the given directory instead of the current directory.", ) -@click.pass_obj -def init(ctx: RoarContext, yes: bool, no: bool, init_path: Path | None) -> None: +@click.pass_context +def init(click_ctx: click.Context, yes: bool, no: bool, init_path: Path | None) -> None: """Initialize roar in current directory. 
Creates a .roar directory for storing tracking data, a config.toml @@ -242,7 +242,13 @@ def init(ctx: RoarContext, yes: bool, no: bool, init_path: Path | None) -> None: roar init -n # Initialize without modifying gitignore roar init --path /some/dir # Initialize in a specific directory + + roar init agents # Install agent-facing guidance (skill + AGENTS.md) """ + if click_ctx.invoked_subcommand is not None: + return + + ctx: RoarContext = click_ctx.obj cwd = init_path if init_path is not None else ctx.cwd target_repo_root = RoarContext._get_repo_root(cwd) @@ -305,3 +311,10 @@ def init(ctx: RoarContext, yes: bool, no: bool, init_path: Path | None) -> None: click.echo("Skipped .gitignore update.") click.echo("Done.") + + +# Register subcommands. Imported here (not at top of file) so the heavier +# commands above keep their lazy import behavior unaffected. +from .init_agents import init_agents as _init_agents # noqa: E402 + +init.add_command(_init_agents) diff --git a/roar/cli/commands/init_agents.py b/roar/cli/commands/init_agents.py new file mode 100644 index 0000000..aab8906 --- /dev/null +++ b/roar/cli/commands/init_agents.py @@ -0,0 +1,292 @@ +"""Implementation of ``roar init agents``. + +Sets up agent-facing guidance for the current project and machine: + +* writes ``~/.claude/skills/roar/SKILL.md`` (Claude-specific skill) +* appends a roar section to ``./AGENTS.md`` (cross-agent guidance), bracketed + by managed markers so the section can be re-rendered idempotently without + disturbing surrounding content. 
+""" + +from __future__ import annotations + +import hashlib +from dataclasses import dataclass +from pathlib import Path + +import click + +from roar.cli import __version__ + +# ---- template loading ---- + +_TEMPLATE_DIR = Path(__file__).parent.parent / "templates" / "agents" +_SKILL_TEMPLATE_PATH = _TEMPLATE_DIR / "SKILL.md.tmpl" +_AGENTS_SECTION_TEMPLATE_PATH = _TEMPLATE_DIR / "agents_section.md.tmpl" + +# Markers used to fence the managed AGENTS.md region. +AGENTS_BEGIN_MARKER = ( + "" +) +AGENTS_END_MARKER = "" + + +def render_skill(version: str = __version__) -> str: + """Render the Claude skill markdown with the given version stamped in.""" + return _SKILL_TEMPLATE_PATH.read_text().format(version=version) + + +def render_agents_section() -> str: + """Return the AGENTS.md section body (without surrounding markers).""" + return _AGENTS_SECTION_TEMPLATE_PATH.read_text() + + +def render_agents_block(version: str = __version__) -> str: + """Return the full marker-fenced AGENTS.md block.""" + body = render_agents_section() + return ( + f"{AGENTS_BEGIN_MARKER}\n" + f"\n" + f"{body.rstrip()}\n" + f"{AGENTS_END_MARKER}\n" + ) + + +# ---- AGENTS.md edit logic ---- + + +@dataclass(frozen=True) +class AgentsUpdate: + new_content: str + action: str # "create", "append", "update", "noop" + + +def update_agents_file(existing: str | None, version: str = __version__) -> AgentsUpdate: + """Compute the new AGENTS.md content given the existing content (or None). + + * If the file does not exist, create one with a brief header and the block. + * If the file exists but has no roar block, append the block. + * If a block exists and matches the new content, no-op. + * If a block exists and differs, replace just the block. 
+ """ + block = render_agents_block(version) + + if existing is None: + new = ( + "# AGENTS.md\n" + "\n" + "Guidance for AI coding agents working in this repo.\n" + "See https://agents.md for the convention.\n" + "\n" + f"{block}" + ) + return AgentsUpdate(new_content=new, action="create") + + begin_idx = existing.find(AGENTS_BEGIN_MARKER) + end_idx = existing.find(AGENTS_END_MARKER) + if begin_idx == -1 or end_idx == -1 or end_idx < begin_idx: + sep = "" if existing.endswith("\n") else "\n" + new = f"{existing}{sep}\n{block}" + return AgentsUpdate(new_content=new, action="append") + + end_after = end_idx + len(AGENTS_END_MARKER) + # Consume a trailing newline so we don't accumulate blank lines on each run. + if end_after < len(existing) and existing[end_after] == "\n": + end_after += 1 + new = existing[:begin_idx] + block + existing[end_after:] + if new == existing: + return AgentsUpdate(new_content=existing, action="noop") + return AgentsUpdate(new_content=new, action="update") + + +# ---- skill file edit logic ---- + + +def _has_managed_marker(text: str) -> bool: + return " + +# roar + +`roar` is a CLI for ML/data pipeline provenance. It wraps normal commands with `roar run` to automatically capture inputs, outputs, command args, timing, and git context. Lineage is stored locally in `.roar/roar.db` and can be published to GLaaS. + +## Core principle + +**You don't change your code to use roar — you change how you launch it.** +Replace `python train.py` with `roar run python train.py`. Everything else (file reads, file writes, package versions, git state) is captured automatically. 
+ +## When to use roar + +- The user wants reproducibility, lineage, or "what produced this artifact" +- They're running ML training, data processing, or any pipeline where inputs/outputs matter +- They've asked you to investigate provenance, trace artifacts back to producers, or compare runs + +## Essential commands + +| Command | What it does | +|---|---| +| `roar init` | Set up `.roar/` in the current project (one-time) | +| `roar run <command>` | Execute `<command>` with full provenance capture | +| `roar build <command>` | Track a build/setup step (separate from main pipeline) | +| `roar status` | Show active session summary | +| `roar log` | List jobs in the active session | +| `roar show [REF]` | Inspect a session, job, or artifact. `REF` can be `@1` (step), 8-char hex (job UID), or longer hex (artifact hash) | +| `roar show --all [REF]` | Same, but disable truncation of packages, env vars, and job lists | +| `roar dag` | Visualize the local execution DAG | +| `roar lineage <artifact>` | Inspect lineage for a tracked artifact | +| `roar inputs <target>` | Show root input artifacts for a target | + +## Common workflows + +### Investigating a session +```bash +roar status # what's the active session? +roar log # what jobs ran? +roar show @1 # details of step 1 +roar show --all @1 # full details (no truncation) +``` + +### Tracing an artifact +```bash +roar show <artifact-hash> # who made it, what consumed it +roar lineage <artifact> # full upstream/downstream graph +roar inputs <target> # root inputs +``` + +### Running with provenance +```bash +roar run python train.py --epochs 10 +roar run bash scripts/preprocess.sh +roar build pip install -r requirements.txt # build step, not main pipeline +``` + +## Detection: is a script running under roar? + +If a Python script needs to *require* that it is being run under `roar run`, add at the top: + +```python +from roar import require # exits with a helpful message if not under `roar run` +``` + +The import itself walks the process tree — no env var spoofing. Set `ROAR_GUARD=0` to bypass in tests. 
+ +## Tracer backends + +roar captures I/O via one of three tracers (auto-selected, configurable via `roar tracer`): +- **eBPF** — kernel-level, lowest overhead, requires Linux + privileges +- **preload** — `LD_PRELOAD` / `DYLD_INSERT_LIBRARIES`, works on Linux + macOS, no privileges +- **ptrace** — fallback, works anywhere, slower + +Workloads should NOT need to know which backend is active. + +## Gotchas + +- **Ray workers**: `ROAR_WRAP=1` is set on the driver but NOT on Ray workers. `ROAR_SESSION_ID` and `ROAR_JOB_ID` are propagated. Use those if a worker needs to detect provenance context. +- **Osmo workers**: no env var propagation today. +- **Don't make user code import roar** for tracking purposes — capture is transparent. The only exception is `from roar import require` for the abort-if-not-tracked guard. +- **Don't commit `.roar/`** — it's local lineage state. `roar init` offers to add it to `.gitignore`. + +## Reading `roar show` output + +By default, long lists are truncated: +- Packages: first 15 shown, then `... and N more` +- Env var values: cut at 60 chars +- Producer/consumer jobs: first 5 shown +- Commands: cut at ~50 chars + +Use `--all` when investigating to see everything. + +## When in doubt + +Run `roar --help` or `roar <command> --help`. The CLI is the source of truth — this skill captures only the commonly needed patterns. diff --git a/roar/cli/templates/agents/agents_section.md.tmpl b/roar/cli/templates/agents/agents_section.md.tmpl new file mode 100644 index 0000000..c909462 --- /dev/null +++ b/roar/cli/templates/agents/agents_section.md.tmpl @@ -0,0 +1,9 @@ +## roar (provenance tracker) + +This project uses [roar](https://github.com/treqs/roar) to capture lineage for ML/data pipeline runs. + +- **Run scripts with provenance:** prefer `roar run <command>` over bare `<command>` (e.g. `roar run python train.py`). +- **Don't modify code to use roar.** Capture is transparent. 
The exception: `from roar import require` at the top of a script aborts if it isn't being run under `roar run`. +- **Inspect lineage:** `roar show `, `roar log`, `roar dag`. Add `--all` to `roar show` to disable truncation. +- **Don't commit `.roar/`** — it's local lineage state. +- **`roar --help` is the source of truth** for current command behavior. diff --git a/tests/unit/test_init_agents.py b/tests/unit/test_init_agents.py new file mode 100644 index 0000000..0c67c8d --- /dev/null +++ b/tests/unit/test_init_agents.py @@ -0,0 +1,235 @@ +"""Tests for ``roar init agents``.""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from click.testing import CliRunner + +# The require guard is imported transitively by `roar.cli`; bypass it in tests. +os.environ["ROAR_GUARD"] = "0" + +from roar.cli import cli +from roar.cli.commands.init_agents import ( + AGENTS_BEGIN_MARKER, + AGENTS_END_MARKER, + render_agents_block, + render_skill, + update_agents_file, + update_skill_file, +) + +# --------------------------------------------------------------------------- +# Pure-function tests (no filesystem) +# --------------------------------------------------------------------------- + + +class TestRenderSkill: + def test_includes_version_marker(self): + out = render_skill("9.9.9") + assert "" in out + + def test_has_frontmatter(self): + out = render_skill() + assert out.startswith("---\n") + assert "name: roar" in out + + +class TestUpdateSkillFile: + def test_create_when_missing(self): + result = update_skill_file(None, version="1.0.0") + assert result.action == "create" + assert "" in result.new_content + + def test_noop_when_identical(self): + rendered = render_skill("1.0.0") + result = update_skill_file(rendered, version="1.0.0") + assert result.action == "noop" + + def test_modified_when_no_marker_and_no_force(self): + result = update_skill_file("hand-written content", version="1.0.0") + assert result.action == "modified" + assert 
result.new_content == "hand-written content" + + def test_force_overwrites_modified(self): + result = update_skill_file("hand-written content", version="1.0.0", force=True) + assert result.action == "update" + assert "" in result.new_content + + def test_update_when_managed_but_old_version(self): + old = render_skill("0.9.0") + result = update_skill_file(old, version="1.0.0") + assert result.action == "update" + assert "" in result.new_content + + +class TestUpdateAgentsFile: + def test_create_when_missing(self): + result = update_agents_file(None) + assert result.action == "create" + assert AGENTS_BEGIN_MARKER in result.new_content + assert AGENTS_END_MARKER in result.new_content + assert result.new_content.startswith("# AGENTS.md\n") + + def test_append_when_no_block(self): + existing = "# AGENTS.md\n\nExisting project guidance.\n" + result = update_agents_file(existing) + assert result.action == "append" + assert result.new_content.startswith(existing) + assert AGENTS_BEGIN_MARKER in result.new_content + + def test_noop_when_block_matches(self): + existing = f"# AGENTS.md\n\nFoo.\n\n{render_agents_block()}" + result = update_agents_file(existing) + assert result.action == "noop" + assert result.new_content == existing + + def test_update_replaces_only_the_block(self): + old_block = render_agents_block("0.0.0") + existing = f"# AGENTS.md\n\nProject conventions.\n\n{old_block}\nMore stuff after.\n" + result = update_agents_file(existing) + assert result.action == "update" + # Surrounding content is preserved. + assert "Project conventions." in result.new_content + assert "More stuff after." in result.new_content + # Block contents are refreshed. 
+ assert "" not in result.new_content + + def test_idempotent_re_runs(self): + """Calling update twice in a row should not accumulate blank lines.""" + first = update_agents_file(None).new_content + second = update_agents_file(first).new_content + third = update_agents_file(second).new_content + assert second == third + + def test_handles_existing_without_trailing_newline(self): + existing = "# AGENTS.md\n\nNo trailing newline" + result = update_agents_file(existing) + assert result.action == "append" + # Should have a clean separator between existing content and the block. + assert "newline\n\n