From 1d15144341b116f5d05116d24e14945027b3e1dd Mon Sep 17 00:00:00 2001 From: Cody Hart Date: Thu, 29 Jan 2026 11:47:20 -0500 Subject: [PATCH] Migrate commands/ to skills/ format for reliable CLI resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code 2.1.x changed how commands/ are discovered, causing multi-command plugins to misroute (e.g. /look:again running tidy). The skills/ directory format uses the newer, supported resolution path. See glittercowboy/get-shit-done#218 for background. - Move commands/again.md → skills/again/SKILL.md - Move commands/tidy.md → skills/tidy/SKILL.md - Add disable-model-invocation: true to both skills - Rename frontmatter field tools → allowed-tools (skills format) - Remove commands array from plugin.json and marketplace.json - Update test suite, build script, evals, and documentation - Add integration test script - Bump version to 0.4.0 No changes to user-facing command names: /look:again and /look:tidy work as before. --- .claude-plugin/marketplace.json | 5 +- CHANGELOG.md | 10 + CONTRIBUTING.md | 48 ++- Makefile | 3 + README.md | 19 +- evals/promptfooconfig.yaml | 38 +-- scripts/integration-test.sh | 321 ++++++++++++++++++ scripts/package.sh | 1 - scripts/test.sh | 30 +- src/dot-claude-plugin/plugin.json | 5 +- .../again.md => skills/again/SKILL.md} | 1 + .../tidy.md => skills/tidy/SKILL.md} | 3 +- 12 files changed, 417 insertions(+), 67 deletions(-) create mode 100755 scripts/integration-test.sh rename src/{commands/again.md => skills/again/SKILL.md} (99%) rename src/{commands/tidy.md => skills/tidy/SKILL.md} (94%) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 029a9c4..343df47 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,14 +11,13 @@ "name": "look", "source": "./src", "description": "Sequential code review with fresh agent contexts. Runs multiple independent review passes to catch more issues.", - "version": "0.3.0", + "version": "0.4.0", "author": { "name": "HartBrook" }, "repository": "https://github.com/HartBrook/lookagain", "license": "MIT", "keywords": ["code-review", "quality", "iterative", "multi-pass"], - "commands": ["./commands/again.md", "./commands/tidy.md"], "agents": ["./agents/lookagain-reviewer.md"], - "skills": ["./skills/lookagain-output-format"], + "skills": ["./skills/again", "./skills/tidy", "./skills/lookagain-output-format"], "strict": false } ] diff --git a/CHANGELOG.md b/CHANGELOG.md index a83a8c6..11f57aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.0] - 2026-01-29 + +### Changed + +- Migrated commands from `commands/` directory to `skills/` directory format (`SKILL.md` files) to fix CLI command resolution issues on newer Claude Code versions. See [GSD plugin issue #218](https://github.com/glittercowboy/get-shit-done/issues/218) for background. +- Added `disable-model-invocation: true` to `/look:again` and `/look:tidy` to prevent unintended auto-invocation. +- Renamed frontmatter field `tools` to `allowed-tools` per skills format. +- Updated test suite, build script, evals, and documentation to reflect new file layout. +- No changes to user-facing command names: `/look:again` and `/look:tidy` work as before. + ## [0.3.0] - 2026-01-28 ### Changed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7cb8827..e819e1c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,18 +17,18 @@ lookagain/ ├── .claude-plugin/ │ └── marketplace.json # Marketplace manifest ├── src/ -│ ├── commands/ # Plugin commands -│ │ ├── again.md # Main orchestrator -│ │ └── tidy.md # Tidy old review runs │ ├── agents/ # Subagent definitions │ │ └── lookagain-reviewer.md -│ ├── skills/ # Output format specs -│ │ └── lookagain-output-format/ +│ ├── skills/ # Plugin skills (commands + reference) +│ │ ├── again/ # Main orchestrator (/look:again) +│ │ ├── tidy/ # Tidy old review runs (/look:tidy) +│ │ └── lookagain-output-format/ # Output format spec │ ├── dot-claude-plugin/ # Plugin manifest (becomes .claude-plugin/) │ └── dot-claude/ # Claude settings (becomes .claude/) ├── scripts/ │ ├── package.sh # Build script -│ └── test.sh # Plugin validation tests +│ ├── test.sh # Plugin validation tests +│ └── integration-test.sh # End-to-end integration test ├── evals/ # Behavioral evals (promptfoo) │ ├── promptfooconfig.yaml │ └── prompt-loader.js @@ -65,13 +65,27 @@ make test # Behavioral evals — verifies models interpret prompt arguments correctly # Requires ANTHROPIC_API_KEY make eval + +# Integration test — end-to-end run of look:again with auto-fix +# Requires ANTHROPIC_API_KEY +make integration ``` -`make test` runs fast, offline checks that validate plugin structure: file existence, JSON validity, frontmatter fields, cross-references between manifests, and that commands accepting arguments use the correct pattern (`argument-hint` in frontmatter, `$ARGUMENTS` placeholder, and a defaults table in the body). +The three test layers catch different kinds of regressions: + +| Layer | Command | What it catches | Cost | +|-------|---------|----------------|------| +| **Structure** | `make test` | Broken manifests, missing files, frontmatter drift, cross-reference mismatches | Free, offline, fast | +| **Evals** | `make eval` | Model misinterpreting arguments (e.g. ignoring `auto-fix=false`, wrong pass count) | API calls (~$0.05/run) | +| **Integration** | `make integration` | End-to-end failures: review not finding bugs, auto-fix not applying, output artifacts missing, tidy not cleaning up | API calls (~$0.50/run) | + +**When to run each:** -`make eval` runs [promptfoo](https://promptfoo.dev) evals that send the interpolated prompts to Claude and assert on behavioral correctness. For example, it verifies that `auto-fix=false` causes the model to skip fixes, and that `passes=5` results in 5 planned passes. +- **`make test`** — always, before every commit. It's fast and offline. Catches most structural mistakes from editing manifests, renaming files, or changing frontmatter. +- **`make eval`** — after changing prompt wording or argument handling in skill files. The evals send the interpolated prompts to Claude and assert on behavioral correctness (e.g. that `auto-fix=false` causes the model to skip fixes, that `passes=5` results in 5 planned passes). Cheap enough to run liberally. +- **`make integration`** — after changes that affect the review pipeline end-to-end (orchestration logic, agent prompts, output format). Spins up a temp git repo with contrived bugs, runs `/look:again` with auto-fix, and verifies that bugs are found, fixed, and that output artifacts are correct. Also tests `/look:tidy` cleanup. More expensive, so run when the cheaper layers aren't sufficient. -Evals require an Anthropic API key and cost a small amount per run. Set the key before running: +Both `make eval` and `make integration` require an Anthropic API key. Set it before running: ```bash # Option 1: export for the current shell session @@ -109,22 +123,24 @@ You can also test the plugin through the marketplace install flow, which is clos ### Key Files -- **[src/commands/again.md](src/commands/again.md)**: Main orchestrator logic. Controls pass execution, auto-fixing, and aggregation. +- **[src/skills/again/SKILL.md](src/skills/again/SKILL.md)**: Main orchestrator logic. Controls pass execution, auto-fixing, and aggregation. +- **[src/skills/tidy/SKILL.md](src/skills/tidy/SKILL.md)**: Tidy skill for pruning old review runs. - **[src/agents/lookagain-reviewer.md](src/agents/lookagain-reviewer.md)**: Reviewer subagent. Defines how individual review passes work. - **[src/skills/lookagain-output-format/SKILL.md](src/skills/lookagain-output-format/SKILL.md)**: JSON output format specification. - **[src/dot-claude-plugin/plugin.json](src/dot-claude-plugin/plugin.json)**: Plugin metadata and version. -- **[src/commands/tidy.md](src/commands/tidy.md)**: Tidy command for pruning old review runs. - **[.claude-plugin/marketplace.json](.claude-plugin/marketplace.json)**: Marketplace manifest for plugin discovery and installation. -### Writing Command Prompts +### Writing Skill Prompts -When editing or adding command prompts in `src/commands/`: +When editing or adding skills in `src/skills/`: -- **Use `$ARGUMENTS` for the raw string.** Claude Code replaces `$ARGUMENTS` with whatever the user typed after the command name. There is no `$ARGUMENTS.name` dot-access syntax — only `$ARGUMENTS` (whole string) and `$ARGUMENTS[N]` (positional). Do NOT use an `arguments:` array in frontmatter — it is not a supported Claude Code feature and will not be interpolated. +- Each skill is a directory containing a `SKILL.md` file (e.g., `src/skills/again/SKILL.md`). +- **Use `$ARGUMENTS` for the raw string.** Claude Code replaces `$ARGUMENTS` with whatever the user typed after the skill name. There is no `$ARGUMENTS.name` dot-access syntax — only `$ARGUMENTS` (whole string) and `$ARGUMENTS[N]` (positional). Do NOT use an `arguments:` array in frontmatter — it is not a supported Claude Code feature and will not be interpolated. - **Add `argument-hint`** in frontmatter to document expected input format (e.g., `argument-hint: "[key=value ...]"`). +- **Add `disable-model-invocation: true`** for user-triggered actions to prevent Claude from auto-invoking them. - **Include a defaults table** in the body listing each key, its default, and a description. Instruct the agent to parse `key=value` pairs from `$ARGUMENTS` and fall back to defaults for missing keys. - **Log the resolved configuration** so it is visible in output and reviewable in evals. -- `make test` enforces that commands using `$ARGUMENTS` have `argument-hint` in frontmatter, a defaults table in the body, and do NOT use the unsupported `arguments:` frontmatter array. +- `make test` enforces that skills using `$ARGUMENTS` have `argument-hint` in frontmatter, a defaults table in the body, and do NOT use the unsupported `arguments:` frontmatter array. - After changing prompt logic, run `make eval` to verify models still interpret the arguments correctly. ## Pull Requests @@ -138,4 +154,4 @@ When editing or adding command prompts in `src/commands/`: ## Versioning -Update the version in `src/dot-claude-plugin/plugin.json` when making releases. The marketplace entry in `.claude-plugin/marketplace.json` also has a `version` field — keep both in sync. The test suite (`make test`) validates that commands, agents, and skills match between the two files. +Update the version in `src/dot-claude-plugin/plugin.json` when making releases. The marketplace entry in `.claude-plugin/marketplace.json` also has a `version` field — keep both in sync. The test suite (`make test`) validates that agents and skills match between the two files. diff --git a/Makefile b/Makefile index d837424..8e41568 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,9 @@ dev: build ## Build and start Claude Code with plugin loaded eval: ## Run behavioral evals (requires ANTHROPIC_API_KEY) @npx promptfoo@latest eval -c evals/promptfooconfig.yaml +integration: build ## Run integration test (requires ANTHROPIC_API_KEY) + @./scripts/integration-test.sh + clean: ## Remove build artifacts @rm -rf dist/ @echo "Cleaned dist/" diff --git a/README.md b/README.md index 32f9839..f7df2c2 100644 --- a/README.md +++ b/README.md @@ -140,15 +140,26 @@ Previous runs are preserved. Use `/look:tidy` to prune old results: | should_fix | No | Performance issues, poor error handling | | suggestion | No | Refactoring, documentation, style | +## Updating + +To update to the latest version: + +```bash +/plugin marketplace update hartbrook-plugins +/plugin uninstall look@hartbrook-plugins +/plugin install look@hartbrook-plugins +``` + ## Development ```bash -make test # Structural validation (offline, fast) -make eval # Behavioral evals via promptfoo (requires ANTHROPIC_API_KEY) -make dev # Build and start Claude Code with the plugin loaded +make test # Structural validation (offline, fast) +make eval # Behavioral evals via promptfoo (requires ANTHROPIC_API_KEY) +make integration # End-to-end integration test (requires ANTHROPIC_API_KEY) +make dev # Build and start Claude Code with the plugin loaded ``` -See [CONTRIBUTING.md](CONTRIBUTING.md) for full development setup and guidelines. +Run `make test` before every commit (free, offline). Run `make eval` after changing prompt wording or argument handling. Run `make integration` after changes to the review pipeline or output format. See [CONTRIBUTING.md](CONTRIBUTING.md) for details on what each layer catches. ## License diff --git a/evals/promptfooconfig.yaml b/evals/promptfooconfig.yaml index 49f322d..af195f7 100644 --- a/evals/promptfooconfig.yaml +++ b/evals/promptfooconfig.yaml @@ -10,11 +10,11 @@ providers: tests: # ================================================================== - # again.md — no arguments (defaults) + # again — no arguments (defaults) # ================================================================== - description: "no arguments → model uses all defaults (auto-fix=true, passes=3, thorough)" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md assert: - type: llm-rubric value: > @@ -26,11 +26,11 @@ tests: simply means the user wants all defaults. # ================================================================== - # again.md — auto-fix interpretation + # again — auto-fix interpretation # ================================================================== - description: "auto-fix=true → model plans to apply fixes" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md arg_passes: "3" arg_target: staged arg_auto-fix: "true" @@ -45,7 +45,7 @@ tests: - description: "auto-fix=false → model skips fixes" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md arg_passes: "3" arg_target: staged arg_auto-fix: "false" @@ -59,11 +59,11 @@ tests: It should not describe applying any code fixes. # ================================================================== - # again.md — passes count + # again — passes count # ================================================================== - description: "passes=5 → model plans exactly 5 initial passes" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md arg_passes: "5" arg_target: staged arg_auto-fix: "true" @@ -79,11 +79,11 @@ tests: 5 sequential passes before considering additional passes. # ================================================================== - # again.md — model resolution + # again — model resolution # ================================================================== - description: "model=fast → reviewer uses haiku" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md arg_passes: "3" arg_target: staged arg_auto-fix: "true" @@ -99,7 +99,7 @@ tests: - description: "model=thorough → no explicit model override" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md arg_passes: "3" arg_target: staged arg_auto-fix: "true" @@ -113,11 +113,11 @@ tests: current model). It should NOT set the model to haiku or sonnet. # ================================================================== - # again.md — scope resolution + # again — scope resolution # ================================================================== - description: "target=branch → branch-based diff scope" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md arg_passes: "3" arg_target: branch arg_auto-fix: "true" @@ -131,11 +131,11 @@ tests: branch. It should reference branch comparison or merge-base. # ================================================================== - # again.md — partial arguments (only auto-fix specified) + # again — partial arguments (only auto-fix specified) # ================================================================== - description: "only auto-fix=false → defaults for everything else, no fixing" vars: - prompt_file: src/commands/again.md + prompt_file: src/skills/again/SKILL.md arg_auto-fix: "false" assert: - type: llm-rubric @@ -146,11 +146,11 @@ tests: auto-fix is explicitly false. # ================================================================== - # tidy.md — all flag + # tidy — all flag # ================================================================== - description: "all=true → removes all runs" vars: - prompt_file: src/commands/tidy.md + prompt_file: src/skills/tidy/SKILL.md arg_keep: "1" arg_all: "true" assert: @@ -161,7 +161,7 @@ tests: - description: "all=false, keep=3 → date-based retention" vars: - prompt_file: src/commands/tidy.md + prompt_file: src/skills/tidy/SKILL.md arg_keep: "3" arg_all: "false" assert: @@ -172,11 +172,11 @@ tests: It should keep runs from the last 3 days. # ================================================================== - # tidy.md — no arguments (defaults) + # tidy — no arguments (defaults) # ================================================================== - description: "tidy with no arguments → keep=1, all=false" vars: - prompt_file: src/commands/tidy.md + prompt_file: src/skills/tidy/SKILL.md assert: - type: llm-rubric value: > diff --git a/scripts/integration-test.sh b/scripts/integration-test.sh new file mode 100755 index 0000000..581e493 --- /dev/null +++ b/scripts/integration-test.sh @@ -0,0 +1,321 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" + +# ============================================================ +# Integration test for look:again +# +# Creates a temp git repo with a contrived buggy file, runs +# the plugin with 2 passes (expecting pass 1 fixes the bugs +# and pass 2 finds no must_fix issues), then chains a tidy +# to verify cleanup works. +# +# Requires: ANTHROPIC_API_KEY, claude CLI +# +# Uses --dangerously-skip-permissions since this runs in an +# isolated temp directory. Times out after 5 minutes per +# claude invocation. +# ============================================================ + +TIMEOUT=300 + +if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then + echo "Error: ANTHROPIC_API_KEY is required" + exit 1 +fi + +if ! command -v claude >/dev/null 2>&1; then + echo "Error: claude CLI is required but not found" + exit 1 +fi + +# Build the plugin +echo "Building plugin..." +"$PROJECT_ROOT/scripts/package.sh" > /dev/null + +WORK_DIR=$(mktemp -d) +cleanup() { + if [[ "${TEST_FAILED:-0}" -eq 1 ]]; then + echo "" + echo "Preserving work directory for debugging: $WORK_DIR" + else + rm -rf "$WORK_DIR" + fi +} +trap cleanup EXIT + +echo "Work directory: $WORK_DIR" + +# Helper: run claude with a timeout (perl, since macOS lacks GNU timeout) +run_claude() { + local output_file="$1" + shift + perl -e ' + alarm shift; + $SIG{ALRM} = sub { kill "TERM", $pid; exit 124 }; + $pid = fork // die; + unless ($pid) { exec @ARGV; die "exec failed: $!" } + waitpid $pid, 0; + exit ($? >> 8); + ' "$TIMEOUT" \ + claude -p "$@" \ + --plugin-dir "$PROJECT_ROOT/dist/lookagain" \ + --dangerously-skip-permissions \ + > "$output_file" 2>&1 + local exit_code=$? + + if [[ $exit_code -eq 124 ]]; then + echo "Error: claude timed out after ${TIMEOUT}s" + elif [[ $exit_code -ne 0 ]]; then + echo "Warning: claude exited with code $exit_code (continuing to check output)" + fi + return 0 +} + +# ============================================================ +# Setup: create a git repo with a buggy file +# ============================================================ + +cd "$WORK_DIR" +git init -q +git config user.email "test@test.com" +git config user.name "Test" + +# Create a Python file with obvious logic bugs: +# 1. Variable is overwritten before use (result always 0.0) +# 2. Accessing attribute on None without null check +cat > buggy.py << 'PYEOF' +def calculate_total(items: list[dict]) -> float: + """Calculate the total price of all items.""" + total = 0.0 + for item in items: + total += item["price"] * item["quantity"] + total = 0.0 + return total + + +def find_user_name(users: list[dict], user_id: str) -> str: + """Find a user's name by ID.""" + result = None + for user in users: + if user["id"] == user_id: + result = user + return result["name"] +PYEOF + +ORIGINAL_CONTENT=$(cat buggy.py) + +git add buggy.py +git commit -q -m "Add buggy module" + +# ============================================================ +# Phase 1: run look:again (2 passes, fast model, auto-fix on) +# ============================================================ + +echo "" +echo "Phase 1: Running look:again against buggy.py (2 passes, fast model, auto-fix on)..." +echo "" + +run_claude "$WORK_DIR/again_output.txt" \ + "/look:again target=buggy.py passes=2 auto-fix=true model=fast" + +echo "look:again output saved to $WORK_DIR/again_output.txt" + +# ============================================================ +# Verify look:again results +# +# Disable set -e so all checks run even if commands fail. +# ============================================================ + +set +e + +PASS=0 +FAIL=0 + +pass() { + PASS=$((PASS + 1)) + echo " ✓ $1" +} + +fail() { + FAIL=$((FAIL + 1)) + echo " ✗ $1" +} + +echo "" +echo "=== look:again checks ===" +echo "" + +# 1. File was modified (auto-fix applied) +CURRENT_CONTENT=$(cat buggy.py) +if [[ "$CURRENT_CONTENT" != "$ORIGINAL_CONTENT" ]]; then + pass "buggy.py was modified (auto-fix applied)" +else + fail "buggy.py was NOT modified (auto-fix did not apply)" +fi + +# 2. The double-assignment bug is fixed +if ! grep -q 'total = 0\.0' buggy.py || [[ $(grep -c 'total = 0\.0' buggy.py) -lt 2 ]]; then + pass "double-assignment bug appears fixed (total = 0.0 not duplicated)" +else + fail "double-assignment bug still present" +fi + +# 3. Output directory exists +if [[ -d ".lookagain" ]]; then + pass ".lookagain/ directory created" +else + fail ".lookagain/ directory not created" +fi + +# 4. Run directory matches expected pattern +RUN_DIR="" +if [[ -d ".lookagain" ]]; then + RUN_DIRS=$(find .lookagain -maxdepth 1 -mindepth 1 -type d 2>/dev/null | sort) + RUN_COUNT=$(echo "$RUN_DIRS" | grep -c . 2>/dev/null || echo 0) + if [[ "$RUN_COUNT" -ge 1 ]]; then + pass "found $RUN_COUNT run directory(ies)" + RUN_DIR=$(echo "$RUN_DIRS" | head -1) + else + fail "no run directories found" + fi +else + fail "no run directories found (no .lookagain/)" +fi + +# 5. Both pass output files exist and are valid JSON +for pass_num in 1 2; do + if [[ -n "$RUN_DIR" ]] && [[ -f "$RUN_DIR/pass-${pass_num}.json" ]]; then + if python3 -c "import json,sys; json.load(open(sys.argv[1]))" "$RUN_DIR/pass-${pass_num}.json" 2>/dev/null; then + pass "pass-${pass_num}.json exists and is valid JSON" + else + fail "pass-${pass_num}.json exists but is not valid JSON" + fi + else + fail "pass-${pass_num}.json not found" + fi +done + +# 6. Pass 1 found must_fix issues (the contrived bugs) +if [[ -n "$RUN_DIR" ]] && [[ -f "$RUN_DIR/pass-1.json" ]]; then + if python3 -c " +import json, sys +d = json.load(open(sys.argv[1])) +issues = d.get('issues', []) +must_fix = [i for i in issues if i.get('severity') == 'must_fix'] +sys.exit(0 if must_fix else 1) +" "$RUN_DIR/pass-1.json" 2>/dev/null; then + pass "pass 1 found must_fix issues" + else + fail "pass 1 did not find must_fix issues" + fi +else + fail "cannot check pass 1 must_fix issues (file missing)" +fi + +# 7. Pass 2 found no must_fix issues (bugs were fixed after pass 1) +if [[ -n "$RUN_DIR" ]] && [[ -f "$RUN_DIR/pass-2.json" ]]; then + if python3 -c " +import json, sys +d = json.load(open(sys.argv[1])) +issues = d.get('issues', []) +must_fix = [i for i in issues if i.get('severity') == 'must_fix'] +sys.exit(0 if not must_fix else 1) +" "$RUN_DIR/pass-2.json" 2>/dev/null; then + pass "pass 2 found no must_fix issues (bugs were fixed)" + else + fail "pass 2 still found must_fix issues after auto-fix" + fi +else + fail "cannot check pass 2 must_fix issues (file missing)" +fi + +# 8. Aggregate JSON exists with issues array +if [[ -n "$RUN_DIR" ]] && [[ -f "$RUN_DIR/aggregate.json" ]]; then + if python3 -c " +import json, sys +d = json.load(open(sys.argv[1])) +assert isinstance(d.get('issues', d.get('findings', [])), list), 'no issues array' +" "$RUN_DIR/aggregate.json" 2>/dev/null; then + pass "aggregate.json exists with issues array" + else + fail "aggregate.json missing issues array" + fi +else + fail "aggregate.json not found" +fi + +# 9. Aggregate markdown exists and is non-empty +if [[ -n "$RUN_DIR" ]] && [[ -f "$RUN_DIR/aggregate.md" ]]; then + if [[ -s "$RUN_DIR/aggregate.md" ]]; then + pass "aggregate.md exists and is non-empty" + else + fail "aggregate.md exists but is empty" + fi +else + fail "aggregate.md not found" +fi + +# ============================================================ +# Phase 2: run look:tidy to clean up the run +# ============================================================ + +set -e + +echo "" +echo "Phase 2: Running look:tidy all=true..." +echo "" + +run_claude "$WORK_DIR/tidy_output.txt" \ + "/look:tidy all=true" + +echo "look:tidy output saved to $WORK_DIR/tidy_output.txt" + +set +e + +echo "" +echo "=== look:tidy checks ===" +echo "" + +# 10. Run directory was removed by tidy +if [[ -n "$RUN_DIR" ]] && [[ ! -d "$RUN_DIR" ]]; then + pass "run directory removed by tidy" +elif [[ -n "$RUN_DIR" ]]; then + fail "run directory still exists after tidy" +else + fail "cannot check tidy (no run directory to remove)" +fi + +# 11. .lookagain/ is empty or gone +if [[ ! -d ".lookagain" ]]; then + pass ".lookagain/ removed entirely" +else + REMAINING=$(find .lookagain -maxdepth 1 -mindepth 1 -type d 2>/dev/null | wc -l | tr -d ' ') + if [[ "$REMAINING" -eq 0 ]]; then + pass ".lookagain/ exists but is empty (all runs removed)" + else + fail ".lookagain/ still has $REMAINING run directories after tidy all=true" + fi +fi + +# ============================================================ +# Summary +# ============================================================ + +echo "" +echo "=== Results: $PASS passed, $FAIL failed ===" + +if [[ $FAIL -gt 0 ]]; then + TEST_FAILED=1 + echo "" + echo "--- look:again output (first 200 lines) ---" + head -200 "$WORK_DIR/again_output.txt" + echo "--- end again output ---" + echo "" + echo "--- look:tidy output (first 100 lines) ---" + head -100 "$WORK_DIR/tidy_output.txt" 2>/dev/null || echo "(no tidy output)" + echo "--- end tidy output ---" + exit 1 +fi diff --git a/scripts/package.sh b/scripts/package.sh index 44940b3..ed4025d 100755 --- a/scripts/package.sh +++ b/scripts/package.sh @@ -32,7 +32,6 @@ cp -r "$PROJECT_ROOT/src/dot-claude" "$DIST_DIR/lookagain/.claude" cp -r "$PROJECT_ROOT/src/dot-claude-plugin" "$DIST_DIR/lookagain/.claude-plugin" # Overlay marketplace.json from repo root into the dist .claude-plugin/ directory cp "$PROJECT_ROOT/.claude-plugin/marketplace.json" "$DIST_DIR/lookagain/.claude-plugin/" -cp -r "$PROJECT_ROOT/src/commands" "$DIST_DIR/lookagain/commands" cp -r "$PROJECT_ROOT/src/agents" "$DIST_DIR/lookagain/agents" cp -r "$PROJECT_ROOT/src/skills" "$DIST_DIR/lookagain/skills" diff --git a/scripts/test.sh b/scripts/test.sh index 87b8198..6eadb97 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -82,7 +82,7 @@ test_plugin_json() { fi # Required fields - for field in name version description author commands agents; do + for field in name version description author agents skills; do check_json_field "$pjson" "$field" done @@ -111,8 +111,8 @@ sys.exit(0 if isinstance(d.get('author'), dict) and 'name' in d['author'] else 1 test_required_files() { check_file "src/dot-claude-plugin/plugin.json" - check_file "src/commands/again.md" - check_file "src/commands/tidy.md" + check_file "src/skills/again/SKILL.md" + check_file "src/skills/tidy/SKILL.md" check_file "src/agents/lookagain-reviewer.md" check_file "src/skills/lookagain-output-format/SKILL.md" check_file "src/dot-claude/settings.local.json" @@ -123,21 +123,21 @@ test_required_files() { } test_frontmatter() { - check_frontmatter "$PROJECT_ROOT/src/commands/again.md" name description - check_frontmatter "$PROJECT_ROOT/src/commands/tidy.md" name description + check_frontmatter "$PROJECT_ROOT/src/skills/again/SKILL.md" name description + check_frontmatter "$PROJECT_ROOT/src/skills/tidy/SKILL.md" name description check_frontmatter "$PROJECT_ROOT/src/agents/lookagain-reviewer.md" name description tools check_frontmatter "$PROJECT_ROOT/src/skills/lookagain-output-format/SKILL.md" name description } test_argument_handling() { - # Verify that command files using arguments follow the correct pattern: + # Verify that skill files using arguments follow the correct pattern: # 1. Frontmatter has argument-hint (not the unsupported arguments: array) # 2. Body contains $ARGUMENTS placeholder for the raw argument string # 3. Body contains a defaults table with Key/Default columns # This ensures the agent receives and parses arguments at runtime # rather than relying on non-existent compile-time interpolation. - for file in "$PROJECT_ROOT"/src/commands/*.md; do + for file in "$PROJECT_ROOT"/src/skills/*/SKILL.md; do local relpath="${file#"$PROJECT_ROOT"/}" local frontmatter @@ -150,7 +150,7 @@ test_argument_handling() { !in_fm { print } ' "$file") - # Skip commands that don't accept arguments + # Skip skills that don't accept arguments if ! echo "$body" | grep -qF '$ARGUMENTS'; then continue fi @@ -180,16 +180,6 @@ test_argument_handling() { test_cross_references() { local pjson="$PROJECT_ROOT/src/dot-claude-plugin/plugin.json" - # Commands resolve - while IFS= read -r cmd; do - local resolved="$PROJECT_ROOT/src/${cmd#./}" - if [[ -f "$resolved" ]]; then - pass "command $cmd resolves" - else - fail "command $cmd not found at src/${cmd#./}" - fi - done < <(python3 -c "import json,sys; [print(c) for c in json.load(open(sys.argv[1])).get('commands', [])]" "$pjson") - # Agents resolve while IFS= read -r agent; do local resolved="$PROJECT_ROOT/src/${agent#./}" @@ -243,7 +233,7 @@ test_build() { fail "dist marketplace.json is invalid" fi - for f in commands/again.md commands/tidy.md agents/lookagain-reviewer.md skills/lookagain-output-format/SKILL.md README.md; do + for f in agents/lookagain-reviewer.md skills/again/SKILL.md skills/tidy/SKILL.md skills/lookagain-output-format/SKILL.md README.md; do if [[ -f "$dist/$f" ]]; then pass "dist/$f exists" else @@ -339,7 +329,7 @@ sys.exit(0 if pv == mv else 1) fi # Commands, agents, skills match plugin.json - for field in commands agents skills; do + for field in agents skills; do if python3 -c " import json, sys p = sorted(json.load(open(sys.argv[1])).get(sys.argv[3], [])) diff --git a/src/dot-claude-plugin/plugin.json b/src/dot-claude-plugin/plugin.json index a60ffe1..61ad0b7 100644 --- a/src/dot-claude-plugin/plugin.json +++ b/src/dot-claude-plugin/plugin.json @@ -1,11 +1,10 @@ { "name": "look", - "version": "0.3.0", + "version": "0.4.0", "description": "Sequential code review with fresh agent contexts. Runs multiple independent review passes to catch more issues.", "author": { "name": "HartBrook" }, "repository": "https://github.com/HartBrook/lookagain", "keywords": ["code-review", "quality", "iterative", "multi-pass"], - "commands": ["./commands/again.md", "./commands/tidy.md"], "agents": ["./agents/lookagain-reviewer.md"], - "skills": ["./skills/lookagain-output-format"] + "skills": ["./skills/again", "./skills/tidy", "./skills/lookagain-output-format"] } diff --git a/src/commands/again.md b/src/skills/again/SKILL.md similarity index 99% rename from src/commands/again.md rename to src/skills/again/SKILL.md index 4c2f395..83a816e 100644 --- a/src/commands/again.md +++ b/src/skills/again/SKILL.md @@ -1,6 +1,7 @@ --- name: again description: Run sequential code review passes with fresh contexts to catch more issues +disable-model-invocation: true argument-hint: "[key=value ...]" --- diff --git a/src/commands/tidy.md b/src/skills/tidy/SKILL.md similarity index 94% rename from src/commands/tidy.md rename to src/skills/tidy/SKILL.md index b79b221..d16aea5 100644 --- a/src/commands/tidy.md +++ b/src/skills/tidy/SKILL.md @@ -1,7 +1,8 @@ --- name: tidy description: Remove old lookagain review runs, keeping today's results by default -tools: Glob, Bash(rm -rf .lookagain/????-??-??T??-??-??) +disable-model-invocation: true +allowed-tools: Glob, Bash(rm -rf .lookagain/????-??-??T??-??-??) argument-hint: "[key=value ...]" ---