diff --git a/.env.example b/.env.example index b8a2b35..960756a 100644 --- a/.env.example +++ b/.env.example @@ -18,6 +18,11 @@ ELIZACLOUD_API_KEY=your-elizacloud-key-here # Default model (optional, defaults to gpt-4o): # PRR_LLM_MODEL=gpt-4o +# Stronger models for verification / final audit (optional; invalid ids ignored with warning — see shared/config.ts). +# Order: PRR_FINAL_AUDIT_MODEL → PRR_VERIFIER_MODEL → PRR_LLM_MODEL (README / AGENTS.md). +# PRR_VERIFIER_MODEL=anthropic/claude-sonnet-4-5-20250929 +# PRR_FINAL_AUDIT_MODEL=anthropic/claude-opus-4-5-20251101 + # ElizaCloud: extra 500/502/504 retries inside each complete() (0–15). CI defaults to 5 HTTP attempts when unset; locally 3. # PRR_ELIZACLOUD_SERVER_ERROR_RETRIES=6 @@ -48,12 +53,18 @@ ELIZACLOUD_API_KEY=your-elizacloud-key-here # LLM concurrency (default 1). Raised values speed batches when the gateway allows it. # PRR_MAX_CONCURRENT_LLM=3 +# Min ms between ElizaCloud request starts per concurrent slot (default from shared/constants/models.ts). +# PRR_LLM_MIN_DELAY_MS=6000 + # Optional: max ms per concurrent pool task (batch analysis / parallel fix groups). Unset or 0 = no cap. # PRR_LLM_TASK_TIMEOUT_MS=600000 +# llm-api fixer: client wait per HTTP attempt when NOT in full-file rewrite mode. Unset = auto (90s, then 120s/150s/180s by prompt size). Full-file rewrite always uses 180s unless you change code constants. +# PRR_LLM_API_REQUEST_TIMEOUT_MS=240000 + # Skip a fixer model for the rest of the run after this many verification failures with zero verified fixes (default 4). Set to 0 to disable. # PRR_SESSION_MODEL_SKIP_FAILURES=4 -# Every N fix iterations, clear session-skipped models so rotation retries them (0 = off). Pill-output #847. +# After N fix iterations since each model was session-skipped, drop that skip so rotation can retry (0 = off). Pill-output #847. # PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS=8 # Warn once after this many consecutive iterations with no new verified fixes (default 10). Set to 0 to disable. @@ -72,8 +83,10 @@ ELIZACLOUD_API_KEY=your-elizacloud-key-here # Catalog model advice (see AGENTS.md): disable 0a6 dismissal and/or quoted-literal auto-heal. # PRR_DISABLE_MODEL_CATALOG_SOLVABILITY=1 # PRR_DISABLE_MODEL_CATALOG_AUTOHEAL=1 +# Override committed JSON snapshot (default: generated/model-provider-catalog.json). Malformed file → empty catalog + warn. +# PRR_MODEL_CATALOG_PATH=/path/to/model-provider-catalog.json -# Thread replies: set to the GitHub login that posts replies so re-runs skip duplicate posts. +# Thread replies: optional override for cross-run idempotency (default: token login from GET /user). # PRR_BOT_LOGIN=my-bot # PRR_REPLY_TO_THREADS=true # Also reply on threads dismissed as chronic-failure (default: no — batch token-saving dismissals). @@ -88,9 +101,28 @@ ELIZACLOUD_API_KEY=your-elizacloud-key-here # Exit setup before clone when GitHub says PR is not mergeable / dirty (unless you pass --merge-base). # PRR_EXIT_ON_UNMERGEABLE=1 -# On PR HEAD change: clear every dismissal (default clears only "already-fixed" dismissals). +# On PR HEAD change: clear every dismissal category (default clears already-fixed, chronic-failure, stale; keeps e.g. not-an-issue). # PRR_CLEAR_ALL_DISMISSED_ON_HEAD=1 +# Fixer allowlist: strict first-segment filter (static REPO_TOP_LEVEL + PR changed-file roots). +# Default (unset): open — any repo-relative path is OK except absolute, node_modules, dist/, .cursor, .prr. +# WHY default open: unknown roots like agent/ were silently stripped from allowedPaths → no injection, wasted iterations (Cycle 72). +# WHY set strict: comment bodies that paste dependency-style paths; pair with PR diff so touched roots still whitelist. +# PRR_STRICT_ALLOWED_PATHS=1 + +# Max new bot review threads to enqueue per mid–fix-loop batch (0 = unlimited). Default 45. +# PRR_MID_LOOP_NEW_COMMENT_CAP=45 + +# Blast radius (changed files + import graph + proximity) — deprioritize / optional dismiss / narrower injection: +# WHY: Order and llm-api injection focus on PR-related files; fixer allowlist stays full (see README "Blast radius"). +# Graph uses async FS in specifier-resolver; timeout/max-files abort → all issues in-scope (no silent skip). +# PRR_DISABLE_BLAST_RADIUS=1 +# PRR_BLAST_RADIUS_DEPTH=2 +# PRR_BLAST_RADIUS_DISMISS=1 +# PRR_BLAST_RADIUS_MAX_FILES=5000 +# PRR_BLAST_RADIUS_TIMEOUT_MS=30000 +# PRR_BLAST_RADIUS_MAX_DIR_NEIGHBORS=30 + # Dry-merge probes after fetch (git merge-tree; warns before pull if conflicts): # PRR_DISABLE_LATENT_MERGE_PROBE=1 # skip HEAD vs origin/ # PRR_DISABLE_LATENT_MERGE_PROBE_BASE=1 # skip HEAD vs origin/ (GitHub mergeable/dirty) @@ -99,9 +131,16 @@ ELIZACLOUD_API_KEY=your-elizacloud-key-here # PRR_MATERIALIZE_LATENT_MERGE=1 # merge origin/ --no-commit # PRR_MATERIALIZE_LATENT_MERGE_BASE=1 # merge origin/ --no-commit +# Clone / fetch (ms). Large or slow remotes: raise timeouts (README Troubleshooting / AGENTS.md). +# PRR_CLONE_TIMEOUT_MS=900000 +# PRR_FETCH_TIMEOUT_MS=120000 + # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # Pill (log audit — optional, e.g. prr --pill) # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # When tools/prr exists under the pill target directory, drop improvements whose file paths # look like the PR clone (src/, packages/, …). Set to 0 to record everything the LLM returns. # PILL_TOOL_REPO_SCOPE_FILTER=0 +# Rerun pill on explicit log files (absolute or cwd-relative). CLI --output-log / --prompts-log wins. +# PILL_OUTPUT_LOG_PATH=/path/to/output.log +# PILL_PROMPTS_LOG_PATH=/path/to/prompts.log diff --git a/AGENTS.md b/AGENTS.md index b9c0c4e..0c58856 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,15 +10,15 @@ These are created by tools and should not be committed: `.split-plan.md`, `.spli **PRR vs stale bot model advice:** Some review bots claim a valid API id is wrong and suggest another valid id. **WHY dismiss:** That is not a real fix task when both strings appear in the catalog — it would waste fix-loop iterations and risk applying the wrong id. **`assessSolvability`** check **0a6** (`tools/prr/workflow/helpers/outdated-model-advice.ts`) returns `not-an-issue`. **WHY auto-heal:** If the branch already contains the bot’s suggested id inside quotes/backticks near the comment line, **`applyCatalogModelAutoHeals`** (`catalog-model-autoheal.ts`) restores the catalog-correct id in quoted literals (±20 lines around the anchor, then **full-file fallback** if needed). If the file already has the catalog id and the wrong id never appears quoted, it **`markVerified`** with no disk edit (**`catalog-autoheal-noop`**). **WHY `verifiedThisSession`:** Commits are gated on verified session ids; heal calls **`markVerified`** so **`commitAndPushChanges`** can run on the “all resolved, no fix loop” path when the only dirty change is the heal. Opt out: **`PRR_DISABLE_MODEL_CATALOG_SOLVABILITY`**, **`PRR_DISABLE_MODEL_CATALOG_AUTOHEAL`**. Details: **DEVELOPMENT.md** (Commit gate and catalog model auto-heal), **docs/MODELS.md**. -**Pill hook:** Pill runs on close only when the user passes **`--pill`** on the command line. **prr**, **split-exec**, **story**, and **split-plan** accept `--pill`; after parsing, they call `setPillEnabled(true)`. After shutdown, each entry point calls **`closeOutputLog()`** then **`runPillAfterClosedLogs()`** (`tools/pill/after-close-logs.ts`) so **`shared/`** does not import **`tools/pill/`**. When `--pill` is not passed, pill does not run. **WHY opt-in:** Default runs stay fast; tools like split-exec have no LLM calls, so pill would often have nothing to analyze. When `--pill` is set, pill runs if the output log has content or the prompts log has PROMPT/RESPONSE/ERROR entries. +**Pill hook:** Pill runs on close only when the user passes **`--pill`** on the command line. **Standalone** **`pill `** can pass **`--output-log`** / **`--prompts-log`** (or **`PILL_OUTPUT_LOG_PATH`** / **`PILL_PROMPTS_LOG_PATH`**) to rerun on specific log files; **``** still supplies docs/source for the audit (**`tools/pill/README.md`**). **prr**, **split-exec**, **story**, and **split-plan** accept `--pill`; after parsing, they call `setPillEnabled(true)`. After shutdown, each entry point calls **`closeOutputLog()`** then **`runPillAfterClosedLogs()`** (`tools/pill/after-close-logs.ts`) so **`shared/`** does not import **`tools/pill/`**. When `--pill` is not passed, pill does not run. **WHY opt-in:** Default runs stay fast; tools like split-exec have no LLM calls, so pill would often have nothing to analyze. When `--pill` is set, pill runs if the output log has content or the prompts log has PROMPT/RESPONSE/ERROR entries. -**prompts.log:** `initOutputLog()` always opens `prompts.log` (or `{prefix}-prompts.log`) next to `output.log`. **Full** prompt/response text is appended when the **in-process** LLM path runs (`LLMClient.complete()` → `debugPrompt` / `debugResponse` in `tools/prr/llm/client.ts`), not when `--verbose` is set. The file stays **empty** if the run never calls that path (e.g. exits before any LLM, or only subprocess fixers). Entries with zero content between markers indicate a logging bug or empty model output; pill and audit cycles rely on non-empty bodies. If the provider returns **success with an empty/whitespace body**, the client writes an **`ERROR`** line for that slug (so audits do not see a PROMPT with no paired RESPONSE); merge/conflict steps set **`phase`** in metadata for grep (e.g. `conflict-syntax-fix`, `conflict-chunk`). +**prompts.log:** `initOutputLog()` always opens `prompts.log` (or `{prefix}-prompts.log`) next to `output.log`. **Full** prompt/response text is appended when the **in-process** LLM path runs (`LLMClient.complete()` → `debugPrompt` / `debugResponse` in `tools/prr/llm/client.ts`), not when `--verbose` is set. The file stays **empty** if the run never calls that path (e.g. exits before any LLM, or only subprocess fixers). Entries with zero content between markers indicate a logging bug or empty model output; pill and audit cycles rely on non-empty bodies. If the provider returns **success with an empty/whitespace body**, the client writes an **`ERROR`** line for that slug (so audits do not see a PROMPT with no paired RESPONSE); merge/conflict steps set **`phase`** in metadata for grep (e.g. `conflict-syntax-fix`, `conflict-chunk`). LLM comment-dedup grouping uses **`dedup-v2-grouping`** (per file) and **`dedup-v2-cross-file`**; the model may answer with the literal **`NONE`** (4 characters) when it finds no duplicate groups — **`output.log`** then shows **`RESPONSE … { chars: 4, phase: … }`**, which is **not** the same as an empty response (see **`prompts.log`** body). **Troubleshooting empty prompts.log:** If the **primary LLM path** (in-process, e.g. elizacloud via `tools/prr/llm/client.ts`) produces empty PROMPT/RESPONSE entries, the fix is in that code path: ensure the full prompt string is passed to `debugPrompt()` and the full response body to `debugResponse()` (e.g. after streaming, pass the accumulated content, not a placeholder). `shared/logger.ts`'s `writeToPromptLog` refuses zero-length or whitespace-only body and **warns to stderr** (and console) with the slug and a stack trace so you can identify the caller. If most entries in prompts.log are from **llm-elizacloud** and every body is empty, the streaming path in the elizacloud client may not be passing the accumulated response to the logger — check `shared/llm/elizacloud.ts` (or the code that calls `debugResponse()` after streaming). Check that `initOutputLog()` was called before the first LLM call and that `promptLogStream` is non-null. **⚠️ Known issue: empty prompts.log entries:** **When llm-api is the sole fixer**, the subprocess does not call `initOutputLog`, so **prompts.log entries for llm-api-fix will be empty** even though the fixer ran. This is expected behavior, not a bug. **Additionally, elizacloud streaming entries may also be empty** if the streaming path does not pass accumulated response content to the logger — this IS a bug (see troubleshooting section below). To audit llm-api fixer activity, use **`PRR_DEBUG_PROMPTS=1`** to get per-prompt files under `~/.prr/debug/`, or inspect `output.log` (e.g. `PROMPT #0001 → { chars: N }`) for evidence of calls. Do not waste time investigating empty llm-api-fix entries — they are expected to be empty. However, if you see empty elizacloud entries, investigate the streaming path. -**Crash / truncation:** Writes are buffered. If the process exits abruptly (crash, kill), the last entry may be missing or truncated. The logger uses cork/uncork per prompts.log entry so each PROMPT/RESPONSE/ERROR is flushed as a unit, reducing truncated entries. `closeOutputLog()` flushes and closes streams on normal shutdown. +**Crash / truncation:** Writes are buffered. If the process exits abruptly (crash, kill), the last entry may be missing or truncated. The logger uses cork/uncork per prompts.log entry so each PROMPT/RESPONSE/ERROR is flushed as a unit, reducing truncated entries. `closeOutputLog()` flushes and closes streams on normal shutdown. If any **empty PROMPT/RESPONSE** bodies were refused, shutdown also appends a **WARNING** to **output.log** with a **per `kind:slug` count** (top 20 keys) so audits and pill see which labels fired without reparsing **prompts.log** (`getEmptyPromptBodyRejectionStats()` for the same breakdown before close). **Pill and large logs:** When output.log (or prompts.log) exceeds the token budget, pill summarizes it and may miss single-line or tabular evidence (e.g. RESULTS SUMMARY counts, Model Performance table, overlap IDs). For critical runs, inspect output.log manually for those sections; pill now also extracts and appends key evidence when the log is summarized. **Very large prompts.log** (e.g. full-file conflict PROMPTs) is truncated per pair before story-read and capped per entry in the small-log path so one slug cannot blow the digest. **Vercel FUNCTION_INVOCATION_TIMEOUT** on ElizaCloud often hits **slow audit models** (e.g. Opus) even at ~40k-char POST bodies; defaults use **~12k user chars/request** for Opus-class / heavy OpenAI ids (not gpt-5-mini/nano) and **~20k** for others, plus smaller story-read chapters. **Chunked audits** run up to **`PILL_AUDIT_CHUNK_CONCURRENCY`** requests in parallel (default **4**; **`1`** = sequential) so very large contexts do not spend wall time on hundreds of serial audit calls. If pill still 504s, set **`PILL_AUDIT_MAX_USER_CHARS=8000`**, **`PILL_CONTEXT_BUDGET_TOKENS=20000`**, **`PILL_OUTPUT_LOG_MAX_CHARS=20000`**, or use a **faster `PILL_AUDIT_MODEL`** (e.g. Sonnet). @@ -28,7 +28,7 @@ These are created by tools and should not be committed: `.split-plan.md`, `.spli **Model skip list (ElizaCloud):** Some models are skipped by default. Reasons are separate: **known timeout/504** (transient possible — retry with `PRR_ELIZACLOUD_INCLUDE_MODELS`) vs **0% fix rate** (audit). The list lives in **`shared/constants/models.ts`** (barreled as **`shared/constants.js`** via **`shared/constants.ts`** shim): `ELIZACLOUD_SKIP_MODEL_IDS`; reasons in `ELIZACLOUD_SKIP_REASON`. DEBUG logs show which reason per model. **`PRR_ELIZACLOUD_EXTRA_SKIP_MODELS`** (comma-separated) merges **additional** ids into that list for this environment. To re-enable a skipped model (e.g. timeout was gateway-specific), set **`PRR_ELIZACLOUD_INCLUDE_MODELS`** to a comma-separated list (e.g. `openai/gpt-4o,anthropic/claude-3.7-sonnet`, or `alibaba/qwen-3-14b` if you intentionally use Qwen despite skip-list audits). See `getEffectiveElizacloudSkipModelIds()` and `getElizaCloudSkipReason()`. -**Session model skip (this run):** Independently of the catalog skip list, **`PRR_SESSION_MODEL_SKIP_FAILURES`** (default **4**) skips a tool/model for the **rest of the process** after that many **verification** failures with **zero** verified fixes; **`PRR_SESSION_MODEL_SKIP_FAILURES=0`** disables. **`PRR_DIMINISHING_RETURNS_ITERATIONS`** (default **10**) emits one warning after that many consecutive iterations with **no** new verified fixes; **`0`** disables. +**Session model skip (this run):** Independently of the catalog skip list, **`PRR_SESSION_MODEL_SKIP_FAILURES`** (default **4**) skips a tool/model for the **rest of the process** after that many **verification** failures with **zero** verified fixes; **`PRR_SESSION_MODEL_SKIP_FAILURES=0`** disables. Skip keys and per-key failure counts are **persisted** in `.pr-resolver-state.json` (`sessionSkippedModelKeys`, `sessionModelStats`, …) so a restart does not re-probe the same bad model; **`PRR_PERSIST_SESSION_MODEL_SKIP=0`** keeps the old in-memory-only behavior. Cleared when **PR head SHA** changes (same as verified reset). **`PRR_DIMINISHING_RETURNS_ITERATIONS`** (default **10**) emits one warning after that many consecutive iterations with **no** new verified fixes; **`0`** disables. **Clone / git output:** During clone and fetch, git's stdout and stderr are forwarded to the terminal so you see progress (e.g. "Receiving objects: 45%") and any prompts. If it appears to hang with no output, the process may be waiting on a git prompt (e.g. SSH host key verification or credentials). For first-time SSH, set **`GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=accept-new"`** to avoid the host key prompt; for HTTPS, ensure a token is set so git does not prompt for a password. Clone timeout: **`PRR_CLONE_TIMEOUT_MS`** (default 900s). Optional **`PRR_CLONE_DEPTH`** (e.g. `1`) passes **`git clone --depth`** for a shallow clone on very large repos (trade-off: incomplete history). @@ -82,9 +82,9 @@ When code takes a parameter **`workdir`**, **`pathExists(p)`** must resolve **`p ## PRR thread replies -With **`--reply-to-threads`** (or **`PRR_REPLY_TO_THREADS=true`**), PRR posts a short reply on each GitHub review thread when it fixes or dismisses an issue (e.g. "Fixed in \`abc1234\`." or "No changes needed — already addressed before this run."). Use **`--resolve-threads`** to also resolve (collapse) threads after replying. Optional **`PRR_BOT_LOGIN`** (GitHub login of the bot that posts replies) enables cross-run idempotency: PRR skips posting if that thread already has a comment from that login. **Note:** Replies need a **real** inline review thread (not synthetic **`ic-*`** issue-comment rows) and a **`databaseId`** on the comment. Recovered-from-git ids are matched **case-insensitively** to GraphQL ids. "Fixed in …" is posted after push when possible, and at **final cleanup** for **`verifiedThisSession`** when push did not run (e.g. `--no-push` / nothing to push). +With **`--reply-to-threads`** (or **`PRR_REPLY_TO_THREADS=true`**), PRR posts a short reply on each GitHub review thread when it fixes or dismisses an issue (e.g. "Fixed in \`abc1234\`." or "No changes needed — already addressed before this run."). Use **`--resolve-threads`** to also resolve (collapse) threads after replying. **Cross-run idempotency:** PRR skips posting if that thread already has a comment from the same GitHub login as the token (**`GET /user`**) when **`PRR_BOT_LOGIN`** is unset; set **`PRR_BOT_LOGIN`** to override (e.g. token is not the account that posts replies). **Note:** Replies need a **real** inline review thread (not synthetic **`ic-*`** issue-comment rows) and a **`databaseId`** on the comment. Recovered-from-git ids are matched **case-insensitively** to GraphQL ids. "Fixed in …" is posted after push when possible, and at **final cleanup** for **`verifiedThisSession`** when push did not run (e.g. `--no-push` / nothing to push). -**WHY opt-in:** Default runs stay fast and unchanged; posting to GitHub is a conscious choice. **WHY one reply per thread:** Keeps noise low and leaves room for human follow-up in the same thread. **WHY fixed replies only after push:** "Fixed in \." is posted only when the commit has been successfully pushed (commit-and-push phase), not after incremental pushes. **WHY reply for remaining/exhausted:** We reply for `already-fixed`, `stale`, `not-an-issue`, `false-positive`, and also for `remaining` and `exhausted` with a short "Could not auto-fix; manual review recommended." so threads (e.g. wrong-file exhaust) are not left without any reply. We do not reply for `chronic-failure` by default (batch token-saving dismissals without a per-thread fix cycle — avoids duplicate “could not fix” noise vs `remaining`/`exhausted`); set **`PRR_THREAD_REPLY_INCLUDE_CHRONIC_FAILURE=1`** to opt in. **WHY cross-run idempotency:** Re-runs would otherwise duplicate replies; `PRR_BOT_LOGIN` lets us skip threads we already replied to. Full WHYs: [docs/THREAD-REPLIES.md](docs/THREAD-REPLIES.md). +**WHY opt-in:** Default runs stay fast and unchanged; posting to GitHub is a conscious choice. **WHY one reply per thread:** Keeps noise low and leaves room for human follow-up in the same thread. **WHY fixed replies only after push:** "Fixed in \." is posted only when the commit has been successfully pushed (commit-and-push phase), not after incremental pushes. **WHY reply for remaining/exhausted:** We reply for `already-fixed`, `stale`, `not-an-issue`, `false-positive`, and also for `remaining` and `exhausted` with a short "Could not auto-fix; manual review recommended." so threads (e.g. wrong-file exhaust) are not left without any reply. We do not reply for `chronic-failure` by default (batch token-saving dismissals without a per-thread fix cycle — avoids duplicate “could not fix” noise vs `remaining`/`exhausted`); set **`PRR_THREAD_REPLY_INCLUDE_CHRONIC_FAILURE=1`** to opt in. **WHY cross-run idempotency:** Re-runs would otherwise duplicate replies; matching thread authors to the token login (or **`PRR_BOT_LOGIN`**) skips threads we already replied to. Full WHYs: [docs/THREAD-REPLIES.md](docs/THREAD-REPLIES.md). ## Fix-loop lifecycle (for mapping pill “src/*” items) @@ -112,20 +112,21 @@ flowchart LR ## State and path invariants (pill / audit) -- **Verified ∩ dismissed = ∅:** A comment ID must not appear in both verified (`verifiedFixed` / `verifiedComments`) and `dismissedIssues`. **`markVerified`** and **`dismissIssue`** remove the ID from the opposite set; **`load` / `loadState`** cleans overlaps and drops **`verifiedComments`** rows for dismissed IDs. Prefer verified when repairing legacy overlap. -- **HEAD change:** When **`headSha`** changes, **verified** state is cleared so fixes are re-checked. **`already-fixed`** dismissals are also cleared (code-state-dependent). Other dismissals (e.g. not-an-issue) are kept unless overlap cleanup removes them. Set **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD=1`** to clear **every** dismissal on HEAD change (aggressive; use after rebases when you want a full re-triage). +- **Verified ∩ dismissed = ∅:** A comment ID must not appear in both verified (`verifiedFixed` / `verifiedComments`) and `dismissedIssues`. **`markVerified`**, **`unmarkVerified`**, **`dismissIssue`**, **`undismissIssue`**, and legacy **`StateManager`** verified/dismissed helpers all mutate state through **`transitionIssue`** (`tools/prr/state/state-transitions.ts`) so **`verifiedThisSession`**, **`commentStatuses`**, apply-failure fields, and the two verified stores stay aligned; **`load` / `loadState`** still cleans legacy overlaps and drops **`verifiedComments`** rows for dismissed IDs. Prefer verified when repairing legacy overlap. **Load repair logs:** overlap cleanup emits console lines with up to **15** affected comment ids (**`tools/prr/state/state-core.ts`**, **`StateManager.load`**). +- **HEAD change:** When **`headSha`** changes, **verified** state is cleared so fixes are re-checked. **`already-fixed`**, **`chronic-failure`**, and **`stale`** dismissals are cleared by default (code-state-dependent / thread verdicts may be wrong after rebase). Other dismissals (e.g. not-an-issue) are kept unless overlap cleanup removes them. Set **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD=1`** to clear **every** dismissal on HEAD change (aggressive; use after rebases when you want a full re-triage). - **Final audit:** If the adversarial pass reports **UNFIXED**, the issue is re-queued (removed from verified) even if it was verified earlier in the run — see README “Safe over sorry verification”. -- **Path resolution:** Review **`comment.path`** is normalized (slashes, etc.). Fragment / extension-only paths use **`isReviewPathFragment`** and **`pathDismissCategoryForNotFound`** (`shared/path-utils.ts`) so dismissal is **`path-unresolved`**, not **`missing-file`**, when the path cannot name a single file (e.g. `.d.ts`, bare `d.ts`). Real root files like **`.env`** are **not** treated as fragments. Extension fallbacks for “tracked file not found” live in **`tryResolvePathWithExtensionVariants`** and solvability — extend there rather than duplicating ad hoc rules. The **fix prompt** (`buildFixPrompt`) receives the **clone workdir** (see above) during normal runs and applies the same **`tryResolvePathWithExtensionVariants`** step before **`pathExists`** / basename-prefix fallback. **Ambiguous bare filenames:** when **`git ls-files`** would match multiple paths, **`resolveTrackedPathWithPrFiles`** (`tools/prr/workflow/helpers/solvability.ts`) can pick the unique candidate that also appears in the PR **changed-file list** (diff vs base). **WHY:** Avoid targeting the wrong `foo.ts` in another tree; see **DEVELOPMENT.md** (path accounting). **Dedup + `ALREADY_FIXED`:** no-change **`RESULT: ALREADY_FIXED`** dismisses the full LLM dedup cluster (**`getDuplicateClusterCommentIds`**) so duplicate thread IDs are not left neither verified nor dismissed. **WHY:** Prevents empty-queue / “BUG DETECTED” repopulate loops (see **DEVELOPMENT.md**). +- **Path resolution:** Review **`comment.path`** is normalized (slashes, etc.). Fragment / extension-only paths use **`isReviewPathFragment`** and **`pathDismissCategoryForNotFound`** (`shared/path-utils.ts`) so dismissal is **`path-fragment`**, not **`missing-file`**, when the path cannot name a single file (e.g. `.d.ts`, bare `d.ts`). **Ambiguous** basename matches (multiple tracked files) use **`path-unresolved`**. Real root files like **`.env`** are **not** treated as fragments. Extension fallbacks for “tracked file not found” live in **`tryResolvePathWithExtensionVariants`** and solvability — extend there rather than duplicating ad hoc rules. The **fix prompt** (`buildFixPrompt`) receives the **clone workdir** (see above) during normal runs and applies the same **`tryResolvePathWithExtensionVariants`** step before **`pathExists`** / basename-prefix fallback. **Ambiguous bare filenames:** when **`git ls-files`** would match multiple paths, **`resolveTrackedPathWithPrFiles`** (`tools/prr/workflow/helpers/solvability.ts`) can pick the unique candidate that also appears in the PR **changed-file list** (diff vs base). **`UnresolvedIssue.resolvedPath`** and **`getIssuePrimaryPath`** (`tools/prr/analyzer/types.ts`) are the usual way to get the path to use for disk/git in workflow code after analysis — **WHY:** Raw **`comment.path`** can name the wrong file or not exist on disk; logs may still show the API path for human correlation. **Dedup + `ALREADY_FIXED`:** no-change **`RESULT: ALREADY_FIXED`** dismisses the full LLM dedup cluster (**`getDuplicateClusterCommentIds`**) so duplicate thread IDs are not left neither verified nor dismissed. **WHY:** Prevents empty-queue / “BUG DETECTED” repopulate loops (see **DEVELOPMENT.md**). ### Path resolution rules (canonical) 1. **Extension variants:** If the review path is missing on disk, **`tryResolvePathWithExtensionVariants`** (`shared/path-utils.ts`) tries mapped alternatives (e.g. `.js` → `.json`, `.ts`, `.mjs`, …) before dismissing. -2. **Fragments:** Bare **`.d.ts`** / extension-only paths are **`path-unresolved`** (not **`missing-file`**); pill sometimes calls this a **path-fragment** — same rule, same persisted category **`path-unresolved`**. Use **`pathDismissCategoryForNotFound`** + **`isReviewPathFragment`** so legacy state can be normalized on load. +2. **Fragments:** Bare **`.d.ts`** / extension-only paths are dismissed as **`path-fragment`** (not **`missing-file`**). **Ambiguous** paths use **`path-unresolved`**. Use **`pathDismissCategoryForNotFound`** + **`isReviewPathFragment`**; state load normalizes legacy **`missing-file`** / **`path-unresolved`** rows for fragment paths to **`path-fragment`**. 3. **One path → one category:** Do not assign the same logical path different dismissal categories in different code paths; extend **`path-utils`** / solvability instead of ad hoc branches. +4. **Open allowed-path policy (default):** `isPathAllowedForFix` (`shared/path-utils.ts`) allows any repo-relative path that passes hard deny rules (absolute, `node_modules`, `dist/`, `.cursor`, `.prr`, leading `root/` segment). The legacy first-segment heuristic (reject lowercase “package-shaped” roots not in a whitelist) is **off by default** so monorepos with roots like `agent/`, `cmd/`, `contracts/` and **adjacent** files cited in reviews are not silently stripped from `allowedPaths` / injection. Set **`PRR_STRICT_ALLOWED_PATHS=1`** to restore strict mode — then **`REPO_TOP_LEVEL`** plus **`dynamicRepoTopLevel`** (first segments from **`git diff --name-only`**, via **`setDynamicRepoTopLevelDirs`** in **`main-loop-setup.ts`**) whitelist segments. **WHY open default:** Cycle 72 — empty allowlists after filter caused no injection and burned iterations; pasted dependency paths rarely exist on disk, so `pathExists` already limits damage. **WHY keep `isReferencePathInComment`:** Comments that only *reference* another file must not auto-add that path to allowedPaths (canonical path rule) — separate from this gate; see **`.cursor/rules/prr-canonical-paths.mdc`**. ## Pill output (`pill-output.md`) -Root **`pill-output.md`** (when present) is a **pill** improvement list from a concrete **`output.log`**. That log is about PRR’s work on **another checkout** (the PR); the audit LLM may still suggest fixes for **clone paths** (`src/`, `packages/`, …). **Default:** When pill’s **`targetDir`** contains **`tools/prr`** (this monorepo layout), pill **post-filters** improvements so only paths under the tool repo (`tools/`, `shared/`, `tests/`, `docs/`, …) are **written** to **`pill-output.md`**. **`PILL_TOOL_REPO_SCOPE_FILTER=0`** disables that filter. Older runs and external layouts may still use **`**Status:** N/A (external)`** in **`pill-output.md`** for hand-triaged clone-only items; see **`DEVELOPMENT.md`** (“Pill output triage”). +Root **`pill-output.md`** (when present) is a **pill** improvement list from a concrete **`output.log`**. In this repo it is maintained as a **short index** of remaining Open / Partial items (not a full historical dump — **CHANGELOG**, **`tools/prr/AUDIT-CYCLES.md`** Cycle 71, **git history**). That log is about PRR’s work on **another checkout** (the PR); the audit LLM may still suggest fixes for **clone paths** (`src/`, `packages/`, …). **Default:** When pill’s **`targetDir`** contains **`tools/prr`** (this monorepo layout), pill **post-filters** improvements so only paths under the tool repo (`tools/`, `shared/`, `tests/`, `docs/`, …) are **written** to **`pill-output.md`**. **`PILL_TOOL_REPO_SCOPE_FILTER=0`** disables that filter. Older runs and external layouts may still use **`**Status:** N/A (external)`** in **`pill-output.md`** for hand-triaged clone-only items; see **`DEVELOPMENT.md`** (“Pill output triage”). ## Conventions @@ -153,13 +154,15 @@ Root **`pill-output.md`** (when present) is a **pill** improvement list from a c | PRR orchestration | `tools/prr/resolver.ts`, `tools/prr/workflow/` | | GitHub API | `tools/prr/github/api.ts` | | Review ingestion / dedup | `tools/prr/github/review-ingestion-filters.ts`, `tools/prr/github/issue-comment-dedup.ts`, `tools/prr/github/bot-author-normalize.ts`, `tools/prr/workflow/helpers/review-body-normalize.ts`, `workflow/issue-analysis.ts` (heuristic + LLM + cross-file dedup, **`dedup-v2`** cache) | -| LLM / rotation | `tools/prr/llm/`, `tools/prr/models/rotation.ts`, `shared/llm/` (rate-limit, model-context-limits, elizacloud) | +| LLM / rotation | `tools/prr/llm/` (`client.ts`, `llm-client-transport.ts`, `llm-client-types.ts`), `tools/prr/models/rotation.ts`, `shared/llm/` (rate-limit, model-context-limits, elizacloud) | | Catalog model advice (dismiss + auto-heal) | `tools/prr/workflow/helpers/outdated-model-advice.ts`, `tools/prr/workflow/catalog-model-autoheal.ts`, `shared/model-catalog.ts`, `generated/model-provider-catalog.json` | -| State, lessons | `tools/prr/state/` | +| State, lessons | `tools/prr/state/` (`state-transitions.ts` — **`transitionIssue`**; verification / dismissed modules delegate there) | | Split plan (planner) | `tools/split-plan/` | | Split rewrite plan | `tools/split-rewrite-plan/` (generates `.split-rewrite-plan.yaml` from group plan + clone) | | Split exec (runner) | `tools/split-exec/` (branch/PR logic: `run.ts`; optional rewrite plan → rebuild branches, else one commit per split) | | Shared logger | `shared/logger.ts` (re-exports `shared/timing.ts`, `shared/token-tracking.ts`) | | Shared constants | `shared/constants.ts` (shim) → `shared/constants/index.ts` + `shared/constants/*.ts` | +| Prompt / code budgeting | `shared/prompt-budget.ts` (`computeBudget`, `fitToBudget`) — injected file text size vs model context | +| Blast radius / dependency graph | `shared/dependency-graph/` (`import-scanner.ts`, `specifier-resolver.ts` — async path probes, `proximity.ts`, `graph.ts` — index-based BFS) — regex imports + proximity; wired in `main-loop-setup.ts`, `issue-analysis.ts`, `execute-fix-iteration.ts`. **WHY:** PR-adjacent scope without language toolchains; build failure or disable ⇒ treat all issues in-scope. | | Shared config | `shared/config.ts` | | Shared git | `shared/git/` | diff --git a/CHANGELOG.md b/CHANGELOG.md index dc73eac..5dfe70e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,131 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **Dismissal category `path-fragment`:** Extension-only / bare **`.d.ts`** review paths now persist as **`path-fragment`**; **ambiguous** basename matches stay **`path-unresolved`**. **`pathDismissCategoryForNotFound`** and state load normalize legacy **`missing-file`** / **`path-unresolved`** fragment rows to **`path-fragment`**. Thread replies include **`path-fragment`** with a distinct one-liner (**`thread-replies.ts`**, **`docs/THREAD-REPLIES.md`**). **`PathDismissCategory`** exported from **`shared/path-utils.ts`**. +- **`getEmptyPromptBodyRejectionStats()`** (**`shared/logger.ts`**) — snapshot of empty PROMPT/RESPONSE refusals by **`kind:slug`** before **`closeOutputLog()`**. Shutdown appends the same breakdown (top 20) to **output.log** next to the empty-body **WARNING** (pill-output). Tests: **`tests/prompt-log-empty-stats.test.ts`**. + +### Fixed + +- **`verifyFixes` dedup cluster:** After a successful verifier (or pattern-absent auto-verify), mark **every** id in **`getDuplicateClusterCommentIds(anchor, duplicateMap)`** verified — not only **`duplicateMap.get(anchor)`**. Queued rows can be a non-canonical dupe; the old path left canonical/sibling threads unverified → empty queue vs full **`comments`** accounting (**`tools/prr/workflow/fix-verification.ts`** uses **`duplicate-cluster-verify.ts`**). + +- **Recovery + final-audit dedup cluster:** **`trySingleIssueFix`** / **`tryDirectLLMFix`** now call **`markVerifiedClusterForFixedIssue`** with **`stateContext.duplicateMapForSession`** (set from analysis each push iteration). **`runFinalAudit`** receives **`duplicateMap`** and marks the full cluster on “no action needed” / FIXED pass paths. **WHY:** Same gap as batch verify — only the queued comment id was verified, leaving dedup siblings unverified. + +- **Issue analysis — stale / already-fixed dismiss + solvability autoVerify:** **`dismissDuplicateCluster`** (**`issue-analysis-dedup.ts`**) dismisses every id in **`getDuplicateClusterCommentIds`** (not only the canonical analyzed row). Sequential + batch LLM paths use it for **stale** and **already-fixed** (with **`markVerifiedClusterForFixedIssue`**). Solvability **autoVerify** defers until after dedup, then marks the full cluster. **WHY:** **`propagateStatusToDuplicates`** updated **`commentStatuses`** only; **`dismissedIssues`** / reporting stayed single-id. + +- **Catalog model auto-heal cluster:** When **`state.dedupCache`** matches the current PR comment-id set (**`dedup-v2`**), noop + disk heal **`markVerified`** applies to the full dedup cluster — canonical keeps **`catalog-autoheal`** / **`catalog-autoheal-noop`**; dupes use **`autoVerifiedFrom`** = canonical id. Non-canonical rows skip when any cluster member is already verified. **First push iteration** has no cache → singleton cluster (unchanged). Tests: **`tests/outdated-model-advice.test.ts`**. + +- **Issue analysis — status cache re-dismiss + stale re-check unmark:** Persisted **`commentStatuses`** “resolved” hits now **`dismissDuplicateCluster`** (same as fresh LLM dismiss). Batch + sequential **still exists** re-check uses **`unmarkVerifiedClusterForStaleRecheck`** so verified dedup siblings re-enter the fix queue. **`duplicate-cluster-verify.ts`**. Tests: **`tests/mark-verified-cluster.test.ts`**. + +- **Fix loop dismissals — dedup cluster:** **`dismissDuplicateClusterFromComments`** (**`issue-analysis-dedup.ts`**) dismisses every cluster id using **`comments[]`** for path/body (no **`duplicateItems`** map). Wired in **`push-iteration-loop.ts`** (could-not-inject, delete-entirely, mid-loop solvability chronic / already-fixed / remaining) and **`execute-fix-iteration.ts`** (H3 hallucination threshold, pre-fixer solvability, no-progress remaining). Queue eviction uses **`getClusterIdsAccountedOnState`** — only ids **verified or dismissed** after the attempt — so cluster members skipped when missing from **`comments`** are not removed from **`unresolvedIssues`** (fixes empty queue + BUG DETECTED repopulate). Tests: **`tests/dismiss-duplicate-cluster.test.ts`**. + +- **`mergeCommentsForClusterDismiss`:** When the full PR **`comments`** list is absent, union batch issue rows with **`allComments`** for sibling dismiss lookup (PR row wins on id clash): **`tryDirectLLMFix`** ALREADY_FIXED / unchanged-code paths, **`fix-verification`** file-unchanged, **`recheckSolvability`** file-deleted, **`applyBlastRadiusToUnresolved`** (**`issue-analysis-dedup.ts`**, **`recovery.ts`**, **`fix-verification.ts`**, **`solvability.ts`**, **`issue-analysis.ts`**). + +- **Single-issue recovery — dedup map key:** **`trySingleIssueFix`** passes optional PR **`comments`** through resolver callbacks into **`resolveDuplicateMapForRecovery`**, aligned with **`tryDirectLLMFix`** (**`recovery.ts`**, **`resolver.ts`**, **`run-orchestrator.ts`**, **`push-iteration-loop.ts`**, **`fix-loop-rotation.ts`**, **`post-verification-handling.ts`**, **`execute-fix-iteration.ts`**). + +- **ElizaCloud transport:** **`llmComplete`** acquires the rate-limit slot with **`await acquireElizacloud()`** then sets **`elizaAcquired`** (**`llm-client-transport.ts`**). + +- **Final-audit truncation demotion vs line-centered excerpts:** **`getFullFileForAudit`** now returns **`{ snippet, fixSiteInWindow }`** (full file or keyword/line-centered budget excerpt ⇒ **`fixSiteInWindow: true`**; head-only fallback without anchor ⇒ **`false`**). **`runFinalAudit`** passes the flag into **`LLMClient.finalAudit`**; the UNFIXED→pass truncation guard skips when **`fixSiteInWindow`** so adversarial UNFIXED on anchored excerpts is not demoted by footer heuristics alone (**`issue-analysis-snippet-helpers.ts`**, **`workflow/analysis.ts`**, **`tools/prr/llm/client.ts`**). Legacy **`getFullFile`** callbacks may still return a plain string (**`fixSiteInWindow`** treated as false). + +- **Empty LLM success bodies → prompts.log ERROR:** **`llm-api`** fixer now uses **`openAiChatCompletionContentToString`** for OpenAI-style **`message.content`** (array parts were coerced to `''` before). On whitespace-only success, writes **`debugPromptError`** instead of an empty RESPONSE (**`shared/runners/llm-api.ts`**). **Pill** **`debugPrompt`** returns a **slug**; **`debugResponse(slug, …)`** / **`debugPromptError`** pair with it; **`writeToPromptLog`** supports **ERROR** and refuses empty PROMPT/RESPONSE with a marker line (**`tools/pill/logger.ts`**, **`tools/pill/llm/client.ts`**). **PRR transport** logs a **console.warn** for empty success on **any** provider, not only ElizaCloud (**`tools/prr/llm/llm-client-transport.ts`**). + +- **Final-audit truncation demotion:** Line-centered budget excerpts from **`fitToBudget`** (footer `excerpt — … centered on line …` / `excerpt only — file has … centered on line …`) are no longer treated as blind truncation for UNFIXED→pass demotion in **`finalAuditSnippetLooksTruncatedOrExcerpt`** — the review anchor is in the visible window (**`tools/prr/llm/verification-heuristics.ts`**). **`getFullFileForAudit`** **`debug`** logs when an excerpt is produced due to budget (**`tools/prr/workflow/issue-analysis-snippet-helpers.ts`**). + +- **HEAD change + stale dismissals:** **`StateManager.load()`** clears **`stale`** category dismissals together with **`already-fixed`** / **`chronic-failure`** when the PR head SHA changes (unless **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD`** already cleared all). **WHY:** Stale-thread verdicts can be wrong after rebase/revert. + +### Changed + +- **llm-api request timeout:** Non-full-file fix calls scale client-side wait **90s → 120s / 150s / 180s** by enriched prompt length (tiers at **60k / 100k / 140k** chars) so large search/replace batches are less likely to hit **`Request timeout after 90s`** before the model returns. Full-file rewrite remains **180s**. Optional fixed override: **`PRR_LLM_API_REQUEST_TIMEOUT_MS`**. **`getLlmApiRequestTimeoutMs`** in **`shared/constants/polling.ts`**; **`shared/runners/llm-api.ts`**. + +- **Git submodule (gitlink) review paths:** **`assessSolvability`** check **0e0** dismisses threads anchored on index mode **160000** paths as **`not-an-issue`** with a remediation hint; **`issue-analysis`** treats snippet placeholder + gitlink like **`not-an-issue`**; final audit skips adversarial LLM with a synthetic **FIXED (git submodule)** when the snippet is unreadable (**`shared/git/git-submodule-path.ts`**, **`solvability.ts`**, **`issue-analysis.ts`**, **`analysis.ts`**). Tests: **`tests/git-submodule-path.test.ts`**, **`tests/solvability-submodule.test.ts`**. +- **RESULTS SUMMARY:** Prints **Final audit non-affirming passes: N (X UNCERTAIN, Y truncation guard)** when applicable (**`tools/prr/ui/reporter.ts`**). Success-path final-audit message splits the same counts (**`workflow/analysis.ts`**). +- **Git recovery scan:** When no merge-base ref resolves (`origin/` / main / master / develop), **`scanCommittedFixes`** logs a **once-per-workdir** yellow warning and uses the last **100** commits for **`prr-fix:`** grep (pill-output). When **`git log`** throws, logs a **once-per-workdir** warning and returns **[]** non-fatally (**`shared/git/git-commit-scan.ts`**). **`clearScanCommittedFixesCache()`** clears warn dedupe sets for tests. +- **State load — dismissed row dedupe (pill #539):** **`dedupeDismissedIssuesByCommentId`** collapses duplicate **`dismissedIssues`** rows for the same **`commentId`** (latest **`dismissedAt`** wins; timestamp tie → **`path-fragment`** > **`path-unresolved`** > **`missing-file`**). Runs after fragment category normalization (**`tools/prr/state/state-core.ts`**). Tests: **`tests/dismissed-issues-dedupe.test.ts`**. +- **`applyDismissedIssuesLoadNormalization`:** Shared helper (**`state-core.ts`**) used by **`loadState`** and legacy **`StateManager.load`** so fragment migration + id dedupe stay aligned (**pill-output**). +- **CodeRabbit stale review vs HEAD:** The yellow “inline comments may be stale” line is emitted **once per process** per **`(owner, repo, PR#, HEAD, bot review SHA)`** if setup calls **`checkCodeRabbitStatus`** multiple times with the same refs (**`workflow/startup.ts`** — pill-output #619). +- **State load helpers:** **`applyResolverStateLoadCoreNormalization`** (verified dedupe, **`noProgressCycles`** reset, timing hydration) and **`applyResolverStatePostOverlapCleanup`** (**`recoveredFromGitCommentIds`**, skip-list **modelPerformance** prune) are shared by **`loadState`** and **`StateManager.load`** (**`state-core.ts`**, **`manager.ts`**). Tests: **`tests/state-load-normalization.test.ts`**. +- **`getFullFileForAudit`:** **`debug`** always logs **`full file within budget`** vs **`budget excerpt`** with **`anchorHow`** (`review-line` / `keyword` / `none`), **`fixSiteInWindow`**, and **`formatNumber`** counts (**`issue-analysis-snippet-helpers.ts`** — pill-output #509). +- **Pull / rebase:** **`pullLatest`** prints a one-line hint (**`git rebase --continue`** / **`--abort`**) when rebase stops on conflicts (**`shared/git/git-pull.ts`**). +- **Docs:** **README** troubleshooting — PRR does not bundle git hooks (pill cites foreign repos); **AGENTS.md** — **`closeOutputLog`** empty-body **kind:slug** summary + **`getEmptyPromptBodyRejectionStats`**. +- **README** operator env table: **`PRR_REPLY_TO_THREADS`**, thinking budget, min delay, task timeout, clone/fetch timeouts, conflict-separator repair, model-catalog overrides, pill log paths (**pill-output open-items triage**). +- **`shared/constants/models.ts`:** Skip-list docblock — maintainer refresh contract + **last reviewed 2026-04-08** note. +- **AAR Summary:** When bucket union ≠ loaded comment count, prints a second gray line explaining larger (state/exhaustion IDs off-fetch) vs smaller (outdated-only rows) (**`tools/prr/ui/reporter.ts`**); **DEVELOPMENT.md** documents the union. **Solvability 0a2:** Wider rollup scan (**3k** chars), **bold-only** and **HTML ``** variants for CodeRabbit-style recap headings (**`tools/prr/workflow/helpers/solvability.ts`**); tests in **`tests/solvability-pr-comment.test.ts`**. +- **Thread replies (422 UX):** **`postThreadReplies`** returns **`failed422`**, **`failedOther`**, **`skippedDueTo422Stop`**; prints an end-of-run summary with **`formatNumber`** and an expanded yellow line when consecutive all-422 batches stop posting (**`thread-replies.ts`**). Low post-rate nudge in **`final-cleanup`** distinguishes 422 vs other failures. **README** troubleshooting + **docs/THREAD-REPLIES.md** (422 storms). +- **Dedup LLM phases + output.log:** **`completeWithCheapModel`** accepts optional **`CompleteOptions`** (except **`model`**). Per-file dedup passes **`phase: dedup-v2-grouping`**, cross-file **`dedup-v2-cross-file`**. **`debugPrompt` / `debugResponse`** one-liners in **`output.log`** include **`phase`** when set (**`shared/logger.ts`**). **AGENTS.md** / **DEVELOPMENT.md**: 4-char **`NONE`** responses for dedup are expected. + +- **Thread-reply idempotency:** When **`PRR_BOT_LOGIN`** is unset, PRR calls GitHub **`GET /user`** (via **`octokit.users.getAuthenticated`**) once per API client to use the token’s **`login`** for cross-run “already replied” checks — same behavior as explicitly setting **`PRR_BOT_LOGIN`**. Warns only if there are reply candidates and the user cannot be resolved. Removed the startup warning that always nagged when the env was unset (**`tools/prr/github/api.ts`**, **`tools/prr/workflow/thread-replies.ts`**, **`tools/prr/index.ts`**). + +- **Catalog model auto-heal:** Skips entirely when the clone workdir is **dirty** (`git status --porcelain` non-empty) or git status cannot be read — avoids mixing auto-heal edits with unrelated local changes (**`tools/prr/workflow/catalog-model-autoheal.ts`**). + +- **Path variants:** **`EXTENSION_VARIANT_MAP`** includes **`.json` → `.js`, `.ts`, `.cjs`, `.mjs`** for reviews that cite a JSON path when only a JS/TS config exists (**`shared/path-utils.ts`**). + +- **Session model skip reset:** **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`** now drops each **`skippedModelKeys`** entry individually after that many fix iterations **since that key was skipped** (tracked in **`sessionSkippedSinceFixIteration`**) instead of clearing the whole set on a global boundary (**`tools/prr/state/state-context.ts`**, **`tools/prr/models/rotation.ts`**, **`iteration-cleanup.ts`** passes **`fixIteration`** into **`recordSessionModelVerificationOutcome`**). + +- **ElizaCloud 429 backoff:** **`notifyRateLimitHit`** adds up to **30s** random jitter after the **60s** base so concurrent processes do not resume in lockstep (**`shared/llm/rate-limit.ts`**). + +- **Latent merge-tree probe:** If **`git merge-tree`** fails without parseable conflict paths and stderr looks like an **unsupported/old git** error, the probe returns **`ran: false`** with a **`skipReason`** instead of treating it as a latent conflict (**`shared/git/git-conflicts.ts`** — **`mergeTreeFailureLooksUnsupported`**). + +- **Model env validation:** **`MODEL_NAME_PATTERN`** rejects **`//`**; **`MODEL_NAME_MAX_LENGTH`** (**200**); **`loadConfig()`** falls back to the provider default when **`PRR_LLM_MODEL`** is invalid and ignores invalid optional **`PRR_VERIFIER_MODEL`** / **`PRR_FINAL_AUDIT_MODEL`** / **`SPLIT_PLAN_LLM_MODEL`** with a warning (**`shared/config.ts`**). + +- **Skip-list env:** **`PRR_ELIZACLOUD_EXTRA_SKIP_MODELS`** and **`PRR_ELIZACLOUD_INCLUDE_MODELS`** ignore malformed comma-separated tokens (empty, **`//`**, bad chars) with a one-time warning (**`shared/constants/models.ts`**). + +- **State load overlap repair:** **`loadState`** / **`StateManager.load()`** log up to **15** affected **comment id(s)** when cleaning verified∩dismissed overlap (**`tools/prr/state/state-core.ts`**, **`manager.ts`**). + +- **RESULTS SUMMARY:** Clarifies that **Final audit re-queued** counts only **previously verified** threads re-opened by the adversarial pass vs **Remaining** (**`tools/prr/ui/reporter.ts`**). + +- **prompts.log shutdown summary:** Empty-body count uses **`formatNumber`** (**`shared/logger.ts`**). + +- **Model catalog load:** **`providers.*.apiIds`** arrays are sanitized to non-empty strings only; all-invalid arrays fall back to an empty catalog (**`shared/model-catalog.ts`**). + +- **Model rotation visibility:** **`rotateModel`**, recommended-model advance, and **`switchToNextRunner`** use **`warn()`** (⚠) and append per-model **verified / failed** counts for the outgoing selection when stats exist (**`tools/prr/models/rotation.ts`**). + +- **Operator docs:** **`docs/MODELS.md`** (canonical vs re-export, re-evaluate skips, last reviewed); **`.env.example`** (**`PRR_VERIFIER_MODEL`**, **`PRR_FINAL_AUDIT_MODEL`**, **`PRR_LLM_MIN_DELAY_MS`**, **`PRR_MODEL_CATALOG_PATH`**, **`PRR_CLONE_TIMEOUT_MS`**, **`PRR_FETCH_TIMEOUT_MS`**, corrected **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD`** comment); **`DEVELOPMENT.md`** (HEAD-change dismissal set + load overlap repair contract); **`AGENTS.md`** (HEAD-change categories, overlap log id cap). + +### Added + +- **Blast radius (multi-signal dependency scope):** After `git diff --name-only` vs base, PRR builds a best-effort **import/include graph** (whole-file regex for TS/JS, Python, Go, Rust, C/C++, Java/Kotlin, Ruby, PHP) plus **same-directory** and **filename-pattern** proximity (tests, CSS modules, stories), then BFS **both directions** with **`PRR_BLAST_RADIUS_DEPTH`** (default **2**). Issues get **`inBlastRadius`** / **`blastRadiusDepth`**; **`sortByPriority`** lists out-of-scope last. **`allowedPathsForInjection`** is intersected with the radius set (fixer batch paths unchanged; empty intersection falls back to full batch). Opt-in **`PRR_BLAST_RADIUS_DISMISS=1`** dismisses as **`out-of-scope`** (thread reply: “Outside PR scope — manual review recommended.”). **`PRR_DISABLE_BLAST_RADIUS`**, **`PRR_BLAST_RADIUS_MAX_FILES`**, **`PRR_BLAST_RADIUS_TIMEOUT_MS`**, **`PRR_BLAST_RADIUS_MAX_DIR_NEIGHBORS`** (proximity cap per directory). Analysis cache stores **`blastRadiusPaths`** for injection on cache hit. **WHY:** Focus fix order and prompt context on PR-relevant files without requiring language toolchains; failures fall back to “all in-scope.” **`shared/dependency-graph/`**, **`tests/dependency-graph.test.ts`**, **README**, **DEVELOPMENT.md**, **AGENTS.md**, **`.env.example`**, **`docs/ROADMAP.md`** (optional follow-ups section). Post-ship: **Changed** — async specifier resolution + O(1) BFS queue (see below). + +- **Unified issue state writes (`transitionIssue`):** All per-comment transitions among **verified**, **dismissed**, **unverified**, and **undismissed** go through **`tools/prr/state/state-transitions.ts`** (`transitionIssue`). **`markVerified`**, **`unmarkVerified`**, **`dismissIssue`**, **`undismissIssue`**, and legacy **`StateManager`** methods **`markCommentVerifiedFixed`**, **`unmarkCommentVerifiedFixed`**, **`addDismissedIssue`** delegate there. **WHY:** Audits found duplicated array surgery and drift (e.g. **`verifiedThisSession`** or **`commentStatuses`** out of sync with **`verifiedFixed`** / **`dismissedIssues`**). One writer keeps mutual exclusion, apply-failure cleanup on verify, and session tracking consistent. **`recoverVerificationState`** uses **`markVerified(..., { skipSessionTracking: true })`** so git-recovered **`prr-fix:`** IDs do not count as “fixed this session” for the commit gate. **`addDismissedIssue`** passes **`replaceExistingDismissal: true`** to preserve legacy “replace row” semantics vs idempotent **`dismissIssue`**. Tests: **`tests/state-transitions.test.ts`**. **Docs:** **DEVELOPMENT.md** (unified state + prompt budget), **AGENTS.md** (state invariant bullet), **README** (brief mention under robustness). + +- **Prompt context budgeting (`shared/prompt-budget.ts`):** **`computeBudget`** derives how many characters of code fit for a model given **`reservedChars`** (instructions, wrappers) and optional **`divisor`** (e.g. fixes per batch). **`fitToBudget`** builds line-numbered excerpts centered on the review line or a keyword anchor from the comment body. **`computePerFixVerifyCurrentCodeBudget`** and **`truncateNumberedCodeAroundAnchor`** align batch verify “current code” blocks with **`LLMClient.buildBatchVerifyPrompt`** and **`getCurrentCodeAtLine`** in **`fix-verification.ts`**. **WHY:** Separate char/line caps per path (snippet vs wider analysis vs verify) drifted and caused either tiny context (false STALE/YES) or oversized prompts (timeouts / 500s). One shared model-aware budget scales with **`getMaxElizacloudLlmCompleteInputChars`** / fix-prompt ceilings. **`buildWindowedSnippet`**, **`getFullFileForAudit`**, and **`getCodeSnippet`** consume **`computeBudget`** / **`fitToBudget`** (with **`getCodeSnippet`** still using line-window constants before char shrink). Tests: **`tests/prompt-budget.test.ts`**. + +- **Canonical path use in workflow (audit-cycle follow-through):** File reads, git checks, dismissals, and bailout records prefer **`getIssuePrimaryPath(issue)`** (`resolvedPath ?? comment.path`) or **`resolveTrackedPath(workdir, comment.path, body)`** where the clone must see the real tracked file. **`analysis.ts`** uses **`commentFilePathForWorkdir`** for snippets and **`pathTrackedAtGitHead`**; raw **`comment.path`** stays intentional for GitHub-facing logs and fragment gates (**`shouldSkipFinalAuditLlmForPath`**). **`main-loop-setup`** resolves **`primaryPath`** for final-audit re-entry. **WHY:** Basename-only or extension-variant paths from the API are ambiguous; using the resolved path for disk/git avoids wrong-file edits and “file not found” loops. + +- **pill CLI:** **`--output-log `** and **`--prompts-log `** (and env **`PILL_OUTPUT_LOG_PATH`** / **`PILL_PROMPTS_LOG_PATH`**) to audit explicit log files while keeping code context from **``** — reruns without moving logs into the project root. + +- **Open allowed-path policy (default):** `isPathAllowedForFix` (`shared/path-utils.ts`) no longer applies the legacy first-segment heuristic unless **`PRR_STRICT_ALLOWED_PATHS=1`**. **WHY change:** That heuristic treated unknown lowercase first segments as “external package” paths. Real monorepos use roots like `agent/`, `cmd/`, `contracts/` that were not in the static `REPO_TOP_LEVEL` set — `filterAllowedPathsForFix` dropped the primary file from `allowedPaths` and injection, so the fixer ran without file contents and iterations burned (audited eliza-style run, **Cycle 72**). **WHY default open:** Reviews often need **adjacent** repo files (callers, shared modules) even when the PR diff never touched that top-level dir; hard denies still block what we must never edit (absolute paths, `node_modules`, `dist/`, `.cursor`, `.prr`, `root/` segment). **Strict mode:** **`PRR_STRICT_ALLOWED_PATHS=1`** restores the old filter using **`REPO_TOP_LEVEL`** plus **`dynamicRepoTopLevel`** (first segments from **`git diff --name-only`** in **`processCommentsAndPrepareFixLoop`**). **Docs:** **README** (Configuration table + “Fixer allowed paths”), **DEVELOPMENT.md** (fixer allowed paths), **AGENTS.md** (path rules), **docs/ROADMAP.md** (single-issue / allow-path item marked done), **`.env.example`**. Code comments: **`shared/path-utils.ts`** file header and **`isPathAllowedForFix`**. + +- **Solvability: bot rollup headings (Cycle 72):** `isSummaryOrMetaReviewComment` (`tools/prr/workflow/helpers/solvability.ts`) now treats common CodeRabbit-style section headers in the first ~1.5k chars as meta-review: **`### Remaining Issues`**, **`Issues Fixed Since Previous Reviews`**, **`Issues Addressed in Previous Reviews`**, **`Previously Fixed Issues`**, **`Outstanding Issues`**, **`Issues from Previous Reviews`**. **WHY:** Those threads are PR-wide recaps, not a single edit target; they previously missed the table/`### Summary` heuristics and burned single-issue / couldNotInject iterations. **`(PR comment)`** bodies with the same headings dismiss at check **0a2** before long-body path inference. Tests: **`tests/solvability-pr-comment.test.ts`**. + +- **Batch issue analysis: smaller batches for Qwen-3-235b-class models (ElizaCloud):** `LLMClient.batchCheckIssuesExist` uses the same **10 issues per batch** cap as small models when the model id matches **`qwen-3-235b`** / **`qwen-3-235`**. **WHY:** Cycle 72 — a single ~21-issue batch took ~8 minutes wall time; smaller batches reduce latency and timeout risk on heavy verifiers. + ### Fixed +- **`commentStatuses` not cleared on HEAD change (`tools/prr/state/manager.ts`):** The head-change block in `StateManager.load()` now also deletes `commentStatuses` entries with `status: 'resolved'` or `status: 'verified'` when clearing verified arrays. **WHY:** Without this, a rebase would zero `verifiedFixed`/`verifiedComments` but leave stale `status: 'resolved'` entries in the status map, causing callers to see contradictory state (verified arrays empty, but status map says resolved). Logs the count of cleared entries. (Audit Pattern H, 2026-04-05) + +- **SSH URL redaction in `shared/git/redact-url.ts`:** `redactUrlCredentials` now also redacts SSH-style git URLs (`git@host:org/repo` → `git@***:***`) and the HTTPS char class includes `\r` so Windows CRLF git output doesn't expose credentials. (Audit Pattern C, 2026-04-05) + +- **`prr-fix:` commit-scan regex tightened (`shared/git/git-commit-scan.ts`):** Changed from `^prr-fix:(.+)$` to `^prr-fix:(\S+)` so trailing non-whitespace text after a commit ID (e.g. an inline note) is not captured as part of the ID. (Audit Pattern B, 2026-04-05) + +- **Pill chunked audit fail-fast (`tools/pill/orchestrator.ts`):** Pill's chunk audit loop now uses `runWithConcurrencyAllSettled` instead of `runWithConcurrency` (fail-fast). A single chunk HTTP error no longer aborts all remaining chunks; partial results from successful chunks are still collected and merged. (Audit Pattern D, 2026-04-05) + +### Changed + +- **Blast-radius graph build (`shared/dependency-graph/`):** **`resolveSpecifier`** now uses **`fs/promises`** (**`access`**, **`readFile`**, **`readdir`**, **`stat`**) instead of sync **`existsSync` / read / readdir / stat**. **`computeBlastRadius`** BFS uses an **index cursor** on the queue instead of **`Array.shift()`** (O(1) dequeue on large graphs). **WHY:** Thousands of specifier probes no longer block the Node event loop (signals, concurrent timers, pill hooks stay responsive); BFS avoids quadratic dequeue cost when hop counts and fan-out are large. Behavior and fallbacks unchanged (build failure → all issues in-scope). + +- **Docs / `pill-output.md`:** Trimmed to a **remaining follow-ups** index (removed thousands of **Done** / obsolete items). Implementation history stays in **[Unreleased]** below and **`tools/prr/AUDIT-CYCLES.md`** (Cycle 71). **`DEVELOPMENT.md`** (Pill output triage) documents the index + append workflow. + +- **Test target paths:** **`testBasenameWithSuffix`** / **`normalizeDoubledTestExtension`** in **`test-path-inference.ts`** — avoids **`foo.test.test.ts`** when the source basename already ends with **`.test`** / **`.spec`** (recovery **`__tests__/`** candidate, prompt-builder mentioned-test paths, and colocated inference). **`PRR_MID_LOOP_NEW_COMMENT_CAP`** (default **45**, **`0`** = unlimited) caps how many new bot threads are enqueued per mid–fix-loop batch; overflow stays on the PR for the next full analysis. **Stale verification** expiry scales with **`floor(iterations / 15)`** instead of **`/ 10`**. On PR **HEAD** change, **`chronic-failure`** dismissals are cleared with **`already-fixed`** unless **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD`**. **llm-api** fixer: **`debug`** logs a response tail when no **``** blocks parse; user message includes formatted response size. + +### Fixed + +- **`LlmApiRunner`:** On API errors (timeout, connection, 4xx/5xx), **`debugPromptError`** writes an **`ERROR`** line to **`prompts.log`** for the same slug as **`PROMPT`** — matches in-process **`LLMClient`** behavior for audits. + +- **`LLMClient.batchVerifyFixes`:** Batches are packed by **prompt character budget** on ElizaCloud (min of **90%** of **`getMaxElizacloudLlmCompleteInputChars`** and **72,000** chars) as well as **`MAX_VERIFY_FIXES_PER_BATCH`**, reducing oversized verify calls that stall or hit connection errors. Transient retry matcher includes **`connection error`** / **`ETIMEDOUT`**. + +- **`LLMClient.complete` (ElizaCloud / in-process):** On terminal failures (connection error, exhausted retries, non-retry 4xx/5xx), **`debugPromptError`** now writes an **`ERROR`** line to **`prompts.log`** for the same slug as **`PROMPT`** — avoids orphan prompts when no **`RESPONSE`** (audit: **`#0022/llm-elizacloud`** + **`output.log`** “Connection error”). **401** path logs before the wrapped error. Removed unreachable post-loop block that never ran. + - **`cloneOrUpdate` / `fetchAdditionalBranches`:** Replaces **`remote.origin`** fetch branches with **`git remote set-branches origin …`** (no **`--add`**) before fetches. **WHY:** **`--add`** accumulates stale branch names in the workdir; **`git fetch origin `** merges CLI refspecs with **`remote.origin.fetch`**, so one old invalid name (e.g. split **New PR:** titles with **`:`**) broke every subsequent fetch until the list was reset. - **split-exec clone:** Fetches only **`target_branch`** as an extra ref when it differs from **`source_branch`** — no longer passes every split **New PR** branch to **`cloneOrUpdate`**. **WHY:** Output branch names are not on **origin** until push; including them caused useless fetch warnings and **`invalid refspec`** when names contained **`:`** (conventional-commit-style titles). diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 70b33b8..3e78aec 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -25,7 +25,7 @@ Audits and agents sometimes conflate these when logs mention “workdir” next ## Pill output triage (`pill-output.md`) -**What it is:** Optional artifact from **pill** after auditing a run’s `output.log`. This repo may keep a copy at **`pill-output.md`** for traceability. +**What it is:** Optional artifact from **pill** after auditing a run’s `output.log`. **`pill-output.md`** is maintained as a **short index** of **remaining** Open / Partial follow-ups (not a full historical dump — **CHANGELOG** [Unreleased], **`tools/prr/AUDIT-CYCLES.md`**, and **git history** hold landed work and older pill text). **Tool-repo scope filter (default on here):** When pill’s **`targetDir`** contains **`tools/prr`**, only improvements whose **`file`** is under **`tools/`**, **`shared/`**, **`tests/`**, **`docs/`**, **`generated/`**, **`.cursor/`**, **`.github/`**, or an allowlisted root file (e.g. **`README.md`**, **`package.json`**) are **appended** to **`pill-output.md`**. Clone-shaped paths (`src/`, `packages/`, `apps/`, …) are dropped (with console / summary notes). **`PILL_TOOL_REPO_SCOPE_FILTER=0`** turns filtering off. **`PILL_TOOL_REPO_SCOPE_FILTER=1`** forces it on even when **`tools/prr`** is absent (rare). @@ -33,7 +33,7 @@ Audits and agents sometimes conflate these when logs mention “workdir” next **Mixed sources:** Items that reference **`src/`** or **`packages/`** usually mean **that other repository**, not prr’s layout — treat as **N/A (external)** when porting fixes into **this** repo. PRR work maps to **`tools/prr/`** and **`shared/`** (e.g. state under **`tools/prr/state`**, not root **`src/state.ts`**). **In this repo’s docs,** lesson examples mostly use **`tools/prr/`** / **`shared/`**; a few **downstream-style** snippets (e.g. eliza **`src/runtime.rs`**) illustrate foreign-repo lesson files — not paths in this tree. -**Per-item status:** Each improvement line includes **`**Status:** …`** and a legend at the top of **`pill-output.md`** (`Done (prr)`, `Partial (prr)`, `Open (prr)`, `N/A (external)`, etc.). +**Per-item status:** When you **append** new pill sections, use **`**Status:** …`** per line; the header of **`pill-output.md`** defines **`Done (prr)`**, **`Partial (prr)`**, **`Open (prr)`**, **`N/A (external)`**, etc. Merge new items into the index and drop **Done** blocks so the file stays short. **WHY document this here:** Contributors otherwise grep for `src/` in pill text and assume missing files are a bug in prr. The status lines record what was implemented in **this** tree vs. what was eliza/downstream-only. @@ -46,7 +46,7 @@ Many **`pill-output.md`** lines use **`src/...`**, **`packages/core/...`**, or * | External theme (pill path) | Action in prr monorepo | |----------------------------|-------------------------| | **`src/config.ts`** skip models | **`shared/constants.ts`** (`ELIZACLOUD_SKIP_MODEL_IDS`), **`PRR_ELIZACLOUD_EXTRA_SKIP_MODELS`**, **`PRR_ELIZACLOUD_INCLUDE_MODELS`**, **`validateAndFilterModels`** warning in **`tools/prr/models/rotation.ts`**. | -| **`src/state.ts`** verified ∩ dismissed | **`tools/prr/state/`** (`StateManager.load`, **`markVerified`** / **`markDismissed`**, overlap warnings in **`analysis.ts`**). | +| **`src/state.ts`** verified ∩ dismissed | **`tools/prr/state/`** — **`transitionIssue`** + **`StateManager.load`**, **`markVerified`** / **`dismissIssue`**, overlap warnings in **`analysis.ts`**. | | **`src/commit.ts`** emoji / noun phrase in commits | **`shared/git/git-commit-message.ts`** (`stripMarkdownForCommit`, **`generateCommitFirstLine`**). | | **`src/lessons.ts`** lesson bloat | **`.prr/lessons.md`** + **`tools/prr/state/lessons-prune.ts`**, **`compactLessons`**, **`prr --tidy-lessons`**. | | **`src/git.ts` / `scanCommittedFixes` / `baseBranch: null`** | **`shared/git/git-commit-scan.ts`**: pass **`prBaseBranch`** from the GitHub PR (wired from **`recoverVerificationState`** in **`run-setup-phase.ts`**) so `git log` uses `origin/..branch` when the clone isn’t `main`/`master`/`develop`. | @@ -61,20 +61,28 @@ Many **`pill-output.md`** lines use **`src/...`**, **`packages/core/...`**, or * | **CodeRabbit SHA ≠ HEAD** | Warn by default; **`PRR_EXIT_ON_STALE_BOT_REVIEW=1`** exits after workdir setup **before clone** (**`run-setup-phase.ts`**). | | **GitHub mergeable false / dirty** | Warn after clone by default; **`PRR_EXIT_ON_UNMERGEABLE=1`** exits **before clone** when **`--merge-base` is not set**. | | **Clear all dismissals on rebase** | Default: only **`already-fixed`** cleared on HEAD change; **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD=1`** clears entire **`dismissedIssues`** (**`state-core.ts`** / **`manager.ts`**). | -| **“path-fragment” in pill** | Same as **`path-unresolved`** in state — see **AGENTS.md** path rules (no separate category value). | +| **“path-fragment” in pill** | Persisted as **`path-fragment`** in state; **`path-unresolved`** is for ambiguous basename resolution — see **AGENTS.md** path rules. | | **merge-tree / latent conflicts (pill #32)** | **`shared/git/git-conflicts.ts`**: after fetch, **`probeLatentMergeConflictsWithOrigin`** runs **`git merge-tree`** for **`HEAD`** vs **`origin/`** and (when **`prBase ≠ prBranch`**) a **second** probe vs **`origin/`** (GitHub mergeable/dirty). **`checkAndSyncWithRemote`** warns for each; **`PRR_MATERIALIZE_LATENT_MERGE`** / **`PRR_MATERIALIZE_LATENT_MERGE_BASE`** materialize the corresponding **`git merge --no-commit`**. Skip: **`PRR_DISABLE_LATENT_MERGE_PROBE`**, **`PRR_DISABLE_LATENT_MERGE_PROBE_BASE`**. | **Fix-loop lifecycle (short):** Setup → clone/sync → **`recoverVerificationState`** (scan `prr-fix:` markers, optional **`prBaseBranch`**) → analysis → solvability/dismissals → fix iterations (push cycles, verification, rotation) → final audit → optional thread replies. **Resolver state file:** **`/.pr-resolver-state.json`** (see **`tools/prr/state/manager.ts`**). Lessons and other artifacts may live under **`/.prr/`** — do not confuse that folder with the resolver JSON path. **Diagram:** **AGENTS.md** (mermaid under “Fix-loop lifecycle”). ### State invariants, paths, and skip-list (operator reference) -**Verified vs dismissed:** A comment ID must not appear in both **verified** (`verifiedFixed` / `verifiedComments`) and **`dismissedIssues`**. **`markVerified`** / **`dismissIssue`** remove the ID from the opposite set; **`StateManager.load`** / **`loadState`** repair legacy overlap (prefer verified). If **RESULTS SUMMARY** still shows overlap at exit, capture **`output.log`** and delete **`.pr-resolver-state.json`** in the workdir — see **README.md** (Troubleshooting). +**Verified vs dismissed:** A comment ID must not appear in both **verified** (`verifiedFixed` / `verifiedComments`) and **`dismissedIssues`**. **`markVerified`** / **`dismissIssue`** (and legacy **`StateManager`** helpers) apply transitions through **`transitionIssue`** (`state-transitions.ts`) so **`verifiedThisSession`** and **`commentStatuses`** stay in sync; **`StateManager.load`** / **`loadState`** still repair legacy overlap (prefer verified). If **RESULTS SUMMARY** still shows overlap at exit, capture **`output.log`** and delete **`.pr-resolver-state.json`** in the workdir — see **README.md** (Troubleshooting). -**HEAD change:** When GitHub PR **head SHA** changes, **verified** state is cleared so fixes are re-checked; **`already-fixed`** dismissals are cleared. **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD=1`** clears **all** dismissals (aggressive, e.g. after a messy rebase). See **AGENTS.md** and **`tools/prr/state/state-core.ts`**. +**HEAD change:** When GitHub PR **head SHA** changes, **verified** state is cleared so fixes are re-checked; **`already-fixed`**, **`chronic-failure`**, and **`stale`** dismissals are cleared by default (others, e.g. **not-an-issue**, are kept unless overlap repair removes them). **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD=1`** clears **all** dismissals (aggressive, e.g. after a messy rebase). See **AGENTS.md** and **`tools/prr/state/manager.ts`** / **`state-core.ts`**. **State repair quick ref (pill / audits):** On load, **`StateManager.load`** / **`loadState`** may log **Cleaned N overlap** or **removed … from verifiedFixed** — that is automatic repair of legacy **`verified ∩ dismissed`**; a one-time message is normal. If **RESULTS SUMMARY** still warns **verified ∩ dismissed** at exit, delete **`/.pr-resolver-state.json`**, keep **`output.log`**, re-run (**README** Troubleshooting). After a messy rebase, consider **`PRR_CLEAR_ALL_DISMISSED_ON_HEAD=1`** once. **`prr --clean-state`** removes state accidentally committed in the workdir. -**Path resolution (review comments):** Extension fallbacks (**`tryResolvePathWithExtensionVariants`** in **`shared/path-utils.ts`**) and fragment handling (**`isReviewPathFragment`**, **`pathDismissCategoryForNotFound`**) keep **one path → one dismissal category**; legacy fragment **`missing-file`** is normalized to **`path-unresolved`** on load. Extend rules in **`path-utils`** / solvability, not ad hoc branches. +**State overlap repair contract (load):** After fragment-path normalization on **`dismissedIssues`**, **`loadState`** (**`tools/prr/state/state-core.ts`**) builds **`verifiedSet`** from **`verifiedFixed`** ∪ **`verifiedComments`** and snapshots **`dismissedIds`** from **`dismissedIssues`**. (1) Remove dismissed rows whose **`commentId`** is in **`verifiedSet`**. (2) Remove **`verifiedFixed`** ids that appear in that **snapshot** **`dismissedIds`**. (3) Remove **`verifiedComments`** rows whose **`commentId`** is in **`dismissedIds`**. Repair logs include up to **15** comment ids per step. **WHY snapshot:** Steps (2)–(3) use the pre-(1) dismissed set so legacy double-membership is scrubbed in one pass; new code should use **`transitionIssue`** only. + +**Path resolution (review comments):** Extension fallbacks (**`tryResolvePathWithExtensionVariants`** in **`shared/path-utils.ts`**) and fragment handling (**`isReviewPathFragment`**, **`pathDismissCategoryForNotFound`**) keep **one path → one dismissal category**; legacy fragment **`missing-file`** or old **`path-unresolved`** for the same path shape is normalized to **`path-fragment`** on load. Extend rules in **`path-utils`** / solvability, not ad hoc branches. **WHY one category per shape:** If one path is sometimes **`missing-file`** and sometimes **`path-fragment`** (e.g. bare **`.d.ts`**), state and solvability can disagree across runs and operators see churn; central rules prevent that. + +**Committed-fix scan cache (`scanCommittedFixes`):** **`shared/git/git-commit-scan.ts`** keeps an in-process map from a composite key to the list of recovered comment ids. **Key fields:** **`workdir`** (absolute clone root) **+** **`branch`** **+** **`headSha`** **+** **`prBaseBranch`** (GitHub base name or empty) **+** **`resolvedBaseLabel`** (the **`origin/…`** ref actually used for **`base..branch`**, or **`n100`** when the scan falls back to **`-n 100`**). **WHY `resolvedBaseLabel`:** The same workdir path and HEAD can still pick a different log range if **`origin/`** appears later or resolution falls back differently — without this segment, cache hits could return wrong ids. **Hit vs miss:** Same key → skip **`git log`**; any segment changes → rescan. **Markers:** Lines are parsed with **`/prr-fix:(\S+)/g`** so multiple ids on one line (squash commits) are all recovered. + +**Meta-review / rollup comments (solvability 0a2):** **`isSummaryOrMetaReviewComment`** (**`tools/prr/workflow/helpers/solvability.ts`**) dismisses status tables, **`### Summary`** with multiple status phrases, and **rollup section headings** in the first ~1.5k chars (e.g. **`### Remaining Issues`**, **`Issues Fixed Since Previous Reviews`**). **WHY:** Those posts summarize many threads; they are not one searchable fix. Cycle 72 showed they could miss the table heuristic yet still enter the fix loop and burn **`couldNotInject`** / single-issue slots. + +**Fixer allowed paths (`isPathAllowedForFix` / `filterAllowedPathsForFix`):** Paths in **`allowedPaths`**, **`TARGET FILE(S)`**, and the llm-api runner allowlist must pass **`isPathAllowedForFix`** in **`shared/path-utils.ts`**. **Default (open):** any repo-relative path is allowed if it is not absolute, does not live under **`node_modules`** or **`dist/`**, and does not contain internal segments (**.cursor**, **.prr**, leading **`root/`**). **WHY open:** A static “first segment must look like `src` or `packages`” rule silently dropped real targets (`agent/`, `cmd/`, `contracts/`, …), so the fixer could not inject file contents and burned iterations (**`tools/prr/AUDIT-CYCLES.md`** Cycle 72). Reviews that cite **adjacent** files (callers, shared utils) need those paths in the allow set even when the PR diff never touched that top-level dir — open default makes that possible without expanding a hardcoded list per customer repo. **WHY we still have strict mode:** Some operators may want the old “reject package-shaped first segments” behavior when comment bodies paste dependency paths; set **`PRR_STRICT_ALLOWED_PATHS=1`**. In strict mode, **`REPO_TOP_LEVEL`** plus **`setDynamicRepoTopLevelDirs`** (called from **`processCommentsAndPrepareFixLoop`** after **`git diff --name-only`**) whitelist first segments from the PR’s changed files. **WHY `isReferencePathInComment` stays separate:** Do not add a path to allowedPaths when the comment only *references* another file (e.g. “duplicates logic in X”) — that guard lives in solvability / CANNOT_FIX handling, not in **`isPathAllowedForFix`**. **Ambiguous basename + PR diff (`resolveTrackedPathWithPrFiles`):** **`resolveTrackedPathDetailed`** may return **`ambiguous`** when the review path is a bare filename and **`git ls-files`** finds several matches. **`resolveTrackedPathWithPrFiles`** (in **`tools/prr/workflow/helpers/solvability.ts`**) intersects those candidates with the PR’s **`changedFiles`** list (**`git diff --name-only`** `origin/...HEAD` from **`processCommentsAndPrepareFixLoop`**). **WHY:** The PR almost always intends the file it modifies; guessing another same-named file would be wrong-file fixes or “path does not exist” skips. If **0** or **2+** candidates lie in **`changedFiles`**, resolution stays unset (conservative). @@ -84,7 +92,9 @@ Many **`pill-output.md`** lines use **`src/...`**, **`packages/core/...`**, or * **AAR “Fixed this session” detail filter:** **`printAfterActionReport`** (**`tools/prr/ui/reporter.ts`**) omits per-line previews for threads whose sanitized body starts with **`### What this adds`** and for **`verifiedComments`** rows with **`autoVerifiedFrom`** (duplicate-of-canonical). **WHY:** Those lines are noise in operator handoff; the header still shows the total verified-this-session count plus a gray line counting omitted threads. -**Model skip list (ElizaCloud / llm-api):** Built-in skip IDs and reasons live in **`shared/constants.ts`** (`ELIZACLOUD_SKIP_MODEL_IDS`, `ELIZACLOUD_SKIP_REASON`). Operators can add removals via **`PRR_ELIZACLOUD_INCLUDE_MODELS`** or extra skips via **`PRR_ELIZACLOUD_EXTRA_SKIP_MODELS`** (see **README** / **`.env.example`**). **Session-level** skip after repeated zero-fix failures: **`PRR_SESSION_MODEL_SKIP_FAILURES`** (**`tools/prr/models/rotation.ts`**). **Session skip reset (pill #847):** **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`** clears session **`skippedModelKeys`** every N fix iterations (see **`maybeResetSessionSkippedModelsAfterFixIteration`** in **`rotation.ts`**, wired from **`push-iteration-loop.ts`**). **Maintainer cadence (ops):** From **`output.log`** **Model Performance**, add persistent **0%** ids to **`constants.ts`** with **`ELIZACLOUD_SKIP_REASON`** and a dated comment; mirror the table in **`docs/MODELS.md`** (“last reviewed” line). There is no automatic PR for the static list. +**AAR Summary bucket union vs “loaded”:** The line **Distinct comment IDs in at least one bucket** is the union of Fixed, Dismissed, Remaining, and exhausted IDs. It can **exceed** **PR comments loaded this run** when dismissed/remaining/exhausted reference IDs not returned in this fetch (state, exhaustion records). It can be **lower** when many fetched rows are only outdated / never queued. **WHY:** Operators misread 41 vs 51 as a bug (output.log audit eliza#6702). + +**Model skip list (ElizaCloud / llm-api):** Built-in skip IDs and reasons live in **`shared/constants.ts`** (`ELIZACLOUD_SKIP_MODEL_IDS`, `ELIZACLOUD_SKIP_REASON`). Operators can add removals via **`PRR_ELIZACLOUD_INCLUDE_MODELS`** or extra skips via **`PRR_ELIZACLOUD_EXTRA_SKIP_MODELS`** (see **README** / **`.env.example`**). **Session-level** skip after repeated zero-fix failures: **`PRR_SESSION_MODEL_SKIP_FAILURES`** (**`tools/prr/models/rotation.ts`**). **Session skip reset (pill #847):** **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`** removes each key from session **`skippedModelKeys`** after N **completed fix iterations since that key was skipped** (`sessionSkippedSinceFixIteration` in **`state-context.ts`**; see **`maybeResetSessionSkippedModelsAfterFixIteration`** in **`rotation.ts`**, wired from **`push-iteration-loop.ts`**). **Maintainer cadence (ops):** From **`output.log`** **Model Performance**, add persistent **0%** ids to **`constants.ts`** with **`ELIZACLOUD_SKIP_REASON`** and a dated comment; mirror the table in **`docs/MODELS.md`** (“last reviewed” line). There is no automatic PR for the static list. **Fetch / concurrent LLM pool:** **`PRR_FETCH_TIMEOUT_MS`** — non-integer values use the default; with **`--verbose`**, a debug line records the bad value (**`parseFetchTimeoutMs`** in **`shared/git/git-conflicts.ts`**). Branch names for fetch use **`isBranchRefSafeForOriginFetch`** (**`git check-ref-format --branch`**). **`fetchOriginBranch`** logs (verbose) why one-shot HTTPS auth was skipped; spawn **`error`** messages are redacted. **`PRR_LLM_TASK_TIMEOUT_MS`** — optional per-slot wall clock for **`runWithConcurrency`** / **`runWithConcurrencyAllSettled`** (**`shared/run-with-concurrency.ts`**); see **README** Troubleshooting. @@ -92,6 +102,36 @@ Many **`pill-output.md`** lines use **`src/...`**, **`packages/core/...`**, or * **Final audit vs queue:** When the final adversarial audit returns **UNFIXED** for an issue that was **verified** earlier in the run, PRR **re-queues** it (removes from verified, fix loop again). **RESULTS SUMMARY** prints **◆ Final audit re-queued: N** next to fixed/dismissed outcome lines (**`auditOverridesThisRun`**); follow-up gray/yellow lines explain recovery vs **Remaining**. **WHY:** Scannable counts (pill-output #18); “safe over sorry” in **README** / **AGENTS.md**. +**Final-audit snippet metadata:** **`getFullFileForAudit`** returns **`fixSiteInWindow`** when the GitHub line or keyword anchor lies inside the shown numbered excerpt (or the whole file fits the budget). **`LLMClient.finalAudit`** skips the UNFIXED truncation-demotion guard when that flag is true so line-centered budget excerpts are not treated like blind head/tail clips (**`issue-analysis-snippet-helpers.ts`**, **`workflow/analysis.ts`**, **`tools/prr/llm/client.ts`**). + +### Unified issue state writes (`transitionIssue`) + +**What:** **`tools/prr/state/state-transitions.ts`** exports **`transitionIssue(ctx, commentId, transition)`** — the single mutation path for **verified**, **dismissed**, **unverified**, and **undismissed** per comment ID. + +**WHY one function:** Output.log / pill audits showed some code paths updated **`verifiedFixed`** or **`dismissedIssues`** without updating **`verifiedThisSession`**, **`commentStatuses`**, or **`lastApplyErrorByCommentId`** / **`applyFailureCountByCommentId`**, or left **verified ∩ dismissed** overlap. Centralizing writes makes new call sites harder to get wrong. + +**Public API:** Prefer **`Verification.markVerified`**, **`Verification.unmarkVerified`**, **`Dismissed.dismissIssue`**, **`Dismissed.undismissIssue`** from workflow code. **`StateManager.markCommentVerifiedFixed`** / **`unmarkCommentVerifiedFixed`** / **`addDismissedIssue`** build a minimal **`StateContext`** (no session **`Set`**) and delegate — **WHY:** Legacy callers stay stable; **`verifiedThisSession`** is owned by the resolver context, not the class. + +**Flags:** **`skipSessionTracking`** on verify — used when **`recoverVerificationState`** marks IDs from **`prr-fix:`** git history so the commit gate does not treat recovery as “newly verified this iteration”. **`forceVerificationRefresh`** — **`markCommentVerifiedFixed`** forces timestamp refresh even in the same iteration. **`replaceExistingDismissal`** — only **`addDismissedIssue`** sets this so a second dismiss replaces the row; procedural **`dismissIssue`** stays idempotent (no duplicate rows). + +**Bulk clears:** **`clearAllVerifications`**, **`clearVerificationCache`**, **`StateManager.load`** overlap repair, and **`state-core`** normalization still manipulate arrays directly — **WHY:** Those are cross-cutting resets or migration repair, not single-comment lifecycle events. + +### Prompt context budgeting (`shared/prompt-budget.ts`) + +**What:** **`computeBudget({ model, reservedChars, divisor? })`** returns **`availableForCode`** from the model’s input ceiling (see **`shared/llm/model-context-limits.ts`**) minus reserved non-code chars, optionally split across N slots. **`fitToBudget(rawFile, anchorLine, maxChars, { commentBody, findKeywordAnchor })`** returns numbered-line excerpts centered on the review line or a keyword anchor. **`computePerFixVerifyCurrentCodeBudget`** + **`truncateNumberedCodeAroundAnchor`** shrink already-numbered “current code” blocks for batch verify prompts. + +**WHY:** Fix-loop audits repeatedly showed inconsistent caps: one path used a huge window and timed out on small-context models; another used a tiny window and produced **STALE** / wrong **YES** because the bug line was not in view. Sharing math avoids chasing seven magic constants when the gateway or default model changes. + +**Consumers (non-exhaustive):** **`issue-analysis-snippet-helpers.ts`** (`buildWindowedSnippet`, **`getFullFileForAudit`**), **`issue-analysis-snippets.ts`** (**`getCodeSnippet`** — char shrink after line-window build), **`tools/prr/llm/client.ts`** batch verify, **`tools/prr/workflow/fix-verification.ts`** **`getCurrentCodeAtLine`**. + +### Canonical paths in workflow (file operations vs display) + +**Rule of thumb:** For **`readFile`**, **`pathTrackedAtGitHead`**, **`getCodeSnippet(path, …)`**, dismissal **`filePath`**, bailout **`remainingIssues`**, use **`getIssuePrimaryPath(issue)`** or **`resolveTrackedPath(workdir, comment.path, comment.body)`** when **`workdir`** is known — same as **`resolvedPath ?? comment.path`** after analysis. + +**WHY:** GitHub’s **`path`** may be a bare basename, wrong extension, or diff-prefixed; the clone resolves to a single tracked path. Using the raw string for disk I/O targets the wrong file or misses it. + +**Intentional raw `comment.path`:** Thread display, **`auditOverridesThisRun.path`** for operator correlation with GitHub, **`shouldSkipFinalAuditLlmForPath(comment.path)`** (fragment / synthetic path gate aligned with solvability), and some **`checkForNewComments`** dismissal rows when **`resolvedPath`** was not yet stored — document with **`// INTENTIONAL`** when adding new sites. + **Technical implications**: - State persistence is critical (resume after interruption) - Workdir preservation by default (inspect before pushing) @@ -158,6 +198,7 @@ PRR’s tree was refactored to **separate concerns without changing intended run | **LLM** | **`tools/prr/llm/client.ts`** (**`LLMClient`**) + **`verification-heuristics.ts`**, **`provider-probes.ts`**, **`error-helpers.ts`** (re-exported from **`client.ts`**) | Probes and pure string/heuristic logic do not need a client instance; one barrel (**`client.js`**) avoids churn for **split-plan**, rotation, and tests. | | **Issue analysis** | **`issue-analysis.ts`** (orchestrator, **`findUnresolvedIssues`**) + **`issue-analysis-snippet-helpers.ts`**, **`issue-analysis-snippets.ts`**, **`issue-analysis-dedup.ts`**, **`issue-analysis-context.ts`** | Dedup, low-level snippets, and STALE/ordering context evolve on different cadences; the orchestrator reads as a pipeline driver. | | **Resolver surface** | **`tools/prr/resolver-proc.ts`** — **only** **`export { … } from './workflow/…'`** | **`resolver.ts`** and integration tests import one facade; implementations stay next to related workflow code (**`bot-wait.ts`**, **`bailout.ts`**, …). | +| **Blast radius** | **`shared/dependency-graph/`** — import scanners (multi-language regex), **`specifier-resolver.ts`** (**async** `fs/promises` probes — **WHY:** thousands of **`existsSync`**-style calls blocked the event loop during graph builds), **`proximity.ts`** (directory + filename stems), **`graph.ts`** (**`buildDependencyGraph`**, **`computeBlastRadius`** — BFS uses an **index queue** instead of **`Array.shift()`** — **WHY:** O(1) dequeue when the frontier is large) | **WHY feature:** Approximate “PR scope” without `tsc`/`go`/`javac` in the clone; union of graph + proximity reduces false negatives vs regex-only. **`main-loop-setup.ts`** builds after **`git diff --name-only`** (try/catch: failure → no map → all in-scope); **`issue-analysis.ts`** annotates **`UnresolvedIssue`** and optional **`PRR_BLAST_RADIUS_DISMISS`**; **`execute-fix-iteration.ts`** intersects **`allowedPathsForInjection`** with **`stateContext.blastRadiusPaths`** (empty intersection → full batch — **WHY:** never starve the fixer of file contents). Analysis cache persists **`blastRadiusPaths`** for injection on cache hit. Disable: **`PRR_DISABLE_BLAST_RADIUS`**. | ### Commit gate and catalog model auto-heal @@ -183,7 +224,7 @@ Review bots sometimes claim a **valid** vendor model id is a “typo” and tell ## Key Files -Paths below are relative to the repo root. PRR-specific code lives under `tools/prr/`; shared modules (logger, git) under `shared/` (pill-output.md #8). +Paths below are relative to the repo root. PRR-specific code lives under `tools/prr/`; shared modules (logger, git) under `shared/` (see **Pill output triage** above for clone vs tool paths). ### Core @@ -192,6 +233,7 @@ Paths below are relative to the repo root. PRR-specific code lives under `tools/ |------|---------| | `tools/prr/index.ts` | CLI entry point, signal handlers | | `tools/prr/cli.ts` | Argument parsing, validation | +| `shared/dependency-graph/` | Blast-radius graph (regex imports + proximity + BFS); see **Architecture — Blast radius** | | `shared/config.ts` | Environment/config loading | | `tools/prr/resolver.ts` | Main orchestration (delegates to workflow/) | | `tools/prr/resolver-proc.ts` | **Facade only** — re-exports workflow APIs for resolver/tests (**WHY:** one stable import surface; see *Codebase structure*) | @@ -199,6 +241,7 @@ Paths below are relative to the repo root. PRR-specific code lives under `tools/ | `shared/timing.ts` | Session/overall timers (**WHY:** separated from logger I/O; imported via **`logger.js`** for most code) | | `shared/token-tracking.ts` | Token phase + usage (**WHY:** same as timing) | | `shared/constants.ts` | Shim → **`shared/constants/index.ts`** barrel (**WHY:** domain-sized constant files; see **AGENTS.md**) | +| `shared/prompt-budget.ts` | **`computeBudget`**, **`fitToBudget`**, batch-verify current-code caps (**WHY:** model-aware shared math for injected code text; see *Prompt context budgeting* above) | ### GitHub Integration @@ -268,7 +311,8 @@ Paths below are relative to the repo root. PRR-specific code lives under `tools/ | File | Purpose | |------|---------| -| `tools/prr/state/state-*.ts` | Per-workdir state modules (verification, iterations, rotation, bail-out) | +| `tools/prr/state/state-transitions.ts` | **`transitionIssue`** — single write path for verified / dismissed / unverified / undismissed (**WHY:** keeps **`verifiedThisSession`**, **`commentStatuses`**, and mutual exclusion consistent; see *Unified issue state writes*) | +| `tools/prr/state/state-*.ts` | Per-workdir state modules (verification, dismissed, iterations, rotation, bail-out) | | `tools/prr/state/lessons-*.ts` | Branch-permanent lessons (~/.prr/lessons/) | | `tools/prr/state/types.ts` | State interfaces (ResolverState, BailOutRecord, ModelPerformance) | @@ -434,6 +478,8 @@ Data flows between subsystems so the next step has the right context. Improving 5. **State:** **`dedupCache.schema === 'dedup-v2'`** required for cache hit. **WHY:** Cross-file phase invalidates pre-schema caches; recompute avoids wrong groupings. +6. **LLM dedup responses and logs:** Per-file **`llmDedup`** asks for **`GROUP: … → canonical …`** lines or the literal **`NONE`** when there are no duplicate groups (see **`LLM_DEDUP_SYSTEM_PROMPT`** in **`issue-analysis-dedup.ts`**). A **`RESPONSE #…/llm-elizacloud → { chars: 4 }`** line in **`output.log`** is very often the four letters **`NONE`** — expected, not an empty or truncated model bug. Check **`prompts.log`** for the body. In-process calls set **`phase: dedup-v2-grouping`** (per file) or **`dedup-v2-cross-file`** (cross-file batch); **`output.log`** PROMPT/RESPONSE debug lines include **`phase`** when set so you can grep separately from verification or fix prompts. + **Note:** Inline GraphQL review comments are **not** merged by **`deduplicateSameBotAcrossComments`** (only synthetic **`ic-*`** from issue comments). Dropping **`ic-*`** ids can orphan resolver state entries; comment-set key changes invalidate dedup cache. ### 4. Model & Tool Rotation with Single-Issue Focus @@ -1778,7 +1824,7 @@ if (hasForbidden) { } ``` -**Why `verifiedComments` with timestamps?** Enables verification expiry. If a verification is N iterations old, re-check it. +**Why `verifiedComments` with timestamps?** Enables verification expiry. If a verification is past the expiry threshold (at least **`VERIFICATION_EXPIRY_ITERATIONS`**, scaled up on long runs via **`getVerificationExpiryForIterationCount`** — **`max(5, floor(totalIterations/15))`**), re-check it. **Why `currentRunnerIndex` and `modelIndices`?** Resume rotation from where we left off. Without this, every restart begins with the same tool/model. diff --git a/README.md b/README.md index e6c165b..9da38cf 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ There are plenty of AI tools that autonomously create PRs, write code, and push **Safe over sorry verification**: When PRR is unsure whether a fix really covers a lifecycle, cache, cleanup, or multi-path issue, it should keep the issue open instead of optimistically marking it fixed. -**What real logs actually showed (and how PRR responds now)**: Audits found genuine problems — not hypotheticals — including **verified ∩ dismissed** overlap (misleading “done”), **tracked file not found** on paths that existed under a different extension or diff prefix, **bare `.d.ts` / fragment** paths misclassified, **0%-success models** burning rotation, and summaries that could look greener than the threads. Today: **state load** and **`markVerified` / `dismissIssue`** enforce mutual exclusivity and log overlap repair; **`tryResolvePathWithExtensionVariants`** + **`stripGitDiffPathPrefix`** (**`shared/path-utils.ts`**) address common `tsconfig.js` / `.tsx` / etc. cases; fragments use **`path-unresolved`** via **`isReviewPathFragment`** / **`pathDismissCategoryForNotFound`**; ElizaCloud uses **`ELIZACLOUD_SKIP_MODEL_IDS`**, **`PRR_SESSION_MODEL_SKIP_FAILURES`**, optional **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`**, and startup warnings when the post-skip rotation is very thin; **RESULTS SUMMARY** excludes dismissed IDs from the verified “fixed” count and **warns** if overlap still appears at exit. **Residual risk**: LLMs and heuristics can still be wrong on edge paths or weak verifiers — use **RESULTS SUMMARY**, **After Action Report**, **`PRR_STRICT_FINAL_AUDIT`** / **`PRR_STRICT_FINAL_AUDIT_UNCERTAIN`**, and **GitHub’s threads** together; after rebases, delete **`.pr-resolver-state.json`** in the clone workdir if numbers disagree with the PR (see Troubleshooting). +**What real logs actually showed (and how PRR responds now)**: Audits found genuine problems — not hypotheticals — including **verified ∩ dismissed** overlap (misleading “done”), **tracked file not found** on paths that existed under a different extension or diff prefix, **bare `.d.ts` / fragment** paths misclassified, **0%-success models** burning rotation, and summaries that could look greener than the threads. Today: **all per-comment verified/dismissed/unverified transitions** go through **`transitionIssue`** (`tools/prr/state/state-transitions.ts`) so **`verifiedThisSession`**, **`commentStatuses`**, and legacy **`verifiedFixed` / `verifiedComments`** stay aligned — **`markVerified` / `dismissIssue`** are thin wrappers; **state load** still repairs legacy overlap and logs repair; **`tryResolvePathWithExtensionVariants`** + **`stripGitDiffPathPrefix`** (**`shared/path-utils.ts`**) address common `tsconfig.js` / `.tsx` / etc. cases; **open allowed-path policy** (**`isPathAllowedForFix`**) avoids silently dropping valid repo files under non-standard top-level dirs and lets reviews target adjacent paths; set **`PRR_STRICT_ALLOWED_PATHS=1`** to restore the legacy first-segment filter; fragments use **`path-unresolved`** via **`isReviewPathFragment`** / **`pathDismissCategoryForNotFound`**; ElizaCloud uses **`ELIZACLOUD_SKIP_MODEL_IDS`**, **`PRR_SESSION_MODEL_SKIP_FAILURES`**, optional **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`**, and startup warnings when the post-skip rotation is very thin; **RESULTS SUMMARY** excludes dismissed IDs from the verified “fixed” count and **warns** if overlap still appears at exit. **Residual risk**: LLMs and heuristics can still be wrong on edge paths or weak verifiers — use **RESULTS SUMMARY**, **After Action Report**, **`PRR_STRICT_FINAL_AUDIT`** / **`PRR_STRICT_FINAL_AUDIT_UNCERTAIN`**, and **GitHub’s threads** together; after rebases, delete **`.pr-resolver-state.json`** in the clone workdir if numbers disagree with the PR (see Troubleshooting). **WHY**: False negatives cost another pass. False positives hide real bugs, create misleading "all fixed" states, and make PR threads look cleaner than the code really is. @@ -50,6 +50,7 @@ There are plenty of AI tools that autonomously create PRs, write code, and push - **Conservative issue detection for distributed bugs**: Lifecycle/cache/leak comments and ordering/history comments now get broader analysis context before PRR decides they are already fixed. *Why*: Some bugs live across declaration, usage, cleanup, and trimming sites; a narrow anchor snippet can make a real issue look resolved. - **Path-resolution categories instead of blanket stale dismissals**: PRR now distinguishes `missing-file` from `path-unresolved`, and carries canonical resolved paths forward when a review cites a basename or truncated path. *Why*: "File no longer exists" was previously hiding very different root causes such as ambiguous basenames, summary-table leakage, and path fragments that only needed repo-path expansion. - **PR-scoped basename disambiguation**: When a bare filename matches **multiple** tracked files, PRR can resolve it to the **single** path that also appears in the PR’s **changed-file list** (diff vs base). *Why*: Issue comments on `foo.ts` should target the copy the PR actually edits, not another package’s same-named file; without this, the fix loop could skip the real path as “not in clone” (see **DEVELOPMENT.md** — path accounting). +- **Canonical path for disk and git**: Where PRR reads files, checks **`git ls-tree`**, or records dismissals for a thread, it prefers **`resolvedPath`** / **`getIssuePrimaryPath`** over raw GitHub **`comment.path`** when the clone resolved a basename or extension variant. Logs and fragment gates may still show the API path so operators match GitHub. *Why*: Same rationale as basename disambiguation — wrong string → wrong file or “unreadable” snippets. - **Dedup cluster + no-change `ALREADY_FIXED`**: If the fixer returns **`RESULT: ALREADY_FIXED`** with no disk edits, PRR dismisses the **whole LLM dedup group** (canonical + merged duplicates), not only the one row left in the queue. *Why*: Otherwise sibling thread IDs stay “open” in GitHub terms while the queue is empty → confusing **BUG DETECTED** repopulate and a false “remaining” handoff. - **After Action Report (fixed this session)**: Boilerplate bodies (e.g. leading **`### What this adds`**) and threads verified only as **duplicates** of a canonical fix are collapsed into a short count line instead of full previews. *Why*: Keeps the AAR readable without hiding how many threads were satisfied this run. - **Catalog-backed dismissal + auto-heal for bogus model-id advice**: Bots with stale training sometimes flag a **valid** OpenAI/Anthropic API id as a “typo” and suggest another valid id. When **both** ids appear in the committed **`generated/model-provider-catalog.json`**, PRR dismisses the comment in solvability and (by default) restores the catalog id inside quoted literals near the review line, then can commit when the run would otherwise skip the fix loop. *Why*: Avoids burning the fixer on bad vendor advice and prevents silent adoption of the wrong model string in code. See [DEVELOPMENT.md](DEVELOPMENT.md) (“Commit gate and catalog model auto-heal”) and [docs/MODELS.md](docs/MODELS.md). @@ -129,6 +130,7 @@ There are plenty of AI tools that autonomously create PRs, write code, and push - **Cross-file dedup (Phase 3)**: When **≥5** issues remain after per-file dedup, one batched cheap-model pass may merge items on **different files** that share the same root-cause fix; canonical gets a **`contextHints`** line listing sibling paths. *Why*: One mistake repeated across services (e.g. CoT + temperature) should not require four separate fix cycles. - **maxFixIterations 0 = unlimited**: `--max-fix-iterations 0` is treated as unlimited (not zero). *Why*: Without this, 0 meant zero iterations and the run did analysis-only with no fix attempts. - **File injection by issue count & dynamic budget**: Injected file contents are chosen by how many issues reference each file (most first); total injection budget is tied to the model’s context cap. *Why*: Puts the injection cap toward files most likely to need search/replace; avoids overshooting small-context or underusing large-context models. +- **Shared prompt budget for injected code text**: Snippet windows, full-file audit excerpts (when too large for one prompt), and per-fix “current code” in batch verification share **`shared/prompt-budget.ts`** — **`computeBudget`** (model ceiling minus reserved wrapper chars, optional per-slot divisor) and **`fitToBudget`** (line-centered excerpts). *Why*: Previously each path used its own char/line caps; they drifted and caused either too little context (weak verifier / false STALE) or prompts that blew small model windows. One module tracks **WHY** each knob exists; see **DEVELOPMENT.md**. - **Batch injection filter (rounds 2+)**: In later fix rounds, file injection is limited to files that still have at least one unfixed issue via `allowedPathsForInjection`. *Why*: Already-fixed files waste context budget; filtering keeps the prompt focused and leaves room for files that need changes. - **Single-issue full file context**: Single-issue fix prompts send the full file (up to 600 lines) instead of a short snippet. *Why*: Models responded INCOMPLETE_FILE/UNCLEAR when given only 15-30 lines; full file gives enough context for correct fixes. - **Rewrite escalation for non-injected files**: Files mentioned in the prompt but not injected (or with repeated S/R failures) are escalated to full-file rewrite. *Why*: When the model never saw file content, search/replace usually fails; asking for the full file avoids matching failures. @@ -188,7 +190,7 @@ The **split-plan** tool analyzes a large PR (diffs, commits, dependencies), disc ### Pill: Program Improvement Log Looker -**pill** audits a project using its output.log and prompts.log (from prr, story, split-exec, or a previous pill run) and appends an improvement plan to **pill-output.md** and **pill-summary.md**. It is analysis-only: no fixers, verification, or commits. *Why*: Logs are evidence of behavior (failures, retries, model rotations); turning that into an actionable plan helps improve the project without duplicating prr’s fix loop. Pill runs on close only when you pass **`--pill`** (prr, story, split-exec, split-plan). See **[tools/pill/README.md](tools/pill/README.md)** for full documentation and WHYs. +**pill** audits a project using its output.log and prompts.log (from prr, story, split-exec, or a previous pill run) and appends an improvement plan to **pill-output.md** and **pill-summary.md**. If you keep **pill-output.md** in this repository, maintain it as a short **index** of open follow-ups and merge new pill output into that index (**DEVELOPMENT.md** — Pill output triage). It is analysis-only: no fixers, verification, or commits. *Why*: Logs are evidence of behavior (failures, retries, model rotations); turning that into an actionable plan helps improve the project without duplicating prr’s fix loop. Pill runs on close only when you pass **`--pill`** (prr, story, split-exec, split-plan). See **[tools/pill/README.md](tools/pill/README.md)** for full documentation and WHYs. ```bash # Or link globally (prr, pill, split-plan, split-exec, and story available) @@ -221,16 +223,34 @@ story --help # PR narrative & changelog | `PRR_ELIZACLOUD_EXTRA_SKIP_MODELS` | Comma-separated ids **added** to the built-in ElizaCloud skip list (`shared/constants.ts` **`ELIZACLOUD_SKIP_MODEL_IDS`**) | | `PRR_ELIZACLOUD_INCLUDE_MODELS` | Comma-separated ids to **remove** from the built-in skip list (re-enable after transient timeouts) | | `PRR_SESSION_MODEL_SKIP_FAILURES` | Skip a model for the rest of the run after N zero-fix verification failures (`0` = off) | -| `PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS` | Every N fix iterations, clear session skips so rotation retries those models (`0` / unset = off) | +| `PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS` | After N fix iterations **since each model was session-skipped**, drop that key so rotation can retry it (`0` / unset = off) | | `PRR_DIMINISHING_RETURNS_ITERATIONS` | Warn after N consecutive iterations with no new verified fixes (`0` = off) | | `PRR_EXIT_ON_STALE_BOT_REVIEW` | `1` / `true` — exit setup **before clone** if bot review SHA ≠ PR HEAD (stale inline comments) | | `PRR_EXIT_ON_UNMERGEABLE` | `1` / `true` — exit setup **before clone** when GitHub reports **`mergeable: false`** or **`mergeableState: dirty`** and **`--merge-base` is not set** | -| `PRR_CLEAR_ALL_DISMISSED_ON_HEAD` | `1` / `true` — on PR HEAD change, clear **all** dismissals (default: only **`already-fixed`**) | +| `PRR_CLEAR_ALL_DISMISSED_ON_HEAD` | `1` / `true` — on PR HEAD change, clear **all** dismissals (default: clear **`already-fixed`** and **`chronic-failure`**; keep other categories) | +| `PRR_STRICT_ALLOWED_PATHS` | `1` / `true` — restore **legacy** first-segment allowlist for fixer paths (static **`REPO_TOP_LEVEL`** + PR **`changedFiles`** roots). **Default (unset):** any repo-relative path passes except absolute, **`node_modules`**, **`dist/`**, **`.cursor` / `.prr` / `root`**. **WHY default open:** audits showed unknown roots like **`agent/`** were stripped from **`allowedPaths`**, blocking injection and wasting iterations; adjacent files in reviews need to be editable without maintaining a global dir list. | +| `PRR_MID_LOOP_NEW_COMMENT_CAP` | Max new bot threads to enqueue **per mid–fix-loop batch** (default **`45`**). **`0`** = unlimited. Defers overflow until the next full comment analysis. | +| `PRR_DISABLE_BLAST_RADIUS` | `1` / `true` — skip blast-radius graph (no deprioritization or injection subset from radius) | +| `PRR_BLAST_RADIUS_DEPTH` | Max graph hops from changed files over imports **and** reverse edges (default **`2`**) | +| `PRR_BLAST_RADIUS_DISMISS` | `1` / `true` — dismiss issues whose primary path is outside the radius as **`out-of-scope`** (default: deprioritize only) | +| `PRR_BLAST_RADIUS_MAX_FILES` | Max tracked source files scanned for the graph (default **`5000`**) | +| `PRR_BLAST_RADIUS_TIMEOUT_MS` | Abort graph build after this many ms (default **`30000`**) | +| `PRR_BLAST_RADIUS_MAX_DIR_NEIGHBORS` | Skip same-directory proximity when a directory has more than this many tracked files (default **`30`**) | | `PRR_DISABLE_LATENT_MERGE_PROBE` | `1` / `true` — skip **`git merge-tree`** dry-merge vs `origin/` during sync (default: probe on) | | `PRR_DISABLE_LATENT_MERGE_PROBE_BASE` | `1` / `true` — skip the **second** dry-merge vs `origin/` (GitHub mergeable/dirty); default runs when base ≠ PR branch | | `PRR_MATERIALIZE_LATENT_MERGE` | `1` / `true` — when the PR-tip probe predicts conflicts, run **`git merge origin/ --no-commit --no-ff`** before pull so LLM conflict resolution can run early | | `PRR_MATERIALIZE_LATENT_MERGE_BASE` | `1` / `true` — when the **PR-vs-base** probe predicts conflicts, run **`git merge origin/ --no-commit --no-ff`** for early LLM resolution | -| `PRR_BOT_LOGIN` | GitHub login for thread-reply idempotency when using `--reply-to-threads` | +| `PRR_BOT_LOGIN` | Optional override for thread-reply idempotency; if unset, PRR uses `GET /user` with your token | +| `PRR_REPLY_TO_THREADS` | `true` / `1` — opt in to posting thread replies (same as CLI **`--reply-to-threads`**) | +| `PRR_THINKING_BUDGET` | Extended thinking token budget for Claude-class models; values above **500,000** clamp with a warning (**`shared/config.ts`**) | +| `PRR_LLM_MIN_DELAY_MS` | Override min ms between ElizaCloud request starts per slot (default **6,000** — see **`shared/constants/models.ts`**) | +| `PRR_LLM_TASK_TIMEOUT_MS` | Optional cap (ms) on concurrent pool tasks (**`0`** = none) | +| `PRR_LLM_API_REQUEST_TIMEOUT_MS` | **llm-api** only: fixed per-request timeout (ms) for non-full-file fix calls; unset = auto **90s → 180s** by prompt size (full-file rewrite stays **180s**) | +| `PRR_CLONE_TIMEOUT_MS` / `PRR_FETCH_TIMEOUT_MS` | Clone / fetch timeouts for large remotes (**AGENTS.md** / **Troubleshooting**) | +| `PRR_DISABLE_CONFLICT_SEPARATOR_REPAIR` | `1` — disable automatic insertion of missing **`=======`** between conflict markers | +| `PRR_DISABLE_MODEL_CATALOG_SOLVABILITY` / `PRR_DISABLE_MODEL_CATALOG_AUTOHEAL` | Disable catalog **0a6** dismissal and/or quoted-literal auto-heal (**AGENTS.md**) | +| `PRR_MODEL_CATALOG_PATH` | Override path to **`model-provider-catalog.json`** (malformed → empty catalog + warn) | +| `PILL_OUTPUT_LOG_PATH` / `PILL_PROMPTS_LOG_PATH` | Standalone **pill** rerun on explicit log paths (**`.env.example`** pill section) | **CLI (related):** pass **`--merge-base`** when GitHub reports the PR as not mergeable / dirty and you want PRR to merge the PR base before the fix loop. @@ -260,6 +280,8 @@ ANTHROPIC_API_KEY=sk-ant-xxxx # PRR_LLM_MIN_DELAY_MS=6000 # Optional: comma-separated ElizaCloud model IDs to include even if on the skip list (e.g. if timeouts were gateway-specific). # PRR_ELIZACLOUD_INCLUDE_MODELS=openai/gpt-4o-mini,anthropic/claude-3.7-sonnet +# Optional: legacy strict first-segment allowlist for fixer paths (see README “Fixer allowed paths”). +# PRR_STRICT_ALLOWED_PATHS=1 ``` **Concurrency (optional)** @@ -269,9 +291,21 @@ ANTHROPIC_API_KEY=sk-ant-xxxx **ElizaCloud skip-list override (optional)** - **`PRR_ELIZACLOUD_INCLUDE_MODELS`** (comma-separated model IDs): Models to *include* in rotation even if they are on the default skip list (e.g. `openai/gpt-4o`, `openai/gpt-4o-mini`, `anthropic/claude-3.7-sonnet`). **WHY:** Those models are skipped by default because audits showed timeouts or 0% fix rate on some gateways; if your environment is different, set this to re-enable them (e.g. `PRR_ELIZACLOUD_INCLUDE_MODELS=openai/gpt-4o-mini`). Full IDs or short names (e.g. `gpt-4o-mini`) both work. +**Fixer allowed paths (optional)** +- **`PRR_STRICT_ALLOWED_PATHS`** (`1` / `true` / `yes`): Enables the **strict** first-segment heuristic in **`shared/path-utils.ts`** **`isPathAllowedForFix`**. **WHY off by default:** Real PRs use top-level dirs outside the old static list; stripping those paths emptied **`allowedPaths`** and **`allowedPathsForInjection`**, so batch fixes could not see the target file (Cycle 72). **WHY on sometimes:** If comment bodies often mention dependency-style paths you do not want in the allowlist, strict mode rejects segments that look like package names unless they appear in **`REPO_TOP_LEVEL`** or in the PR’s **`git diff --name-only`** file list (**`setDynamicRepoTopLevelDirs`** in **`tools/prr/workflow/main-loop-setup.ts`**). Hard denies (absolute paths, **`node_modules`**, **`dist/`**, **`.cursor` / `.prr`**) always apply. See **DEVELOPMENT.md** (State invariants — fixer allowed paths). + +**Blast radius (optional)** +After the PR’s changed files are known, PRR can build a **best-effort** dependency “bubble” (whole-file regex for imports/includes across common languages, plus same-directory and filename-pattern neighbors), then: + +- **Deprioritize** threads whose primary file is outside that set (**`sortByPriority`** — out-of-scope last). +- **Narrow llm-api prompt injection** to paths inside the bubble (**`allowedPathsForInjection`**); the fixer’s **batch allowlist stays full** so edits to legitimately related files are not blocked — **WHY:** Save context on huge repos without the silent “empty injection” failure mode strict path filters caused. +- **Opt-in dismiss** with **`PRR_BLAST_RADIUS_DISMISS=1`** (**`out-of-scope`** + thread reply). Default is deprioritize only. + +**WHY the feature:** Focus order and tokens on files likely related to the PR diff without requiring **`tsc`**, **`go list`**, language servers, or parsers in the clone. **WHY graceful degradation:** If the graph hits **`PRR_BLAST_RADIUS_TIMEOUT_MS`**, **`PRR_BLAST_RADIUS_MAX_FILES`**, or any error, PRR treats **all** issues as in-scope (same as **`PRR_DISABLE_BLAST_RADIUS=1`**) — no silent “fix nothing” path. Implementation uses **async** filesystem probes in **`shared/dependency-graph/specifier-resolver.ts`** so large scans do not block the event loop. See **DEVELOPMENT.md** (Blast radius), **`.env.example`**, **docs/ROADMAP.md** (optional follow-ups). + **Fix-loop hygiene (optional)** - **`PRR_SESSION_MODEL_SKIP_FAILURES`** (integer, default **4**; set **`0`** to disable): After this many cumulative verification failures for a tool/model pair **with no verified fix in this process**, skip that model until the next run; a verified fix clears the skip. **WHY:** Audit runs showed 0%-success models still consuming rotation slots; skipping for the rest of the session saves tokens without editing the static skip list in code. -- **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`** (integer; unset = off): Every N completed fix iterations, clear **session** skips so those models can rotate again **without** restarting PRR. **WHY:** Pill-output #847 — otherwise a model skipped early is dead until process exit; periodic reset gives one more chance after other models have run. +- **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`** (integer; unset = off): Each session-skipped model key is removed after N **subsequent** completed fix iterations (per key, not a single global reset). **WHY:** Pill-output #847 — models skipped early get another chance after the loop has moved on, without clearing fresher skips on the same boundary. - **`PRR_DIMINISHING_RETURNS_ITERATIONS`** (integer, default **10**; set **`0`** to disable): Emit one **warning** when this many consecutive fix iterations produce **no** new verified fixes. **WHY:** Gives operators a visible cue to intervene (merge base, manual edits, or stop) instead of burning API budget quietly. **Clone / fetch (optional)** @@ -307,6 +341,8 @@ On 429 (rate limit), PRR calls `notifyRateLimitHit()` and temporarily halves eff - **`PRR_LLM_TASK_TIMEOUT_MS`:** Optional per pool-task wall-clock cap (ms) for concurrent LLM batches / fix groups (`runWithConcurrency`). Unset or `0` = no cap. Env values below `5,000` ms are clamped to `5,000`. Invalid values disable the cap and log a debug line when verbose. Programmatic override: `runWithConcurrency(tasks, n, { taskTimeoutMs })` (no clamp; for advanced use / tests). - **Partial base-merge resolutions:** When merge with **`origin/`** fails part-way, PRR stores resolved file text in state for the next run. If **`origin/`** moves to a new commit before you re-run, that cache is **cleared** so you don’t reuse content from an old merge attempt. - **Model catalog missing:** If **`generated/model-provider-catalog.json`** is absent, solvability **0a6** (dismiss bogus “model typo” noise) is **skipped** with a one-time console warning — run **`npm run update-model-catalog`** (or set **`PRR_MODEL_CATALOG_PATH`**). +- **Thread replies: many HTTP 422 / “Validation Failed”:** PRR prints a **summary line** (succeeded vs 422 vs other vs skipped). Mass 422 usually means review comments are anchored on an **old commit** (see startup warning when a bot’s review SHA ≠ PR HEAD) or GitHub will not accept a reply on that thread anymore. **Mitigations:** wait for bots to re-review current HEAD, see **`PRR_EXIT_ON_STALE_BOT_REVIEW`** in **AGENTS.md**, and **[docs/THREAD-REPLIES.md](docs/THREAD-REPLIES.md)** (422 section). +- **Pre-commit hooks / staged-file automation:** This **prr** repository does **not** ship bundled git hooks (pill sometimes cites hook paths from **application** repos). See **AGENTS.md** (**Pre-commit hooks**); install hooks in the repo you are developing, not here. ### Why These Defaults? diff --git a/docs/MODELS.md b/docs/MODELS.md index 0d51247..23eda48 100644 --- a/docs/MODELS.md +++ b/docs/MODELS.md @@ -1,6 +1,6 @@ # LLM Models Reference -This doc summarizes **current and legacy models** from official provider docs. Use it when choosing models or updating context limits in **`shared/llm/model-context-limits.ts`** (re-exported from `tools/prr/llm/model-context-limits.ts`). +This doc summarizes **current and legacy models** from official provider docs. Use it when choosing models or updating context limits in **`shared/llm/model-context-limits.ts`** (**`tools/prr/llm/model-context-limits.ts`** re-exports the same symbols for stable imports from workflow code). **Sources (check for latest):** @@ -21,6 +21,18 @@ Vendor doc pages change often; review bots may lag and suggest wrong renames (e. **PRR behavior:** Outdated bot comments that call a catalog-valid id a “typo” and suggest another id are **dismissed** (`assessSolvability`, check **0a6**) and optionally **auto-healed** in the workdir before issue analysis. +### ElizaCloud built-in skip list — when to add or re-enable + +PRR maintains **`ELIZACLOUD_SKIP_MODEL_IDS`** in **`shared/constants/models.ts`** with per-id reasons in **`ELIZACLOUD_SKIP_REASON`** (`timeout` vs `zero-fix-rate`). + +| Criterion | Typical action | +|-----------|------------------| +| **Repeated 504 / gateway timeout** on modest prompts (not a one-off blip) | Add id with reason **`timeout`**; operators may re-enable with **`PRR_ELIZACLOUD_INCLUDE_MODELS`** if the gateway improves. | +| **0% fix rate or systematic verifier/fix failures** in output.log / pill audits | Add id with reason **`zero-fix-rate`**. | +| **Session-only bad behavior** | **`PRR_SESSION_MODEL_SKIP_FAILURES`** + persisted **`sessionSkippedModelKeys`** (see **AGENTS.md**); no catalog change required. | + +**Re-evaluate:** After gateway or model updates, try **`PRR_ELIZACLOUD_INCLUDE_MODELS=`** on a small PR; if stable, propose removing the id from the built-in list in a PR with evidence (log snippet or audit cycle). + | Mechanism | WHY | |-----------|-----| | **Dismiss in solvability** | Stops non-actionable “rename valid A → valid B” advice from entering the LLM analysis/fix queue. | @@ -124,7 +136,7 @@ For full list, deprecations, and pricing see [OpenAI Models](https://developers. - **llm-api / ElizaCloud:** Fallback rotation order is **`DEFAULT_MODEL_ROTATIONS`** in `shared/runners/types.ts`; at runtime the list usually comes from the runner’s **`supportedModels`** (gateway/API discovery) and is **filtered** in `tools/prr/models/rotation.ts` using **`getEffectiveElizacloudSkipModelIds()`** from `shared/constants.ts`. Do not assume the static table in `types.ts` is the exact live order. - **Skip list (authoritative):** **`ELIZACLOUD_SKIP_MODEL_IDS`** in **`shared/constants.ts`**. The table below is a **snapshot for operators**; if it disagrees with the source array, **trust the source file** and update this table when you change skips. -**Last reviewed (skip table):** 2026-03-28 — pill-output / audit follow-up (Qwen 14B default churn, empty-response logging). +**Last reviewed (skip table):** 2026-04-05 — constants sync + env skip-list validation (`PRR_ELIZACLOUD_EXTRA_SKIP_MODELS` / `INCLUDE` malformed tokens ignored with one-time warn). | Model id | Reason in **`ELIZACLOUD_SKIP_REASON`** | Notes | |----------|----------------------------------------|--------| @@ -147,7 +159,14 @@ For full list, deprecations, and pricing see [OpenAI Models](https://developers. - **`getElizaCloudSkipReason(id)`:** ids **not** in **`ELIZACLOUD_SKIP_REASON`** use default **`timeout`** so new skip entries still rotate with a sensible debug line until you assign **`zero-fix-rate`**. - **Operational habit:** When **RESULTS SUMMARY** / Model Performance shows **0%** fix rate for an ElizaCloud id, add it (with reason + comment) to **`shared/constants.ts`** and bump the “last reviewed” line above — same guidance as **AGENTS.md**. +### Re-evaluating skips (maintainer) + +1. **Evidence:** Use **RESULTS SUMMARY** → **Model Performance** in **`output.log`** (per-model success/fail counts). Pill may omit tables when the log is summarized — grep **`Model Performance`** in the raw log for critical runs (**AGENTS.md**). +2. **Timeout vs zero-fix:** **`getElizaCloudSkipReason(id)`** returns **`timeout`** (default) or **`zero-fix-rate`**. Timeout-skipped models may be worth retrying after gateway changes — set **`PRR_ELIZACLOUD_INCLUDE_MODELS`** to the full id (or short suffix per **`getEffectiveElizacloudSkipModelIds`**) for a trial run. +3. **Edit source of truth:** Change **`ELIZACLOUD_SKIP_MODEL_IDS`** and **`ELIZACLOUD_SKIP_REASON`** in **`shared/constants/models.ts`** (barreled as **`shared/constants.js`**). Run **`npm test`**; update the snapshot table above and **Last reviewed**. +4. **Env-only skips:** **`PRR_ELIZACLOUD_EXTRA_SKIP_MODELS`** merges comma-separated ids; **`PRR_ELIZACLOUD_INCLUDE_MODELS`** subtracts. Entries with **`//`**, empty tokens, or invalid characters are **dropped** with a one-time **`console.warn`** — fix the env string if a model you expected is missing from the effective list. + - **Per-run performance:** Success/failure is recorded in state; rotation can prefer better-performing models within the same run. **`PRR_SESSION_MODEL_SKIP_FAILURES`** skips a tool/model for the rest of the process after repeated verification failures with zero verified fixes. -- **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`:** positive integer — every N completed **fix** iterations (inner loop inside a push iteration), clear **session** skips so rotation can retry those models **without** restarting the process. **`0`** / unset = off. **WHY:** Long runs otherwise never revisit a model skipped early for transient failures (pill-output #847). +- **`PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS`:** positive integer — each session-skipped tool/model key is removed after N **subsequent** completed **fix** iterations (counted from when that key was skipped), so rotation can retry it **without** restarting the process. **`0`** / unset = off. **WHY:** Long runs otherwise never revisit a model skipped early for transient failures (pill-output #847); per-key timing avoids clearing fresher skips on a single global boundary. *Provider model tables: last curated from linked docs; verify there for current IDs and pricing.* diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index c26c02d..2fd48d3 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -10,17 +10,29 @@ Items here are potential directions to explore, not committed plans. Each idea i **WHY:** Would reduce API round-trips when many threads are reply candidates; current parallel approach is already fast, so this is low priority unless we see latency issues on very large PRs. -## Single-issue focus: allowedPaths must include issue target +## Blast radius: optional follow-ups -**Status:** **Improved** — **`trySingleIssueFix`** mirrors **`getAllowedPathsForIssues`** for **`getRenameTargetPath`** and **`issueRequestsTests` → `__tests__/…`** (same as batch). **`REPO_TOP_LEVEL`** includes common e2e roots (`e2e`, `playwright`, `cypress`, `fixtures`, `integration`, `wdio`). Empty-after-filter still falls back to **`[primaryPath]`**. +**Status:** **Shipped** — regex import/include graph + directory + filename proximity, BFS both directions, issue annotation, optional dismiss, injection subset. See **CHANGELOG [Unreleased]** and **DEVELOPMENT.md** (Architecture — Blast radius). -**Idea (ongoing):** Ensure single-issue focus always passes a runner-compatible allow set. When `allowedPaths` is empty or filtered to empty (e.g. path under a top-level not in `REPO_TOP_LEVEL`), the runner rejects every change and wrong-file counter can fire falsely. +**Remaining (exploration only):** -**WHY:** Pill audit (output.log) showed `expectedPaths: []` for single-issue fixes; the fixer correctly edited the target file but the runner rejected edits. We add top-level dirs as audits surface them, fallback to `[primaryPath]` when filter yields empty in recovery, and do not count edits to the issue's target as wrong-file. +- **Parallel specifier resolution per file:** **`Promise.all`** over **`extractImports`** results for one source file could cut wall time; **trade-off:** burst of concurrent **`access`** / **`stat`** calls (FD pressure, noisy on slow/network FS). **WHY consider:** Very large monorepos with dense import lists. +- **Cooperative yield:** **`setImmediate`** (or batch **`await`**) every N scanned files so a single graph build cannot monopolize the microtask queue end-to-end. **WHY:** Marginal for typical sizes; helps if **`PRR_BLAST_RADIUS_MAX_FILES`** is raised sharply. +- **Recall without parsers:** e.g. read **`composer.json`** / **`tsconfig`** paths only if audits show systematic false negatives — **trade-off:** more config surface and maintenance; current design prefers regex + proximity over toolchain coupling. + +**WHY this section:** Operators and agents asked “what’s next” after the feature landed; these are **not** commitments — safe defaults and graceful degradation already cover most runs. + +## Single-issue focus + fixer allowed paths (non-standard repo roots) + +**Status:** **Done** (see **CHANGELOG [Unreleased]** — open allowed-path policy, Cycle 72). **`isPathAllowedForFix`** defaults to **open**: hard deny only (absolute, `node_modules`, `dist/`, `.cursor`, `.prr`, `root/` segment). **`PRR_STRICT_ALLOWED_PATHS=1`** restores the legacy first-segment heuristic; **`setDynamicRepoTopLevelDirs`** (from PR **`git diff --name-only`**) still extends **`REPO_TOP_LEVEL`** in strict mode. **`trySingleIssueFix`** continues to mirror **`getAllowedPathsForIssues`** for rename targets, tests, etc. + +**WHY (original pain):** Output.log audits showed **`expectedPaths: []`** / injection filtered when the primary path lived under a top-level dir not in the static list — runner rejected edits even though the issue targeted a real file. Open default plus docs removes the need to grow **`REPO_TOP_LEVEL`** for every customer layout; adjacent files in reviews remain editable without being in the PR diff’s first segment set. + +**Remaining (optional):** If strict mode users still see edge cases, consider logging when strict mode drops a path (debug-only) to tune **`REPO_TOP_LEVEL`** without flipping default behavior. ## State consistency: verifiedFixed vs dismissedIssues (mutual exclusivity) -**Status:** Largely **done** — `markVerified` / `dismissIssue` and load paths enforce mutual exclusivity; `verifiedComments` included in overlap cleanup; `already-fixed` dismissals clear on HEAD change. **CHANGELOG [Unreleased]** has the full list. +**Status:** **Done** for the write path — all comment lifecycle mutations that add/remove verified or dismissed rows go through **`transitionIssue`** (`tools/prr/state/state-transitions.ts`); **`markVerified`**, **`dismissIssue`**, **`StateManager`** helpers delegate there so **`verifiedThisSession`**, **`commentStatuses`**, and apply-failure fields stay aligned. Load paths still repair legacy overlap; **`already-fixed`** dismissals clear on HEAD change. See **CHANGELOG [Unreleased]** and **DEVELOPMENT.md**. **Remaining (optional):** Broader “clear all dismissals on HEAD change” (trade-off vs. stable not-an-issue dismissals); explicit migration notes for very old state files; extra tests if new edge cases appear. @@ -45,18 +57,6 @@ From root **`pill-output.md`** triage — **prr** scope only: **WHY (original):** Wrong-file lessons keyed under the same path blocked the fixer; load-time prune + prompt-time filter reduce reliance on single-issue-only workarounds. -## Blast radius and focus masking - -**Idea:** Use the PR diff to compute a "blast radius" (changed files plus their upstream dependencies and downstream dependents), then focus the fix loop on that set and effectively ignore or deprioritize the rest. - -- **Upstream:** files that changed files import/depend on. -- **Downstream:** files that import/depend on changed files. -- **Use:** Restrict which issues we process and which files appear in the fix prompt so the model and tooling focus on the scope of the PR; mask off out-of-scope code. - -**WHY:** Audits show waste when the fix loop processes comments on files outside the PR's logical scope or when the prompt is diluted by many unrelated files. Focusing on blast radius reduces prompt size, improves fix accuracy, and avoids cross-file confusion (e.g. wrong-file exhaust). Tradeoff: some valid cross-file fixes might be deprioritized; depth limit and "changed files only" fallback keep scope reasonable. - -Would require: PR changed-file list (`git diff base...HEAD --name-only`), a dependency graph (e.g. TS/JS import/require parsing), radius computation (depth limit), and integration into issue filtering and prompt building. Start with TS/JS; fallback to "changed files only" when no graph is available. - ## Final audit: deleted files and outdated threads **Status (partial):** **`runFinalAudit`** now (1) skips the adversarial LLM when the full-file snippet is **`(file not found or unreadable)`** and **`git ls-tree HEAD -- path`** shows the path is **not** at HEAD — synthetic **FIXED (git check)**; (2) **L1 tie-break:** if the model still says **UNFIXED** for a previously verified comment in that situation, we **keep verified** instead of re-queueing; (3) **Rule 6** post-check uses the same **`pathTrackedAtGitHead`** helper (non-empty `ls-tree` output = still tracked) instead of relying on `ls-tree` throwing; (4) **outdated** threads: a short **`[GitHub: thread OUTDATED …]`** prefix is prepended to the review text in the audit prompt. **`tools/prr/workflow/helpers/git-path-at-head.ts`**, **`tests/git-path-at-head.test.ts`**. @@ -87,9 +87,15 @@ From [tools/prr/AUDIT-CYCLES.md](../tools/prr/AUDIT-CYCLES.md) consolidated find **WHY:** Current runs show high dismissal rates (e.g. 62% EXISTING for already-fixed, many stale/file-unchanged). That implies the generator often flags issues that the judge then dismisses. Closing the loop would reduce tokens (fewer issues to analyze/fix), improve signal-to-noise for humans, and make PRR's behavior more predictable. Tradeoff: requires generator support or a separate "dismissal → analysis prompt" pipeline; we already persist dismissal reasons, so export and pattern analysis are low-hanging first steps. +## Prompt / snippet budgeting (consolidation) + +**Status:** **Done** for the shared layer — **`shared/prompt-budget.ts`** (`computeBudget`, `fitToBudget`, verify-batch helpers) replaces ad hoc per-call char caps for windowed snippets, full-file audit excerpts, and batch-verify “current code” truncation. **WHY:** One place to tune model limits vs reserved prompt overhead; reduces audit-cycle drift between paths. + +**Remaining (optional):** Thread an explicit **`modelId`** through every **`getCodeSnippet`** call site if we want fix-loop snippets to track the active fixer model (today some paths default to the generic ceiling). + ## Further structural follow-ups (optional) -**Idea A — Slim `LLMClient`:** Extract internal prompt builders (e.g. final-audit batching, conflict sub-prompts) into dedicated modules or a thin mixin, keeping **`complete()`** and transport as the single network entry. **WHY:** `client.ts` remains large; smaller units reduce review load and make provider-specific quirks easier to test in isolation. **Tradeoff:** Touch a hot file; needs careful re-export or import churn. +**Idea A — Slim `LLMClient`:** **Partial** — **`llm-client-transport.ts`** and **`llm-client-types.ts`** split transport/types from **`client.ts`**; final-audit batching, conflict prompts, and other large builders may still move to dedicated modules. **WHY (remaining):** `client.ts` is still a hot file; smaller units reduce review load. **Tradeoff:** Further splits need careful re-export or import churn. **Idea B — `shared/` GitHub + LLM surfaces:** Move a stable **`GitHubAPI`** (or narrower port) to **`shared/github/`** and core **`LLMClient`** (transport + **`complete`**) to **`shared/llm/`** (names TBD) so **split-plan**, **split-exec**, and **story** depend only on **`shared/`** instead of **`tools/prr/`**. **WHY:** Clear package boundaries and fewer accidental PRR→tool cycles. **Tradeoff:** Large migration; wait until GitHub/LLM module APIs stop churning (see **AGENTS.md** — *Future shared migration*). diff --git a/docs/THREAD-REPLIES.md b/docs/THREAD-REPLIES.md index 2c0d448..2185f52 100644 --- a/docs/THREAD-REPLIES.md +++ b/docs/THREAD-REPLIES.md @@ -28,14 +28,14 @@ We only know the full set of dismissals at end of run (after audit, bail-out, et ## WHY only some dismissal categories get a reply -We reply for: `already-fixed`, `stale`, `not-an-issue`, `false-positive`, `remaining`, `exhausted`, `path-unresolved`, `missing-file`, `duplicate`, `file-unchanged` (see **`dismissedCategoriesWithReply()`** / **`DISMISSED_CATEGORIES_BASE`** in `tools/prr/workflow/thread-replies.ts`). By default we do **not** reply for `chronic-failure` (and other categories omitted from that set). Set **`PRR_THREAD_REPLY_INCLUDE_CHRONIC_FAILURE=1`** (or `true`) to also reply on **`chronic-failure`** threads with a short batch-dismissal line. +We reply for: `already-fixed`, `stale`, `not-an-issue`, `false-positive`, `remaining`, `exhausted`, `path-unresolved`, `path-fragment`, `missing-file`, `duplicate`, `file-unchanged`, `out-of-scope` (see **`dismissedCategoriesWithReply()`** / **`DISMISSED_CATEGORIES_BASE`** in `tools/prr/workflow/thread-replies.ts`). By default we do **not** reply for `chronic-failure` (and other categories omitted from that set). Set **`PRR_THREAD_REPLY_INCLUDE_CHRONIC_FAILURE=1`** (or `true`) to also reply on **`chronic-failure`** threads with a short batch-dismissal line. -**WHY:** Clear dismissals (`already-fixed`, `stale`, `not-an-issue`, `false-positive`) give the reviewer a definitive outcome. `remaining` / `exhausted` get a short “Could not auto-fix; manual review recommended.” so threads are not left silent after we stop the fix loop. `path-unresolved` / `missing-file` / `duplicate` / `file-unchanged` get a specific line so the thread shows why PRR stopped. **`chronic-failure` is excluded by default:** those threads are bulk-dismissed to save tokens without a full fix cycle on each one — replying can add noise; operators who want visible closure on every thread can opt in with the env var above. +**WHY:** Clear dismissals (`already-fixed`, `stale`, `not-an-issue`, `false-positive`) give the reviewer a definitive outcome. `remaining` / `exhausted` get a short “Could not auto-fix; manual review recommended.” so threads are not left silent after we stop the fix loop. `path-unresolved` / `path-fragment` / `missing-file` / `duplicate` / `file-unchanged` get a specific line so the thread shows why PRR stopped. **`out-of-scope`** (opt-in via **`PRR_BLAST_RADIUS_DISMISS=1`**) gets “Outside PR scope — manual review recommended.” **`chronic-failure` is excluded by default:** those threads are bulk-dismissed to save tokens without a full fix cycle on each one — replying can add noise; operators who want visible closure on every thread can opt in with the env var above. ## WHY in-run and cross-run idempotency - **In-run:** A single `repliedThreadIds` set is shared across commit-and-push (fixed replies) and final cleanup (dismissed replies). We never post twice to the same thread in one run. -- **Cross-run:** If `PRR_BOT_LOGIN` is set, we fetch each candidate thread’s comments and skip posting when that login already commented. **WHY:** Re-runs (e.g. after manual edits) would otherwise post duplicate “Fixed in …” or “Dismissed: …” for threads we already replied to. Checking by bot login makes re-runs safe and avoids spamming threads. +- **Cross-run:** We need a GitHub **login** to match against thread comment authors. If **`PRR_BOT_LOGIN`** is set, we use it; otherwise we call **`GET /user`** with the same token (see **`GitHubAPI.getAuthenticatedLogin`**) when there are reply candidates. We then fetch each candidate thread’s comments and skip posting when that login already commented. **WHY:** Re-runs (e.g. after manual edits) would otherwise post duplicate “Fixed in …” or “Dismissed: …” for threads we already replied to. **`PRR_BOT_LOGIN`** remains useful to override when the token identity is not the account that posts review replies (rare). ## WHY batch idempotency check @@ -65,10 +65,22 @@ Some “comments” are synthetic: we create them from issue comments (e.g. bot | `--no-reply-to-threads` | Disable (default). | | `PRR_REPLY_TO_THREADS=true` | Enable via env (e.g. CI). | | `--resolve-threads` | After replying, resolve the thread (collapse with checkmark). Default off. | -| `PRR_BOT_LOGIN` | GitHub login of the bot that posts replies. When set, we skip threads that already have a comment from this login (cross-run idempotency). | +| `PRR_BOT_LOGIN` | Optional override: GitHub login for cross-run idempotency. If unset, PRR uses the token’s login from **`GET /user`** when there are threads to reply to. | + +## 422 Validation Failed and retries + +On **`pulls.createReplyForReviewComment`**, GitHub may return **422** with structured **`errors`** (e.g. **`PullRequestReviewComment`** / **`in_reply_to`**) when the thread is not replyable (stale diff, wrong anchor). PRR logs the full response body in **debug** and **does not** send the short fallback body in that case — a shorter string would 422 the same way and wastes an API call. Plain **422** without those fields still gets one retry with the short fallback (e.g. `Addressed.`). Reply bodies are clamped to a safe max length before send. After several consecutive batches where **every** reply in the batch returns **422**, PRR stops attempting further replies for that run (see **`postThreadReplies`**). + +### User-visible summary (422 storms) + +At the end of **`postThreadReplies`**, PRR prints a **single line** with **`formatNumber`**: how many attempts **succeeded**, how many failed with **422**, how many failed for **other** reasons, and how many threads were **not attempted** because posting stopped early (repeated all-422 batches). When the stop threshold triggers, a **yellow** line also states how many were posted **so far** and how many remain **skipped**, plus a short pointer to this doc. + +**Common causes of mass 422:** (1) Review bots commented on an **older commit** than the PR head — inline anchors no longer match the current diff (PRR warns when CodeRabbit’s review SHA ≠ HEAD; wait for a re-review or use **`PRR_EXIT_ON_STALE_BOT_REVIEW=1`** to fail fast before clone). (2) Threads **resolved or outdated** on GitHub so REST reply is rejected. (3) Wrong **`databaseId`** / thread state (rare if GraphQL ingestion is consistent). + +**Mitigations:** Re-run after bots catch up; avoid **`--reply-to-threads`** on huge PRs until reviews target HEAD; ensure the token can post PR review comments; set **`PRR_BOT_LOGIN`** if cross-run idempotency should match a specific bot account. ## See also - **AGENTS.md** — “PRR thread replies” for a short reference. - **README.md** — “Thread replies (GitHub feedback)” in Features and CLI options table. -- **Code:** `tools/prr/workflow/thread-replies.ts`, `tools/prr/github/api.ts` (`replyToReviewThread`, `resolveReviewThread`, `getThreadComments`). +- **Code:** `tools/prr/workflow/thread-replies.ts`, `tools/prr/github/api.ts` (`replyToReviewThread`, `resolveReviewThread`, `getThreadComments`, `getAuthenticatedLogin`). diff --git a/shared/config.ts b/shared/config.ts index b9fccfb..3e2e0f6 100644 --- a/shared/config.ts +++ b/shared/config.ts @@ -175,13 +175,39 @@ export function loadConfig(): Config { const verifierModelRaw = process.env.PRR_VERIFIER_MODEL?.trim(); const finalAuditModelRaw = process.env.PRR_FINAL_AUDIT_MODEL?.trim(); const splitPlanModelRaw = process.env.SPLIT_PLAN_LLM_MODEL?.trim(); + + const llmModelRaw = getEnvOrDefault('PRR_LLM_MODEL', defaultModel); + let llmModel = llmModelRaw; + if (!isValidModelName(llmModel)) { + console.warn( + chalk.yellow( + `PRR_LLM_MODEL is not a valid model id (${llmModelRaw.slice(0, 80)}${llmModelRaw.length > 80 ? '…' : ''}) — falling back to default for provider.`, + ), + ); + llmModel = defaultModel; + } + + const optionalModel = (envKey: string, raw: string | undefined): string | undefined => { + const t = raw?.trim(); + if (!t) return undefined; + if (!isValidModelName(t)) { + console.warn( + chalk.yellow( + `Ignoring ${envKey} — not a valid model id (${t.slice(0, 80)}${t.length > 80 ? '…' : ''}).`, + ), + ); + return undefined; + } + return t; + }; + const config: Config = { githubToken: getEnvOrThrow('GITHUB_TOKEN'), llmProvider, - llmModel: getEnvOrDefault('PRR_LLM_MODEL', defaultModel), - verifierModel: verifierModelRaw && verifierModelRaw.length > 0 ? verifierModelRaw : undefined, - finalAuditModel: finalAuditModelRaw && finalAuditModelRaw.length > 0 ? finalAuditModelRaw : undefined, - splitPlanModel: splitPlanModelRaw && splitPlanModelRaw.length > 0 ? splitPlanModelRaw : undefined, + llmModel, + verifierModel: optionalModel('PRR_VERIFIER_MODEL', verifierModelRaw), + finalAuditModel: optionalModel('PRR_FINAL_AUDIT_MODEL', finalAuditModelRaw), + splitPlanModel: optionalModel('SPLIT_PLAN_LLM_MODEL', splitPlanModelRaw), defaultTool: validateTool(getEnvOrDefault('PRR_TOOL', 'auto')), workdirBase: join(homedir(), '.prr', 'work'), anthropicThinkingBudget: thinkingBudget, @@ -233,8 +259,12 @@ export function loadConfig(): Config { * Pattern for validating model names. * Allows alphanumeric, dots, underscores, hyphens, and forward slashes * (for provider-prefixed names like "anthropic/claude-3-opus"). + * Rejects `//` and other ambiguous slash runs. */ -export const MODEL_NAME_PATTERN = /^[A-Za-z0-9._\/-]+$/; +export const MODEL_NAME_PATTERN = /^(?!.*\/\/)[A-Za-z0-9._\/-]+$/; + +/** Max length for env-supplied model ids (defense against garbage / paste errors). */ +export const MODEL_NAME_MAX_LENGTH = 200; /** * Validate that a model name is safe and well-formed. @@ -246,6 +276,7 @@ export const MODEL_NAME_PATTERN = /^[A-Za-z0-9._\/-]+$/; * @returns True if model name matches expected pattern */ export function isValidModelName(model: string): boolean { + if (!model || model.length > MODEL_NAME_MAX_LENGTH) return false; return MODEL_NAME_PATTERN.test(model); } diff --git a/shared/constants/fix-loop.ts b/shared/constants/fix-loop.ts index 9ec44be..ea02b84 100644 --- a/shared/constants/fix-loop.ts +++ b/shared/constants/fix-loop.ts @@ -31,6 +31,20 @@ export const CHRONIC_FAILURE_THRESHOLD = typeof process !== 'undefined' && proce ? Math.max(1, parseInt(process.env.PRR_CHRONIC_FAILURE_THRESHOLD, 10) || 5) : 5; +/** + * Max new bot review threads to enqueue in one mid-fix-loop batch (PRR_MID_LOOP_NEW_COMMENT_CAP). + * WHY: Each push triggers more bot comments; unbounded enqueue refills the queue faster than fixes land. + * 0 = unlimited. Default 45. + */ +export function getMidLoopNewCommentCap(): number { + const raw = typeof process !== 'undefined' ? process.env.PRR_MID_LOOP_NEW_COMMENT_CAP?.trim() : undefined; + if (raw === undefined || raw === '') return 45; + const n = parseInt(raw, 10); + if (!Number.isFinite(n)) return 45; + if (n <= 0) return 0; + return n; +} + /** * Number of "tool modified wrong files" lessons for an issue before we mark as remaining. * WHY: When the fix requires a different file than the comment's path (e.g. duplicate interface in commit.ts diff --git a/shared/constants/models.ts b/shared/constants/models.ts index 84acf80..64e83d6 100644 --- a/shared/constants/models.ts +++ b/shared/constants/models.ts @@ -47,6 +47,11 @@ export type ElizaCloudSkipReason = 'timeout' | 'zero-fix-rate'; * Model IDs to skip when using ElizaCloud, with reason. WHY: Audits showed these models * 500/timeout repeatedly or had 0% fix rate. Timeout-only models may be retried after cooldown * (transient gateway issues); zero-fix-rate are skipped for audit (pill-output #2). + * + * **Maintainer refresh:** When **RESULTS SUMMARY → Model Performance** shows a model at **0%** verified + * fixes across meaningful attempts, add it here with **`ELIZACLOUD_SKIP_REASON`** **`zero-fix-rate`** and a + * short evidence comment. **Last reviewed:** 2026-04-08 — no new static entries from recent CI conflict + * runs (client **90s** timeouts on bulk **llm-api** are operator/config, not automatic skip-list adds). */ export const ELIZACLOUD_SKIP_MODEL_IDS: readonly string[] = [ 'openai/gpt-5.2-codex', @@ -89,12 +94,27 @@ export function getElizaCloudSkipReason(modelId: string): ElizaCloudSkipReason { let loggedElizacloudIncludeModels = false; let loggedElizacloudExtraSkip = false; +let loggedElizacloudExtraSkipInvalid = false; + +/** Skip-list ids must be sane strings (no `//`, bounded length) — avoids junk env breaking merges. */ +function isPlausibleSkipListModelId(id: string): boolean { + if (!id || id.length > 200 || id.includes('//')) return false; + return /^[A-Za-z0-9._\/-]+$/.test(id); +} export function getEffectiveElizacloudSkipModelIds(): string[] { const extraRaw = process.env.PRR_ELIZACLOUD_EXTRA_SKIP_MODELS?.trim(); - const extraIds = extraRaw + const extraParsed = extraRaw ? extraRaw.split(',').map((s) => s.trim()).filter(Boolean) : []; + const extraDropped = extraParsed.filter((id) => !isPlausibleSkipListModelId(id)); + const extraIds = extraParsed.filter((id) => isPlausibleSkipListModelId(id)); + if (extraDropped.length > 0 && !loggedElizacloudExtraSkipInvalid) { + loggedElizacloudExtraSkipInvalid = true; + console.warn( + `PRR_ELIZACLOUD_EXTRA_SKIP_MODELS: ignored ${extraDropped.length.toLocaleString()} malformed id(s) (empty, //, or invalid chars).`, + ); + } const mergedBase = [...new Set([...ELIZACLOUD_SKIP_MODEL_IDS, ...extraIds])]; if (extraIds.length > 0 && !loggedElizacloudExtraSkip) { loggedElizacloudExtraSkip = true; @@ -105,7 +125,12 @@ export function getEffectiveElizacloudSkipModelIds(): string[] { const raw = process.env.PRR_ELIZACLOUD_INCLUDE_MODELS?.trim(); if (!raw) return mergedBase; - const include = new Set(raw.split(',').map(s => s.trim()).filter(Boolean)); + const include = new Set( + raw + .split(',') + .map((s) => s.trim()) + .filter((s) => s && isPlausibleSkipListModelId(s)), + ); const match = (id: string) => include.has(id) || include.has(id.replace(/^(openai|anthropic|google)\//, '')); const filtered = mergedBase.filter(id => !match(id)); if (!loggedElizacloudIncludeModels) { diff --git a/shared/constants/polling.ts b/shared/constants/polling.ts index c92683d..fa40814 100644 --- a/shared/constants/polling.ts +++ b/shared/constants/polling.ts @@ -64,3 +64,31 @@ export const LLM_REQUEST_TIMEOUT_MS = 90_000; // 90 seconds * so the request can complete before the gateway returns 504. */ export const LLM_REQUEST_TIMEOUT_FULL_FILE_MS = 180_000; // 3 minutes + +/** + * Client-side wait for each llm-api HTTP attempt (wrapped by with504Retry in shared/runners/llm-api.ts). + * Full-file rewrite prompts use {@link LLM_REQUEST_TIMEOUT_FULL_FILE_MS} always. + * + * Large search/replace prompts (e.g. 100k+ chars) often need more than 90s wall time; output.log audits + * showed Opus timing out at 90s with ~137k input while `isFullFileRewrite` was false. + * + * **Override:** set **`PRR_LLM_API_REQUEST_TIMEOUT_MS`** to a positive integer (ms) to use a fixed cap for + * non-full-file fix calls (skips size tiers below). + */ +export function getLlmApiRequestTimeoutMs(promptCharCount: number, isFullFileRewrite: boolean): number { + if (isFullFileRewrite) { + return LLM_REQUEST_TIMEOUT_FULL_FILE_MS; + } + const raw = process.env.PRR_LLM_API_REQUEST_TIMEOUT_MS?.trim(); + if (raw) { + const n = parseInt(raw, 10); + if (Number.isFinite(n) && n > 0) { + return n; + } + } + let ms = LLM_REQUEST_TIMEOUT_MS; + if (promptCharCount > 60_000) ms = Math.max(ms, 120_000); + if (promptCharCount > 100_000) ms = Math.max(ms, 150_000); + if (promptCharCount > 140_000) ms = Math.max(ms, 180_000); + return Math.min(ms, LLM_REQUEST_TIMEOUT_FULL_FILE_MS); +} diff --git a/shared/constants/runners.ts b/shared/constants/runners.ts index f0ff760..89edad8 100644 --- a/shared/constants/runners.ts +++ b/shared/constants/runners.ts @@ -9,9 +9,10 @@ export const MAX_WHITESPACE_IN_RUNNER_OUTPUT = 1000; /** - * Every N completed fix iterations (within a push iteration’s inner loop), clear session-skipped model keys - * so rotation can retry them. **0** = disabled (default). **WHY:** Pill-output #847 — long runs otherwise - * never revisit a model skipped early for transient failures; next process run was the only retry. + * After N completed fix iterations **since each key was added to** session **`skippedModelKeys`**, remove that + * key so rotation can retry that model. Checked at the start of each fix iteration. **0** = disabled (default). + * **WHY:** Pill-output #847 — long runs otherwise never revisit a model skipped early; per-key timing avoids + * wiping fresher skips on one global boundary. */ export function getSessionModelSkipResetAfterFixIterations(): number { const raw = process.env.PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS?.trim(); diff --git a/shared/constants/snippets.ts b/shared/constants/snippets.ts index 628e889..0d3bf52 100644 --- a/shared/constants/snippets.ts +++ b/shared/constants/snippets.ts @@ -20,3 +20,9 @@ export const CODE_SNIPPET_CONTEXT_AFTER = 30; * Default line range when only start line is provided (for bugbot comments). */ export const DEFAULT_LINE_RANGE_SIZE = 20; + +// Per-model **character** budgets for injected file text live in `shared/prompt-budget.ts` +// (`computeBudget`, `fitToBudget`). **WHY keep these line constants:** `getCodeSnippet` still +// builds an initial window from `CODE_SNIPPET_CONTEXT_*` and `MAX_SNIPPET_LINES`, then **shrinks** +// with `computeBudget` if the numbered slice is still too large — line caps anchor on review +// structure; char caps respect the active model gateway. diff --git a/shared/constants/verification.ts b/shared/constants/verification.ts index 1927fcc..fa0b36b 100644 --- a/shared/constants/verification.ts +++ b/shared/constants/verification.ts @@ -11,10 +11,10 @@ export const VERIFICATION_EXPIRY_ITERATIONS = 5; /** * Scale stale-verification threshold with total iteration count. * WHY: At 131 iterations a fixed threshold of 5 causes 40+ re-checks per run (time/tokens). - * Using max(5, floor(iterations/10)) keeps re-checks bounded on long-running PRs. + * Using max(5, floor(iterations/15)) keeps re-checks rarer on very long runs (output.log audit: /10 caused large stale batches). */ export function getVerificationExpiryForIterationCount(iterationCount: number): number { - return Math.max(VERIFICATION_EXPIRY_ITERATIONS, Math.floor(iterationCount / 10)); + return Math.max(VERIFICATION_EXPIRY_ITERATIONS, Math.floor(iterationCount / 15)); } /** diff --git a/shared/dependency-graph/graph.ts b/shared/dependency-graph/graph.ts new file mode 100644 index 0000000..13e0df4 --- /dev/null +++ b/shared/dependency-graph/graph.ts @@ -0,0 +1,200 @@ +/** + * Build a best-effort file dependency graph and compute blast radius (BFS + proximity union). + * + * **WHY async file reads in `buildDependencyGraph`:** Source bodies are read with `fs/promises`; + * specifier → path mapping is async in `specifier-resolver.ts` so probe storms do not block the + * event loop (see that module’s header). + * + * **WHY index-based BFS queue:** `Array.shift()` is O(n) per dequeue; large frontiers made radius + * computation quadratic in queue length. Cursor + `push` keeps dequeue O(1). + */ + +import { execFile } from 'child_process'; +import { promisify } from 'util'; +import { readFile } from 'fs/promises'; +import { join } from 'path'; + +import { detectDepScanLang, extractImports } from './import-scanner.js'; +import { resolveSpecifier, type LangContext } from './specifier-resolver.js'; +import { getDirectoryNeighbors, getFilenamePatternMatches } from './proximity.js'; + +const execFileAsync = promisify(execFile); + +export interface FileDepGraph { + imports: Map>; + importedBy: Map>; + nodeCount: number; + edgeCount: number; +} + +export interface BuildDependencyGraphOptions { + /** Max source files to scan (default from env or 5000). */ + maxFiles?: number; + timeoutMs?: number; + /** Override file list (tests); otherwise `git ls-files`. */ + fileList?: string[]; +} + +function envInt(key: string, fallback: number): number { + const raw = process.env[key]; + if (raw == null || raw === '') return fallback; + const n = parseInt(raw, 10); + return Number.isFinite(n) && n >= 0 ? n : fallback; +} + +export function isBlastRadiusDisabled(): boolean { + const v = process.env.PRR_DISABLE_BLAST_RADIUS?.trim(); + return v === '1' || /^true$/i.test(v ?? ''); +} + +export function getBlastRadiusDepth(): number { + return envInt('PRR_BLAST_RADIUS_DEPTH', 2); +} + +export function getBlastRadiusMaxFiles(): number { + return envInt('PRR_BLAST_RADIUS_MAX_FILES', 5000); +} + +export function getBlastRadiusTimeoutMs(): number { + return envInt('PRR_BLAST_RADIUS_TIMEOUT_MS', 30_000); +} + +export function isBlastRadiusDismissEnabled(): boolean { + const v = process.env.PRR_BLAST_RADIUS_DISMISS?.trim(); + return v === '1' || /^true$/i.test(v ?? ''); +} + +/** Tracked repo paths (git output uses `/`). */ +export async function listGitTrackedFiles(workdir: string): Promise { + const { stdout } = await execFileAsync('git', ['ls-files'], { + cwd: workdir, + maxBuffer: 50 * 1024 * 1024, + encoding: 'utf8', + }); + return stdout + .split('\n') + .map((l) => l.trim()) + .filter(Boolean); +} + +function addEdge(imports: Map>, importedBy: Map>, from: string, to: string): void { + if (from === to) return; + if (!imports.has(from)) imports.set(from, new Set()); + if (!importedBy.has(to)) importedBy.set(to, new Set()); + imports.get(from)!.add(to); + importedBy.get(to)!.add(from); +} + +/** + * Scan tracked source files and resolve import edges (best-effort). + */ +export async function buildDependencyGraph( + workdir: string, + options?: BuildDependencyGraphOptions +): Promise { + const maxFiles = options?.maxFiles ?? getBlastRadiusMaxFiles(); + const timeoutMs = options?.timeoutMs ?? getBlastRadiusTimeoutMs(); + const started = Date.now(); + + const allRel = options?.fileList ?? (await listGitTrackedFiles(workdir)); + const toScan = allRel.filter((p) => detectDepScanLang(p) != null); + if (toScan.length > maxFiles) { + throw new Error( + `blast-radius: ${toScan.length} source files exceeds PRR_BLAST_RADIUS_MAX_FILES (${maxFiles})` + ); + } + + const imports = new Map>(); + const importedBy = new Map>(); + const ctx: LangContext = {}; + + for (const rel of toScan) { + if (Date.now() - started > timeoutMs) { + throw new Error(`blast-radius: build exceeded timeout (${timeoutMs}ms)`); + } + const lang = detectDepScanLang(rel)!; + let content: string; + try { + content = await readFile(join(workdir, rel), 'utf8'); + } catch { + continue; + } + const specs = extractImports(rel, content); + for (const spec of specs) { + const target = await resolveSpecifier(spec, rel, lang, workdir, ctx); + if (target) addEdge(imports, importedBy, rel, target); + } + } + + const nodes = new Set([...imports.keys(), ...importedBy.keys()]); + let edgeCount = 0; + for (const s of imports.values()) edgeCount += s.size; + + return { + imports, + importedBy, + nodeCount: nodes.size, + edgeCount, + }; +} + +/** + * BFS from seeds over imports ∪ importedBy, depth-limited; union directory + filename proximity at depth 1. + * + * **WHY bidirectional edges:** Review comments may sit on a callee while the PR changed the caller + * (or the reverse); traversing both `imports` and `importedBy` keeps related files within `maxDepth`. + * + * **WHY merge proximity after BFS:** Regex edges miss co-located tests and style modules; directory + * and stem heuristics add depth-1 candidates without parsing each language’s test conventions. + */ +export function computeBlastRadius( + graph: FileDepGraph, + seedFiles: string[], + maxDepth: number, + allTrackedFiles?: string[] +): Map { + const { imports, importedBy } = graph; + const dist = new Map(); + const q: string[] = []; + /** Head index — avoid `shift()` reallocating the whole queue each step. */ + let qi = 0; + + for (const s of seedFiles) { + if (!dist.has(s)) { + dist.set(s, 0); + q.push(s); + } + } + + while (qi < q.length) { + const u = q[qi++]!; + const d = dist.get(u)!; + if (d >= maxDepth) continue; + const nextD = d + 1; + const neigh = [...(imports.get(u) ?? []), ...(importedBy.get(u) ?? [])]; + for (const v of neigh) { + const prev = dist.get(v); + if (prev === undefined || nextD < prev) { + dist.set(v, nextD); + q.push(v); + } + } + } + + if (allTrackedFiles && allTrackedFiles.length > 0) { + const dirProx = getDirectoryNeighbors(seedFiles, allTrackedFiles); + const nameProx = getFilenamePatternMatches(seedFiles, allTrackedFiles); + for (const m of [dirProx, nameProx]) { + for (const [path, depth] of m) { + const cur = dist.get(path); + if (cur === undefined || depth < cur) dist.set(path, depth); + } + } + } + + return dist; +} + +export function isInBlastRadius(repoRelativePath: string, radiusMap: Map): boolean { + return radiusMap.has(repoRelativePath); +} diff --git a/shared/dependency-graph/import-scanner.ts b/shared/dependency-graph/import-scanner.ts new file mode 100644 index 0000000..b528ea0 --- /dev/null +++ b/shared/dependency-graph/import-scanner.ts @@ -0,0 +1,141 @@ +/** + * Regex-based import/include extraction for blast-radius dependency graph. + * + * **WHY whole-file regex (not line-by-line):** Multi-line `import { … } from 'x'` and Go + * `import ( … )` blocks are the norm; line-only patterns miss most edges (false negatives). + * + * **WHY no comment stripping:** False positives (import text in strings/comments) only widen + * the radius (safe); stripping comments correctly across languages converges on a parser. + */ + +import { extname } from 'path'; + +/** Internal language keys used by resolver + scanner. */ +export type DepScanLang = + | 'ts' + | 'python' + | 'go' + | 'rust' + | 'c' + | 'java' + | 'kotlin' + | 'ruby' + | 'php'; + +const TS_EXT = new Set(['.ts', '.tsx', '.mts', '.cts', '.js', '.jsx', '.mjs', '.cjs']); + +/** Map file extension → scanner language, or null if not scanned. */ +export function detectDepScanLang(filePath: string): DepScanLang | null { + const ext = extname(filePath).toLowerCase(); + if (TS_EXT.has(ext)) return 'ts'; + if (ext === '.py' || ext === '.pyi') return 'python'; + if (ext === '.go') return 'go'; + if (ext === '.rs') return 'rust'; + if (['.c', '.cpp', '.cc', '.cxx', '.h', '.hpp', '.hxx', '.hh'].includes(ext)) return 'c'; + if (ext === '.java') return 'java'; + if (ext === '.kt' || ext === '.kts') return 'kotlin'; + if (ext === '.rb') return 'ruby'; + if (ext === '.php') return 'php'; + return null; +} + +// Static + multi-line named/type/side-effect imports; `\s` in class crosses newlines. +const TS_IMPORT_RE = + /import\s+(?:type\s+)?(?:(?:[\w*{}\s,]+)\s+from\s+)?['"]([^'"]+)['"]/gs; +const TS_DYNAMIC_IMPORT_RE = /\bimport\s*\(\s*['"]([^'"]+)['"]\s*\)/g; +const TS_REQUIRE_RE = /\brequire\s*\(\s*['"]([^'"]+)['"]\s*\)/g; +const TS_REEXPORT_RE = + /export\s+(?:type\s+)?(?:\{[^}]*\}|\*(?:\s+as\s+\w+)?)\s+from\s+['"]([^'"]+)['"]/gs; + +const GO_IMPORT_BLOCK_RE = /\bimport\s*\(([\s\S]*?)\)/g; +const GO_SPEC_IN_BLOCK_RE = /(?:\w+\s+)?"([^"]+)"/g; +const GO_SINGLE_IMPORT_RE = /\bimport\s+(?:\w+\s+)?"([^"]+)"/g; + +const PYTHON_IMPORT_RE = /^\s*import\s+([\w.]+)/gm; +const PYTHON_FROM_RE = /^\s*from\s+(\.{0,3}[\w.]*)\s+import/gm; + +const RUST_MOD_RE = /^\s*mod\s+(\w+)\s*;/gm; + +const C_INCLUDE_RE = /^\s*#\s*include\s*"([^"]+)"/gm; + +const JAVA_IMPORT_RE = /^\s*import\s+(?:static\s+)?([\w.]+(?:\.\*)?)\s*;/gm; + +const RUBY_REL_RE = /require_relative\s+['"]([^'"]+)['"]/g; +const RUBY_REQ_RE = /require\s+['"]([^'"]+)['"]/g; + +const PHP_REQ_RE = /(?:require|include)(?:_once)?\s*\(?\s*['"]([^'"]+)['"]\s*\)?\s*;/gim; + +function addMatches(re: RegExp, content: string, out: Set): void { + re.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = re.exec(content)) !== null) { + const s = m[1]?.trim(); + if (s) out.add(s); + } +} + +function extractGoImports(content: string, out: Set): void { + GO_SINGLE_IMPORT_RE.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = GO_SINGLE_IMPORT_RE.exec(content)) !== null) { + const inner = m[0]; + if (/\bimport\s*\(/.test(inner)) continue; + out.add(m[1]!); + } + GO_IMPORT_BLOCK_RE.lastIndex = 0; + while ((m = GO_IMPORT_BLOCK_RE.exec(content)) !== null) { + const blockBody = m[1] ?? ''; + GO_SPEC_IN_BLOCK_RE.lastIndex = 0; + let s: RegExpExecArray | null; + while ((s = GO_SPEC_IN_BLOCK_RE.exec(blockBody)) !== null) { + out.add(s[1]!); + } + } +} + +/** + * Return raw specifier strings (npm-style, paths, package ids, etc.) for dependency resolution. + */ +export function extractImports(filePath: string, content: string): string[] { + const lang = detectDepScanLang(filePath); + if (!lang) return []; + + const out = new Set(); + + switch (lang) { + case 'ts': + addMatches(TS_IMPORT_RE, content, out); + addMatches(TS_DYNAMIC_IMPORT_RE, content, out); + addMatches(TS_REQUIRE_RE, content, out); + addMatches(TS_REEXPORT_RE, content, out); + break; + case 'go': + extractGoImports(content, out); + break; + case 'python': + addMatches(PYTHON_IMPORT_RE, content, out); + addMatches(PYTHON_FROM_RE, content, out); + break; + case 'rust': + addMatches(RUST_MOD_RE, content, out); + break; + case 'c': + addMatches(C_INCLUDE_RE, content, out); + break; + case 'java': + case 'kotlin': + addMatches(JAVA_IMPORT_RE, content, out); + break; + case 'ruby': + addMatches(RUBY_REL_RE, content, out); + addMatches(RUBY_REQ_RE, content, out); + break; + case 'php': + addMatches(PHP_REQ_RE, content, out); + break; + default: + break; + } + + return [...out]; +} diff --git a/shared/dependency-graph/index.ts b/shared/dependency-graph/index.ts new file mode 100644 index 0000000..1e954a0 --- /dev/null +++ b/shared/dependency-graph/index.ts @@ -0,0 +1,24 @@ +export { + detectDepScanLang, + extractImports, + type DepScanLang, +} from './import-scanner.js'; +export { resolveSpecifier, type LangContext } from './specifier-resolver.js'; +export { + getDirectoryNeighbors, + getFilenamePatternMatches, + DEFAULT_MAX_DIR_NEIGHBORS, +} from './proximity.js'; +export { + type FileDepGraph, + type BuildDependencyGraphOptions, + buildDependencyGraph, + computeBlastRadius, + isInBlastRadius, + listGitTrackedFiles, + isBlastRadiusDisabled, + getBlastRadiusDepth, + getBlastRadiusMaxFiles, + getBlastRadiusTimeoutMs, + isBlastRadiusDismissEnabled, +} from './graph.js'; diff --git a/shared/dependency-graph/proximity.ts b/shared/dependency-graph/proximity.ts new file mode 100644 index 0000000..f1e8e86 --- /dev/null +++ b/shared/dependency-graph/proximity.ts @@ -0,0 +1,109 @@ +/** + * Zero-parse proximity signals for blast radius: same-directory neighbors and filename conventions. + * + * **WHY:** Regex import graphs miss co-located tests, CSS modules, and stories; proximity pulls + * them into scope without language-specific parsers. + */ + +import { dirname, basename } from 'path'; + +/** Default cap so a flat `src/` does not add hundreds of files (plan: MAX_DIR_NEIGHBORS). */ +export const DEFAULT_MAX_DIR_NEIGHBORS = 30; + +function envMaxDirNeighbors(): number { + const raw = process.env.PRR_BLAST_RADIUS_MAX_DIR_NEIGHBORS; + if (raw == null || raw === '') return DEFAULT_MAX_DIR_NEIGHBORS; + const n = parseInt(raw, 10); + return Number.isFinite(n) && n > 0 ? n : DEFAULT_MAX_DIR_NEIGHBORS; +} + +/** + * Files in the same directory as any seed file get depth 1, only if that directory has + * at most `maxDirNeighbors` tracked files. + */ +export function getDirectoryNeighbors( + seedFiles: string[], + allFiles: string[], + maxDirNeighbors: number = envMaxDirNeighbors() +): Map { + const out = new Map(); + const byDir = new Map(); + for (const f of allFiles) { + const d = dirname(f); + if (!byDir.has(d)) byDir.set(d, []); + byDir.get(d)!.push(f); + } + const seedDirs = new Set(seedFiles.map((f) => dirname(f))); + for (const dir of seedDirs) { + const neighbors = byDir.get(dir); + if (!neighbors || neighbors.length > maxDirNeighbors) continue; + for (const f of neighbors) { + if (!out.has(f)) out.set(f, 1); + } + } + return out; +} + +const STRIP_SUFFIXES = [ + /\.test\.[^.]+$/i, + /\.spec\.[^.]+$/i, + /\.stories\.[^.]+$/i, + /\.story\.[^.]+$/i, + /\.module\.css$/i, + /\.module\.scss$/i, + /\.styles?\.[^.]+$/i, + /-test\.[^.]+$/i, + /_test\.[^.]+$/i, + /\.mock\.[^.]+$/i, + /\.fixture\.[^.]+$/i, + /\.d\.ts$/i, +]; + +function stripKnownSuffixes(fileName: string): Set { + const bases = new Set(); + bases.add(fileName); + let current = fileName; + for (let i = 0; i < 4; i++) { + let changed = false; + for (const re of STRIP_SUFFIXES) { + const next = current.replace(re, ''); + if (next !== current && next.length > 0) { + current = next; + bases.add(current); + changed = true; + break; + } + } + if (!changed) break; + } + const dot = current.lastIndexOf('.'); + if (dot > 0) bases.add(current.slice(0, dot)); + return bases; +} + +/** + * Match files that share a stem with any seed (e.g. `Button.tsx` ↔ `Button.test.tsx`). + */ +export function getFilenamePatternMatches(seedFiles: string[], allFiles: string[]): Map { + const out = new Map(); + const seedSet = new Set(seedFiles); + const stems = new Set(); + for (const f of seedFiles) { + const base = basename(f); + for (const s of stripKnownSuffixes(base)) { + stems.add(s); + } + } + for (const f of allFiles) { + if (seedSet.has(f)) continue; + const base = basename(f); + const fileStems = stripKnownSuffixes(base); + for (const st of fileStems) { + if (stems.has(st)) { + out.set(f, 1); + break; + } + } + } + return out; +} diff --git a/shared/dependency-graph/specifier-resolver.ts b/shared/dependency-graph/specifier-resolver.ts new file mode 100644 index 0000000..afcceb1 --- /dev/null +++ b/shared/dependency-graph/specifier-resolver.ts @@ -0,0 +1,256 @@ +/** + * Map raw import specifiers to repo-relative paths (best-effort). + * + * **WHY null:** External packages, angle includes, or ambiguous specifiers are skipped; missing + * edges keep blast radius conservative (smaller), not wrong-file edits. + * + * **WHY async:** Graph build walks thousands of specifiers; sync `existsSync` / `readFileSync` + * block the event loop. `fs/promises` keeps PRR responsive to signals and concurrent work. + */ + +import { constants } from 'fs'; +import { access, readFile, readdir, stat } from 'fs/promises'; +import { dirname, join, normalize, posix, relative, sep } from 'path'; + +import type { DepScanLang } from './import-scanner.js'; + +export interface LangContext { + /** First line of go.mod: module example.com/foo */ + goModulePath?: string; + /** Repo-relative dirs containing Java/Kotlin sources (e.g. src/main/java). */ + javaStyleRoots?: string[]; +} + +const TS_PROBE_EXT = ['.ts', '.tsx', '.mts', '.cts', '.js', '.jsx', '.mjs', '.cjs']; + +function toPosix(p: string): string { + return p.split(sep).join('/'); +} + +async function fileExistsUnderWorkdir(workdir: string, rel: string): Promise { + const n = normalize(join(workdir, rel)); + if (!n.startsWith(normalize(workdir + sep))) return false; + try { + await access(n, constants.F_OK); + return true; + } catch { + return false; + } +} + +async function tryProbeExtensions(workdir: string, baseRel: string, exts: string[]): Promise { + const clean = baseRel.replace(/\/$/, ''); + for (const ext of exts) { + const p = clean + ext; + if (await fileExistsUnderWorkdir(workdir, p)) return toPosix(p); + } + for (const ext of exts) { + const idx = clean + `/index${ext}`; + if (await fileExistsUnderWorkdir(workdir, idx)) return toPosix(idx); + } + return null; +} + +async function resolveTsLikeSpecifier(spec: string, fromFile: string, workdir: string): Promise { + if (!spec.startsWith('./') && !spec.startsWith('../')) return null; + const fromDir = dirname(fromFile); + const joined = normalize(join(fromDir, spec)); + const rel = relative(workdir, join(workdir, joined)); + if (rel.startsWith('..')) return null; + const relPosix = toPosix(rel); + return tryProbeExtensions(workdir, relPosix, TS_PROBE_EXT); +} + +async function parseGoModulePath(workdir: string): Promise { + try { + const raw = await readFile(join(workdir, 'go.mod'), 'utf8'); + const m = /^\s*module\s+(\S+)/m.exec(raw); + return m?.[1]; + } catch { + return undefined; + } +} + +async function discoverJavaStyleRoots(workdir: string): Promise { + const roots: string[] = []; + const candidates = [ + 'src/main/java', + 'src/main/kotlin', + 'src/test/java', + 'src/test/kotlin', + 'app/src/main/java', + 'app/src/main/kotlin', + 'src', + ]; + for (const c of candidates) { + try { + const s = await stat(join(workdir, c)); + if (s.isDirectory()) roots.push(c); + } catch { + /* not present */ + } + } + return [...new Set(roots)]; +} + +async function resolveGoSpecifier(spec: string, workdir: string, ctx: LangContext): Promise { + if (!spec.includes('/')) return null; + const mod = ctx.goModulePath; + if (!mod) return null; + let packageDir: string; + if (spec === mod) { + packageDir = '.'; + } else if (spec.startsWith(mod + '/')) { + packageDir = spec.slice(mod.length + 1); + } else { + return null; + } + const absDir = packageDir === '.' ? workdir : join(workdir, packageDir); + try { + const names = await readdir(absDir, { withFileTypes: true }); + const goFiles = names.filter((d) => d.isFile() && d.name.endsWith('.go')).map((d) => d.name); + if (goFiles.length === 0) return null; + goFiles.sort(); + const fileRel = packageDir === '.' ? goFiles[0]! : join(packageDir, goFiles[0]!); + return toPosix(fileRel); + } catch { + return null; + } +} + +async function resolvePythonSpecifier(spec: string, fromFile: string, workdir: string): Promise { + const fromDir = dirname(fromFile); + let up = 0; + let rest = spec; + while (rest.startsWith('.')) { + up++; + rest = rest.slice(1); + } + let baseDir = fromDir; + for (let i = 1; i < up; i++) { + const next = dirname(baseDir); + if (next === baseDir) break; + baseDir = next; + } + const parts = rest.split('.').filter(Boolean); + if (parts.length === 0) return null; + const subPath = parts.join('/'); + if (up > 0) { + const candidatePy = join(baseDir, subPath + '.py'); + const relPy = relative(workdir, join(workdir, candidatePy)); + if (!relPy.startsWith('..') && (await fileExistsUnderWorkdir(workdir, relPy))) return toPosix(relPy); + const initPath = join(baseDir, subPath, '__init__.py'); + const relInit = relative(workdir, join(workdir, initPath)); + if (!relInit.startsWith('..') && (await fileExistsUnderWorkdir(workdir, relInit))) return toPosix(relInit); + return null; + } + const absPath = join(subPath + '.py'); + if (await fileExistsUnderWorkdir(workdir, absPath)) return toPosix(absPath); + const pkgInit = join(subPath, '__init__.py'); + if (await fileExistsUnderWorkdir(workdir, pkgInit)) return toPosix(pkgInit); + return null; +} + +async function resolveRustMod(spec: string, fromFile: string, workdir: string): Promise { + const fromDir = dirname(fromFile); + const f1 = join(fromDir, spec + '.rs'); + const r1 = relative(workdir, join(workdir, f1)); + if (!r1.startsWith('..') && (await fileExistsUnderWorkdir(workdir, r1))) return toPosix(r1); + const f2 = join(fromDir, spec, 'mod.rs'); + const r2 = relative(workdir, join(workdir, f2)); + if (!r2.startsWith('..') && (await fileExistsUnderWorkdir(workdir, r2))) return toPosix(r2); + return null; +} + +async function resolveCInclude(spec: string, fromFile: string, workdir: string): Promise { + const fromDir = dirname(fromFile); + const candidates = [ + join(fromDir, spec), + spec, + join('include', spec), + join('src', spec), + ]; + for (const c of candidates) { + const r = relative(workdir, join(workdir, c)); + if (!r.startsWith('..') && (await fileExistsUnderWorkdir(workdir, r))) return toPosix(r); + } + return null; +} + +async function resolveJavaLikeImport(spec: string, workdir: string, ctx: LangContext, ext: string): Promise { + if (spec.endsWith('.*')) return null; + const pathPart = spec.replace(/\./g, '/') + ext; + const roots = ctx.javaStyleRoots ?? (await discoverJavaStyleRoots(workdir)); + ctx.javaStyleRoots = roots; + for (const root of roots) { + const rel = join(root, pathPart); + if (await fileExistsUnderWorkdir(workdir, rel)) return toPosix(rel); + } + return null; +} + +async function resolveRubySpecifier(spec: string, fromFile: string, workdir: string): Promise { + if (spec.startsWith('./') || spec.startsWith('../')) { + const fromDir = dirname(fromFile); + const joined = normalize(join(fromDir, spec)); + const rel = relative(workdir, join(workdir, joined)); + if (rel.startsWith('..')) return null; + const base = rel.endsWith('.rb') ? rel : rel + '.rb'; + if (await fileExistsUnderWorkdir(workdir, base)) return toPosix(base); + return null; + } + const libPath = join('lib', spec.replace(/\//g, posix.sep) + '.rb'); + if (await fileExistsUnderWorkdir(workdir, libPath)) return toPosix(libPath); + return null; +} + +async function resolvePhpSpecifier(spec: string, fromFile: string, workdir: string): Promise { + const fromDir = dirname(fromFile); + if (spec.startsWith('./') || spec.startsWith('../')) { + const joined = normalize(join(fromDir, spec)); + const rel = relative(workdir, join(workdir, joined)); + if (!rel.startsWith('..') && (await fileExistsUnderWorkdir(workdir, rel))) return toPosix(rel); + return null; + } + if (await fileExistsUnderWorkdir(workdir, spec)) return toPosix(spec); + return null; +} + +/** + * Resolve one specifier to a single tracked-style repo-relative path, or null. + */ +export async function resolveSpecifier( + specifier: string, + fromFilePath: string, + lang: DepScanLang, + workdir: string, + ctx: LangContext +): Promise { + const spec = specifier.trim(); + if (!spec) return null; + + switch (lang) { + case 'ts': + return resolveTsLikeSpecifier(spec, fromFilePath, workdir); + case 'python': + return resolvePythonSpecifier(spec, fromFilePath, workdir); + case 'go': { + if (!ctx.goModulePath) ctx.goModulePath = await parseGoModulePath(workdir); + return resolveGoSpecifier(spec, workdir, ctx); + } + case 'rust': + return resolveRustMod(spec, fromFilePath, workdir); + case 'c': + return resolveCInclude(spec, fromFilePath, workdir); + case 'java': + return resolveJavaLikeImport(spec, workdir, ctx, '.java'); + case 'kotlin': + return (await resolveJavaLikeImport(spec, workdir, ctx, '.kt')) ?? (await resolveJavaLikeImport(spec, workdir, ctx, '.kts')); + case 'ruby': + return resolveRubySpecifier(spec, fromFilePath, workdir); + case 'php': + return resolvePhpSpecifier(spec, fromFilePath, workdir); + default: + return null; + } +} diff --git a/shared/git/git-commit-scan.ts b/shared/git/git-commit-scan.ts index bc59bbf..b2d6897 100644 --- a/shared/git/git-commit-scan.ts +++ b/shared/git/git-commit-scan.ts @@ -19,15 +19,57 @@ * recovery share one HEAD. Tests call **`clearScanCommittedFixesCache()`**. */ import type { SimpleGit } from 'simple-git'; -import { debug } from '../logger.js'; +import { debug, formatNumber, warn } from '../logger.js'; + +/** One warning per process per workdir+reason when merge base for prr-fix scan is missing (pill-output #559). */ +const warnedScanBaseFallback = new Set(); +/** One warning per process per workdir when git log --grep scan throws (non-fatal degrade). */ +const warnedScanRawFailure = new Set(); + +function scanDegradeWarnKey(workdir: string | undefined, tag: string): string { + return `${workdir ?? '?'}\0${tag}`; +} /** In-process cache: same workdir + branch + HEAD → same grep scan (pill: avoid redundant git log). */ const committedFixScanCache = new Map(); const MAX_SCAN_CACHE_ENTRIES = 64; -function scanCacheKey(workdir: string, branch: string, headSha: string, prBaseBranch?: string): string { +/** + * Include resolved merge base (or `n100` when using recent-commit cap) so two clones reusing the + * same workdir path cannot share a cache entry when fallback picks different bases (pill-output). + */ +function scanCacheKey( + workdir: string, + branch: string, + headSha: string, + prBaseBranch: string | undefined, + resolvedBaseLabel: string, +): string { const base = prBaseBranch?.trim() ?? ''; - return `${workdir}\0${branch}\0${headSha}\0${base}`; + return `${workdir}\0${branch}\0${headSha}\0${base}\0${resolvedBaseLabel}`; +} + +/** Resolve `origin/` or first existing of origin/main|master|develop for `base..branch` log range. */ +async function resolveScanBaseBranch(git: SimpleGit, prBaseBranch?: string): Promise { + const prBase = prBaseBranch?.trim(); + if (prBase) { + const prRef = `origin/${prBase}`; + try { + await git.raw(['rev-parse', '--verify', prRef]); + return prRef; + } catch { + /* fall through — base branch may not be fetched yet */ + } + } + for (const candidate of ['origin/main', 'origin/master', 'origin/develop'] as const) { + try { + await git.raw(['rev-parse', '--verify', candidate]); + return candidate; + } catch { + /* try next */ + } + } + return null; } function rememberScanCache(key: string, ids: string[]): void { @@ -41,6 +83,8 @@ function rememberScanCache(key: string, ids: string[]): void { /** Clear process-wide scan cache (tests or long-lived hosts). */ export function clearScanCommittedFixesCache(): void { committedFixScanCache.clear(); + warnedScanBaseFallback.clear(); + warnedScanRawFailure.clear(); } export interface ScanCommittedFixesOptions { @@ -93,80 +137,93 @@ export async function scanCommittedFixes( branch: string, opts?: ScanCommittedFixesOptions ): Promise { + let resolvedBase: string | null = null; + try { + resolvedBase = await resolveScanBaseBranch(git, opts?.prBaseBranch); + } catch (error) { + debug('resolveScanBaseBranch failed', { error }); + resolvedBase = null; + } + const cacheKeySuffix = resolvedBase ?? 'n100'; + + if (resolvedBase === null) { + const prBase = opts?.prBaseBranch?.trim(); + const tag = prBase ? `missing-base:pr:${prBase}` : 'missing-base:no-origin-default'; + const wk = scanDegradeWarnKey(opts?.workdir, tag); + if (!warnedScanBaseFallback.has(wk)) { + warnedScanBaseFallback.add(wk); + if (prBase) { + warn( + `[PRR] Git recovery scan: could not resolve \`origin/${prBase}\` or a default branch ref (main/master/develop). Using last ${formatNumber(100)} commits instead of \`base..HEAD\` — fetch the PR base with additionalBranches if needed; older \`prr-fix:\` markers may be missed.`, + ); + } else { + warn( + `[PRR] Git recovery scan: no \`origin/main\`, \`origin/master\`, or \`origin/develop\` ref — using last ${formatNumber(100)} commits for \`prr-fix:\` recovery (typical of shallow/single-branch clones).`, + ); + } + } + } + if (opts?.workdir && opts?.headSha) { - const key = scanCacheKey(opts.workdir, branch, opts.headSha, opts.prBaseBranch); + const key = scanCacheKey(opts.workdir, branch, opts.headSha, opts.prBaseBranch, cacheKeySuffix); const hit = committedFixScanCache.get(key); if (hit) { - debug('scanCommittedFixes (cache hit)', { branch, headSha: opts.headSha.slice(0, 7) }); + debug('scanCommittedFixes (cache hit)', { + branch, + headSha: opts.headSha.slice(0, 7), + resolvedBase: cacheKeySuffix, + }); return [...hit]; } } - try { - // Find the base branch — PR's GitHub base first, then common default names - const baseBranches = ['origin/main', 'origin/master', 'origin/develop']; - let baseBranch: string | null = null; - const prBase = opts?.prBaseBranch?.trim(); - if (prBase) { - const prRef = `origin/${prBase}`; - try { - await git.raw(['rev-parse', '--verify', prRef]); - baseBranch = prRef; - } catch { - // Single-branch clones may not have fetched base yet; fall through to heuristics - } - } + try { + const baseBranch = resolvedBase; - for (const candidate of baseBranches) { - if (baseBranch) break; - try { - await git.raw(['rev-parse', '--verify', candidate]); - baseBranch = candidate; - break; - } catch { - // Branch doesn't exist, try next - } - } - // If no common base branch found, fall back to searching all history // WHY limit to 100: Prevents scanning thousands of commits in large repos // WHY still safe: Typical PRs have < 20 commits, 100 is very generous const logArgs = baseBranch ? ['log', '--grep=prr-fix:', '--format=%B', `${baseBranch}..${branch}`] : ['log', '--grep=prr-fix:', '--format=%B', '-n', '100']; - + debug('scanCommittedFixes', { baseBranch, branch, logArgs }); const logOutput = await git.raw(logArgs); - + const commentIds: string[] = []; - - // Parse all prr-fix:ID markers from commit messages - // Format: One marker per line: "prr-fix:IC_kwDOAbc123_defGHI" + + // Parse prr-fix:ID markers (multiple per line for squash-style messages; pill-output). if (logOutput) { const lines = logOutput.split('\n'); for (const line of lines) { - const match = line.match(/^prr-fix:(.+)$/); - if (match) { - // Preserve original casing from commit messages. - // WHY NOT lowercase: The state's verifiedFixed array stores IDs in - // their original case (from the GitHub API). Lowercasing here causes - // case-sensitive includes() checks to miss existing entries, leading - // to duplicate IDs accumulating across sessions. - commentIds.push(match[1].trim()); + const markerRe = /prr-fix:(\S+)/g; + let m: RegExpExecArray | null; + while ((m = markerRe.exec(line)) !== null) { + commentIds.push(m[1]!.trim()); } } } - + // Deduplicate: the same ID can appear in multiple commits - // (e.g., re-verified after a push, or re-committed after interruption) const unique = [...new Set(commentIds)]; if (opts?.workdir && opts?.headSha) { - rememberScanCache(scanCacheKey(opts.workdir, branch, opts.headSha, opts.prBaseBranch), unique); + rememberScanCache( + scanCacheKey(opts.workdir, branch, opts.headSha, opts.prBaseBranch, cacheKeySuffix), + unique, + ); } return unique; } catch (error) { // WHY catch and return empty instead of throw: // Scan failure shouldn't prevent startup - we'll just verify everything fresh + const wk = scanDegradeWarnKey(opts?.workdir, 'log-failed'); + if (!warnedScanRawFailure.has(wk)) { + warnedScanRawFailure.add(wk); + const detail = error instanceof Error ? error.message : String(error); + warn( + `[PRR] Git recovery scan failed (${detail}) — continuing without recovered prr-fix markers. Next verification may re-run for issues fixed in prior commits.`, + ); + } debug('Failed to scan committed fixes', { error }); return []; } diff --git a/shared/git/git-conflicts.ts b/shared/git/git-conflicts.ts index 93e0c7a..aba10f8 100644 --- a/shared/git/git-conflicts.ts +++ b/shared/git/git-conflicts.ts @@ -223,12 +223,34 @@ async function resolveGitWorkdir(git: SimpleGit): Promise { * Parse `git merge-tree` stderr/stdout for conflict paths. * Handles `Merge conflict in path` and `CONFLICT (type): path ...` (e.g. modify/delete). */ +/** True when git is too old or merge-tree failed for a non-conflict reason. */ +export function mergeTreeFailureLooksUnsupported(combinedOutput: string): boolean { + const o = combinedOutput.toLowerCase(); + return ( + /is not a git command/.test(o) || + /unknown option/.test(o) || + /ambiguous argument/.test(o) || + /bad object/.test(o) || + /unknown revision/.test(o) + ); +} + export function parseMergeTreeConflictPaths(combinedOutput: string): string[] { const files = new Set(); for (const m of combinedOutput.matchAll(/Merge conflict in (.+)$/gm)) { files.add(m[1].trim()); } - for (const m of combinedOutput.matchAll(/^CONFLICT \([^)]+\):\s*(\S+)/gm)) { + // WHY anchored "CONFLICT ... Merge conflict in ": git merge-tree emits + // CONFLICT (submodule): Merge conflict in eliza + // CONFLICT (content): Merge conflict in package.json + // The old (\S+) after the colon captured "Merge" as a file path. + // Only capture from "Merge conflict in ..." on this line format. + for (const m of combinedOutput.matchAll(/^CONFLICT \([^)]+\):\s*Merge conflict in (.+)$/gm)) { + files.add(m[1].trim()); + } + // Also capture other CONFLICT formats that don't use "Merge conflict in" + // e.g. "CONFLICT (modify/delete): path deleted in ..." + for (const m of combinedOutput.matchAll(/^CONFLICT \([^)]+\):\s*(\S+)\s+(?:deleted|renamed|added)/gm)) { files.add(m[1].trim()); } return [...files]; @@ -299,7 +321,23 @@ export async function probeLatentMergeConflictsWithOrigin( const e = err as { stdout?: Buffer | string; stderr?: Buffer | string; code?: number }; const out = `${e.stdout?.toString?.() ?? e.stdout ?? ''}\n${e.stderr?.toString?.() ?? e.stderr ?? ''}`; const files = parseMergeTreeConflictPaths(out); - return { ran: true, hasLatentConflicts: true, files }; + if (files.length > 0) { + return { ran: true, hasLatentConflicts: true, files }; + } + if (mergeTreeFailureLooksUnsupported(out)) { + return { + ran: false, + hasLatentConflicts: false, + files: [], + skipReason: 'git merge-tree unavailable or failed (need Git 2.38+ for latent merge probe)', + }; + } + return { + ran: true, + hasLatentConflicts: false, + files: [], + skipReason: 'merge-tree exited without parseable conflict paths', + }; } } diff --git a/shared/git/git-lock-files.ts b/shared/git/git-lock-files.ts index 24c7a37..aa9ce75 100644 --- a/shared/git/git-lock-files.ts +++ b/shared/git/git-lock-files.ts @@ -1,7 +1,7 @@ /** * Lock file utilities for conflict detection */ -import { existsSync, readFileSync } from 'fs'; +import { existsSync, readFileSync, lstatSync } from 'fs'; import { join } from 'path'; export function isLockFile(filepath: string): boolean { @@ -171,12 +171,16 @@ export function findFilesWithConflictMarkers(workdir: string, files: string[]): const fullPath = join(workdir, file); if (existsSync(fullPath)) { try { + // WHY lstat guard: submodules/directories exist on disk but readFileSync + // throws EISDIR. Skip them — they are resolved by the submodule handler. + const stat = lstatSync(fullPath); + if (stat.isDirectory()) continue; const content = readFileSync(fullPath, 'utf-8'); if (hasConflictMarkers(content)) { conflicted.push(file); } } catch { - // Skip files that can't be read + // Skip files that can't be read (EISDIR, permission, etc.) } } } diff --git a/shared/git/git-pull.ts b/shared/git/git-pull.ts index 334d6d3..8a71562 100644 --- a/shared/git/git-pull.ts +++ b/shared/git/git-pull.ts @@ -33,7 +33,14 @@ export async function pullLatest( console.log(` Stashed ${status.modified.length + status.created.length + status.deleted.length} local changes`); } catch (stashError) { debug('Failed to stash', { error: stashError }); - // Continue anyway - pull might still work + console.warn( + ' ⚠ Could not stash local changes before pull — aborting pull to avoid merging/rebasing on a dirty tree (resolve or stash manually, then retry).', + ); + return { + success: false, + error: stashError instanceof Error ? stashError.message : String(stashError), + stashLeft: false, + }; } } @@ -81,6 +88,9 @@ export async function pullLatest( // Don't abort - leave conflicts for programmatic resolution // WHY: If we abort, git status shows no conflicts and we can't resolve them debug('Rebase has conflicts - leaving in conflicted state for resolution'); + console.log( + ' Hint: resolve conflicted files, then `git rebase --continue`, or `git rebase --abort` to undo.', + ); await restoreStashOnFailure(); return { success: false, error: `Rebase conflicts detected` }; } diff --git a/shared/git/git-submodule-path.ts b/shared/git/git-submodule-path.ts new file mode 100644 index 0000000..6782a6a --- /dev/null +++ b/shared/git/git-submodule-path.ts @@ -0,0 +1,41 @@ +import { execFileSync } from 'child_process'; + +/** + * Normalize a repo-relative path for git index lookups. + */ +function normalizeRepoRelativePath(repoRelativePath: string): string { + return repoRelativePath.replace(/\\/g, '/').replace(/^\.\/+/, '').replace(/\/+$/, ''); +} + +/** + * True when the path is recorded in the index as a git submodule (mode 160000, gitlink). + * + * WHY: Review bots often anchor threads on submodule roots (e.g. `plugins/plugin-sql`). + * There is no regular file text at line N; snippet reads fail and PRR used to dismiss as + * generic stale / "unreadable". This check uses the index so it works even when the + * submodule is not checked out in the worktree. + */ +export function isTrackedGitSubmodulePath(workdir: string, repoRelativePath: string): boolean { + const normalized = normalizeRepoRelativePath(repoRelativePath); + if (!normalized) return false; + try { + const out = execFileSync('git', ['ls-files', '-s', '--', normalized], { + cwd: workdir, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'], + maxBuffer: 256 * 1024, + }); + for (const line of out.split('\n')) { + const trimmed = line.trim(); + if (!trimmed) continue; + const m = trimmed.match(/^160000\s+\S+\s+\d\t(.+)$/); + if (m) { + const indexedPath = normalizeRepoRelativePath(m[1]!); + if (indexedPath === normalized) return true; + } + } + return false; + } catch { + return false; + } +} diff --git a/shared/git/redact-url.ts b/shared/git/redact-url.ts index c6cc4bf..cbade2a 100644 --- a/shared/git/redact-url.ts +++ b/shared/git/redact-url.ts @@ -2,9 +2,21 @@ * Redact credentials from URLs in git output or error messages. * WHY shared: git-push.ts and git-conflicts.ts both need this; single source of truth * so we never log tokens (https://token@... or https://x-access-token:TOKEN@...). + * + * Handles: + * - HTTPS with credentials: https://token@host/... → https://***@host/... + * - SSH clone URLs: git@github.com:org/repo → git@***:*** + * - Authorization headers with base64 tokens + * + * WHY include \r: git output on Windows / CI with CRLF line endings could otherwise + * leave a credential dangling before the carriage return and escape the char class. */ export function redactUrlCredentials(text: string): string { - let out = text.replace(/https:\/\/[^@\s]+@/g, 'https://***@'); + // HTTPS URLs with embedded credentials (token or user:password) + let out = text.replace(/https:\/\/[^@\s\r]+@/g, 'https://***@'); + // SSH-style git URLs: git@:/ — no credentials per se, but redact the + // host+path so private-repo names are not emitted to output.log. + out = out.replace(/git@[^:\s\r]+:[^\s\r]+/g, 'git@***:***'); // Redact Git extraheader auth (AUTHORIZATION: basic ) so we never log token-derived base64. out = out.replace(/AUTHORIZATION:\s*basic\s+[A-Za-z0-9+/=]+/g, 'AUTHORIZATION: basic ***'); return out; diff --git a/shared/llm/model-context-limits.ts b/shared/llm/model-context-limits.ts index fa21810..392cd2f 100644 --- a/shared/llm/model-context-limits.ts +++ b/shared/llm/model-context-limits.ts @@ -192,6 +192,10 @@ function getMaxElizacloudTotalInputCharsSmallContextUnified(model: string): numb * WHY: Gateways often return HTTP 500 with no body when upstream rejects oversized input; * failing fast avoids useless retries and matches the budget already logged in debug fields. * + * This budget includes any runtime-lowered cap from `lowerModelMaxPromptChars`. Use + * {@link getMaxElizacloudHardInputCeiling} for the context-window-derived hard ceiling + * that ignores runtime lowering. + * * Small-context models (≤32k): **min**(legacy fix+overhead, unified token budget) so batching and * preflight match real context limits. */ @@ -201,6 +205,21 @@ export function getMaxElizacloudLlmCompleteInputChars(model: string): number { return unified != null ? Math.min(legacy, unified) : legacy; } +/** + * Context-window-derived hard ceiling for input chars (ignores `modelMaxCharsOverride`). + * WHY: `lowerModelMaxPromptChars` adaptively shrinks the budget after timeouts, but a + * timeout on a 200k-context model at 34k chars is gateway lag — not context overflow. + * Prompts under this ceiling can always be sent; the transport should only hard-reject + * above it. Prompt builders should still respect the (possibly lowered) budget from + * `getMaxElizacloudLlmCompleteInputChars` for voluntary trimming. + */ +export function getMaxElizacloudHardInputCeiling(model: string): number { + const spec = getElizaCloudModelContextSpec(model); + const derived = deriveMaxFixPromptCharsFromContext(spec) + ELIZACLOUD_LLM_COMPLETE_INPUT_OVERHEAD_CHARS; + const unified = getMaxElizacloudTotalInputCharsSmallContextUnified(model); + return unified != null ? Math.min(derived, unified) : derived; +} + /** * Default `max_completion_tokens` for ElizaCloud OpenAI-style chat completions. * WHY centralized: must align with token estimates and context-window capping in `LLMClient.completeOpenAI`. @@ -234,7 +253,11 @@ export function estimateElizacloudInputTokensFromCharLength( } /** - * Lower the effective cap for this model after a 504 / timeout / context overflow. + * Lower the effective **fix-prompt** cap for this model after a 504 / timeout / context overflow. + * WHY floor: For large-context models (≥128k tokens) a timeout is usually gateway lag, + * not context overflow. Lowering the cap too aggressively (e.g. 640k → 26k on Sonnet 4.5) + * blocks subsequent conflict/verify prompts that are well within the context window. + * Floor at 50% of the context-derived cap or 60k chars, whichever is larger. */ export function lowerModelMaxPromptChars( provider: 'elizacloud' | 'anthropic' | 'openai', @@ -243,7 +266,12 @@ export function lowerModelMaxPromptChars( ): void { if (!model) return; const currentCap = getMaxFixPromptCharsForModel(provider, model); - const suggested = Math.max(20_000, Math.floor(sentPromptChars * 0.75)); + const spec = getElizaCloudModelContextSpec(model); + const contextDerived = deriveMaxFixPromptCharsFromContext(spec); + const floor = spec.maxContextTokens >= 128_000 + ? Math.max(60_000, Math.floor(contextDerived * 0.5)) + : 20_000; + const suggested = Math.max(floor, Math.floor(sentPromptChars * 0.75)); const next = Math.min(currentCap, suggested); modelMaxCharsOverride.set(model, next); } diff --git a/shared/llm/rate-limit.ts b/shared/llm/rate-limit.ts index 52f7f0c..d075a06 100644 --- a/shared/llm/rate-limit.ts +++ b/shared/llm/rate-limit.ts @@ -13,8 +13,10 @@ let elizacloudInFlight = 0; let elizacloudLastStartTime = 0; const elizacloudQueue: Array<() => void> = []; -/** After a 429, we use halved concurrency for this many ms. */ +/** After a 429, we use halved concurrency for at least this long (plus jitter). */ const RATE_LIMIT_BACKOFF_MS = 60_000; +/** Extra random delay up to this many ms so concurrent processes don't wake in lockstep. */ +const RATE_LIMIT_BACKOFF_JITTER_MS = 30_000; let rateLimitBackoffUntil = 0; let wasIn429Backoff = false; @@ -32,9 +34,10 @@ function getMaxInFlight(): number { return cap; } -/** Call when a 429 (or rate-limit) response is received. Reduces effective concurrency for 60s. */ +/** Call when a 429 (or rate-limit) response is received. Reduces effective concurrency for ~60s + jitter. */ export function notifyRateLimitHit(): void { - rateLimitBackoffUntil = Date.now() + RATE_LIMIT_BACKOFF_MS; + const jitter = Math.floor(Math.random() * (RATE_LIMIT_BACKOFF_JITTER_MS + 1)); + rateLimitBackoffUntil = Date.now() + RATE_LIMIT_BACKOFF_MS + jitter; } /** Acquire ElizaCloud rate-limit slot (used by llm-api runner and LLM client). */ diff --git a/shared/logger.ts b/shared/logger.ts index 5b81f30..9ffe08d 100644 --- a/shared/logger.ts +++ b/shared/logger.ts @@ -34,6 +34,8 @@ let promptLogPath: string | null = null; let outputLogExitHandlerRegistered = false; // Pill #8: Counter for empty prompt bodies to emit summary at close let emptyPromptBodyCount = 0; +/** Counts empty PROMPT/RESPONSE refusals by `kind:slug` for closeOutputLog breakdown (pill-output audit). */ +const emptyPromptBodyByKindSlug = new Map(); /** Same `requestId` on PROMPT + RESPONSE metadata when concurrent LLM calls reorder prompts.log (grep `requestId` to pair). */ const promptRequestIdBySlug = new Map(); @@ -230,7 +232,20 @@ export async function closeOutputLog(): Promise { // Pill #8: Emit summary of empty prompt bodies to output.log so operators see it if (emptyPromptBodyCount > 0 && outputLogPath) { - const summaryMsg = `WARNING: ${emptyPromptBodyCount} prompts.log entr${emptyPromptBodyCount === 1 ? 'y' : 'ies'} had empty bodies — see stderr for details. This may indicate a logging bug (e.g. elizacloud streaming not passing accumulated response to logger).\n`; + const breakdown = + emptyPromptBodyByKindSlug.size > 0 + ? (() => { + const sorted = [...emptyPromptBodyByKindSlug.entries()].sort((a, b) => b[1] - a[1]); + const top = sorted.slice(0, 20); + const lines = top.map(([k, n]) => ` ${k}: ${formatNumber(n)}`).join('\n'); + const more = + sorted.length > 20 + ? `\n … and ${formatNumber(sorted.length - 20)} more kind:slug key(s)` + : ''; + return `\n By kind:slug (top ${formatNumber(Math.min(20, sorted.length))}):\n${lines}${more}\n`; + })() + : ''; + const summaryMsg = `WARNING: ${formatNumber(emptyPromptBodyCount)} prompts.log entr${emptyPromptBodyCount === 1 ? 'y' : 'ies'} had empty bodies — see stderr for details. This may indicate a logging bug (e.g. elizacloud streaming not passing accumulated response to logger).${breakdown}`; try { appendFileSync(outputLogPath, summaryMsg, 'utf-8'); if (origWarnRef) origWarnRef(summaryMsg.trim()); @@ -239,9 +254,24 @@ export async function closeOutputLog(): Promise { } // Reset counter for next run emptyPromptBodyCount = 0; + emptyPromptBodyByKindSlug.clear(); } } +/** + * Snapshot of prompts.log empty-body refusals (PROMPT/RESPONSE with zero content). + * WHY: Lets pill/tests read counts without reparsing prompts.log; cleared when `closeOutputLog` runs. + */ +export function getEmptyPromptBodyRejectionStats(): { + total: number; + byKindSlug: Array<{ key: string; count: number }>; +} { + const byKindSlug = [...emptyPromptBodyByKindSlug.entries()] + .map(([key, count]) => ({ key, count })) + .sort((a, b) => b.count - a.count); + return { total: emptyPromptBodyCount, byKindSlug }; +} + /** * Get the path to the current output log file. */ @@ -395,8 +425,10 @@ function writeToPromptLog( if (isEmpty && kind !== 'ERROR') { // No RESPONSE will follow — drop pairing slot (debugPrompt already registered slug). if (kind === 'PROMPT') promptRequestIdBySlug.delete(slug); - // Pill #8: Increment counter for summary at close + // Pill #8: Increment counter for summary at close + per-slug breakdown (pill-output.md audit). emptyPromptBodyCount++; + const ksKey = `${kind}:${slug}`; + emptyPromptBodyByKindSlug.set(ksKey, (emptyPromptBodyByKindSlug.get(ksKey) ?? 0) + 1); const phaseFromMeta = metadata && typeof metadata === 'object' && metadata !== null && 'phase' in metadata ? String((metadata as { phase?: unknown }).phase ?? '').trim() @@ -422,20 +454,19 @@ function writeToPromptLog( // avoid throwing from logger } // Pill / audit: record in prompts.log so CI and pill see empty-body events (not only stderr). - try { - if (promptLogStream) { - const stamp = new Date().toISOString(); - const phase = - metadata && typeof metadata === 'object' && metadata !== null && 'phase' in metadata - ? String((metadata as { phase?: unknown }).phase ?? '') - : ''; - const phasePart = phase ? ` phase=${JSON.stringify(phase)}` : ''; - const line = `--- PROMPTLOG_EMPTY_BODY slug=${slug} kind=${kind} label=${JSON.stringify(label)}${phasePart} at=${stamp} ---\n`; - promptLogStream.write(line); - } - } catch { - // ignore - } + // Pill / audit: standard ERROR block in prompts.log (not only stderr / one-line marker) so greps match. + const emptyMeta: Record = { + ...(metadata && typeof metadata === 'object' && metadata !== null ? { ...metadata } : {}), + emptyBody: true, + originalKind: kind, + }; + writeToPromptLog( + slug, + 'ERROR', + label, + `[empty-body] ${kind} refused: zero or whitespace-only content (see AGENTS.md prompts.log troubleshooting).`, + emptyMeta, + ); return; } const bodyToWrite = content; @@ -501,8 +532,10 @@ export function debugPrompt(label: string, prompt: string, metadata?: Record = { chars: prompt.length, requestId }; + if (metadata?.phase != null) promptLine.phase = metadata.phase; + debug(`PROMPT ${slug}`, promptLine); return slug; } @@ -545,7 +578,12 @@ export function debugResponse( writeFileSync(filepath, content, 'utf-8'); // Searchable one-liner in output.log - debug(`RESPONSE ${slug}`, { chars: response.length, ...(requestId ? { requestId } : {}) }); + const responseLine: Record = { + chars: response.length, + ...(requestId ? { requestId } : {}), + }; + if (metadata && metadata.phase != null) responseLine.phase = metadata.phase; + debug(`RESPONSE ${slug}`, responseLine); } /** diff --git a/shared/model-catalog.ts b/shared/model-catalog.ts index ccc95c7..8d6bb5f 100644 --- a/shared/model-catalog.ts +++ b/shared/model-catalog.ts @@ -123,6 +123,16 @@ export function loadModelProviderCatalog(path?: string): ModelProviderCatalog { warnCatalogOnce(warnKey, `Model catalog at ${p} is missing providers.openai/apiIds or providers.anthropic/apiIds — using empty catalog.`); return emptyCatalogFresh(); } + const sanitizeApiIds = (arr: unknown): string[] => + Array.isArray(arr) + ? arr.filter((x): x is string => typeof x === 'string' && x.trim().length > 0).map((x) => x.trim()) + : []; + catalog.providers.openai.apiIds = sanitizeApiIds(catalog.providers.openai.apiIds); + catalog.providers.anthropic.apiIds = sanitizeApiIds(catalog.providers.anthropic.apiIds); + if (catalog.providers.openai.apiIds.length === 0 && catalog.providers.anthropic.apiIds.length === 0) { + warnCatalogOnce(warnKey, `Model catalog at ${p} has no valid string entries in provider apiIds — using empty catalog.`); + return emptyCatalogFresh(); + } if (!catalog.lookup?.openaiHyphenless || !catalog.lookup?.anthropicHyphenless || !Array.isArray(catalog.lookup?.ambiguousHyphenless)) { warnCatalogOnce(warnKey, `Model catalog at ${p} is missing lookup tables — using empty catalog.`); return emptyCatalogFresh(); diff --git a/shared/path-utils.ts b/shared/path-utils.ts index c4ab906..9d21a28 100644 --- a/shared/path-utils.ts +++ b/shared/path-utils.ts @@ -1,11 +1,38 @@ /** - * Path helpers for PRR. Used when building allowedPaths / TARGET FILE(S) so we never - * send absolute or internal paths to the fixer (avoids "file outside workdir" and wasted LLM calls). + * Path helpers for PRR. Used when building allowedPaths / TARGET FILE(S) for the fixer and + * when normalizing review paths (diff prefixes, extension variants, fragments). + * + * ## Allowed paths — WHY open by default (Cycle 72) + * + * Historically we rejected any path whose first segment looked like an npm package name unless + * it appeared in a static `REPO_TOP_LEVEL` list. **WHY that seemed right:** comment bodies can + * mention paths such as `lodash/fp/merge.js` that must not become editable targets. In practice + * those paths almost never exist in the clone; `pathExists` / injection already fail safely. + * **What went wrong:** repos with legitimate top-level dirs not in the static set (`agent/`, + * `cmd/`, `contracts/`, …) had **every** issue on those files stripped from allowedPaths and + * injection — the model could not see the file, edits were rejected, and iterations burned with + * no progress. **Default today:** allow any repo-relative path that passes **hard deny** rules + * only (absolute paths, `node_modules`, `dist/`, `.cursor` / `.prr` / `root` segments). **Opt-in + * strict:** `PRR_STRICT_ALLOWED_PATHS=1` restores the first-segment heuristic; then + * `setDynamicRepoTopLevelDirs(prChangedFiles)` adds first segments from the PR diff so + * non-standard roots touched by the PR are still allowed without editing the static list. */ import { join } from 'path'; import { existsSync } from 'fs'; +/** + * Legacy “package-like first segment” filter for `isPathAllowedForFix`. + * + * **WHY it exists:** In strict mode, block paths whose first segment looks like an external + * package id (`foo-bar/baz`) unless it is in `REPO_TOP_LEVEL` or `dynamicRepoTopLevel`, to reduce + * noise from pasted dependency paths in review bodies. + * + * **WHY default is off:** Same heuristic blocked real monorepo roots; audits showed silent + * empty allowlists and wrong-file / couldNotInject churn outweighed the rare bad path case. + */ +const strictAllowedPaths = /^(1|true|yes)$/i.test(process.env.PRR_STRICT_ALLOWED_PATHS ?? ''); + /** Segments that indicate an internal path not under the repo (e.g. .cursor plans, .prr state). */ const INTERNAL_PATH_SEGMENTS = ['.cursor', '.prr', 'root']; @@ -15,6 +42,7 @@ const INTERNAL_PATH_SEGMENTS = ['.cursor', '.prr', 'root']; */ const EXTENSION_VARIANT_MAP: Record = { '.js': ['.json', '.ts', '.jsx', '.mjs', '.cjs'], + '.json': ['.js', '.ts', '.cjs', '.mjs'], '.ts': ['.tsx', '.js', '.json', '.mts', '.cts'], '.tsx': ['.ts', '.jsx'], '.jsx': ['.tsx', '.js'], @@ -66,7 +94,10 @@ export function stripGitDiffPathPrefix(rawPath: string): string { const rest = m[2]!; const first = rest.split('/')[0] ?? ''; if (!first) return t; - if (GIT_DIFF_PREFIX_STRIP_FIRST_SEGMENTS.has(first) || first.startsWith('@')) { + // WHY dynamicRepoTopLevel: under strict allowed paths, odd roots only strip when in REPO_TOP_LEVEL + // or PR changed files; open mode does not need it for allow checks but diff paths like + // `a/agent/foo.ts` still benefit from stripping once `setDynamicRepoTopLevelDirs` ran. + if (GIT_DIFF_PREFIX_STRIP_FIRST_SEGMENTS.has(first) || dynamicRepoTopLevel.has(first) || first.startsWith('@')) { return rest; } if (first === 'package.json' || first === 'pnpm-lock.yaml' || first === 'bun.lockb') { @@ -111,15 +142,57 @@ export function tryResolvePathWithExtensionVariants(workdir: string, path: strin return path; } -/** Top-level dirs that are typical repo source (not node_modules or external package refs from comments). */ +/** + * Typical first-segment names for repo source trees. **Used when `PRR_STRICT_ALLOWED_PATHS=1`:** + * together with `dynamicRepoTopLevel`, paths whose first segment matches `/^[a-z@][a-z0-9.-]*$/` + * must appear here or in the PR changed-file set or they are rejected. **WHY keep the set:** + * strict mode operators get predictable defaults without listing every possible root. **WHY not + * rely on this alone:** the list cannot cover every customer repo; open default + dynamic set + * covers the common failure mode (Cycle 72). + */ const REPO_TOP_LEVEL = new Set([ 'src', 'lib', 'app', 'apps', 'packages', 'plugins', 'scripts', 'test', 'tests', 'docs', 'build', 'tools', 'shared', '.github', 'config', 'public', 'components', 'db', 'migrations', 'api', 'server', 'client', 'examples', 'types', 'typings', 'benchmarks', - /** Common e2e / integration roots (TestCafe, Playwright, Cypress, etc.) — WHY: otherwise `isPathAllowedForFix` treats first segment as external package-like and strips paths from TARGET FILE(S). */ + /** E2e / integration roots — WHY: strict mode would otherwise reject Playwright/Cypress trees. */ 'e2e', 'playwright', 'cypress', 'fixtures', 'integration', 'wdio', ]); +/** + * First path segments seen on files changed in the PR (`git diff --name-only` base...HEAD). + * + * **WHY:** When `PRR_STRICT_ALLOWED_PATHS=1`, this extends `REPO_TOP_LEVEL` so roots that only + * appear in *this* PR (e.g. `agent/`) are not misclassified as “external package” paths. + * **WHY still call it when strict mode is off:** `stripGitDiffPathPrefix` uses the same set so + * unified-diff-style paths like `a/agent/foo.ts` normalize correctly after analysis runs. + */ +const dynamicRepoTopLevel = new Set(); + +/** + * Record PR top-level segments before building issues / prompts / runner allowlists. + * **Call site:** `processCommentsAndPrepareFixLoop` after resolving `changedFiles` (fresh diff or + * analysis cache), before `findUnresolvedIssues`. + * + * **WHY before analysis:** `getEffectiveAllowedPathsForNewIssue` → `filterAllowedPathsForFix` runs + * during issue construction; without this, strict mode + cache miss would filter valid targets. + */ +export function setDynamicRepoTopLevelDirs(changedFiles: string[]): void { + dynamicRepoTopLevel.clear(); + for (const file of changedFiles) { + const normalized = normalizeRepoPath(file); + const first = normalized.split('/')[0]; + if (!first || first === '.' || first === '..') continue; + if (first === 'node_modules' || first === 'dist') continue; + if (INTERNAL_PATH_SEGMENTS.some(seg => first === seg)) continue; + dynamicRepoTopLevel.add(first); + } +} + +/** Visible for testing. */ +export function getDynamicRepoTopLevelDirs(): ReadonlySet { + return dynamicRepoTopLevel; +} + /** * Normalize a path to forward slashes and trim (no leading ./ strip). * Use when comparing or splitting paths (e.g. segment count, prefix match). @@ -145,6 +218,9 @@ export type TrackedPathResolutionKind = | 'missing' | 'fragment'; +/** Dismissal when a review path cannot be mapped to a single tracked file (pill-output / AGENTS). */ +export type PathDismissCategory = 'missing-file' | 'path-unresolved' | 'path-fragment'; + /** * True when the review path cannot denote a single repo file (extension-only / bot fragments). * WHY: Distinguish from real root files like `.env` — do **not** use "starts with dot, no slash" @@ -175,16 +251,20 @@ export function shouldSkipFinalAuditLlmForPath(path: string | undefined | null): /** * When a tracked file is not found after resolution, pick a single dismissal category. * WHY: Same logical case must not flip between missing-file and path-unresolved (pill-output). + * **Fragments** (bare `.d.ts`, extension-only): **`path-fragment`**. **Ambiguous** basename matches: **`path-unresolved`**. */ export function pathDismissCategoryForNotFound( reviewPath: string, resolutionKind: TrackedPathResolutionKind -): 'missing-file' | 'path-unresolved' { - if (resolutionKind === 'fragment' || resolutionKind === 'ambiguous') return 'path-unresolved'; - if (isReviewPathFragment(reviewPath)) return 'path-unresolved'; +): PathDismissCategory { + if (resolutionKind === 'fragment' || isReviewPathFragment(reviewPath)) return 'path-fragment'; + if (resolutionKind === 'ambiguous') return 'path-unresolved'; return 'missing-file'; } +/** Alias for {@link pathDismissCategoryForNotFound} — single name for “not found” dismissal (pill-output). */ +export const dismissPathNotFound = pathDismissCategoryForNotFound; + /** * Fix URL-encoding artifacts in path segments (e.g. from GitHub links in comment bodies). * A segment like "2Fmessage-service.test.ts" comes from "%2Fmessage..." with % stripped; @@ -200,9 +280,17 @@ export function normalizePathSegmentEncoding(path: string): string { } /** - * True if the path is safe to use as an allowed path for the fixer (repo-relative, not internal). - * WHY: Comment bodies can contain absolute paths (e.g. /root/.cursor/plans/foo.plan.md). Adding - * those to allowedPaths causes "file outside workdir" and wasted LLM calls. + * Whether a string may appear in the fixer allowlist / injection set. + * + * **Always denied (hard rules — WHY):** + * - Absolute paths — would escape the clone or hit host paths from pasted plans. + * - `.cursor`, `.prr`, leading `root/` segment — tool state, not PR product code. + * - `node_modules` (anywhere), `dist/` prefix — generated or vendored; editing is unsafe/noisy. + * + * **Optional strict segment rule:** When `PRR_STRICT_ALLOWED_PATHS=1`, reject paths whose first + * segment looks like a package id unless it is in `REPO_TOP_LEVEL` or `dynamicRepoTopLevel`. + * **Default (strict off):** any other repo-relative path is allowed so reviews can target + * adjacent files and uncommon roots without silent stripping (see file-level WHY above). */ export function isPathAllowedForFix(path: string): boolean { if (!path || typeof path !== 'string') return false; @@ -213,15 +301,17 @@ export function isPathAllowedForFix(path: string): boolean { if (normalized.includes(`/${seg}/`) || normalized.startsWith(`${seg}/`)) return false; } if (normalized.includes('node_modules') || normalized.startsWith('dist/')) return false; - const first = normalized.split('/')[0]; - if (first && !REPO_TOP_LEVEL.has(first) && /^[a-z@][a-z0-9.-]*$/.test(first)) return false; + if (strictAllowedPaths) { + const first = normalized.split('/')[0]; + if (first && !REPO_TOP_LEVEL.has(first) && !dynamicRepoTopLevel.has(first) && /^[a-z@][a-z0-9.-]*$/.test(first)) return false; + } return true; } /** - * Filter an array of paths to only those allowed for fix (repo-relative, not internal). - * Normalizes path segment encoding (e.g. "2F" prefix from URL-encoded "/") so TARGET FILE(S) - * never show artifacts like "packages/.../2Fmessage-service.test.ts". + * Deduplicate and filter paths through `isPathAllowedForFix`. + * **WHY normalize encoding first:** GitHub-linked comments can leave `%2F` artifacts as `2F` in + * a segment; we fix that so TARGET FILE(S) and runner sets stay consistent (pill-output audit). */ export function filterAllowedPathsForFix(paths: string[]): string[] { const normalized = paths diff --git a/shared/prompt-budget.ts b/shared/prompt-budget.ts new file mode 100644 index 0000000..db161a8 --- /dev/null +++ b/shared/prompt-budget.ts @@ -0,0 +1,212 @@ +/** + * Shared prompt / code context budgeting: one place to derive how many characters of + * file content fit for a model, and line-centered fitting when content exceeds the budget. + * + * WHY centralize: Output.log audits showed different code paths each had their own “max snippet + * chars” or line counts. They drifted — one path sent 80k+ to a 32k-window model (opaque 500s), + * another trimmed so aggressively the review line never appeared (false STALE / wrong YES). + * **`computeBudget`** ties the cap to **`getMaxElizacloudLlmCompleteInputChars`** / fix-prompt + * ceilings so changing defaults or models updates every consumer. **`reservedChars`** is the + * caller’s estimate of non-file text (instructions, comment bodies, diff wrappers); **`divisor`** + * splits the remainder across N injected slots (e.g. N fixes in one verify batch). + * + * WHY **`fitToBudget`**: When the whole file does not fit, we prefer a **line-centered** excerpt + * on the GitHub review line or a **keyword anchor** from the comment body — not only “first N + * lines”, which hid tail bugs and drove false final-audit UNFIXED (see DEVELOPMENT.md). + * + * Consumers: **`issue-analysis-snippet-helpers`**, **`issue-analysis-snippets`**, **`LLMClient`** + * batch verify, **`fix-verification`** (`getCurrentCodeAtLine`). Tests: **`tests/prompt-budget.test.ts`**. + */ +import { formatNumber } from './logger.js'; +import { + ELIZACLOUD_LLM_COMPLETE_INPUT_OVERHEAD_CHARS, + getMaxElizacloudLlmCompleteInputChars, + getMaxFixPromptCharsForModel, +} from './llm/model-context-limits.js'; + +/** Hard rail: never treat more than this as "full file" for budgeting (pathological files). */ +export const PROMPT_BUDGET_MAX_FULL_FILE_CHARS = 500_000; + +export function inputCeilingCharsForModel(model: string | undefined): number { + const m = model?.trim(); + if (!m) return getMaxElizacloudLlmCompleteInputChars('openai/gpt-4o-mini'); + if (m.includes('/') || m.startsWith('Qwen/')) return getMaxElizacloudLlmCompleteInputChars(m); + return getMaxFixPromptCharsForModel('openai', m) + ELIZACLOUD_LLM_COMPLETE_INPUT_OVERHEAD_CHARS; +} + +export interface ComputeBudgetOptions { + model?: string; + /** Non-code prompt chars to reserve (instructions, comment, diff wrappers, etc.). */ + reservedChars: number; + /** Split remaining code budget across N slots (e.g. N fixes in one verify batch). */ + divisor?: number; +} + +export function computeBudget(opts: ComputeBudgetOptions): { + availableForCode: number; + inputCeilingChars: number; +} { + const ceiling = inputCeilingCharsForModel(opts.model); + const div = Math.max(1, opts.divisor ?? 1); + const raw = Math.floor((ceiling - opts.reservedChars) / div); + const available = Math.max(3_000, Math.min(raw, PROMPT_BUDGET_MAX_FULL_FILE_CHARS)); + return { availableForCode: available, inputCeilingChars: ceiling }; +} + +/** + * Per-fix cap for "current code" in batch verify — aligns with buildBatchVerifyPrompt + * (comment + diff + template overhead per fix). + */ +export function computePerFixVerifyCurrentCodeBudget(model: string | undefined, fixesInBatch: number): number { + const n = Math.max(1, fixesInBatch); + const overheadPerFix = 4_500; + const batchTemplate = 12_000; + const { availableForCode } = computeBudget({ + model, + reservedChars: batchTemplate + n * overheadPerFix, + divisor: n, + }); + return Math.max(4_000, Math.min(availableForCode, 20_000)); +} + +export interface FitToBudgetResult { + content: string; + truncated: boolean; +} + +/** + * Line-numbered excerpt of `rawFileContent` within `maxChars`, centered on `anchorLine1Based` + * when possible. Uses keyword anchor from `commentBody` when anchor is unknown. + */ +export function fitToBudget( + rawFileContent: string, + anchorLine1Based: number | null, + maxChars: number, + opts?: { + commentBody?: string; + findKeywordAnchor?: (lines: string[], body: string) => number | null; + } +): FitToBudgetResult { + const lines = rawFileContent.split('\n'); + const numbered = (from: number, to: number) => + lines.slice(from, to).map((l, i) => `${from + i + 1}: ${l}`).join('\n'); + + let anchor = anchorLine1Based != null && anchorLine1Based > 0 && anchorLine1Based <= lines.length ? anchorLine1Based : null; + if (anchor === null && opts?.commentBody && opts.findKeywordAnchor) { + const k = opts.findKeywordAnchor(lines, opts.commentBody); + if (k != null) anchor = k; + } + + const full = numbered(0, lines.length); + if (full.length <= maxChars) { + return { content: full, truncated: false }; + } + + if (anchor === null) { + const avg = 48; + let n = Math.min(lines.length, Math.max(20, Math.floor(maxChars / avg))); + let body = numbered(0, n); + const note = `\n... (${formatNumber(lines.length - n)} more lines omitted — file exceeds budget; no line anchor)`; + let out = body + note; + if (out.length > maxChars) out = out.slice(0, maxChars - 40) + '\n... (truncated)'; + return { content: out, truncated: true }; + } + + let before = 80; + let after = 120; + const build = () => { + const start = Math.max(0, anchor! - before - 1); + const end = Math.min(lines.length, anchor! + after); + const body = numbered(start, end); + const foot = `\n... (excerpt — ${formatNumber(lines.length)} lines; centered on line ${formatNumber(anchor!)})`; + return body + foot; + }; + let text = build(); + while (text.length > maxChars && (before > 10 || after > 10)) { + before = Math.max(10, Math.floor(before * 0.82)); + after = Math.max(10, Math.floor(after * 0.82)); + text = build(); + } + if (text.length > maxChars) { + text = text.slice(0, maxChars - 60) + '\n... (truncated to char budget)'; + } + return { content: text, truncated: true }; +} + +/** + * Shrink an already line-numbered snippet toward `anchorLine` to fit `maxChars`. + * Preserves trailing "(end of file)" / "(truncated — file has …)" footer lines when present. + */ +export function truncateNumberedCodeAroundAnchor( + rawNumberedSnippet: string, + anchorLine: number | null | undefined, + maxChars: number +): string { + if (rawNumberedSnippet.length <= maxChars) return rawNumberedSnippet; + const lines = rawNumberedSnippet.split('\n'); + const footerLines: string[] = []; + const bodyLines = [...lines]; + while (bodyLines.length > 0) { + const last = bodyLines[bodyLines.length - 1] ?? ''; + if ( + /^\(end of file — \d+ lines total\)\s*$/.test(last) || + /^\.\.\. \(truncated — file has \d+ lines total\)\s*$/.test(last) + ) { + footerLines.unshift(last); + bodyLines.pop(); + continue; + } + break; + } + type Row = { lineNum: number; text: string }; + const rows: Row[] = []; + for (const text of bodyLines) { + const m = text.match(/^(\d+):\s?(.*)$/); + if (m) { + rows.push({ lineNum: parseInt(m[1]!, 10), text: m[2] ?? '' }); + } + } + if (rows.length === 0) { + return rawNumberedSnippet.substring(0, Math.max(0, maxChars - 80)) + '\n... (truncated — snippet was cut for prompt size)'; + } + let center = Math.floor(rows.length / 2); + if (anchorLine != null && anchorLine > 0) { + let best = 0; + let bestDist = Infinity; + for (let k = 0; k < rows.length; k++) { + const d = Math.abs(rows[k]!.lineNum - anchorLine); + if (d < bestDist) { + bestDist = d; + best = k; + } + } + center = best; + } + let lo = center; + let hi = center; + const sliceText = () => rows.slice(lo, hi + 1).map((r) => r.text).join('\n'); + let chunk = sliceText(); + const note = '\n... (truncated — centered on review line for prompt budget)'; + const maxBody = Math.max(400, maxChars - note.length - footerLines.reduce((s, l) => s + l.length + 1, 0)); + while (chunk.length < maxBody && (lo > 0 || hi < rows.length - 1)) { + const canHi = hi < rows.length - 1; + const canLo = lo > 0; + if (canHi && (!canLo || hi - center <= center - lo)) hi++; + else if (canLo) lo--; + else if (canHi) hi++; + else break; + const next = sliceText(); + if (next.length > maxBody) break; + chunk = next; + } + while (chunk.length > maxBody && lo < hi) { + if (hi - center >= center - lo) hi--; + else lo--; + chunk = sliceText(); + } + if (chunk.length > maxBody) { + chunk = chunk.substring(0, Math.max(0, maxBody - 60)) + '\n...'; + } + const footer = footerLines.length > 0 ? '\n' + footerLines.join('\n') : ''; + return chunk + note + footer; +} diff --git a/shared/prr-runtime-meta.ts b/shared/prr-runtime-meta.ts index 48c8de2..508fad4 100644 --- a/shared/prr-runtime-meta.ts +++ b/shared/prr-runtime-meta.ts @@ -3,7 +3,9 @@ * * WHY: Operators need to confirm which tool revision ran (especially when PRR is vendored * or only dist/ is copied). We show package.json version always; revision from env or - * `git rev-parse` only when `.git` exists in the prr package root. + * `git rev-parse` only when a `.git` file or directory exists on the prr package root + * **or any parent directory** (up to a depth cap), so vendored `milady/prr/` layouts still + * show a revision when the host repo root has `.git`. * * WHY not GITHUB_SHA: In downstream workflows that runs inside another repo, GITHUB_SHA is * that repo's head — not the PRR checkout. Use PRR_GIT_SHA when you want to stamp the PRR commit. @@ -38,9 +40,26 @@ export function getPrrPackageRoot(): string { return join(getModuleDir(), '..', '..'); } -/** True when the prr root looks like a git checkout (`.git` file or dir, including worktrees). */ +const MAX_GIT_METADATA_WALK = 32; + +/** + * First directory at or above the prr package root that contains `.git` (file or dir). + * WHY: PRR may live under a monorepo (`host/prr/`) while `.git` is only at `host/`. + */ +export function findPrrGitMetadataDir(): string | undefined { + let dir = getPrrPackageRoot(); + for (let i = 0; i < MAX_GIT_METADATA_WALK; i++) { + if (existsSync(join(dir, '.git'))) return dir; + const parent = dirname(dir); + if (parent === dir) break; + dir = parent; + } + return undefined; +} + +/** True when some ancestor of the prr package root is a git work tree (`.git` file or dir). */ export function hasPrrGitMetadata(): boolean { - return existsSync(join(getPrrPackageRoot(), '.git')); + return findPrrGitMetadataDir() !== undefined; } let cachedVersion: string | undefined; @@ -66,8 +85,9 @@ function normalizeRevDisplay(raw: string): string { /** * Optional source revision: PRR_GIT_SHA or PRR_SOURCE_COMMIT (short or full SHA), else - * `git rev-parse --short HEAD` in the prr package root **only if** `.git` exists there - * (avoids calling git for vendored / npm-packaged trees with no repo metadata). + * `git rev-parse --short HEAD` with cwd at the prr package root **only if** a `.git` exists + * on the package root or a parent (see `findPrrGitMetadataDir`). Git discovers the repo from + * any path inside the work tree (avoids calling git when there is no enclosing repo). */ export function getPrrSourceRevision(): string | undefined { const env = process.env.PRR_GIT_SHA?.trim() || process.env.PRR_SOURCE_COMMIT?.trim(); @@ -85,7 +105,7 @@ export function getPrrSourceRevision(): string | undefined { } } -/** CI hint only when prr package root has no `.git` and no env stamp (e.g. prr as subfolder of another repo). */ +/** CI hint when no `.git` is found walking up from the prr package root and no env stamp. */ export function shouldSuggestPrrGitShaInCi(): boolean { if (process.env.CI !== 'true') return false; if (process.env.PRR_GIT_SHA?.trim() || process.env.PRR_SOURCE_COMMIT?.trim()) return false; diff --git a/shared/runners/llm-api.ts b/shared/runners/llm-api.ts index 4365621..18e2055 100644 --- a/shared/runners/llm-api.ts +++ b/shared/runners/llm-api.ts @@ -4,12 +4,13 @@ import { mkdir } from 'fs/promises'; import type { Runner, RunnerResult, RunnerOptions, RunnerStatus } from './types.js'; import { DEFAULT_MODEL_ROTATIONS } from './types.js'; import chalk from 'chalk'; -import { debug, debugPrompt, debugResponse } from '../logger.js'; +import { debug, debugPrompt, debugPromptError, debugResponse, formatNumber } from '../logger.js'; import Anthropic from '@anthropic-ai/sdk'; import OpenAI from 'openai'; -import { DEFAULT_ANTHROPIC_MODEL, DEFAULT_ELIZACLOUD_MODEL, DEFAULT_OPENAI_MODEL, ELIZACLOUD_API_BASE_URL, LLM_REQUEST_TIMEOUT_MS, LLM_REQUEST_TIMEOUT_FULL_FILE_MS, MAX_FIX_PROMPT_CHARS, MAX_ENRICHED_FIX_PROMPT_CHARS, MAX_ENRICHED_FIX_PROMPT_HARD_CAP, REWRITE_ESCALATION_RESERVE_CHARS } from '../constants.js'; -import { getMaxFixPromptCharsForModel, lowerModelMaxPromptChars } from '../llm/model-context-limits.js'; +import { DEFAULT_ANTHROPIC_MODEL, DEFAULT_ELIZACLOUD_MODEL, DEFAULT_OPENAI_MODEL, ELIZACLOUD_API_BASE_URL, getLlmApiRequestTimeoutMs, LLM_REQUEST_TIMEOUT_MS, MAX_FIX_PROMPT_CHARS, MAX_ENRICHED_FIX_PROMPT_CHARS, MAX_ENRICHED_FIX_PROMPT_HARD_CAP, REWRITE_ESCALATION_RESERVE_CHARS } from '../constants.js'; +import { getMaxFixPromptCharsForModel, getMaxElizacloudHardInputCeiling, lowerModelMaxPromptChars } from '../llm/model-context-limits.js'; import { createElizaCloudOpenAIClient } from '../llm/elizacloud.js'; +import { openAiChatCompletionContentToString } from '../llm/openai-chat-content.js'; import { acquireElizacloud, releaseElizacloud, notifyRateLimitHit } from '../llm/rate-limit.js'; import { normalizePathForAllow, normalizeRepoPath } from '../path-utils.js'; @@ -455,8 +456,9 @@ Working directory: ${workdir}`; throw new Error(`Prompt too large (${enrichedPrompt.length.toLocaleString()} chars, max ${maxEnrichedChars.toLocaleString()} for ${model}). Reduce batch size or file count.`); } - // Full-file rewrite prompts are larger; use a longer timeout so the request can complete. - const requestTimeoutMs = rewriteFiles.length > 0 ? LLM_REQUEST_TIMEOUT_FULL_FILE_MS : LLM_REQUEST_TIMEOUT_MS; + const isFullFileRewrite = rewriteFiles.length > 0; + const requestTimeoutMs = getLlmApiRequestTimeoutMs(enrichedPrompt.length, isFullFileRewrite); + debug('Request timeout for this call', { timeoutMs: requestTimeoutMs, isFullFileRewrite }); // Cooldown: after 3+ consecutive 504/timeouts, pause so gateway can recover. if (this.consecutive504Count >= CONSECUTIVE_504_COOLDOWN_THRESHOLD) { @@ -472,9 +474,9 @@ Working directory: ${workdir}`; if (this.provider === 'anthropic' && anthropic) { const model = options?.model || DEFAULT_ANTHROPIC_MODEL; - debug('Calling Anthropic API', { model }); + debug('Calling Anthropic API', { model, timeoutMs: requestTimeoutMs }); - console.log(`\n🧠 Calling ${model}...\n`); + console.log(`\n🧠 Calling ${model} (timeout ${Math.round(requestTimeoutMs / 1000)}s)...\n`); const maxTokens = getAnthropicMaxTokens(model); const result = await with504Retry( @@ -498,9 +500,9 @@ Working directory: ${workdir}`; outputTokens: result.usage.output_tokens, }); } else if ((this.provider === 'elizacloud' || this.provider === 'openai') && openai) { - debug(`Calling ${this.provider === 'elizacloud' ? 'ElizaCloud' : 'OpenAI'} API`, { model }); + debug(`Calling ${this.provider === 'elizacloud' ? 'ElizaCloud' : 'OpenAI'} API`, { model, timeoutMs: requestTimeoutMs }); - console.log(`\n🧠 Calling ${model}...\n`); + console.log(`\n🧠 Calling ${model} (timeout ${Math.round(requestTimeoutMs / 1000)}s)...\n`); if (this.provider === 'elizacloud') { await acquireElizacloud(); @@ -521,7 +523,7 @@ Working directory: ${workdir}`; requestTimeoutMs ); - response = result.choices[0]?.message?.content || ''; + response = openAiChatCompletionContentToString(result.choices[0]?.message?.content); debug(`${this.provider === 'elizacloud' ? 'ElizaCloud' : 'OpenAI'} response received`, { inputTokens: result.usage?.prompt_tokens, @@ -540,7 +542,18 @@ Working directory: ${workdir}`; }; } - debugResponse(promptSlug, 'llm-api-fix', response, { workdir, model: options?.model, responseLength: response.length }); + if (!response.trim()) { + debugPromptError(promptSlug, 'llm-api-fix', 'Empty or whitespace-only LLM response body (HTTP success; cannot write RESPONSE to prompts.log).', { + workdir, + model: options?.model, + emptyBody: true, + }); + console.warn( + chalk.yellow(` ⚠ llm-api: empty response body from model — prompts.log ERROR entry pairs with this request’s PROMPT slug.`), + ); + } else { + debugResponse(promptSlug, 'llm-api-fix', response, { workdir, model: options?.model, responseLength: response.length }); + } // Parse and apply file changes (pass escalated files so blocks are applied even when S/R ran) const applyResult = await this.applyFileChanges(workdir, response, rewriteFiles, options?.allowedPathsForBatch); @@ -574,7 +587,17 @@ Working directory: ${workdir}`; }; } // No change blocks at all (no noMeaningfulChanges, no disallowed) — LLM didn't emit changes. - console.log(' No file changes extracted from LLM response'); + const tail = response.replace(/\s+$/, '').slice(-600); + debug('No file changes extracted — response tail (for prompts.log correlation)', { + responseChars: response.length, + tailChars: tail.length, + tail, + }); + console.log( + chalk.gray( + ` No file changes extracted from LLM response (${formatNumber(response.length)} chars; tail logged at debug — set PRR_DEBUG or check prompts.log)`, + ), + ); this.consecutive504Count = 0; return { success: true, @@ -607,6 +630,11 @@ Working directory: ${workdir}`; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); debug('LLM API error', { error: errorMessage }); + debugPromptError(promptSlug, 'llm-api-fix', errorMessage.slice(0, 12_000), { + workdir, + model: options?.model, + status: (error as { status?: number })?.status, + }); const status = (error as { status?: number })?.status; if (status === 429 || /429|Too many requests|rate limit/i.test(errorMessage)) { @@ -617,8 +645,14 @@ Working directory: ${workdir}`; if (is504OrTimeout) { this.consecutive504Count++; if (this.provider === 'elizacloud' && model) { - lowerModelMaxPromptChars(this.provider ?? 'elizacloud', model, enrichedPrompt.length); - debug('Lowered prompt cap for model after timeout', { model, sentChars: enrichedPrompt.length }); + const hardCeiling = getMaxElizacloudHardInputCeiling(model); + const promptRatio = enrichedPrompt.length / hardCeiling; + if (promptRatio > 0.3) { + lowerModelMaxPromptChars(this.provider ?? 'elizacloud', model, enrichedPrompt.length); + debug('Lowered prompt cap for model after timeout', { model, sentChars: enrichedPrompt.length, promptRatio: promptRatio.toFixed(2) }); + } else { + debug('Timeout on small prompt relative to context — not lowering cap', { model, sentChars: enrichedPrompt.length, hardCeiling, promptRatio: promptRatio.toFixed(2) }); + } } // De-escalate full-file rewrite so next attempt uses smaller prompt and may complete. if (rewriteFiles.length > 0) { diff --git a/tests/dedup-group-overlap.test.ts b/tests/dedup-group-overlap.test.ts new file mode 100644 index 0000000..f3e4870 --- /dev/null +++ b/tests/dedup-group-overlap.test.ts @@ -0,0 +1,43 @@ +import { describe, it, expect } from 'vitest'; +import { resolveOverlappingDedupGroupsByIndex } from '../tools/prr/workflow/issue-analysis-dedup.js'; +import type { ReviewComment } from '../tools/prr/github/types.js'; + +function c(id: string, line: number | null, body = 'x'): { comment: ReviewComment; codeSnippet: string } { + return { + comment: { + id, + path: 'f.ts', + line, + body, + author: 'bot', + threadId: `t-${id}`, + databaseId: 1, + createdAt: new Date().toISOString(), + } as ReviewComment, + codeSnippet: '', + }; +} + +describe('resolveOverlappingDedupGroupsByIndex', () => { + it('keeps first group when the same index appears in a later GROUP', () => { + const items = [c('a', 1, 'longer body wins if needed'), c('b', 1), c('c', 2)]; + const groups = [ + { canonical: items[0]!, dupes: [items[1]!] }, + { canonical: items[2]!, dupes: [items[1]!] }, + ]; + const out = resolveOverlappingDedupGroupsByIndex(groups, items); + expect(out).toHaveLength(1); + expect(out[0]!.canonical.comment.id).toBe('a'); + expect(out[0]!.dupes.map((d) => d.comment.id).sort()).toEqual(['b']); + }); + + it('keeps two disjoint groups', () => { + const items = [c('a', 1), c('b', 1), c('c', 2), c('d', 2)]; + const groups = [ + { canonical: items[0]!, dupes: [items[1]!] }, + { canonical: items[2]!, dupes: [items[3]!] }, + ]; + const out = resolveOverlappingDedupGroupsByIndex(groups, items); + expect(out).toHaveLength(2); + }); +}); diff --git a/tests/dependency-graph.test.ts b/tests/dependency-graph.test.ts new file mode 100644 index 0000000..8654685 --- /dev/null +++ b/tests/dependency-graph.test.ts @@ -0,0 +1,126 @@ +import { mkdtemp, mkdir, writeFile } from 'fs/promises'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { describe, expect, test } from 'vitest'; + +import { extractImports, detectDepScanLang } from '../shared/dependency-graph/import-scanner.js'; +import { resolveSpecifier, type LangContext } from '../shared/dependency-graph/specifier-resolver.js'; +import { + getDirectoryNeighbors, + getFilenamePatternMatches, +} from '../shared/dependency-graph/proximity.js'; +import { + buildDependencyGraph, + computeBlastRadius, + isInBlastRadius, +} from '../shared/dependency-graph/graph.js'; + +async function tempWorkdir(): Promise { + return mkdtemp(join(tmpdir(), 'prr-depgraph-')); +} + +describe('import-scanner', () => { + test('detectDepScanLang', () => { + expect(detectDepScanLang('x.ts')).toBe('ts'); + expect(detectDepScanLang('x.tsx')).toBe('ts'); + expect(detectDepScanLang('x.py')).toBe('python'); + expect(detectDepScanLang('x.go')).toBe('go'); + expect(detectDepScanLang('x.rs')).toBe('rust'); + expect(detectDepScanLang('x.java')).toBe('java'); + expect(detectDepScanLang('x.kt')).toBe('kotlin'); + expect(detectDepScanLang('x.rb')).toBe('ruby'); + expect(detectDepScanLang('x.php')).toBe('php'); + expect(detectDepScanLang('README.md')).toBeNull(); + }); + + test('extractImports TS multi-line destructured', () => { + const src = `import { + foo, + bar, +} from './utils'; +import './side.css'; +`; + expect(extractImports('m.ts', src).sort()).toEqual(['./utils', './side.css'].sort()); + }); + + test('extractImports Go import block', () => { + const src = `package main +import ( + "fmt" + x "github.com/foo/bar" +) +`; + const specs = extractImports('m.go', src); + expect(specs).toContain('fmt'); + expect(specs).toContain('github.com/foo/bar'); + }); + + test('extractImports Python', () => { + const src = 'import os\nfrom .utils import x\n'; + const specs = extractImports('m.py', src); + expect(specs).toContain('os'); + expect(specs.some((s) => s.includes('utils'))).toBe(true); + }); +}); + +describe('specifier-resolver', () => { + test('resolve TS relative', async () => { + const workdir = await tempWorkdir(); + await writeFile(join(workdir, 'a.ts'), ''); + await writeFile(join(workdir, 'b.ts'), ''); + const ctx: LangContext = {}; + expect(await resolveSpecifier('./b', 'a.ts', 'ts', workdir, ctx)).toBe('b.ts'); + }); + + test('resolve Rust mod', async () => { + const workdir = await tempWorkdir(); + await mkdir(join(workdir, 'src'), { recursive: true }); + await writeFile(join(workdir, 'src', 'lib.rs'), ''); + await writeFile(join(workdir, 'src', 'foo.rs'), ''); + const ctx: LangContext = {}; + expect(await resolveSpecifier('foo', 'src/lib.rs', 'rust', workdir, ctx)).toBe('src/foo.rs'); + }); +}); + +describe('proximity', () => { + test('getDirectoryNeighbors respects cap', () => { + const seeds = ['src/a.ts']; + const many = ['src/a.ts', ...Array.from({ length: 40 }, (_, i) => `src/f${i}.ts`)]; + const m = getDirectoryNeighbors(seeds, many, 30); + expect(m.size).toBe(0); + }); + + test('getFilenamePatternMatches links test file', () => { + const seeds = ['components/Button.tsx']; + const all = ['components/Button.tsx', 'components/Button.test.tsx', 'components/Other.tsx']; + const m = getFilenamePatternMatches(seeds, all); + expect(m.has('components/Button.test.tsx')).toBe(true); + expect(m.has('components/Other.tsx')).toBe(false); + }); +}); + +describe('graph', () => { + test('buildDependencyGraph and computeBlastRadius', async () => { + const workdir = await tempWorkdir(); + await writeFile( + join(workdir, 'a.ts'), + `import { x } from './b'; +export { x } from './c'; +`, + ); + await writeFile(join(workdir, 'b.ts'), 'export const x = 1;\n'); + await writeFile(join(workdir, 'c.ts'), 'export const x = 1;\n'); + + const graph = await buildDependencyGraph(workdir, { + fileList: ['a.ts', 'b.ts', 'c.ts'], + timeoutMs: 30_000, + maxFiles: 5000, + }); + expect(graph.edgeCount).toBeGreaterThanOrEqual(2); + + const radius = computeBlastRadius(graph, ['b.ts'], 2, ['a.ts', 'b.ts', 'c.ts']); + expect(radius.get('b.ts')).toBe(0); + expect(radius.get('a.ts')).toBeDefined(); + expect(isInBlastRadius('a.ts', radius)).toBe(true); + }); +}); diff --git a/tests/dismiss-duplicate-cluster.test.ts b/tests/dismiss-duplicate-cluster.test.ts new file mode 100644 index 0000000..53b03f7 --- /dev/null +++ b/tests/dismiss-duplicate-cluster.test.ts @@ -0,0 +1,314 @@ +import { describe, it, expect } from 'vitest'; +import type { StateContext } from '../tools/prr/state/state-context.js'; +import type { ResolverState } from '../tools/prr/state/types.js'; +import type { ReviewComment } from '../tools/prr/github/types.js'; +import * as Dismissed from '../tools/prr/state/state-dismissed.js'; +import { + buildMergedDuplicatesForAnchor, + dismissDuplicateCluster, + dismissDuplicateClusterFromComments, + getPersistedDedupMapForCommentSet, + propagateStatusToDuplicates, + resolveDuplicateMapForRecovery, + resolveEffectiveDuplicateMapForComments, + mergeCommentsForClusterDismiss, + getClusterIdsAccountedOnState, + type DedupResult, +} from '../tools/prr/workflow/issue-analysis-dedup.js'; +import * as CommentStatus from '../tools/prr/state/state-comment-status.js'; + +function review(id: string, path: string): ReviewComment { + return { + id, + threadId: 't1', + author: 'bot', + body: `body ${id}`, + path, + line: 1, + createdAt: '2020-01-01T00:00:00Z', + }; +} + +function makeCtx(): StateContext { + const state: ResolverState = { + pr: 'o/r#1', + branch: 'main', + headSha: 'abc', + startedAt: 's', + lastUpdated: 'u', + lessonsLearned: [], + iterations: [{ timestamp: 't', commentsAddressed: [], changesMade: [], verificationResults: {} }], + verifiedComments: [], + verifiedFixed: [], + dismissedIssues: [], + commentStatuses: {}, + } as ResolverState; + return { statePath: '/tmp/dismiss-cluster-test', state, currentPhase: 'test' }; +} + +describe('dismissDuplicateCluster', () => { + it('dismisses anchor and all dedup siblings with per-comment paths', () => { + const ctx = makeCtx(); + const anchor = review('c1', 'a.ts'); + const dup = review('d1', 'b.ts'); + const map = new Map([['c1', ['d1']]]); + const duplicateItems = new Map([ + [ + 'd1', + { + comment: dup, + codeSnippet: '', + }, + ], + ]); + + dismissDuplicateCluster(ctx, anchor, map, duplicateItems, 'same issue', 'stale'); + + expect(Dismissed.isCommentDismissed(ctx, 'c1')).toBe(true); + expect(Dismissed.isCommentDismissed(ctx, 'd1')).toBe(true); + const d1 = Dismissed.getDismissedIssue(ctx, 'd1'); + expect(d1?.filePath).toBe('b.ts'); + }); +}); + +describe('getPersistedDedupMapForCommentSet', () => { + it('returns duplicate map when cache key and schema match', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'a,b', + schema: 'dedup-v2', + duplicateMap: { a: ['b'] }, + dedupedIds: ['a'], + }; + const m = getPersistedDedupMapForCommentSet(ctx, 'a,b'); + expect(m?.get('a')).toEqual(['b']); + }); + + it('returns undefined when comment id key differs', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'x', + schema: 'dedup-v2', + duplicateMap: { x: [] }, + dedupedIds: ['x'], + }; + expect(getPersistedDedupMapForCommentSet(ctx, 'a,b')).toBeUndefined(); + }); +}); + +describe('resolveEffectiveDuplicateMapForComments', () => { + it('returns in-memory map when non-empty', () => { + const ctx = makeCtx(); + const mem = new Map([['a', ['b']]]); + const a = review('a', 'x.ts'); + const b = review('b', 'y.ts'); + expect(resolveEffectiveDuplicateMapForComments(ctx, mem, [a, b])).toBe(mem); + }); + + it('falls back to persisted cache when duplicateMap is empty', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'a,b', + schema: 'dedup-v2', + duplicateMap: { a: ['b'] }, + dedupedIds: ['a'], + }; + const a = review('a', 'x.ts'); + const b = review('b', 'y.ts'); + const empty = new Map(); + const eff = resolveEffectiveDuplicateMapForComments(ctx, empty, [a, b]); + expect(eff?.get('a')).toEqual(['b']); + }); + + it('returns undefined when no map and no matching cache', () => { + const ctx = makeCtx(); + const a = review('a', 'x.ts'); + expect(resolveEffectiveDuplicateMapForComments(ctx, undefined, [a])).toBeUndefined(); + }); +}); + +describe('resolveDuplicateMapForRecovery', () => { + it('uses persisted cache when session map is empty and allComments omitted', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'a,b', + schema: 'dedup-v2', + duplicateMap: { a: ['b'] }, + dedupedIds: ['a'], + }; + const m = resolveDuplicateMapForRecovery(ctx, undefined, undefined); + expect(m?.get('a')).toEqual(['b']); + }); + + it('does not use persisted cache when allComments key disagrees with cache', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'a,b', + schema: 'dedup-v2', + duplicateMap: { a: ['b'] }, + dedupedIds: ['a'], + }; + const x = review('x', 'z.ts'); + const m = resolveDuplicateMapForRecovery(ctx, undefined, [x]); + expect(m).toBeUndefined(); + }); +}); + +describe('buildMergedDuplicatesForAnchor', () => { + it('uses effective cluster when duplicateMap empty and dedup cache matches', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'a,b', + schema: 'dedup-v2', + duplicateMap: { a: ['b'] }, + dedupedIds: ['a'], + }; + const a = review('a', 'x.ts'); + const b = review('b', 'y.ts'); + const eff = resolveEffectiveDuplicateMapForComments(ctx, new Map(), [a, b]); + const rows = buildMergedDuplicatesForAnchor('a', eff, new Map(), [a, b]); + expect(rows).toEqual([expect.objectContaining({ commentId: 'b', path: 'y.ts' })]); + }); + + it('prefers duplicateItems over allComments when both exist', () => { + const a = review('a', 'x.ts'); + const b = review('b', 'from-comments.ts'); + const map = new Map([['a', ['b']]]); + const duplicateItems: DedupResult['duplicateItems'] = new Map([ + [ + 'b', + { + comment: { ...b, path: 'from-dedup-item.ts' }, + codeSnippet: '', + }, + ], + ]); + const rows = buildMergedDuplicatesForAnchor('a', map, duplicateItems, [a, b]); + expect(rows?.[0]?.path).toBe('from-dedup-item.ts'); + }); +}); + +describe('propagateStatusToDuplicates', () => { + it('propagates to canonical when the analyzed row is a duplicate (map keyed by canonical)', () => { + const ctx = makeCtx(); + const c = review('c1', 'a.ts'); + const d = review('d1', 'b.ts'); + const dedupResult: DedupResult = { + dedupedToCheck: [], + duplicateMap: new Map([['c1', ['d1']]]), + duplicateItems: new Map([ + ['c1', { comment: c, codeSnippet: '' }], + ['d1', { comment: d, codeSnippet: '' }], + ]), + }; + const hashes = new Map([ + ['a.ts', 'ha'], + ['b.ts', 'hb'], + ]); + CommentStatus.markOpen(ctx, 'd1', 'exists', 'dup analyzed', 2, 2, 'b.ts', 'hb'); + propagateStatusToDuplicates( + ctx, + 'd1', + dedupResult, + hashes, + { kind: 'open', classification: 'exists', explanation: 'dup analyzed', importance: 2, ease: 2 }, + [c, d], + ); + expect(CommentStatus.getStatus(ctx, 'c1')?.status).toBe('open'); + expect(CommentStatus.getStatus(ctx, 'c1')?.filePath).toBe('a.ts'); + }); + + it('uses persisted dedup cache when duplicateMap is empty', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'a,b', + schema: 'dedup-v2', + duplicateMap: { a: ['b'] }, + dedupedIds: ['a'], + }; + const a = review('a', 'x.ts'); + const b = review('b', 'y.ts'); + const dedupResult: DedupResult = { + dedupedToCheck: [], + duplicateMap: new Map(), + duplicateItems: new Map(), + }; + const hashes = new Map([ + ['x.ts', 'hx'], + ['y.ts', 'hy'], + ]); + CommentStatus.markResolved(ctx, 'a', 'fixed', 'done', 'x.ts', 'hx'); + propagateStatusToDuplicates( + ctx, + 'a', + dedupResult, + hashes, + { kind: 'resolved', classification: 'fixed', explanation: 'done' }, + [a, b], + ); + expect(CommentStatus.getStatus(ctx, 'b')?.status).toBe('resolved'); + expect(CommentStatus.getStatus(ctx, 'b')?.filePath).toBe('y.ts'); + }); +}); + +describe('mergeCommentsForClusterDismiss', () => { + it('returns batch issue comments when allComments is undefined', () => { + const a = review('a', 'x.ts'); + const b = review('b', 'y.ts'); + const merged = mergeCommentsForClusterDismiss(undefined, [ + { comment: a, codeSnippet: '', stillExists: true, explanation: '' }, + { comment: b, codeSnippet: '', stillExists: true, explanation: '' }, + ]); + expect(merged.map((c) => c.id).sort()).toEqual(['a', 'b']); + }); + + it('prefers allComments row over batch when same id', () => { + const fromList = review('a', 'from-list.ts'); + const fromBatch = review('a', 'from-batch.ts'); + const merged = mergeCommentsForClusterDismiss([fromList], [{ comment: fromBatch, codeSnippet: '', stillExists: true, explanation: '' }]); + expect(merged).toHaveLength(1); + expect(merged[0]!.path).toBe('from-list.ts'); + }); +}); + +describe('pre-dismiss queue removal (execute-fix-iteration contract)', () => { + it('only cluster ids that were actually dismissed count for queue eviction', () => { + const ctx = makeCtx(); + const anchor = review('c1', 'a.ts'); + const map = new Map([['c1', ['d1']]]); + dismissDuplicateClusterFromComments(ctx, anchor, map, [anchor], 'r', 'remaining'); + expect(Dismissed.isCommentDismissed(ctx, 'c1')).toBe(true); + expect(Dismissed.isCommentDismissed(ctx, 'd1')).toBe(false); + expect(getClusterIdsAccountedOnState(ctx, 'c1', map).sort()).toEqual(['c1']); + }); +}); + +describe('dismissDuplicateClusterFromComments', () => { + it('resolves siblings from allComments list', () => { + const ctx = makeCtx(); + const anchor = review('c1', 'a.ts'); + const dup = review('d1', 'b.ts'); + const map = new Map([['c1', ['d1']]]); + const all = [anchor, dup]; + + dismissDuplicateClusterFromComments(ctx, anchor, map, all, 'r', 'stale'); + + expect(Dismissed.isCommentDismissed(ctx, 'c1')).toBe(true); + expect(Dismissed.isCommentDismissed(ctx, 'd1')).toBe(true); + }); + + it('dismisses cluster siblings from merge(batch) when PR list is absent', () => { + const ctx = makeCtx(); + const anchor = review('c1', 'a.ts'); + const dup = review('d1', 'b.ts'); + const map = new Map([['c1', ['d1']]]); + const batchIssues = [ + { comment: anchor, codeSnippet: '', stillExists: true, explanation: '' }, + { comment: dup, codeSnippet: '', stillExists: true, explanation: '' }, + ]; + const rows = mergeCommentsForClusterDismiss(undefined, batchIssues); + dismissDuplicateClusterFromComments(ctx, anchor, map, rows, 'r', 'stale'); + expect(Dismissed.isCommentDismissed(ctx, 'c1')).toBe(true); + expect(Dismissed.isCommentDismissed(ctx, 'd1')).toBe(true); + }); +}); diff --git a/tests/dismissed-issues-dedupe.test.ts b/tests/dismissed-issues-dedupe.test.ts new file mode 100644 index 0000000..f8325d8 --- /dev/null +++ b/tests/dismissed-issues-dedupe.test.ts @@ -0,0 +1,71 @@ +import { describe, expect, it } from 'vitest'; +import { + applyDismissedIssuesLoadNormalization, + dedupeDismissedIssuesByCommentId, +} from '../tools/prr/state/state-core.js'; +import type { DismissedIssue } from '../tools/prr/state/types.js'; + +function row( + id: string, + at: string, + cat: DismissedIssue['category'], + path = 'x.ts', +): DismissedIssue { + return { + commentId: id, + reason: 'r', + dismissedAt: at, + dismissedAtIteration: 1, + category: cat, + filePath: path, + line: null, + commentBody: 'b', + }; +} + +describe('dedupeDismissedIssuesByCommentId', () => { + it('returns the same array when length <= 1', () => { + const a = row('ic1', '2026-01-01T00:00:00Z', 'stale'); + expect(dedupeDismissedIssuesByCommentId([])).toEqual({ merged: [], removedCount: 0 }); + expect(dedupeDismissedIssuesByCommentId([a])).toEqual({ merged: [a], removedCount: 0 }); + }); + + it('keeps latest dismissedAt for duplicate comment ids', () => { + const older = row('ic_dup', '2026-01-01T00:00:00Z', 'missing-file'); + const newer = row('ic_dup', '2026-02-01T00:00:00Z', 'path-unresolved'); + const { merged, removedCount } = dedupeDismissedIssuesByCommentId([older, newer]); + expect(removedCount).toBe(1); + expect(merged).toEqual([newer]); + }); + + it('on same timestamp prefers path-fragment over missing-file', () => { + const a = row('ic_t', '2026-01-02T12:00:00Z', 'missing-file'); + const b = row('ic_t', '2026-01-02T12:00:00Z', 'path-fragment'); + const { merged, removedCount } = dedupeDismissedIssuesByCommentId([a, b]); + expect(removedCount).toBe(1); + expect(merged[0]!.category).toBe('path-fragment'); + }); + + it('preserves first-seen order of unique ids', () => { + const x = row('ic_x', '2026-01-01T00:00:00Z', 'stale'); + const y = row('ic_y', '2026-01-01T00:00:00Z', 'stale'); + const { merged } = dedupeDismissedIssuesByCommentId([x, y]); + expect(merged.map((d) => d.commentId)).toEqual(['ic_x', 'ic_y']); + }); +}); + +describe('applyDismissedIssuesLoadNormalization', () => { + it('normalizes fragment paths then dedupes by comment id', () => { + const dupOlder = { + ...row('ic_d', '2026-01-01T00:00:00Z', 'missing-file', '.d.ts'), + reason: 'Tracked file not found for review path: .d.ts', + }; + const dupNewer = row('ic_d', '2026-02-01T00:00:00Z', 'path-unresolved', '.d.ts'); + const { list, fragmentNormalized, dedupeRemoved } = applyDismissedIssuesLoadNormalization([dupOlder, dupNewer]); + expect(fragmentNormalized).toBe(2); + expect(dedupeRemoved).toBe(1); + expect(list).toHaveLength(1); + expect(list[0]!.category).toBe('path-fragment'); + expect(list[0]!.dismissedAt).toBe('2026-02-01T00:00:00Z'); + }); +}); diff --git a/tests/final-audit-snippet.test.ts b/tests/final-audit-snippet.test.ts index 49bc131..772fec2 100644 --- a/tests/final-audit-snippet.test.ts +++ b/tests/final-audit-snippet.test.ts @@ -8,12 +8,17 @@ describe('finalAuditSnippetLooksTruncatedOrExcerpt', () => { ).toBe(true); }); - it('detects huge-file excerpt footers from getFullFileForAudit', () => { + it('does not treat line-centered budget excerpts as blind truncation (fix site in window)', () => { expect( finalAuditSnippetLooksTruncatedOrExcerpt( '1: a\n... (excerpt only — file has 2,000 lines; centered on line 500)', ), - ).toBe(true); + ).toBe(false); + expect( + finalAuditSnippetLooksTruncatedOrExcerpt( + '1: a\n... (excerpt — 2,000 lines; centered on line 500)', + ), + ).toBe(false); expect( finalAuditSnippetLooksTruncatedOrExcerpt( '... (1,500 more lines omitted — file exceeds 50,000 chars; no line anchor', diff --git a/tests/get-llm-api-request-timeout.test.ts b/tests/get-llm-api-request-timeout.test.ts new file mode 100644 index 0000000..f0ad509 --- /dev/null +++ b/tests/get-llm-api-request-timeout.test.ts @@ -0,0 +1,46 @@ +/** + * llm-api client timeout tiers vs prompt size (shared/constants/polling.ts). + */ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { + getLlmApiRequestTimeoutMs, + LLM_REQUEST_TIMEOUT_FULL_FILE_MS, + LLM_REQUEST_TIMEOUT_MS, +} from '../shared/constants/polling.js'; + +const ENV_KEY = 'PRR_LLM_API_REQUEST_TIMEOUT_MS'; + +describe('getLlmApiRequestTimeoutMs', () => { + afterEach(() => { + vi.unstubAllEnvs(); + }); + + it('uses full-file constant when full-file rewrite', () => { + expect(getLlmApiRequestTimeoutMs(5_000, true)).toBe(LLM_REQUEST_TIMEOUT_FULL_FILE_MS); + }); + + it('defaults to 90s for small prompts', () => { + expect(getLlmApiRequestTimeoutMs(10_000, false)).toBe(LLM_REQUEST_TIMEOUT_MS); + }); + + it('raises tier at 60k+, 100k+, 140k+ chars', () => { + expect(getLlmApiRequestTimeoutMs(60_001, false)).toBe(120_000); + expect(getLlmApiRequestTimeoutMs(100_001, false)).toBe(150_000); + expect(getLlmApiRequestTimeoutMs(140_001, false)).toBe(180_000); + }); + + it('respects PRR_LLM_API_REQUEST_TIMEOUT_MS for non-full-file', () => { + vi.stubEnv(ENV_KEY, '240000'); + expect(getLlmApiRequestTimeoutMs(200_000, false)).toBe(240_000); + }); + + it('env override does not apply to full-file rewrite', () => { + vi.stubEnv(ENV_KEY, '240000'); + expect(getLlmApiRequestTimeoutMs(200_000, true)).toBe(LLM_REQUEST_TIMEOUT_FULL_FILE_MS); + }); + + it('ignores invalid env and uses tiers', () => { + vi.stubEnv(ENV_KEY, 'nope'); + expect(getLlmApiRequestTimeoutMs(150_000, false)).toBe(180_000); + }); +}); diff --git a/tests/git-commit-scan-cache.test.ts b/tests/git-commit-scan-cache.test.ts index 3532f49..2dfb6f4 100644 --- a/tests/git-commit-scan-cache.test.ts +++ b/tests/git-commit-scan-cache.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect, beforeEach, vi } from 'vitest'; import type { SimpleGit } from 'simple-git'; +import * as Logger from '../shared/logger.js'; import { scanCommittedFixes, clearScanCommittedFixesCache } from '../shared/git/git-commit-scan.js'; beforeEach(() => { @@ -8,6 +9,7 @@ beforeEach(() => { describe('scanCommittedFixes cache', () => { it('skips git log on cache hit for same workdir, branch, and HEAD', async () => { + let logCalls = 0; const git = { raw: vi.fn(async (args: string[]) => { if (args[0] === 'rev-parse' && args[1] === '--verify') { @@ -16,6 +18,7 @@ describe('scanCommittedFixes cache', () => { throw err; } if (args[0] === 'log') { + logCalls++; return 'prr-fix:IC_cached_marker\n'; } return ''; @@ -23,12 +26,28 @@ describe('scanCommittedFixes cache', () => { } as unknown as SimpleGit; const a = await scanCommittedFixes(git, 'feature/x', { workdir: '/tmp/prr-w', headSha: 'deadbeef01' }); - const rawAfterFirst = git.raw.mock.calls.length; - const b = await scanCommittedFixes(git, 'feature/x', { workdir: '/tmp/prr-w', headSha: 'deadbeef01' }); + expect(logCalls).toBe(1); + await scanCommittedFixes(git, 'feature/x', { workdir: '/tmp/prr-w', headSha: 'deadbeef01' }); expect(a).toEqual(['IC_cached_marker']); - expect(b).toEqual(['IC_cached_marker']); - expect(rawAfterFirst).toBeGreaterThan(0); - expect(git.raw.mock.calls.length).toBe(rawAfterFirst); + expect(logCalls).toBe(1); + }); + + it('captures multiple prr-fix markers on one commit message line', async () => { + const git = { + raw: vi.fn(async (args: string[]) => { + if (args[0] === 'rev-parse' && args[1] === '--verify') { + if (args[2] === 'origin/main') return 'abc\n'; + throw new Error('no'); + } + if (args[0] === 'log') { + return 'prr-fix:IC_a prr-fix:IC_b\n'; + } + return ''; + }), + } as unknown as SimpleGit; + + const ids = await scanCommittedFixes(git, 'feature/y'); + expect(ids.sort()).toEqual(['IC_a', 'IC_b'].sort()); }); it('does not use cache when workdir or headSha omitted', async () => { @@ -51,4 +70,71 @@ describe('scanCommittedFixes cache', () => { await scanCommittedFixes(git, 'b'); expect(logCalls).toBe(2); }); + + it('warns once when no merge base ref exists (n100 fallback)', async () => { + const warnSpy = vi.spyOn(Logger, 'warn').mockImplementation(() => {}); + const git = { + raw: vi.fn(async (args: string[]) => { + if (args[0] === 'rev-parse' && args[1] === '--verify') { + throw new Error('unknown ref'); + } + if (args[0] === 'log') { + return 'prr-fix:IC_fallback\n'; + } + return ''; + }), + } as unknown as SimpleGit; + + await scanCommittedFixes(git, 'feature/z', { workdir: '/tmp/warn-base', headSha: 'aaa' }); + await scanCommittedFixes(git, 'feature/z', { workdir: '/tmp/warn-base', headSha: 'bbb' }); + expect(warnSpy).toHaveBeenCalledTimes(1); + expect(String(warnSpy.mock.calls[0]?.[0])).toContain('Git recovery scan'); + expect(String(warnSpy.mock.calls[0]?.[0])).toContain('100'); + warnSpy.mockRestore(); + }); + + it('warns once when pr base ref missing (mentions origin/)', async () => { + const warnSpy = vi.spyOn(Logger, 'warn').mockImplementation(() => {}); + const git = { + raw: vi.fn(async (args: string[]) => { + if (args[0] === 'rev-parse' && args[1] === '--verify') { + throw new Error('unknown ref'); + } + if (args[0] === 'log') return ''; + return ''; + }), + } as unknown as SimpleGit; + + await scanCommittedFixes(git, 'feature/z', { + workdir: '/tmp/warn-prbase', + headSha: 'ccc', + prBaseBranch: 'staging', + }); + expect(warnSpy).toHaveBeenCalledWith(expect.stringMatching(/origin\/staging/)); + warnSpy.mockRestore(); + }); + + it('warns once when git log scan throws', async () => { + const warnSpy = vi.spyOn(Logger, 'warn').mockImplementation(() => {}); + const git = { + raw: vi.fn(async (args: string[]) => { + if (args[0] === 'rev-parse' && args[1] === '--verify') { + if (args[2] === 'origin/main') return 'abc\n'; + throw new Error('no'); + } + if (args[0] === 'log') { + throw new Error('git log exploded'); + } + return ''; + }), + } as unknown as SimpleGit; + + const a = await scanCommittedFixes(git, 'feature/e', { workdir: '/tmp/warn-log', headSha: 'ddd' }); + const b = await scanCommittedFixes(git, 'feature/e', { workdir: '/tmp/warn-log', headSha: 'eee' }); + expect(a).toEqual([]); + expect(b).toEqual([]); + expect(warnSpy).toHaveBeenCalledTimes(1); + expect(String(warnSpy.mock.calls[0]?.[0])).toContain('Git recovery scan failed'); + warnSpy.mockRestore(); + }); }); diff --git a/tests/git-conflict-lock-defer.test.ts b/tests/git-conflict-lock-defer.test.ts new file mode 100644 index 0000000..4232734 --- /dev/null +++ b/tests/git-conflict-lock-defer.test.ts @@ -0,0 +1,39 @@ +import { describe, expect, it } from 'vitest'; +import { mkdirSync, writeFileSync, rmSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { + lockRegenerationRequiresCleanPackageJson, + packageJsonHasConflictMarkers, +} from '../tools/prr/git/git-conflict-lockfiles.js'; + +describe('lock regeneration vs package.json', () => { + it('lockRegenerationRequiresCleanPackageJson is true for bun/npm/yarn/pnpm locks', () => { + expect(lockRegenerationRequiresCleanPackageJson(['bun.lock'])).toBe(true); + expect(lockRegenerationRequiresCleanPackageJson(['package-lock.json'])).toBe(true); + expect(lockRegenerationRequiresCleanPackageJson(['yarn.lock'])).toBe(true); + expect(lockRegenerationRequiresCleanPackageJson(['pnpm-lock.yaml'])).toBe(true); + }); + + it('lockRegenerationRequiresCleanPackageJson is false for non-JS lockfiles', () => { + expect(lockRegenerationRequiresCleanPackageJson(['Cargo.lock'])).toBe(false); + expect(lockRegenerationRequiresCleanPackageJson(['Gemfile.lock'])).toBe(false); + }); + + it('packageJsonHasConflictMarkers detects markers', () => { + const dir = join(tmpdir(), `prr-pkg-test-${Date.now()}`); + mkdirSync(dir, { recursive: true }); + try { + writeFileSync( + join(dir, 'package.json'), + '{\n "name": "x"\n<<<<<<< HEAD\n}\n=======\n,\n"b":1\n}\n>>>>>>> other\n', + 'utf-8' + ); + expect(packageJsonHasConflictMarkers(dir)).toBe(true); + writeFileSync(join(dir, 'package.json'), '{"name":"x"}', 'utf-8'); + expect(packageJsonHasConflictMarkers(dir)).toBe(false); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/tests/git-latent-merge-probe.test.ts b/tests/git-latent-merge-probe.test.ts index ad2aed5..437468f 100644 --- a/tests/git-latent-merge-probe.test.ts +++ b/tests/git-latent-merge-probe.test.ts @@ -6,10 +6,19 @@ import { execFileSync } from 'child_process'; import { simpleGit } from 'simple-git'; import { parseMergeTreeConflictPaths, + mergeTreeFailureLooksUnsupported, probeLatentMergeConflictsWithOrigin, checkForConflicts, } from '../shared/git/git-conflicts.js'; +describe('mergeTreeFailureLooksUnsupported', () => { + it('detects old-git / unknown-option style errors', () => { + expect(mergeTreeFailureLooksUnsupported("git: 'merge-tree' is not a git command")).toBe(true); + expect(mergeTreeFailureLooksUnsupported('error: unknown option `write-tree`')).toBe(true); + expect(mergeTreeFailureLooksUnsupported('CONFLICT (content): Merge conflict in f.txt')).toBe(false); + }); +}); + describe('parseMergeTreeConflictPaths', () => { it('parses Merge conflict in and CONFLICT lines', () => { const s = [ diff --git a/tests/git-submodule-path.test.ts b/tests/git-submodule-path.test.ts new file mode 100644 index 0000000..36cd00c --- /dev/null +++ b/tests/git-submodule-path.test.ts @@ -0,0 +1,63 @@ +import { afterEach, describe, expect, it } from 'vitest'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { execFileSync } from 'child_process'; +import { isTrackedGitSubmodulePath } from '../shared/git/git-submodule-path.js'; + +const tempDirs: string[] = []; + +afterEach(() => { + while (tempDirs.length > 0) { + const dir = tempDirs.pop(); + if (dir) rmSync(dir, { recursive: true, force: true }); + } +}); + +function git(dir: string, args: string[]): void { + execFileSync('git', args, { cwd: dir, stdio: 'ignore' }); +} + +describe('isTrackedGitSubmodulePath', () => { + it('returns true for a path recorded as mode 160000 in the index', () => { + const parent = mkdtempSync(join(tmpdir(), 'prr-submod-parent-')); + tempDirs.push(parent); + const child = join(parent, 'child-repo'); + mkdirSync(child, { recursive: true }); + + git(child, ['init', '-b', 'main']); + writeFileSync(join(child, 'README.md'), '# child\n', 'utf8'); + git(child, ['add', 'README.md']); + git(child, [ + '-c', + 'user.email=test@test', + '-c', + 'user.name=test', + 'commit', + '-m', + 'init', + ]); + const childHead = execFileSync('git', ['rev-parse', 'HEAD'], { + cwd: child, + encoding: 'utf8', + }).trim(); + + git(parent, ['init', '-b', 'main']); + writeFileSync(join(parent, 'root.txt'), 'root\n', 'utf8'); + git(parent, ['add', 'root.txt']); + git(parent, ['-c', 'user.email=test@test', '-c', 'user.name=test', 'commit', '-m', 'root']); + // Avoid `git submodule add` (file:// transport may be disabled); record a real gitlink in the index. + git(parent, ['update-index', '--add', '--cacheinfo', `160000,${childHead},plugins/plugin-sql`]); + git(parent, ['-c', 'user.email=test@test', '-c', 'user.name=test', 'commit', '-m', 'add gitlink']); + + expect(isTrackedGitSubmodulePath(parent, 'plugins/plugin-sql')).toBe(true); + expect(isTrackedGitSubmodulePath(parent, 'root.txt')).toBe(false); + expect(isTrackedGitSubmodulePath(parent, 'nope')).toBe(false); + }); + + it('returns false for a non-git directory', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-submod-nogit-')); + tempDirs.push(dir); + expect(isTrackedGitSubmodulePath(dir, 'anything')).toBe(false); + }); +}); diff --git a/tests/issue-analysis.test.ts b/tests/issue-analysis.test.ts index 9a727c9..1272e76 100644 --- a/tests/issue-analysis.test.ts +++ b/tests/issue-analysis.test.ts @@ -222,9 +222,11 @@ describe('getFullFileForAudit', () => { tempDirs.push(dir); writeFileSync(join(dir, 'small.ts'), ['alpha', 'beta', 'gamma'].join('\n'), 'utf-8'); const out = await getFullFileForAudit(dir, 'small.ts', 2, ''); - expect(out).toContain('1: alpha'); - expect(out).toContain('2: beta'); - expect(out).toContain('3: gamma'); + expect(out.snippet).toContain('[PRR final-audit context]'); + expect(out.snippet).toContain('1: alpha'); + expect(out.snippet).toContain('2: beta'); + expect(out.snippet).toContain('3: gamma'); + expect(out.fixSiteInWindow).toBe(true); }); it('centers excerpt on review line when file exceeds audit char cap', async () => { @@ -238,8 +240,25 @@ describe('getFullFileForAudit', () => { expect(content.length).toBeGreaterThan(50_000); writeFileSync(join(dir, 'big.ts'), content, 'utf-8'); const out = await getFullFileForAudit(dir, 'big.ts', 1500, ''); - expect(out).toContain('excerpt only'); - expect(out).toMatch(/1500:\s*\/\/ line 1500/); - expect(out).not.toMatch(/^1:\s*\/\/ line 1/m); + expect(out.snippet).toContain('[PRR final-audit context]'); + expect(out.snippet).toMatch(/excerpt —/); + expect(out.snippet).toMatch(/1500:\s*\/\/ line 1500/); + expect(out.snippet).not.toMatch(/^1:\s*\/\/ line 1/m); + expect(out.fixSiteInWindow).toBe(true); + }); + + it('marks fixSiteInWindow false for head-only excerpt when no line anchor', async () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-audit-')); + tempDirs.push(dir); + const lines: string[] = []; + for (let i = 1; i <= 8000; i++) { + lines.push(`// line ${i} ${'x'.repeat(60)}`); + } + writeFileSync(join(dir, 'huge.ts'), lines.join('\n'), 'utf-8'); + const out = await getFullFileForAudit(dir, 'huge.ts', null, ''); + expect(out.fixSiteInWindow).toBe(false); + expect(out.snippet).toContain('[PRR final-audit context]'); + expect(out.snippet).toMatch(/1:\s*\/\/ line 1/); + expect(out.snippet.length).toBeLessThan(lines.join('\n').length); }); }); diff --git a/tests/mark-verified-cluster.test.ts b/tests/mark-verified-cluster.test.ts new file mode 100644 index 0000000..db27f6a --- /dev/null +++ b/tests/mark-verified-cluster.test.ts @@ -0,0 +1,134 @@ +import { describe, it, expect } from 'vitest'; +import type { StateContext } from '../tools/prr/state/state-context.js'; +import type { ResolverState } from '../tools/prr/state/types.js'; +import * as Verification from '../tools/prr/state/state-verification.js'; +import { + expandGitRecoveredVerificationFromDedupCache, + markVerifiedClusterForFixedIssue, + unmarkVerifiedClusterForStaleRecheck, + unmarkVerifiedClustersForFinalAuditFailures, +} from '../tools/prr/workflow/duplicate-cluster-verify.js'; + +function makeCtx(): StateContext { + const state: ResolverState = { + pr: 'o/r#1', + branch: 'main', + headSha: 'abc', + startedAt: 's', + lastUpdated: 'u', + lessonsLearned: [], + iterations: [{ timestamp: 't', commentsAddressed: [], changesMade: [], verificationResults: {} }], + verifiedComments: [], + verifiedFixed: [], + dismissedIssues: [], + commentStatuses: {}, + } as ResolverState; + return { + statePath: '/tmp/mark-cluster-test', + state, + currentPhase: 'test', + verifiedThisSession: new Set(), + }; +} + +describe('markVerifiedClusterForFixedIssue', () => { + it('marks anchor and dedup siblings', () => { + const ctx = makeCtx(); + const session = ctx.verifiedThisSession!; + const map = new Map([['c1', ['d1', 'd2']]]); + const extra = markVerifiedClusterForFixedIssue(ctx, 'd1', map, session); + expect(extra).toBe(2); + expect(Verification.isVerified(ctx, 'c1')).toBe(true); + expect(Verification.isVerified(ctx, 'd1')).toBe(true); + expect(Verification.isVerified(ctx, 'd2')).toBe(true); + expect(session.has('c1') && session.has('d1') && session.has('d2')).toBe(true); + }); + + it('is a no-op for unknown id when map missing', () => { + const ctx = makeCtx(); + const extra = markVerifiedClusterForFixedIssue(ctx, 'solo', undefined, ctx.verifiedThisSession); + expect(extra).toBe(0); + expect(Verification.isVerified(ctx, 'solo')).toBe(true); + }); +}); + +describe('expandGitRecoveredVerificationFromDedupCache', () => { + it('marks dedup siblings when dedupCache matches comment set', () => { + const ctx = makeCtx(); + const key = ['c1', 'd1', 'd2'].sort().join(','); + ctx.state!.dedupCache = { + commentIds: key, + schema: 'dedup-v2', + duplicateMap: { c1: ['d1', 'd2'] }, + dedupedIds: ['c1'], + }; + Verification.markVerified(ctx, 'c1', Verification.PRR_GIT_RECOVERY_VERIFIED_MARKER, { + skipSessionTracking: true, + }); + const { staleSkipIds, addedVerified } = expandGitRecoveredVerificationFromDedupCache(ctx, ['c1'], key); + expect(addedVerified).toBe(true); + expect(new Set(staleSkipIds)).toEqual(new Set(['c1', 'd1', 'd2'])); + expect(Verification.isVerified(ctx, 'd1')).toBe(true); + expect(Verification.isVerified(ctx, 'd2')).toBe(true); + }); + + it('does not expand when dedupCache commentIds differ', () => { + const ctx = makeCtx(); + ctx.state!.dedupCache = { + commentIds: 'other', + schema: 'dedup-v2', + duplicateMap: { c1: ['d1'] }, + dedupedIds: ['c1'], + }; + Verification.markVerified(ctx, 'c1', Verification.PRR_GIT_RECOVERY_VERIFIED_MARKER, { + skipSessionTracking: true, + }); + const { staleSkipIds, addedVerified } = expandGitRecoveredVerificationFromDedupCache(ctx, ['c1'], 'c1'); + expect(addedVerified).toBe(false); + expect(staleSkipIds).toEqual(['c1']); + expect(Verification.isVerified(ctx, 'd1')).toBe(false); + }); +}); + +describe('unmarkVerifiedClusterForStaleRecheck', () => { + it('unmarks every verified id in the cluster', () => { + const ctx = makeCtx(); + const map = new Map([['c1', ['d1']]]); + markVerifiedClusterForFixedIssue(ctx, 'c1', map, ctx.verifiedThisSession); + expect(Verification.isVerified(ctx, 'c1')).toBe(true); + expect(Verification.isVerified(ctx, 'd1')).toBe(true); + unmarkVerifiedClusterForStaleRecheck(ctx, 'd1', map, undefined); + expect(Verification.isVerified(ctx, 'c1')).toBe(false); + expect(Verification.isVerified(ctx, 'd1')).toBe(false); + }); + + it('skips unmark for ids in recoveredSet only', () => { + const ctx = makeCtx(); + const map = new Map([['c1', ['d1', 'd2']]]); + markVerifiedClusterForFixedIssue(ctx, 'c1', map, ctx.verifiedThisSession); + unmarkVerifiedClusterForStaleRecheck(ctx, 'c1', map, new Set(['c1'])); + expect(Verification.isVerified(ctx, 'c1')).toBe(true); + expect(Verification.isVerified(ctx, 'd1')).toBe(false); + expect(Verification.isVerified(ctx, 'd2')).toBe(false); + }); +}); + +describe('unmarkVerifiedClustersForFinalAuditFailures', () => { + it('unmarks canonical when only duplicate id is listed as failed', () => { + const ctx = makeCtx(); + const map = new Map([['c1', ['d1']]]); + markVerifiedClusterForFixedIssue(ctx, 'c1', map, ctx.verifiedThisSession); + unmarkVerifiedClustersForFinalAuditFailures(ctx, ['d1'], map); + expect(Verification.isVerified(ctx, 'c1')).toBe(false); + expect(Verification.isVerified(ctx, 'd1')).toBe(false); + }); + + it('dedupes when two failed rows are in the same cluster', () => { + const ctx = makeCtx(); + const map = new Map([['c1', ['d1']]]); + markVerifiedClusterForFixedIssue(ctx, 'c1', map, ctx.verifiedThisSession); + unmarkVerifiedClustersForFinalAuditFailures(ctx, ['c1', 'd1'], map); + expect(Verification.isVerified(ctx, 'c1')).toBe(false); + expect(Verification.isVerified(ctx, 'd1')).toBe(false); + }); +}); diff --git a/tests/model-context-limits.test.ts b/tests/model-context-limits.test.ts index a41d733..bc59136 100644 --- a/tests/model-context-limits.test.ts +++ b/tests/model-context-limits.test.ts @@ -4,8 +4,10 @@ import { ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS, ELIZACLOUD_LLM_COMPLETE_INPUT_OVERHEAD_CHARS, estimateElizacloudInputTokensFromCharLength, + getMaxElizacloudHardInputCeiling, getMaxElizacloudLlmCompleteInputChars, getMaxFixPromptCharsForModel, + lowerModelMaxPromptChars, } from '../shared/llm/model-context-limits.js'; describe('getMaxElizacloudLlmCompleteInputChars', () => { @@ -26,6 +28,28 @@ describe('getMaxElizacloudLlmCompleteInputChars', () => { }); }); +describe('getMaxElizacloudHardInputCeiling', () => { + it('hard ceiling is not affected by lowerModelMaxPromptChars on large-context models', () => { + const model = 'anthropic/claude-sonnet-4-5-20250929'; + const ceilingBefore = getMaxElizacloudHardInputCeiling(model); + expect(ceilingBefore).toBeGreaterThan(600_000); + + lowerModelMaxPromptChars('elizacloud', model, 35_000); + const softAfter = getMaxElizacloudLlmCompleteInputChars(model); + const ceilingAfter = getMaxElizacloudHardInputCeiling(model); + + expect(ceilingAfter).toBe(ceilingBefore); + expect(softAfter).toBeLessThan(ceilingAfter); + }); + + it('floor prevents large-context models from being lowered below 60k', () => { + const model = 'anthropic/claude-sonnet-4-5-20250929'; + lowerModelMaxPromptChars('elizacloud', model, 35_000); + const fix = getMaxFixPromptCharsForModel('elizacloud', model); + expect(fix).toBeGreaterThanOrEqual(60_000); + }); +}); + describe('estimateElizacloudInputTokensFromCharLength', () => { it('uses ~1.6 chars/token for small-context models (Qwen 14B)', () => { const { approxTokens, assumedCharsPerToken } = estimateElizacloudInputTokensFromCharLength( diff --git a/tests/no-changes-already-fixed-cluster.test.ts b/tests/no-changes-already-fixed-cluster.test.ts new file mode 100644 index 0000000..1a090e1 --- /dev/null +++ b/tests/no-changes-already-fixed-cluster.test.ts @@ -0,0 +1,126 @@ +/** + * ALREADY_FIXED no-change path: duplicate cluster must stay consistent with dismissed state + * (no “empty queue + unaccounted cluster siblings”). + */ +import { describe, it, expect } from 'vitest'; +import type { StateContext } from '../tools/prr/state/state-context.js'; +import type { ResolverState } from '../tools/prr/state/types.js'; +import type { UnresolvedIssue } from '../tools/prr/analyzer/types.js'; +import type { ReviewComment } from '../tools/prr/github/types.js'; +import type { LLMClient } from '../tools/prr/llm/client.js'; +import { handleNoChangesWithVerification } from '../tools/prr/workflow/no-changes-verification.js'; +import { createLessonsContext } from '../tools/prr/state/lessons-context.js'; +import * as Dismissed from '../tools/prr/state/state-dismissed.js'; +import * as Verification from '../tools/prr/state/state-verification.js'; +import { parseNoChangesExplanation } from '../tools/prr/workflow/utils.js'; +import { createMockLLMClient } from './test-utils/llm-mock.js'; + +function review(id: string): ReviewComment { + return { + id, + threadId: 't1', + author: 'bot', + body: 'review body', + path: 'packages/x.ts', + line: 10, + createdAt: '2020-01-01T00:00:00Z', + }; +} + +function makeCtx(): StateContext { + const state: ResolverState = { + pr: 'o/r#1', + branch: 'main', + headSha: 'abc', + startedAt: 's', + lastUpdated: 'u', + lessonsLearned: [], + iterations: [{ timestamp: 't', commentsAddressed: [], changesMade: [], verificationResults: {} }], + verifiedComments: [], + verifiedFixed: [], + dismissedIssues: [], + commentStatuses: {}, + } as ResolverState; + return { + statePath: '/tmp/no-changes-cluster-test', + state, + currentPhase: 'test', + verifiedThisSession: new Set(), + }; +} + +describe('handleNoChangesWithVerification ALREADY_FIXED cluster', () => { + it('dismisses dedup siblings not present in comments using anchor row', async () => { + const ctx = makeCtx(); + const anchor = review('comment-A'); + const issue: UnresolvedIssue = { + comment: anchor, + codeSnippet: 'code', + stillExists: true, + explanation: 'test', + }; + const duplicateMap = new Map([['comment-A', ['comment-B']]]); + const lessons = createLessonsContext('o', 'r', 'main', '/tmp/lessons'); + const llm = {} as LLMClient; + + const result = await handleNoChangesWithVerification( + [issue], + 'llm-api', + 'anthropic/test', + 'RESULT: ALREADY_FIXED — already ok', + llm, + ctx, + lessons, + ctx.verifiedThisSession!, + () => null, + undefined, + [anchor], + duplicateMap, + ); + + expect(result.updatedUnresolvedIssues).toHaveLength(0); + expect(Dismissed.isCommentDismissed(ctx, 'comment-A')).toBe(true); + expect(Dismissed.isCommentDismissed(ctx, 'comment-B')).toBe(true); + }); + + it('batch verify (legacy already-fixed claim) marks entire dedup cluster verified', async () => { + const ctx = makeCtx(); + const anchor = review('comment-A'); + const dupRow: ReviewComment = { ...anchor, id: 'comment-B' }; + const issue: UnresolvedIssue = { + comment: anchor, + codeSnippet: 'code', + stillExists: true, + explanation: 'test', + }; + const duplicateMap = new Map([['comment-A', ['comment-B']]]); + const lessons = createLessonsContext('o', 'r', 'main', '/tmp/lessons'); + const llm = createMockLLMClient({ + batchCheckResponses: { + issue_1: { exists: false, explanation: 'verifier ok', stale: false }, + }, + }); + + const result = await handleNoChangesWithVerification( + [issue], + 'llm-api', + 'anthropic/test', + 'The implementation is already correct and no changes are needed here.', + llm, + ctx, + lessons, + ctx.verifiedThisSession!, + parseNoChangesExplanation, + undefined, + [anchor, dupRow], + duplicateMap, + ); + + expect(result.shouldBreak).toBe(true); + expect(result.updatedUnresolvedIssues).toHaveLength(0); + expect(Verification.isVerified(ctx, 'comment-A')).toBe(true); + expect(Verification.isVerified(ctx, 'comment-B')).toBe(true); + expect(ctx.verifiedThisSession!.has('comment-A')).toBe(true); + expect(ctx.verifiedThisSession!.has('comment-B')).toBe(true); + }); +}); diff --git a/tests/outdated-model-advice.test.ts b/tests/outdated-model-advice.test.ts index 6d67b0d..1d1ae94 100644 --- a/tests/outdated-model-advice.test.ts +++ b/tests/outdated-model-advice.test.ts @@ -308,6 +308,109 @@ describe('applyCatalogModelAutoHeals', () => { } }); + it('noop heal marks dedup cluster when dedupCache matches full comment set', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-heal-cluster-')); + try { + execFileSync('git', ['init'], { cwd: dir, env: gitEnv }); + const rel = 'examples/telegram-agent.ts'; + mkdirSync(join(dir, 'examples'), { recursive: true }); + const lines: string[] = []; + for (let i = 0; i < 70; i++) { + if (i === 5) lines.push('export const OPENAI_SMALL_MODEL = "gpt-5-mini";'); + else lines.push(`// line ${i}`); + } + writeFileSync(join(dir, rel), lines.join('\n') + '\n'); + execFileSync('git', ['add', '.'], { cwd: dir, env: gitEnv }); + execFileSync('git', ['commit', '-m', 'init'], { cwd: dir, env: gitEnv }); + + const body = + '❌ CRITICAL: Model name typo in example\nChange gpt-5-mini to gpt-4o-mini'; + const canonical: ReviewComment = { + id: 'ic_canon', + threadId: 't_canon', + author: 'claude', + body, + path: rel, + line: 50, + createdAt: new Date().toISOString(), + }; + const dupe: ReviewComment = { + id: 'ic_dupe', + threadId: 't_dupe', + author: 'greptile', + body, + path: rel, + line: 51, + createdAt: new Date().toISOString(), + }; + const sortedIds = ['ic_canon', 'ic_dupe'].sort().join(','); + const ctx: StateContext = { + statePath: join(dir, '.pr-resolver-state.json'), + state: { + iterations: [], + verifiedFixed: [], + verifiedComments: [], + dismissedIssues: [], + dedupCache: { + commentIds: sortedIds, + duplicateMap: { ic_canon: ['ic_dupe'] }, + dedupedIds: ['ic_canon'], + schema: 'dedup-v2', + }, + } as ResolverState, + currentPhase: 'test', + }; + const outcome = applyCatalogModelAutoHeals(dir, [canonical, dupe], ctx); + expect(outcome.modifiedPaths).toEqual([]); + expect(outcome.verificationTouched).toBe(true); + expect(ctx.verifiedThisSession?.has('ic_canon')).toBe(true); + expect(ctx.verifiedThisSession?.has('ic_dupe')).toBe(true); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('skips auto-heal when workdir has uncommitted changes', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-heal-dirty-')); + try { + execFileSync('git', ['init'], { cwd: dir, env: gitEnv }); + const rel = 'examples/telegram-agent.ts'; + mkdirSync(join(dir, 'examples'), { recursive: true }); + writeFileSync(join(dir, rel), 'export const X = "gpt-4o-mini";\n', 'utf8'); + execFileSync('git', ['add', '.'], { cwd: dir, env: gitEnv }); + execFileSync('git', ['commit', '-m', 'init'], { cwd: dir, env: gitEnv }); + writeFileSync(join(dir, 'other.ts'), '// dirty\n', 'utf8'); + + const body = + '❌ CRITICAL: Model name typo in example\nChange gpt-5-mini to gpt-4o-mini'; + const comment: ReviewComment = { + id: 'ic_heal_dirty', + threadId: 't_dirty', + author: 'claude', + body, + path: rel, + line: 1, + createdAt: new Date().toISOString(), + }; + const ctx: StateContext = { + statePath: join(dir, '.pr-resolver-state.json'), + state: { + iterations: [], + verifiedFixed: [], + verifiedComments: [], + dismissedIssues: [], + } as ResolverState, + currentPhase: 'test', + }; + const outcome = applyCatalogModelAutoHeals(dir, [comment], ctx); + expect(outcome.modifiedPaths).toEqual([]); + expect(outcome.verificationTouched).toBe(false); + expect(readFileSync(join(dir, rel), 'utf8')).toContain('gpt-4o-mini'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + it('heals quoted wrong id outside ±20 line window via full-file fallback', () => { const dir = mkdtempSync(join(tmpdir(), 'prr-heal-full-')); try { diff --git a/tests/path-utils.test.ts b/tests/path-utils.test.ts index 61a14f6..7803ecf 100644 --- a/tests/path-utils.test.ts +++ b/tests/path-utils.test.ts @@ -3,7 +3,10 @@ * Used in allowed-path filtering and TARGET FILE(S) construction; edge cases include * URL-encoded segments, internal paths, node_modules/dist, and repo top-level detection. */ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, afterEach } from 'vitest'; +import { mkdtempSync, writeFileSync, rmSync, mkdirSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; import { normalizeRepoPath, normalizePathForAllow, @@ -13,7 +16,11 @@ import { isReviewPathFragment, shouldSkipFinalAuditLlmForPath, pathDismissCategoryForNotFound, + dismissPathNotFound, stripGitDiffPathPrefix, + setDynamicRepoTopLevelDirs, + getDynamicRepoTopLevelDirs, + tryResolvePathWithExtensionVariants, } from '../shared/path-utils.js'; describe('normalizeRepoPath', () => { @@ -70,9 +77,12 @@ describe('isPathAllowedForFix', () => { expect(isPathAllowedForFix('packages/x/node_modules/y')).toBe(false); expect(isPathAllowedForFix('dist/index.js')).toBe(false); }); - it('rejects external package-like first segment', () => { - expect(isPathAllowedForFix('elizaos/core/lib/types.d.ts')).toBe(false); - expect(isPathAllowedForFix('some-pkg/bar')).toBe(false); + it('allows any repo-relative path by default (strict mode off)', () => { + expect(isPathAllowedForFix('elizaos/core/lib/types.d.ts')).toBe(true); + expect(isPathAllowedForFix('some-pkg/bar')).toBe(true); + expect(isPathAllowedForFix('agent/typescript/index.ts')).toBe(true); + expect(isPathAllowedForFix('cmd/server/main.go')).toBe(true); + expect(isPathAllowedForFix('contracts/ERC20.sol')).toBe(true); }); it('allows repo top-level dirs', () => { expect(isPathAllowedForFix('src/foo.ts')).toBe(true); @@ -122,6 +132,44 @@ describe('stripGitDiffPathPrefix', () => { }); }); +describe('setDynamicRepoTopLevelDirs', () => { + afterEach(() => { + setDynamicRepoTopLevelDirs([]); + }); + + it('hard deny rules still apply regardless of dynamic dirs', () => { + setDynamicRepoTopLevelDirs(['node_modules/foo/bar.js', 'dist/index.js']); + expect(isPathAllowedForFix('node_modules/foo/bar.js')).toBe(false); + expect(isPathAllowedForFix('dist/index.js')).toBe(false); + }); + + it('internal segments denied even if in changed files', () => { + setDynamicRepoTopLevelDirs(['.cursor/plans/x.md', '.prr/state.json']); + expect(isPathAllowedForFix('.cursor/plans/x.md')).toBe(false); + expect(isPathAllowedForFix('.prr/state.json')).toBe(false); + }); + + it('enables stripGitDiffPathPrefix for dynamic dirs', () => { + expect(stripGitDiffPathPrefix('a/agent/typescript/index.ts')).toBe('a/agent/typescript/index.ts'); + setDynamicRepoTopLevelDirs(['agent/typescript/index.ts']); + expect(stripGitDiffPathPrefix('a/agent/typescript/index.ts')).toBe('agent/typescript/index.ts'); + }); + + it('extracts correct first segments from changed files', () => { + setDynamicRepoTopLevelDirs([ + 'agent/typescript/index.ts', + 'contracts/ERC20.sol', + 'cmd/server/main.go', + 'package.json', + ]); + const dirs = getDynamicRepoTopLevelDirs(); + expect(dirs.has('agent')).toBe(true); + expect(dirs.has('contracts')).toBe(true); + expect(dirs.has('cmd')).toBe(true); + expect(dirs.has('package.json')).toBe(true); + }); +}); + describe('isReviewPathFragment', () => { it('treats extension-only review paths as fragments', () => { expect(isReviewPathFragment('.d.ts')).toBe(true); @@ -155,14 +203,50 @@ describe('shouldSkipFinalAuditLlmForPath', () => { }); describe('pathDismissCategoryForNotFound', () => { - it('uses path-unresolved for ambiguous or fragment resolution', () => { + it('uses path-unresolved for ambiguous resolution; path-fragment for fragments', () => { expect(pathDismissCategoryForNotFound('foo.ts', 'ambiguous')).toBe('path-unresolved'); - expect(pathDismissCategoryForNotFound('x', 'fragment')).toBe('path-unresolved'); + expect(pathDismissCategoryForNotFound('x', 'fragment')).toBe('path-fragment'); }); - it('uses path-unresolved for fragment-shaped review path even when resolution is missing', () => { - expect(pathDismissCategoryForNotFound('.d.ts', 'missing')).toBe('path-unresolved'); + it('uses path-fragment for fragment-shaped review path even when resolution is missing', () => { + expect(pathDismissCategoryForNotFound('.d.ts', 'missing')).toBe('path-fragment'); }); it('uses missing-file for normal paths with missing resolution', () => { expect(pathDismissCategoryForNotFound('src/nope.ts', 'missing')).toBe('missing-file'); }); + it('matches dismissPathNotFound alias', () => { + expect(dismissPathNotFound('.d.ts', 'missing')).toBe('path-fragment'); + }); +}); + +describe('tryResolvePathWithExtensionVariants', () => { + it('resolves tsconfig.js to tsconfig.json when only json exists', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-path-')); + try { + writeFileSync(join(dir, 'tsconfig.json'), '{}'); + expect(tryResolvePathWithExtensionVariants(dir, 'tsconfig.js')).toBe('tsconfig.json'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('resolves Component.ts to Component.tsx when only tsx exists', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-path-')); + try { + writeFileSync(join(dir, 'Component.tsx'), 'export {}'); + expect(tryResolvePathWithExtensionVariants(dir, 'Component.ts')).toBe('Component.tsx'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('strips a/ git diff prefix before variant lookup', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-path-')); + try { + mkdirSync(join(dir, 'src'), { recursive: true }); + writeFileSync(join(dir, 'src', 'foo.tsx'), 'export {}'); + expect(tryResolvePathWithExtensionVariants(dir, 'a/src/foo.ts')).toBe('src/foo.tsx'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); }); diff --git a/tests/prompt-budget.test.ts b/tests/prompt-budget.test.ts new file mode 100644 index 0000000..7ea1150 --- /dev/null +++ b/tests/prompt-budget.test.ts @@ -0,0 +1,37 @@ +import { describe, it, expect } from 'vitest'; +import { + computeBudget, + computePerFixVerifyCurrentCodeBudget, + fitToBudget, + truncateNumberedCodeAroundAnchor, +} from '../shared/prompt-budget.js'; + +describe('prompt-budget', () => { + it('computeBudget returns positive availableForCode', () => { + const b = computeBudget({ model: 'openai/gpt-4o-mini', reservedChars: 20_000 }); + expect(b.availableForCode).toBeGreaterThan(5_000); + expect(b.inputCeilingChars).toBeGreaterThan(b.availableForCode); + }); + + it('fitToBudget returns full file when under maxChars', () => { + const raw = 'a\nb\nc'; + const { content, truncated } = fitToBudget(raw, 2, 10_000); + expect(truncated).toBe(false); + expect(content).toContain('1: a'); + expect(content).toContain('3: c'); + }); + + it('truncateNumberedCodeAroundAnchor keeps anchor vicinity', () => { + const lines = Array.from({ length: 40 }, (_, i) => `${i + 1}: line${i + 1}`); + const big = lines.join('\n'); + const out = truncateNumberedCodeAroundAnchor(big, 25, 400); + expect(out.length).toBeLessThanOrEqual(500); + expect(out).toContain('line25'); + }); + + it('computePerFixVerifyCurrentCodeBudget shrinks with more fixes', () => { + const one = computePerFixVerifyCurrentCodeBudget('openai/gpt-4o-mini', 1); + const many = computePerFixVerifyCurrentCodeBudget('openai/gpt-4o-mini', 12); + expect(many).toBeLessThanOrEqual(one); + }); +}); diff --git a/tests/prompt-log-empty-stats.test.ts b/tests/prompt-log-empty-stats.test.ts new file mode 100644 index 0000000..1e61d1b --- /dev/null +++ b/tests/prompt-log-empty-stats.test.ts @@ -0,0 +1,72 @@ +import { mkdtempSync, readFileSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { + initOutputLog, + closeOutputLog, + debugPrompt, + debugResponse, + getEmptyPromptBodyRejectionStats, + getOutputLogPath, +} from '../shared/logger.js'; + +/** + * Pill-output audit: per kind:slug counts + closeOutputLog summary on output.log. + * WHY isolated PRR_LOG_DIR: avoids touching repo-root output.log; restores console after. + */ +describe('getEmptyPromptBodyRejectionStats / closeOutputLog empty-body summary', () => { + const savedConsole = { + log: console.log, + warn: console.warn, + error: console.error, + }; + let logDir: string; + + beforeAll(() => { + logDir = mkdtempSync(join(tmpdir(), 'prr-logger-empty-')); + process.env.PRR_LOG_DIR = logDir; + }); + + afterAll(async () => { + delete process.env.PRR_LOG_DIR; + console.log = savedConsole.log; + console.warn = savedConsole.warn; + console.error = savedConsole.error; + try { + rmSync(logDir, { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('tracks PROMPT and RESPONSE refusals by kind:slug and writes breakdown to output.log on close', async () => { + initOutputLog({ prefix: 'vitest-empty-stats' }); + const slugP = debugPrompt('test-label', ''); + expect(slugP).toMatch(/^#\d{4}\//); + + let stats = getEmptyPromptBodyRejectionStats(); + expect(stats.total).toBe(1); + expect(stats.byKindSlug).toHaveLength(1); + expect(stats.byKindSlug[0]?.key.startsWith('PROMPT:')).toBe(true); + expect(stats.byKindSlug[0]?.count).toBe(1); + + debugResponse(slugP, 'test-label', ' '); + stats = getEmptyPromptBodyRejectionStats(); + expect(stats.total).toBe(2); + expect(stats.byKindSlug.length).toBeGreaterThanOrEqual(2); + + await closeOutputLog(); + + stats = getEmptyPromptBodyRejectionStats(); + expect(stats.total).toBe(0); + expect(stats.byKindSlug).toHaveLength(0); + + const outPath = getOutputLogPath(); + expect(outPath).toBeTruthy(); + const text = readFileSync(outPath!, 'utf8'); + expect(text).toContain('By kind:slug'); + expect(text).toContain('PROMPT:'); + expect(text).toContain('RESPONSE:'); + }); +}); diff --git a/tests/prr-runtime-meta.test.ts b/tests/prr-runtime-meta.test.ts index e077510..cfb53d8 100644 --- a/tests/prr-runtime-meta.test.ts +++ b/tests/prr-runtime-meta.test.ts @@ -1,5 +1,8 @@ +import { existsSync } from 'fs'; +import { join } from 'path'; import { describe, expect, it } from 'vitest'; import { + findPrrGitMetadataDir, formatPrrStartupVersionLine, getPrrPackageRoot, getPrrPackageVersion, @@ -14,8 +17,13 @@ describe('prr-runtime-meta', () => { expect(v).toMatch(/^\d+\.\d+\.\d+/); }); - it('detects .git in this checkout', () => { + it('detects .git walking up from package root', () => { expect(hasPrrGitMetadata()).toBe(true); + const gitDir = findPrrGitMetadataDir(); + expect(gitDir).toBeDefined(); + expect(existsSync(join(gitDir!, '.git'))).toBe(true); + const pkg = getPrrPackageRoot(); + expect(gitDir === pkg || pkg.startsWith(gitDir! + '/') || pkg.startsWith(gitDir! + '\\')).toBe(true); }); it('formatPrrStartupVersionLine includes version', () => { diff --git a/tests/redact-url.test.ts b/tests/redact-url.test.ts new file mode 100644 index 0000000..5b85b46 --- /dev/null +++ b/tests/redact-url.test.ts @@ -0,0 +1,14 @@ +import { describe, it, expect } from 'vitest'; +import { redactUrlCredentials } from '../shared/git/redact-url.js'; + +describe('redactUrlCredentials', () => { + it('redacts https://x-access-token:TOKEN@github.com/... (colon in userinfo)', () => { + const raw = + 'remote https://x-access-token:ghp_secret12345@github.com/org/repo.git'; + expect(redactUrlCredentials(raw)).toBe('remote https://***@github.com/org/repo.git'); + }); + + it('redacts simple token@host https URLs', () => { + expect(redactUrlCredentials('https://abc123@github.com/x')).toBe('https://***@github.com/x'); + }); +}); diff --git a/tests/session-model-skip.test.ts b/tests/session-model-skip.test.ts index 7e79c36..6443549 100644 --- a/tests/session-model-skip.test.ts +++ b/tests/session-model-skip.test.ts @@ -1,5 +1,11 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; -import { createStateContext, ensureRotationSession } from '../tools/prr/state/state-context.js'; +import { + createStateContext, + ensureRotationSession, + hydrateRotationSessionFromPersistedState, + persistRotationSessionToState, +} from '../tools/prr/state/state-context.js'; +import { createInitialState } from '../tools/prr/state/types.js'; import * as Rotation from '../tools/prr/models/rotation.js'; import type { Runner } from '../shared/runners/types.js'; import type { CLIOptions } from '../tools/prr/cli.js'; @@ -14,6 +20,33 @@ const runner: Runner = { checkStatus: async () => ({ installed: true, ready: true }), }; +describe('session skip persistence (state file fields)', () => { + beforeEach(() => { + vi.stubEnv('PRR_SESSION_MODEL_SKIP_FAILURES', '3'); + }); + afterEach(() => { + vi.unstubAllEnvs(); + }); + + it('hydrates skipped model keys from ctx.state session fields', () => { + const stateContext = createStateContext('/tmp/w'); + stateContext.state = createInitialState('https://github.com/o/r/pull/1', 'branch', 'deadbeef'); + stateContext.state.sessionSkippedModelKeys = ['llm-api/bad/model']; + stateContext.state.sessionModelStats = { 'llm-api/bad/model': { fixes: 0, failures: 3 } }; + hydrateRotationSessionFromPersistedState(stateContext); + expect(ensureRotationSession(stateContext).skippedModelKeys.has('llm-api/bad/model')).toBe(true); + }); + + it('persistRotationSessionToState writes skip keys into ResolverState', () => { + const stateContext = createStateContext('/tmp/w'); + stateContext.state = createInitialState('https://github.com/o/r/pull/2', 'branch', 'abc123'); + Rotation.recordSessionModelVerificationOutcome(stateContext, 'llm-api', 'bad/model', 0, 3); + persistRotationSessionToState(stateContext); + expect(stateContext.state.sessionSkippedModelKeys).toContain('llm-api/bad/model'); + expect(stateContext.state.sessionModelStats?.['llm-api/bad/model']?.failures).toBe(3); + }); +}); + describe('recordSessionModelVerificationOutcome', () => { beforeEach(() => { vi.stubEnv('PRR_SESSION_MODEL_SKIP_FAILURES', '3'); @@ -69,7 +102,7 @@ describe('maybeResetSessionSkippedModelsAfterFixIteration', () => { vi.unstubAllEnvs(); }); - it('clears session skips when fix iteration is a multiple of N', () => { + it('clears each session skip after N fix iterations since that key was skipped', () => { vi.stubEnv('PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS', '2'); const stateContext = createStateContext('/tmp/w'); ensureRotationSession(stateContext).skippedModelKeys.add('llm-api/x'); @@ -79,6 +112,18 @@ describe('maybeResetSessionSkippedModelsAfterFixIteration', () => { expect(ensureRotationSession(stateContext).skippedModelKeys.size).toBe(0); }); + it('per-key: skip added at iteration K clears at K+N, not earlier', () => { + vi.stubEnv('PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS', '2'); + vi.stubEnv('PRR_SESSION_MODEL_SKIP_FAILURES', '1'); + const stateContext = createStateContext('/tmp/w'); + Rotation.recordSessionModelVerificationOutcome(stateContext, 'llm-api', 'bad/model', 0, 1, 3); + expect(ensureRotationSession(stateContext).skippedModelKeys.has('llm-api/bad/model')).toBe(true); + Rotation.maybeResetSessionSkippedModelsAfterFixIteration(stateContext, 4); + expect(ensureRotationSession(stateContext).skippedModelKeys.has('llm-api/bad/model')).toBe(true); + Rotation.maybeResetSessionSkippedModelsAfterFixIteration(stateContext, 5); + expect(ensureRotationSession(stateContext).skippedModelKeys.has('llm-api/bad/model')).toBe(false); + }); + it('does nothing when env unset or iteration not on boundary', () => { const stateContext = createStateContext('/tmp/w'); ensureRotationSession(stateContext).skippedModelKeys.add('llm-api/x'); diff --git a/tests/solvability-pr-comment.test.ts b/tests/solvability-pr-comment.test.ts index 7426158..fcce019 100644 --- a/tests/solvability-pr-comment.test.ts +++ b/tests/solvability-pr-comment.test.ts @@ -128,6 +128,112 @@ describe('(PR comment) path inference in solvability', () => { }); }); +describe('review rollup headings (solvability 0a2 — Cycle 72)', () => { + it('dismisses "### Remaining Issues" recap anchored on a file', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-solv-rollup-')); + tempDirs.push(dir); + initGitRepo(dir); + mkdirSync(join(dir, 'agent'), { recursive: true }); + writeFileSync(join(dir, 'agent', 'x.ts'), 'export {};\n', 'utf8'); + execFileSync('git', ['add', 'agent/x.ts'], { cwd: dir, stdio: 'ignore' }); + const comment: ReviewComment = { + id: 'ic-rollup-rem', + threadId: 't-r1', + author: 'coderabbitai', + path: 'agent/x.ts', + line: 1, + createdAt: new Date().toISOString(), + body: '### Remaining Issues\n\n- [ ] Thread A still open\n- [ ] Thread B still open\n', + }; + const result = assessSolvability(dir, comment, makeStateContext(dir)); + expect(result.solvable).toBe(false); + expect(result.dismissCategory).toBe('not-an-issue'); + expect(result.reason).toMatch(/meta-review|rollup/i); + }); + + it('dismisses "Issues Fixed Since Previous Reviews" heading', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-solv-rollup-fix')); + tempDirs.push(dir); + initGitRepo(dir); + writeFileSync(join(dir, 'z.ts'), 'export const z = 1;\n', 'utf8'); + execFileSync('git', ['add', 'z.ts'], { cwd: dir, stdio: 'ignore' }); + const comment: ReviewComment = { + id: 'ic-rollup-fixed', + threadId: 't-r2', + author: 'coderabbitai', + path: 'z.ts', + line: 1, + createdAt: new Date().toISOString(), + body: '## Issues Fixed Since Previous Reviews\n\n✅ Item one\n', + }; + const result = assessSolvability(dir, comment, makeStateContext(dir)); + expect(result.solvable).toBe(false); + expect(result.dismissCategory).toBe('not-an-issue'); + }); + + it('dismisses bold-only "Issues Fixed Since Previous Reviews" (no # heading)', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-solv-rollup-bold')); + tempDirs.push(dir); + initGitRepo(dir); + writeFileSync(join(dir, 'a.ts'), 'export const a = 1;\n', 'utf8'); + execFileSync('git', ['add', 'a.ts'], { cwd: dir, stdio: 'ignore' }); + const comment: ReviewComment = { + id: 'ic-rollup-bold', + threadId: 't-r2b', + author: 'coderabbitai', + path: 'a.ts', + line: 1, + createdAt: new Date().toISOString(), + body: '**Issues Fixed Since Previous Reviews**\n\n- ✅ Thread one addressed\n', + }; + const result = assessSolvability(dir, comment, makeStateContext(dir)); + expect(result.solvable).toBe(false); + expect(result.dismissCategory).toBe('not-an-issue'); + }); + + it('dismisses HTML h3 "Issues Fixed Since Previous Reviews"', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-solv-rollup-html')); + tempDirs.push(dir); + initGitRepo(dir); + writeFileSync(join(dir, 'b.ts'), 'export const b = 1;\n', 'utf8'); + execFileSync('git', ['add', 'b.ts'], { cwd: dir, stdio: 'ignore' }); + const comment: ReviewComment = { + id: 'ic-rollup-html', + threadId: 't-r2h', + author: 'coderabbitai', + path: 'b.ts', + line: 1, + createdAt: new Date().toISOString(), + body: '

Issues Fixed Since Previous Reviews

\n

Recap only.

\n', + }; + const result = assessSolvability(dir, comment, makeStateContext(dir)); + expect(result.solvable).toBe(false); + expect(result.dismissCategory).toBe('not-an-issue'); + }); + + it('dismisses (PR comment) with rollup heading before path inference', () => { + const dir = mkdtempSync(join(tmpdir(), 'prr-solv-rollup-pr')); + tempDirs.push(dir); + initGitRepo(dir); + const longBody = + '### Remaining Issues\n\n' + + '- [ ] a\n'.repeat(20) + + 'Some filler so body length exceeds short-path threshold.'; + const comment: ReviewComment = { + id: 'ic-rollup-pr', + threadId: 't-r3', + author: 'coderabbitai', + path: '(PR comment)', + line: null, + createdAt: new Date().toISOString(), + body: longBody, + }; + const result = assessSolvability(dir, comment, makeStateContext(dir)); + expect(result.solvable).toBe(false); + expect(result.dismissCategory).toBe('not-an-issue'); + }); +}); + describe('human-confirmed addressed (solvability 0a5b)', () => { it('dismisses when maintainer confirmed the thread is addressed', () => { const dir = mkdtempSync(join(tmpdir(), 'prr-solv-confirmed-')); diff --git a/tests/solvability-submodule.test.ts b/tests/solvability-submodule.test.ts new file mode 100644 index 0000000..1a97b67 --- /dev/null +++ b/tests/solvability-submodule.test.ts @@ -0,0 +1,71 @@ +import { afterEach, describe, expect, it } from 'vitest'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { execFileSync } from 'child_process'; +import type { ReviewComment } from '../tools/prr/github/types.js'; +import type { StateContext } from '../tools/prr/state/state-context.js'; +import { createInitialState } from '../tools/prr/state/types.js'; +import { assessSolvability } from '../tools/prr/workflow/helpers/solvability.js'; + +const tempDirs: string[] = []; + +afterEach(() => { + while (tempDirs.length > 0) { + const dir = tempDirs.pop(); + if (dir) rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeStateContext(workdir: string): StateContext { + return { + statePath: join(workdir, '.pr-resolver-state.json'), + state: createInitialState('owner/repo#1', 'feature', 'abc123'), + currentPhase: 'test', + }; +} + +function git(dir: string, args: string[]): void { + execFileSync('git', args, { cwd: dir, stdio: 'ignore' }); +} + +describe('assessSolvability — git submodule path (0e0)', () => { + it('dismisses comments on submodule paths as not-an-issue before snippet phase', () => { + const parent = mkdtempSync(join(tmpdir(), 'prr-solv-sub-')); + tempDirs.push(parent); + const child = join(parent, 'child-repo'); + mkdirSync(child, { recursive: true }); + + git(child, ['init', '-b', 'main']); + writeFileSync(join(child, 'README.md'), '# child\n', 'utf8'); + git(child, ['add', 'README.md']); + git(child, ['-c', 'user.email=test@test', '-c', 'user.name=test', 'commit', '-m', 'init']); + const childHead = execFileSync('git', ['rev-parse', 'HEAD'], { + cwd: child, + encoding: 'utf8', + }).trim(); + + git(parent, ['init', '-b', 'main']); + writeFileSync(join(parent, 'root.txt'), 'root\n', 'utf8'); + git(parent, ['add', 'root.txt']); + git(parent, ['-c', 'user.email=test@test', '-c', 'user.name=test', 'commit', '-m', 'root']); + git(parent, ['update-index', '--add', '--cacheinfo', `160000,${childHead},plugins/plugin-sql`]); + git(parent, ['-c', 'user.email=test@test', '-c', 'user.name=test', 'commit', '-m', 'add gitlink']); + + const comment: ReviewComment = { + id: 'ic-sub-1', + threadId: 't-sub', + author: 'coderabbit', + path: 'plugins/plugin-sql', + line: 1, + createdAt: new Date().toISOString(), + body: 'Consider fixing SQL adapter exports.', + }; + + const r = assessSolvability(parent, comment, makeStateContext(parent)); + expect(r.solvable).toBe(false); + expect(r.dismissCategory).toBe('not-an-issue'); + expect(r.reason).toContain('git submodule'); + expect(r.remediationHint).toContain('submodule'); + }); +}); diff --git a/tests/state-load-normalization.test.ts b/tests/state-load-normalization.test.ts new file mode 100644 index 0000000..2acde0f --- /dev/null +++ b/tests/state-load-normalization.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from 'vitest'; +import type { ResolverState } from '../tools/prr/state/types.js'; +import { + applyResolverStateLoadCoreNormalization, + applyResolverStatePostOverlapCleanup, +} from '../tools/prr/state/state-core.js'; + +function baseState(over: Partial): ResolverState { + return { + pr: 'o/r#1', + branch: 'main', + headSha: 'abc', + startedAt: 's', + lastUpdated: 'u', + lessonsLearned: [], + iterations: [], + verifiedComments: [], + verifiedFixed: [], + dismissedIssues: [], + ...over, + } as ResolverState; +} + +describe('applyResolverStateLoadCoreNormalization', () => { + it('dedupes verifiedFixed and verifiedComments', () => { + const state = baseState({ + verifiedFixed: ['ic_a', 'ic_a', 'ic_b'], + verifiedComments: [ + { commentId: 'ic_x', verifiedAt: '2026-01-01T00:00:00Z', verifiedAtIteration: 1 }, + { commentId: 'ic_x', verifiedAt: '2026-02-01T00:00:00Z', verifiedAtIteration: 2 }, + ], + noProgressCycles: 9, + }); + applyResolverStateLoadCoreNormalization(state); + expect(state.verifiedFixed).toEqual(['ic_a', 'ic_b']); + expect(state.verifiedComments).toHaveLength(1); + expect(state.verifiedComments[0]!.verifiedAt).toBe('2026-02-01T00:00:00Z'); + expect(state.noProgressCycles).toBe(0); + }); +}); + +describe('applyResolverStatePostOverlapCleanup', () => { + it('clears recoveredFromGitCommentIds and skip-listed model performance keys', () => { + const state = baseState({ + recoveredFromGitCommentIds: ['ic_1'], + modelPerformance: { + 'llm-api/anthropic/claude-3.5-sonnet': { fixes: 0, failures: 1, noChanges: 0, errors: 0, lastUsed: 't' }, + 'llm-api/anthropic/claude-opus-4.5': { fixes: 1, failures: 0, noChanges: 0, errors: 0, lastUsed: 't' }, + }, + }); + applyResolverStatePostOverlapCleanup(state); + expect(state.recoveredFromGitCommentIds).toBeUndefined(); + expect(state.modelPerformance?.['llm-api/anthropic/claude-opus-4.5']).toBeDefined(); + }); +}); diff --git a/tests/state-transitions.test.ts b/tests/state-transitions.test.ts new file mode 100644 index 0000000..09f3d95 --- /dev/null +++ b/tests/state-transitions.test.ts @@ -0,0 +1,136 @@ +/** + * Invariants for {@link transitionIssue}: mutual exclusion, session set, commentStatuses. + */ +import { describe, it, expect } from 'vitest'; +import type { StateContext } from '../tools/prr/state/state-context.js'; +import type { ResolverState } from '../tools/prr/state/types.js'; +import { transitionIssue } from '../tools/prr/state/state-transitions.js'; +import * as Verification from '../tools/prr/state/state-verification.js'; +import { getState } from '../tools/prr/state/state-context.js'; + +function makeCtx(partial: Partial, session?: Set): StateContext { + const state: ResolverState = { + pr: 'o/r#1', + branch: 'main', + headSha: 'abc', + startedAt: 's', + lastUpdated: 'u', + lessonsLearned: [], + iterations: partial.iterations ?? [{ timestamp: 't', commentsAddressed: [], changesMade: [], verificationResults: {} }], + verifiedComments: partial.verifiedComments ?? [], + verifiedFixed: partial.verifiedFixed ?? [], + dismissedIssues: partial.dismissedIssues ?? [], + commentStatuses: partial.commentStatuses ?? {}, + ...partial, + } as ResolverState; + return { + statePath: '/tmp/test-state', + state, + currentPhase: 'test', + verifiedThisSession: session, + }; +} + +function verifiedIds(state: ResolverState): Set { + const fromLegacy = state.verifiedFixed ?? []; + const fromNew = state.verifiedComments?.map((v) => v.commentId) ?? []; + return new Set([...fromLegacy, ...fromNew]); +} + +function dismissedIds(state: ResolverState): Set { + return new Set((state.dismissedIssues ?? []).map((d) => d.commentId)); +} + +describe('transitionIssue', () => { + it('keeps verified and dismissed disjoint after verify then dismiss', () => { + const session = new Set(); + const ctx = makeCtx({}, session); + Verification.markVerified(ctx, 'ic_1'); + transitionIssue(ctx, 'ic_1', { + kind: 'dismissed', + reason: 'r', + category: 'not-an-issue', + filePath: 'a.ts', + line: null, + commentBody: 'body', + }); + const st = getState(ctx); + expect(verifiedIds(st).has('ic_1')).toBe(false); + expect(dismissedIds(st).has('ic_1')).toBe(true); + expect(session.has('ic_1')).toBe(false); + }); + + it('adds to verifiedThisSession on verify unless skipSessionTracking', () => { + const session = new Set(); + const ctx = makeCtx({}, session); + Verification.markVerified(ctx, 'ic_a'); + expect(session.has('ic_a')).toBe(true); + + const session2 = new Set(); + const ctx2 = makeCtx({}, session2); + Verification.markVerified(ctx2, 'ic_b', undefined, { skipSessionTracking: true }); + expect(session2.has('ic_b')).toBe(false); + }); + + it('removes from verifiedThisSession on unverified', () => { + const session = new Set(['ic_x']); + const ctx = makeCtx( + { + verifiedComments: [{ commentId: 'ic_x', verifiedAt: 't', verifiedAtIteration: 0 }], + verifiedFixed: ['ic_x'], + }, + session + ); + Verification.unmarkVerified(ctx, 'ic_x'); + expect(session.has('ic_x')).toBe(false); + expect(Verification.isVerified(ctx, 'ic_x')).toBe(false); + }); + + it('undismissed removes dismissed row and commentStatuses entry', () => { + const ctx = makeCtx({ + dismissedIssues: [ + { + commentId: 'ic_d', + reason: 'x', + dismissedAt: 'd', + dismissedAtIteration: 0, + category: 'stale', + filePath: 'f.ts', + line: null, + commentBody: '', + }, + ], + commentStatuses: { + ic_d: { + status: 'resolved', + classification: 'stale', + explanation: '', + importance: 1, + ease: 1, + filePath: 'f.ts', + fileContentHash: 'h', + updatedAt: 'u', + updatedAtIteration: 0, + }, + }, + }); + transitionIssue(ctx, 'ic_d', { kind: 'undismissed' }); + expect(getState(ctx).dismissedIssues).toHaveLength(0); + expect(getState(ctx).commentStatuses?.ic_d).toBeUndefined(); + }); + + it('dismiss is idempotent — second dismiss does not duplicate rows', () => { + const ctx = makeCtx({}); + const d = { + kind: 'dismissed' as const, + reason: 'r', + category: 'not-an-issue' as const, + filePath: 'a.ts', + line: null as number | null, + commentBody: 'b', + }; + transitionIssue(ctx, 'ic_dup', d); + transitionIssue(ctx, 'ic_dup', d); + expect(getState(ctx).dismissedIssues?.filter((x) => x.commentId === 'ic_dup').length).toBe(1); + }); +}); diff --git a/tests/test-path-inference.test.ts b/tests/test-path-inference.test.ts new file mode 100644 index 0000000..529d125 --- /dev/null +++ b/tests/test-path-inference.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, it } from 'vitest'; +import { + getTestPathForIssueLike, + normalizeDoubledTestExtension, + testBasenameWithSuffix, +} from '../tools/prr/analyzer/test-path-inference.js'; + +describe('testBasenameWithSuffix', () => { + it('does not double-append .test when stem already ends with .test', () => { + expect(testBasenameWithSuffix('x402-topup.test', '.ts', 'test')).toBe('x402-topup.test.ts'); + expect(testBasenameWithSuffix('x402-topup', '.ts', 'test')).toBe('x402-topup.test.ts'); + }); + + it('does not double-append .spec when stem already ends with .spec', () => { + expect(testBasenameWithSuffix('foo.spec', '.tsx', 'spec')).toBe('foo.spec.tsx'); + }); +}); + +describe('normalizeDoubledTestExtension', () => { + it('collapses .test.test and .spec.spec', () => { + expect(normalizeDoubledTestExtension('__tests__/a.test.test.ts')).toBe('__tests__/a.test.ts'); + expect(normalizeDoubledTestExtension('b.spec.spec.js')).toBe('b.spec.js'); + }); +}); + +describe('getTestPathForIssueLike', () => { + it('infers colocated test path without .test.test when source is already *.test.ts', () => { + const path = getTestPathForIssueLike( + { + comment: { + path: 'packages/foo/bar.test.ts', + body: 'Add coverage for edge case', + }, + }, + { keepExistingTestPath: true }, + ); + expect(path).toBe('packages/foo/bar.test.ts'); + }); + + it('maps source file to single .test suffix', () => { + const path = getTestPathForIssueLike( + { + comment: { + path: 'src/util/pay.ts', + body: 'missing tests', + }, + }, + {}, + ); + expect(path).toBe('src/util/pay.test.ts'); + }); +}); diff --git a/tests/thread-replies.test.ts b/tests/thread-replies.test.ts index 394df91..eff16bb 100644 --- a/tests/thread-replies.test.ts +++ b/tests/thread-replies.test.ts @@ -69,6 +69,8 @@ describe('postThreadReplies', () => { getThreadCommentsCalls.push(threadId); return getThreadCommentsMap.get(threadId) ?? []; }), + // Synthetic login: idempotency queries run without matching real thread authors; avoids stderr warn when env unset. + getAuthenticatedLogin: vi.fn().mockResolvedValue('__prr_test_token_user__'), resolveReviewThread: vi.fn(async (_o, _r, threadId: string) => { resolveCalls.push(threadId); }), @@ -250,6 +252,22 @@ describe('postThreadReplies', () => { expect(replyCalls).toHaveLength(1); }); + it('skips reply when PRR_BOT_LOGIN is unset but getAuthenticatedLogin matches thread author (token user)', async () => { + vi.unstubAllEnvs(); + getThreadCommentsMap.set('thread-1', [{ author: 'reviewer' }, { author: 'octocat' }]); + (mockGithub as { getAuthenticatedLogin: ReturnType }).getAuthenticatedLogin.mockResolvedValue( + 'octocat', + ); + const comments = [makeComment('c1', 'thread-1', 100)]; + await run({ + replyToThreads: true, + comments, + verifiedCommentIds: new Set(['c1']), + }); + expect(replyCalls).toHaveLength(0); + expect(getThreadCommentsCalls).toContain('thread-1'); + }); + it('adds thread to repliedThreadIds after successful reply', async () => { const comments = [makeComment('c1', 'thread-1', 100)]; const repliedThreadIds = new Set(); @@ -274,14 +292,20 @@ describe('postThreadReplies', () => { expect(replyCalls[0].body).toMatch(/^Dismissed: .{197}\.\.\.$/); }); - it('returns { attempted, replied } when replyToThreads is true', async () => { + it('returns reply stats when replyToThreads is true', async () => { const comments = [makeComment('c1', 'thread-1', 100)]; const result = await run({ replyToThreads: true, comments, verifiedCommentIds: new Set(['c1']), }); - expect(result).toEqual({ attempted: 1, replied: 1 }); + expect(result).toEqual({ + attempted: 1, + replied: 1, + failed422: 0, + failedOther: 0, + skippedDueTo422Stop: 0, + }); }); it('returns undefined when replyToThreads is false', async () => { @@ -329,9 +353,78 @@ describe('postThreadReplies', () => { verifiedCommentIds: new Set(['c1']), dismissedIssues: [makeDismissed('c2', 'already-fixed', 'Done.')], }); - expect(result).toEqual({ attempted: 2, replied: 0 }); + expect(result).toEqual({ + attempted: 2, + replied: 0, + failed422: 2, + failedOther: 0, + skippedDueTo422Stop: 0, + }); expect(replyMock).toHaveBeenCalledTimes(4); }); + + it('skips short-body retry when 422 errors indicate thread/comment state (no redundant fallback call)', async () => { + const err422 = Object.assign(new Error('Validation Failed'), { + status: 422, + response: { + data: { + message: 'Validation Failed', + errors: [{ resource: 'PullRequestReviewComment', field: 'in_reply_to', code: 'invalid' }], + }, + }, + }); + const replyMock = vi.fn(async () => { + throw err422; + }); + mockGithub.replyToReviewThread = replyMock; + const comments = [makeComment('c1', 'thread-1', 100)]; + const result = await run({ + replyToThreads: true, + comments, + verifiedCommentIds: new Set(['c1']), + }); + expect(result).toEqual({ + attempted: 1, + replied: 0, + failed422: 1, + failedOther: 0, + skippedDueTo422Stop: 0, + }); + expect(replyMock).toHaveBeenCalledTimes(1); + }); + + it('stops after 3 consecutive all-422 batches and reports skipped count (verified phase only)', async () => { + const err422 = Object.assign(new Error('Validation Failed'), { + status: 422, + response: { + data: { + message: 'Validation Failed', + errors: [{ resource: 'PullRequestReviewComment', field: 'in_reply_to', code: 'invalid' }], + }, + }, + }); + const replyMock = vi.fn(async () => { + throw err422; + }); + mockGithub.replyToReviewThread = replyMock; + const comments = Array.from({ length: 10 }, (_, n) => + makeComment(`c${n}`, `thread-${n}`, 100 + n), + ); + const verified = new Set(comments.map((c) => c.id)); + const result = await run({ + replyToThreads: true, + comments, + verifiedCommentIds: verified, + }); + expect(result).toEqual({ + attempted: 9, + replied: 0, + failed422: 9, + failedOther: 0, + skippedDueTo422Stop: 1, + }); + expect(replyMock).toHaveBeenCalledTimes(9); + }); }); describe('dismissedCategoriesWithReply', () => { @@ -344,4 +437,8 @@ describe('dismissedCategoriesWithReply', () => { vi.stubEnv('PRR_THREAD_REPLY_INCLUDE_CHRONIC_FAILURE', 'true'); expect(dismissedCategoriesWithReply().has('chronic-failure')).toBe(true); }); + + it('includes out-of-scope in base reply set', () => { + expect(dismissedCategoriesWithReply().has('out-of-scope')).toBe(true); + }); }); diff --git a/tests/verification-heuristics-final-audit.test.ts b/tests/verification-heuristics-final-audit.test.ts new file mode 100644 index 0000000..b196b7c --- /dev/null +++ b/tests/verification-heuristics-final-audit.test.ts @@ -0,0 +1,22 @@ +import { describe, it, expect } from 'vitest'; +import { finalAuditExplanationClaimsSnippetIsIncomplete } from '../tools/prr/llm/verification-heuristics.js'; + +describe('finalAuditExplanationClaimsSnippetIsIncomplete', () => { + it('is true when the model says the shown window is insufficient', () => { + expect(finalAuditExplanationClaimsSnippetIsIncomplete('not visible in the provided excerpt')).toBe(true); + expect(finalAuditExplanationClaimsSnippetIsIncomplete('The rest of the file may still import the old API')).toBe( + true, + ); + expect(finalAuditExplanationClaimsSnippetIsIncomplete('cannot verify — excerpt does not include line 900')).toBe( + true, + ); + }); + + it('is false for substantive UNFIXED that does not hinge on missing context', () => { + expect( + finalAuditExplanationClaimsSnippetIsIncomplete( + 'The handler still returns 500 on empty body; no validation before parse.', + ), + ).toBe(false); + }); +}); diff --git a/tools/pill/README.md b/tools/pill/README.md index ae5c043..f4d298e 100644 --- a/tools/pill/README.md +++ b/tools/pill/README.md @@ -43,9 +43,14 @@ node dist/tools/pill/index.js [options] # or after npm link pill [options] + +# Rerun on specific log files (e.g. copies under ~/runs); audit code in . but read logs from paths: +pill . --output-log ~/runs/prr-2026-04-05/output.log --prompts-log ~/runs/prr-2026-04-05/prompts.log ``` -- **<directory>** — Directory that contains the log files and project to audit (e.g. `.` or `~/.prr` if logs are there). +- **<directory>** — Directory that contains the project to audit (docs, source, tree). Log files default to this directory unless overridden below. +- **--output-log <path>** — Use this file as **output.log** instead of `<directory>/[prefix-]output.log`. Handy to rerun pill on a saved copy or logs in another folder (path is resolved from the current working directory). Overrides **`PILL_OUTPUT_LOG_PATH`**. +- **--prompts-log <path>** — Same for **prompts.log**. Overrides **`PILL_PROMPTS_LOG_PATH`**. You can set only one of the pair; the other still uses the default name under **<directory>**. - **--audit-model <model>** — Model for the audit call (default: claude-opus-4-6). - **--output-only** — Use only output.log (no prompts.log). - **--prompts-only** — Use only prompts.log (no output.log). @@ -60,6 +65,7 @@ Config (API keys, provider) is loaded from `/.env` and then `~/.pill/ - **PILL_AUDIT_CHUNK_CONCURRENCY** (optional, **1–16**, default **4**) — How many **audit** HTTP requests may run in parallel when context is split into chunks. Higher speeds large runs; use **`1`** to restore fully sequential behavior (e.g. strict rate limits). - **PILL_OUTPUT_LOG_MAX_CHARS** (optional) — Hard cap on output-log chars in the audit payload (default **28000**). - **PILL_TOOL_REPO_SCOPE_FILTER** (optional) — **`0`** / **`false`** / **`off`** disables dropping clone-only paths; **`1`** / **`true`** forces the filter on. **Unset:** filter is **on** only when **`tools/prr`** exists under **`targetDir`** (typical prr monorepo). **WHY:** Keeps **`pill-output.md`** focused on improving **this** tool repo, not the PR under review. +- **PILL_OUTPUT_LOG_PATH** / **PILL_PROMPTS_LOG_PATH** (optional) — Absolute or cwd-relative paths to log files for a **standalone** pill run. **CLI `--output-log` / `--prompts-log` override these.** **WHY:** Rerun pill on archived or out-of-tree logs without changing **<directory>** (code context still comes from **<directory>**). ### Integrated (prr / story / split-exec / split-plan) — opt-in with --pill @@ -79,7 +85,7 @@ When pill records **no improvements**, it returns a distinct **reason** so you c | Reason | Meaning | What to do | |--------|---------|------------| -| **no_logs** | Output/prompts log for this prefix is empty or missing. | Ensure the tool that produced the logs (prr, story, split-exec) wrote to the expected files (e.g. `split-exec-output.log` when prefix is `split-exec`). Run from the directory that contains those logs, or pass that directory to the pill CLI. | +| **no_logs** | Output/prompts log for this prefix is empty or missing. | Ensure the tool that produced the logs (prr, story, split-exec) wrote to the expected files (e.g. `split-exec-output.log` when prefix is `split-exec`). Run from the directory that contains those logs, pass that directory to the pill CLI, or use **`--output-log`** / **`--prompts-log`** to point at the files. | | **no_api_key** | No LLM API key configured for the chosen provider. | Set the right key in `.env`: `ELIZACLOUD_API_KEY`, `ANTHROPIC_API_KEY`, or `OPENAI_API_KEY` (see Configuration in main README). When pill runs from the hook, it uses the same env as the parent process. | | **api_call_failed** | The audit LLM request failed (network, rate limit, model error). | Check the error message in the console or in the log line. Ensure the model ID is valid and the key has access. Look at **pill-prompts.log** for the request if it was written before the failure. | | **zero_improvements_from_llm** | The audit ran successfully but the LLM suggested zero improvements. | Not a failure — the logs were analyzed and the model had nothing to add. | diff --git a/tools/pill/cli.ts b/tools/pill/cli.ts index 930bd0f..11bec15 100644 --- a/tools/pill/cli.ts +++ b/tools/pill/cli.ts @@ -19,6 +19,10 @@ export interface CLIOptions { dryRun: boolean; verbose: boolean; instructionsOut?: string; + /** Resolved absolute path to output log (optional) */ + outputLog?: string; + /** Resolved absolute path to prompts log (optional) */ + promptsLog?: string; } export interface ParsedArgs { @@ -38,12 +42,23 @@ export function createCLI(): Command { .name('pill') .description('Program Improvement Log Looker - improve code from output.log and prompts.log') .version('0.1.0', '-V, --version', 'output the version number') - .argument('', 'Target directory containing logs and code to improve') + .argument( + '', + 'Project root for docs/source/tree; logs default here unless --output-log / --prompts-log' + ) .option('--audit-model ', 'Model for audit', validateModel, 'claude-opus-4-6') .option('--output-only', 'Only use output.log as evidence', false) .option('--prompts-only', 'Only use prompts.log as evidence', false) .option('--dry-run', 'Show audit findings without writing files', false) .option('--instructions-out ', 'Override path for pill-output.md') + .option( + '--output-log ', + 'Read this file as output.log (default: /[prefix-]output.log). Overrides PILL_OUTPUT_LOG_PATH.' + ) + .option( + '--prompts-log ', + 'Read this file as prompts.log (default: /[prefix-]prompts.log). Overrides PILL_PROMPTS_LOG_PATH.' + ) .option('-v, --verbose', 'Verbose logging', false); return program; @@ -65,6 +80,8 @@ export function parseArgs(program: Command): ParsedArgs { dryRun: opts.dryRun ?? false, verbose: opts.verbose ?? false, instructionsOut: opts.instructionsOut, + outputLog: opts.outputLog !== undefined ? path.resolve(opts.outputLog) : undefined, + promptsLog: opts.promptsLog !== undefined ? path.resolve(opts.promptsLog) : undefined, }; return { directory, options }; diff --git a/tools/pill/config.ts b/tools/pill/config.ts index 80bdeb3..f697397 100644 --- a/tools/pill/config.ts +++ b/tools/pill/config.ts @@ -4,7 +4,7 @@ */ import dotenv from 'dotenv'; import { homedir } from 'os'; -import { join } from 'path'; +import { join, resolve } from 'path'; import { existsSync, statSync } from 'fs'; import type { PillConfig } from './types.js'; import { resolveToolRepoScopeFilter } from './tool-repo-scope.js'; @@ -47,6 +47,23 @@ export interface LoadConfigInput { verbose: boolean; logPrefix?: string; instructionsOut?: string; + /** Resolved absolute path; wins over PILL_OUTPUT_LOG_PATH */ + outputLogPath?: string; + /** Resolved absolute path; wins over PILL_PROMPTS_LOG_PATH */ + promptsLogPath?: string; +} + +/** Resolve optional log path to absolute file path, or undefined. */ +function resolveOptionalLogFilePath(raw: string | undefined, label: string): string | undefined { + if (raw === undefined || raw === '') return undefined; + const abs = resolve(raw); + if (!existsSync(abs)) { + throw new Error(`Pill: ${label} not found: ${abs}`); + } + if (!statSync(abs).isFile()) { + throw new Error(`Pill: ${label} is not a regular file: ${abs}`); + } + return abs; } /** @@ -116,6 +133,15 @@ export function loadConfig(input: LoadConfigInput): PillConfig { const toolRepoScopeFilter = resolveToolRepoScopeFilter(input.targetDir, getEnv('PILL_TOOL_REPO_SCOPE_FILTER')); + const outputLogPath = resolveOptionalLogFilePath( + input.outputLogPath ?? getEnv('PILL_OUTPUT_LOG_PATH'), + 'Output log (PILL_OUTPUT_LOG_PATH or --output-log)' + ); + const promptsLogPath = resolveOptionalLogFilePath( + input.promptsLogPath ?? getEnv('PILL_PROMPTS_LOG_PATH'), + 'Prompts log (PILL_PROMPTS_LOG_PATH or --prompts-log)' + ); + const config: PillConfig = { targetDir: input.targetDir, llmProvider, @@ -130,6 +156,8 @@ export function loadConfig(input: LoadConfigInput): PillConfig { promptsOnly: input.promptsOnly, dryRun: input.dryRun, verbose: input.verbose, + outputLogPath, + promptsLogPath, }; config.instructionsOut = input.instructionsOut; diff --git a/tools/pill/context.ts b/tools/pill/context.ts index 163f120..e4a7eff 100644 --- a/tools/pill/context.ts +++ b/tools/pill/context.ts @@ -5,7 +5,7 @@ * (default 50k; PILL_OUTPUT_LOG_MAX_CHARS) to avoid 504 / FUNCTION_INVOCATION_TIMEOUT. */ import { readFileSync, existsSync, statSync } from 'fs'; -import { join } from 'path'; +import { join, resolve } from 'path'; import type { PillConfig, PillContext } from './types.js'; import { DEFAULT_PILL_CONTEXT_BUDGET_TOKENS } from './config.js'; import { @@ -126,7 +126,10 @@ export async function assembleContext( const prefix = config.logPrefix; const outputLogName = prefix ? `${prefix}-output.log` : 'output.log'; const promptsLogName = prefix ? `${prefix}-prompts.log` : 'prompts.log'; - const outputLogPath = join(targetDir, outputLogName); + const defaultOutputPath = join(targetDir, outputLogName); + const defaultPromptsPath = join(targetDir, promptsLogName); + const outputLogPath = config.outputLogPath ?? defaultOutputPath; + const promptsPath = config.promptsLogPath ?? defaultPromptsPath; // Debug: Log where pill is looking for logs console.log(`[Pill debug] Target directory: ${targetDir}`); @@ -172,7 +175,6 @@ export async function assembleContext( } let promptsDigest: string | undefined; - const promptsPath = join(targetDir, promptsLogName); // Debug: Log where pill is looking for prompts.log console.log(`[Pill debug] Looking for prompts.log: ${promptsPath}`); if (existsSync(promptsPath)) { @@ -217,20 +219,25 @@ export async function assembleContext( // Pill-on-itself: if primary logs are not pill's own, also include pill-output.log when present. const pillOutputName = 'pill-output.log'; const pillPromptsName = 'pill-prompts.log'; - if (outputLogName !== pillOutputName) { - const pillOutputPath = join(targetDir, pillOutputName); - if (existsSync(pillOutputPath)) { + const pillOutputPathInTarget = join(targetDir, pillOutputName); + const pillPromptsPathInTarget = join(targetDir, pillPromptsName); + const primaryOutputIsTargetPillSelf = resolve(outputLogPath) === resolve(pillOutputPathInTarget); + if (!primaryOutputIsTargetPillSelf) { + if (existsSync(pillOutputPathInTarget)) { try { - const pillRaw = readFileSync(pillOutputPath, 'utf-8'); + const pillRaw = readFileSync(pillOutputPathInTarget, 'utf-8'); if (pillRaw.trim()) { outputLog += '\n\n[PILL SELF-LOG]\n' + pillRaw; } } catch { /* ignore */ } } - const pillPromptsPath = join(targetDir, pillPromptsName); - if (existsSync(pillPromptsPath) && (!promptsDigest || promptsPath !== pillPromptsPath)) { + const primaryPromptsIsTargetPillSelf = resolve(promptsPath) === resolve(pillPromptsPathInTarget); + if ( + existsSync(pillPromptsPathInTarget) && + (!promptsDigest || !primaryPromptsIsTargetPillSelf) + ) { try { - const pillPromptsRaw = readFileSync(pillPromptsPath, 'utf-8'); + const pillPromptsRaw = readFileSync(pillPromptsPathInTarget, 'utf-8'); if (pillPromptsRaw.trim()) { const entries = parsePromptsLog(pillPromptsRaw); const formatted = formatPromptsRaw(entries); diff --git a/tools/pill/index.ts b/tools/pill/index.ts index 334e788..bf99f11 100644 --- a/tools/pill/index.ts +++ b/tools/pill/index.ts @@ -75,6 +75,8 @@ async function main(): Promise { dryRun: parsed.options.dryRun, verbose: parsed.options.verbose, instructionsOut: parsed.options.instructionsOut, + outputLogPath: parsed.options.outputLog, + promptsLogPath: parsed.options.promptsLog, }); console.log(getBanner()); if (config.verbose) { @@ -82,6 +84,8 @@ async function main(): Promise { directory: config.targetDir, auditModel: config.auditModel, dryRun: config.dryRun, + outputLogPath: config.outputLogPath ?? '(default under directory)', + promptsLogPath: config.promptsLogPath ?? '(default under directory)', }); } const out = await runPillAnalysis(config); diff --git a/tools/pill/llm/client.ts b/tools/pill/llm/client.ts index 578818e..e585c71 100644 --- a/tools/pill/llm/client.ts +++ b/tools/pill/llm/client.ts @@ -5,7 +5,8 @@ import Anthropic from '@anthropic-ai/sdk'; import OpenAI from 'openai'; import type { PillConfig } from '../types.js'; -import { debugPrompt, debugResponse } from '../logger.js'; +import { openAiChatCompletionContentToString } from '../../../shared/llm/openai-chat-content.js'; +import { debugPrompt, debugPromptError, debugResponse } from '../logger.js'; const ELIZACLOUD_API_BASE_URL = 'https://elizacloud.ai/api/v1'; @@ -116,13 +117,27 @@ export class LLMClient { const chosenModel = options?.model ?? this.model; const fullPrompt = systemPrompt ? `[SYSTEM]\n${systemPrompt}\n\n[USER]\n${prompt}` : prompt; - debugPrompt(`pill-${this.provider}`, fullPrompt, { model: chosenModel }); + const promptSlug = debugPrompt(`pill-${this.provider}`, fullPrompt, { model: chosenModel }); const is429 = (e: unknown) => (e as { status?: number })?.status === 429; const is5xx = (e: unknown) => { const s = (e as { status?: number })?.status; return s && s >= 500 && s < 600; }; + /** Fetch/TLS/socket failures before a normal HTTP response (OpenAI SDK often says "Connection error"). */ + const isTransientConnectionError = (e: unknown): boolean => { + if (is429(e)) return false; + const status = (e as { status?: number })?.status; + if (typeof status === 'number' && status >= 400 && status < 500) return false; + if (is5xx(e)) return false; + const msg = e instanceof Error ? e.message : String(e); + const node = e as NodeJS.ErrnoException; + const c = node?.cause as NodeJS.ErrnoException | undefined; + const codes = [node?.code, c?.code].filter(Boolean) as string[]; + if (codes.some((x) => /^(ECONNRESET|ETIMEDOUT|ENOTFOUND|ECONNREFUSED|EAI_AGAIN|EPIPE)$/i.test(x))) + return true; + return /connection error|fetch failed|socket hang up|network request failed|TLS|certificate/i.test(msg); + }; const requestUrl = this.provider === 'anthropic' @@ -143,24 +158,37 @@ export class LLMClient { for (let attempt = 0; attempt <= max429Retries; attempt++) { try { let response: LLMResponse | undefined; - for (let retry5xx = 0; retry5xx <= 1; retry5xx++) { + const maxTransientAttempts = 3; + transient: for (let transientTry = 0; transientTry < maxTransientAttempts; transientTry++) { try { - response = - this.provider === 'anthropic' - ? await this.completeAnthropic(prompt, systemPrompt, chosenModel) - : await this.completeOpenAI(prompt, systemPrompt, chosenModel); - break; + for (let retry5xx = 0; retry5xx <= 1; retry5xx++) { + try { + response = + this.provider === 'anthropic' + ? await this.completeAnthropic(prompt, systemPrompt, chosenModel) + : await this.completeOpenAI(prompt, systemPrompt, chosenModel); + break; + } catch (e) { + if (retry5xx < 1 && is5xx(e)) { + await new Promise((r) => setTimeout(r, 10_000)); + continue; + } + throw e; + } + } + break transient; } catch (e) { - if (retry5xx < 1 && is5xx(e)) { - await new Promise((r) => setTimeout(r, 10_000)); + if (transientTry < maxTransientAttempts - 1 && isTransientConnectionError(e)) { + const waitMs = 2000 * (transientTry + 1); + await new Promise((r) => setTimeout(r, waitMs)); continue; } - throw formatErrorWithHeaders(e, requestContext); + throw e; } } if (!response) throw new Error('LLM request failed'); - debugResponse(`pill-${this.provider}`, response.content, { + debugResponse(promptSlug, `pill-${this.provider}`, response.content, { model: chosenModel, usage: response.usage, }); @@ -172,6 +200,8 @@ export class LLMClient { await new Promise((r) => setTimeout(r, wait)); continue; } + const msg = err instanceof Error ? err.message : String(err); + debugPromptError(promptSlug, `pill-${this.provider}`, msg.slice(0, 12_000), { model: chosenModel }); throw formatErrorWithHeaders(err, requestContext); } } @@ -219,7 +249,7 @@ export class LLMClient { messages, max_completion_tokens: 16384, }); - const content = response.choices[0]?.message?.content ?? ''; + const content = openAiChatCompletionContentToString(response.choices[0]?.message?.content); return { content, usage: response.usage diff --git a/tools/pill/logger.ts b/tools/pill/logger.ts index 6a2da7b..15c6e0c 100644 --- a/tools/pill/logger.ts +++ b/tools/pill/logger.ts @@ -38,18 +38,27 @@ function safeStringify(value: unknown, pretty = false): string { function writeToPromptLog( slug: string, - kind: 'PROMPT' | 'RESPONSE', + kind: 'PROMPT' | 'RESPONSE' | 'ERROR', label: string, body: string, metadata?: Record ): void { if (!promptLogPath) return; try { - let header = `${DELIMITER}\n ${slug} ${kind}: ${label} (${body.length} chars)\n`; + const content = typeof body === 'string' ? body : String(body ?? ''); + if ((kind === 'PROMPT' || kind === 'RESPONSE') && (content.length === 0 || content.trim().length === 0)) { + appendFileSync( + promptLogPath, + `--- PILL_PROMPTLOG_EMPTY_BODY slug=${slug} kind=${kind} label=${JSON.stringify(label)} at=${new Date().toISOString()} ---\n`, + 'utf-8', + ); + return; + } + let header = `${DELIMITER}\n ${slug} ${kind}: ${label} (${content.length} chars)\n`; header += ` ${new Date().toISOString()}\n`; if (metadata) header += ` ${safeStringify(metadata, true)}\n`; header += `${DELIMITER}\n`; - appendFileSync(promptLogPath, header + body + `\n${DELIMITER}\n\n`, 'utf-8'); + appendFileSync(promptLogPath, header + content + `\n${DELIMITER}\n\n`, 'utf-8'); } catch (err) { console.error('Prompt log write failed:', err); } @@ -120,24 +129,48 @@ export function getPromptLogPath(): string | null { return promptLogPath; } -export function debugPrompt(label: string, prompt: string, metadata?: Record): void { +/** Returns slug — pass to {@link debugResponse} / {@link debugPromptError} for the same request. */ +export function debugPrompt(label: string, prompt: string, metadata?: Record): string { promptLogCounter++; const slug = promptSlug(promptLogCounter, label); const requestId = randomUUID(); promptRequestIdBySlug.set(slug, requestId); const mergedMeta = { ...metadata, requestId }; writeToPromptLog(slug, 'PROMPT', label, prompt, mergedMeta); + return slug; } -/** Uses the same counter as the preceding debugPrompt so PROMPT/RESPONSE share a slug for pairing. */ -export function debugResponse(label: string, response: string, metadata?: Record): void { - const slug = promptSlug(promptLogCounter, label); +export function debugResponse(slug: string, label: string, response: string, metadata?: Record): void { const requestId = promptRequestIdBySlug.get(slug); const mergedMeta = requestId ? { ...metadata, requestId } : metadata; if (requestId) promptRequestIdBySlug.delete(slug); + const trimmed = typeof response === 'string' ? response.trim() : ''; + if (!trimmed) { + writeToPromptLog( + slug, + 'ERROR', + label, + 'Empty or whitespace-only response body (HTTP success; no RESPONSE written).', + { ...mergedMeta, emptyBody: true }, + ); + return; + } writeToPromptLog(slug, 'RESPONSE', label, response, mergedMeta); } +export function debugPromptError( + slug: string, + label: string, + errorMessage: string, + metadata?: Record, +): void { + if (!promptLogPath) return; + const requestId = promptRequestIdBySlug.get(slug); + const mergedMeta = requestId ? { ...metadata, requestId } : metadata; + if (requestId) promptRequestIdBySlug.delete(slug); + writeToPromptLog(slug, 'ERROR', label, errorMessage, mergedMeta); +} + export function debug(_msg: string, _data?: unknown): void { // Only to console when verbose; log file gets everything via console.log // So we don't need to do anything special here - callers can use console.log for verbose diff --git a/tools/pill/orchestrator.ts b/tools/pill/orchestrator.ts index e23e955..c7446c5 100644 --- a/tools/pill/orchestrator.ts +++ b/tools/pill/orchestrator.ts @@ -18,7 +18,7 @@ import { AUDIT_SYSTEM_PROMPT } from './llm/prompts.js'; import { extractJsonLenient } from './llm/parse-json.js'; import { truncateHeadAndTailByChars, CHARS_PER_TOKEN } from '../../shared/utils/tokens.js'; import { chunkPlainText } from '../../shared/llm/story-read.js'; -import { runWithConcurrency } from '../../shared/run-with-concurrency.js'; +import { runWithConcurrencyAllSettled } from '../../shared/run-with-concurrency.js'; import { filterImprovementsByToolRepoScope } from './tool-repo-scope.js'; /** Default hard cap on user message length (chars) per audit HTTP request. Override: PILL_AUDIT_MAX_USER_CHARS. @@ -431,13 +431,22 @@ export async function runPillAnalysis(config: PillConfig): Promise< return parseImprovementPlan(chunkResponse.content); }); }); - const chunkPlans = await runWithConcurrency(chunkTasks, conc); + // WHY AllSettled: a single chunk HTTP error must not abort all remaining chunks. + // runWithConcurrency uses Promise.all (fail-fast); AllSettled collects partial results + // so the audit still produces improvements from successful chunks. (Pattern D, 2026-04-05) + const chunkSettled = await runWithConcurrencyAllSettled(chunkTasks, conc); // Merge chunk results: combine improvements, use first non-empty pitch/summary + // Log chunk failures but continue with partial results. const allImprovements: Improvement[] = []; let mergedPitch = ''; let mergedSummary = ''; - for (const chunkPlan of chunkPlans) { + for (const result of chunkSettled) { + if (result.status === 'rejected') { + console.warn(`[pill] Audit chunk failed (partial results will still be used): ${result.reason}`); + continue; + } + const chunkPlan = result.value; allImprovements.push(...chunkPlan.improvements); if (!mergedPitch && chunkPlan.pitch) mergedPitch = chunkPlan.pitch; if (!mergedSummary && chunkPlan.summary) mergedSummary = chunkPlan.summary; diff --git a/tools/pill/types.ts b/tools/pill/types.ts index c689781..0faee57 100644 --- a/tools/pill/types.ts +++ b/tools/pill/types.ts @@ -8,6 +8,16 @@ export interface PillConfig { openaiApiKey?: string; /** '' | undefined = output.log; 'story' = story-output.log; 'pill' = pill-output.log */ logPrefix?: string; + /** + * Absolute path to the output log to audit. When unset, uses `join(targetDir, logPrefix-output.log | output.log)`. + * CLI `--output-log` or env `PILL_OUTPUT_LOG_PATH`. + */ + outputLogPath?: string; + /** + * Absolute path to the prompts log. When unset, uses default name under targetDir. + * CLI `--prompts-log` or env `PILL_PROMPTS_LOG_PATH`. + */ + promptsLogPath?: string; /** Override path for pill-output.md (e.g. from --instructions-out). */ instructionsOut?: string; /** Max context tokens for the audit request (user + system). Overridable via PILL_CONTEXT_BUDGET_TOKENS. Default 35k; use 20k for small-context models. */ diff --git a/tools/prr/AUDIT-CYCLES.md b/tools/prr/AUDIT-CYCLES.md index e21e34c..677ed03 100644 --- a/tools/prr/AUDIT-CYCLES.md +++ b/tools/prr/AUDIT-CYCLES.md @@ -1,6 +1,6 @@ # Audit cycles -**Last updated:** 2026-03-28 · **Recorded cycles:** 70 · **Historical (legacy):** 4 +**Last updated:** 2026-04-09 · **Recorded cycles:** 79 · **Historical (legacy):** 4 Single audit log for output.log, prompts.log, and code changes. Use it to spot recurring patterns and avoid flip-flopping. @@ -49,13 +49,13 @@ Improvements should reinforce these, not reverse. | **Prompt size / noise** | Cap lessons, trim diff for tiny batches, filter global lessons by path relevance. | | **Snippet visibility** | Quality gate (too-short note), wider fallback for analysis batch, anchor-aware expansion. | | **Queue / log clarity** | One clear "still in queue" line, queue subtitle (to-fix vs already-verified), no contradictory "No issues" vs "N in queue". | -| **Allow-path / test path** | Expand for test-coverage, plausible-path checks, test path at issue build; migration journal in allowedPaths when review mentions it; consolidate-duplicate "other file" when refactor issue. Do *not* add a file when comment only *references* it — use isReferencePathInComment before persisting otherFile from CANNOT_FIX/WRONG_LOCATION. | +| **Allow-path / test path** | Expand for test-coverage, plausible-path checks, test path at issue build; migration journal in allowedPaths when review mentions it; consolidate-duplicate "other file" when refactor issue. Do *not* add a file when comment only *references* it — use isReferencePathInComment before persisting otherFile from CANNOT_FIX/WRONG_LOCATION. **Open by default** (Cycle 72): `isPathAllowedForFix` allows any repo-relative path that passes hard deny rules; `PRR_STRICT_ALLOWED_PATHS=1` restores the old first-segment heuristic (static `REPO_TOP_LEVEL` + dynamic PR changed-file dirs). | | **Loop prevention** | Counters and thresholds (WRONG_LOCATION/UNCLEAR, wrong-file, verifier rejection, CANNOT_FIX missing content); exhaust and dismiss instead of burning models. Auto-verify when bug pattern absent after N verifier rejections. Apply threshold checks (couldNotInject, ALREADY_FIXED) inside the fix loop, not only at analysis; run solvability on new comments before adding to queue. | | **File injection** | Basename fallback for short/fragment paths; placeholder detection before injection; hallucination guard for full-file rewrite output (< 15% of original = reject). **llm-api:** files over 200k chars or 5k lines get a **line-anchored excerpt** (from `### Issue N: path:line` / `primary:` / `path:line` in the prompt) or a **head excerpt** when no anchors — avoids skipping injection entirely on mega-files (Cycle 67). | -| **Approval/noise filter** | Summary/meta-review tables, approval comments ("Approve", "LGTM", "All issues resolved"), PR metadata requests — all dismissed in solvability. | +| **Approval/noise filter** | Summary/meta-review tables, rollup headings (**`Remaining Issues`**, **`Issues Fixed Since Previous Reviews`**, etc.), approval comments ("Approve", "LGTM", "All issues resolved"), PR metadata requests — all dismissed in solvability (0a2 / 0a3 / 0a). | | **Judge / verifier** | Judge NO must cite specific code or line numbers; format colons. Verifier: LESSON only for NO; for duplicate/shared-util steer to canonical lib/utils/..., not reference file; "Code before fix" empty/artifact → base verdict on Current Code and diff; multi-fix same file → judge by review comment. STALE→YES override when explanation indicates code/snippet not visible or "can't evaluate" (per judge instructions: if you would say "not in excerpt", say YES not STALE). | | **Output / UX** | Pluralize (1 file / N files); timing aggregated by phase; model recommendation only when real reasoning; AAR title from first meaningful line. Exhausted issues appear in AAR and handoff until resolved (fix, conversation, or other). | -| **Conflict resolution** | Skip batch when prompt > 40 KB; hasConflictMarkers(); 504/timeout → chunked fallback; heartbeat every 30 s. | +| **Conflict resolution** | Skip batch when prompt > 40 KB; hasConflictMarkers(); 504/timeout → chunked fallback; heartbeat every 30 s. **Submodule/directory** conflicts: `rm -rf` worktree path then checkout; **`git update-index --cacheinfo 160000,oid`** from `ls-files -u` when checkout says "no commit checked out" (Cycle 75). **Defer JS lock regen** when package.json has conflict markers; run after code merge (Cycle 75). **Lock file fallback:** ENOENT on primary pkg manager → try JS ecosystem alternatives. **JSON dupe key:** `findDuplicateJsonKey` rejects LLM output with repeated keys. | | **Dedup across authors** | Same file + same primary symbol + same caller file (e.g. runner.py) → heuristic merge even when authors differ. LLM dedup still runs for 3+ issues per file; GROUP lines take priority over NONE. | | **Verifier strength** | Escalation for previous rejections; stronger model for API/signature-related fixes (async, await, caller, TypeError). Weak default verifier kept approving call-site bugs. | | **Dismissal comments** | Skip when reason says "file no longer exists" / "file not found"; skip when file missing in workdir; post-filter comments that only restate code (e.g. "extracts metrics"). | @@ -73,7 +73,7 @@ Quick checks each audit. Drill into the category that matches what you changed. **Log vs reality (output.log / prompts.log)** - [ ] For runs that report "already verified" or "fixed": spot-check at least one such issue by reading the file at the cited path in the workdir (path from log: `Reusing existing workdir:` / `Workdir preserved:`). Confirm the bug pattern is actually gone. If the log says fixed but the file still has the bug, treat as a finding (stale verification, head change). - [ ] **prompts.log (Cycle 67):** PROMPT and RESPONSE JSON metadata share the same **`requestId`** (UUID) when using `shared/logger` — grep `requestId` to pair entries when concurrent calls reorder the file; slug number still pairs by convention. -- [ ] RESULTS SUMMARY "N issue(s) fixed and verified" counts only verifiedFixed/verifiedComments; it must not include issues dismissed as already-fixed (pill-output.md #2; cycles 33/34). +- [ ] RESULTS SUMMARY "N issue(s) fixed and verified" counts only verifiedFixed/verifiedComments; it must not include issues dismissed as already-fixed (cycles 33/34; see pill-output index / CHANGELOG for accounting themes). - [ ] Base-merge push: when log says "Merged latest X into Y" followed by "Everything up-to-date", the merge was a no-op (already merged). Verify `mergeBaseBranch` returns `alreadyUpToDate: true` so the caller doesn't attempt a pointless push. **Prompt quality** @@ -92,7 +92,7 @@ Quick checks each audit. Drill into the category that matches what you changed. - [ ] Judge: NO must cite specific code or line numbers; format uses colons. - [ ] Verifier: YES→NO override when explanation says "already correct", "comment mistaken", etc. - [ ] Verifier: NO→YES override when explanation says "not visible in excerpt", "can't confirm whether", "missing from excerpts", "truncated portion would contain" (Cycle 14). -- [ ] Summary/meta-review comments (status recap tables, "### Summary" with 3+ status phrases) dismissed as not-an-issue (solvability). +- [ ] Summary/meta-review comments (status recap tables, "### Summary" with 3+ status phrases, rollup headings: Remaining Issues / Issues Fixed Since Previous Reviews / …) dismissed as not-an-issue (solvability 0a2). - [ ] When snippet is "(file not found or unreadable)", batch analysis tries getFileContentFromRepo (git show HEAD:path) before sending to verifier. - [ ] No-changes lessons: single-issue uses "Fix for path:line - ..."; batch uses "(N issues in batch)" in global lesson. - [ ] Multi-file fix: when allowedPaths.length > 1 and body mentions callers (calls/caller/await/file:line), prompt includes nudge to update all listed files and call sites. @@ -164,6 +164,170 @@ Copy the block below for each new cycle. ## Recorded cycles +### Cycle 79 — 2026-04-09 (Cycle 78 audit → code improvements) + +**Artifacts audited:** Cycle 78 recommendations (verbose log noise, dismissal LLM waste, ops hints). + +**Findings:** N/A (implementation cycle). + +**Improvements implemented:** **`catalog-model-autoheal.ts`:** removed per-comment debug on `!dismissal` (Summary retained). **`outdated-model-advice.ts`:** removed per-comment debug for framing-without-parseable-pair (false positives on CodeRabbit bodies). **`dismissal-comments.ts`:** extended **`DISMISSAL_COMMENT_PHRASES`** (`intentional`, `downstream`, `error boundary`, `by design`) so Pass 1 skips LLM when comments already explain intent (matches common gpt-4o-mini **EXISTING** cases). **`rotation.ts`:** single-model warn now mentions **`PRR_LLM_MODEL`** / verifier / final-audit pins. **`git-conflict-lockfiles.ts`:** after failed lock regen, gray hint to resolve conflict markers in **package.json**/lockfile then re-run install manually. + +**Flip-flop check:** N — logging quieter; dismissal pre-check strictly expands matches; rotation/lockfile text additive. + +**Notes:** Deep catalog-detection tracing: use verbose + inspect comment bodies; no new env flag added. + +--- + +### Cycle 78 — 2026-04-09 (output.log + prompts.log: elizaOS/eliza#6702, workdir 4a425a4f) + +**Artifacts audited:** `/root/prr/output.log` (~1,361 lines), `/root/prr/prompts.log` (~2,488 lines, 16 in-process `llm-elizacloud` PROMPT blocks). PRR **b1b0b29**. Workdir: **`/root/.prr/work/4a425a4f063fc1bb`**. + +**Findings:** +- **Medium (ops):** No **`PRR_LLM_MODEL`** — run defaulted to **qwen-3-235b** while fixer path used **anthropic/claude-opus-4.5** after rotation; **only one** ElizaCloud model left after skip list → single failure blocks fixes until rotation (log warns). +- **Medium (environment):** PR **mergeable: dirty** vs **develop**; dry-merge reported **`bun.lock` modify/delete**; **`bun install failed, continuing...`** — risk of confusing local install state when resolving lock conflicts (mostly PR hygiene, not a PRR logic bug). +- **Low:** **Dismissal-comments** phase: **3** gpt-4o-mini calls → **0** comments posted (skips: already exists, too generic, fix-failure categories) — useful idempotency but measurable token spend for no GitHub delta. +- **Low:** **Catalog auto-heal** ran full comment scans twice with **0** heals and verbose per-comment debug — fine for correctness; could rate-limit debug on large PRs. + +**Improvements implemented:** None in this cycle (audit-only). Prior cycle: RESULTS SUMMARY note when success exit + **Remaining > 0** (Cycle 77); this log shows that note present (lines ~1290–1292). + +**Flip-flop check:** N. + +**Notes:** Exit **audit_passed** / **All issues resolved**; **5** verified relevant; **48** dismissed; **Remaining 4** = exhausted/**remaining** by location. **Spot-check:** `agent/typescript/index.ts` in workdir **~331–334** — `messageService` absent → log line + **`continue`** (not hard exit); **~347–356** — **`for (const rt of runtimes)`** **`stop()`** loop present — aligns with verified/dismissal narrative for harness/REPL issues. + +--- + +### Cycle 77 — 2026-04-08 (eliza #6702 audit: ALREADY_FIXED cluster vs empty queue) + +**Artifacts audited:** Conversation handoff from `output.log` audit (PRR b1b0b29 on elizaOS/eliza#6702): `BUG DETECTED: unresolvedIssues is empty but N comments are neither verified nor dismissed` after no-change **ALREADY_FIXED**; RESULTS SUMMARY “All issues resolved” beside **Remaining** from exhausted dismissals. + +**Findings:** +- **Medium:** **ALREADY_FIXED** cluster handling removed every cluster id from **`unresolvedIssues`** even when **`dismissIssue`** was skipped (e.g. dedup sibling id missing from the fetched **`comments`** array), leaving threads unaccounted and triggering **`checkEmptyIssues`** repopulate. +- **Low:** Success exit read as “zero backlog” while **Remaining** still counted exhausted/**remaining** dismissals (deduped by file:line). + +**Improvements implemented:** **`no-changes-verification.ts`:** resolve dismiss row from **`comments`**, queued issues, or anchor comment for same-cluster ids; **`filterUnresolvedKeepUnaccountedClusterMembers`** — only drop queued rows that are verified or dismissed; **ALREADY_FIXED exhaust** path dismisses full dedup cluster (same as any-threshold). **`reporter.ts`:** gray note under Exit when success-like exit and **Remaining > 0**. Test: **`tests/no-changes-already-fixed-cluster.test.ts`**. + +**Flip-flop check:** N — stricter accounting + UX copy; repopulate guard still exists for genuine mismatches. + +**Notes:** Spot-check N/A (log-only handoff); behavior covered by new unit test for sibling **B** missing from **`comments`** while **`duplicateMap`** links **A → B**. + +--- + +### Cycle 76 — 2026-04-08 (pill-output open index: path-fragment, README env, skip-list doc) + +**Artifacts audited:** `pill-output.md` PATTERNS & OPEN WORK index (2026-04-08); no new `output.log` Model Performance table for this pass. + +**Findings:** +- **Medium:** Pill index called for a dedicated **`path-fragment`** dismissal value vs lumping fragments under **`path-unresolved`** — metrics and thread-reply copy are clearer when split. +- **Low:** README operator table omitted several vars already documented in **`.env.example`**. +- **Low:** Skip list refresh is recurring **ops** work; code lacked an explicit “last reviewed” / refresh contract on the static array. + +**Improvements implemented:** **`pathDismissCategoryForNotFound`** returns **`path-fragment`** for **`isReviewPathFragment`** / resolution **`fragment`**; **`path-unresolved`** for **`ambiguous`** only. **`DismissedIssue.category`**, **`assessSolvability`**, thread-reply set + copy, **AGENTS** / **DEVELOPMENT** path rules, **README** env rows, **`ELIZACLOUD_SKIP_MODEL_IDS`** docblock (**last reviewed 2026-04-08**). State load migrates legacy fragment **`missing-file`** and fragment-shaped **`path-unresolved`** → **`path-fragment`**. + +**Flip-flop check:** N — additive category + load migration; **`path-unresolved`** retained for ambiguous paths. + +**Notes:** No new skip-list IDs added (no fresh Model Performance evidence in this pass). + +--- + +### Cycle 75 — 2026-04-07 (milady#1722 re-run: defer lock regen + submodule index) + +**Artifacts audited:** CI output after Cycle 74 landed — same PR merge (`develop` into `odi-dev`). + +**Findings:** +- **High:** Deferred lock regen was not implemented in Cycle 74. `bun install` / `npm install` / `yarn install` still ran while `package.json` contained `<<<<<<<`, causing EJSONPARSE; all fallbacks failed; user saw "No JS package manager available" incorrectly. +- **High:** Submodule `eliza`: `git checkout --theirs` + `git add` still failed (`does not have a commit checked out` / `unable to index file`). Attempt 2 then hit EISDIR again. + +**Improvements implemented:** Defer `handleLockFileConflicts` when JS lockfiles are present and `package.json` has conflict markers; `runDeferredLockRegenIfNeeded` after Attempt 1 and before final return. Submodule: `rm -rf` path under worktree, retry checkout, then **`stageSubmoduleGitlinkFromIndex`** via `ls-files -u` + `update-index --cacheinfo 160000,oid`. Attempt 2 retries directory paths with `resolveSubmoduleConflict` before `readFileSync`. Tests: `tests/git-conflict-lock-defer.test.ts`. + +**Flip-flop check:** N. + +**Notes:** Spot-check N/A (CI workdir). + +--- + +### Cycle 74 — 2026-04-07 (conflict resolution audit; milady#1722 EISDIR + merge quality) + +**Artifacts audited:** `logs/output.log`, `logs/prompts.log` — milady-ai/milady#1722 (`odi-dev` ← `develop`), exit `merge_conflicts` after 5/6 files auto-resolved, 1 remaining (`eliza` submodule). No review issues were processed. Workdir: `/home/runner/.prr/work/e1728b2ad8df995b` (CI, not accessible for spot-check). + +**Findings:** +- **High:** `eliza` is a git submodule (gitlink). `readFileSync` on Attempt 2 threw `EISDIR: illegal operation on a directory, read` — generic catch logged the error but the run exited with `merge_conflicts`. No submodule-aware handling existed anywhere in the conflict resolution stack. The entire run was blocked; zero review issues were processed. +- **Medium:** `bun.lock` correctly deleted, but `bun install` failed with `ENOENT` (bun not in CI PATH). No fallback to `npm install` or `yarn install`; lock file left missing. Conflict technically cleared by delete+stage, but regeneration silently failed. +- **Medium:** LLM (claude-sonnet-4-5) resolved `package.json` but **dropped** HEAD-side scripts (`verify`, `verify:typecheck`, `verify:lint`, `dev:web:ui`, `milady:doctor`, `milady:db-reset`) and produced a **duplicate `"dev"` key** (lines 387+390 in response). Search/replace fell back to fuzzy matching (73.8%) and progressive-trim — both indicate the LLM's search block didn't match the file well. +- **Low:** `parseMergeTreeConflictPaths` listed `Merge` as a conflicted file path. The second regex `CONFLICT ([^)]+): (\S+)` captured the word `Merge` from `CONFLICT (submodule): Merge conflict in eliza`. + +**Improvements implemented:** +1. **Submodule/directory detection** (`git-conflict-resolve.ts`): `detectSubmoduleConflicts` checks `git ls-files -s` for mode `160000` (gitlink entries) and `lstatSync` for directories. `resolveSubmoduleConflict` accepts theirs (base branch pointer) with `git checkout --theirs`, falls back to ours. Runs before the LLM code-file loop so EISDIR never fires. +2. **Directory guard in marker scan** (`shared/git/git-lock-files.ts`): `findFilesWithConflictMarkers` now calls `lstatSync` and skips directories before `readFileSync`. +3. **Directory guard in prompt build** (`git-conflict-prompts.ts`): `buildConflictResolutionPromptWithContent` checks `lstatSync` before reading; directories go to `unreadable` list instead of throwing. +4. **Lock file fallback chain** (`git-conflict-lockfiles.ts`): When primary package manager ENOENT's (e.g. `bun` not in PATH), tries `npm install`, `yarn install`, `pnpm install` before giving up. Extracted `trySpawn` helper. +5. **`parseMergeTreeConflictPaths` fix** (`shared/git/git-conflicts.ts`): Changed second regex from `CONFLICT ([^)]+): (\S+)` to specifically match `Merge conflict in ` and ` deleted/renamed/added` formats so `Merge` is not captured as a file path. +6. **Duplicate JSON key detection** (`git-conflict-resolve.ts`): `findDuplicateJsonKey` scans resolved JSON for duplicate keys at top two nesting levels; `validateResolvedContent` rejects resolutions with duplicate keys (catches LLM merge artifacts like duplicate `"dev"` in package.json). +7. **`package.json`-specific prompt rules** (`error-helpers.ts`): `getConflictFileTypeRules` adds explicit instructions for package.json — no duplicate keys, merge ALL entries from both sides, prefer HEAD when keys conflict. + +**Flip-flop check:** N — all changes are additive; no prior behavior reverted. + +**Notes:** Workdir is CI (GitHub Actions runner), not accessible for spot-check of resolved files. Prompt log confirms the LLM response merged both sides of ROADMAP.md, vite.config.ts, repository.ts, and most of package.json correctly — the duplicate `"dev"` key and dropped scripts were the main quality issues. The `eliza` EISDIR was the blocker that prevented the run from reaching review issues. + +--- + +### Cycle 73 — 2026-04-05 (pill-output.md full triage; 4 code fixes) + +**Artifacts audited:** `pill-output.md` (4,240+ lines across 16 pill runs, 2026-03-23 through 2026-04-05). No single output.log; this was a cross-run triage pass adding per-item Status lines and a PATTERNS & OPEN WORK report section. + +**Findings:** +- **High:** `commentStatuses` not cleared on HEAD SHA change. `manager.ts` zeroed `verifiedFixed`/`verifiedComments` on rebase but left `commentStatuses` with stale `status: 'resolved'` entries — callers see contradictory maps. (Pattern H) +- **High:** `redactUrlCredentials` (`shared/git/redact-url.ts`) only handled HTTPS URLs; SSH-style `git@host:org/repo` and Windows `\r` were not redacted. (Pattern C) +- **Medium:** Pill chunked audit used `runWithConcurrency` (fail-fast via `Promise.all`) — a single chunk HTTP error aborted all remaining chunks with no partial results. (Pattern D) +- **Medium:** `prr-fix:` commit-scan regex `^prr-fix:(.+)$` could capture trailing non-whitespace text as part of the ID. (Pattern B) +- **Low:** `tryResolvePathWithExtensionVariants` doesn't call `stripGitDiffPathPrefix` before trying variants — a path like `a/tsconfig.js` won't match. (Open) +- **Low:** Truncation guard in `tools/prr/llm/client.ts` may demote UNFIXED→UNCERTAIN for line-centered excerpts that intentionally cover the fix site. (Pattern G, Open) + +**Improvements implemented:** +- `tools/prr/state/manager.ts`: Clear `commentStatuses` verified/resolved entries on HEAD change; log count. +- `shared/git/redact-url.ts`: Add SSH URL redaction + `\r` to HTTPS char class. +- `shared/git/git-commit-scan.ts`: Tighten `prr-fix:` regex to `^prr-fix:(\S+)`. +- `tools/pill/orchestrator.ts`: Switch to `runWithConcurrencyAllSettled` for chunk audit; partial results collected even on chunk failure. + +**Flip-flop check:** N — all changes are additive or narrowing fixes; none revert prior behavior. The regex tightening could theoretically miss an ID with embedded whitespace, but such IDs are not valid GitHub comment IDs. + +**Notes:** Spot-checked `manager.ts` load() — confirmed repair pass does mutate state (adds to dismissed from overlap). Workdir verification not applicable (no single run workdir; triage pass only). SSH redaction is opportunistic (SSH auth doesn't embed tokens, but repo names can be private; redaction prevents repo name leakage in logs). Pattern K (central CONFIGURATION.md) and Pattern G (fixSiteInWindow flag) remain open. + +--- + +### Cycle 72 — 2026-04-05 (elizaOS/eliza#6702; allowedPaths blocks primary target) + +**Artifacts audited:** output.log (229 KB, 2572 lines) from elizaOS/eliza#6702 (`odi-develop` → `develop`). Workdir `/root/.prr/work/4a425a4f063fc1bb`. Fixer: `anthropic/claude-opus-4.5` via ElizaCloud; verifier: `alibaba/qwen-3-235b`; dedup: `openai/gpt-4o-mini`. Duration ~24 min, 52 LLM calls. + +**Findings:** +- **Medium:** `agent/typescript/index.ts` was the primary target for 5+ issues but `isPathAllowedForFix` rejected it because first segment `agent` is not in `REPO_TOP_LEVEL`. File never injected; fixer couldn't see it in batch runs. Single-issue focus mode eventually worked (no injection needed — S/R on prompt snippet), but this burned 3+ full batch iterations and many focus slots doing nothing. Root cause: `REPO_TOP_LEVEL` acts as a static allowlist of first-segment names, and any repo with a non-standard top-level dir (e.g. `agent/`, `harness/`, `service/`) silently blocks all issues targeting those paths. +- **Medium:** Qwen-3-235b batch analysis (#0005, 43k chars, 21 issues) took 8 minutes — the longest single call. Dominated wall time. Splitting large batches or using a faster analysis model would help. +- **Medium:** Stale re-check false positive: Qwen said `break` still existed at index.ts:303 during push iteration 2 analysis, but workdir had `continue`. The fix had been pushed; verifier hallucinated or used stale snippet. Caused unnecessary un-verify + re-fix cycle. +- **Low:** Meta-review checklist comments (`ic-4188073508-2` "Remaining Issues", `ic-4188073508-1` "Issues Fixed Since Previous Reviews") cycled 3-4 times each through single-issue focus before couldNotInject dismissal. Solvability should catch these earlier. +- **Low:** `(PR comment)` synthetic-path issues consumed focus slots across iterations before dismissal. + +**Improvements implemented:** (1) **Open-by-default** allowed paths + `PRR_STRICT_ALLOWED_PATHS=1` strict mode; `setDynamicRepoTopLevelDirs` in `main-loop-setup.ts`; see CHANGELOG / AGENTS.md / README. (2) **Solvability rollups:** `isSummaryOrMetaReviewComment` extended for CodeRabbit-style headings (`Remaining Issues`, `Issues Fixed Since Previous Reviews`, etc.); dismisses at 0a2 including `(PR comment)` before path inference. (3) **Analysis batching:** ElizaCloud `batchCheckIssuesExist` caps **10 issues per batch** for **qwen-3-235b** / **qwen-3-235** (Cycle 72 wall-time finding). Tests: `tests/path-utils.test.ts`, `tests/solvability-pr-comment.test.ts`. **Not implemented here:** stale re-check false positive (batch judge said YES vs pushed `continue`) — needs separate verifier / snippet / head-sync design. + +**Flip-flop check:** Mixed — open paths: N (strict opt-in). Rollup headings: **Y** if a rare comment uses e.g. `### Remaining Issues` for a *single* concrete fix (unusual); batch cap for Qwen-235b: N (smaller batches only). + +**Notes:** Spot-checked `agent/typescript/index.ts:305-307` (break→continue fix present), `scripts/plugin-submodules-dev.mjs:80-83` (readRootPackage dedup fix present), `agent/typescript/index.ts:212` (createRuntimes fix present). The old first-segment heuristic was meant to block `lodash/fp/...`-style package references from comment bodies, but those never resolve to real files in the workdir anyway — `pathExists` catches them. The real protection (absolute paths, node_modules, dist, internal segments) is unaffected. + +--- + +### Cycle 71 — 2026-04-02 (pill-output index; docs alignment) + +**Artifacts audited:** Historical **`pill-output.md`** (~5.7k lines of mixed **Done** / **Open** / **N/A** / obsolete foreign-path items); **CHANGELOG** [Unreleased]; **AUDIT-CYCLES** through Cycle 70; **DEVELOPMENT.md** pill triage section. + +**Findings:** +- **Low:** Monolithic pill-output duplicated **CHANGELOG** / cycle narrative and buried remaining work under thousands of closed items. + +**Improvements implemented:** Replaced **`pill-output.md`** with a **short deduplicated index** (Open / Partial / ops only) + re-triage instructions; **DEVELOPMENT.md** — **`pill-output.md`** described as index + append workflow; **CHANGELOG** [Unreleased] — doc thinning note; this cycle. + +**Flip-flop check:** N — documentation and artifact shape only; no runtime behavior change. + +**Notes:** No workdir spot-check (not an output.log “already verified” audit). Old numbered pill items (e.g. #18, #1793) remain discoverable via **git history** and **CHANGELOG** / earlier cycles. + +--- + ### Cycle 70 — 2026-03-28 (basename + PR diff, repopulate resolvedPath, dedup-cluster ALREADY_FIXED, AAR) **Artifacts audited:** output.log / handoff from milady-ai/milady#1511-style run (workdir `~/.prr/work/f4b02ae0e531442b`); themes: bare **`smoke.testcafe.js`**, empty **`unresolvedIssues`** vs unaccounted duplicate IDs, misleading “remaining”. diff --git a/tools/prr/analyzer/prompt-builder.ts b/tools/prr/analyzer/prompt-builder.ts index 52b73bd..1a40bdb 100644 --- a/tools/prr/analyzer/prompt-builder.ts +++ b/tools/prr/analyzer/prompt-builder.ts @@ -10,7 +10,7 @@ import { } from '../../../shared/path-utils.js'; import { SNIPPET_PLACEHOLDER } from '../workflow/helpers/solvability.js'; import { estimateTokens } from '../../../shared/utils/tokens.js'; -import { getTestPathForIssueLike, issueRequestsTestsText } from './test-path-inference.js'; +import { getTestPathForIssueLike, issueRequestsTestsText, testBasenameWithSuffix } from './test-path-inference.js'; import { debug } from '../../../shared/logger.js'; import { getOutdatedModelCatalogDismissal } from '../workflow/helpers/outdated-model-advice.js'; @@ -205,12 +205,12 @@ export function getMentionedTestFilePaths( if (dir && !ancestorDirs.includes(dir)) ancestorDirs.unshift(dir); for (const ancestor of ancestorDirs) { - push(`${ancestor}/__tests__/${stem}.test${ext}`); - push(`${ancestor}/__tests__/${stem}.spec${ext}`); + push(`${ancestor}/__tests__/${testBasenameWithSuffix(stem, ext, 'test')}`); + push(`${ancestor}/__tests__/${testBasenameWithSuffix(stem, ext, 'spec')}`); } if (dir) { - push(`${dir}/${stem}.test${ext}`); - push(`${dir}/${stem}.spec${ext}`); + push(`${dir}/${testBasenameWithSuffix(stem, ext, 'test')}`); + push(`${dir}/${testBasenameWithSuffix(stem, ext, 'spec')}`); } const existing = options?.pathExists ? ranked.filter((p) => options.pathExists!(p)) : []; diff --git a/tools/prr/analyzer/severity.ts b/tools/prr/analyzer/severity.ts index 9391d55..f984b0e 100644 --- a/tools/prr/analyzer/severity.ts +++ b/tools/prr/analyzer/severity.ts @@ -75,6 +75,11 @@ export function sortByPriority(issues: UnresolvedIssue[], order: PriorityOrder): const sorted = [...issues]; // Clone to avoid mutating input sorted.sort((a, b) => { + // Blast radius: in-scope (true/undefined) before explicit out-of-scope (false). WHY undefined = no graph. + const aOut = a.inBlastRadius === false ? 1 : 0; + const bOut = b.inBlastRadius === false ? 1 : 0; + if (aOut !== bOut) return aOut - bOut; + let primary: number; switch (order) { case 'important': diff --git a/tools/prr/analyzer/test-path-inference.ts b/tools/prr/analyzer/test-path-inference.ts index 8d29a3e..0b4b3f9 100644 --- a/tools/prr/analyzer/test-path-inference.ts +++ b/tools/prr/analyzer/test-path-inference.ts @@ -34,6 +34,25 @@ function normalizeRelativePath(path: string): string { return path.replace(/\/\.\//g, '/').replace(/\/[^/]+\/\.\.\//g, '/'); } +/** + * Build `name.test.ts` / `name.spec.ts` from a basename stem (no extension). + * WHY: When stem is already `foo.test` (from `foo.test.ts`), appending `.test` again yields `foo.test.test.ts` (recovery/prompt-builder audit). + */ +export function testBasenameWithSuffix(stem: string, extWithDot: string, kind: 'test' | 'spec'): string { + const marker = kind === 'test' ? '.test' : '.spec'; + if (stem.toLowerCase().endsWith(marker)) { + return `${stem}${extWithDot}`; + } + return `${stem}${marker}${extWithDot}`; +} + +/** Collapse accidental `.test.test.ts` / `.spec.spec.ts` suffixes (duplicated inference or bot typos). */ +export function normalizeDoubledTestExtension(path: string): string { + return path + .replace(/\.test\.test\.(ts|tsx|js|jsx)$/i, '.test.$1') + .replace(/\.spec\.spec\.(ts|tsx|js|jsx)$/i, '.spec.$1'); +} + export function getTestPathForIssueLike( issue: TestPathIssueLike, options?: { pathExists?: (path: string) => boolean; forceTestPath?: boolean; keepExistingTestPath?: boolean } @@ -45,12 +64,13 @@ export function getTestPathForIssueLike( const body = issue.comment.body ?? ''; const explanation = issue.explanation ?? ''; const combined = `${body} ${explanation}`; + const normOut = (p: string) => normalizeDoubledTestExtension(p.replace(/\\/g, '/')); // WHY preserve explicit test paths first: when the review is already anchored on // `foo.test.ts`, that path is stronger evidence than the wording in the body. // Coverage-only phrasing ("missing coverage here") should not kick the issue out // of the create-file/test-file flow just because it doesn't repeat "add tests". - if (isTestOrSpecPath(path)) return keepExistingTestPath ? path : null; + if (isTestOrSpecPath(path)) return keepExistingTestPath ? normOut(path) : null; if (!forceTestPath && !issueRequestsTestsText(combined)) return null; const dir = path.includes('/') ? path.replace(/\/[^/]+$/, '') : ''; @@ -64,34 +84,36 @@ export function getTestPathForIssueLike( }; const explicitFull = body.match(/(?:^|[\s(])`?([a-zA-Z0-9_/.()-]+__tests__[a-zA-Z0-9_/.()-]+\.(?:test|spec)\.(?:ts|js))`?(?:\s|$|[,)])/); - if (explicitFull?.[1]) return explicitFull[1].replace(/^[\s(]+|[\s)]+$/g, ''); + if (explicitFull?.[1]) return normOut(explicitFull[1].replace(/^[\s(]+|[\s)]+$/g, '')); const explicitRel = body.match(/(?:in|to|add\s+tests?\s+to?|tests?\s+in)\s+[`']?([a-zA-Z0-9_/.()-]+\.(?:test|spec)\.(?:ts|js))[`']?(?:\s|$|[,)])/i); if (explicitRel?.[1]) { const name = explicitRel[1].replace(/^[\s'`]+|[\s'`]+$/g, ''); - if (name.includes('/')) return name; + if (name.includes('/')) return normOut(name); if (dir) { const colocated = normalizeRelativePath(`${dir}/${name}`); const integration = normalizeRelativePath(`${dir}/../__tests__/integration/${name}`); - return preferOrFallback(colocated, integration); + return normOut(preferOrFallback(colocated, integration)); } - return name; + return normOut(name); } const backtick = body.match(/`([a-zA-Z0-9_/.()-]+\.(?:test|spec)\.(?:ts|js))`/); if (backtick?.[1]) { const name = backtick[1]; - if (name.includes('/')) return name; + if (name.includes('/')) return normOut(name); if (dir) { const colocated = normalizeRelativePath(`${dir}/${name}`); const integration = normalizeRelativePath(`${dir}/../__tests__/integration/${name}`); - return preferOrFallback(colocated, integration); + return normOut(preferOrFallback(colocated, integration)); } - return name; + return normOut(name); } if (!/\.(?:ts|tsx|js|jsx)$/.test(path)) return null; - const base = path.replace(/^.*\//, '').replace(/\.(ts|tsx|js|jsx)$/, '.test.$1'); + const fileStem = path.replace(/^.*\//, '').replace(/\.(ts|tsx|js|jsx)$/i, ''); + const ext = (path.match(/\.(ts|tsx|js|jsx)$/i) ?? [])[1] ?? 'ts'; + const base = testBasenameWithSuffix(fileStem, `.${ext}`, 'test'); if (dir) { const colocated = normalizeRelativePath(`${dir}/${base}`); const integration = normalizeRelativePath(`${dir}/../__tests__/integration/${base}`); @@ -99,13 +121,13 @@ export function getTestPathForIssueLike( // Same src-level __tests__ (e.g. packages/typescript/src/__tests__/database.test.ts when path is src/types/database.ts). Prompts.log audit: TARGET FILE(S) listed non-existent src/types/database.test.ts. const srcLevelTests = /\/src\//.test(dir) ? normalizeRelativePath(`${dir}/../__tests__/${base}`) : null; if (pathExists && srcLevelTests) { - if (pathExists(srcLevelTests)) return srcLevelTests; - if (pathExists(colocated)) return colocated; - if (pathExists(testsRoot)) return testsRoot; - if (integration && pathExists(integration)) return integration; - return srcLevelTests; + if (pathExists(srcLevelTests)) return normOut(srcLevelTests); + if (pathExists(colocated)) return normOut(colocated); + if (pathExists(testsRoot)) return normOut(testsRoot); + if (integration && pathExists(integration)) return normOut(integration); + return normOut(srcLevelTests); } - return preferOrFallback(colocated, integration, testsRoot); + return normOut(preferOrFallback(colocated, integration, testsRoot)); } - return base; + return normOut(base); } diff --git a/tools/prr/analyzer/types.ts b/tools/prr/analyzer/types.ts index e0ebd75..1335588 100644 --- a/tools/prr/analyzer/types.ts +++ b/tools/prr/analyzer/types.ts @@ -63,6 +63,13 @@ export interface UnresolvedIssue { * Used for primary path in prompts and snippet fetch so the fixer sees the correct file. */ resolvedPath?: string; + /** + * Blast radius: primary path is inside PR changed set + import/proximity graph (when graph was built). + * **WHY undefined:** Feature disabled, build failed, or no graph — treat as in-scope (no behavior change). + */ + inBlastRadius?: boolean; + /** Shortest hop distance from a changed file (0 = changed in PR). Set when graph was built and path was in radius. */ + blastRadiusDepth?: number; } /** Canonical primary path for an issue. Prefer resolvedPath once basename comments are expanded to a tracked repo path. */ diff --git a/tools/prr/git/git-conflict-chunked.ts b/tools/prr/git/git-conflict-chunked.ts index b47f35e..71583a2 100644 --- a/tools/prr/git/git-conflict-chunked.ts +++ b/tools/prr/git/git-conflict-chunked.ts @@ -7,6 +7,7 @@ */ import type { LLMClient } from '../llm/client.js'; +import { getConflictFileTypeRules } from '../llm/error-helpers.js'; import { debug } from '../../../shared/logger.js'; import { MIN_CONFLICT_RESOLUTION_SIZE_RATIO, @@ -113,7 +114,11 @@ export function buildConflictResolutionPromptThreeWay( const parseHint = previousParseError ? `\n\nIMPORTANT: A previous resolution attempt had a syntax/parse error: "${previousParseError}". Ensure the RESOLVED code is complete, valid code (e.g. close all block comments with */, no missing commas or brackets).\n` : ''; - return `${fileHint}${overviewBlock}Merge the changes from both sides relative to BASE. Produce a single resolved version (no conflict markers).${parseHint} + const fileRules = filePath ? getConflictFileTypeRules(filePath) : ''; + const fileRulesBlock = fileRules + ? `\n\nApply to the RESOLVED block:${fileRules}` + : ''; + return `${fileHint}${overviewBlock}Merge the changes from both sides relative to BASE. Produce a single resolved version (no conflict markers).${parseHint}${fileRulesBlock} BASE (common ancestor): \`\`\` @@ -1211,6 +1216,28 @@ export async function resolveConflictsWithTopTailsFallback( resolvedLines = resolvedCode.split('\n'); explanations.push(`Lines ${chunk.startLine}-${chunk.endLine}: top+tails`); } + // Strip contextBefore lines that the model may have echoed back. + // The stitching code already preserves non-conflict lines before the chunk, + // so including them in the resolved output would duplicate them. + if (chunk.contextBefore.length > 0 && resolvedLines.length > chunk.contextBefore.length) { + const ctxLines = chunk.contextBefore; + let prefixMatch = true; + for (let ci = 0; ci < ctxLines.length; ci++) { + if (resolvedLines[ci]?.trim() !== ctxLines[ci]?.trim()) { + prefixMatch = false; + break; + } + } + if (prefixMatch) { + debug('Top+tails: stripping echoed contextBefore from resolved output', { + filePath, + strippedLines: ctxLines.length, + chunkStart: chunk.startLine, + }); + resolvedLines = resolvedLines.slice(ctxLines.length); + } + } + resolutions.set(chunk.startLine, resolvedLines); } catch (e) { debug('Top+tails fallback LLM error', { filePath, error: e }); diff --git a/tools/prr/git/git-conflict-lockfiles.ts b/tools/prr/git/git-conflict-lockfiles.ts index 8470052..70431b3 100644 --- a/tools/prr/git/git-conflict-lockfiles.ts +++ b/tools/prr/git/git-conflict-lockfiles.ts @@ -3,13 +3,44 @@ */ import chalk from 'chalk'; import { join } from 'path'; -import { existsSync } from 'fs'; +import { existsSync, readFileSync } from 'fs'; import { unlink } from 'fs/promises'; import type { SimpleGit } from 'simple-git'; -import { isLockFile, getLockFileInfo, findFilesWithConflictMarkers } from '../../../shared/git/git-clone-index.js'; +import { + isLockFile, + getLockFileInfo, + findFilesWithConflictMarkers, + hasConflictMarkers, +} from '../../../shared/git/git-clone-index.js'; import type { Config } from '../../../shared/config.js'; import { setTokenPhase, debug } from '../../../shared/logger.js'; +/** Regenerate commands that read package.json (install fails if JSON still has conflict markers). */ +const JS_LOCK_REGEN_CMDS = new Set(['bun install', 'npm install', 'yarn install', 'pnpm install']); + +/** + * True when any lock file in the list is regenerated via a JS package manager that + * parses package.json. WHY: Running install while package.json contains `<<<<<<<` + * yields EJSONPARSE and wastes time (audit milady#1722 re-run). + */ +export function lockRegenerationRequiresCleanPackageJson(lockFiles: string[]): boolean { + for (const f of lockFiles) { + const info = getLockFileInfo(f); + if (info && JS_LOCK_REGEN_CMDS.has(info.regenerateCmd)) return true; + } + return false; +} + +/** True when workdir package.json exists and still has merge conflict markers. */ +export function packageJsonHasConflictMarkers(workdir: string): boolean { + const p = join(workdir, 'package.json'); + if (!existsSync(p)) return false; + try { + return hasConflictMarkers(readFileSync(p, 'utf-8')); + } catch { + return true; + } +} export async function handleLockFileConflicts( git: SimpleGit, @@ -121,8 +152,41 @@ export async function handleLockFileConflicts( } } + // WHY fallback chain: CI may not have the primary package manager (e.g. bun not + // installed but npm is). ENOENT on the primary command should try alternatives + // from the same ecosystem before giving up (audit Cycle 74, milady#1722). + const JS_INSTALL_FALLBACKS: string[][] = [ + ['bun', 'install'], + ['npm', 'install'], + ['yarn', 'install'], + ['pnpm', 'install'], + ]; + + async function trySpawn(exe: string, args: string[]): Promise<{ ok: boolean; enoent: boolean }> { + if (exe.includes('/') || exe.includes('\\')) return { ok: false, enoent: false }; + return new Promise((resolve) => { + const proc = spawn(exe, args, { + cwd: resolvedWorkdir, + stdio: 'inherit', + env: safeEnv, + shell: false, + }); + const timeout = setTimeout(() => { + proc.kill('SIGTERM'); + setTimeout(() => proc.kill('SIGKILL'), 5000); + resolve({ ok: false, enoent: false }); + }, 60_000); + proc.on('close', (code) => { clearTimeout(timeout); resolve({ ok: code === 0, enoent: false }); }); + proc.on('error', (err: NodeJS.ErrnoException) => { + clearTimeout(timeout); + resolve({ ok: false, enoent: err.code === 'ENOENT' }); + }); + }); + } + + const isJsLockCmd = (cmd: string): boolean => /^(bun|npm|yarn|pnpm)\s+install$/i.test(cmd); + // Run regenerate commands using spawn with validated args - // Security: Only execute whitelisted commands with spawn (no shell) for (const cmd of regenerateCommands) { const cmdArgs = ALLOWED_COMMANDS[cmd]; if (!cmdArgs) { @@ -131,59 +195,57 @@ export async function handleLockFileConflicts( } const [executable, ...args] = cmdArgs; - - // Security: Verify executable is a simple name (no path components) - // This ensures we use the system PATH lookup, not a potentially malicious local file - if (executable.includes('/') || executable.includes('\\')) { - console.log(chalk.yellow(` ⚠ Skipping command with path in executable: ${executable}`)); - continue; - } - console.log(chalk.cyan(` Running: ${cmd}`)); - try { - await new Promise((resolve, reject) => { - const proc = spawn(executable, args, { - cwd: resolvedWorkdir, - stdio: 'inherit', - env: safeEnv, - shell: false, // Never use shell - prevents shell injection - }); - - // Security: 60 second timeout prevents resource exhaustion - const timeout = setTimeout(() => { - proc.kill('SIGTERM'); - // Give process 5s to terminate gracefully, then SIGKILL - setTimeout(() => proc.kill('SIGKILL'), 5000); - reject(new Error('Timeout exceeded (60s)')); - }, 60000); - - proc.on('close', (code) => { - clearTimeout(timeout); - if (code === 0) { - resolve(); - } else { - reject(new Error(`Exit code ${code}`)); - } - }); - - proc.on('error', (err) => { - clearTimeout(timeout); - reject(err); - }); - }); + const result = await trySpawn(executable, args); + if (result.ok) { console.log(chalk.green(` ✓ ${cmd} completed`)); - } catch (e) { - console.log(chalk.yellow(` ⚠ ${cmd} failed: ${e}, continuing...`)); + } else if (result.enoent && isJsLockCmd(cmd)) { + // Primary not found — try JS ecosystem fallbacks + console.log(chalk.yellow(` ⚠ ${executable} not found, trying fallback package managers...`)); + let fallbackOk = false; + for (const [fbExe, ...fbArgs] of JS_INSTALL_FALLBACKS) { + if (fbExe === executable) continue; + console.log(chalk.cyan(` Trying: ${fbExe} ${fbArgs.join(' ')}`)); + const fb = await trySpawn(fbExe, fbArgs); + if (fb.ok) { + console.log(chalk.green(` ✓ ${fbExe} ${fbArgs.join(' ')} completed (fallback)`)); + fallbackOk = true; + break; + } + if (fb.enoent) continue; + console.log(chalk.yellow(` ⚠ ${fbExe} ${fbArgs.join(' ')} failed, trying next...`)); + } + if (!fallbackOk) { + console.log(chalk.yellow(` ⚠ No JS package manager available; lock file will be removed to clear conflict`)); + } + } else { + console.log(chalk.yellow(` ⚠ ${cmd} failed, continuing...`)); + console.log( + chalk.gray( + ` If package.json or the lockfile still has merge conflict markers, resolve those in the workdir first, then run ${cmd} manually.`, + ), + ); } } - // Stage the regenerated lock files + // Stage regenerated lock files, or record deletion when regen left no file (clears UU conflicts). + // WHY: Blind `git add` on a missing path fails with "pathspec did not match"; `git rm` resolves + // many merge conflicts when we intentionally drop the lock after a failed install. for (const lockFile of lockFiles) { + const stagedPath = path.join(resolvedWorkdir, lockFile); try { - await git.add(lockFile); + if (fs.existsSync(stagedPath)) { + await git.add(lockFile); + } else { + await git + .raw(['rm', '-f', '--', lockFile]) + .catch(async () => { + await git.raw(['add', '-u', '--', lockFile]).catch(() => {}); + }); + } } catch { - // File might not exist if regenerate failed, ignore + // Last resort: ignore (caller treats remaining git conflicts as unresolved) } } } diff --git a/tools/prr/git/git-conflict-prompts.ts b/tools/prr/git/git-conflict-prompts.ts index ccdf818..4ea3cd4 100644 --- a/tools/prr/git/git-conflict-prompts.ts +++ b/tools/prr/git/git-conflict-prompts.ts @@ -2,7 +2,7 @@ * Git conflict resolution prompts */ -import { readFileSync } from 'fs'; +import { readFileSync, lstatSync } from 'fs'; import { join } from 'path'; import { CONFLICT_USE_CHUNKED_FIRST_CHUNKS } from '../../../shared/constants.js'; import { hasConflictMarkers } from '../../../shared/git/git-clone-index.js'; @@ -71,9 +71,20 @@ export function buildConflictResolutionPromptWithContent( const unreadable: string[] = []; for (const file of conflictedFiles) { + // WHY lstat guard: submodules/directories throw EISDIR on readFileSync. + // They are resolved by the submodule handler, not the LLM prompt. + const fullFilePath = join(workdir, file); + try { + if (lstatSync(fullFilePath).isDirectory()) { + unreadable.push(file); + continue; + } + } catch { + // stat failed — fall through to readFileSync which will catch it + } let content: string; try { - content = readFileSync(join(workdir, file), 'utf-8'); + content = readFileSync(fullFilePath, 'utf-8'); } catch { unreadable.push(file); continue; diff --git a/tools/prr/git/git-conflict-resolve.ts b/tools/prr/git/git-conflict-resolve.ts index 3ac7dd3..43bdcb7 100644 --- a/tools/prr/git/git-conflict-resolve.ts +++ b/tools/prr/git/git-conflict-resolve.ts @@ -7,7 +7,7 @@ * output (JSON validity, size regression) to catch truncation or corruption. */ import chalk from 'chalk'; -import { join } from 'path'; +import { join, resolve, sep } from 'path'; import { existsSync, readFileSync, writeFileSync } from 'fs'; import type { SimpleGit } from 'simple-git'; import { @@ -44,7 +44,11 @@ import { buildConflictResolutionPromptWithContent, splitConflictFilesIntoBatches, } from './git-conflict-prompts.js'; -import { handleLockFileConflicts } from './git-conflict-lockfiles.js'; +import { + handleLockFileConflicts, + lockRegenerationRequiresCleanPackageJson, + packageJsonHasConflictMarkers, +} from './git-conflict-lockfiles.js'; import { resolveConflictsChunked, resolveConflictsWithTopTailsFallback, @@ -528,6 +532,135 @@ ${content} return null; } +/** + * Scan JSON text for duplicate keys at the top two nesting levels. + * + * WHY: `JSON.parse` silently accepts `{ "dev": "a", "dev": "b" }` (last wins), + * so standard validation misses this. LLMs merging package.json often produce + * duplicate "scripts" entries from both sides. We scan raw text rather than + * a custom reviver because the reviver approach breaks on nested objects. + * + * Returns the first duplicate key found, or null if none. + */ +function findDuplicateJsonKey(text: string): string | null { + // Line-based approach: works for indented JSON where keys are on separate lines + // (the common LLM output pattern for package.json). + // Track brace depth; at each level, record keys seen. When a `}` closes a level, + // clear that level's keys (the next `{` starts a new sibling object). + const MAX_DEPTH = 2; + let depth = 0; + // Stack: each depth has its own set of seen keys. Use a depth-indexed map + // so closing `}` clears the correct level. + const seenAtDepth = new Map>(); + + for (const line of text.split('\n')) { + const trimmed = line.trim(); + + // Process structural chars before checking for a key on this line. + // Count opens/closes carefully — a line like `},` or `}` only has one close. + for (const c of trimmed) { + if (c === '{') { + depth++; + seenAtDepth.set(depth, new Set()); + } else if (c === '}') { + seenAtDepth.delete(depth); + depth--; + } + } + if (depth > MAX_DEPTH || depth < 1) continue; + + const m = trimmed.match(/^"([^"]+)"\s*:/); + if (m) { + const key = m[1]; + const seen = seenAtDepth.get(depth); + if (seen) { + if (seen.has(key)) return key; + seen.add(key); + } + } + } + return null; +} + +/** + * Attempt programmatic repair of common LLM JSON merge artifacts before rejecting. + * Fixes trailing commas, missing commas between merged sections, and duplicate keys + * (keeps the last occurrence, matching JSON.parse semantics). + * Returns null if the result still doesn't parse. + */ +/** Extract the character position from a JSON.parse error message (e.g. "at position 4460"). */ +function extractJsonErrorPosition(err: unknown): number | null { + const msg = err instanceof Error ? err.message : String(err); + const m = msg.match(/at position (\d+)/); + return m ? parseInt(m[1]!, 10) : null; +} + +function tryRepairJson(text: string): string | null { + // Bail early on conflict markers — those need resolution, not syntax repair + if (/^<{7}\s|^={7}$|^>{7}\s/m.test(text)) return null; + + let s = text; + + // 1. Remove trailing commas before } or ] + s = s.replace(/,(\s*[}\]])/g, '$1'); + + // 2. Insert missing commas: line ending with a JSON value followed by a key line. + // Common when LLM resolves chunks separately — last line of chunk N has no + // trailing comma, first line of chunk N+1 starts a new key. + // Handles: "value", }, ], number, true, false, null + s = s.replace(/"(\s*\n\s*"[^"]+"\s*:)/g, '",$1'); + s = s.replace(/}(\s*\n\s*"[^"]+"\s*:)/g, '},$1'); + s = s.replace(/](\s*\n\s*"[^"]+"\s*:)/g, '],$1'); + s = s.replace(/(true|false|null|\d)(\s*\n\s*"[^"]+"\s*:)/g, '$1,$2'); + + // 3. Try parse; if still broken, try iterative position-based comma insertion + // (up to 5 rounds — each round finds the error position and inserts a comma) + for (let round = 0; round < 5; round++) { + try { + JSON.parse(s); + break; + } catch (e: unknown) { + const pos = extractJsonErrorPosition(e); + if (pos === null || pos <= 0) return null; + // Look backward from the error position for the last non-whitespace char; + // if it's a JSON value terminator without a trailing comma, insert one. + const beforeErr = s.slice(0, pos); + const trimmed = beforeErr.trimEnd(); + const lastChar = trimmed[trimmed.length - 1]; + if (lastChar && /["\d}\]eE]/.test(lastChar) && !trimmed.endsWith(',')) { + s = trimmed + ',' + s.slice(trimmed.length); + } else { + return null; + } + } + } + // Final verification after iterative repair + try { JSON.parse(s); } catch { return null; } + + // 4. Remove duplicate keys (keep last occurrence) while preserving formatting. + // Loop so we handle multiple different duplicate keys. + const MAX_DEDUP_ROUNDS = 10; + for (let dr = 0; dr < MAX_DEDUP_ROUNDS; dr++) { + const dupeKey = findDuplicateJsonKey(s); + if (!dupeKey) break; + const lines = s.split('\n'); + const keyPattern = new RegExp(`^\\s*"${dupeKey.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}"\\s*:`); + const matchingIndices: number[] = []; + for (let i = 0; i < lines.length; i++) { + if (keyPattern.test(lines[i]!)) matchingIndices.push(i); + } + if (matchingIndices.length <= 1) break; + for (let k = 0; k < matchingIndices.length - 1; k++) { + lines[matchingIndices[k]!] = ''; + } + s = lines.filter(l => l !== '').join('\n'); + s = s.replace(/,(\s*[}\]])/g, '$1'); + try { JSON.parse(s); } catch { return null; } + } + + return s; +} + /** * Validate that resolved content is sane before writing to disk. * @@ -537,7 +670,8 @@ ${content} * * Checks performed: * 1. JSON validation for .json files (catches structural corruption) - * 2. Size regression detection (catches catastrophic truncation; skipped for keep-ours / take-theirs) + * 2. Duplicate key detection for JSON (catches LLM merge artifacts) + * 3. Size regression detection (catches catastrophic truncation; skipped for keep-ours / take-theirs) */ function validateResolvedContent( filePath: string, @@ -553,6 +687,13 @@ function validateResolvedContent( const message = e instanceof Error ? e.message : String(e); return { valid: false, reason: `Invalid JSON after resolution: ${message}` }; } + // WHY: JSON.parse silently accepts duplicate keys (last wins). In package.json + // this means dropped scripts or dependencies. Scan the raw text for dupe keys + // at the top two nesting levels where LLM merges commonly produce them. + const dupeKey = findDuplicateJsonKey(resolvedContent); + if (dupeKey) { + return { valid: false, reason: `Duplicate JSON key "${dupeKey}" — LLM merged both sides but repeated a key` }; + } } // Size regression: compare resolved content to the larger side of conflicts. @@ -627,9 +768,48 @@ export async function resolveConflictsWithLLM( console.log(chalk.cyan(` - ${file}${isLock ? chalk.gray(' (lock file - will regenerate)') : ''}`)); } - // Handle lock files first - delete and regenerate - if (lockFiles.length > 0) { + // Lock regeneration runs `npm install` / `bun install`, which parse package.json. + // WHY defer: If package.json still has <<<<<<< markers, every install fails with + // EJSONPARSE; we regenerate after the LLM clears JSON (milady#1722 re-run). + let lockRegenDeferred = + lockFiles.length > 0 && + lockRegenerationRequiresCleanPackageJson(lockFiles) && + packageJsonHasConflictMarkers(workdir); + + if (lockFiles.length > 0 && !lockRegenDeferred) { await handleLockFileConflicts(git, lockFiles, workdir, config); + } else if (lockRegenDeferred) { + console.log( + chalk.cyan( + ' Deferring lock file regeneration until package.json has no conflict markers ' + + '(install would fail while JSON is conflicted).' + ) + ); + } + + const runDeferredLockRegenIfNeeded = async (): Promise => { + if (!lockRegenDeferred || lockFiles.length === 0) return; + if (packageJsonHasConflictMarkers(workdir)) return; + console.log( + chalk.cyan('\n Running deferred lock file regeneration (package.json is clean)...') + ); + await handleLockFileConflicts(git, lockFiles, workdir, config); + lockRegenDeferred = false; + }; + + // Handle submodule/directory conflicts before code files. + // WHY: Git submodules (gitlinks) show up as directories on disk. readFileSync throws + // EISDIR and the entire per-file loop catches it as a generic error, leaving the + // conflict unresolved and blocking the run. Detect and resolve them deterministically. + const submoduleConflicts = await detectSubmoduleConflicts(git, codeFiles, workdir); + if (submoduleConflicts.length > 0) { + for (const sm of submoduleConflicts) { + const resolved = await resolveSubmoduleConflict(git, sm, workdir); + if (resolved) { + const idx = codeFiles.indexOf(sm.file); + if (idx !== -1) codeFiles.splice(idx, 1); + } + } } // Handle delete conflicts (e.g. "deleted by them", "deleted by us") @@ -758,7 +938,9 @@ export async function resolveConflictsWithLLM( } else if (codeFiles.length > 0 && skipRunnerAttempt) { console.log(chalk.blue(`\n Skipping runner attempt (not available yet), using direct LLM API...`)); } - + + await runDeferredLockRegenIfNeeded(); + // Check if conflicts remain after first attempt // Check both git status AND actual file contents for conflict markers let statusAfter = await git.status(); @@ -829,6 +1011,21 @@ export async function resolveConflictsWithLLM( const fullPath = join(workdir, conflictFile); try { + // WHY: Early submodule pass can fail before package.json is fixed; retry here so + // index-based gitlink staging runs after the tree is cleaner (milady#1722). + try { + if (fs.lstatSync(fullPath).isDirectory()) { + const subOk = await resolveSubmoduleConflict( + git, + { file: conflictFile, isDirectory: true }, + workdir + ); + if (subOk) continue; + } + } catch { + /* missing path — fall through */ + } + let conflictedContent = fs.readFileSync(fullPath, 'utf-8'); conflictedContent = preprocessConflictFileContent(conflictedContent); // WHY: When the main path fails due to parse validation we pass this into the top+tails fallback @@ -1073,9 +1270,29 @@ export async function resolveConflictsWithLLM( // WHY: Catches corrupted resolutions (invalid JSON, catastrophic truncation) // before they get committed and pushed. Better to bail to manual resolution // than to push garbage. - const validation = validateResolvedContent(conflictFile, conflictedContent, result.content, { + let validation = validateResolvedContent(conflictFile, conflictedContent, result.content, { skipSizeRegression: resolutionSkipsSizeRegression, }); + // Auto-repair common JSON merge artifacts before rejecting + if (!validation.valid && conflictFile.endsWith('.json')) { + debug('Attempting JSON auto-repair', { file: conflictFile, reason: validation.reason, contentChars: result.content.length, hasMarkers: hasConflictMarkers(result.content) }); + const repaired = tryRepairJson(result.content); + if (repaired) { + const recheck = validateResolvedContent(conflictFile, conflictedContent, repaired, { + skipSizeRegression: resolutionSkipsSizeRegression, + }); + if (recheck.valid) { + debug('JSON auto-repair succeeded', { file: conflictFile, originalReason: validation.reason }); + console.log(chalk.blue(` → Auto-repaired JSON (${validation.reason})`)); + result = { resolved: true, content: repaired, explanation: result.explanation + ' (JSON auto-repaired)' }; + validation = recheck; + } else { + debug('JSON auto-repair: repaired content still fails validation', { file: conflictFile, recheckReason: recheck.reason }); + } + } else { + debug('JSON auto-repair: tryRepairJson returned null (could not fix)', { file: conflictFile }); + } + } if (!validation.valid) { debug('Resolution rejected by validation', { file: conflictFile, reason: validation.reason }); result = { @@ -1201,7 +1418,26 @@ export async function resolveConflictsWithLLM( } if (fallbackResult.resolved) { // WHY: Same validation as main path — size/JSON and parse — so we never stage broken output. - const fbValidation = validateResolvedContent(conflictFile, conflictedContent, fallbackResult.content); + let fbContent = fallbackResult.content; + let fbValidation = validateResolvedContent(conflictFile, conflictedContent, fbContent); + if (!fbValidation.valid && conflictFile.endsWith('.json')) { + debug('Attempting JSON auto-repair (top+tails fallback)', { file: conflictFile, reason: fbValidation.reason, contentChars: fbContent.length, hasMarkers: hasConflictMarkers(fbContent) }); + const repaired = tryRepairJson(fbContent); + if (repaired) { + const rc = validateResolvedContent(conflictFile, conflictedContent, repaired); + if (rc.valid) { + debug('JSON auto-repair succeeded (top+tails fallback)', { file: conflictFile, originalReason: fbValidation.reason }); + console.log(chalk.blue(` → Auto-repaired JSON in fallback (${fbValidation.reason})`)); + fbContent = repaired; + fbValidation = rc; + fallbackResult = { ...fallbackResult, content: repaired }; + } else { + debug('JSON auto-repair (top+tails): repaired content still fails validation', { file: conflictFile, recheckReason: rc.reason }); + } + } else { + debug('JSON auto-repair (top+tails): tryRepairJson returned null', { file: conflictFile }); + } + } if (fbValidation.valid) { const fbParse = await validateResolvedFileContent(fallbackResult.content, conflictFile); if (fbParse.valid) { @@ -1251,6 +1487,12 @@ export async function resolveConflictsWithLLM( remainingConflicts = [...new Set([...gitConflicts, ...markerConflicts])]; } + await runDeferredLockRegenIfNeeded(); + statusAfter = await git.status(); + gitConflicts = statusAfter.conflicted || []; + markerConflicts = await findFilesWithConflictMarkers(workdir, codeFiles); + remainingConflicts = [...new Set([...gitConflicts, ...markerConflicts])]; + return { success: remainingConflicts.length === 0, remainingConflicts @@ -1375,6 +1617,165 @@ async function resolveDeleteConflict( } } +/** + * Submodule/directory conflict info. + */ +interface SubmoduleConflict { + file: string; + /** true when the path is a directory on disk (submodule checkout or gitlink). */ + isDirectory: boolean; +} + +/** + * Detect git submodule (gitlink) or directory conflicts. + * + * WHY: Submodules show up as directories on disk. `readFileSync` throws EISDIR, + * and the per-file LLM loop catches it generically — leaving the conflict unresolved + * and blocking the entire run (audit Cycle 74, milady#1722 `eliza` submodule). + * + * Detection: `git ls-files -s` shows mode 160000 for gitlinks. We also check + * `lstatSync` so plain directories (e.g. nested repos without .gitmodules) are caught. + */ +async function detectSubmoduleConflicts( + git: SimpleGit, + conflictedFiles: string[], + workdir: string +): Promise { + const results: SubmoduleConflict[] = []; + const { lstatSync } = await import('fs'); + + // Check git ls-files for mode 160000 (gitlink entries) + const gitlinkPaths = new Set(); + try { + const lsOutput = await git.raw(['ls-files', '-s', '--', ...conflictedFiles]); + for (const line of lsOutput.split('\n')) { + // Format: \t + const m = line.match(/^160000\s+\S+\s+\d\t(.+)$/); + if (m) gitlinkPaths.add(m[1]); + } + } catch { + // ls-files may fail during merge; fall back to stat below + } + + for (const file of conflictedFiles) { + const fullPath = join(workdir, file); + let isDir = gitlinkPaths.has(file); + if (!isDir) { + try { + isDir = lstatSync(fullPath).isDirectory(); + } catch { + // Path doesn't exist or can't be stat'd — not a directory conflict + } + } + if (isDir) { + results.push({ file, isDirectory: true }); + } + } + return results; +} + +/** + * Stage a submodule gitlink from unmerged index stages (mode 160000). + * + * WHY: `git checkout --theirs -- path` fails with "does not have a commit checked out" + * when the submodule directory is empty or not initialized. The merge index still + * holds both OIDs — we can record the chosen commit directly (milady#1722 `eliza`). + */ +async function stageSubmoduleGitlinkFromIndex( + git: SimpleGit, + file: string, + preferTheirs: boolean +): Promise { + const raw = await git.raw(['ls-files', '-u', '--', file]).catch(() => ''); + const stages = new Map(); + for (const line of raw.split('\n')) { + const m = line.match(/^160000\s+(\S+)\s+(\d)\t(.+)$/); + if (!m) continue; + const pathFromGit = m[3]; + if (pathFromGit !== file && pathFromGit.replace(/\\/g, '/') !== file.replace(/\\/g, '/')) { + continue; + } + stages.set(Number(m[2]), m[1]); + } + const oid = preferTheirs + ? (stages.get(3) ?? stages.get(2) ?? stages.get(1)) + : (stages.get(2) ?? stages.get(3) ?? stages.get(1)); + if (!oid) return false; + await git.raw(['update-index', '--cacheinfo', `160000,${oid},${file}`]); + return true; +} + +/** + * Resolve a submodule/directory conflict by accepting "theirs" (base branch) gitlink. + * + * WHY theirs: The PR is being merged into the base; the base branch typically has the + * authoritative submodule pointer. If both sides updated the pointer, accepting theirs + * keeps the base branch's commit reference. This is a safe default — the PR author can + * always update the submodule pointer in a follow-up commit. + * + * Order: remove dirty worktree dir → checkout --theirs/--ours → else stage OID from index. + * WHY rm first: Git refuses checkout when the path is a broken/empty submodule checkout. + */ +async function resolveSubmoduleConflict( + git: SimpleGit, + conflict: SubmoduleConflict, + workdir: string +): Promise { + const { file } = conflict; + const fullPath = join(workdir, file); + const resolvedRoot = resolve(workdir); + const resolvedPath = resolve(fullPath); + if (resolvedPath !== resolvedRoot && !resolvedPath.startsWith(resolvedRoot + sep)) { + console.log(chalk.red(` ✗ ${file}: path escapes workdir`)); + return false; + } + + const rmTree = async (): Promise => { + const fs = await import('fs'); + try { + fs.rmSync(resolvedPath, { recursive: true, force: true }); + } catch { + /* absent or not a directory */ + } + }; + + try { + await rmTree(); + try { + await git.raw(['checkout', '--theirs', '--', file]); + await git.add(file); + console.log(chalk.green(` ✓ ${file}: submodule conflict resolved (accepted base branch pointer)`)); + return true; + } catch { + await rmTree(); + try { + await git.raw(['checkout', '--ours', '--', file]); + await git.add(file); + console.log(chalk.green(` ✓ ${file}: submodule conflict resolved (kept current branch pointer)`)); + return true; + } catch { + if (await stageSubmoduleGitlinkFromIndex(git, file, true)) { + console.log( + chalk.green(` ✓ ${file}: submodule conflict resolved (staged gitlink from index, theirs)`) + ); + return true; + } + if (await stageSubmoduleGitlinkFromIndex(git, file, false)) { + console.log( + chalk.green(` ✓ ${file}: submodule conflict resolved (staged gitlink from index, ours)`) + ); + return true; + } + } + } + console.log(chalk.red(` ✗ ${file}: could not resolve submodule (no gitlink in merge index)`)); + return false; + } catch (e) { + console.log(chalk.red(` ✗ ${file}: failed to resolve submodule conflict: ${e}`)); + return false; + } +} + /** * Clean up sync target files (CLAUDE.md, CONVENTIONS.md) that were created by prr. * diff --git a/tools/prr/github/api.ts b/tools/prr/github/api.ts index 621257c..22f5a8a 100644 --- a/tools/prr/github/api.ts +++ b/tools/prr/github/api.ts @@ -73,6 +73,8 @@ function isBotNoiseComment(body: string): boolean { export class GitHubAPI { private octokit: Octokit; private graphqlWithAuth: typeof graphql; + /** Memoized `GET /user` for thread-reply idempotency when PRR_BOT_LOGIN is unset. */ + private authenticatedLoginPromise: Promise | undefined; constructor(token: string) { this.octokit = new Octokit({ auth: token }); @@ -84,6 +86,29 @@ export class GitHubAPI { debug('GitHub API client initialized'); } + /** + * GitHub login for the current auth token (`GET /user`). + * WHY: Thread-reply cross-run idempotency matches review comment `author` to a login; PAT / Actions + * tokens can resolve that login here so PRR_BOT_LOGIN is optional. + */ + async getAuthenticatedLogin(): Promise { + if (!this.authenticatedLoginPromise) { + this.authenticatedLoginPromise = (async () => { + try { + const { data } = await this.octokit.users.getAuthenticated(); + const login = data.login?.trim(); + return login || undefined; + } catch (err) { + debug('users.getAuthenticated failed', { + error: err instanceof Error ? err.message : String(err), + }); + return undefined; + } + })(); + } + return this.authenticatedLoginPromise; + } + private escapeRegex(value: string): string { return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } @@ -852,7 +877,7 @@ export class GitHubAPI { /** * Get comment authors in a review thread (for cross-run idempotency: skip if we already replied). - * WHY: When PRR_BOT_LOGIN is set, callers check whether this thread already has a comment from that login; if so, we skip posting to avoid duplicate replies on re-runs. + * WHY: When we know the bot login (PRR_BOT_LOGIN or token from getAuthenticatedLogin), callers check whether this thread already has a comment from that login; if so, we skip posting to avoid duplicate replies on re-runs. * owner/repo/prNumber are unused (GraphQL node(id) only needs threadId) but kept for API consistency and future use. */ async getThreadComments( diff --git a/tools/prr/index.ts b/tools/prr/index.ts index 3eec50c..b1bfe37 100644 --- a/tools/prr/index.ts +++ b/tools/prr/index.ts @@ -193,14 +193,6 @@ async function main(): Promise { const maxConcurrent = getEffectiveMaxConcurrentLLM(); console.log(chalk.gray(` LLM concurrency: ${maxConcurrent === 1 ? '1 (default)' : maxConcurrent} — set PRR_MAX_CONCURRENT_LLM to tune`)); - if (options.replyToThreads && !process.env.PRR_BOT_LOGIN?.trim()) { - console.warn( - chalk.yellow( - ' --reply-to-threads: PRR_BOT_LOGIN is not set — cross-run idempotency is off; re-runs may post duplicate thread replies. Set PRR_BOT_LOGIN to your bot GitHub login.', - ), - ); - } - // Create and run resolver resolver = new PRResolver(config, options); await resolver.run(prUrl); diff --git a/tools/prr/llm/client.ts b/tools/prr/llm/client.ts index d466568..d6de059 100644 --- a/tools/prr/llm/client.ts +++ b/tools/prr/llm/client.ts @@ -1,55 +1,46 @@ /** * LLM client for verification, issue detection, and commit message generation. - * + * + * **Module layout:** Low-level completion (Anthropic / OpenAI / ElizaCloud retries, prompts.log) + * lives in `llm-client-transport.ts`. Shared response/options types are in `llm-client-types.ts`. + * Batch existence checks, final audit, batch verify, conflict resolution, and commit/dismissal + * prompts remain on this class for now — they delegate to `complete()` which uses the transport. + * * WHY separate from fixer tools: Verification needs different models than fixing. * We use Claude Haiku/Sonnet for fast verification checks, while fixer tools * might use Opus or GPT for actual code changes. - * + * * WHY extended thinking support: For complex verification, Claude's "thinking" * capability improves accuracy by reasoning through the problem before answering. - * + * * WHY adversarial prompts: Regular "is this fixed?" prompts have high false positive * rates - LLMs tend toward "yes". Adversarial prompts ("find what's NOT fixed") * are more reliable. */ import Anthropic from '@anthropic-ai/sdk'; -import chalk from 'chalk'; import OpenAI from 'openai'; -import type { Fetch } from 'openai/core'; import type { Config, LLMProvider } from '../../../shared/config.js'; -import { debug, warn, trackTokens, debugPrompt, debugResponse, debugPromptError, formatNumber } from '../../../shared/logger.js'; +import { debug, warn, formatNumber } from '../../../shared/logger.js'; import { ELIZACLOUD_API_BASE_URL, getEffectiveMaxConcurrentLLM, - getElizacloudGatewayFallbackModels, - getElizacloudServerErrorMaxRetries, MAX_CONFLICT_SINGLE_SHOT_LLM_CHARS, } from '../../../shared/constants.js'; -import { acquireElizacloud, releaseElizacloud, notifyRateLimitHit } from '../../../shared/llm/rate-limit.js'; import { createElizaCloudOpenAIClient } from '../../../shared/llm/elizacloud.js'; -import { openAiChatCompletionContentToString } from '../../../shared/llm/openai-chat-content.js'; import { sanitizeCommentForPrompt } from '../analyzer/prompt-builder.js'; import { hasConflictMarkers } from '../../../shared/git/git-lock-files.js'; import { buildConflictResolutionPromptThreeWay } from '../git/git-conflict-chunked.js'; import { runWithConcurrencyAllSettled } from '../../../shared/run-with-concurrency.js'; import { getOutdatedModelCatalogDismissal } from '../workflow/helpers/outdated-model-advice.js'; +import { getMaxElizacloudLlmCompleteInputChars } from '../../../shared/llm/model-context-limits.js'; import { - ELIZACLOUD_COMPLETION_CONTEXT_RESERVE_TOKENS, - ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS, - estimateElizacloudInputTokensFromCharLength, - getElizaCloudModelContextSpec, - getMaxElizacloudLlmCompleteInputChars, - lowerModelMaxPromptChars, -} from '../../../shared/llm/model-context-limits.js'; + computePerFixVerifyCurrentCodeBudget, + truncateNumberedCodeAroundAnchor, +} from '../../../shared/prompt-budget.js'; import { - elizaCloudServerErrorExpectationDebug, getConflictFileTypeRules, - getElizaCloudErrorContext, - isElizaCloudServerClassError, - isLikelyContextLengthExceededError, maskApiKey, normalizeIssueId, - sanitizeForJson, } from './error-helpers.js'; import type { ModelRecommendationContext } from './provider-probes.js'; import { getCheapModelForProvider } from './provider-probes.js'; @@ -57,9 +48,13 @@ import { commentNeedsConservativeExistenceCheck, explanationHasConcreteFixEvidence, explanationMentionsMissingCodeVisibility, + finalAuditExplanationClaimsSnippetIsIncomplete, finalAuditSnippetLooksTruncatedOrExcerpt, snippetShowsUuidCommentAlignedWithVersionRange, } from './verification-heuristics.js'; +import { llmComplete, type LlmTransportDeps } from './llm-client-transport.js'; +import type { BatchCheckResult, CompleteOptions, LLMResponse } from './llm-client-types.js'; +import { filterAttemptHistoryToBatch } from './llm-client-types.js'; /** * Re-exports from split modules so `import { … } from '…/llm/client.js'` stays stable. @@ -74,6 +69,7 @@ export { commentNeedsConservativeExistenceCheck, explanationHasConcreteFixEvidence, explanationMentionsMissingCodeVisibility, + finalAuditExplanationClaimsSnippetIsIncomplete, finalAuditSnippetLooksTruncatedOrExcerpt, snippetShowsUuidCommentAlignedWithVersionRange, } from './verification-heuristics.js'; @@ -98,73 +94,8 @@ export { sanitizeForJson, } from './error-helpers.js'; -export interface LLMResponse { - content: string; - usage?: { - inputTokens: number; - outputTokens: number; - /** Tokens written to Anthropic's prompt cache (1.25x cost, 5-min TTL). */ - cacheCreationInputTokens?: number; - /** Tokens read from Anthropic's prompt cache (0.1x cost — 90% savings). */ - cacheReadInputTokens?: number; - }; -} - -interface CompleteOptions { - model?: string; - /** - * Override the generic ElizaCloud 500/504 retry count for special callers. - * WHY: Conflict resolution should fall back to chunked/manual strategies quickly - * instead of spending ~10 minutes exhausting the global retry ladder first. - */ - max504Retries?: number; - /** Optional phase label for prompts.log metadata (e.g. batch-verify, final-audit). Helps pill and auditors filter by step. */ - phase?: string; -} - -/** - * Batch check result with optional model recommendation - */ -export interface BatchCheckResult { - issues: Map; - /** Recommended models to use for fixing, in order of preference */ - recommendedModels?: string[]; - /** Reasoning behind the model recommendation */ - modelRecommendationReasoning?: string; - /** True when a batch failed (e.g. 504) but earlier batches were returned so state can be persisted */ - partial?: boolean; -} - -/** - * Filter attempt history to only lines for issues in the current batch. - * WHY: Audit showed full history (all issues) sent to every verify batch; only the current batch is relevant. - * NOTE: batchIds should be raw comment IDs (PRRC_...) matching the format from getAttemptHistoryForIssues. - * The batch input uses synthetic issue_N IDs, so callers must map back to comment IDs before calling this. - */ -function filterAttemptHistoryToBatch(attemptHistory: string, batchIds: string[]): string { - const set = new Set(batchIds); - return attemptHistory - .split('\n') - .filter((line) => { - const m = line.match(/^Issue\s+(\S+):/); - return m && set.has(m[1]); - }) - .join('\n'); -} +export type { BatchCheckResult, CompleteOptions, LLMResponse } from './llm-client-types.js'; +export { filterAttemptHistoryToBatch } from './llm-client-types.js'; export class LLMClient { /** Cap noisy per-batch final-audit truncation debug (output.log: dozens of identical lines per run). */ @@ -231,487 +162,37 @@ export class LLMClient { } } - async complete(prompt: string, systemPrompt?: string, options?: CompleteOptions): Promise { - // Sanitize inputs: strip unpaired UTF-16 surrogates that cause JSON serialization - // errors (Anthropic API returns 400 "no low surrogate in string"). These can appear - // in code snippets read from binary or corrupted files. - prompt = sanitizeForJson(prompt); - if (systemPrompt) { - systemPrompt = sanitizeForJson(systemPrompt); - } - - // Allow callers to override the model for this request (no instance mutation to avoid race conditions) - // WHY: The LLM client defaults to the verification model (often haiku), - // but some callers (like tryDirectLLMFix) need a stronger model for code fixing - const chosenModel = options?.model ?? this.model; - - const baseDebug: Record = { - promptLength: prompt.length, - hasSystemPrompt: !!systemPrompt, - }; - if (this.provider === 'elizacloud') { - const sysLen = systemPrompt?.length ?? 0; - const totalChars = prompt.length + sysLen; - const { approxTokens, assumedCharsPerToken } = estimateElizacloudInputTokensFromCharLength( - chosenModel, - totalChars, - ); - const spec = getElizaCloudModelContextSpec(chosenModel); - const worstOut = approxTokens + ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS; - baseDebug.requestTotalChars = totalChars; - baseDebug.estimatedInputTokensApprox = approxTokens; - baseDebug.tokenizerAssumptionCharsPerToken = assumedCharsPerToken; - baseDebug.maxCompletionTokensDefault = ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS; - baseDebug.estimatedInputPlusDefaultMaxOutputApprox = worstOut; - baseDebug.estimatedExceedsContextWithDefaultMaxOut = worstOut > spec.maxContextTokens; - } - debug(`LLM request to ${this.provider}/${chosenModel}`, baseDebug); - - // ElizaCloud: fail fast when total input exceeds configured budget. Gateways often - // return 500 (no body) for oversize upstream — retries waste minutes (audit: qwen 93k vs ~42k cap). - if (this.provider === 'elizacloud') { - const maxTotal = getMaxElizacloudLlmCompleteInputChars(chosenModel); - const total = prompt.length + (systemPrompt?.length ?? 0); - if (total > maxTotal) { - const detail = elizaCloudServerErrorExpectationDebug(chosenModel, prompt, systemPrompt); - warn( - `ElizaCloud prompt exceeds model input budget (${formatNumber(total)} chars > ${formatNumber(maxTotal)}). Use a larger-context model, split verification batches, or adjust ELIZACLOUD_MODEL_CONTEXT.`, - ); - debug('ElizaCloud input budget exceeded (detail)', detail); - throw new Error( - `ElizaCloud request too large for ${chosenModel}: ${formatNumber(total)} chars (max ${formatNumber(maxTotal)}).`, - ); - } - } - - // Log full prompt to debug file - const fullPrompt = systemPrompt ? `[SYSTEM]\n${systemPrompt}\n\n[USER]\n${prompt}` : prompt; - const promptMeta: Record = { model: chosenModel }; - if (options?.phase != null) promptMeta.phase = options.phase; - const promptSlug = debugPrompt(`llm-${this.provider}`, fullPrompt, promptMeta); - - const is429 = (e: unknown) => { - const status = (e as { status?: number })?.status; - const msg = e instanceof Error ? e.message : String(e); - return status === 429 || /429|Too many requests|rate limit/i.test(msg); - }; - const isServerError = (e: unknown) => { - const status = (e as { status?: number })?.status; - const msg = e instanceof Error ? e.message : String(e); - return status === 500 || /500|504|502|gateway.*timeout|deployment.*timeout|error occurred with your deployment/i.test(msg); + private transportDeps(): LlmTransportDeps { + return { + provider: this.provider, + model: this.model, + thinkingBudget: this.thinkingBudget, + anthropic: this.anthropic, + openai: this.openai, + elizacloudKeyHint: this.elizacloudKeyHint, + runAbortSignal: this.runAbortSignal, }; + } - let elizaAcquired = false; - try { - if (this.provider === 'elizacloud') { - await acquireElizacloud().then(() => elizaAcquired = true); // uses exported fn so same global limit as llm-api runner - elizaAcquired = true; - } - const max429Retries = this.provider === 'elizacloud' ? 3 : 0; - const max504Retries = - options?.max504Retries ?? - (this.provider === 'elizacloud' ? getElizacloudServerErrorMaxRetries() : 0); - const backoffMs = this.provider === 'elizacloud' ? [60_000, 60_000, 60_000] : [2000, 4000, 8000]; - const backoff504Ms = this.provider === 'elizacloud' ? [10_000, 20_000] : [10_000]; - // ElizaCloud STRICT = 10 req/min; short backoff (2s/4s/8s) sends 4 requests in ~14s → 429. Use 60s so retries stay under limit. - let lastErr: unknown; - for (let attempt = 0; attempt <= max429Retries; attempt++) { - try { - let response: LLMResponse | undefined; - let requestModel = chosenModel; - let consecutiveElizacloudGatewayErrors = 0; - let elizacloudFallbackIdx = 0; - const elizacloudGatewayFallbackChain = - this.provider === 'elizacloud' ? getElizacloudGatewayFallbackModels(chosenModel) : []; - - for (let attempt504 = 0; attempt504 <= max504Retries; attempt504++) { - try { - response = this.provider === 'anthropic' - ? await this.completeAnthropic(prompt, systemPrompt, chosenModel) - : await this.completeOpenAI(prompt, systemPrompt, requestModel); - break; - } catch (e504) { - if (this.provider === 'elizacloud') { - const base504 = getElizaCloudErrorContext(e504); - const payload504 = - isElizaCloudServerClassError(e504) - ? { ...base504, ...elizaCloudServerErrorExpectationDebug(requestModel, prompt, systemPrompt) } - : base504; - debug('ElizaCloud error (response context)', payload504); - } - const timeoutMsg = e504 instanceof Error && /timeout/i.test(e504.message); - const contextOverflow = isLikelyContextLengthExceededError(e504); - const totalChars = prompt.length + (systemPrompt?.length ?? 0); - const overConfiguredBudget = - this.provider === 'elizacloud' && - totalChars > getMaxElizacloudLlmCompleteInputChars(requestModel); - if (contextOverflow && this.provider === 'elizacloud') { - lowerModelMaxPromptChars('elizacloud', requestModel, prompt.length); - debug('ElizaCloud context length exceeded — lowered prompt cap for this model', { - model: requestModel, - promptLength: formatNumber(prompt.length), - ...elizaCloudServerErrorExpectationDebug(requestModel, prompt, systemPrompt), - }); - } - - const gatewayClassRetry = - this.provider === 'elizacloud' && (isServerError(e504) || timeoutMsg); - if (gatewayClassRetry) { - consecutiveElizacloudGatewayErrors++; - } else { - consecutiveElizacloudGatewayErrors = 0; - } - - if ( - this.provider === 'elizacloud' && - consecutiveElizacloudGatewayErrors >= 2 && - elizacloudFallbackIdx < elizacloudGatewayFallbackChain.length - ) { - const nextModel = elizacloudGatewayFallbackChain[elizacloudFallbackIdx]!; - elizacloudFallbackIdx++; - console.warn( - chalk.yellow( - `ElizaCloud: ${formatNumber(2)} consecutive gateway/server errors on ${requestModel} — trying fallback model ${nextModel} (override chain: PRR_ELIZACLOUD_GATEWAY_FALLBACK_MODELS; disable: off).`, - ), - ); - requestModel = nextModel; - consecutiveElizacloudGatewayErrors = 0; - attempt504--; - continue; - } - - if ( - attempt504 < max504Retries && - (isServerError(e504) || timeoutMsg) && - !contextOverflow && - !overConfiguredBudget - ) { - const delayMs = Array.isArray(backoff504Ms) ? backoff504Ms[attempt504] ?? backoff504Ms[backoff504Ms.length - 1] : backoff504Ms; - debug('Server error or request timeout, retrying', { - attempt: attempt504 + 1, - maxRetries: max504Retries, - delayMs, - model: this.provider === 'elizacloud' ? requestModel : chosenModel, - ...(this.provider === 'elizacloud' - ? elizaCloudServerErrorExpectationDebug(requestModel, prompt, systemPrompt) - : {}), - }); - await new Promise(r => setTimeout(r, delayMs)); - } else { - throw e504; - } - } - } - - if (!response) throw new Error('LLM request failed after retries'); - - debug('LLM response', { - responseLength: response.content.length, - usage: response.usage, - }); - - // Pill #1, #4: Ensure we pass the accumulated response content, not empty string. - // The OpenAI/Anthropic SDKs should return full content, but add safeguard. - const responseContent = response.content || ''; - if (!responseContent && response.usage?.outputTokens && response.usage.outputTokens > 0) { - debug('WARNING: LLM response has usage tokens but empty content — possible streaming accumulation bug', { - provider: this.provider, - model: requestModel, - outputTokens: response.usage.outputTokens, - }); - } - - if (response.usage) { - trackTokens(response.usage.inputTokens, response.usage.outputTokens); - } - - // WHY: writeToPromptLog refuses empty RESPONSE — audits would see orphan PROMPT slugs with no ERROR. - if (!responseContent.trim()) { - debugPromptError( - promptSlug, - `llm-${this.provider}`, - 'Empty or whitespace-only response body (HTTP success but no text; prompts.log would not record a RESPONSE).', - { - model: requestModel, - usage: response.usage, - ...(options?.phase != null ? { phase: options.phase } : {}), - emptyBody: true, - } - ); - // WHY: Operators and CI often skip prompts.log; one stderr line ties empty LLM output to the ERROR slug. - if (this.provider === 'elizacloud') { - console.warn( - chalk.yellow( - `ElizaCloud: empty response body from ${requestModel} (prompts.log has ERROR for this request).`, - ), - ); - } - } else { - const responseMeta: Record = { model: requestModel, usage: response.usage }; - if (options?.phase != null) responseMeta.phase = options.phase; - debugResponse(promptSlug, `llm-${this.provider}`, responseContent, responseMeta); - } - - return response; - } catch (err) { - lastErr = err; - if (this.provider === 'elizacloud') { - const status = (err as { status?: number })?.status; - const msg = err instanceof Error ? err.message : String(err); - if (status === 401 || /401|Unauthorized|Authentication required/i.test(msg)) { - const url = ELIZACLOUD_API_BASE_URL; - const keyHint = this.elizacloudKeyHint ?? maskApiKey(undefined); - debug('ElizaCloud 401', { requestURL: `${url}/chat/completions`, apiKey: keyHint, ...getElizaCloudErrorContext(err) }); - throw new Error( - `ElizaCloud API key was rejected (401 Unauthorized). ` + - `Request URL: ${url}/chat/completions. API key: ${keyHint}. ` + - `Check that ELIZACLOUD_API_KEY in .env is correct for this URL, has no extra spaces/newlines, and has not been revoked.` - ); - } - if (is429(err)) { - notifyRateLimitHit(); - if (attempt < max429Retries) { - const wait = backoffMs[attempt] ?? 8000; - debug(`ElizaCloud 429, retry ${attempt + 1}/${max429Retries} in ${wait}ms`); - await new Promise(r => setTimeout(r, wait)); - continue; - } - } - } - if (this.provider === 'elizacloud') { - const baseErr = getElizaCloudErrorContext(err); - const payloadErr = - isElizaCloudServerClassError(err) - ? { ...baseErr, ...elizaCloudServerErrorExpectationDebug(chosenModel, prompt, systemPrompt) } - : baseErr; - debug('ElizaCloud error (response context)', payloadErr); - } - throw err; - } - } - if (this.provider === 'elizacloud' && lastErr != null) { - const baseLast = getElizaCloudErrorContext(lastErr); - const payloadLast = - isElizaCloudServerClassError(lastErr) - ? { ...baseLast, ...elizaCloudServerErrorExpectationDebug(chosenModel, prompt, systemPrompt) } - : baseLast; - debug('ElizaCloud error (response context)', payloadLast); - } - const lastMsg = lastErr instanceof Error ? lastErr.message : String(lastErr); - debugPromptError(promptSlug, `llm-${this.provider}`, lastMsg, { - model: chosenModel, - status: (lastErr as { status?: number })?.status, - is504: lastErr != null && isServerError(lastErr), - isTimeout: /timeout/i.test(lastMsg), - }); - throw lastErr; - } finally { - if (this.provider === 'elizacloud' && elizaAcquired) { - releaseElizacloud(); - } - // Review: ensures slot release only if acquisition is successful to maintain accurate in-flight count. - } + async complete(prompt: string, systemPrompt?: string, options?: CompleteOptions): Promise { + return llmComplete(this.transportDeps(), prompt, systemPrompt, options); } /** * Same as complete() but uses the cheap model for this provider (haiku/mini). * Use for lightweight tasks (e.g. LLM dedup) to save cost; default model is for verification/fixing. + * Pass **`phase`** in options for prompts.log / output.log (e.g. **`dedup-v2-grouping`**). */ - async completeWithCheapModel(prompt: string, systemPrompt?: string): Promise { + async completeWithCheapModel( + prompt: string, + systemPrompt?: string, + options?: Omit, + ): Promise { const cheapModel = getCheapModelForProvider(this.provider); if (!cheapModel) { - return this.complete(prompt, systemPrompt); - } - return this.complete(prompt, systemPrompt, { model: cheapModel }); - } - - private async completeAnthropic(prompt: string, systemPrompt?: string, model?: string): Promise { - if (!this.anthropic) { - throw new Error('Anthropic client not initialized'); - } - - const chosenModel = model ?? this.model; - - // Build request options - // max_tokens is required by the Anthropic API — we can't omit it. - // Set it high so it's never the constraint; response length is controlled - // via prompt instructions, not this parameter. You only pay for tokens - // actually generated, not the budget ceiling. - // - // WHY 64K default: Sonnet/Haiku cap at 64K. Opus also caps at 64K unless - // extended thinking is enabled — requesting 128K without thinking causes 400. - const isHighOutputModel = chosenModel.includes('opus'); - const maxOutputTokens = (isHighOutputModel && this.thinkingBudget) ? 128_000 : 64_000; - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const requestOptions: any = { - model: chosenModel, - max_tokens: maxOutputTokens, - messages: [ - { - role: 'user', - content: prompt, - }, - ], - }; - - const maxTokens = requestOptions.max_tokens; - if (this.thinkingBudget && this.thinkingBudget >= maxTokens) { - throw new Error(`PRR_THINKING_BUDGET (${this.thinkingBudget}) must be < max_tokens (${maxTokens})`); - } - - // Add extended thinking if budget is set - if (this.thinkingBudget) { - requestOptions.thinking = { - type: 'enabled', - budget_tokens: this.thinkingBudget, - }; - debug('Using extended thinking', { budget: this.thinkingBudget }); - } else { - // Only use system prompt when not using extended thinking - // (extended thinking doesn't support system prompts). - // Use block format with cache_control so Anthropic caches the system - // prompt prefix across calls. Cache reads are 90% cheaper than base - // input — big win for repeated calls like batch analysis and verification. - const systemText = systemPrompt || 'You are a helpful code review assistant.'; - requestOptions.system = [ - { - type: 'text', - text: systemText, - cache_control: { type: 'ephemeral' }, - }, - ]; - } - - const requestOpts = this.runAbortSignal ? { signal: this.runAbortSignal } : undefined; - const response = await this.anthropic.messages.create(requestOptions, requestOpts); - - // Extract text content (skip thinking blocks) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const content = response.content - .filter((block: any) => block.type === 'text' && 'text' in block) - .map((block: any) => block.text) - .join(''); - - // Log thinking if present (extended thinking feature) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const thinkingBlock = response.content.find((block: any) => block.type === 'thinking'); - if (thinkingBlock && 'thinking' in thinkingBlock) { - debug('Extended thinking output', (thinkingBlock as any).thinking); - } - - // Capture cache usage stats from Anthropic's response. - // WHY log: Without observability, you can't tell if caching is actually - // working. Cache hits depend on the system prompt exceeding the model's - // minimum cacheable size (1024 tokens for Sonnet, 2048 for Haiku). If - // you see only cacheWrite with zero cacheRead, the system prompt is too - // small or the prefix changed between calls. - const usage: any = response.usage; - const cacheCreation = usage.cache_creation_input_tokens || 0; - const cacheRead = usage.cache_read_input_tokens || 0; - if (cacheCreation > 0 || cacheRead > 0) { - debug('Anthropic prompt cache', { - cacheWrite: cacheCreation, - cacheRead: cacheRead, - inputTokens: response.usage.input_tokens, - outputTokens: response.usage.output_tokens, - savingsPercent: cacheRead > 0 - ? Math.round((cacheRead / (response.usage.input_tokens + cacheRead)) * 90) + '%' - : '0%', - }); - } - - return { - content, - usage: { - inputTokens: response.usage.input_tokens, - outputTokens: response.usage.output_tokens, - cacheCreationInputTokens: cacheCreation || undefined, - cacheReadInputTokens: cacheRead || undefined, - }, - }; - } - - private async completeOpenAI(prompt: string, systemPrompt?: string, model?: string): Promise { - if (!this.openai) { - throw new Error('OpenAI client not initialized'); - } - - const chosenModel = model ?? this.model; - - const messages: OpenAI.ChatCompletionMessageParam[] = []; - - // WHY suppress for Qwen: Asking the model not to emit reduces output tokens and latency; - // we still strip in response as a fallback for other models or when the instruction is ignored. - const noThinkSuffix = /\bqwen\b/i.test(chosenModel) - ? '\nDo NOT include tags or internal reasoning. Respond directly.' - : ''; - - if (systemPrompt) { - messages.push({ role: 'system', content: systemPrompt + noThinkSuffix }); - } else if (noThinkSuffix) { - messages.push({ role: 'system', content: noThinkSuffix.trim() }); - } - - messages.push({ role: 'user', content: prompt }); - - // Cap completion so estimated input + max_output stays under model context. - // WHY: Char preflight uses a separate budget; OpenAI-style APIs still validate **tokens**. - // Qwen3-14B is 24,576 ctx — ~32k chars ≈ ~20k input tok + 8192 max out → opaque HTTP 500 (audit). - const systemMessageChars = systemPrompt - ? (systemPrompt + noThinkSuffix).length - : noThinkSuffix - ? noThinkSuffix.trim().length - : 0; - const totalInputChars = systemMessageChars + prompt.length; - - let maxCompletionTokens = ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS; - if (this.provider === 'elizacloud') { - const spec = getElizaCloudModelContextSpec(chosenModel); - const { approxTokens } = estimateElizacloudInputTokensFromCharLength(chosenModel, totalInputChars); - const headroom = spec.maxContextTokens - approxTokens - ELIZACLOUD_COMPLETION_CONTEXT_RESERVE_TOKENS; - const capped = Math.min( - ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS, - Math.max(256, headroom), - ); - if (capped < ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS) { - debug('ElizaCloud: capping max_completion_tokens for context window', { - model: chosenModel, - estimatedInputTokensApprox: approxTokens, - maxContextTokens: spec.maxContextTokens, - maxCompletionTokens: capped, - }); - } - maxCompletionTokens = capped; + return this.complete(prompt, systemPrompt, options); } - - const requestOpts = this.runAbortSignal ? { signal: this.runAbortSignal } : undefined; - const response = await this.openai.chat.completions.create( - { model: chosenModel, messages, max_completion_tokens: maxCompletionTokens }, - requestOpts - ); - - let content = openAiChatCompletionContentToString(response.choices[0]?.message?.content); - - // Strip reasoning blocks emitted by models like Qwen. - // WHY: They waste ~30% output tokens and break parsers that expect content to start - // with the answer (e.g. startsWith('YES')). Second replace handles unclosed think (truncated output). - if (//i.test(content)) { - content = content - .replace(/[\s\S]*?<\/think>\s*/gi, '') - .replace(/[\s\S]*/i, '') - .trim(); - } - - return { - content, - usage: response.usage - ? { - inputTokens: response.usage.prompt_tokens, - outputTokens: response.usage.completion_tokens, - } - : undefined, - }; + return this.complete(prompt, systemPrompt, { ...options, model: cheapModel }); } // Static system prompt for checkIssueExists — extracted here so Anthropic can @@ -1011,10 +492,13 @@ ${codeSnippet} // ensure the model can actually respond to each one. // WHY smaller for ElizaCloud: Gateways often 500/504 on large requests. // Small models (14b, mini) get 10 issues per batch to avoid 200k-char prompts. + // Heavy reasoning models (e.g. Qwen-3-235b) also use 10 — audit (Cycle 72) showed ~8 min wall time + // for a single 21-issue batch; smaller batches improve latency and reduce timeout risk. + const isSmallOrHeavyElizaBatch = + /\b(14b|mini|qwen-3-14b|gpt-4o-mini)\b/i.test(this.model) || + /\bqwen-3-235b?\b/i.test(this.model); const defaultMaxPerBatch = - this.provider === 'elizacloud' - ? (/\b(14b|mini|qwen-3-14b|gpt-4o-mini)\b/i.test(this.model) ? 10 : 25) - : 50; + this.provider === 'elizacloud' ? (isSmallOrHeavyElizaBatch ? 10 : 25) : 50; const MAX_ISSUES_PER_BATCH = maxIssuesPerBatch ?? defaultMaxPerBatch; const batches: Array<{ issues: typeof issues; issueTexts: string[] }> = []; let currentBatch: typeof issues = []; @@ -1518,6 +1002,7 @@ ${codeSnippet} filePath: string; line: number | null; codeSnippet: string; + fixSiteInWindow?: boolean; }>; }>, batchIssueCount: number, @@ -1572,6 +1057,7 @@ ${codeSnippet} filePath: string; line: number | null; codeSnippet: string; + fixSiteInWindow?: boolean; }>, sourceGroups: Array<{ filePath: string; @@ -1582,6 +1068,7 @@ ${codeSnippet} filePath: string; line: number | null; codeSnippet: string; + fixSiteInWindow?: boolean; }>; }>, ): Array<{ @@ -1593,6 +1080,7 @@ ${codeSnippet} filePath: string; line: number | null; codeSnippet: string; + fixSiteInWindow?: boolean; }>; }> { const snippetById = new Map(); @@ -1639,6 +1127,7 @@ ${codeSnippet} filePath: string; line: number | null; codeSnippet: string; + fixSiteInWindow?: boolean; }>; }>, batchIssueCount: number, @@ -1732,6 +1221,7 @@ ${codeSnippet} filePath: string; line: number | null; codeSnippet: string; + fixSiteInWindow?: boolean; }>; }>, headerParts: string[], @@ -1827,6 +1317,8 @@ ${codeSnippet} filePath: string; line: number | null; codeSnippet: string; + /** When true (from `getFullFileForAudit`), skip UNFIXED demotion for excerpt-shaped snippets — anchor is in view. */ + fixSiteInWindow?: boolean; }>, maxContextChars: number = 400_000, /** Optional phase for prompts.log metadata (e.g. 'final-audit'). */ @@ -2063,22 +1555,23 @@ ${codeSnippet} } } - // Truncation guard: partial excerpt + UNFIXED without strong code cite (or visibility hedge) → pass. + // Truncation guard: partial excerpt + UNFIXED only when the model says the shown window is insufficient. + // WHY: Prior `!hasStrongCite || visibilityHedge` demoted substantive UNFIXED that lacked line quotes (pill-output). if ( !isFixed && + issue.fixSiteInWindow !== true && finalAuditSnippetLooksTruncatedOrExcerpt(issue.codeSnippet) && !snippetShowsUuidCommentAlignedWithVersionRange(issue.codeSnippet) ) { const hasStrongCite = /\bline\s+\d+/i.test(finalExplanation) && /`[^`\n]{2,120}`/.test(finalExplanation); - const visibilityHedge = explanationMentionsMissingCodeVisibility(finalExplanation); - if (!hasStrongCite || visibilityHedge) { - debug('Final audit demotion: excerpt/truncation + weak or hedged UNFIXED → pass', { + if (!hasStrongCite && finalAuditExplanationClaimsSnippetIsIncomplete(finalExplanation)) { + debug('Final audit demotion: excerpt/truncation + UNFIXED hinges on incomplete snippet view → pass', { issueId: issue.id, }); finalStatus = false; finalExplanation = - 'FIXED (truncation guard): Partial snippet; UNFIXED lacked line+code citation or admitted limited visibility. ' + + 'FIXED (truncation guard): Partial snippet; model indicated visible excerpt insufficient for UNFIXED. ' + finalExplanation; } } @@ -2333,9 +1826,11 @@ Respond with ONLY the lesson text, nothing else. Keep it under 150 characters.`; /** Max fixes per request to avoid 500 on large verification prompts (e.g. 26 fixes → 124k chars). */ private static readonly MAX_VERIFY_FIXES_PER_BATCH = 6; - /** Per-fix truncation so batches stay under gateway limits. WHY 8k/1500: Audit showed 2k code + 800 comment - * caused false negatives (verifier couldn't see relevant section); larger limits match anchored snippet size. */ - private static readonly MAX_VERIFY_CURRENT_CODE_CHARS = 8000; + /** + * Hard ceiling on batch verify user prompt size (chars) for ElizaCloud even when the model claims a large context. + * WHY: Gateways time out or drop connections on 30k–50k verify payloads (prompts.log audit); splitting batches cuts wall time. + */ + private static readonly MAX_VERIFY_BATCH_PROMPT_CHARS_ELIZACLOUD = 72_000; private static readonly MAX_VERIFY_DIFF_CHARS = 2500; private static readonly MAX_VERIFY_COMMENT_CHARS = 1500; @@ -2355,29 +1850,34 @@ Respond with ONLY the lesson text, nothing else. Keep it under 150 characters.`; } const results = new Map(); - const batchSize = LLMClient.MAX_VERIFY_FIXES_PER_BATCH; - const batches = Array.from( - { length: Math.ceil(fixes.length / batchSize) }, - (_, i) => fixes.slice(i * batchSize, (i + 1) * batchSize) - ); + const verifyModel = options?.model ?? this.verifierModel ?? this.model ?? ''; + const batches = this.partitionFixesForVerifyBatches(fixes, verifyModel); + if (batches.length > 1) { + debug('Verify batches split', { + batchCount: batches.length, + fixes: fixes.length, + provider: this.provider, + model: verifyModel, + }); + } // WHY verifierModel/options.model: Verification accuracy drives fix-loop decisions. Audit showed false negatives // with a weak default model. Prefer PRR_VERIFIER_MODEL (or caller override) over default llmModel for verification. const MAX_VERIFY_RETRIES = 1; for (let b = 0; b < batches.length; b++) { const batchFixes = batches[b]; - const batchPrompt = this.buildBatchVerifyPrompt(batchFixes); + const batchPrompt = this.buildBatchVerifyPrompt(batchFixes, verifyModel); debug('Batch verifying fixes', { batch: b + 1, totalBatches: batches.length, count: batchFixes.length, modelOverride: !!options?.model }); let batchResults: Map | null = null; for (let attempt = 0; attempt <= MAX_VERIFY_RETRIES; attempt++) { try { - const verifyModel = options?.model ?? this.verifierModel ?? this.model; const response = await this.complete(batchPrompt, undefined, { model: verifyModel }); batchResults = this.parseBatchVerifyResponse(batchFixes, response.content); break; } catch (err) { const msg = err instanceof Error ? err.message : String(err); - const isTransient = /500|502|504|timeout|gateway|ECONNRESET|ECONNREFUSED|socket hang up/i.test(msg); + const isTransient = + /500|502|504|timeout|gateway|ECONNRESET|ECONNREFUSED|socket hang up|connection error|ETIMEDOUT/i.test(msg); if (isTransient && attempt < MAX_VERIFY_RETRIES) { debug('Batch verify failed (transient), retrying', { batch: b + 1, attempt: attempt + 1, error: msg.slice(0, 80) }); continue; @@ -2458,82 +1958,49 @@ Respond with ONLY the lesson text, nothing else. Keep it under 150 characters.`; } /** - * When post-fix "Current Code" exceeds the verify char budget, keep a window around the review line - * instead of truncating from byte 0 (which drops the hunk and leaves only imports — prompts.log audit). + * Pack fixes into verify batches under {@link MAX_VERIFY_FIXES_PER_BATCH} and a char budget. + * WHY: Fixed "6 fixes" batches still produced 30k+ prompts with large files; ElizaCloud then stalls or connection-errors (prompts.log audit #0022). */ - private static truncateVerificationCurrentCode( - raw: string, - anchorLine: number | null | undefined, - maxChars: number, - ): string { - if (raw.length <= maxChars) return raw; - const lines = raw.split('\n'); - const footerLines: string[] = []; - const bodyLines = [...lines]; - while (bodyLines.length > 0) { - const last = bodyLines[bodyLines.length - 1] ?? ''; - if ( - /^\(end of file — \d+ lines total\)\s*$/.test(last) || - /^\.\.\. \(truncated — file has \d+ lines total\)\s*$/.test(last) - ) { - footerLines.unshift(last); - bodyLines.pop(); + private partitionFixesForVerifyBatches( + fixes: Array<{ + id: string; + comment: string; + filePath: string; + line?: number | null; + diff: string; + currentCode?: string; + }>, + verifyModel: string, + ): Array<(typeof fixes)[number][]> { + const maxPerBatch = LLMClient.MAX_VERIFY_FIXES_PER_BATCH; + const modelKey = verifyModel || this.model; + const maxChars = + this.provider === 'elizacloud' && modelKey + ? Math.min( + Math.floor(getMaxElizacloudLlmCompleteInputChars(modelKey) * 0.9), + LLMClient.MAX_VERIFY_BATCH_PROMPT_CHARS_ELIZACLOUD, + ) + : 200_000; + + const batches: Array<(typeof fixes)[number][]> = []; + let cur: (typeof fixes)[number][] = []; + for (const f of fixes) { + const trial = [...cur, f]; + if (trial.length > maxPerBatch) { + batches.push(cur); + cur = [f]; continue; } - break; - } - type Row = { lineNum: number; text: string }; - const rows: Row[] = []; - for (let i = 0; i < bodyLines.length; i++) { - const text = bodyLines[i] ?? ''; - const m = text.match(/^(\d+):\s?(.*)$/); - if (m) { - rows.push({ lineNum: parseInt(m[1]!, 10), text }); + const promptLen = this.buildBatchVerifyPrompt(trial, modelKey).length; + if (promptLen > maxChars && cur.length > 0) { + batches.push(cur); + cur = [f]; + continue; } + cur = trial; } - if (rows.length === 0) { - return raw.substring(0, Math.max(0, maxChars - 80)) + '\n... (truncated — snippet was cut for prompt size)'; - } - let center = Math.floor(rows.length / 2); - if (anchorLine != null && anchorLine > 0) { - let best = 0; - let bestDist = Infinity; - for (let k = 0; k < rows.length; k++) { - const d = Math.abs(rows[k].lineNum - anchorLine); - if (d < bestDist) { - bestDist = d; - best = k; - } - } - center = best; - } - let lo = center; - let hi = center; - const sliceText = () => rows.slice(lo, hi + 1).map((r) => r.text).join('\n'); - let chunk = sliceText(); - const note = '\n... (truncated — centered on review line for prompt budget)'; - const maxBody = Math.max(400, maxChars - note.length - footerLines.reduce((s, l) => s + l.length + 1, 0)); - while (chunk.length < maxBody && (lo > 0 || hi < rows.length - 1)) { - const canHi = hi < rows.length - 1; - const canLo = lo > 0; - if (canHi && (!canLo || hi - center <= center - lo)) hi++; - else if (canLo) lo--; - else if (canHi) hi++; - else break; - const next = sliceText(); - if (next.length > maxBody) break; - chunk = next; - } - while (chunk.length > maxBody && lo < hi) { - if (hi - center >= center - lo) hi--; - else lo--; - chunk = sliceText(); - } - if (chunk.length > maxBody) { - chunk = chunk.substring(0, Math.max(0, maxBody - 60)) + '\n...'; - } - const footer = footerLines.length > 0 ? '\n' + footerLines.join('\n') : ''; - return chunk + note + footer; + if (cur.length > 0) batches.push(cur); + return batches; } private buildBatchVerifyPrompt( @@ -2544,7 +2011,8 @@ Respond with ONLY the lesson text, nothing else. Keep it under 150 characters.`; line?: number | null; diff: string; currentCode?: string; - }> + }>, + verifyModel: string ): string { // Build batch prompt — verification + failure analysis in a single LLM call. const parts: string[] = [ @@ -2582,7 +2050,7 @@ Respond with ONLY the lesson text, nothing else. Keep it under 150 characters.`; '', ]; - const maxCode = LLMClient.MAX_VERIFY_CURRENT_CODE_CHARS; + const maxCode = computePerFixVerifyCurrentCodeBudget(verifyModel, fixes.length); const maxDiff = LLMClient.MAX_VERIFY_DIFF_CHARS; const maxComment = LLMClient.MAX_VERIFY_COMMENT_CHARS; for (let i = 0; i < fixes.length; i++) { @@ -2595,7 +2063,7 @@ Respond with ONLY the lesson text, nothing else. Keep it under 150 characters.`; const currentCode = rawCurrent && rawCurrent.length > 0 ? rawCurrent.length > maxCode - ? LLMClient.truncateVerificationCurrentCode(rawCurrent, fix.line ?? null, maxCode) + ? truncateNumberedCodeAroundAnchor(rawCurrent, fix.line ?? null, maxCode) : rawCurrent : undefined; const diff = @@ -2766,7 +2234,7 @@ Respond with ONLY the lesson text, nothing else. Keep it under 150 characters.`; baseBranch, filePath, options.previousParseError - ) + `\n\nOutput the COMPLETE resolved file. ${getConflictFileTypeRules(filePath)}` + ) + '\n\nOutput the COMPLETE resolved file.' : `You are resolving a Git merge conflict. FILE: ${filePath} diff --git a/tools/prr/llm/error-helpers.ts b/tools/prr/llm/error-helpers.ts index a902781..755207b 100644 --- a/tools/prr/llm/error-helpers.ts +++ b/tools/prr/llm/error-helpers.ts @@ -130,13 +130,26 @@ export function maskApiKey(key: string | undefined): string { return `length=${k.length}, prefix=${prefix}`; } -/** File-type-specific rules for conflict resolution prompt (reduces invalid JSON/TS output). */ +/** + * File-type rules for merge-conflict prompts (chunked 3-way and marker single-shot). + * Bullet list so it reads well alone (chunked) or after INSTRUCTIONS 1–5 (single-shot). + */ export function getConflictFileTypeRules(filePath: string): string { if (filePath.endsWith('.json')) { - return '\n6. Output must be strict JSON (no comments, no trailing commas).'; + const lines = [ + 'Output must be strict JSON (no comments, no trailing commas).', + 'No duplicate property keys in any object — invalid JSON and easy to produce when merging. Combine both sides so each key appears exactly once.', + ]; + if (/package\.json$/i.test(filePath)) { + lines.push( + 'package.json: merge every distinct key in "scripts", "dependencies", and "devDependencies" from BOTH sides; never output two entries with the same key (e.g. two "dev:desktop" lines).', + 'When the same script key exists on both sides with different command strings, prefer HEAD unless the incoming side clearly adds a new capability you must keep.', + ); + } + return `\n${lines.map(l => `- ${l}`).join('\n')}`; } if (/\.(ts|tsx|js|jsx|mjs|cjs)$/i.test(filePath)) { - return '\n6. Preserve all imports and ensure the result compiles.'; + return '\n- Preserve all imports and ensure the result compiles.'; } return ''; } diff --git a/tools/prr/llm/llm-client-transport.ts b/tools/prr/llm/llm-client-transport.ts new file mode 100644 index 0000000..bc9c043 --- /dev/null +++ b/tools/prr/llm/llm-client-transport.ts @@ -0,0 +1,524 @@ +/** + * Low-level LLM transport: Anthropic / OpenAI-compatible completion, retries, prompts.log. + * WHY split: `client.ts` mixed network I/O with batch analysis, verification, and conflict prompts; + * isolating transport makes retries and provider quirks easier to review and test. + */ +import type Anthropic from '@anthropic-ai/sdk'; +import chalk from 'chalk'; +import OpenAI from 'openai'; +import type { LLMProvider } from '../../../shared/config.js'; +import { debug, warn, trackTokens, debugPrompt, debugResponse, debugPromptError, formatNumber } from '../../../shared/logger.js'; +import { + ELIZACLOUD_API_BASE_URL, + getElizacloudGatewayFallbackModels, + getElizacloudServerErrorMaxRetries, +} from '../../../shared/constants.js'; +import { acquireElizacloud, releaseElizacloud, notifyRateLimitHit } from '../../../shared/llm/rate-limit.js'; +import { openAiChatCompletionContentToString } from '../../../shared/llm/openai-chat-content.js'; +import { + ELIZACLOUD_COMPLETION_CONTEXT_RESERVE_TOKENS, + ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS, + estimateElizacloudInputTokensFromCharLength, + getElizaCloudModelContextSpec, + getMaxElizacloudHardInputCeiling, + getMaxElizacloudLlmCompleteInputChars, + lowerModelMaxPromptChars, +} from '../../../shared/llm/model-context-limits.js'; +import { + elizaCloudServerErrorExpectationDebug, + getElizaCloudErrorContext, + isElizaCloudServerClassError, + isLikelyContextLengthExceededError, + maskApiKey, + sanitizeForJson, +} from './error-helpers.js'; +import type { CompleteOptions, LLMResponse } from './llm-client-types.js'; + +export interface LlmTransportDeps { + provider: LLMProvider; + model: string; + thinkingBudget?: number; + anthropic?: Anthropic; + openai?: OpenAI; + elizacloudKeyHint?: string; + runAbortSignal: AbortSignal | null; +} + +export async function completeAnthropicDep( + deps: LlmTransportDeps, + prompt: string, systemPrompt?: string, model?: string): Promise { + if (!deps.anthropic) { + throw new Error('Anthropic client not initialized'); + } + + const chosenModel = model ?? deps.model; + + // Build request options + // max_tokens is required by the Anthropic API — we can't omit it. + // Set it high so it's never the constraint; response length is controlled + // via prompt instructions, not this parameter. You only pay for tokens + // actually generated, not the budget ceiling. + // + // WHY 64K default: Sonnet/Haiku cap at 64K. Opus also caps at 64K unless + // extended thinking is enabled — requesting 128K without thinking causes 400. + const isHighOutputModel = chosenModel.includes('opus'); + const maxOutputTokens = (isHighOutputModel && deps.thinkingBudget) ? 128_000 : 64_000; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const requestOptions: any = { + model: chosenModel, + max_tokens: maxOutputTokens, + messages: [ + { + role: 'user', + content: prompt, + }, + ], + }; + + const maxTokens = requestOptions.max_tokens; + if (deps.thinkingBudget && deps.thinkingBudget >= maxTokens) { + throw new Error(`PRR_THINKING_BUDGET (${deps.thinkingBudget}) must be < max_tokens (${maxTokens})`); + } + + // Add extended thinking if budget is set + if (deps.thinkingBudget) { + requestOptions.thinking = { + type: 'enabled', + budget_tokens: deps.thinkingBudget, + }; + debug('Using extended thinking', { budget: deps.thinkingBudget }); + } else { + // Only use system prompt when not using extended thinking + // (extended thinking doesn't support system prompts). + // Use block format with cache_control so Anthropic caches the system + // prompt prefix across calls. Cache reads are 90% cheaper than base + // input — big win for repeated calls like batch analysis and verification. + const systemText = systemPrompt || 'You are a helpful code review assistant.'; + requestOptions.system = [ + { + type: 'text', + text: systemText, + cache_control: { type: 'ephemeral' }, + }, + ]; + } + + const requestOpts = deps.runAbortSignal ? { signal: deps.runAbortSignal } : undefined; + const response = await deps.anthropic.messages.create(requestOptions, requestOpts); + + // Extract text content (skip thinking blocks) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const content = response.content + .filter((block: any) => block.type === 'text' && 'text' in block) + .map((block: any) => block.text) + .join(''); + + // Log thinking if present (extended thinking feature) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const thinkingBlock = response.content.find((block: any) => block.type === 'thinking'); + if (thinkingBlock && 'thinking' in thinkingBlock) { + debug('Extended thinking output', (thinkingBlock as any).thinking); + } + + // Capture cache usage stats from Anthropic's response. + // WHY log: Without observability, you can't tell if caching is actually + // working. Cache hits depend on the system prompt exceeding the model's + // minimum cacheable size (1024 tokens for Sonnet, 2048 for Haiku). If + // you see only cacheWrite with zero cacheRead, the system prompt is too + // small or the prefix changed between calls. + const usage: any = response.usage; + const cacheCreation = usage.cache_creation_input_tokens || 0; + const cacheRead = usage.cache_read_input_tokens || 0; + if (cacheCreation > 0 || cacheRead > 0) { + debug('Anthropic prompt cache', { + cacheWrite: cacheCreation, + cacheRead: cacheRead, + inputTokens: response.usage.input_tokens, + outputTokens: response.usage.output_tokens, + savingsPercent: cacheRead > 0 + ? Math.round((cacheRead / (response.usage.input_tokens + cacheRead)) * 90) + '%' + : '0%', + }); + } + + return { + content, + usage: { + inputTokens: response.usage.input_tokens, + outputTokens: response.usage.output_tokens, + cacheCreationInputTokens: cacheCreation || undefined, + cacheReadInputTokens: cacheRead || undefined, + }, + }; +} + +export async function completeOpenAIDep( + deps: LlmTransportDeps, + prompt: string, systemPrompt?: string, model?: string): Promise { + if (!deps.openai) { + throw new Error('OpenAI client not initialized'); + } + + const chosenModel = model ?? deps.model; + + const messages: OpenAI.ChatCompletionMessageParam[] = []; + + // WHY suppress for Qwen: Asking the model not to emit reduces output tokens and latency; + // we still strip in response as a fallback for other models or when the instruction is ignored. + const noThinkSuffix = /\bqwen\b/i.test(chosenModel) + ? '\nDo NOT include tags or internal reasoning. Respond directly.' + : ''; + + if (systemPrompt) { + messages.push({ role: 'system', content: systemPrompt + noThinkSuffix }); + } else if (noThinkSuffix) { + messages.push({ role: 'system', content: noThinkSuffix.trim() }); + } + + messages.push({ role: 'user', content: prompt }); + + // Cap completion so estimated input + max_output stays under model context. + // WHY: Char preflight uses a separate budget; OpenAI-style APIs still validate **tokens**. + // Qwen3-14B is 24,576 ctx — ~32k chars ≈ ~20k input tok + 8192 max out → opaque HTTP 500 (audit). + const systemMessageChars = systemPrompt + ? (systemPrompt + noThinkSuffix).length + : noThinkSuffix + ? noThinkSuffix.trim().length + : 0; + const totalInputChars = systemMessageChars + prompt.length; + + let maxCompletionTokens = ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS; + if (deps.provider === 'elizacloud') { + const spec = getElizaCloudModelContextSpec(chosenModel); + const { approxTokens } = estimateElizacloudInputTokensFromCharLength(chosenModel, totalInputChars); + const headroom = spec.maxContextTokens - approxTokens - ELIZACLOUD_COMPLETION_CONTEXT_RESERVE_TOKENS; + const capped = Math.min( + ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS, + Math.max(256, headroom), + ); + if (capped < ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS) { + debug('ElizaCloud: capping max_completion_tokens for context window', { + model: chosenModel, + estimatedInputTokensApprox: approxTokens, + maxContextTokens: spec.maxContextTokens, + maxCompletionTokens: capped, + }); + } + maxCompletionTokens = capped; + } + + const requestOpts = deps.runAbortSignal ? { signal: deps.runAbortSignal } : undefined; + const response = await deps.openai.chat.completions.create( + { model: chosenModel, messages, max_completion_tokens: maxCompletionTokens }, + requestOpts + ); + + let content = openAiChatCompletionContentToString(response.choices[0]?.message?.content); + + // Strip reasoning blocks emitted by models like Qwen. + // WHY: They waste ~30% output tokens and break parsers that expect content to start + // with the answer (e.g. startsWith('YES')). Second replace handles unclosed think (truncated output). + if (//i.test(content)) { + content = content + .replace(/[\s\S]*?<\/think>\s*/gi, '') + .replace(/[\s\S]*/i, '') + .trim(); + } + + return { + content, + usage: response.usage + ? { + inputTokens: response.usage.prompt_tokens, + outputTokens: response.usage.completion_tokens, + } + : undefined, + }; +} + +export async function llmComplete( + deps: LlmTransportDeps, + prompt: string, systemPrompt?: string, options?: CompleteOptions): Promise { + // Sanitize inputs: strip unpaired UTF-16 surrogates that cause JSON serialization + // errors (Anthropic API returns 400 "no low surrogate in string"). These can appear + // in code snippets read from binary or corrupted files. + prompt = sanitizeForJson(prompt); + if (systemPrompt) { + systemPrompt = sanitizeForJson(systemPrompt); + } + + // Allow callers to override the model for this request (no instance mutation to avoid race conditions) + // WHY: The LLM client defaults to the verification model (often haiku), + // but some callers (like tryDirectLLMFix) need a stronger model for code fixing + const chosenModel = options?.model ?? deps.model; + + const baseDebug: Record = { + promptLength: prompt.length, + hasSystemPrompt: !!systemPrompt, + }; + if (deps.provider === 'elizacloud') { + const sysLen = systemPrompt?.length ?? 0; + const totalChars = prompt.length + sysLen; + const { approxTokens, assumedCharsPerToken } = estimateElizacloudInputTokensFromCharLength( + chosenModel, + totalChars, + ); + const spec = getElizaCloudModelContextSpec(chosenModel); + const worstOut = approxTokens + ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS; + baseDebug.requestTotalChars = totalChars; + baseDebug.estimatedInputTokensApprox = approxTokens; + baseDebug.tokenizerAssumptionCharsPerToken = assumedCharsPerToken; + baseDebug.maxCompletionTokensDefault = ELIZACLOUD_DEFAULT_MAX_COMPLETION_TOKENS; + baseDebug.estimatedInputPlusDefaultMaxOutputApprox = worstOut; + baseDebug.estimatedExceedsContextWithDefaultMaxOut = worstOut > spec.maxContextTokens; + } + debug(`LLM request to ${deps.provider}/${chosenModel}`, baseDebug); + + // ElizaCloud: fail fast when total input exceeds the model's **context-derived** hard + // ceiling. WHY hard ceiling vs budget: `lowerModelMaxPromptChars` adaptively shrinks the + // budget after timeouts (which may be gateway lag, not context overflow). A 40k prompt on + // a 200k-context model should never be rejected just because a prior timeout lowered the + // cap. Only reject when the prompt genuinely can't fit the model's context window. + if (deps.provider === 'elizacloud') { + const total = prompt.length + (systemPrompt?.length ?? 0); + const hardCeiling = getMaxElizacloudHardInputCeiling(chosenModel); + const softBudget = getMaxElizacloudLlmCompleteInputChars(chosenModel); + if (total > hardCeiling) { + const detail = elizaCloudServerErrorExpectationDebug(chosenModel, prompt, systemPrompt); + debug('ElizaCloud input exceeds context-derived hard ceiling', detail); + throw new Error( + `ElizaCloud request too large for ${chosenModel}: ${formatNumber(total)} chars (context ceiling ${formatNumber(hardCeiling)}).`, + ); + } + if (total > softBudget) { + debug( + `ElizaCloud prompt exceeds adaptive budget (${formatNumber(total)} chars > ${formatNumber(softBudget)}) but within context ceiling (${formatNumber(hardCeiling)}); proceeding`, + ); + } + } + + // Log full prompt to debug file + const fullPrompt = systemPrompt ? `[SYSTEM]\n${systemPrompt}\n\n[USER]\n${prompt}` : prompt; + const promptMeta: Record = { model: chosenModel }; + if (options?.phase != null) promptMeta.phase = options.phase; + const promptSlug = debugPrompt(`llm-${deps.provider}`, fullPrompt, promptMeta); + + const is429 = (e: unknown) => { + const status = (e as { status?: number })?.status; + const msg = e instanceof Error ? e.message : String(e); + return status === 429 || /429|Too many requests|rate limit/i.test(msg); + }; + const isServerError = (e: unknown) => { + const status = (e as { status?: number })?.status; + const msg = e instanceof Error ? e.message : String(e); + return status === 500 || /500|504|502|gateway.*timeout|deployment.*timeout|error occurred with your deployment/i.test(msg); + }; + + let elizaAcquired = false; + try { + if (deps.provider === 'elizacloud') { + await acquireElizacloud(); // same global limit / spacing as llm-api runner (`shared/llm/rate-limit.ts`) + elizaAcquired = true; + } + const max429Retries = deps.provider === 'elizacloud' ? 3 : 0; + const max504Retries = + options?.max504Retries ?? + (deps.provider === 'elizacloud' ? getElizacloudServerErrorMaxRetries() : 0); + const backoffMs = deps.provider === 'elizacloud' ? [60_000, 60_000, 60_000] : [2000, 4000, 8000]; + const backoff504Ms = deps.provider === 'elizacloud' ? [10_000, 20_000] : [10_000]; + // ElizaCloud STRICT = 10 req/min; short backoff (2s/4s/8s) sends 4 requests in ~14s → 429. Use 60s so retries stay under limit. + for (let attempt = 0; attempt <= max429Retries; attempt++) { + try { + let response: LLMResponse | undefined; + let requestModel = chosenModel; + let consecutiveElizacloudGatewayErrors = 0; + let elizacloudFallbackIdx = 0; + const elizacloudGatewayFallbackChain = + deps.provider === 'elizacloud' ? getElizacloudGatewayFallbackModels(chosenModel) : []; + + for (let attempt504 = 0; attempt504 <= max504Retries; attempt504++) { + try { + response = deps.provider === 'anthropic' + ? await completeAnthropicDep(deps, prompt, systemPrompt, chosenModel) + : await completeOpenAIDep(deps, prompt, systemPrompt, requestModel); + break; + } catch (e504) { + if (deps.provider === 'elizacloud') { + const base504 = getElizaCloudErrorContext(e504); + const payload504 = + isElizaCloudServerClassError(e504) + ? { ...base504, ...elizaCloudServerErrorExpectationDebug(requestModel, prompt, systemPrompt) } + : base504; + debug('ElizaCloud error (response context)', payload504); + } + const timeoutMsg = e504 instanceof Error && /timeout/i.test(e504.message); + const contextOverflow = isLikelyContextLengthExceededError(e504); + const totalChars = prompt.length + (systemPrompt?.length ?? 0); + const overHardCeiling = + deps.provider === 'elizacloud' && + totalChars > getMaxElizacloudHardInputCeiling(requestModel); + if (contextOverflow && deps.provider === 'elizacloud') { + lowerModelMaxPromptChars('elizacloud', requestModel, prompt.length); + debug('ElizaCloud context length exceeded — lowered prompt cap for this model', { + model: requestModel, + promptLength: formatNumber(prompt.length), + ...elizaCloudServerErrorExpectationDebug(requestModel, prompt, systemPrompt), + }); + } + + const gatewayClassRetry = + deps.provider === 'elizacloud' && (isServerError(e504) || timeoutMsg); + if (gatewayClassRetry) { + consecutiveElizacloudGatewayErrors++; + } else { + consecutiveElizacloudGatewayErrors = 0; + } + + if ( + deps.provider === 'elizacloud' && + consecutiveElizacloudGatewayErrors >= 2 && + elizacloudFallbackIdx < elizacloudGatewayFallbackChain.length + ) { + const nextModel = elizacloudGatewayFallbackChain[elizacloudFallbackIdx]!; + elizacloudFallbackIdx++; + console.warn( + chalk.yellow( + `ElizaCloud: ${formatNumber(2)} consecutive gateway/server errors on ${requestModel} — trying fallback model ${nextModel} (override chain: PRR_ELIZACLOUD_GATEWAY_FALLBACK_MODELS; disable: off).`, + ), + ); + requestModel = nextModel; + consecutiveElizacloudGatewayErrors = 0; + attempt504--; + continue; + } + + if ( + attempt504 < max504Retries && + (isServerError(e504) || timeoutMsg) && + !contextOverflow && + !overHardCeiling + ) { + const delayMs = Array.isArray(backoff504Ms) ? backoff504Ms[attempt504] ?? backoff504Ms[backoff504Ms.length - 1] : backoff504Ms; + debug('Server error or request timeout, retrying', { + attempt: attempt504 + 1, + maxRetries: max504Retries, + delayMs, + model: deps.provider === 'elizacloud' ? requestModel : chosenModel, + ...(deps.provider === 'elizacloud' + ? elizaCloudServerErrorExpectationDebug(requestModel, prompt, systemPrompt) + : {}), + }); + await new Promise(r => setTimeout(r, delayMs)); + } else { + throw e504; + } + } + } + + if (!response) throw new Error('LLM request failed after retries'); + + debug('LLM response', { + responseLength: response.content.length, + usage: response.usage, + }); + + // Pill #1, #4: Ensure we pass the accumulated response content, not empty string. + // The OpenAI/Anthropic SDKs should return full content, but add safeguard. + const responseContent = response.content || ''; + if (!responseContent && response.usage?.outputTokens && response.usage.outputTokens > 0) { + debug('WARNING: LLM response has usage tokens but empty content — possible streaming accumulation bug', { + provider: deps.provider, + model: requestModel, + outputTokens: response.usage.outputTokens, + }); + } + + if (response.usage) { + trackTokens(response.usage.inputTokens, response.usage.outputTokens); + } + + // WHY: writeToPromptLog refuses empty RESPONSE — audits would see orphan PROMPT slugs with no ERROR. + if (!responseContent.trim()) { + debugPromptError( + promptSlug, + `llm-${deps.provider}`, + 'Empty or whitespace-only response body (HTTP success but no text; prompts.log would not record a RESPONSE).', + { + model: requestModel, + usage: response.usage, + ...(options?.phase != null ? { phase: options.phase } : {}), + emptyBody: true, + } + ); + // WHY: Operators and CI often skip prompts.log; one stderr line ties empty LLM output to the ERROR slug. + console.warn( + chalk.yellow( + `${deps.provider}: empty response body from ${requestModel} (prompts.log has ERROR for this request).`, + ), + ); + } else { + const responseMeta: Record = { model: requestModel, usage: response.usage }; + if (options?.phase != null) responseMeta.phase = options.phase; + debugResponse(promptSlug, `llm-${deps.provider}`, responseContent, responseMeta); + } + + return response; + } catch (err) { + if (deps.provider === 'elizacloud') { + const status = (err as { status?: number })?.status; + const msg = err instanceof Error ? err.message : String(err); + if (status === 401 || /401|Unauthorized|Authentication required/i.test(msg)) { + const url = ELIZACLOUD_API_BASE_URL; + const keyHint = deps.elizacloudKeyHint ?? maskApiKey(undefined); + debug('ElizaCloud 401', { requestURL: `${url}/chat/completions`, apiKey: keyHint, ...getElizaCloudErrorContext(err) }); + debugPromptError(promptSlug, `llm-${deps.provider}`, msg, { + model: chosenModel, + status: 401, + ...(options?.phase != null ? { phase: options.phase } : {}), + }); + throw new Error( + `ElizaCloud API key was rejected (401 Unauthorized). ` + + `Request URL: ${url}/chat/completions. API key: ${keyHint}. ` + + `Check that ELIZACLOUD_API_KEY in .env is correct for this URL, has no extra spaces/newlines, and has not been revoked.` + ); + } + if (is429(err)) { + notifyRateLimitHit(); + if (attempt < max429Retries) { + const wait = backoffMs[attempt] ?? 8000; + debug(`ElizaCloud 429, retry ${attempt + 1}/${max429Retries} in ${wait}ms`); + await new Promise(r => setTimeout(r, wait)); + continue; + } + } + } + if (deps.provider === 'elizacloud') { + const baseErr = getElizaCloudErrorContext(err); + const payloadErr = + isElizaCloudServerClassError(err) + ? { ...baseErr, ...elizaCloudServerErrorExpectationDebug(chosenModel, prompt, systemPrompt) } + : baseErr; + debug('ElizaCloud error (response context)', payloadErr); + } + // WHY: Connection errors / exhausted retries throw here — without ERROR, prompts.log shows orphan PROMPT only (audit: #0022). + const terminalMsg = err instanceof Error ? err.message : String(err); + debugPromptError(promptSlug, `llm-${deps.provider}`, terminalMsg.slice(0, 12_000), { + model: chosenModel, + status: (err as { status?: number })?.status, + is504: isServerError(err), + isTimeout: /timeout|connection error/i.test(terminalMsg), + ...(options?.phase != null ? { phase: options.phase } : {}), + }); + throw err; + } + } + // TypeScript: each iteration returns from `try` or throws from `catch` (429 uses `continue` inside `catch`). + throw new Error('LLM complete: unexpected end of retry loop'); + } finally { + if (deps.provider === 'elizacloud' && elizaAcquired) { + releaseElizacloud(); + } + // Review: ensures slot release only if acquisition is successful to maintain accurate in-flight count. + } + } diff --git a/tools/prr/llm/llm-client-types.ts b/tools/prr/llm/llm-client-types.ts new file mode 100644 index 0000000..373a003 --- /dev/null +++ b/tools/prr/llm/llm-client-types.ts @@ -0,0 +1,75 @@ +/** + * Shared types for the PRR LLM client (transport + higher-level operations). + * WHY: Keeps `client.ts` as a thin facade without circular imports between split modules. + */ + +export interface LLMResponse { + content: string; + usage?: { + inputTokens: number; + outputTokens: number; + /** Tokens written to Anthropic's prompt cache (1.25x cost, 5-min TTL). */ + cacheCreationInputTokens?: number; + /** Tokens read from Anthropic's prompt cache (0.1x cost — 90% savings). */ + cacheReadInputTokens?: number; + }; +} + +export interface CompleteOptions { + model?: string; + /** + * Override the generic ElizaCloud 500/504 retry count for special callers. + * WHY: Conflict resolution should fall back to chunked/manual strategies quickly + * instead of spending ~10 minutes exhausting the global retry ladder first. + */ + max504Retries?: number; + /** Optional phase label for prompts.log metadata (e.g. batch-verify, final-audit). Helps pill and auditors filter by step. */ + phase?: string; +} + +/** + * Batch check result with optional model recommendation + */ +export interface BatchCheckResult { + issues: Map< + string, + { + exists: boolean; + explanation: string; + stale: boolean; + /** + * Importance score (1-5): 1=critical, 5=trivial. + * Defaults to 3 if LLM doesn't provide or issue is NO/STALE. + */ + importance: number; + /** + * Fix difficulty score (1-5): 1=easy one-liner, 5=major refactor. + * Defaults to 3 if LLM doesn't provide or issue is NO/STALE. + */ + ease: number; + } + >; + /** Recommended models to use for fixing, in order of preference */ + recommendedModels?: string[]; + /** Reasoning behind the model recommendation */ + modelRecommendationReasoning?: string; + /** True when a batch failed (e.g. 504) but earlier batches were returned so state can be persisted */ + partial?: boolean; +} + +/** + * Filter attempt history to only lines for issues in the current batch. + * WHY: Audit showed full history (all issues) sent to every verify batch; only the current batch is relevant. + * NOTE: batchIds should be raw comment IDs (PRRC_...) matching the format from getAttemptHistoryForIssues. + * The batch input uses synthetic issue_N IDs, so callers must map back to comment IDs before calling this. + */ +export function filterAttemptHistoryToBatch(attemptHistory: string, batchIds: string[]): string { + const set = new Set(batchIds); + return attemptHistory + .split('\n') + .filter((line) => { + const m = line.match(/^Issue\s+(\S+):/); + return m && set.has(m[1]); + }) + .join('\n'); +} diff --git a/tools/prr/llm/verification-heuristics.ts b/tools/prr/llm/verification-heuristics.ts index 238c196..803e5c8 100644 --- a/tools/prr/llm/verification-heuristics.ts +++ b/tools/prr/llm/verification-heuristics.ts @@ -50,6 +50,15 @@ export function snippetShowsUuidCommentAlignedWithVersionRange(codeSnippet: stri * parroted review text when the model never saw the implementation region (pill-output final-audit cluster). */ export function finalAuditSnippetLooksTruncatedOrExcerpt(snippet: string): boolean { + // Line-centered budget excerpts from fitToBudget — anchor line is in the visible window; do not + // treat like blind truncation for UNFIXED demotion (Pattern G / pill-output final-audit cluster). + if ( + /centered on line [\d,]+/i.test(snippet) && + (/\(excerpt — [\d,]+ lines; centered on line/i.test(snippet) || + /\(excerpt only — file has [\d,]+ lines; centered on line/i.test(snippet)) + ) { + return false; + } return ( /truncated for model context limit — final audit/i.test(snippet) || /more lines omitted — file exceeds/i.test(snippet) || @@ -59,6 +68,31 @@ export function finalAuditSnippetLooksTruncatedOrExcerpt(snippet: string): boole ); } +/** + * True when the model says the **shown** snippet/excerpt is incomplete relative to what it needs + * (outside the window, rest of file, etc.). **WHY:** Truncation-guard demotion should apply only when + * the UNFIXED rationale explicitly hinges on not seeing enough code — not when the model gives a + * substantive UNFIXED from visible context without line quotes (pill-output). + */ +export function finalAuditExplanationClaimsSnippetIsIncomplete(explanation: string): boolean { + const e = explanation.toLowerCase(); + return ( + /\b(not|isn't|is not)\s+(visible|shown|included)\s+in\s+(the\s+)?(provided|shown|excerpt|snippet)/.test( + e, + ) || + /\b(excerpt|snippet)\s+(does not|doesn't)\s+(include|show|contain)/.test(e) || + /\boutside\s+(of\s+)?(the\s+)?(shown|provided)\s+(code|snippet|excerpt)/.test(e) || + /\b(rest|remainder)\s+of\s+the\s+file\b/.test(e) || + /\belsewhere\s+in\s+the\s+file\b/.test(e) || + /\bcannot\s+(see|view|verify)\s+(the\s+)?(rest|full|remaining|complete)\b/.test(e) || + /\b(full|entire)\s+file\b.*\b(not|isn't)\s+(shown|provided|visible)/.test(e) || + /\bimplementation\s+(may be|might be|could be)\s+(elsewhere|outside)/.test(e) || + /\breported\s+(line|region|location)\b.*\b(not\s+in|outside)\s+(the\s+)?(excerpt|snippet)/.test(e) || + /\bcannot\s+verify\b.*\b(truncated|unavailable|excerpt|snippet)\b/.test(e) || + /\bnot\s+visible\s+in\s+(the\s+)?(provided|current)\s+(code|snippet|excerpt)\b/.test(e) + ); +} + export function explanationMentionsMissingCodeVisibility(explanation: string): boolean { return ( /snippet.*(?:truncated|unavailable)/i.test(explanation) || diff --git a/tools/prr/models/rotation.ts b/tools/prr/models/rotation.ts index 15ac8e6..09c5fb5 100644 --- a/tools/prr/models/rotation.ts +++ b/tools/prr/models/rotation.ts @@ -6,6 +6,14 @@ import chalk from 'chalk'; import type { Runner } from '../../../shared/runners/types.js'; import { detectAvailableRunners, getRunnerByName, printRunnerSummary, DEFAULT_MODEL_ROTATIONS } from '../../../shared/runners/detect.js'; import { ensureRotationSession, type StateContext } from '../state/state-context.js'; + +function modelRunStatsLine(stateContext: StateContext | undefined, runnerName: string, model: string): string { + const rs = stateContext?.rotationSession; + if (!rs) return ''; + const st = rs.modelStats.get(sessionModelKey(runnerName, model)); + if (!st) return ''; + return ` — this run: ${formatNumber(st.fixes)} verified / ${formatNumber(st.failures)} failed`; +} import * as Rotation from '../state/state-rotation.js'; import * as Bailout from '../state/state-bailout.js'; import type { CLIOptions } from '../cli.js'; @@ -72,13 +80,25 @@ export function maybeResetSessionSkippedModelsAfterFixIteration( fixIteration: number, ): void { const every = getSessionModelSkipResetAfterFixIterations(); - if (every <= 0 || fixIteration <= 0 || fixIteration % every !== 0) return; - const skipped = stateContext.rotationSession?.skippedModelKeys; - if (!skipped?.size) return; - const n = skipped.size; - skipped.clear(); + if (every <= 0 || fixIteration <= 0) return; + const rs = ensureRotationSession(stateContext); + const skipped = rs.skippedModelKeys; + if (!skipped.size) return; + const sinceMap = rs.sessionSkippedSinceFixIteration ?? new Map(); + if (!rs.sessionSkippedSinceFixIteration) rs.sessionSkippedSinceFixIteration = sinceMap; + + const toRemove: string[] = []; + for (const key of skipped) { + const since = sinceMap.get(key) ?? 0; + if (fixIteration - since >= every) toRemove.push(key); + } + if (toRemove.length === 0) return; + for (const key of toRemove) { + skipped.delete(key); + sinceMap.delete(key); + } warn( - `PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS (${formatNumber(every)}): cleared ${formatNumber(n)} session-skipped model key(s) — rotation may retry those models this run.`, + `PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS (${formatNumber(every)}): cleared ${formatNumber(toRemove.length)} session-skipped model key(s) after ${formatNumber(every)}+ fix iteration(s) per key — rotation may retry those models this run.`, ); } @@ -87,7 +107,9 @@ export function recordSessionModelVerificationOutcome( runnerName: string, model: string | undefined, verifiedCount: number, - failedCount: number + failedCount: number, + /** Completed fix iteration (1-based) when this outcome is recorded — used for per-key session skip retry window. */ + fixIteration?: number, ): void { const threshold = getSessionModelSkipFailureThreshold(); if (threshold <= 0) return; @@ -100,12 +122,15 @@ export function recordSessionModelVerificationOutcome( rs.modelStats.set(key, cur); if (cur.fixes > 0) { if (rs.skippedModelKeys.delete(key)) { + rs.sessionSkippedSinceFixIteration?.delete(key); debug('Session model skip cleared after verified fix', { key }); } return; } if (cur.failures >= threshold && !rs.skippedModelKeys.has(key)) { rs.skippedModelKeys.add(key); + const iter = fixIteration ?? 0; + rs.sessionSkippedSinceFixIteration.set(key, iter); warn( `${runnerName} / ${m}: ${formatNumber(cur.failures)} verification failure(s) with no verified fixes this run — skipping this model until next run. ` + `Set PRR_SESSION_MODEL_SKIP_FAILURES=0 to disable. For persistent poor performers, extend ELIZACLOUD_SKIP_MODEL_IDS in shared/constants.ts, set PRR_ELIZACLOUD_EXTRA_SKIP_MODELS for env-specific skips, or use PRR_ELIZACLOUD_INCLUDE_MODELS to re-enable.`, @@ -313,7 +338,9 @@ export function advanceModel(ctx: RotationContext, stateContext: StateContext, o if (ctx.recommendedModelIndex < ctx.recommendedModels.length) { const nextModel = ctx.recommendedModels[ctx.recommendedModelIndex]; const prevModel = ctx.recommendedModels[ctx.recommendedModelIndex - 1]; - console.log(chalk.yellow(`\n 🔄 Next recommended model: ${prevModel} → ${nextModel}`)); + warn( + `\n 🔄 Next recommended model: ${prevModel} → ${nextModel}${modelRunStatsLine(ctx.stateContext, ctx.runner.name, prevModel)}`, + ); return true; } @@ -358,7 +385,9 @@ export function rotateModel(ctx: RotationContext, stateContext: StateContext): b Rotation.setModelIndex(stateContext, ctx.runner.name, nextIndex); ctx.modelsTriedThisToolRound++; - console.log(chalk.yellow(`\n 🔄 Rotating model: ${previousModel} → ${nextModel}`)); + warn( + `\n 🔄 Rotating model: ${previousModel} → ${nextModel}${modelRunStatsLine(ctx.stateContext, ctx.runner.name, previousModel)}`, + ); return true; } @@ -389,7 +418,7 @@ export function switchToNextRunner(ctx: RotationContext, stateContext: StateCont const newModel = getCurrentModel(ctx, options ?? ({} as CLIOptions)); const modelInfo = newModel ? ` (${newModel})` : ''; - console.log(chalk.yellow(`\n 🔄 Switching fixer: ${previousRunner} → ${ctx.runner.name}${modelInfo}`)); + warn(`\n 🔄 Switching fixer: ${previousRunner} → ${ctx.runner.name}${modelInfo}`); return true; // Review: passing options ensures consistent model selection with active CLI flags. } @@ -876,6 +905,20 @@ export async function validateAndFilterModels( } } + if (isLlMApi && useElizaCloudForLlMApi && validModels.length === 0) { + throw new Error( + 'ElizaCloud: no models remain after the built-in skip list and gateway filter. ' + + 'Set PRR_ELIZACLOUD_INCLUDE_MODELS to re-enable at least one id, or see docs/MODELS.md.', + ); + } + if (isLlMApi && useElizaCloudForLlMApi && validModels.length === 1) { + console.warn( + chalk.yellow( + ` ⚠ Only ${formatNumber(1)} ElizaCloud model in rotation after skips — a single failure blocks fixes until the next rotation step. Consider PRR_ELIZACLOUD_INCLUDE_MODELS (see docs/MODELS.md). Pin the working id with PRR_LLM_MODEL (and PRR_VERIFIER_MODEL / PRR_FINAL_AUDIT_MODEL if needed — see README).`, + ), + ); + } + // User-visible warning when configured default was skipped (pill-output #2) if (skippedConfiguredDefault) { const replacement = validModels.length > 0 ? validModels[0] : '(none; add other models or remove from skip list)'; @@ -889,7 +932,7 @@ export async function validateAndFilterModels( !thinElizacloudPoolWarned && isLlMApi && useElizaCloudForLlMApi && - validModels.length > 0 && + validModels.length >= 2 && validModels.length <= 3 ) { thinElizacloudPoolWarned = true; diff --git a/tools/prr/resolver-proc.ts b/tools/prr/resolver-proc.ts index 49f79af..9b1d66e 100644 --- a/tools/prr/resolver-proc.ts +++ b/tools/prr/resolver-proc.ts @@ -43,6 +43,7 @@ export { getFullFileForAudit, findUnresolvedIssues, } from './workflow/issue-analysis.js'; +export type { FullFileForAuditResult } from './workflow/issue-analysis.js'; // Startup workflows export { diff --git a/tools/prr/resolver.ts b/tools/prr/resolver.ts index e444304..fbe4fe4 100644 --- a/tools/prr/resolver.ts +++ b/tools/prr/resolver.ts @@ -37,6 +37,7 @@ import * as ResolverProc from './resolver-proc.js'; import * as Performance from './state/state-performance.js'; import { getWiderSnippetForAnalysis } from './workflow/issue-analysis.js'; import { getFullFileContentForSingleIssue } from './workflow/utils.js'; +import { resolveTrackedPathWithPrFiles } from './workflow/helpers/solvability.js'; export class PRResolver { private config: Config; @@ -148,9 +149,40 @@ export class PRResolver { /** Reset model rotation to first model (call at start of each push iteration when pushIteration > 1). WHY: Each push cycle gets best model first instead of retrying the model that may have just 500'd or timed out. */ private resetRotationToFirstModel(): void { const ctx = this.getRotationContext(); Rotation.resetCurrentModelToFirst(ctx, this.stateContext); this.syncRotationContext(ctx); } private async executeBailOut(unresolvedIssues: UnresolvedIssue[], comments: ReviewComment[]): Promise { const result = await ResolverProc.executeBailOut(unresolvedIssues, comments, this.stateContext, this.lessonsContext, this.runners, this.options, (runner) => this.getModelsForRunner(runner), this.workdir, this.llm); this.bailedOut = result.bailedOut; this.exitReason = result.exitReason; this.exitDetails = result.exitDetails; this.finalUnresolvedIssues = result.finalUnresolvedIssues; this.finalComments = result.finalComments; } - private async trySingleIssueFix(issues: UnresolvedIssue[], git: SimpleGit, verifiedThisSession?: Set): Promise { return await ResolverProc.trySingleIssueFix(issues, git, this.workdir, this.runner, this.stateContext, this.lessonsContext, this.llm, verifiedThisSession, (issue, options) => this.buildSingleIssuePrompt(issue, options), () => this.getCurrentModel(), (output) => this.parseNoChangesExplanation(output), (output, maxLength) => this.sanitizeOutputForLog(output, maxLength), this.config.openaiApiKey); } + private async trySingleIssueFix( + issues: UnresolvedIssue[], + git: SimpleGit, + verifiedThisSession?: Set, + comments?: import('./github/types.js').ReviewComment[], + ): Promise { + return await ResolverProc.trySingleIssueFix( + issues, + git, + this.workdir, + this.runner, + this.stateContext, + this.lessonsContext, + this.llm, + verifiedThisSession, + (issue, options) => this.buildSingleIssuePrompt(issue, options), + () => this.getCurrentModel(), + (output) => this.parseNoChangesExplanation(output), + (output, maxLength) => this.sanitizeOutputForLog(output, maxLength), + this.config.openaiApiKey, + comments, + ); + } private async buildSingleIssuePrompt(issue: UnresolvedIssue, options?: { pathExists?: (path: string) => boolean }): Promise { - const primaryPath = issue.resolvedPath ?? issue.comment.path; + const prFiles = this.stateContext.prChangedFilesForRecovery; + const primaryPath = + issue.resolvedPath + ?? resolveTrackedPathWithPrFiles( + this.workdir, + issue.comment.path, + issue.comment.body ?? '', + prFiles, + ) + ?? issue.comment.path; let codeSnippetOverride: string | undefined; if (this.stateContext.state?.widerSnippetRequestedByCommentId?.[issue.comment.id]) { codeSnippetOverride = await getWiderSnippetForAnalysis(this.workdir, primaryPath, issue.comment.line ?? null, issue.comment.body); @@ -165,7 +197,24 @@ export class PRResolver { const lastApplyError = this.stateContext.state?.lastApplyErrorByCommentId?.[issue.comment.id]; return ResolverProc.buildSingleIssuePrompt(issue, this.lessonsContext, this.prInfo, codeSnippetOverride, { pathExists, lastApplyError }); } - private async tryDirectLLMFix(issues: UnresolvedIssue[], git: SimpleGit, verifiedThisSession?: Set): Promise { return await ResolverProc.tryDirectLLMFix(issues, git, this.workdir, this.config.llmProvider, this.llm, this.stateContext, verifiedThisSession, this.lessonsContext); } + private async tryDirectLLMFix( + issues: UnresolvedIssue[], + git: SimpleGit, + verifiedThisSession?: Set, + comments?: ReviewComment[], + ): Promise { + return await ResolverProc.tryDirectLLMFix( + issues, + git, + this.workdir, + this.config.llmProvider, + this.llm, + this.stateContext, + verifiedThisSession, + this.lessonsContext, + comments, + ); + } async gracefulShutdown(): Promise { this.isShuttingDown = await ResolverProc.executeGracefulShutdown(this.isShuttingDown, this.stateContext, () => this.printModelPerformance(), () => this.printFinalSummary()); } isRunning(): boolean { return !this.isShuttingDown; } @@ -197,10 +246,10 @@ export class PRResolver { getCodeSnippet: (path, line, commentBody) => this.getCodeSnippet(path, line, commentBody), printUnresolvedIssues: (issues) => this.printUnresolvedIssues(issues), parseNoChangesExplanation: (output) => this.parseNoChangesExplanation(output), - trySingleIssueFix: (issues, git, verified) => this.trySingleIssueFix(issues, git, verified), + trySingleIssueFix: (issues, git, verified, comments) => this.trySingleIssueFix(issues, git, verified, comments), tryRotation: (failureErrorType?: string) => this.tryRotation(failureErrorType), resetRotationToFirstModel: () => this.resetRotationToFirstModel(), - tryDirectLLMFix: (issues, git, verified) => this.tryDirectLLMFix(issues, git, verified), + tryDirectLLMFix: (issues, git, verified, comments) => this.tryDirectLLMFix(issues, git, verified, comments), executeBailOut: (issues, comments) => this.executeBailOut(issues, comments), onDisableRunner: (name) => this.disabledRunners.add(name), checkForNewBotReviews: (o, r, n, ids, headSha) => this.checkForNewBotReviews(o, r, n, ids, headSha), diff --git a/tools/prr/state/index.ts b/tools/prr/state/index.ts index 4a59b4d..86b8194 100644 --- a/tools/prr/state/index.ts +++ b/tools/prr/state/index.ts @@ -2,6 +2,9 @@ * State management exports - procedural functions */ +export { transitionIssue, type IssueStateTransition } from './state-transitions.js'; +export type { MarkVerifiedOptions } from './state-verification.js'; + // Core export * from './state-context.js'; export * as Core from './state-core.js'; diff --git a/tools/prr/state/manager.ts b/tools/prr/state/manager.ts index ca62837..38fd1dd 100644 --- a/tools/prr/state/manager.ts +++ b/tools/prr/state/manager.ts @@ -19,6 +19,13 @@ import type { ResolverState, Iteration, VerificationResult, TokenUsageRecord, Mo import { createInitialState } from './types.js'; import { loadOverallTimings, getOverallTimings, loadOverallTokenUsage, getOverallTokenUsage, formatNumber } from '../../../shared/logger.js'; import * as Normalize from './lessons-normalize.js'; +import type { StateContext } from './state-context.js'; +import { transitionIssue } from './state-transitions.js'; +import { + applyDismissedIssuesLoadNormalization, + applyResolverStateLoadCoreNormalization, + applyResolverStatePostOverlapCleanup, +} from './state-core.js'; const STATE_FILENAME = '.pr-resolver-state.json'; @@ -49,33 +56,86 @@ export class StateManager { if (this.state.headSha !== headSha) { const prevSha = this.state.headSha?.slice(0, 7); this.state.headSha = headSha; + delete this.state.sessionSkippedModelKeys; + delete this.state.sessionModelStats; + delete this.state.sessionSkippedSinceFixIteration; const hadVerified = (this.state.verifiedFixed?.length ?? 0) + (this.state.verifiedComments?.length ?? 0) > 0; const hadPartial = Object.keys(this.state.partialConflictResolutions ?? {}).length > 0; // Pill #9: Also clear dismissed (especially already-fixed) on head change — stale dismissals can mask regressions const hadDismissed = (this.state.dismissedIssues?.length ?? 0) > 0; if (hadVerified) { + const clearedVerifiedIds = [ + ...new Set([ + ...(this.state.verifiedFixed ?? []), + ...(this.state.verifiedComments ?? []).map((v) => v.commentId), + ]), + ]; + const showN = 25; + const idSample = + clearedVerifiedIds.length === 0 + ? '' + : ` — IDs (${formatNumber(clearedVerifiedIds.length)} total, showing up to ${formatNumber(showN)}): ${clearedVerifiedIds.slice(0, showN).join(', ')}${clearedVerifiedIds.length > showN ? ' …' : ''}`; this.state.verifiedFixed = []; this.state.verifiedComments = []; - console.warn(`PR head changed (${prevSha} → ${headSha.slice(0, 7)}): cleared verified state so fixes are re-checked against current code`); + // Also clear verified/resolved entries in commentStatuses so callers don't see stale + // 'resolved' or 'verified' statuses for comments that are no longer confirmed fixed. + // WHY: Without this, commentStatuses retains 'status: resolved' for IDs that were just + // cleared from verifiedFixed/verifiedComments, producing misleading state maps that show + // a comment as resolved while the verified arrays say otherwise (Pattern H, 2026-04-05). + if (this.state.commentStatuses) { + let statusCleared = 0; + for (const [id, st] of Object.entries(this.state.commentStatuses)) { + if ((st as { status?: string }).status === 'resolved' || (st as { status?: string }).status === 'verified') { + delete this.state.commentStatuses[id]; + statusCleared++; + } + } + if (statusCleared > 0) { + console.warn(`PR head changed: also cleared ${formatNumber(statusCleared)} verified/resolved commentStatuses entries`); + } + } + console.warn( + `PR head changed (${prevSha} → ${headSha.slice(0, 7)}): cleared verified state so fixes are re-checked against current code${idSample}`, + ); } if (hadDismissed) { const clearAllRaw = process.env.PRR_CLEAR_ALL_DISMISSED_ON_HEAD?.trim().toLowerCase(); const clearAll = clearAllRaw === '1' || clearAllRaw === 'true' || clearAllRaw === 'yes' || clearAllRaw === 'on'; if (clearAll) { - const n = this.state.dismissedIssues?.length ?? 0; + const priorDismissed = this.state.dismissedIssues ?? []; + const n = priorDismissed.length; + const showD = 25; + const dismissedIdSample = + n === 0 + ? '' + : ` — comment IDs (showing up to ${formatNumber(showD)}): ${priorDismissed + .slice(0, showD) + .map((d) => d.commentId) + .join(', ')}${n > showD ? ' …' : ''}`; this.state.dismissedIssues = []; console.warn( - `PR head changed (${prevSha} → ${headSha.slice(0, 7)}): cleared ${formatNumber(n)} dismissal(s) — PRR_CLEAR_ALL_DISMISSED_ON_HEAD`, + `PR head changed (${prevSha} → ${headSha.slice(0, 7)}): cleared ${formatNumber(n)} dismissal(s) — PRR_CLEAR_ALL_DISMISSED_ON_HEAD${dismissedIdSample}`, ); } else { - // Clear already-fixed dismissals (most likely to be stale) but keep others (e.g. not-an-issue, stale) - const before = this.state.dismissedIssues?.length ?? 0; - this.state.dismissedIssues = (this.state.dismissedIssues ?? []).filter((d) => d.category !== 'already-fixed'); + // Clear code-/thread-dependent dismissals; keep e.g. not-an-issue, path-unresolved, path-fragment, false-positive. + const prior = this.state.dismissedIssues ?? []; + const before = prior.length; + const dropCategories = new Set(['already-fixed', 'chronic-failure', 'stale']); + const removedRows = prior.filter((d) => dropCategories.has(d.category)); + this.state.dismissedIssues = prior.filter((d) => !dropCategories.has(d.category)); const cleared = before - (this.state.dismissedIssues?.length ?? 0); if (cleared > 0) { + const showD = 25; + const dismissedIdSample = + removedRows.length === 0 + ? '' + : ` — removed comment IDs (showing up to ${formatNumber(showD)}): ${removedRows + .slice(0, showD) + .map((d) => d.commentId) + .join(', ')}${removedRows.length > showD ? ' …' : ''}`; console.warn( - `PR head changed: cleared ${formatNumber(cleared)} already-fixed dismissal(s) so they are re-checked against current code`, + `PR head changed: cleared ${formatNumber(cleared)} already-fixed/chronic-failure/stale dismissal(s) so they are re-checked against current code${dismissedIdSample}`, ); } } @@ -99,29 +159,28 @@ export class StateManager { console.log(`Compacted ${removed} duplicate lessons (${this.state.lessonsLearned.length} unique remaining)`); } - // Deduplicate verifiedFixed on load - if (this.state.verifiedFixed && this.state.verifiedFixed.length > 0) { - const before = this.state.verifiedFixed.length; - this.state.verifiedFixed = [...new Set(this.state.verifiedFixed)]; - const dupsRemoved = before - this.state.verifiedFixed.length; - if (dupsRemoved > 0) { - console.log(`Deduplicated verifiedFixed: removed ${dupsRemoved} duplicate(s) (${this.state.verifiedFixed.length} unique)`); - } - } - - // Load cumulative stats from previous sessions - if (this.state.totalTimings) { - loadOverallTimings(this.state.totalTimings); - } - if (this.state.totalTokenUsage) { - loadOverallTokenUsage(this.state.totalTokenUsage); - } + applyResolverStateLoadCoreNormalization(this.state); // Initialize new fields for backward compatibility if (!this.state.dismissedIssues) { this.state.dismissedIssues = []; } + const { + list: normalizedDismissed, + fragmentNormalized, + dedupeRemoved: dismissedDupes, + } = applyDismissedIssuesLoadNormalization(this.state.dismissedIssues); + this.state.dismissedIssues = normalizedDismissed; + if (fragmentNormalized > 0) { + console.log(`Normalized ${formatNumber(fragmentNormalized)} legacy fragment dismissal(s) to path-fragment`); + } + if (dismissedDupes > 0) { + console.log( + `Deduplicated dismissedIssues: removed ${formatNumber(dismissedDupes)} duplicate row(s) for the same comment id (kept latest dismissedAt / canonical path category)`, + ); + } + // Keep verifiedFixed and dismissedIssues mutually exclusive (pill #3; output.log audit). const verifiedAll = new Set([ ...(this.state.verifiedFixed ?? []), @@ -129,35 +188,48 @@ export class StateManager { ]); const dismissedIds = new Set((this.state.dismissedIssues ?? []).map((d) => d.commentId)); if (verifiedAll.size > 0 && (this.state.dismissedIssues?.length ?? 0) > 0) { + const overlapDismissed = this.state.dismissedIssues!.filter((d) => verifiedAll.has(d.commentId)); const beforeD = this.state.dismissedIssues!.length; this.state.dismissedIssues = this.state.dismissedIssues!.filter((d) => !verifiedAll.has(d.commentId)); const removedD = beforeD - this.state.dismissedIssues.length; if (removedD > 0) { + const ids = overlapDismissed.map((d) => d.commentId); + const show = ids.slice(0, 15).join(', '); + const more = ids.length > 15 ? ` …(+${formatNumber(ids.length - 15)} more)` : ''; console.log( - `Cleaned ${formatNumber(removedD)} overlap (removed from dismissed; already in verified)`, + `Cleaned ${formatNumber(removedD)} overlap (removed from dismissed; already in verified) — comment id(s): ${show}${more}`, ); } } if (dismissedIds.size > 0 && this.state.verifiedFixed?.length) { + const removedIds = this.state.verifiedFixed.filter((id) => dismissedIds.has(id)); const before = this.state.verifiedFixed.length; this.state.verifiedFixed = this.state.verifiedFixed.filter((id) => !dismissedIds.has(id)); const removed = before - this.state.verifiedFixed.length; if (removed > 0) { + const show = removedIds.slice(0, 15).join(', '); + const more = removedIds.length > 15 ? ` …(+${formatNumber(removedIds.length - 15)} more)` : ''; console.warn( - `State load: removed ${formatNumber(removed)} ID(s) from verifiedFixed (already in dismissed — overlap cleaned)`, + `State load: removed ${formatNumber(removed)} ID(s) from verifiedFixed (already in dismissed — overlap cleaned): ${show}${more}`, ); } } if (dismissedIds.size > 0 && this.state.verifiedComments?.length) { + const removedVcRows = this.state.verifiedComments.filter((v) => dismissedIds.has(v.commentId)); const beforeVc = this.state.verifiedComments.length; this.state.verifiedComments = this.state.verifiedComments.filter((v) => !dismissedIds.has(v.commentId)); const removedVc = beforeVc - this.state.verifiedComments.length; if (removedVc > 0) { + const ids = removedVcRows.map((v) => v.commentId); + const show = ids.slice(0, 15).join(', '); + const more = ids.length > 15 ? ` …(+${formatNumber(ids.length - 15)} more)` : ''; console.warn( - `State load: removed ${formatNumber(removedVc)} verifiedComments record(s) (already in dismissed — overlap cleaned)`, + `State load: removed ${formatNumber(removedVc)} verifiedComments record(s) (already in dismissed — overlap cleaned): ${show}${more}`, ); } } + + applyResolverStatePostOverlapCleanup(this.state); } } catch (error) { console.warn('Failed to load state file, creating new state:', error); @@ -174,6 +246,15 @@ export class StateManager { this.currentPhase = phase; } + /** Minimal {@link StateContext} for shared transition helpers (no session Set). */ + private toStateContext(): StateContext { + return { + statePath: this.statePath, + state: this.state, + currentPhase: this.currentPhase, + }; + } + async markInterrupted(): Promise { if (!this.state) return; @@ -260,24 +341,9 @@ export class StateManager { if (!this.state) { throw new Error('State not loaded. Call load() first.'); } - - // Update legacy array for backwards compatibility - if (!this.state.verifiedFixed.includes(commentId)) { - this.state.verifiedFixed.push(commentId); - } - - // Update new detailed records - if (!this.state.verifiedComments) { - this.state.verifiedComments = []; - } - - // Remove existing record if any (we'll add a fresh one) - this.state.verifiedComments = this.state.verifiedComments.filter(v => v.commentId !== commentId); - - this.state.verifiedComments.push({ - commentId, - verifiedAt: new Date().toISOString(), - verifiedAtIteration: this.state.iterations.length, + transitionIssue(this.toStateContext(), commentId, { + kind: 'verified', + forceVerificationRefresh: true, }); } @@ -285,17 +351,7 @@ export class StateManager { if (!this.state) { throw new Error('State not loaded. Call load() first.'); } - - // Remove from legacy array - const index = this.state.verifiedFixed.indexOf(commentId); - if (index !== -1) { - this.state.verifiedFixed.splice(index, 1); - } - - // Remove from new detailed records - if (this.state.verifiedComments) { - this.state.verifiedComments = this.state.verifiedComments.filter(v => v.commentId !== commentId); - } + transitionIssue(this.toStateContext(), commentId, { kind: 'unverified' }); } /** @@ -330,7 +386,20 @@ export class StateManager { addDismissedIssue( commentId: string, reason: string, - category: 'already-fixed' | 'not-an-issue' | 'file-unchanged' | 'false-positive' | 'duplicate' | 'stale' | 'exhausted' | 'remaining' | 'chronic-failure' | 'missing-file' | 'path-unresolved', + category: + | 'already-fixed' + | 'not-an-issue' + | 'file-unchanged' + | 'false-positive' + | 'duplicate' + | 'stale' + | 'exhausted' + | 'remaining' + | 'chronic-failure' + | 'missing-file' + | 'path-unresolved' + | 'path-fragment' + | 'out-of-scope', filePath: string, line: number | null, commentBody: string @@ -338,23 +407,14 @@ export class StateManager { if (!this.state) { throw new Error('State not loaded. Call load() first.'); } - - if (!this.state.dismissedIssues) { - this.state.dismissedIssues = []; - } - - // Remove existing record if any (we'll add a fresh one) - this.state.dismissedIssues = this.state.dismissedIssues.filter(d => d.commentId !== commentId); - - this.state.dismissedIssues.push({ - commentId, + transitionIssue(this.toStateContext(), commentId, { + kind: 'dismissed', reason, - dismissedAt: new Date().toISOString(), - dismissedAtIteration: this.state.iterations.length, category, filePath, line, commentBody, + replaceExistingDismissal: true, }); } diff --git a/tools/prr/state/state-context.ts b/tools/prr/state/state-context.ts index fd414d7..2926fe4 100644 --- a/tools/prr/state/state-context.ts +++ b/tools/prr/state/state-context.ts @@ -13,10 +13,19 @@ export interface AggregatedTokenUsage { output_tokens: number; } -/** In-memory only (not persisted in .pr-resolver-state.json): session-level model skip after repeated failures. */ +/** + * Session-level model skip after repeated failures. **Mostly persisted** in + * `.pr-resolver-state.json` (`sessionSkippedModelKeys` / `sessionModelStats`) so restarts skip bad + * models without re-burning budget — opt out with **`PRR_PERSIST_SESSION_MODEL_SKIP=0`**. + */ export interface RotationSessionTracking { skippedModelKeys: Set; modelStats: Map; + /** + * Fix iteration (1-based) when each key was added to `skippedModelKeys`. + * WHY: Per-key retry — remove from skip after N iterations for that key only (vs clearing all skips). + */ + sessionSkippedSinceFixIteration: Map; } export interface StateContext { @@ -54,6 +63,23 @@ export interface StateContext { diminishingReturnsZeroVerifyStreak?: number; /** Ephemeral: already logged one diminishing-returns warning this run. */ diminishingReturnsWarned?: boolean; + /** + * Repo-relative paths in the blast-radius set (changed files + graph BFS + proximity), normalized with `/`. + * **WHY:** Fixer batch allowlist stays full; prompt injection is intersected with this set to save context. + * Undefined when blast radius was not built this analysis (disabled, failure, or cache without field). + */ + blastRadiusPaths?: Set; + /** + * Ephemeral: `git diff --name-only` vs PR base for this push iteration (from main-loop-setup). + * WHY: Basename-only API paths (e.g. `auto-optimizer.ts`) need `resolveTrackedPathWithPrFiles` in + * recovery and single-issue prompts when `issue.resolvedPath` is missing — same disambiguation as analysis. + */ + prChangedFilesForRecovery?: string[]; + /** + * Ephemeral: LLM dedup cluster map for this push iteration (from issue analysis). + * WHY: Recovery / single-issue paths must mark the full duplicate cluster verified, not only the queued id. + */ + duplicateMapForSession?: Map; } export function createStateContext(workdir: string): StateContext { @@ -69,11 +95,72 @@ export function createStateContext(workdir: string): StateContext { export function ensureRotationSession(ctx: StateContext): RotationSessionTracking { if (!ctx.rotationSession) { - ctx.rotationSession = { skippedModelKeys: new Set(), modelStats: new Map() }; + ctx.rotationSession = { + skippedModelKeys: new Set(), + modelStats: new Map(), + sessionSkippedSinceFixIteration: new Map(), + }; + } + if (!ctx.rotationSession.sessionSkippedSinceFixIteration) { + ctx.rotationSession.sessionSkippedSinceFixIteration = new Map(); } return ctx.rotationSession; } +/** Restore session skip + stats from persisted state after `loadState` (pill-output). */ +export function hydrateRotationSessionFromPersistedState(ctx: StateContext): void { + if (!ctx.state) return; + if (process.env.PRR_PERSIST_SESSION_MODEL_SKIP?.trim() === '0') return; + const s = ctx.state; + const keys = s.sessionSkippedModelKeys; + const stats = s.sessionModelStats; + const since = s.sessionSkippedSinceFixIteration; + const hasKeys = keys && keys.length > 0; + const hasStats = stats && Object.keys(stats).length > 0; + const hasSince = since && Object.keys(since).length > 0; + if (!hasKeys && !hasStats && !hasSince) return; + + const rs = ensureRotationSession(ctx); + for (const k of keys ?? []) rs.skippedModelKeys.add(k); + if (stats) { + for (const [k, v] of Object.entries(stats)) { + rs.modelStats.set(k, { fixes: v.fixes, failures: v.failures }); + } + } + if (since) { + for (const [k, v] of Object.entries(since)) { + rs.sessionSkippedSinceFixIteration.set(k, Number(v)); + } + } +} + +/** Write session skip sets into `ctx.state` before JSON save. */ +export function persistRotationSessionToState(ctx: StateContext): void { + if (!ctx.state || process.env.PRR_PERSIST_SESSION_MODEL_SKIP?.trim() === '0') return; + if (!ctx.rotationSession) { + delete ctx.state.sessionSkippedModelKeys; + delete ctx.state.sessionModelStats; + delete ctx.state.sessionSkippedSinceFixIteration; + return; + } + const rs = ctx.rotationSession; + if (rs.skippedModelKeys.size === 0 && rs.modelStats.size === 0) { + delete ctx.state.sessionSkippedModelKeys; + delete ctx.state.sessionModelStats; + delete ctx.state.sessionSkippedSinceFixIteration; + return; + } + ctx.state.sessionSkippedModelKeys = [...rs.skippedModelKeys]; + ctx.state.sessionModelStats = Object.fromEntries( + [...rs.modelStats.entries()].map(([k, v]) => [k, { fixes: v.fixes, failures: v.failures }]), + ); + if (rs.sessionSkippedSinceFixIteration.size > 0) { + ctx.state.sessionSkippedSinceFixIteration = Object.fromEntries(rs.sessionSkippedSinceFixIteration); + } else { + delete ctx.state.sessionSkippedSinceFixIteration; + } +} + export function getState(ctx: StateContext): ResolverState { if (!ctx.state) { throw new Error('State not loaded. Call load() first.'); diff --git a/tools/prr/state/state-core.ts b/tools/prr/state/state-core.ts index bd38a58..d364328 100644 --- a/tools/prr/state/state-core.ts +++ b/tools/prr/state/state-core.ts @@ -4,12 +4,159 @@ import { readFile, writeFile, mkdir } from 'fs/promises'; import { existsSync } from 'fs'; import { dirname } from 'path'; -import type { ResolverState } from './types.js'; +import type { DismissedIssue, ResolverState } from './types.js'; import { createInitialState } from './types.js'; import { loadOverallTimings, getOverallTimings, loadOverallTokenUsage, getOverallTokenUsage, formatNumber } from '../../../shared/logger.js'; import { getEffectiveElizacloudSkipModelIds } from '../../../shared/constants.js'; import { isReviewPathFragment } from '../../../shared/path-utils.js'; -import type { StateContext } from './state-context.js'; +import { + type StateContext, + hydrateRotationSessionFromPersistedState, + persistRotationSessionToState, +} from './state-context.js'; + +/** Prefer canonical path categories when timestamps tie (pill-output #539). */ +function dismissalCategoryRank(c: DismissedIssue['category']): number { + if (c === 'path-fragment') return 0; + if (c === 'path-unresolved') return 1; + if (c === 'missing-file') return 2; + return 3; +} + +/** + * Collapse duplicate rows for the same comment id (hand-edited or legacy state). + * Keeps the row with the latest dismissedAt; on tie, prefers path-fragment > path-unresolved > missing-file. + * Preserves first-seen order of unique ids. + */ +export function dedupeDismissedIssuesByCommentId(issues: DismissedIssue[]): { + merged: DismissedIssue[]; + removedCount: number; +} { + if (issues.length <= 1) { + return { merged: issues, removedCount: 0 }; + } + const firstIndex = new Map(); + const best = new Map(); + for (let i = 0; i < issues.length; i++) { + const d = issues[i]!; + if (!firstIndex.has(d.commentId)) firstIndex.set(d.commentId, i); + const prev = best.get(d.commentId); + if (!prev) { + best.set(d.commentId, d); + continue; + } + const at = (d.dismissedAt ?? '') > (prev.dismissedAt ?? ''); + const bt = (prev.dismissedAt ?? '') > (d.dismissedAt ?? ''); + let pick: DismissedIssue; + if (at && !bt) pick = d; + else if (bt && !at) pick = prev; + else { + const ra = dismissalCategoryRank(d.category); + const rb = dismissalCategoryRank(prev.category); + pick = ra < rb ? d : ra > rb ? prev : d; + } + best.set(d.commentId, pick); + } + const orderedIds = [...firstIndex.entries()].sort((a, b) => a[1] - b[1]).map(([id]) => id); + const merged = orderedIds.map((id) => best.get(id)!); + return { merged, removedCount: issues.length - merged.length }; +} + +/** + * Fragment category migration + duplicate row collapse for persisted dismissals. + * Mutates row objects in place for fragment fields; returns a new array from dedupe. + * **WHY:** Shared by {@link loadState} and legacy {@link StateManager.load} (pill-output). + */ +export function applyDismissedIssuesLoadNormalization(issues: DismissedIssue[]): { + list: DismissedIssue[]; + fragmentNormalized: number; + dedupeRemoved: number; +} { + let fragmentNormalized = 0; + for (const d of issues) { + if (!isReviewPathFragment(d.filePath)) continue; + if (d.category === 'missing-file' || d.category === 'path-unresolved') { + d.category = 'path-fragment'; + if (d.reason?.includes('Tracked file not found')) { + d.reason = `Review path "${d.filePath}" is a fragment or incomplete path — cannot resolve to a single tracked file`; + } + fragmentNormalized++; + } + } + const { merged, removedCount } = dedupeDismissedIssuesByCommentId(issues); + return { list: merged, fragmentNormalized, dedupeRemoved: removedCount }; +} + +/** + * Verified-array dedupe, no-progress reset, and timing hydration — shared by {@link loadState} + * and {@link StateManager.load} (pill-output StateManager parity). + */ +export function applyResolverStateLoadCoreNormalization(state: ResolverState): void { + if (state.verifiedFixed && state.verifiedFixed.length > 0) { + const before = state.verifiedFixed.length; + state.verifiedFixed = [...new Set(state.verifiedFixed)]; + const dupsRemoved = before - state.verifiedFixed.length; + if (dupsRemoved > 0) { + console.log( + `Deduplicated verifiedFixed: removed ${formatNumber(dupsRemoved)} duplicate(s) (${formatNumber(state.verifiedFixed.length)} unique)`, + ); + } + } + + if (state.verifiedComments && state.verifiedComments.length > 0) { + const seen = new Map(); + for (const vc of state.verifiedComments) { + const existing = seen.get(vc.commentId); + if (!existing || (vc.verifiedAt && (!existing.verifiedAt || vc.verifiedAt > existing.verifiedAt))) { + seen.set(vc.commentId, vc); + } + } + const beforeNew = state.verifiedComments.length; + state.verifiedComments = [...seen.values()]; + const dupsRemovedNew = beforeNew - state.verifiedComments.length; + if (dupsRemovedNew > 0) { + console.log(`Deduplicated verifiedComments: removed ${formatNumber(dupsRemovedNew)} duplicate(s)`); + } + } + + if (state.noProgressCycles) { + state.noProgressCycles = 0; + } + + if (state.totalTimings) { + loadOverallTimings(state.totalTimings); + } + if (state.totalTokenUsage) { + loadOverallTokenUsage(state.totalTokenUsage); + } +} + +/** + * Ephemeral git-recovery markers and stale skip-list stats — after dismissed/verified overlap cleanup. + */ +export function applyResolverStatePostOverlapCleanup(state: ResolverState): void { + if (state.recoveredFromGitCommentIds !== undefined) { + state.recoveredFromGitCommentIds = undefined; + } + + if (state.modelPerformance) { + const skipIds = getEffectiveElizacloudSkipModelIds(); + if (skipIds.length > 0) { + const skipSet = new Set(skipIds); + let removed = 0; + for (const key of Object.keys(state.modelPerformance)) { + const modelId = key.includes('/') ? key.split('/').slice(1).join('/') : key; + if (skipSet.has(modelId)) { + delete state.modelPerformance[key]; + removed++; + } + } + if (removed > 0) { + console.log(`Cleared ${formatNumber(removed)} model performance entries for skipped models`); + } + } + } +} export async function loadState(ctx: StateContext, pr: string, branch: string, headSha: string): Promise { if (existsSync(ctx.statePath)) { @@ -24,6 +171,9 @@ export async function loadState(ctx: StateContext, pr: string, branch: string, h if (ctx.state.headSha !== headSha) { const prevSha = ctx.state.headSha?.slice(0, 7); ctx.state.headSha = headSha; + delete ctx.state.sessionSkippedModelKeys; + delete ctx.state.sessionModelStats; + delete ctx.state.sessionSkippedSinceFixIteration; const hadVerified = (ctx.state.verifiedFixed?.length ?? 0) + (ctx.state.verifiedComments?.length ?? 0) > 0; const hadPartial = @@ -77,69 +227,25 @@ export async function loadState(ctx: StateContext, pr: string, branch: string, h console.log(`Compacted ${removed} duplicate lessons (${ctx.state.lessonsLearned.length} unique remaining)`); } - // Deduplicate verifiedFixed on load. - // WHY: Prior sessions and git-commit-scan can accumulate duplicate IDs, - // inflating the verified count beyond the total number of comments. - if (ctx.state.verifiedFixed && ctx.state.verifiedFixed.length > 0) { - const before = ctx.state.verifiedFixed.length; - ctx.state.verifiedFixed = [...new Set(ctx.state.verifiedFixed)]; - const dupsRemoved = before - ctx.state.verifiedFixed.length; - if (dupsRemoved > 0) { - console.log(`Deduplicated verifiedFixed: removed ${dupsRemoved} duplicate(s) (${ctx.state.verifiedFixed.length} unique)`); - } - } - - // Also deduplicate verifiedComments by commentId, keeping the latest entry - if (ctx.state.verifiedComments && ctx.state.verifiedComments.length > 0) { - const seen = new Map(); - for (const vc of ctx.state.verifiedComments) { - const existing = seen.get(vc.commentId); - if (!existing || (vc.verifiedAt && (!existing.verifiedAt || vc.verifiedAt > existing.verifiedAt))) { - seen.set(vc.commentId, vc); - } - } - const beforeNew = ctx.state.verifiedComments.length; - ctx.state.verifiedComments = [...seen.values()]; - const dupsRemovedNew = beforeNew - ctx.state.verifiedComments.length; - if (dupsRemovedNew > 0) { - console.log(`Deduplicated verifiedComments: removed ${dupsRemovedNew} duplicate(s)`); - } - } - - // Reset no-progress cycle counter at session start. - // WHY: This counter is for detecting stalemate within a session's rotation. - // Carrying over 43 from a previous run makes the bail-out message misleading - // ("44 cycles") and gives no useful signal. Historical bail-out data is - // preserved in bailOutRecord anyway. - if (ctx.state.noProgressCycles) { - ctx.state.noProgressCycles = 0; - } - - if (ctx.state.totalTimings) { - loadOverallTimings(ctx.state.totalTimings); - } - if (ctx.state.totalTokenUsage) { - loadOverallTokenUsage(ctx.state.totalTokenUsage); - } + applyResolverStateLoadCoreNormalization(ctx.state); if (!ctx.state.dismissedIssues) { ctx.state.dismissedIssues = []; } - // Normalize legacy dismissals: fragment / extension-only paths were sometimes "missing-file"; - // canonical category is path-unresolved (shared/path-utils isReviewPathFragment). - let normalizedFragment = 0; - for (const d of ctx.state.dismissedIssues) { - if (d.category === 'missing-file' && isReviewPathFragment(d.filePath)) { - d.category = 'path-unresolved'; - if (d.reason?.includes('Tracked file not found')) { - d.reason = `Review path "${d.filePath}" is a fragment or incomplete path — cannot resolve to a single tracked file`; - } - normalizedFragment++; - } + const { + list: normalizedDismissed, + fragmentNormalized, + dedupeRemoved: dismissedDupes, + } = applyDismissedIssuesLoadNormalization(ctx.state.dismissedIssues); + ctx.state.dismissedIssues = normalizedDismissed; + if (fragmentNormalized > 0) { + console.log(`Normalized ${formatNumber(fragmentNormalized)} legacy fragment dismissal(s) to path-fragment`); } - if (normalizedFragment > 0) { - console.log(`Normalized ${formatNumber(normalizedFragment)} legacy fragment dismissal(s) to path-unresolved`); + if (dismissedDupes > 0) { + console.log( + `Deduplicated dismissedIssues: removed ${formatNumber(dismissedDupes)} duplicate row(s) for the same comment id (kept latest dismissedAt / canonical path category)`, + ); } // Keep verifiedFixed and dismissedIssues mutually exclusive (output.log audit: overlapVerifiedAndDismissed; pill #3). @@ -150,55 +256,48 @@ export async function loadState(ctx: StateContext, pr: string, branch: string, h ]); const dismissedIds = new Set(ctx.state.dismissedIssues.map((d) => d.commentId)); if (verifiedSet.size > 0 && ctx.state.dismissedIssues.length > 0) { + const overlapDismissed = ctx.state.dismissedIssues.filter((d) => verifiedSet.has(d.commentId)); const beforeD = ctx.state.dismissedIssues.length; ctx.state.dismissedIssues = ctx.state.dismissedIssues.filter((d) => !verifiedSet.has(d.commentId)); const removedD = beforeD - ctx.state.dismissedIssues.length; if (removedD > 0) { - console.log(`Cleaned ${formatNumber(removedD)} overlap (removed from dismissed; already in verified)`); + const ids = overlapDismissed.map((d) => d.commentId); + const show = ids.slice(0, 15).join(', '); + const more = ids.length > 15 ? ` …(+${formatNumber(ids.length - 15)} more)` : ''; + console.log( + `Cleaned ${formatNumber(removedD)} overlap (removed from dismissed; already in verified) — comment id(s): ${show}${more}`, + ); } } if (dismissedIds.size > 0 && ctx.state.verifiedFixed?.length) { + const removedIds = ctx.state.verifiedFixed.filter((id) => dismissedIds.has(id)); const beforeV = ctx.state.verifiedFixed.length; ctx.state.verifiedFixed = ctx.state.verifiedFixed.filter((id) => !dismissedIds.has(id)); const removedV = beforeV - ctx.state.verifiedFixed.length; if (removedV > 0) { - console.warn(`State load: removed ${formatNumber(removedV)} ID(s) from verifiedFixed (already in dismissed — overlap cleaned)`); + const show = removedIds.slice(0, 15).join(', '); + const more = removedIds.length > 15 ? ` …(+${formatNumber(removedIds.length - 15)} more)` : ''; + console.warn( + `State load: removed ${formatNumber(removedV)} ID(s) from verifiedFixed (already in dismissed — overlap cleaned): ${show}${more}`, + ); } } if (dismissedIds.size > 0 && ctx.state.verifiedComments?.length) { + const removedVcRows = ctx.state.verifiedComments.filter((v) => dismissedIds.has(v.commentId)); const beforeVc = ctx.state.verifiedComments.length; ctx.state.verifiedComments = ctx.state.verifiedComments.filter((v) => !dismissedIds.has(v.commentId)); const removedVc = beforeVc - ctx.state.verifiedComments.length; if (removedVc > 0) { + const ids = removedVcRows.map((v) => v.commentId); + const show = ids.slice(0, 15).join(', '); + const more = ids.length > 15 ? ` …(+${formatNumber(ids.length - 15)} more)` : ''; console.warn( - `State load: removed ${formatNumber(removedVc)} verifiedComments record(s) (already in dismissed — overlap cleaned)`, + `State load: removed ${formatNumber(removedVc)} verifiedComments record(s) (already in dismissed — overlap cleaned): ${show}${more}`, ); } } - // Never carry recoveredFromGitCommentIds across runs — it's only for the first analysis after recovery. - if (ctx.state.recoveredFromGitCommentIds !== undefined) { - ctx.state.recoveredFromGitCommentIds = undefined; - } - - // Zero out model performance for skipped models so stale 0%-success data doesn't persist. - if (ctx.state.modelPerformance) { - const skipIds = getEffectiveElizacloudSkipModelIds(); - if (skipIds.length > 0) { - const skipSet = new Set(skipIds); - let removed = 0; - for (const key of Object.keys(ctx.state.modelPerformance)) { - const modelId = key.includes('/') ? key.split('/').slice(1).join('/') : key; - if (skipSet.has(modelId)) { - delete ctx.state.modelPerformance[key]; - removed++; - } - } - if (removed > 0) { - console.log(`Cleared ${formatNumber(removed)} model performance entries for skipped models`); - } - } - } + applyResolverStatePostOverlapCleanup(ctx.state); } } catch (error) { console.warn('Failed to load state file, creating new state:', error); @@ -208,6 +307,10 @@ export async function loadState(ctx: StateContext, pr: string, branch: string, h ctx.state = createInitialState(pr, branch, headSha); } + if (ctx.state) { + hydrateRotationSessionFromPersistedState(ctx); + } + return ctx.state; } @@ -253,6 +356,7 @@ export async function saveState(ctx: StateContext): Promise { await mkdir(dir, { recursive: true }); } + persistRotationSessionToState(ctx); await writeFile(ctx.statePath, JSON.stringify(ctx.state, null, 2), 'utf-8'); } diff --git a/tools/prr/state/state-dismissed.ts b/tools/prr/state/state-dismissed.ts index 1392142..aa17787 100644 --- a/tools/prr/state/state-dismissed.ts +++ b/tools/prr/state/state-dismissed.ts @@ -14,14 +14,12 @@ * The distinction matters for reporting: dismissed issues need human attention, * verified ones don't. * - * WHY commentStatuses sync hooks: dismissIssue() flips commentStatuses to - * "resolved" and undismissIssue() deletes the entry. Without this, the - * analysis pass would see a stale "open" status and re-analyze a dismissed - * comment, potentially un-dismissing it. See state-comment-status.ts. + * {@link dismissIssue} delegates to {@link transitionIssue} so verified arrays + * and commentStatuses stay consistent. */ import type { StateContext } from './state-context.js'; -import { getState } from './state-context.js'; import type { DismissedIssue } from './types.js'; +import { transitionIssue } from './state-transitions.js'; /** * Dismiss a comment — record that it doesn't need fixing, with a reason. @@ -45,51 +43,15 @@ export function dismissIssue( commentBody: string, remediationHint?: string ): void { - const state = getState(ctx); - - if (!state.dismissedIssues) { - state.dismissedIssues = []; - } - - const currentIteration = state.iterations.length; - // Pill cycle 2 #9: Enforce mutual exclusivity at write time — when we dismiss, remove from verified. - if (state.verifiedFixed?.length) { - state.verifiedFixed = state.verifiedFixed.filter((id) => id !== commentId); - } - // Also remove from verifiedComments array (not just legacy verifiedFixed) - if (state.verifiedComments?.length) { - const index = state.verifiedComments.findIndex(v => v.commentId === commentId); - if (index !== -1) { - state.verifiedComments.splice(index, 1); - } - } - const existing = state.dismissedIssues.find(d => d.commentId === commentId); - if (!existing) { - const entry: DismissedIssue = { - commentId, - reason, - dismissedAt: new Date().toISOString(), - dismissedAtIteration: currentIteration, - category, - filePath, - line, - commentBody, - }; - if (remediationHint !== undefined) entry.remediationHint = remediationHint; - state.dismissedIssues.push(entry); - } - - // Sync commentStatuses: flip to resolved and persist dismiss category - if (state.commentStatuses?.[commentId]) { - state.commentStatuses[commentId] = { - ...state.commentStatuses[commentId], - status: 'resolved', - classification: 'stale', - dismissCategory: category, - updatedAt: new Date().toISOString(), - updatedAtIteration: currentIteration, - }; - } + transitionIssue(ctx, commentId, { + kind: 'dismissed', + reason, + category, + filePath, + line, + commentBody, + remediationHint, + }); } /** @@ -100,21 +62,7 @@ export function dismissIssue( * force a clean slate. */ export function undismissIssue(ctx: StateContext, commentId: string): void { - const state = getState(ctx); - - if (!state.dismissedIssues) { - state.dismissedIssues = []; - } - - const index = state.dismissedIssues.findIndex(d => d.commentId === commentId); - if (index !== -1) { - state.dismissedIssues.splice(index, 1); - } - - // Delete commentStatuses entry so the comment gets re-analyzed - if (state.commentStatuses?.[commentId]) { - delete state.commentStatuses[commentId]; - } + transitionIssue(ctx, commentId, { kind: 'undismissed' }); } export function getDismissedIssues(ctx: StateContext): DismissedIssue[] { @@ -126,13 +74,13 @@ export function isCommentDismissed(ctx: StateContext, commentId: string): boolea if (!state?.dismissedIssues) { return false; } - - return state.dismissedIssues.some(d => d.commentId === commentId); + + return state.dismissedIssues.some((d) => d.commentId === commentId); } /** Get the dismissed issue entry for a comment, if any. Used to preserve category/reason on re-dismiss. */ export function getDismissedIssue(ctx: StateContext, commentId: string): DismissedIssue | undefined { const state = ctx.state; if (!state?.dismissedIssues) return undefined; - return state.dismissedIssues.find(d => d.commentId === commentId); + return state.dismissedIssues.find((d) => d.commentId === commentId); } diff --git a/tools/prr/state/state-transitions.ts b/tools/prr/state/state-transitions.ts new file mode 100644 index 0000000..ba4622e --- /dev/null +++ b/tools/prr/state/state-transitions.ts @@ -0,0 +1,220 @@ +/** + * Single write path for verified / dismissed / unverified comment state. + * + * WHY: Multiple APIs (markVerified, dismissIssue, unmarkVerified, legacy StateManager) + * used to duplicate array surgery and sometimes skipped verifiedThisSession or + * commentStatuses sync (audit cycles 41, 51, 64). All transitions go through + * {@link transitionIssue} so mutual exclusion, session set, commentStatuses, + * and apply-failure cleanup stay consistent. + */ +import type { StateContext } from './state-context.js'; +import { getState } from './state-context.js'; +import type { DismissedIssue } from './types.js'; +import { debug } from '../../../shared/logger.js'; + +/** Discriminated transitions applied by {@link transitionIssue}. */ +export type IssueStateTransition = + | { + kind: 'verified'; + autoVerifiedFrom?: string; + /** When true, do not add to {@link StateContext.verifiedThisSession} (e.g. git recovery of old `prr-fix:` commits). */ + skipSessionTracking?: boolean; + /** When true, refresh timestamps even in the same iteration (legacy {@link StateManager.markCommentVerifiedFixed}). */ + forceVerificationRefresh?: boolean; + } + | { + kind: 'dismissed'; + reason: string; + category: DismissedIssue['category']; + filePath: string; + line: number | null; + commentBody: string; + remediationHint?: string; + /** + * When true, remove any existing dismissed row for this comment before adding. + * WHY: {@link StateManager.addDismissedIssue} replaces the record; {@link dismissIssue} is idempotent (skip push if already dismissed). + */ + replaceExistingDismissal?: boolean; + } + | { kind: 'unverified' } + | { kind: 'undismissed' }; + +function clearApplyFailureState(state: ReturnType, commentId: string): void { + if (state.lastApplyErrorByCommentId?.[commentId] !== undefined) { + delete state.lastApplyErrorByCommentId[commentId]; + } + if (state.applyFailureCountByCommentId?.[commentId] !== undefined) { + delete state.applyFailureCountByCommentId[commentId]; + } +} + +function removeFromVerifiedArrays(state: ReturnType, ctx: StateContext, commentId: string): void { + if (!state.verifiedComments) { + state.verifiedComments = []; + } + const vIndex = state.verifiedComments.findIndex((v) => v.commentId === commentId); + if (vIndex !== -1) { + state.verifiedComments.splice(vIndex, 1); + } + const legacyIndex = (state.verifiedFixed ?? []).indexOf(commentId); + if (legacyIndex !== -1) { + (state.verifiedFixed ??= []).splice(legacyIndex, 1); + } + if (state.commentStatuses?.[commentId]) { + delete state.commentStatuses[commentId]; + } + ctx.verifiedThisSession?.delete(commentId); +} + +/** + * Apply a single comment lifecycle transition (verified, dismissed, or unverified). + * Callers should use {@link markVerified}, {@link dismissIssue}, {@link unmarkVerified} unless testing this layer. + */ +export function transitionIssue(ctx: StateContext, commentId: string, tr: IssueStateTransition): void { + const state = getState(ctx); + + switch (tr.kind) { + case 'unverified': { + removeFromVerifiedArrays(state, ctx, commentId); + debug('transitionIssue: unverified', { commentId, remainingVerified: (state.verifiedFixed ?? []).length }); + return; + } + + case 'undismissed': { + if (!state.dismissedIssues?.length) { + return; + } + const uIndex = state.dismissedIssues.findIndex((d) => d.commentId === commentId); + if (uIndex !== -1) { + state.dismissedIssues.splice(uIndex, 1); + } + if (state.commentStatuses?.[commentId]) { + delete state.commentStatuses[commentId]; + } + return; + } + + case 'dismissed': { + if (!state.dismissedIssues) { + state.dismissedIssues = []; + } + const currentIteration = state.iterations.length; + + if (state.verifiedFixed?.length) { + state.verifiedFixed = state.verifiedFixed.filter((id) => id !== commentId); + } + if (state.verifiedComments?.length) { + const index = state.verifiedComments.findIndex((v) => v.commentId === commentId); + if (index !== -1) { + state.verifiedComments.splice(index, 1); + } + } + + if (tr.replaceExistingDismissal && state.dismissedIssues.length > 0) { + state.dismissedIssues = state.dismissedIssues.filter((d) => d.commentId !== commentId); + } + + const existing = state.dismissedIssues.find((d) => d.commentId === commentId); + if (!existing) { + const entry: DismissedIssue = { + commentId, + reason: tr.reason, + dismissedAt: new Date().toISOString(), + dismissedAtIteration: currentIteration, + category: tr.category, + filePath: tr.filePath, + line: tr.line, + commentBody: tr.commentBody, + }; + if (tr.remediationHint !== undefined) entry.remediationHint = tr.remediationHint; + state.dismissedIssues.push(entry); + } + + if (state.commentStatuses?.[commentId]) { + state.commentStatuses[commentId] = { + ...state.commentStatuses[commentId], + status: 'resolved', + classification: 'stale', + dismissCategory: tr.category, + updatedAt: new Date().toISOString(), + updatedAtIteration: currentIteration, + }; + } + ctx.verifiedThisSession?.delete(commentId); + return; + } + + case 'verified': { + if (!state.verifiedComments) { + state.verifiedComments = []; + } + + const currentIteration = state.iterations.length; + const existing = state.verifiedComments.find((v) => v.commentId === commentId); + + if (existing) { + const hadDismissed = state.dismissedIssues?.some((d) => d.commentId === commentId) ?? false; + const sameIteration = existing.verifiedAtIteration === currentIteration; + const fromCompatible = + tr.autoVerifiedFrom === undefined || tr.autoVerifiedFrom === existing.autoVerifiedFrom; + if (!tr.forceVerificationRefresh && sameIteration && fromCompatible && !hadDismissed) { + return; + } + existing.verifiedAt = new Date().toISOString(); + existing.verifiedAtIteration = currentIteration; + if (tr.autoVerifiedFrom !== undefined) { + existing.autoVerifiedFrom = tr.autoVerifiedFrom; + } + if (state.dismissedIssues?.length) { + const before = state.dismissedIssues.length; + state.dismissedIssues = state.dismissedIssues.filter((d) => d.commentId !== commentId); + if (state.dismissedIssues.length < before) { + debug('transitionIssue: verified (update) removed from dismissed', { commentId }); + } + } + debug('transitionIssue: verified (update)', { + commentId, + iteration: currentIteration, + autoVerifiedFrom: tr.autoVerifiedFrom, + }); + } else { + state.verifiedComments.push({ + commentId, + verifiedAt: new Date().toISOString(), + verifiedAtIteration: currentIteration, + autoVerifiedFrom: tr.autoVerifiedFrom, + }); + + if (!(state.verifiedFixed ??= []).includes(commentId)) { + state.verifiedFixed.push(commentId); + } + if (state.dismissedIssues?.length) { + state.dismissedIssues = state.dismissedIssues.filter((d) => d.commentId !== commentId); + } + debug('transitionIssue: verified (new)', { + commentId, + iteration: currentIteration, + autoVerifiedFrom: tr.autoVerifiedFrom, + totalVerified: state.verifiedFixed.length, + }); + } + + if (state.commentStatuses?.[commentId]) { + state.commentStatuses[commentId] = { + ...state.commentStatuses[commentId], + status: 'resolved', + classification: 'fixed', + updatedAt: new Date().toISOString(), + updatedAtIteration: currentIteration, + }; + } + + clearApplyFailureState(state, commentId); + + if (!tr.skipSessionTracking) { + ctx.verifiedThisSession?.add(commentId); + } + return; + } + } +} diff --git a/tools/prr/state/state-verification.ts b/tools/prr/state/state-verification.ts index eb30d87..567595e 100644 --- a/tools/prr/state/state-verification.ts +++ b/tools/prr/state/state-verification.ts @@ -12,150 +12,69 @@ * iterations). Both arrays are maintained for backward compatibility with * existing state files. * - * WHY commentStatuses sync hooks: This module also keeps commentStatuses{} in - * sync. Without hooks, markVerified() would update verifiedFixed but leave - * commentStatuses showing "open" — a contradiction that causes the analysis - * pass to re-analyze already-fixed issues. See state-comment-status.ts for - * the full lifecycle. + * Writes go through {@link transitionIssue} in state-transitions.ts so + * commentStatuses, dismissed mutual exclusion, and apply-failure cleanup stay in sync. */ import type { StateContext } from './state-context.js'; -import { getState } from './state-context.js'; import type { VerifiedComment } from './types.js'; import { debug } from '../../../shared/logger.js'; +import { transitionIssue } from './state-transitions.js'; export type VerificationRecord = VerifiedComment; +/** Optional flags for {@link markVerified} (fourth argument). */ +export interface MarkVerifiedOptions { + skipSessionTracking?: boolean; + forceVerificationRefresh?: boolean; +} + /** Sentinel for `autoVerifiedFrom` when verification was restored from `prr-fix:` git history (not a duplicate). */ export const PRR_GIT_RECOVERY_VERIFIED_MARKER = '__prr_git_recovery__'; /** * Mark a comment as verified/fixed - * + * * Records the current iteration number and timestamp. Updates existing * verification or creates a new one. Also adds to legacy verifiedFixed array. - * + * * @param ctx - State context * @param commentId - ID of the comment to mark as verified * @param autoVerifiedFrom - Optional canonical comment ID for auto-verified **duplicates**, or * **`PRR_GIT_RECOVERY_VERIFIED_MARKER`** when verification was restored from **`prr-fix:`** git history (`recoverVerificationState`). */ -export function markVerified(ctx: StateContext, commentId: string, autoVerifiedFrom?: string): void { - const state = getState(ctx); - - if (!state.verifiedComments) { - state.verifiedComments = []; - } - - const currentIteration = state.iterations.length; - const existing = state.verifiedComments.find(v => v.commentId === commentId); - - if (existing) { - const hadDismissed = state.dismissedIssues?.some((d) => d.commentId === commentId) ?? false; - const sameIteration = existing.verifiedAtIteration === currentIteration; - const fromCompatible = - autoVerifiedFrom === undefined || autoVerifiedFrom === existing.autoVerifiedFrom; - if (sameIteration && fromCompatible && !hadDismissed) { - return; - } - existing.verifiedAt = new Date().toISOString(); - existing.verifiedAtIteration = currentIteration; - if (autoVerifiedFrom !== undefined) { - existing.autoVerifiedFrom = autoVerifiedFrom; - } - if (state.dismissedIssues?.length) { - const before = state.dismissedIssues.length; - state.dismissedIssues = state.dismissedIssues.filter((d) => d.commentId !== commentId); - if (state.dismissedIssues.length < before) { - debug('markVerified (update): removed from dismissed — mutual exclusivity', { commentId }); - } - } - debug('markVerified (update)', { commentId, iteration: currentIteration, autoVerifiedFrom }); - } else { - state.verifiedComments.push({ - commentId, - verifiedAt: new Date().toISOString(), - verifiedAtIteration: currentIteration, - autoVerifiedFrom, - }); - - if (!(state.verifiedFixed ??= []).includes(commentId)) { - state.verifiedFixed.push(commentId); - } - // Pill: Keep verifiedFixed and dismissedIssues mutually exclusive — when we verify, remove from dismissed. - if (state.dismissedIssues?.length) { - state.dismissedIssues = state.dismissedIssues.filter((d) => d.commentId !== commentId); - } - debug('markVerified (new)', { commentId, iteration: currentIteration, autoVerifiedFrom, totalVerified: state.verifiedFixed.length }); - } - - // Sync commentStatuses: if this comment had an "open" analysis status, - // flip it to resolved. No-op if entry doesn't exist (comment was never - // analyzed, e.g. recovered from git history — isVerified() handles it). - if (state.commentStatuses?.[commentId]) { - state.commentStatuses[commentId] = { - ...state.commentStatuses[commentId], - status: 'resolved', - classification: 'fixed', - updatedAt: new Date().toISOString(), - updatedAtIteration: currentIteration, - }; - } - - // Clear apply-failure state so a future re-attempt doesn't see stale error or count. - if (state.lastApplyErrorByCommentId?.[commentId] !== undefined) { - delete state.lastApplyErrorByCommentId[commentId]; - } - if (state.applyFailureCountByCommentId?.[commentId] !== undefined) { - delete state.applyFailureCountByCommentId[commentId]; - } +export function markVerified( + ctx: StateContext, + commentId: string, + autoVerifiedFrom?: string, + options?: MarkVerifiedOptions +): void { + transitionIssue(ctx, commentId, { + kind: 'verified', + autoVerifiedFrom, + skipSessionTracking: options?.skipSessionTracking, + forceVerificationRefresh: options?.forceVerificationRefresh, + }); } /** * Remove verification status from a comment - * + * * Used when a previously verified fix is detected as stale or incorrect. * Removes from both new verifiedComments array and legacy verifiedFixed array. - * + * * @param ctx - State context * @param commentId - ID of the comment to unmark */ export function unmarkVerified(ctx: StateContext, commentId: string): void { - const state = getState(ctx); - - if (!state.verifiedComments) { - state.verifiedComments = []; - } - - const index = state.verifiedComments.findIndex(v => v.commentId === commentId); - if (index !== -1) { - state.verifiedComments.splice(index, 1); - } - - const legacyIndex = (state.verifiedFixed ?? []).indexOf(commentId); - if (legacyIndex !== -1) { - (state.verifiedFixed ??= []).splice(legacyIndex, 1); - } - - // Delete commentStatuses entry so the comment gets re-analyzed - if (state.commentStatuses?.[commentId]) { - delete state.commentStatuses[commentId]; - } - - // WHY: fix-loop start filters out IDs in verifiedThisSession ("already fixed this session"). - // Final audit calls unmarkVerified when it says UNFIXED; if we leave the ID in the session set, - // the next iteration drops all re-queued issues → empty queue → "BUG DETECTED" repopulate - // (output.log audit babylon#1327 2026-03-21). - ctx.verifiedThisSession?.delete(commentId); - - debug('unmarkVerified', { commentId, remainingVerified: (state.verifiedFixed ?? []).length }); + transitionIssue(ctx, commentId, { kind: 'unverified' }); } /** * Check if a comment is marked as verified - * + * * Checks both new verifiedComments array and legacy verifiedFixed array * for backward compatibility. - * + * * @param ctx - State context * @param commentId - ID of the comment to check * @returns true if the comment is verified @@ -163,19 +82,19 @@ export function unmarkVerified(ctx: StateContext, commentId: string): void { export function isVerified(ctx: StateContext, commentId: string): boolean { const state = ctx.state; if (!state) return false; - - const inNew = state.verifiedComments?.some(v => v.commentId === commentId) ?? false; + + const inNew = state.verifiedComments?.some((v) => v.commentId === commentId) ?? false; if (inNew) return true; - + return (state.verifiedFixed ?? []).includes(commentId); } /** * Get the full verification record for a comment - * + * * Returns the verification record with timestamp and iteration number, * or undefined if not verified. - * + * * @param ctx - State context * @param commentId - ID of the comment * @returns Verification record or undefined @@ -183,17 +102,17 @@ export function isVerified(ctx: StateContext, commentId: string): boolean { export function getVerificationRecord(ctx: StateContext, commentId: string): VerificationRecord | undefined { const state = ctx.state; if (!state?.verifiedComments) return undefined; - - return state.verifiedComments.find(v => v.commentId === commentId); + + return state.verifiedComments.find((v) => v.commentId === commentId); } /** * Find verifications that are older than a threshold - * + * * Used to detect verifications that may no longer be valid due to code changes. * Returns comment IDs verified more than maxIterationsAgo iterations ago. * Also returns auto-verified duplicates when their canonical goes stale. - * + * * @param ctx - State context (optional) * @param maxIterationsAgo - Maximum age in iterations before considered stale * @returns Array of stale comment IDs (including linked duplicates) @@ -202,57 +121,55 @@ export function getStaleVerifications(ctx: StateContext | undefined, maxIteratio if (!ctx) return []; const state = ctx.state; if (!state || !state.verifiedComments) return []; - + const currentIteration = state.iterations.length; const staleIds = new Set(); - - // Find stale canonicals + for (const v of state.verifiedComments) { - if ((currentIteration - v.verifiedAtIteration) > maxIterationsAgo) { + if (currentIteration - v.verifiedAtIteration > maxIterationsAgo) { staleIds.add(v.commentId); } } - - // Also mark auto-verified duplicates as stale when their canonical is stale + for (const v of state.verifiedComments) { if (v.autoVerifiedFrom && staleIds.has(v.autoVerifiedFrom)) { staleIds.add(v.commentId); } } - + return [...staleIds]; } /** * Get all verified comment IDs - * + * * Returns a deduplicated list from both new and legacy storage. - * + * * @param ctx - State context * @returns Array of all verified comment IDs */ export function getVerifiedComments(ctx: StateContext): string[] { const state = ctx.state; if (!state) return []; - + const fromLegacy = state.verifiedFixed || []; - const fromNew = state.verifiedComments?.map(v => v.commentId) || []; - + const fromNew = state.verifiedComments?.map((v) => v.commentId) || []; + return [...new Set([...fromLegacy, ...fromNew])]; } /** * Clear all verification records - * + * * Used when code changes invalidate all previous verifications (e.g., after * pulling new commits). Clears both new and legacy storage. - * + * * @param ctx - State context */ export function clearAllVerifications(ctx: StateContext): void { const state = ctx.state; if (!state) return; - + const previousCount = (state.verifiedFixed ?? []).length; state.verifiedFixed = []; state.verifiedComments = []; diff --git a/tools/prr/state/types.ts b/tools/prr/state/types.ts index d0b6741..9cc4a3a 100644 --- a/tools/prr/state/types.ts +++ b/tools/prr/state/types.ts @@ -85,7 +85,20 @@ export interface DismissedIssue { reason: string; // Detailed explanation of why it doesn't need fixing dismissedAt: string; // ISO timestamp when dismissed dismissedAtIteration: number; // Which iteration it was dismissed in - category: 'already-fixed' | 'not-an-issue' | 'file-unchanged' | 'false-positive' | 'duplicate' | 'stale' | 'exhausted' | 'remaining' | 'chronic-failure' | 'missing-file' | 'path-unresolved'; + category: + | 'already-fixed' + | 'not-an-issue' + | 'file-unchanged' + | 'false-positive' + | 'duplicate' + | 'stale' + | 'exhausted' + | 'remaining' + | 'chronic-failure' + | 'missing-file' + | 'path-unresolved' + | 'path-fragment' + | 'out-of-scope'; filePath: string; // File the comment was about line: number | null; // Line number if specified commentBody: string; // Original review comment text @@ -280,6 +293,15 @@ export interface ResolverState { * WHY: If the base branch advances, cached file contents may be wrong for the new merge — clear partials. */ partialConflictSavedOriginBaseSha?: string; + /** + * Persisted session model skip (same PR/HEAD). WHY: In-memory skip was lost on restart, wasting + * rotation budget re-proving bad models (pill-output). Cleared on PR head change with verified state. + */ + sessionSkippedModelKeys?: string[]; + /** Failure/fix counts per `runner/model` key for session skip threshold — persisted with skip keys. */ + sessionModelStats?: Record; + /** Fix iteration when each key was session-skipped — for PRR_SESSION_MODEL_SKIP_RESET_AFTER_FIX_ITERATIONS. */ + sessionSkippedSinceFixIteration?: Record; } export function createInitialState(pr: string, branch: string, headSha: string): ResolverState { diff --git a/tools/prr/ui/reporter.ts b/tools/prr/ui/reporter.ts index 49333f9..aa5197b 100644 --- a/tools/prr/ui/reporter.ts +++ b/tools/prr/ui/reporter.ts @@ -320,9 +320,13 @@ export function printFinalSummary( const auditOverridesThisRun = stateContext.auditOverridesThisRun ?? []; if (overlapIds.length > 0) { + const showOverlap = 20; + const overlapSample = overlapIds.slice(0, showOverlap).join(', '); + const more = + overlapIds.length > showOverlap ? ` … (+${formatNumber(overlapIds.length - showOverlap)} more)` : ''; console.warn( chalk.yellow( - ` ⚠ verified ∩ dismissed still shows ${formatNumber(overlapIds.length)} ID(s) at summary time — unexpected. Delete .pr-resolver-state.json in the clone workdir (see README Troubleshooting), then re-run.`, + ` ⚠ verified ∩ dismissed still shows ${formatNumber(overlapIds.length)} ID(s) at summary time — unexpected. Overlap: ${overlapSample}${more}. Delete .pr-resolver-state.json in the clone workdir (see README Troubleshooting), then re-run.`, ), ); } @@ -337,7 +341,18 @@ export function printFinalSummary( if (exitDetails) { console.log(chalk.gray(` ${exitDetails}`)); } - + const successLikeExit = + effectiveReason === 'all_fixed' || + effectiveReason === 'all_resolved' || + effectiveReason === 'audit_passed'; + if (successLikeExit && remainingCount !== undefined && remainingCount > 0) { + console.log( + chalk.gray( + ` Note: Fix loop finished for all active threads. Remaining (${formatNumber(remainingCount)}) counts exhausted or “remaining” locations in state (deduped by file:line), not open fix-queue work.`, + ), + ); + } + // Fixed issues (only count issues actually fixed by the tool, not pre-existing fixes) // Use verifiedThisSession (the actual Set of IDs verified during iteration loops) // instead of delta counting, which undercounts re-verifications of issues already @@ -380,6 +395,18 @@ export function printFinalSummary( } } + // Pill-output #407: surface UNCERTAIN vs truncation-guard counts in the summary (not only debug). + const finalAuditUncertain = stateContext.finalAuditUncertainThisRun ?? []; + if (finalAuditUncertain.length > 0) { + const trunc = finalAuditUncertain.filter((u) => u.kind === 'truncation-guard').length; + const unc = finalAuditUncertain.filter((u) => u.kind === 'uncertain').length; + console.log( + chalk.gray( + `\n ℹ Final audit non-affirming passes: ${formatNumber(finalAuditUncertain.length)} (${formatNumber(unc)} UNCERTAIN, ${formatNumber(trunc)} truncation guard)`, + ), + ); + } + // Pill-output #18: keep final-audit re-queue count with other outcome lines (fixed / dismissed), not only above Exit. if (auditOverridesThisRun.length > 0) { console.log( @@ -387,6 +414,11 @@ export function printFinalSummary( `\n ◆ Final audit re-queued: ${formatNumber(auditOverridesThisRun.length)} issue(s) (adversarial pass said UNFIXED for previously verified — see After Action Report)`, ), ); + console.log( + chalk.gray( + ` (This count is only threads that were verified then challenged by final audit — not the same as “Remaining” unless those were the only open issues.)`, + ), + ); if ( remainingCount !== undefined && remainingCount > 0 && @@ -394,7 +426,7 @@ export function printFinalSummary( ) { console.log( chalk.gray( - ` (If Remaining below differs: re-queue is per thread; Remaining dedupes by file:line and can shrink after fixes.)`, + ` If Remaining below differs: re-queue is per thread id; Remaining dedupes by file:line and can include issues never verified this run.`, ), ); } @@ -966,7 +998,7 @@ export async function printAfterActionReport( } } - // Summary — Fixed, Dismissed, Remaining (by unique comment IDs so total never exceeds comment count). + // Summary — Fixed, Dismissed, Remaining (union of distinct comment IDs across buckets vs fetched rows). console.log(chalk.cyan('\n━━━ Summary ━━━')); const fixedIds = new Set( comments @@ -995,7 +1027,16 @@ export async function printAfterActionReport( ), ); if (totalAccounted !== comments.length) { - console.log(chalk.gray(` (Unique comment IDs in these buckets: ${formatNumber(totalAccounted)})`)); + console.log( + chalk.gray( + ` Distinct comment IDs in at least one bucket: ${formatNumber(totalAccounted)} (loaded: ${formatNumber(commentsFetched)})`, + ), + ); + console.log( + chalk.gray( + " → Buckets can be larger if dismissed/remaining/exhausted reference IDs not in this run's fetch; smaller if many loaded comments are only outdated / out of queue.", + ), + ); } console.log(chalk.green(` Fixed: ${formatNumber(fixedCount)}${fixedThisSessionCount > 0 ? ` (${formatNumber(fixedThisSessionCount)} this session)` : ''}`)); console.log(chalk.gray(` Dismissed: ${formatNumber(dismissedCount)}`)); diff --git a/tools/prr/workflow/analysis.ts b/tools/prr/workflow/analysis.ts index 05df1f6..61f3493 100644 --- a/tools/prr/workflow/analysis.ts +++ b/tools/prr/workflow/analysis.ts @@ -20,16 +20,37 @@ import * as Performance from '../state/state-performance.js'; import type { CLIOptions } from '../cli.js'; import { formatNumber } from '../ui/reporter.js'; import { dedupeNewCommentsByQueue } from './utils.js'; +import { + dismissDuplicateClusterFromComments, + resolveEffectiveDuplicateMapForComments, +} from './issue-analysis-dedup.js'; +import { + markVerifiedClusterForFixedIssue, + unmarkVerifiedClustersForFinalAuditFailures, +} from './duplicate-cluster-verify.js'; import { debug, debugStep, setTokenPhase, formatDuration as formatDur } from '../../../shared/logger.js'; import { shouldSkipFinalAuditLlmForPath } from '../../../shared/path-utils.js'; -import { assessSolvability, SNIPPET_PLACEHOLDER } from './helpers/solvability.js'; +import { assessSolvability, SNIPPET_PLACEHOLDER, resolveTrackedPath } from './helpers/solvability.js'; import { classifyFinalAuditUncertainExplanation } from './helpers/final-audit-uncertain.js'; import { pathTrackedAtGitHead } from './helpers/git-path-at-head.js'; +import { isTrackedGitSubmodulePath } from '../../../shared/git/git-submodule-path.js'; /** Logged when final audit skips the LLM for a comment (synthetic / fragment path). */ const FINAL_AUDIT_SKIP_LLM_EXPLANATION = 'Skipped adversarial LLM: no single on-disk file path (synthetic path, empty path, or path fragment).'; +/** + * Repo-relative path for file reads and `git` checks; falls back to review **`path`** when unresolved. + * **WHY:** GitHub’s path may be a basename, diff-prefixed, or an extension variant; **`resolveTrackedPath`** + * matches the clone. Logs / dedup keys may still use **`comment.path`** so operators see the same string as the PR UI. + */ +function commentFilePathForWorkdir(workdir: string | undefined, c: Pick): string { + const raw = c.path; + if (raw == null || raw === '') return ''; + if (!workdir) return raw; + return resolveTrackedPath(workdir, raw, c.body ?? '') ?? raw; +} + /** * Detect audit explanations that say no fix is needed (false positive). * @@ -122,8 +143,20 @@ export function analyzeAndReportIssues( ? ` (all ${formatNumber(verifiedInQueue)} already verified — will skip fixer)` : ` (${formatNumber(toFixCount)} to fix, ${formatNumber(verifiedInQueue)} already verified)` : ''; + const hasBlast = unresolvedIssues.some((i) => i.inBlastRadius !== undefined); + const blastSubtitle = hasBlast + ? (() => { + const out = unresolvedIssues.filter((i) => i.inBlastRadius === false).length; + const inn = unresolvedIssues.length - out; + return ` — ${formatNumber(inn)} in blast radius, ${formatNumber(out)} out-of-scope (deprioritized)`; + })() + : ''; console.log(''); - console.log(chalk.yellowBright(`┌─ QUEUE: ${formatNumber(unresolvedIssues.length)} issue(s) entering fix loop${queueSubtitle} ─┐`)); + console.log( + chalk.yellowBright( + `┌─ QUEUE: ${formatNumber(unresolvedIssues.length)} issue(s) entering fix loop${queueSubtitle}${blastSubtitle} ─┐`, + ), + ); if (toFixCount > 0) { // Group by file for readability (skip full box when all verified — output.log audit) @@ -178,7 +211,9 @@ export async function checkForNewComments( spinner: Ora, getCodeSnippet: (path: string, line: number | null, body: string) => Promise, stateContext: StateContext, - workdir: string + workdir: string, + /** LLM dedup map from last analysis — dismiss siblings when solvability drops a canonical/dupe. */ + duplicateMap?: Map, ): Promise<{ hasNewComments: boolean; updatedComments: ReviewComment[]; @@ -210,23 +245,28 @@ export async function checkForNewComments( // Add new comments to our list const updatedComments = [...existingComments]; const updatedUnresolvedIssues = [...unresolvedIssues]; + const lookupComments = [...existingComments, ...newComments]; + const effectiveDupForNewComments = resolveEffectiveDuplicateMapForComments( + stateContext, + duplicateMap, + lookupComments, + ); const solvableComments: ReviewComment[] = []; const resolvedPaths = new Map(); for (const comment of newComments) { const solvability = assessSolvability(workdir, comment, stateContext); if (!solvability.solvable) { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - comment.id, + comment, + effectiveDupForNewComments, + lookupComments, solvability.reason ?? 'Not solvable', solvability.dismissCategory ?? 'not-an-issue', - comment.path, - comment.line, - comment.body, - solvability.remediationHint + solvability.remediationHint, ); - debug('New comment dismissed by solvability', { commentId: comment.id, path: comment.path, reason: solvability.reason }); + debug('New comment dismissed by solvability (cluster)', { commentId: comment.id, path: comment.path, reason: solvability.reason }); continue; } if (solvability.resolvedPath) { @@ -307,10 +347,16 @@ export async function runFinalAudit( options: CLIOptions, spinner: Ora, getCodeSnippet: (path: string, line: number | null, body: string) => Promise, - /** When set, use full file content instead of snippets so the audit has complete context. */ - getFullFile?: (path: string, line: number | null, body: string) => Promise, + /** When set, use full file (or budget excerpt) instead of windowed snippets. May return a string (legacy) or `{ snippet, fixSiteInWindow }`. */ + getFullFile?: ( + path: string, + line: number | null, + body: string + ) => Promise, /** Pill cycle 2 #4: When set, validate Rule 6 (file deleted) by checking git ls-tree before accepting FIXED verdict. */ - workdir?: string + workdir?: string, + /** LLM dedup clusters — mark/unmark siblings consistently when audit passes (same as fix verification / recovery). */ + duplicateMap?: Map ): Promise<{ failedAudit: Array<{ comment: ReviewComment; explanation: string }>; auditPassed: boolean; @@ -326,6 +372,7 @@ export async function runFinalAudit( debug('Starting final audit (verification cache not cleared - results are additive)'); stateContext.finalAuditUncertainThisRun = []; + const dupForFinalAudit = resolveEffectiveDuplicateMapForComments(stateContext, duplicateMap, comments); // Pill-output #11: runtime overlap check (load() also repairs; this surfaces bugs in-session) const verifiedSet = new Set(Verification.getVerifiedComments(stateContext)); @@ -348,6 +395,7 @@ export async function runFinalAudit( const llmIndices: number[] = []; const skipLlmIndices: number[] = []; for (let i = 0; i < comments.length; i++) { + // INTENTIONAL: raw `comment.path` for fragment / synthetic-path gate (matches solvability and GitHub anchor). if (shouldSkipFinalAuditLlmForPath(comments[i].path)) { skipLlmIndices.push(i); } else { @@ -364,12 +412,22 @@ export async function runFinalAudit( const llmComments = llmIndices.map((i) => comments[i]); const auditSnippetsLlm = getFullFile - ? await Promise.all(llmComments.map((c) => getFullFile(c.path, c.line, c.body))) - : await Promise.all(llmComments.map((c) => getCodeSnippet(c.path, c.line, c.body))); + ? await Promise.all( + llmComments.map(async (c) => { + const r = await getFullFile(commentFilePathForWorkdir(workdir, c), c.line, c.body); + return typeof r === 'string' ? { snippet: r, fixSiteInWindow: false } : r; + }), + ) + : await Promise.all( + llmComments.map(async (c) => ({ + snippet: await getCodeSnippet(commentFilePathForWorkdir(workdir, c), c.line, c.body), + fixSiteInWindow: false, + })), + ); const auditSnippets: string[] = new Array(comments.length); for (let j = 0; j < llmIndices.length; j++) { - auditSnippets[llmIndices[j]] = auditSnippetsLlm[j]!; + auditSnippets[llmIndices[j]] = auditSnippetsLlm[j]!.snippet; } const skipSnippetNote = '(no file context — final audit LLM skipped for non-file path)'; for (const i of skipLlmIndices) { @@ -385,11 +443,12 @@ export async function runFinalAudit( filePath: string; line: number | null; codeSnippet: string; + fixSiteInWindow?: boolean; }> = []; for (let j = 0; j < llmComments.length; j++) { const comment = llmComments[j]!; - const snippet = auditSnippetsLlm[j]!; + const snippet = auditSnippetsLlm[j]!.snippet; if ( workdir && snippet === SNIPPET_PLACEHOLDER && @@ -397,7 +456,20 @@ export async function runFinalAudit( comment.path !== '(PR comment)' && !shouldSkipFinalAuditLlmForPath(comment.path) ) { - const tracked = pathTrackedAtGitHead(workdir, comment.path); + const pathForGit = commentFilePathForWorkdir(workdir, comment); + if (isTrackedGitSubmodulePath(workdir, pathForGit)) { + syntheticAuditResults.set(comment.id, { + stillExists: false, + explanation: + 'FIXED (git submodule): Review path is a git submodule (gitlink) — no regular file text at this anchor; final audit skipped adversarial LLM.', + }); + debug('Final audit: skipped adversarial LLM — path is git submodule gitlink', { + commentId: comment.id, + path: pathForGit, + }); + continue; + } + const tracked = pathTrackedAtGitHead(workdir, pathForGit); if (tracked === false) { syntheticAuditResults.set(comment.id, { stillExists: false, @@ -406,7 +478,7 @@ export async function runFinalAudit( }); debug('Final audit: skipped adversarial LLM — path absent at HEAD and snippet is unreadable placeholder', { commentId: comment.id, - path: comment.path, + path: pathForGit, }); continue; } @@ -418,9 +490,10 @@ export async function runFinalAudit( issuesForLlm.push({ id: comment.id, comment: commentForAudit, - filePath: comment.path, + filePath: commentFilePathForWorkdir(workdir, comment) || comment.path || '', line: comment.line, codeSnippet: snippet, + fixSiteInWindow: auditSnippetsLlm[j]!.fixSiteInWindow, }); } @@ -455,11 +528,11 @@ export async function runFinalAudit( comment.path && comment.path !== '(PR comment)' && codeSnippetEarly === SNIPPET_PLACEHOLDER && - pathTrackedAtGitHead(workdir, comment.path) === false + pathTrackedAtGitHead(workdir, commentFilePathForWorkdir(workdir, comment)) === false ) { debug( 'Final audit tie-break: UNFIXED but path absent from HEAD + unreadable snippet — keeping verified (deleted file)', - { commentId: comment.id, path: comment.path }, + { commentId: comment.id, path: commentFilePathForWorkdir(workdir, comment) }, ); console.warn( chalk.yellow( @@ -538,7 +611,12 @@ export async function runFinalAudit( line: comment.line, excerpt: result.explanation.slice(0, 80), }); - Verification.markVerified(stateContext, comment.id); + markVerifiedClusterForFixedIssue( + stateContext, + comment.id, + dupForFinalAudit, + stateContext.verifiedThisSession, + ); } else { failedAudit.push({ comment, explanation: result.explanation }); } @@ -547,11 +625,12 @@ export async function runFinalAudit( // Pill cycle 2 #4: Validate Rule 6 (file deleted / outdated thread) — confirm path absent at HEAD before accepting FIXED const isRule6Style = /(?:file deleted|file no longer exists|thread outdated)/i.test(result.explanation); if (isRule6Style && workdir && comment.path && comment.path !== '(PR comment)') { - const tracked = pathTrackedAtGitHead(workdir, comment.path); + const pathForGit = commentFilePathForWorkdir(workdir, comment); + const tracked = pathTrackedAtGitHead(workdir, pathForGit); if (tracked === true) { debug('Rule 6 validation failed: path still tracked at HEAD', { commentId: comment.id, - path: comment.path, + path: pathForGit, explanation: result.explanation, }); failedAudit.push({ @@ -563,12 +642,12 @@ export async function runFinalAudit( if (tracked === null) { debug('Rule 6 validation inconclusive: git ls-tree check failed — accepting FIXED', { commentId: comment.id, - path: comment.path, + path: pathForGit, }); } else { debug('Rule 6 validation passed: path not at HEAD', { commentId: comment.id, - path: comment.path, + path: pathForGit, }); } } @@ -585,7 +664,12 @@ export async function runFinalAudit( explanation: result.explanation?.slice(0, 200), }); } - Verification.markVerified(stateContext, comment.id); + markVerifiedClusterForFixedIssue( + stateContext, + comment.id, + dupForFinalAudit, + stateContext.verifiedThisSession, + ); } } else { // No result from audit - treat as needing review (fail-safe) @@ -593,10 +677,13 @@ export async function runFinalAudit( } } - // Single unmark pass for all failed-audit comments (WHY: main-loop-setup used to unmark too → duplicate logs). - for (const { comment } of failedAudit) { - Verification.unmarkVerified(stateContext, comment.id); - } + // Single unmark pass: whole dedup cluster per failure (WHY: markVerifiedCluster on pass marks siblings; + // per-id unmark left dupes verified — skip fixer / inconsistent queue vs README "safe over sorry"). + unmarkVerifiedClustersForFinalAuditFailures( + stateContext, + failedAudit.map((f) => f.comment.id), + dupForFinalAudit, + ); if (filteredNoAction > 0) { debug('Audit filtered no-action-needed', { count: filteredNoAction }); @@ -634,9 +721,11 @@ export async function runFinalAudit( const uncertain = stateContext.finalAuditUncertainThisRun ?? []; if (uncertain.length > 0) { + const trunc = uncertain.filter((u) => u.kind === 'truncation-guard').length; + const unc = uncertain.filter((u) => u.kind === 'uncertain').length; console.log( chalk.yellow( - ` ℹ Final audit: ${formatNumber(uncertain.length)} issue(s) passed via UNCERTAIN or truncation guard (see explanations in prompts.log). Set PRR_STRICT_FINAL_AUDIT_UNCERTAIN=1 to exit 2 on these.`, + ` ℹ Final audit: ${formatNumber(uncertain.length)} issue(s) passed via UNCERTAIN or truncation guard (${formatNumber(unc)} UNCERTAIN, ${formatNumber(trunc)} truncation guard; see prompts.log). Set PRR_STRICT_FINAL_AUDIT_UNCERTAIN=1 to exit 2 on these.`, ), ); } diff --git a/tools/prr/workflow/bailout.ts b/tools/prr/workflow/bailout.ts index a90134e..e01042d 100644 --- a/tools/prr/workflow/bailout.ts +++ b/tools/prr/workflow/bailout.ts @@ -5,7 +5,7 @@ import chalk from 'chalk'; import type { CLIOptions } from '../cli.js'; import type { ReviewComment } from '../github/types.js'; -import type { UnresolvedIssue } from '../analyzer/types.js'; +import { getIssuePrimaryPath, type UnresolvedIssue } from '../analyzer/types.js'; import type { Runner } from '../../../shared/runners/types.js'; import type { LLMClient } from '../llm/client.js'; import type { StateContext } from '../state/state-context.js'; @@ -54,7 +54,7 @@ export async function executeBailOut( const firstLine = issue.comment.body.split('\n')[0]; return { commentId: issue.comment.id, - filePath: issue.comment.path, + filePath: getIssuePrimaryPath(issue), line: issue.comment.line, summary: firstLine.length > 100 ? firstLine.substring(0, 100) + '...' : firstLine, }; @@ -109,7 +109,7 @@ export async function executeBailOut( if (unresolvedIssues.length > 0) { console.log(chalk.cyan('\n Remaining Issues (need human attention):')); for (const issue of unresolvedIssues.slice(0, 5)) { - console.log(chalk.yellow(` • ${issue.comment.path}:${issue.comment.line || '?'}`)); + console.log(chalk.yellow(` • ${getIssuePrimaryPath(issue)}:${issue.comment.line || '?'}`)); const cleanPreview = Reporter.sanitizeCommentForDisplay(issue.comment.body).split('\n')[0]; const truncated = cleanPreview.length > 80 ? `${cleanPreview.substring(0, 80)}...` : cleanPreview; console.log(chalk.gray(` "${truncated}"`)); @@ -134,7 +134,7 @@ export async function executeBailOut( dismissedAt: new Date().toISOString(), dismissedAtIteration: 0, category: 'remaining' as const, - filePath: issue.comment.path, + filePath: getIssuePrimaryPath(issue), line: issue.comment.line, commentBody: issue.comment.body, })); diff --git a/tools/prr/workflow/base-merge.ts b/tools/prr/workflow/base-merge.ts index dbf7e96..35e6e45 100644 --- a/tools/prr/workflow/base-merge.ts +++ b/tools/prr/workflow/base-merge.ts @@ -203,7 +203,6 @@ export async function checkAndMergeBaseBranch( } else { // All conflicts resolved - stage files and complete the merge const codeFiles = conflictedFiles.filter((f: string) => !isLockFile(f)); - const lockFiles = conflictedFiles.filter((f: string) => isLockFile(f)); // Verify no conflict markers remain (LLM can sometimes leave <<<<<<< in output) const workdir = (await git.revparse(['--show-toplevel'])).trim(); @@ -225,12 +224,9 @@ export async function checkAndMergeBaseBranch( }; } - // Lock files should be regenerated — accept theirs to unblock the merge - if (lockFiles.length > 0) { - await git.checkout(['--theirs', '--', ...lockFiles]); - await git.add(lockFiles); - console.log(chalk.gray(` ℹ ${formatNumber(lockFiles.length)} lock file(s) accepted from ${prInfo.baseBranch} — consider regenerating`)); - } + // Lock files: already deleted/regenerated and staged inside resolveConflicts (handleLockFileConflicts). + // Do not checkout --theirs here — it errors with "pathspec did not match" when Git no longer + // has an unmerged entry, and would replace a freshly regenerated lock with the base version. await markConflictsResolved(git, codeFiles); const commitResult = await completeMerge(git, `Merge branch '${prInfo.baseBranch}' into ${prInfo.branch}`); diff --git a/tools/prr/workflow/catalog-model-autoheal.ts b/tools/prr/workflow/catalog-model-autoheal.ts index 37db065..6ba68c9 100644 --- a/tools/prr/workflow/catalog-model-autoheal.ts +++ b/tools/prr/workflow/catalog-model-autoheal.ts @@ -4,12 +4,15 @@ * * **When:** `main-loop-setup` immediately after comments are fetched and `currentCommentIds` are set, * **before** per-path file hashes used for analysis cache — WHY: healed content must be what the - * analyzer and cache keys see. + * analyzer and cache keys see. **Dedup cluster:** when **`state.dedupCache`** matches the current + * comment-id set (`dedup-v2`), **`markVerified`** applies to the full LLM dedup cluster (canonical + * keeps **`catalog-autoheal`** / **`catalog-autoheal-noop`**; dupes reference canonical id). * * **Commit gate:** Same as fixer path — `verifiedThisSession` must be non-empty. We `markVerified` * each healed comment so `commitAndPushChanges` can run on the "no unresolved issues" branch. */ +import { execFileSync } from 'child_process'; import { readFileSync, writeFileSync } from 'fs'; import { join } from 'path'; import chalk from 'chalk'; @@ -20,9 +23,39 @@ import { debug } from '../../../shared/logger.js'; import { formatNumber } from '../ui/reporter.js'; import { resolveTrackedPath } from './helpers/solvability.js'; import { getOutdatedModelCatalogDismissal } from './helpers/outdated-model-advice.js'; +import { getDuplicateClusterCommentIds } from './utils.js'; const ENV_DISABLE_AUTOHEAL = 'PRR_DISABLE_MODEL_CATALOG_AUTOHEAL'; +/** + * Mark canonical + dedup siblings verified after catalog heal. + * Canonical row keeps **`catalog-autoheal`** / **`catalog-autoheal-noop`**; dupes use **`autoVerifiedFrom = canonicalId`**. + * WHY: Auto-heal runs before analysis — use persisted **`dedupCache.duplicateMap`** when comment IDs match. + */ +function markCatalogHealVerifiedCluster( + stateContext: StateContext, + currentCommentId: string, + duplicateMap: Map | undefined, + vs: Set, + anchorMarker: 'catalog-autoheal' | 'catalog-autoheal-noop', +): boolean { + const clusterIds = getDuplicateClusterCommentIds(currentCommentId, duplicateMap); + const canonicalId = clusterIds[0]!; + let any = false; + for (const cid of clusterIds) { + if (Verification.isVerified(stateContext, cid)) continue; + const marker = cid === canonicalId ? anchorMarker : canonicalId; + try { + Verification.markVerified(stateContext, cid, marker); + vs.add(cid); + any = true; + } catch (e) { + debug('[Auto-heal] markVerified failed', { commentId: cid.slice(0, 7), err: String(e) }); + } + } + return any; +} + /** * Lines above/below the GitHub review anchor to search for quoted model literals. * WHY 20: Large enough to cover multi-line object literals near the comment; small enough to avoid @@ -91,13 +124,55 @@ export function applyCatalogModelAutoHeals( debug('[Auto-heal] Disabled via PRR_DISABLE_MODEL_CATALOG_AUTOHEAL=1'); return { modifiedPaths: [], verificationTouched: false }; } - + + try { + const porcelain = execFileSync('git', ['-c', 'safe.directory=*', 'status', '--porcelain'], { + cwd: workdir, + encoding: 'utf8', + maxBuffer: 512 * 1024, + }); + if (porcelain.trim().length > 0) { + console.warn( + chalk.yellow( + ' Catalog auto-heal skipped: workdir has uncommitted changes — refusing to edit files on a dirty tree', + ), + ); + debug('[Auto-heal] Skipped — dirty worktree', { + workdir, + porcelainLines: porcelain.trim().split('\n').length, + }); + return { modifiedPaths: [], verificationTouched: false }; + } + } catch (e) { + console.warn( + chalk.yellow( + ` Catalog auto-heal skipped: could not read git status in workdir — ${e instanceof Error ? e.message : String(e)}`, + ), + ); + return { modifiedPaths: [], verificationTouched: false }; + } + const modified: string[] = []; if (!stateContext.verifiedThisSession) { stateContext.verifiedThisSession = new Set(); } const vs = stateContext.verifiedThisSession; + const sortedCommentKey = comments.map((c) => c.id).sort().join(','); + const persistedDedup = stateContext.state?.dedupCache; + let duplicateMapForHeal: Map | undefined; + if ( + persistedDedup?.commentIds === sortedCommentKey && + persistedDedup.schema === 'dedup-v2' && + persistedDedup.duplicateMap && + typeof persistedDedup.duplicateMap === 'object' + ) { + duplicateMapForHeal = new Map(Object.entries(persistedDedup.duplicateMap)); + debug('[Auto-heal] Persisted dedup map available for cluster verification', { + groupCount: duplicateMapForHeal.size, + }); + } + let checkedCount = 0; let matchedCount = 0; let skippedNoPath = 0; @@ -113,16 +188,25 @@ export function applyCatalogModelAutoHeals( checkedCount++; const dismissal = getOutdatedModelCatalogDismissal(comment.body ?? ''); if (!dismissal) { - debug('[Auto-heal] Comment does not match outdated model advice pattern', { - commentId: comment.id.slice(0, 7), - path: comment.path, - hasBody: !!comment.body, - bodyLength: comment.body?.length ?? 0, - }); + // WHY no per-comment debug: almost every comment misses catalog auto-heal; verbose runs + // flooded output.log (audit Cycle 78). Use Summary below + PRR_DEBUG for deep dives. continue; } matchedCount++; + const clusterEarly = getDuplicateClusterCommentIds(comment.id, duplicateMapForHeal); + const canonicalEarly = clusterEarly[0]!; + if ( + comment.id !== canonicalEarly && + clusterEarly.some((id) => Verification.isVerified(stateContext, id)) + ) { + debug('[Auto-heal] Skipping duplicate row — cluster already verified', { + commentId: comment.id.slice(0, 7), + canonicalId: canonicalEarly.slice(0, 7), + }); + continue; + } + debug('[Auto-heal] Found outdated model advice comment', { commentId: comment.id.slice(0, 7), path: comment.path, @@ -248,23 +332,24 @@ export function applyCatalogModelAutoHeals( const goodQuoted = countQuotedModelIdLiterals(allLines, good); if (wrongQuoted === 0 && goodQuoted > 0) { verifiedNoOp++; - vs.add(comment.id); - verificationTouched = true; - try { - Verification.markVerified(stateContext, comment.id, 'catalog-autoheal-noop'); - debug('[Auto-heal] No file change needed — file already uses catalog model id in literals', { - commentId: comment.id.slice(0, 7), - resolvedPath: rel, - catalogGoodId: good, - wronglySuggestedId: wrongly, - goodQuotedLiterals: goodQuoted, - }); - } catch (e) { - debug('[Auto-heal] markVerified failed (catalog-autoheal-noop)', { - commentId: comment.id.slice(0, 7), - err: String(e), - }); + if ( + markCatalogHealVerifiedCluster( + stateContext, + comment.id, + duplicateMapForHeal, + vs, + 'catalog-autoheal-noop', + ) + ) { + verificationTouched = true; } + debug('[Auto-heal] No file change needed — file already uses catalog model id in literals', { + commentId: comment.id.slice(0, 7), + resolvedPath: rel, + catalogGoodId: good, + wronglySuggestedId: wrongly, + goodQuotedLiterals: goodQuoted, + }); console.log( chalk.cyan( ` Catalog auto-heal: no edit needed — ${rel} already has \`${good}\` in string literal(s); marked review ${comment.id.slice(0, 7)}… verified (outdated model advice)`, @@ -302,20 +387,12 @@ export function applyCatalogModelAutoHeals( : [...allLines.slice(0, start), ...newWindow, ...allLines.slice(end)]; writeFileSync(abs, merged.join('\n'), 'utf8'); modified.push(rel); - vs.add(comment.id); - verificationTouched = true; - - try { - Verification.markVerified(stateContext, comment.id, 'catalog-autoheal'); - debug('[Auto-heal] Marked comment as verified', { commentId: comment.id.slice(0, 7) }); - } catch (e) { - // WHY swallow: Disk is already healed; missing state should not abort the run. Commit message - // may list fewer issues than healed files until state loads on a later run. - debug('[Auto-heal] markVerified failed (state not loaded?)', { - commentId: comment.id.slice(0, 7), - err: String(e) - }); + if ( + markCatalogHealVerifiedCluster(stateContext, comment.id, duplicateMapForHeal, vs, 'catalog-autoheal') + ) { + verificationTouched = true; } + debug('[Auto-heal] Marked cluster as verified (disk heal)', { commentId: comment.id.slice(0, 7) }); console.log( chalk.cyan( diff --git a/tools/prr/workflow/dismissal-comments.ts b/tools/prr/workflow/dismissal-comments.ts index 4beba33..8645acf 100644 --- a/tools/prr/workflow/dismissal-comments.ts +++ b/tools/prr/workflow/dismissal-comments.ts @@ -109,6 +109,11 @@ const DISMISSAL_COMMENT_PHRASES = [ /false\s+positive/i, /self-?explanatory/i, /intentional\s*[—\-]/i, + // LLM often returns EXISTING when these appear in // comments but phrases above miss (audit Cycle 78). + /\bintentional\b/i, + /\bdownstream\b/i, + /\berror\s+boundary\b/i, + /\bby\s+design\b/i, ]; /** diff --git a/tools/prr/workflow/duplicate-cluster-verify.ts b/tools/prr/workflow/duplicate-cluster-verify.ts new file mode 100644 index 0000000..b8fa382 --- /dev/null +++ b/tools/prr/workflow/duplicate-cluster-verify.ts @@ -0,0 +1,134 @@ +/** + * LLM dedup cluster helpers for verified state (mark, unmark on stale re-check). + * WHY: `duplicateMap` keys are canonical ids; queued rows may be a dupe — touching only one id + * leaves siblings wrong for queue accounting / “skip fixer” / dismissed state. + */ +import type { StateContext } from '../state/state-context.js'; +import * as Verification from '../state/state-verification.js'; +import { debug } from '../../../shared/logger.js'; +import { getDuplicateClusterCommentIds } from './utils.js'; + +/** + * After {@link recoverVerificationState} marks only comment ids found in `prr-fix:` commits, expand to the + * full LLM dedup cluster when **`state.dedupCache`** matches the current PR comment set (`dedup-v2` + same id key). + * WHY: A fix commit often references one thread id; duplicate threads would stay unverified and re-enter analysis. + * + * @returns **`staleSkipIds`** — use like former `recoveredFromGitCommentIds` for stale/unmark guards (full cluster). + * **`addedVerified`** — true if any new `markVerified` ran (caller may persist state). + */ +export function expandGitRecoveredVerificationFromDedupCache( + stateContext: StateContext, + recoveredFromGit: readonly string[], + allCommentIdsKey: string, +): { staleSkipIds: string[]; addedVerified: boolean } { + const staleSkipIds = new Set(recoveredFromGit); + let addedVerified = false; + + const persisted = stateContext.state?.dedupCache; + if ( + !persisted || + persisted.commentIds !== allCommentIdsKey || + persisted.schema !== 'dedup-v2' || + !persisted.duplicateMap || + typeof persisted.duplicateMap !== 'object' + ) { + return { staleSkipIds: [...staleSkipIds], addedVerified: false }; + } + + const duplicateMap = new Map(Object.entries(persisted.duplicateMap)); + const gitSet = new Set(recoveredFromGit); + const processedCluster = new Set(); + + for (const r of recoveredFromGit) { + const cluster = getDuplicateClusterCommentIds(r, duplicateMap); + for (const cid of cluster) { + staleSkipIds.add(cid); + } + const sig = [...cluster].sort().join('\0'); + if (processedCluster.has(sig)) continue; + processedCluster.add(sig); + + const canonical = cluster[0]!; + const gitAnchor = gitSet.has(canonical) + ? canonical + : cluster.find((id) => gitSet.has(id)) ?? canonical; + + for (const cid of cluster) { + if (Verification.isVerified(stateContext, cid)) continue; + if (cid === gitAnchor) { + Verification.markVerified(stateContext, cid, Verification.PRR_GIT_RECOVERY_VERIFIED_MARKER, { + skipSessionTracking: true, + }); + } else { + Verification.markVerified(stateContext, cid, gitAnchor, { skipSessionTracking: true }); + } + addedVerified = true; + } + } + + return { staleSkipIds: [...staleSkipIds], addedVerified }; +} + +/** + * @returns Count of cluster members verified in addition to the anchor (for "N duplicate(s) auto-resolved"). + */ +export function markVerifiedClusterForFixedIssue( + stateContext: StateContext, + anchorId: string, + duplicateMap: Map | undefined, + verifiedThisSession?: Set | undefined, +): number { + const clusterIds = getDuplicateClusterCommentIds(anchorId, duplicateMap); + let autoExtra = 0; + for (const cid of clusterIds) { + if (Verification.isVerified(stateContext, cid)) continue; + Verification.markVerified(stateContext, cid, cid === anchorId ? undefined : anchorId); + verifiedThisSession?.add(cid); + if (cid !== anchorId) autoExtra++; + } + return autoExtra; +} + +/** + * When analysis re-check says the issue still exists, unmark every verified id in the dedup cluster. + * WHY: Batch/sequential paths only unmarked the analyzed row — dupes stayed verified → "already verified — skip fixer". + * Skips ids in **`recoveredSet`** (git recovery this run) per id. + */ +export function unmarkVerifiedClusterForStaleRecheck( + stateContext: StateContext, + anchorId: string, + duplicateMap: Map | undefined, + recoveredSet?: Set, +): void { + for (const cid of getDuplicateClusterCommentIds(anchorId, duplicateMap)) { + if (!Verification.isVerified(stateContext, cid)) continue; + if (recoveredSet?.has(cid)) { + debug('Skipping unmark (recovered from git this run)', { commentId: cid }); + continue; + } + Verification.unmarkVerified(stateContext, cid); + debug('Unmarked verified (stale re-check said still exists)', { commentId: cid }); + } +} + +/** + * After final audit reports UNFIXED (or missing result), unmark every id in each failed comment’s dedup cluster. + * **WHY:** {@link markVerifiedClusterForFixedIssue} marks the full cluster when audit passes; per-id + * **`unmarkVerified`** left siblings verified → "already verified — skip fixer" while another thread + * re-entered the queue (same logical issue). + */ +export function unmarkVerifiedClustersForFinalAuditFailures( + stateContext: StateContext, + failedCommentIds: readonly string[], + duplicateMap: Map | undefined, +): void { + const seen = new Set(); + for (const id of failedCommentIds) { + for (const cid of getDuplicateClusterCommentIds(id, duplicateMap)) { + if (seen.has(cid)) continue; + seen.add(cid); + Verification.unmarkVerified(stateContext, cid); + debug('Unmarked verified (final audit failure — cluster)', { commentId: cid }); + } + } +} diff --git a/tools/prr/workflow/execute-fix-iteration.ts b/tools/prr/workflow/execute-fix-iteration.ts index cd44146..6e26d7d 100644 --- a/tools/prr/workflow/execute-fix-iteration.ts +++ b/tools/prr/workflow/execute-fix-iteration.ts @@ -17,7 +17,6 @@ import type { StateContext } from '../state/state-context.js'; import { setPhase, addTokenUsage, getState } from '../state/state-context.js'; import * as State from '../state/state-core.js'; import * as Verification from '../state/state-verification.js'; -import * as Dismissed from '../state/state-dismissed.js'; import * as Iterations from '../state/state-iterations.js'; import * as Lessons from '../state/state-lessons.js'; import * as Performance from '../state/state-performance.js'; @@ -30,11 +29,16 @@ import { debug, debugStep, startTimer, endTimer, formatDuration, formatNumber } import { hasChanges } from '../../../shared/git/git-clone-index.js'; import * as ResolverProc from '../resolver-proc.js'; import * as LessonsAPI from '../state/lessons-index.js'; +import { + dismissDuplicateClusterFromComments, + getClusterIdsAccountedOnState, + resolveEffectiveDuplicateMapForComments, +} from './issue-analysis-dedup.js'; import { parseResultCode } from './utils.js'; import { stripPrrFromDiffStat } from './bot-prediction-llm.js'; import { tryRestoreFromBaseIfRequested } from './restore-from-base.js'; import { getMentionedTestFilePaths, getMigrationJournalPath, getConsolidateDuplicateTargetPath, getDocumentationPathFromComment, getImplPathForTestFileIssue, getPathsToDeleteFromComment, getReferencedFullPathFromComment, getRenameTargetPath, getSiblingFilePathsFromComment, getTestPathForSourceFileIssue, issueRequestsTests, reviewSuggestsFixInTest, reviewTargetsMentionedTestFile } from '../analyzer/prompt-builder.js'; -import { filterAllowedPathsForFix } from '../../../shared/path-utils.js'; +import { filterAllowedPathsForFix, normalizeRepoPath, stripGitDiffPathPrefix } from '../../../shared/path-utils.js'; import { HALLUCINATION_DISMISS_THRESHOLD, NO_PROGRESS_DISMISS_THRESHOLD, getEffectiveMaxConcurrentLLM } from '../../../shared/constants.js'; import { runWithConcurrency } from '../../../shared/run-with-concurrency.js'; import { existsSync } from 'fs'; @@ -47,13 +51,26 @@ import { assessSolvability } from './helpers/solvability.js'; // Re-running it is guaranteed to fail again — skip straight to rotation. let lastPromptKey: string | null = null; +/** + * Restrict prompt injection to blast-radius paths when the set is present. + * **WHY:** Saves context; fixer may still edit `allowedPathsForBatch`. If intersection is empty, fall back to full batch. + */ +function allowedPathsForInjectionSubset(batch: string[], blast: Set | undefined): string[] { + if (!blast || blast.size === 0) return batch; + const filtered = batch.filter((p) => { + const k = stripGitDiffPathPrefix(normalizeRepoPath(p)); + return blast.has(k) || blast.has(p); + }); + return filtered.length > 0 ? filtered : batch; +} + /** Expand allowed paths for a set of issues (mirrors prompt-builder so runner accepts same files). */ function getAllowedPathsForIssues( issues: UnresolvedIssue[], pathExists: (p: string) => boolean ): string[] { return filterAllowedPathsForFix(Array.from(new Set(issues.flatMap((i) => { - const primaryPath = i.resolvedPath ?? i.comment.path; + const primaryPath = getIssuePrimaryPath(i); let base = i.allowedPaths?.length ? [...i.allowedPaths] : [primaryPath]; if (base.length === 0) base = [primaryPath]; const journal = getMigrationJournalPath(i); @@ -78,7 +95,7 @@ function getAllowedPathsForIssues( const testPath = getTestPathForSourceFileIssue(i, { pathExists, forceTestPath }); if (testPath && !base.includes(testPath)) base.push(testPath); if (issueRequestsTests(i) || forceTestPath) { - const srcPath = i.resolvedPath ?? i.comment.path ?? ''; + const srcPath = getIssuePrimaryPath(i) || ''; if (/\.(?:ts|tsx|js|jsx)$/.test(srcPath)) { const testBase = srcPath.replace(/^.*\//, '').replace(/\.(ts|tsx|js|jsx)$/, '.test.$1'); const testsRootPath = `__tests__/${testBase}`; @@ -108,7 +125,7 @@ function addDisallowedFilesLessonsAndState( ): void { const allowedStr = allowedPathsForBatch.length > 0 ? allowedPathsForBatch.slice(0, 5).join(', ') + (allowedPathsForBatch.length > 5 ? ` (+${allowedPathsForBatch.length - 5} more)` : '') - : [...new Set(issuesForPrompt.map((i) => i.resolvedPath ?? i.comment.path))].slice(0, 5).join(', '); + : [...new Set(issuesForPrompt.map((i) => getIssuePrimaryPath(i)))].slice(0, 5).join(', '); LessonsAPI.Add.addGlobalLesson( lessonsContext, `Fixer attempted disallowed file(s): ${skippedDisallowedFiles.join(', ')}. Only edit the file(s) listed in TARGET FILE(S): ${allowedStr}.` @@ -119,7 +136,7 @@ function addDisallowedFilesLessonsAndState( // Only increment wrong-file count for issues whose target file was in skippedDisallowedFiles. // WHY: Otherwise every issue in the batch gets blamed when one issue's file was disallowed (e.g. empty allowlist). for (const issue of issuesForPrompt) { - const primaryPath = issue.resolvedPath ?? issue.comment.path; + const primaryPath = getIssuePrimaryPath(issue); const allowedForIssue = issue.allowedPaths?.length ? issue.allowedPaths : [primaryPath]; const wasThisIssueTargetDisallowed = skippedDisallowedFiles.some( (p) => p === primaryPath || allowedForIssue.includes(p) @@ -149,7 +166,7 @@ function addDisallowedFilesLessonsAndState( ].filter((p, idx, arr) => Boolean(p) && arr.indexOf(p) === idx); if (inferredTestPaths.some((p) => allowedPathsForBatch.includes(p))) continue; const attemptedTestPath = skippedDisallowedFiles.find( - (p) => testFilePattern.test(p) && isPlausibleTestPathForIssue(p, issue.comment.path) + (p) => testFilePattern.test(p) && isPlausibleTestPathForIssue(p, getIssuePrimaryPath(issue)) ); if (!attemptedTestPath) continue; if (!state.wrongFileAllowedPathsByCommentId) state.wrongFileAllowedPathsByCommentId = {}; @@ -201,9 +218,19 @@ export async function executeFixIteration( progressThisCycle: number, getCurrentModel: () => string | undefined, parseNoChangesExplanation: (output: string) => string | null, - trySingleIssueFix: (issues: UnresolvedIssue[], git: SimpleGit, verified?: Set) => Promise, + trySingleIssueFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verified?: Set, + comments?: ReviewComment[], + ) => Promise, tryRotation: (failureErrorType?: string) => boolean, - tryDirectLLMFix: (issues: UnresolvedIssue[], git: SimpleGit, verified?: Set) => Promise, + tryDirectLLMFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verified?: Set, + comments?: ReviewComment[], + ) => Promise, executeBailOut: (issues: UnresolvedIssue[], comments: ReviewComment[]) => Promise, /** Current fix iteration (1-based). When 1, use conservative prompt cap to avoid timeout (audit). */ fixIteration: number, @@ -228,6 +255,7 @@ export async function executeFixIteration( skippedDuplicatePrompt?: boolean; }> { const spinner = ora(); + const dupForCluster = resolveEffectiveDuplicateMapForComments(stateContext, duplicateMap, comments); // H3 (output.log audit): Dismiss issues whose file has accumulated too many S/R or hallucinated-stub failures. let workingUnresolved = unresolvedIssues; @@ -236,17 +264,19 @@ export async function executeFixIteration( const failureCounts = runnerWithCounts.getFailureCounts(); const dismissedIds = new Set(); for (const issue of workingUnresolved) { - if ((failureCounts.get(issue.comment.path) ?? 0) >= HALLUCINATION_DISMISS_THRESHOLD) { - Dismissed.dismissIssue( + const primaryForCounts = getIssuePrimaryPath(issue); + if ((failureCounts.get(primaryForCounts) ?? 0) >= HALLUCINATION_DISMISS_THRESHOLD) { + dismissDuplicateClusterFromComments( stateContext, - issue.comment.id, + issue.comment, + dupForCluster, + comments, 'Repeated failed fix attempts (output did not match file); manual review recommended.', 'remaining', - issue.comment.path, - issue.comment.line, - issue.comment.body ); - dismissedIds.add(issue.comment.id); + for (const cid of getClusterIdsAccountedOnState(stateContext, issue.comment.id, dupForCluster)) { + dismissedIds.add(cid); + } } } if (dismissedIds.size > 0) { @@ -263,18 +293,18 @@ export async function executeFixIteration( for (const issue of workingUnresolved) { const solvability = assessSolvability(workdir, issue.comment, stateContext); if (solvability.solvable) continue; - const primaryPath = getIssuePrimaryPath(issue); - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - issue.comment.id, + issue.comment, + dupForCluster, + comments, solvability.reason ?? 'Not solvable', solvability.dismissCategory ?? 'not-an-issue', - primaryPath, - issue.comment.line, - issue.comment.body, - solvability.remediationHint + solvability.remediationHint, ); - dismissedIds.add(issue.comment.id); + for (const cid of getClusterIdsAccountedOnState(stateContext, issue.comment.id, dupForCluster)) { + dismissedIds.add(cid); + } } if (dismissedIds.size > 0) { workingUnresolved = workingUnresolved.filter((i) => !dismissedIds.has(i.comment.id)); @@ -307,7 +337,7 @@ export async function executeFixIteration( ? workingUnresolved.map((issue) => { const extra = allowedPathsByComment[issue.comment.id]; if (!extra?.length) return issue; - const base = issue.allowedPaths?.length ? issue.allowedPaths : [issue.comment.path]; + const base = issue.allowedPaths?.length ? issue.allowedPaths : [getIssuePrimaryPath(issue)]; const merged = [...new Set([...base, ...extra])]; return { ...issue, allowedPaths: merged }; }) @@ -444,6 +474,10 @@ export async function executeFixIteration( // Pass OpenAI key explicitly so Codex gets it even when config came from env and runner spawns with a copy of process.env const keyForRunner = openaiApiKey ?? process.env.OPENAI_API_KEY; const allowedPathsForBatch = getAllowedPathsForIssues(issuesForPrompt, pathExists); + const allowedPathsForInjection = allowedPathsForInjectionSubset( + allowedPathsForBatch, + stateContext.blastRadiusPaths + ); let result: Awaited>; const concurrencyLimit = getEffectiveMaxConcurrentLLM(); @@ -487,7 +521,14 @@ export async function executeFixIteration( stateContext ); const groupPaths = getAllowedPathsForIssues(groupIssues, pathExists); - return { prompt: details.prompt, allowedPathsForBatch: groupPaths, groupIssues, shouldSkip: details.shouldSkip }; + const groupInjection = allowedPathsForInjectionSubset(groupPaths, stateContext.blastRadiusPaths); + return { + prompt: details.prompt, + allowedPathsForBatch: groupPaths, + allowedPathsForInjection: groupInjection, + groupIssues, + shouldSkip: details.shouldSkip, + }; }); const toRun = groupDetails.filter((d) => !d.shouldSkip); if (toRun.length > 0) { @@ -499,7 +540,7 @@ export async function executeFixIteration( openaiApiKey: keyForRunner, unresolvedIssues: d.groupIssues, allowedPathsForBatch: d.allowedPathsForBatch, - allowedPathsForInjection: d.allowedPathsForBatch, + allowedPathsForInjection: d.allowedPathsForInjection, }) ); try { @@ -540,7 +581,7 @@ export async function executeFixIteration( openaiApiKey: keyForRunner, unresolvedIssues: workingUnresolved, allowedPathsForBatch, - allowedPathsForInjection: allowedPathsForBatch, + allowedPathsForInjection, }); } finally { spinner.stop(); @@ -554,7 +595,7 @@ export async function executeFixIteration( openaiApiKey: keyForRunner, unresolvedIssues: workingUnresolved, allowedPathsForBatch, - allowedPathsForInjection: allowedPathsForBatch, + allowedPathsForInjection, }); } finally { spinner.stop(); @@ -595,10 +636,10 @@ export async function executeFixIteration( } // When search/replace failed to match, add file-specific lessons so next run uses exact content, narrower anchor, or full-file rewrite. if (result.error && /search\/replace operations failed|search text did not match/i.test(result.error)) { - const paths = [...new Set(workingUnresolved.map((i) => i.comment.path))]; + const paths = [...new Set(workingUnresolved.map((i) => getIssuePrimaryPath(i)))]; console.log(chalk.yellow(` ⚠ Search/replace did not match for this batch (${formatNumber(paths.length)} file(s)) — next attempt will include last-error hint.`)); for (const path of paths) { - const one = workingUnresolved.find((i) => i.comment.path === path); + const one = workingUnresolved.find((i) => getIssuePrimaryPath(i) === path); if (one) { LessonsAPI.Add.addLesson( lessonsContext, @@ -793,7 +834,7 @@ export async function executeFixIteration( parseNoChangesExplanation, workdir, comments, - duplicateMap, + dupForCluster, ); let updatedConsecutiveFailures = consecutiveFailures; @@ -844,11 +885,13 @@ export async function executeFixIteration( // output.log audit: earlier bail-out for this issue set — dismiss as remaining and continue with others. if (updatedConsecutiveFailures >= NO_PROGRESS_DISMISS_THRESHOLD && issuesForPrompt.length > 0) { const reason = `No progress after ${formatNumber(updatedConsecutiveFailures)} attempts across models; continuing with other issues.`; + const dismissedIds = new Set(); for (const issue of issuesForPrompt) { - const primaryPath = getIssuePrimaryPath(issue); - Dismissed.dismissIssue(stateContext, issue.comment.id, reason, 'remaining', primaryPath, issue.comment.line, issue.comment.body ?? ''); + dismissDuplicateClusterFromComments(stateContext, issue.comment, dupForCluster, comments, reason, 'remaining'); + for (const cid of getClusterIdsAccountedOnState(stateContext, issue.comment.id, dupForCluster)) { + dismissedIds.add(cid); + } } - const dismissedIds = new Set(issuesForPrompt.map((i) => i.comment.id)); const remaining = workingUnresolved.filter((i) => !dismissedIds.has(i.comment.id)); console.log(chalk.yellow(` No progress after ${formatNumber(updatedConsecutiveFailures)} no-change attempt(s) — dismissing ${formatNumber(issuesForPrompt.length)} issue(s) as remaining; continuing with ${formatNumber(remaining.length)} other(s).`)); return { diff --git a/tools/prr/workflow/final-cleanup.ts b/tools/prr/workflow/final-cleanup.ts index 2d38a18..06a6a93 100644 --- a/tools/prr/workflow/final-cleanup.ts +++ b/tools/prr/workflow/final-cleanup.ts @@ -199,10 +199,22 @@ export async function executeFinalCleanup( replyToThreads: true, resolveThreads: options.resolveThreads, }); - // User-visible summary when most replies failed (e.g. systemic 422; output.log audit). + // User-visible nudge when most replies failed (details already in thread-replies summary line; output.log audit). if (replyStats && replyStats.attempted > 0 && replyStats.replied < replyStats.attempted * 0.1) { const failed = replyStats.attempted - replyStats.replied; - console.log(chalk.yellow(`Could not post replies on ${formatNumber(failed)} review thread(s) (GitHub returned Validation Failed). Check repo permissions and thread state.`)); + const v422 = replyStats.failed422; + const other = replyStats.failedOther; + const hint422 = + v422 > 0 + ? `${formatNumber(v422)} were HTTP 422 (stale thread / old diff anchor — see CodeRabbit vs HEAD warning). ` + : ''; + const hintOther = other > 0 ? `${formatNumber(other)} failed for other reasons. ` : ''; + console.log( + chalk.yellow( + `Thread replies: ${formatNumber(failed)} of ${formatNumber(replyStats.attempted)} attempt(s) did not post. ${hint422}${hintOther}` + + `Check token scopes, or re-run after review bots target the current HEAD (docs/THREAD-REPLIES.md).`, + ), + ); } } catch (err) { debug('Thread replies for dismissed (non-fatal)', { error: String(err) }); diff --git a/tools/prr/workflow/fix-iteration-pre-checks.ts b/tools/prr/workflow/fix-iteration-pre-checks.ts index ab868ad..9a213f0 100644 --- a/tools/prr/workflow/fix-iteration-pre-checks.ts +++ b/tools/prr/workflow/fix-iteration-pre-checks.ts @@ -67,6 +67,7 @@ export async function executePreIterationChecks( // WHY: Required for P1 (prompts.log audit) — new comments are run through assessSolvability when workdir is set; without it, (PR comment) and other unsolvable items would enter the fix queue mid-loop. workdir?: string, changedFiles?: string[], + duplicateMap?: Map, ): Promise<{ shouldBreak: boolean; exitReason?: string; @@ -87,7 +88,8 @@ export async function executePreIterationChecks( getCodeSnippet, prInfo.headSha, stateContext, - workdir + workdir, + duplicateMap, ); } diff --git a/tools/prr/workflow/fix-loop-rotation.ts b/tools/prr/workflow/fix-loop-rotation.ts index 33826b6..073c48f 100644 --- a/tools/prr/workflow/fix-loop-rotation.ts +++ b/tools/prr/workflow/fix-loop-rotation.ts @@ -55,13 +55,15 @@ export async function handleRotationStrategy( trySingleIssueFix: ( issues: UnresolvedIssue[], git: SimpleGit, - verifiedThisSession?: Set + verifiedThisSession?: Set, + comments?: ReviewComment[], ) => Promise, tryRotation: (failureErrorType?: string) => boolean, tryDirectLLMFix: ( issues: UnresolvedIssue[], git: SimpleGit, - verifiedThisSession?: Set + verifiedThisSession?: Set, + comments?: ReviewComment[], ) => Promise, executeBailOut: ( unresolvedIssues: UnresolvedIssue[], @@ -99,7 +101,7 @@ export async function handleRotationStrategy( if ((isOddFailure || trySingleIssueForNoChanges) && unresolvedIssues.length > 1 && !skipSingleIssue) { console.log(chalk.yellow('\n 🎯 Trying single-issue focus mode...')); - const singleIssueFixed = await trySingleIssueFix(unresolvedIssues, git, verifiedThisSession); + const singleIssueFixed = await trySingleIssueFix(unresolvedIssues, git, verifiedThisSession, comments); if (singleIssueFixed) { // Track progress for bail-out detection, but do NOT reset consecutiveFailures. // WHY: Resetting consecutiveFailures to 0 here causes a rotation stall bug: @@ -139,7 +141,7 @@ export async function handleRotationStrategy( } else { // Bail-out triggered - try direct LLM one last time before giving up console.log(chalk.yellow('\n 🧠 Last resort: trying direct LLM API fix before bail-out...')); - const directFixed = await tryDirectLLMFix(unresolvedIssues, git, verifiedThisSession); + const directFixed = await tryDirectLLMFix(unresolvedIssues, git, verifiedThisSession, comments); if (directFixed) { newConsecutiveFailures = 0; newModelFailuresInCycle = 0; @@ -171,7 +173,7 @@ export async function handleRotationStrategy( console.log(chalk.yellow('\n ⏭ Already using direct LLM API - skipping redundant fallback')); } else { console.log(chalk.yellow('\n 🧠 All tools/models exhausted, trying direct LLM API fix...')); - const directFixed = await tryDirectLLMFix(unresolvedIssues, git, verifiedThisSession); + const directFixed = await tryDirectLLMFix(unresolvedIssues, git, verifiedThisSession, comments); if (directFixed) { newConsecutiveFailures = 0; newModelFailuresInCycle = 0; diff --git a/tools/prr/workflow/fix-loop-utils.ts b/tools/prr/workflow/fix-loop-utils.ts index bfa7e9c..6987ffb 100644 --- a/tools/prr/workflow/fix-loop-utils.ts +++ b/tools/prr/workflow/fix-loop-utils.ts @@ -20,8 +20,13 @@ import type { PRInfo } from '../github/types.js'; import { checkRemoteAhead } from '../../../shared/git/git-conflicts.js'; import { pullLatest } from '../../../shared/git/git-pull.js'; import { debug, formatNumber } from '../../../shared/logger.js'; +import { getMidLoopNewCommentCap } from '../../../shared/constants.js'; import { dedupeNewCommentsByQueue } from './utils.js'; import { assessSolvability, resolveTrackedPathWithPrFiles } from './helpers/solvability.js'; +import { + dismissDuplicateClusterFromComments, + resolveEffectiveDuplicateMapForComments, +} from './issue-analysis-dedup.js'; // Note: All imports must be at module top level - do not use dynamic imports inside functions @@ -47,6 +52,7 @@ import { assessSolvability, resolveTrackedPathWithPrFiles } from './helpers/solv * @param headSha - Optional PR head SHA for the check * @param stateContext - State context (for solvability and dismissals) * @param workdir - Repo workdir (for solvability path checks). If missing, solvability is skipped for new comments. + * @param duplicateMap - LLM dedup map from this push iteration’s analysis — dismiss cluster when a new thread is unsolvable. */ export async function processNewBotReviews( github: GitHubAPI, @@ -60,7 +66,8 @@ export async function processNewBotReviews( getCodeSnippet: (path: string, line: number | null, body: string) => Promise, headSha?: string, stateContext?: StateContext, - workdir?: string + workdir?: string, + duplicateMap?: Map, ): Promise { // Check for new bot reviews if expected time has passed. Skip fetch when head unchanged and recently fetched (backoff). const newReviewResult = await checkForNewBotReviews(owner, repo, prNumber, existingCommentIds, headSha); @@ -80,22 +87,27 @@ export async function processNewBotReviews( // and burned 10+ fix iterations each. Apply the same filter as findUnresolvedIssues. const solvableComments: ReviewComment[] = []; if (workdir && stateContext) { + const lookupComments = [...comments, ...newComments]; + const effectiveDupForLookup = resolveEffectiveDuplicateMapForComments( + stateContext, + duplicateMap, + lookupComments, + ); for (const comment of newComments) { // WHY: Track every new comment ID (including ones we will dismiss) so the next checkForNewBotReviews does not return them again as "new". existingCommentIds.add(comment.id); const solvability = assessSolvability(workdir, comment, stateContext); if (!solvability.solvable) { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - comment.id, + comment, + effectiveDupForLookup, + lookupComments, solvability.reason ?? 'Not solvable', solvability.dismissCategory ?? 'not-an-issue', - comment.path, - comment.line, - comment.body, - solvability.remediationHint + solvability.remediationHint, ); - debug('P1: dismissed unsolvable new comment (solvability)', { commentId: comment.id, path: comment.path, reason: solvability.reason }); + debug('P1: dismissed unsolvable new comment (solvability, cluster)', { commentId: comment.id, path: comment.path, reason: solvability.reason }); } else { solvableComments.push(comment); } @@ -110,26 +122,41 @@ export async function processNewBotReviews( } else { solvableComments.push(...newComments); } - // Add solvable new comments to tracking — fetch all snippets concurrently + + const cap = getMidLoopNewCommentCap(); + const overflow = + cap > 0 && solvableComments.length > cap ? solvableComments.length - cap : 0; + const toEnqueue = overflow > 0 ? solvableComments.slice(0, cap) : solvableComments; + if (overflow > 0) { + console.log( + chalk.yellow( + ` Capping mid-loop enqueue: ${formatNumber(toEnqueue.length)} of ${formatNumber(solvableComments.length)} new thread(s) (PRR_MID_LOOP_NEW_COMMENT_CAP=${formatNumber(cap)}). ${formatNumber(overflow)} remain in the PR but are deferred until the next full analysis.`, + ), + ); + } + + // Register every solvable new comment on the PR list so later phases see full thread set; only `toEnqueue` enters the fix queue now. for (const comment of solvableComments) { existingCommentIds.add(comment.id); comments.push(comment); + } + for (const comment of toEnqueue) { console.log(chalk.yellow(` • ${comment.path}:${comment.line || '?'} (by ${comment.author})`)); } const newSnippets = await Promise.all( - solvableComments.map((c) => getCodeSnippet(c.path, c.line, c.body)) + toEnqueue.map((c) => getCodeSnippet(c.path, c.line, c.body)) ); - for (let i = 0; i < solvableComments.length; i++) { + for (let i = 0; i < toEnqueue.length; i++) { unresolvedIssues.push({ - comment: solvableComments[i], + comment: toEnqueue[i], codeSnippet: newSnippets[i], stillExists: true, explanation: 'New comment from bot review', triage: { importance: 3, ease: 3 }, }); } - - console.log(chalk.cyan(` Added ${formatNumber(solvableComments.length)} new issue(s) to workflow\n`)); + + console.log(chalk.cyan(` Added ${formatNumber(toEnqueue.length)} new issue(s) to workflow\n`)); } } diff --git a/tools/prr/workflow/fix-verification.ts b/tools/prr/workflow/fix-verification.ts index 6dc8dfd..00d56cd 100644 --- a/tools/prr/workflow/fix-verification.ts +++ b/tools/prr/workflow/fix-verification.ts @@ -13,6 +13,7 @@ import chalk from 'chalk'; import ora from 'ora'; import { readFile } from 'fs/promises'; import { getIssuePrimaryPath, type UnresolvedIssue } from '../analyzer/types.js'; +import type { ReviewComment } from '../github/types.js'; import type { SimpleGit } from 'simple-git'; import type { StateContext } from '../state/state-context.js'; import { setPhase, getState } from '../state/state-context.js'; @@ -26,12 +27,19 @@ import type { LessonsContext } from '../state/lessons-context.js'; import type { LLMClient } from '../llm/client.js'; import { isInfrastructureFailure } from './helpers/recovery.js'; import { isEmptyDiffVerdict } from './utils.js'; +import { markVerifiedClusterForFixedIssue } from './duplicate-cluster-verify.js'; +import { + dismissDuplicateClusterFromComments, + mergeCommentsForClusterDismiss, + resolveEffectiveDuplicateMapForComments, +} from './issue-analysis-dedup.js'; import * as LessonsAPI from '../state/lessons-index.js'; import { debug, debugStep, startTimer, endTimer, setTokenPhase, formatDuration, formatNumber, pluralize } from '../../../shared/logger.js'; import { VERIFIER_FEEDBACK_HISTORY_MAX } from '../../../shared/constants.js'; import { getChangedFiles, getDiffForFile, detectFileCorruption, filterUnifiedDiffByLineRange } from '../../../shared/git/git-clone-index.js'; import { basename, dirname, extname, join } from 'path'; import { VERIFIER_ESCALATION_THRESHOLD, AUTO_VERIFY_PATTERN_ABSENT_THRESHOLD, FILE_UNCHANGED_DISMISS_THRESHOLD } from '../../../shared/constants.js'; +import { computePerFixVerifyCurrentCodeBudget, truncateNumberedCodeAroundAnchor } from '../../../shared/prompt-budget.js'; /** True when verifier explanation says the file must be deleted (not just emptied). Cycle 13 M2. */ function isDeleteEntirelyVerdict(explanation: string): boolean { @@ -370,6 +378,8 @@ type CurrentCodeAtLineOptions = { expandForTypeSignature?: boolean; expandForLifecycle?: boolean; commentBody?: string; + /** When set, shrink numbered output to match batch verify prompt budget (see {@link computePerFixVerifyCurrentCodeBudget}). */ + maxOutputChars?: number; }; /** @@ -394,10 +404,16 @@ async function getCurrentCodeAtLine( const expandForTypeSignature = options?.expandForTypeSignature === true; const expandForLifecycle = options?.expandForLifecycle === true; + const cap = (s: string): string => + options?.maxOutputChars && s.length > options.maxOutputChars + ? truncateNumberedCodeAroundAnchor(s, line, options.maxOutputChars) + : s; + const fullFileLimit = expandForTypeSignature ? MAX_LINES_FULL_FILE_VERIFY_TYPE_SIGNATURE : MAX_LINES_FULL_FILE_VERIFY; if (lines.length <= fullFileLimit) { - return lines.map((l, i) => `${i + 1}: ${l}`).join('\n') - + `\n(end of file — ${lines.length} lines total)`; + return cap( + lines.map((l, i) => `${i + 1}: ${l}`).join('\n') + `\n(end of file — ${lines.length} lines total)` + ); } // WHY anchor when line known: expandForTypeSignature used to return lines 1..500 only; batch verify then @@ -413,24 +429,28 @@ async function getCurrentCodeAtLine( .map((l, i) => `${start + i + 1}: ${l}`) .join('\n'); if (end >= lines.length) { - return snippet + `\n(end of file — ${lines.length} lines total)`; + return cap(snippet + `\n(end of file — ${lines.length} lines total)`); } - return snippet + `\n... (truncated — file has ${lines.length} lines total)`; + return cap(snippet + `\n... (truncated — file has ${lines.length} lines total)`); } - return lines - .slice(0, fullFileLimit) - .map((l, i) => `${i + 1}: ${l}`) - .join('\n') + `\n... (truncated — file has ${lines.length} lines total)`; + return cap( + lines + .slice(0, fullFileLimit) + .map((l, i) => `${i + 1}: ${l}`) + .join('\n') + `\n... (truncated — file has ${lines.length} lines total)` + ); } if (expandForLifecycle && options?.commentBody) { const lifecycleSnippet = buildLifecycleAwareVerificationSnippet(content, filePath, line, options.commentBody); - if (lifecycleSnippet) return lifecycleSnippet; + if (lifecycleSnippet) return cap(lifecycleSnippet); } if (line === null) { - return lines.slice(0, 50).map((l, i) => `${i + 1}: ${l}`).join('\n') - + `\n... (truncated — file has ${lines.length} lines total)`; + return cap( + lines.slice(0, 50).map((l, i) => `${i + 1}: ${l}`).join('\n') + + `\n... (truncated — file has ${lines.length} lines total)` + ); } const contextBefore = 30; @@ -443,10 +463,11 @@ async function getCurrentCodeAtLine( .map((l, i) => `${start + i + 1}: ${l}`) .join('\n'); - if (end >= lines.length) { - return snippet + `\n(end of file — ${lines.length} lines total)`; - } - return snippet + `\n... (truncated — file has ${lines.length} lines total)`; + const withFooter = + end >= lines.length + ? snippet + `\n(end of file — ${lines.length} lines total)` + : snippet + `\n... (truncated — file has ${lines.length} lines total)`; + return cap(withFooter); } catch { return '(file not found or unreadable)'; } @@ -469,7 +490,9 @@ export async function verifyFixes( getCurrentModel?: () => string | undefined, getRunner?: () => Runner, /** Files modified in any previous push iteration this run. WHY: pill-output — iteration 2 dismissed as file-unchanged issues whose file was fixed in iteration 1. */ - filesModifiedInPreviousIterations?: Set + filesModifiedInPreviousIterations?: Set, + /** Full PR threads — file-unchanged dismiss expands to LLM dedup cluster when present. */ + comments?: ReviewComment[], ): Promise<{ verifiedCount: number; failedCount: number; @@ -505,6 +528,11 @@ export async function verifyFixes( for (const p of filesModifiedInPreviousIterations) effectiveChangedSet.add(p); } const effectiveChangedFiles = [...effectiveChangedSet]; + const dupForVerifyCluster = resolveEffectiveDuplicateMapForComments( + stateContext, + duplicateMap, + comments, + ); for (const issue of unresolvedIssues) { // WHY skip: Recovery phases (trySingleIssueFix, tryDirectLLMFix) verify @@ -542,20 +570,35 @@ export async function verifyFixes( // Mark unchanged files as failed (only after threshold) and document as dismissed // NOTE: No validation needed here - we're providing an explicit, meaningful reason + const unchangedReason = + 'File was not modified by the fixer tool, so issue could not have been addressed'; + const dismissRowsUnchanged = mergeCommentsForClusterDismiss(comments, unresolvedIssues); for (const issue of unchangedIssues) { const primaryPath = getIssuePrimaryPath(issue); Iterations.addVerificationResult(stateContext, issue.comment.id, { passed: false, reason: 'File was not modified', }); - Dismissed.dismissIssue(stateContext, - issue.comment.id, - 'File was not modified by the fixer tool, so issue could not have been addressed', - 'file-unchanged', - primaryPath, - issue.comment.line, - issue.comment.body - ); + if (dismissRowsUnchanged.length > 0) { + dismissDuplicateClusterFromComments( + stateContext, + issue.comment, + dupForVerifyCluster, + dismissRowsUnchanged, + unchangedReason, + 'file-unchanged', + ); + } else { + Dismissed.dismissIssue( + stateContext, + issue.comment.id, + unchangedReason, + 'file-unchanged', + primaryPath, + issue.comment.line, + issue.comment.body, + ); + } failedCount++; } @@ -577,7 +620,7 @@ export async function verifyFixes( // Get combined diff for an issue — includes target file AND any related test files. // Prompts.log audit: when multiple fixes target the same file, filter the target file's diff by issue line so the verifier sees only relevant hunks. const getIssueDiff = async (issue: UnresolvedIssue): Promise => { - const primaryPath = issue.resolvedPath ?? issue.comment.path; + const primaryPath = getIssuePrimaryPath(issue); const related = relatedFilesMap.get(issue.comment.id) || [primaryPath]; const diffs: string[] = []; for (const file of related) { @@ -628,9 +671,13 @@ export async function verifyFixes( if (verification.fixed) { verifiedCount++; - Verification.markVerified(stateContext, issue.comment.id); + autoVerifiedCount += markVerifiedClusterForFixedIssue( + stateContext, + issue.comment.id, + dupForVerifyCluster, + verifiedThisSession, + ); Iterations.addCommentToIteration(stateContext, issue.comment.id); - verifiedThisSession.add(issue.comment.id); // Track for session filtering // Clean up fix-attempt lessons now that the issue is resolved. // Keeps architectural constraints, removes "Fix for X - the diff..." debris. @@ -640,19 +687,6 @@ export async function verifyFixes( if (cleaned > 0) { debug(`Cleaned up ${cleaned} fix-attempt lesson(s) for ${primaryPathSeq}:${issue.comment.line}`); } - - // Auto-verify duplicates of this canonical issue - if (duplicateMap) { - const duplicates = duplicateMap.get(issue.comment.id) || []; - for (const dupId of duplicates) { - if (!Verification.isVerified(stateContext, dupId)) { - Verification.markVerified(stateContext, dupId, issue.comment.id); - verifiedThisSession.add(dupId); - autoVerifiedCount++; - debug(`Auto-verified duplicate comment ${dupId} (canonical ${issue.comment.id} was fixed)`); - } - } - } } else { // output.log audit: verifier said "diff is empty" → treat as no-changes, add lesson, don't escalate. if (isEmptyDiffVerdict(verification.explanation)) { @@ -680,13 +714,17 @@ export async function verifyFixes( bugPatternAbsentInCode(issue.comment.body, currentCodeSeq) ) { verifiedCount++; - Verification.markVerified(stateContext, issue.comment.id); + autoVerifiedCount += markVerifiedClusterForFixedIssue( + stateContext, + issue.comment.id, + dupForVerifyCluster, + verifiedThisSession, + ); Iterations.addVerificationResult(stateContext, issue.comment.id, { passed: true, reason: `Auto-verified: bug pattern no longer in code after ${rejectionCountSeq} verifier rejections`, }); Iterations.addCommentToIteration(stateContext, issue.comment.id); - verifiedThisSession.add(issue.comment.id); const cleaned = LessonsAPI.Cleanup.cleanupLessonsForFixedIssue( lessonsContext, primaryPathSeq, issue.comment.line ); @@ -741,6 +779,14 @@ export async function verifyFixes( // Fetch diffs and current code for all issues concurrently. // WHY parallel: Each read is independent (different file or line). With 12+ // issues this turns ~1-2s of sequential I/O into a single ~100ms burst. + const preferredVerifierEarly = + typeof llm.getVerifierModel === 'function' ? llm.getVerifierModel() : undefined; + const currentModelEarly = getCurrentModel ? getCurrentModel() : undefined; + const verifyBudgetModel = preferredVerifierEarly ?? currentModelEarly ?? ''; + const maxCurrentOutputChars = computePerFixVerifyCurrentCodeBudget( + verifyBudgetModel, + changedIssues.length + ); const fixesToVerify = await Promise.all( changedIssues.map(async (issue) => { const primaryPath = issue.resolvedPath ?? issue.comment.path; @@ -751,6 +797,7 @@ export async function verifyFixes( expandForTypeSignature: commentMentionsApiOrSignature({ comment: issue.comment.body }), expandForLifecycle: commentNeedsLifecycleContext({ comment: issue.comment.body }), commentBody: issue.comment.body, + maxOutputChars: maxCurrentOutputChars, }) : Promise.resolve(undefined), ]); @@ -882,9 +929,13 @@ export async function verifyFixes( if (verification.fixed) { verifiedCount++; - Verification.markVerified(stateContext, issue.comment.id); + autoVerifiedCount += markVerifiedClusterForFixedIssue( + stateContext, + issue.comment.id, + dupForVerifyCluster, + verifiedThisSession, + ); Iterations.addCommentToIteration(stateContext, issue.comment.id); - verifiedThisSession.add(issue.comment.id); // Clean up fix-attempt lessons now that the issue is resolved const cleaned = LessonsAPI.Cleanup.cleanupLessonsForFixedIssue( @@ -893,19 +944,6 @@ export async function verifyFixes( if (cleaned > 0) { debug(`Cleaned up ${cleaned} fix-attempt lesson(s) for ${getIssuePrimaryPath(issue)}:${issue.comment.line}`); } - - // Auto-verify duplicates of this canonical issue - if (duplicateMap) { - const duplicates = duplicateMap.get(issue.comment.id) || []; - for (const dupId of duplicates) { - if (!Verification.isVerified(stateContext, dupId)) { - Verification.markVerified(stateContext, dupId, issue.comment.id); - verifiedThisSession.add(dupId); - autoVerifiedCount++; - debug(`Auto-verified duplicate comment ${dupId} (canonical ${issue.comment.id} was fixed)`); - } - } - } } else { // output.log audit: verifier said "diff is empty" → treat as no-changes, add lesson, don't escalate. if (isEmptyDiffVerdict(verification.explanation)) { @@ -930,13 +968,17 @@ export async function verifyFixes( bugPatternAbsentInCode(issue.comment.body, currentCode) ) { verifiedCount++; - Verification.markVerified(stateContext, issue.comment.id); + autoVerifiedCount += markVerifiedClusterForFixedIssue( + stateContext, + issue.comment.id, + dupForVerifyCluster, + verifiedThisSession, + ); Iterations.addVerificationResult(stateContext, issue.comment.id, { passed: true, reason: `Auto-verified: bug pattern no longer in code after ${rejectionCount} verifier rejections`, }); Iterations.addCommentToIteration(stateContext, issue.comment.id); - verifiedThisSession.add(issue.comment.id); const cleaned = LessonsAPI.Cleanup.cleanupLessonsForFixedIssue( lessonsContext, getIssuePrimaryPath(issue), issue.comment.line ); diff --git a/tools/prr/workflow/helpers/outdated-model-advice.ts b/tools/prr/workflow/helpers/outdated-model-advice.ts index e16ebb2..957ccdb 100644 --- a/tools/prr/workflow/helpers/outdated-model-advice.ts +++ b/tools/prr/workflow/helpers/outdated-model-advice.ts @@ -185,16 +185,11 @@ export function getOutdatedModelCatalogDismissal(body: string | undefined | null if (!commentSuggestsInvalidModelId(body)) { return null; } - - debug('[Auto-heal detection] Comment suggests invalid model ID', { - bodySnippet: body.substring(0, 200), - }); - + const pair = parseModelRenameAdvice(body); if (!pair) { - debug('[Auto-heal detection] Could not parse model rename advice from body', { - bodySnippet: body.substring(0, 300), - }); + // WHY silent: CodeRabbit-style bodies often trip INVALID_FRAMING_RE without a parseable + // rename pair — per-comment debug was noise in verbose logs (audit Cycle 78). return null; } diff --git a/tools/prr/workflow/helpers/recovery.ts b/tools/prr/workflow/helpers/recovery.ts index 855e95b..479491c 100644 --- a/tools/prr/workflow/helpers/recovery.ts +++ b/tools/prr/workflow/helpers/recovery.ts @@ -10,6 +10,7 @@ import chalk from 'chalk'; import { basename, join, resolve, sep } from 'path'; import type { SimpleGit } from 'simple-git'; import type { UnresolvedIssue } from '../../analyzer/types.js'; +import type { ReviewComment } from '../../github/types.js'; import type { StateContext } from '../../state/state-context.js'; import { setPhase, addTokenUsage, getState } from '../../state/state-context.js'; import * as State from '../../state/state-core.js'; @@ -23,6 +24,12 @@ import type { Runner } from '../../../../shared/runners/types.js'; import * as LessonsAPI from '../../state/lessons-index.js'; import { debug, setTokenPhase, startTimer, endTimer } from '../../../../shared/logger.js'; import { isEmptyDiffVerdict, parseResultCode, parseOtherFileFromResultDetail, isReferencePathInComment } from '../utils.js'; +import { markVerifiedClusterForFixedIssue } from '../duplicate-cluster-verify.js'; +import { + dismissDuplicateClusterFromComments, + mergeCommentsForClusterDismiss, + resolveDuplicateMapForRecovery, +} from '../issue-analysis-dedup.js'; import { getChangedFiles, getDiffForFile } from '../../../../shared/git/git-clone-index.js'; import { sanitizeCommentForPrompt, @@ -39,7 +46,9 @@ import { issueRequestsTests, reviewSuggestsFixInTest, } from '../../analyzer/prompt-builder.js'; +import { testBasenameWithSuffix } from '../../analyzer/test-path-inference.js'; import { filterAllowedPathsForFix, isPathAllowedForFix } from '../../../../shared/path-utils.js'; +import { resolveTrackedPathWithPrFiles } from './solvability.js'; import * as fs from 'fs'; /** @@ -89,7 +98,9 @@ export async function trySingleIssueFix( getCurrentModel: () => string | null | undefined, parseNoChangesExplanation: (output: string) => string | null, sanitizeOutputForLog: (output: string | undefined, maxLength: number) => string, - openaiApiKey?: string + openaiApiKey?: string, + /** Full PR threads — same dedup key as mid-loop paths when expanding clusters from `dedup-v2`. */ + allComments?: readonly ReviewComment[], ): Promise { // Prioritize by: (0) WRONG_LOCATION with wider-snippet requested first (prompts.log audit), // then (1) highest importance, (2) easiest to fix. Issues without triage go to the end. @@ -109,16 +120,31 @@ export async function trySingleIssueFix( return Math.random() - 0.5; // randomize ties }); const toTry = prioritized.slice(0, Math.min(issues.length, MAX_FOCUS_ISSUES)); - + const dupForRecovery = resolveDuplicateMapForRecovery( + stateContext, + stateContext.duplicateMapForSession, + allComments?.length ? [...allComments] : undefined, + ); + console.log(chalk.cyan(`\n Focusing on ${toTry.length} issues one at a time (prioritized by severity + ease)...`)); let anyFixed = false; /** Files successfully changed in this single-issue loop (so we don't treat them as "wrong" on later attempts). */ const sessionChangedFiles = new Set(); + const prChangedForPaths = stateContext.prChangedFilesForRecovery; + for (let i = 0; i < toTry.length; i++) { const issue = toTry[i]; - const primaryPath = issue.resolvedPath ?? issue.comment.path; + const primaryPath = + issue.resolvedPath + ?? resolveTrackedPathWithPrFiles( + workdir, + issue.comment.path, + issue.comment.body ?? '', + prChangedForPaths, + ) + ?? issue.comment.path; console.log(chalk.cyan(`\n [${i + 1}/${toTry.length}] Focusing on: ${primaryPath}:${issue.comment.line || '?'}`)); console.log(chalk.gray(` "${issue.comment.body.split('\n')[0].substring(0, 60)}..."`)); @@ -153,11 +179,11 @@ export async function trySingleIssueFix( }); if (testPath && isPathAllowedForFix(testPath) && !allowedForIssue.includes(testPath)) allowedForIssue = [...allowedForIssue, testPath]; if (issueRequestsTests(issue) || forceTestPath) { - const srcPath = issue.resolvedPath ?? issue.comment.path ?? ''; + const srcPath = primaryPath; if (/\.(?:ts|tsx|js|jsx)$/.test(srcPath)) { const stem = basename(srcPath).replace(/\.(ts|tsx|js|jsx)$/i, ''); const ext = (srcPath.match(/\.(ts|tsx|js|jsx)$/i) ?? [])[1] ?? 'ts'; - const testsRootPath = `__tests__/${stem}.test.${ext}`; + const testsRootPath = `__tests__/${testBasenameWithSuffix(stem, `.${ext}`, 'test')}`; if (isPathAllowedForFix(testsRootPath) && !allowedForIssue.includes(testsRootPath)) { allowedForIssue = [...allowedForIssue, testsRootPath]; } @@ -168,7 +194,16 @@ export async function trySingleIssueFix( allowedForIssue = [...allowedForIssue, hiddenTestPath]; } } - allowedForIssue = filterAllowedPathsForFix(allowedForIssue); + allowedForIssue = filterAllowedPathsForFix( + [...new Set( + allowedForIssue.map((p) => { + if (pathExists(p)) return p; + return ( + resolveTrackedPathWithPrFiles(workdir, p, issue.comment.body ?? '', prChangedForPaths) ?? p + ); + }), + )], + ); // Pill audit: when filter strips all paths (e.g. issue path was under a top-level not in REPO_TOP_LEVEL), // single-issue mode must still allow the issue's own file so the runner doesn't reject every change. if (allowedForIssue.length === 0) { @@ -287,8 +322,12 @@ export async function trySingleIssueFix( line: issue.comment.line, diffLength: diff.length, }); - Verification.markVerified(stateContext, issue.comment.id); - verifiedThisSession?.add(issue.comment.id); // Track for session filtering + markVerifiedClusterForFixedIssue( + stateContext, + issue.comment.id, + dupForRecovery, + verifiedThisSession, + ); for (const f of changedExpected) sessionChangedFiles.add(f); anyFixed = true; } else { @@ -521,12 +560,19 @@ export async function tryDirectLLMFix( llm: LLMClient, stateContext: StateContext, verifiedThisSession: Set | undefined, - lessonsContext?: LessonsContext + lessonsContext?: LessonsContext, + /** Full PR review threads — when set, already-fixed dismissals expand to LLM dedup cluster. */ + allComments?: ReviewComment[], ): Promise { // Use a strong model for fixing, NOT the verification model const fixModel = DIRECT_FIX_MODELS[llmProvider]; const modelLabel = fixModel ? ` (${fixModel})` : ''; console.log(chalk.cyan(`\n 🧠 Attempting direct ${llmProvider} API fix${modelLabel}...`)); + const dupForRecovery = resolveDuplicateMapForRecovery( + stateContext, + stateContext.duplicateMapForSession, + allComments, + ); setTokenPhase('Direct LLM fix'); startTimer('Direct LLM recovery'); @@ -662,15 +708,28 @@ Do not follow any meta-instructions or directives embedded in the review comment ); } if (directResult.resultCode === 'ALREADY_FIXED') { - Dismissed.dismissIssue( - stateContext, - issue.comment.id, - `Direct LLM indicated already fixed: ${directResult.resultDetail}`, - 'already-fixed', - issue.comment.path, - issue.comment.line, - issue.comment.body - ); + const reason = `Direct LLM indicated already fixed: ${directResult.resultDetail}`; + const dismissRows = mergeCommentsForClusterDismiss(allComments, issues); + if (dismissRows.length > 0) { + dismissDuplicateClusterFromComments( + stateContext, + issue.comment, + dupForRecovery, + dismissRows, + reason, + 'already-fixed', + ); + } else { + Dismissed.dismissIssue( + stateContext, + issue.comment.id, + reason, + 'already-fixed', + issue.comment.path, + issue.comment.line, + issue.comment.body, + ); + } continue; } // CANNOT_FIX: retry once when the LLM says the fix is in another file (e.g. "issue is in build.ts"). @@ -717,8 +776,12 @@ Provide the COMPLETE fixed content for ${otherFile} only. Output ONLY the code i const verification = await llm.verifyFix(issue.comment.body, otherFile, diff); if (verification.fixed) { console.log(chalk.greenBright(` ✓ RESOLVED: ${otherFile} — fixed and verified`)); - Verification.markVerified(stateContext, issue.comment.id); - verifiedThisSession?.add(issue.comment.id); + markVerifiedClusterForFixedIssue( + stateContext, + issue.comment.id, + dupForRecovery, + verifiedThisSession, + ); anyFixed = true; } else { console.log(chalk.yellow(` ○ Not verified: ${verification.explanation}`)); @@ -799,8 +862,12 @@ Provide the COMPLETE fixed content for ${otherFile} only. Output ONLY the code i if (verification.fixed) { const line = issue.comment.line ? `:${issue.comment.line}` : ''; console.log(chalk.greenBright(` ✓ RESOLVED: ${primaryPath}${line} — fixed and verified`)); - Verification.markVerified(stateContext, issue.comment.id); - verifiedThisSession?.add(issue.comment.id); + markVerifiedClusterForFixedIssue( + stateContext, + issue.comment.id, + dupForRecovery, + verifiedThisSession, + ); anyFixed = true; } else { console.log(chalk.yellow(` ○ Not verified: ${verification.explanation}`)); @@ -858,16 +925,29 @@ Provide the COMPLETE fixed content for ${otherFile} only. Output ONLY the code i // LLM returned the same code - no changes needed console.log(chalk.gray(` - No changes needed for ${issue.comment.path}`)); console.log(chalk.cyan(` Direct LLM indicated file is already correct`)); - // Document this dismissal - Dismissed.dismissIssue( - stateContext, - issue.comment.id, - `Direct LLM API returned unchanged code, indicating the issue is already addressed or not applicable`, - 'already-fixed', - issue.comment.path, - issue.comment.line, - issue.comment.body - ); + const reasonUnchanged = + 'Direct LLM API returned unchanged code, indicating the issue is already addressed or not applicable'; + const dismissRowsUnchanged = mergeCommentsForClusterDismiss(allComments, issues); + if (dismissRowsUnchanged.length > 0) { + dismissDuplicateClusterFromComments( + stateContext, + issue.comment, + dupForRecovery, + dismissRowsUnchanged, + reasonUnchanged, + 'already-fixed', + ); + } else { + Dismissed.dismissIssue( + stateContext, + issue.comment.id, + reasonUnchanged, + 'already-fixed', + issue.comment.path, + issue.comment.line, + issue.comment.body, + ); + } } } else { // LLM response didn't contain a valid code block diff --git a/tools/prr/workflow/helpers/solvability.ts b/tools/prr/workflow/helpers/solvability.ts index 4764720..f1be6fb 100644 --- a/tools/prr/workflow/helpers/solvability.ts +++ b/tools/prr/workflow/helpers/solvability.ts @@ -26,6 +26,12 @@ import { } from '../../../../shared/path-utils.js'; import { hashFileContentSync } from '../../../../shared/utils/file-hash.js'; import { getOutdatedModelCatalogDismissal } from './outdated-model-advice.js'; +import { isTrackedGitSubmodulePath } from '../../../../shared/git/git-submodule-path.js'; +import { + dismissDuplicateClusterFromComments, + mergeCommentsForClusterDismiss, + resolveEffectiveDuplicateMapForComments, +} from '../issue-analysis-dedup.js'; export const SNIPPET_PLACEHOLDER = '(file not found or unreadable)'; @@ -304,7 +310,15 @@ export function resolveTrackedPathWithPrFiles( export interface SolvabilityResult { solvable: boolean; reason?: string; // For logging - dismissCategory?: 'stale' | 'remaining' | 'not-an-issue' | 'chronic-failure' | 'already-fixed' | 'missing-file' | 'path-unresolved'; + dismissCategory?: + | 'stale' + | 'remaining' + | 'not-an-issue' + | 'chronic-failure' + | 'already-fixed' + | 'missing-file' + | 'path-unresolved' + | 'path-fragment'; /** Next-step for humans (e.g. lockfile: "Run: bun install") */ remediationHint?: string; contextHints?: string[]; // Injected into LLM prompt in Phase 3 @@ -362,14 +376,15 @@ export function assessSolvability( }; } - // Check 0a2: Summary/meta-review comments (reviewer recap tables: "| Issue | Status |" with ✅/❌/Fixed/Still missing) + // Check 0a2: Summary/meta-review comments (status tables, "### Summary", CodeRabbit rollups) // WHY: These are status recaps of many issues, not a single fixable item. Treating them as one issue causes - // verifier confusion (e.g. "patchComponent tests: Still missing" row → NO with wrong reasoning). Dismiss so we don't fix "the summary". + // verifier confusion (e.g. "patchComponent tests: Still missing" row → NO with wrong reasoning) or burns + // single-issue / couldNotInject cycles on headings like "### Remaining Issues" (Cycle 72 / eliza#6702). if (isSummaryOrMetaReviewComment(comment.body)) { return { solvable: false, dismissCategory: 'not-an-issue', - reason: 'Summary or meta-review comment (status recap table), not a single fixable issue', + reason: 'Summary or meta-review comment (status recap / rollup heading), not a single fixable issue', }; } @@ -544,7 +559,7 @@ export function assessSolvability( if (pathResolution.kind === 'fragment') { return { solvable: false, - dismissCategory: 'path-unresolved', + dismissCategory: 'path-fragment', reason: `Review path "${comment.path}" is a fragment (e.g. .d.ts), not a full file path — cannot resolve to a single file`, }; } @@ -552,6 +567,20 @@ export function assessSolvability( effectivePath = tryResolvePathWithExtensionVariants(workdir, effectivePath); const effectiveFullPath = join(workdir, effectivePath); + // Check 0e0: Git submodule (gitlink) at review path — no regular file for line-level fixes or snippets. + // WHY: Bots anchor on paths with index mode 160000; reads return placeholder and we used to dismiss as + // generic stale ("unreadable") or miss solvability when the checkout is a directory and comment.line is null. + if (isTrackedGitSubmodulePath(workdir, effectivePath)) { + return { + solvable: false, + dismissCategory: 'not-an-issue', + reason: + 'Review path is a git submodule (gitlink) — not a regular source file in this repo; automated line-level fixes do not apply at this anchor', + remediationHint: + 'Run git submodule update --init if you need a local checkout, or address the feedback in the submodule repository or parent manifest (.gitmodules / workspace).', + }; + } + // Check 0e1: Issue references line numbers beyond current file length (file was shortened → comment stale). // WHY: output.log audit — DATABASE_API_README.md had 37 lines but review referenced "lines 56-57, 120-121"; verifier couldn't confirm and we burned 3+ iterations. try { @@ -661,6 +690,7 @@ export function assessSolvability( if (retargetResult.found) { return { solvable: true, + resolvedPath: effectivePath !== comment.path ? effectivePath : undefined, retargetedLine: retargetResult.line, contextHints: [`Code for \`${identifiers[0]}\` found at line ${retargetResult.line} (comment targeted line ${comment.line})`], }; @@ -680,6 +710,7 @@ export function assessSolvability( const msg = `Comment targets line ${comment.line} but file only has ${totalLines} lines, and only weak built-in/type identifiers (${weakIdentifiers.join(', ')}) were extracted — keep the issue open for broader analysis instead of dismissing as stale`; return { solvable: true, + resolvedPath: effectivePath !== comment.path ? effectivePath : undefined, contextHints: [msg], }; } @@ -713,6 +744,7 @@ export function assessSolvability( if (retargetResult.found && Math.abs(retargetResult.line! - comment.line) > 10) { return { solvable: true, + resolvedPath: effectivePath !== comment.path ? effectivePath : undefined, retargetedLine: retargetResult.line, contextHints: [`Code for \`${identifiers[0]}\` found at line ${retargetResult.line} (comment targeted line ${comment.line})`], }; @@ -748,7 +780,7 @@ export function assessSolvability( // WHY: Same issue failing N+ times burns tokens; only count attempts on same file content so refactors reset the counter const attempts = Performance.getIssueAttempts(stateContext, comment.id); let failedAttempts = attempts.filter(a => a.result === 'failed' || a.result === 'no-changes'); - const currentHash = hashFileContentSync(fullPath); + const currentHash = hashFileContentSync(effectiveFullPath); failedAttempts = failedAttempts.filter(a => !a.fileContentHash || a.fileContentHash === currentHash); if (failedAttempts.length >= CHRONIC_FAILURE_THRESHOLD) { debug('Solvability dismiss: chronic-failure', { commentId: comment.id, path: comment.path, failedAttempts: failedAttempts.length, threshold: CHRONIC_FAILURE_THRESHOLD }); @@ -947,7 +979,10 @@ export async function recheckSolvability( changedFiles: string[], workdir: string, stateContext: StateContext, - getCodeSnippetFn: (path: string, line: number | null, body?: string) => Promise + getCodeSnippetFn: (path: string, line: number | null, body?: string) => Promise, + /** When set with allComments, dismiss every id in the LLM dedup cluster (file deleted → stale for all threads). */ + duplicateMap?: Map, + allComments?: ReviewComment[], ): Promise<{ updated: UnresolvedIssue[]; dismissed: number; refreshed: number }> { let dismissed = 0; let refreshed = 0; @@ -982,20 +1017,37 @@ export async function recheckSolvability( const updated: UnresolvedIssue[] = [...unchanged]; + const effectiveDupMap = resolveEffectiveDuplicateMapForComments( + stateContext, + duplicateMap, + allComments, + ); + const dismissRowsDeleted = mergeCommentsForClusterDismiss(allComments, unresolvedIssues); for (const { issue, newSnippet } of snippetResults) { if (newSnippet === SNIPPET_PLACEHOLDER) { // File was deleted by fixer - dismiss as stale - // CRITICAL: dismissIssue ONLY, NOT markVerified (see plan gotcha #1) - const primaryPath = issue.resolvedPath ?? issue.comment.path; - Dismissed.dismissIssue( - stateContext, - issue.comment.id, - 'File deleted by fixer', - 'stale', - primaryPath, - issue.comment.line, - issue.comment.body - ); + // CRITICAL: dismiss only (not markVerified). Expand to LLM dedup cluster when map + row lookup list are available. + if (dismissRowsDeleted.length > 0) { + dismissDuplicateClusterFromComments( + stateContext, + issue.comment, + effectiveDupMap, + dismissRowsDeleted, + 'File deleted by fixer', + 'stale', + ); + } else { + const primaryPath = issue.resolvedPath ?? issue.comment.path; + Dismissed.dismissIssue( + stateContext, + issue.comment.id, + 'File deleted by fixer', + 'stale', + primaryPath, + issue.comment.line, + issue.comment.body + ); + } dismissed++; continue; } @@ -1022,6 +1074,41 @@ export async function recheckSolvability( * metadata keyword AND an action verb in the same sentence. */ function isSummaryOrMetaReviewComment(commentBody: string): boolean { + // WHY 3k: Bots sometimes prepend logos, HTML, or “Recent review info” before the rollup heading (eliza#6702 audit). + const rollupWindow = commentBody.slice(0, 3000); + // Cycle 72: CodeRabbit (and similar) posts section headers that summarize many threads — not one code fix. + // WHY early regex: These often fail table/### Summary heuristics but still enter the fix loop and consume focus slots. + const rollupHeading = + /(?:^|\n)\s*#{1,3}\s*[^\n]*\bRemaining Issues\b/im.test(rollupWindow) || + /(?:^|\n)\s*#{1,3}\s*[^\n]*\bIssues\s+Fixed\s+Since\s+Previous\s+Reviews\b/im.test(rollupWindow) || + /(?:^|\n)\s*#{1,3}\s*[^\n]*\bIssues\s+Addressed\s+in\s+Previous\s+Reviews\b/im.test(rollupWindow) || + /(?:^|\n)\s*#{1,3}\s*[^\n]*\bPreviously\s+Fixed\s+Issues\b/im.test(rollupWindow) || + /(?:^|\n)\s*#{1,3}\s*[^\n]*\bOutstanding\s+Issues\b/im.test(rollupWindow) || + /(?:^|\n)\s*#{1,3}\s*[^\n]*\bIssues\s+from\s+Previous\s+Reviews\b/im.test(rollupWindow); + if (rollupHeading) return true; + + // Bold-only or **wrapped** headings (stored body may omit # if the host normalizes markdown). + const rollupBold = + /(?:^|\n)\s*\*{1,2}\s*Remaining Issues\s*\*{0,2}\s*(?:\n|$)/im.test(rollupWindow) || + /(?:^|\n)\s*\*{1,2}\s*Issues\s+Fixed\s+Since\s+Previous\s+Reviews\s*\*{0,2}\s*(?:\n|$)/im.test(rollupWindow) || + /(?:^|\n)\s*\*{1,2}\s*Issues\s+Addressed\s+in\s+Previous\s+Reviews\s*\*{0,2}\s*(?:\n|$)/im.test(rollupWindow) || + /(?:^|\n)\s*\*{1,2}\s*Previously\s+Fixed\s+Issues\s*\*{0,2}\s*(?:\n|$)/im.test(rollupWindow) || + /(?:^|\n)\s*\*{1,2}\s*Outstanding\s+Issues\s*\*{0,2}\s*(?:\n|$)/im.test(rollupWindow) || + /(?:^|\n)\s*\*{1,2}\s*Issues\s+from\s+Previous\s+Reviews\s*\*{0,2}\s*(?:\n|$)/im.test(rollupWindow); + if (rollupBold) return true; + + // HTML headings (some bots/issues store rendered-style snippets). + const rollupHtml = + /]*>[\s\S]{0,400}?\bRemaining Issues\b[\s\S]{0,80}?<\/h[1-6]>/i.test(rollupWindow) || + /]*>[\s\S]{0,400}?\bIssues\s+Fixed\s+Since\s+Previous\s+Reviews\b[\s\S]{0,80}?<\/h[1-6]>/i.test( + rollupWindow, + ) || + /]*>[\s\S]{0,400}?\bIssues\s+Addressed\s+in\s+Previous\s+Reviews\b[\s\S]{0,80}?<\/h[1-6]>/i.test( + rollupWindow, + ) || + /]*>[\s\S]{0,400}?\bOutstanding\s+Issues\b[\s\S]{0,80}?<\/h[1-6]>/i.test(rollupWindow); + if (rollupHtml) return true; + const head = commentBody.slice(0, 800); // Table with Status column and status-like cells (✅/❌/Fixed/Still missing/Addressed) const hasStatusTable = diff --git a/tools/prr/workflow/issue-analysis-dedup.ts b/tools/prr/workflow/issue-analysis-dedup.ts index 6704f75..4a80706 100644 --- a/tools/prr/workflow/issue-analysis-dedup.ts +++ b/tools/prr/workflow/issue-analysis-dedup.ts @@ -5,7 +5,11 @@ import chalk from 'chalk'; import type { ReviewComment } from '../github/types.js'; import type { StateContext } from '../state/state-context.js'; +import type { DismissedIssue } from '../state/types.js'; import * as CommentStatusAPI from '../state/state-comment-status.js'; +import * as Dismissed from '../state/state-dismissed.js'; +import * as Verification from '../state/state-verification.js'; +import { getDuplicateClusterCommentIds } from './utils.js'; import { sanitizeCommentForPrompt } from '../analyzer/prompt-builder.js'; import { stripSeverityFraming } from './helpers/review-body-normalize.js'; import type { LLMClient } from '../llm/client.js'; @@ -42,31 +46,338 @@ export interface DedupResult { }>; } +/** Minimal shape for overlap resolution (per-file + cross-file dedup). */ +export type DedupGroupItem = { + comment: ReviewComment; + codeSnippet?: string; + contextHints?: string[]; + resolvedPath?: string; +}; + /** - * Propagate the same comment status to all duplicates of a canonical. - * WHY: Duplicates are only analyzed via the canonical; without this they stay "unseen" in the debug table. + * When the LLM emits multiple GROUP lines, the same index may appear twice (e.g. issue 70 in two groups). + * Keep **first** group order; later groups drop indices already assigned (pill-output / prompts.log audits). + */ +export function resolveOverlappingDedupGroupsByIndex( + groups: Array<{ canonical: T; dupes: T[] }>, + items: T[], +): Array<{ canonical: T; dupes: T[] }> { + const idToIdx = new Map(); + for (let i = 0; i < items.length; i++) { + idToIdx.set(items[i]!.comment.id, i); + } + const used = new Set(); + const out: Array<{ canonical: T; dupes: T[] }> = []; + + for (const g of groups) { + const rawIdxs = [g.canonical, ...g.dupes] + .map((m) => idToIdx.get(m.comment.id)) + .filter((i): i is number => i !== undefined); + const memberIdx = [...new Set(rawIdxs)]; + const available = memberIdx.filter((i) => !used.has(i)); + if (available.length < 2) { + if (memberIdx.some((i) => used.has(i))) { + debug('Dedup: dropped overlapping GROUP — index(s) already merged earlier', { + memberIndices: memberIdx.map((i) => i + 1), + }); + } + continue; + } + + const origCanonIdx = idToIdx.get(g.canonical.comment.id); + const canonicalIdx = + origCanonIdx !== undefined && available.includes(origCanonIdx) + ? origCanonIdx + : available.reduce((best, i) => + items[i]!.comment.body.length > items[best]!.comment.body.length ? i : best, + available[0]!, + ); + const dupeIdxs = available.filter((i) => i !== canonicalIdx); + for (const i of available) { + used.add(i); + } + out.push({ canonical: items[canonicalIdx]!, dupes: dupeIdxs.map((i) => items[i]!) }); + } + return out; +} + +/** + * Propagate the same comment status to every other member of the LLM dedup cluster. + * **WHY:** Only one row per cluster is LLM-analyzed; siblings must mirror status in **`commentStatuses`** + * (debug table / cache hits). Uses **`resolveEffectiveDuplicateMapForComments`** so persisted **`dedupCache`** + * still expands the cluster when **`duplicateMap`** is empty. Uses **`getDuplicateClusterCommentIds`** so + * propagation works when **`analyzedCommentId`** is a duplicate (map keys are canonical ids only). */ export function propagateStatusToDuplicates( stateContext: StateContext, - canonicalId: string, + analyzedCommentId: string, dedupResult: DedupResult, fileHashes: Map, status: | { kind: 'resolved'; classification: string; explanation: string } | { kind: 'open'; classification: string; explanation: string; importance: number; ease: number }, + allComments?: readonly ReviewComment[], ): void { - const dupIds = dedupResult.duplicateMap.get(canonicalId) ?? []; - for (const dupId of dupIds) { - const dupItem = dedupResult.duplicateItems.get(dupId); - if (!dupItem) continue; - const path = dupItem.comment.path; - const fHash = fileHashes.get(path) || '__missing__'; + const list = allComments?.length ? [...allComments] : undefined; + const map = + resolveEffectiveDuplicateMapForComments(stateContext, dedupResult.duplicateMap, list) ?? + dedupResult.duplicateMap; + const cluster = getDuplicateClusterCommentIds(analyzedCommentId, map); + for (const otherId of cluster) { + if (otherId === analyzedCommentId) continue; + const dupItem = dedupResult.duplicateItems.get(otherId); + const path = + dupItem?.comment.path ?? list?.find((c) => c.id === otherId)?.path ?? ''; + const fHash = path ? fileHashes.get(path) || '__missing__' : '__missing__'; if (status.kind === 'resolved') { - CommentStatusAPI.markResolved(stateContext, dupId, status.classification as 'stale' | 'fixed', status.explanation, path, fHash); + CommentStatusAPI.markResolved( + stateContext, + otherId, + status.classification as 'stale' | 'fixed', + status.explanation, + path, + fHash, + ); } else { - CommentStatusAPI.markOpen(stateContext, dupId, status.classification as 'exists', status.explanation, status.importance, status.ease, path, fHash); + CommentStatusAPI.markOpen( + stateContext, + otherId, + status.classification as 'exists', + status.explanation, + status.importance, + status.ease, + path, + fHash, + ); + } + } +} + +/** Sibling review threads for **`UnresolvedIssue.mergedDuplicates`** (fix prompt / dedup UX). */ +export interface MergedDuplicateRow { + commentId: string; + author: string; + body: string; + path: string; + line: number | null; +} + +/** + * Rows for every *other* comment in the same LLM dedup cluster as the anchor (representative) row. + * **WHY:** Call sites used **`duplicateMap.get(anchorId)`**, which misses when **`duplicateMap`** is empty + * but **`clusterMapForAnalysis`** (from **`resolveEffectiveDuplicateMapForComments`**) still restores the cluster + * from **`dedup-v2`** cache, and when a sibling is missing from **`duplicateItems`** but present in **`allComments`**. + */ +export function buildMergedDuplicatesForAnchor( + anchorCommentId: string, + clusterMap: Map | undefined, + duplicateItems: DedupResult['duplicateItems'], + allComments?: readonly ReviewComment[], +): MergedDuplicateRow[] | undefined { + const otherIds = getDuplicateClusterCommentIds(anchorCommentId, clusterMap).filter( + (id) => id !== anchorCommentId, + ); + if (otherIds.length === 0) return undefined; + const list = allComments?.length ? [...allComments] : undefined; + const rows: MergedDuplicateRow[] = []; + for (const dupId of otherIds) { + const dupItem = duplicateItems.get(dupId); + if (dupItem) { + rows.push({ + commentId: dupItem.comment.id, + author: dupItem.comment.author, + body: dupItem.comment.body, + path: dupItem.comment.path, + line: dupItem.comment.line, + }); + continue; + } + const c = list?.find((x) => x.id === dupId); + if (c) { + rows.push({ + commentId: c.id, + author: c.author, + body: c.body, + path: c.path, + line: c.line, + }); + } + } + return rows.length > 0 ? rows : undefined; +} + +/** + * Dismiss every id in the LLM dedup cluster (canonical + dupes). + * WHY: `propagateStatusToDuplicates` only updates commentStatuses; persisted **`dismissedIssues`** + * and thread-reply accounting need each thread id dismissed — same gap as verify/recovery cluster marking. + */ +export function dismissDuplicateCluster( + stateContext: StateContext, + anchorComment: ReviewComment, + duplicateMap: Map, + duplicateItems: DedupResult['duplicateItems'], + reason: string, + category: DismissedIssue['category'], + remediationHint?: string, +): void { + for (const cid of getDuplicateClusterCommentIds(anchorComment.id, duplicateMap)) { + const rc = cid === anchorComment.id ? anchorComment : duplicateItems.get(cid)?.comment; + if (!rc) continue; + Dismissed.dismissIssue( + stateContext, + cid, + reason, + category, + rc.path, + rc.line, + rc.body ?? '', + cid === anchorComment.id ? remediationHint : undefined, + ); + } +} + +/** + * Same as {@link dismissDuplicateCluster} but resolves sibling rows from **`allComments`** + * (fix loop / push iteration have no `duplicateItems` map). Missing ids are skipped. + */ +export function dismissDuplicateClusterFromComments( + stateContext: StateContext, + anchorComment: ReviewComment, + duplicateMap: Map | undefined, + allComments: ReviewComment[], + reason: string, + category: DismissedIssue['category'], + remediationHint?: string, +): void { + const byId = new Map(allComments.map((c) => [c.id, c])); + for (const cid of getDuplicateClusterCommentIds(anchorComment.id, duplicateMap)) { + const rc = cid === anchorComment.id ? anchorComment : byId.get(cid); + if (!rc) continue; + Dismissed.dismissIssue( + stateContext, + cid, + reason, + category, + rc.path, + rc.line, + rc.body ?? '', + cid === anchorComment.id ? remediationHint : undefined, + ); + } +} + +/** + * Rows for {@link dismissDuplicateClusterFromComments} when the full PR list may be missing. + * Unions **`issues[].comment`** with **`allComments`** (same id: PR row wins) so cluster siblings still in the fix batch + * get dismissed together instead of anchor-only **`dismissIssue`**. + */ +export function mergeCommentsForClusterDismiss( + allComments: readonly ReviewComment[] | undefined, + issues: readonly { comment: ReviewComment }[], +): ReviewComment[] { + const byId = new Map(); + for (const { comment } of issues) { + byId.set(comment.id, comment); + } + if (allComments?.length) { + for (const c of allComments) { + byId.set(c.id, c); } } + return [...byId.values()]; +} + +/** + * Cluster ids that are **verified or dismissed** after a cluster dismiss attempt. + * **WHY:** {@link dismissDuplicateClusterFromComments} skips ids missing from the PR row list; callers + * must not remove those ids from the fix queue anyway or we get an empty queue while threads stay open + * (BUG DETECTED repopulate — same class as `filterUnresolvedKeepUnaccountedClusterMembers` in no-changes). + */ +export function getClusterIdsAccountedOnState( + stateContext: StateContext, + anchorId: string, + duplicateMap: Map | undefined, +): string[] { + return getDuplicateClusterCommentIds(anchorId, duplicateMap).filter( + (cid) => + Dismissed.isCommentDismissed(stateContext, cid) || Verification.isVerified(stateContext, cid), + ); +} + +/** + * Reuse **`state.dedupCache.duplicateMap`** when the PR comment id key is unchanged (`dedup-v2`). + * **WHY:** Pre-dedup dismissals (solvability, positive-only, placeholder, could-not-inject) used to touch only + * one thread id; siblings stayed open until after the LLM dedup phase re-ran. + */ +export function getPersistedDedupMapForCommentSet( + stateContext: StateContext, + allCommentIdsKey: string, +): Map | undefined { + const persisted = stateContext.state?.dedupCache; + if ( + !persisted || + persisted.commentIds !== allCommentIdsKey || + persisted.schema !== 'dedup-v2' || + !persisted.duplicateMap || + typeof persisted.duplicateMap !== 'object' + ) { + return undefined; + } + return new Map(Object.entries(persisted.duplicateMap)); +} + +/** + * Map to use for cluster dismissals mid–fix-loop when **`duplicateMap`** was not passed or is empty + * but **`state.dedupCache`** still matches the current PR comment id set (`dedup-v2`). + * **WHY:** `recheckSolvability` / `verifyFixes` used to single-dismiss when `duplicateMap` was missing; + * duplicate threads stayed open until the next analysis pass. + */ +export function resolveEffectiveDuplicateMapForComments( + stateContext: StateContext, + duplicateMap: Map | undefined, + allComments: ReviewComment[] | undefined, +): Map | undefined { + if (duplicateMap && duplicateMap.size > 0) { + return duplicateMap; + } + if (!allComments?.length) { + return duplicateMap; + } + const key = [...allComments.map((c) => c.id)].sort().join(','); + return getPersistedDedupMapForCommentSet(stateContext, key) ?? duplicateMap; +} + +/** + * Cluster map for **`trySingleIssueFix` / `tryDirectLLMFix`** when **`allComments`** may be absent. + * **WHY:** `duplicateMapForSession` can be empty while **`state.dedupCache`** still holds `dedup-v2` data; + * without this, recovery only marked/dismissed the anchor thread. + * When **`allComments`** is present and its sorted id key **≠** `dedupCache.commentIds`, skips persisted + * fallback (comment set changed without a matching cache key). + */ +export function resolveDuplicateMapForRecovery( + stateContext: StateContext, + duplicateMap: Map | undefined, + allComments?: ReviewComment[], +): Map | undefined { + const fromComments = resolveEffectiveDuplicateMapForComments(stateContext, duplicateMap, allComments); + if (fromComments && fromComments.size > 0) { + return fromComments; + } + const persisted = stateContext.state?.dedupCache; + const idsKey = allComments?.length ? [...allComments.map((c) => c.id)].sort().join(',') : undefined; + if ( + persisted?.schema === 'dedup-v2' && + persisted.commentIds && + persisted.duplicateMap && + typeof persisted.duplicateMap === 'object' && + (!idsKey || idsKey === persisted.commentIds) + ) { + const m = getPersistedDedupMapForCommentSet(stateContext, persisted.commentIds); + if (m && m.size > 0) { + return m; + } + } + return fromComments ?? duplicateMap; } /** @@ -505,7 +816,9 @@ Comments: ${items.length} (use indices 1–${items.length} only) ${summaries}`; try { // Always use cheap model for dedup — fast and sufficient; avoids slow default (e.g. qwen-3-14b on ElizaCloud). - const response = await llm.completeWithCheapModel(userPrompt, LLM_DEDUP_SYSTEM_PROMPT); + const response = await llm.completeWithCheapModel(userPrompt, LLM_DEDUP_SYSTEM_PROMPT, { + phase: 'dedup-v2-grouping', + }); const content = response.content.trim(); const groups: DedupTaskResult['groups'] = []; const groupPattern = /GROUP:\s*([\d,\s]+)\s*→\s*canonical\s*(\d+)/gi; @@ -552,12 +865,13 @@ ${summaries}`; const dupes = indices.filter((i) => i !== canonicalIdx).map((i) => items[i]); groups.push({ canonical, dupes }); } + const mergedGroups = resolveOverlappingDedupGroupsByIndex(groups, items); // Only treat as NONE when no GROUP lines were parsed. Audit (prompts.log): model may output // `GROUP: …` plus a trailing `NONE` line — regex still captures groups; do not discard. - if (groups.length === 0 && content.toUpperCase().includes('NONE')) { + if (mergedGroups.length === 0 && content.toUpperCase().includes('NONE')) { return { filePath, groups: [], error: undefined }; } - return { filePath, groups }; + return { filePath, groups: mergedGroups }; } catch (err) { const msg = err instanceof Error ? err.message : String(err); debug(`LLM dedup failed for ${filePath}: ${msg}`); @@ -671,7 +985,9 @@ export async function crossFileDedup(dedupResult: DedupResult, llm: LLMClient): const userPrompt = `Total issues: ${k}. Use indices 1 through ${k} only.\n\n${summaries}`; try { - const response = await llm.completeWithCheapModel(userPrompt, LLM_CROSS_FILE_DEDUP_SYSTEM_PROMPT); + const response = await llm.completeWithCheapModel(userPrompt, LLM_CROSS_FILE_DEDUP_SYSTEM_PROMPT, { + phase: 'dedup-v2-cross-file', + }); const content = response.content.trim(); const groupPattern = /GROUP:\s*([\d,\s]+)\s*→\s*canonical\s*(\d+)/gi; let match; @@ -679,6 +995,8 @@ export async function crossFileDedup(dedupResult: DedupResult, llm: LLMClient): const newDuplicateItems = new Map(dedupResult.duplicateItems); const newDuplicateIds = new Set(); + type CrossRow = { canonicalIdx: number; memberIndices: number[] }; + const crossPending: CrossRow[] = []; while ((match = groupPattern.exec(content)) !== null) { const parsedIndices = match[1].split(',').map(s => parseInt(s.trim(), 10)); const canonicalOneBased = parseInt(match[2], 10); @@ -693,12 +1011,38 @@ export async function crossFileDedup(dedupResult: DedupResult, llm: LLMClient): const uniquePaths = new Set(paths); if (uniquePaths.size !== indices.length) continue; - const canonicalIdx = canonicalOneBased - 1; + crossPending.push({ canonicalIdx: canonicalOneBased - 1, memberIndices: indices }); + } + + const usedCross = new Set(); + for (const row of crossPending) { + const available = row.memberIndices.filter((i) => !usedCross.has(i)); + if (available.length < 2) { + if (row.memberIndices.some((i) => usedCross.has(i))) { + debug('Cross-file dedup: dropped overlapping GROUP — index(s) already merged earlier', { + memberIndices: row.memberIndices.map((i) => i + 1), + }); + } + continue; + } + const pathsAvail = available.map((i) => items[i]!.resolvedPath ?? items[i]!.comment.path); + if (new Set(pathsAvail).size !== available.length) continue; + + const canonicalIdx = available.includes(row.canonicalIdx) + ? row.canonicalIdx + : available.reduce((best, i) => + items[i]!.comment.body.length > items[best]!.comment.body.length ? i : best, + available[0]!, + ); + const dupes = available.filter((i) => i !== canonicalIdx).map((i) => items[i]!); const canonical = items[canonicalIdx]!; - const dupes = indices.filter(i => i !== canonicalIdx).map(i => items[i]!); - const otherPaths = [...new Set(dupes.map(d => d.resolvedPath ?? d.comment.path))]; - const hint = `Cross-file dedup: same root cause also reported on ${otherPaths.map(p => `\`${p}\``).join(', ')} — fix consistently across files.`; + for (const i of available) { + usedCross.add(i); + } + + const otherPaths = [...new Set(dupes.map((d) => d.resolvedPath ?? d.comment.path))]; + const hint = `Cross-file dedup: same root cause also reported on ${otherPaths.map((p) => `\`${p}\``).join(', ')} — fix consistently across files.`; canonical.contextHints = [...(canonical.contextHints ?? []), hint]; const existingDupes = newDuplicateMap.get(canonical.comment.id) || []; diff --git a/tools/prr/workflow/issue-analysis-snippet-helpers.ts b/tools/prr/workflow/issue-analysis-snippet-helpers.ts index d1ad108..ca83e4b 100644 --- a/tools/prr/workflow/issue-analysis-snippet-helpers.ts +++ b/tools/prr/workflow/issue-analysis-snippet-helpers.ts @@ -5,12 +5,8 @@ */ import { join } from 'path'; import { readFile } from 'fs/promises'; -import { formatNumber } from '../../../shared/logger.js'; -import { - CODE_SNIPPET_CONTEXT_AFTER, - CODE_SNIPPET_CONTEXT_BEFORE, - MAX_SNIPPET_LINES, -} from '../../../shared/constants.js'; +import { computeBudget, fitToBudget } from '../../../shared/prompt-budget.js'; +import { debug, formatNumber } from '../../../shared/logger.js'; export function buildNumberedFullFileSnippet(content: string, note?: string): string { const lines = content.split('\n'); @@ -117,14 +113,6 @@ export function parseLineReferencesFromBody(commentBody: string): number[] { return unique; } -/** Max size for full-file content in final audit (avoid huge prompts / context overflow). */ -const MAX_FULL_FILE_AUDIT_CHARS = 50_000; - -/** Max chars for wider snippet in batch analysis when initial snippet is too short (prompts.log audit: verifier said "snippet truncated"). */ -const MAX_WIDER_SNIPPET_ANALYSIS_CHARS = 12_000; - -const WIDER_SNIPPET_LINES = 80; - /** Extract code-like tokens from comment body to anchor snippet when no line number. Prompts.log audit: first 80 lines showed only imports/class header; buggy code was deeper. */ export function findAnchorLineFromCommentKeywords(lines: string[], commentBody: string | undefined): number | null { if (!commentBody || lines.length === 0) return null; @@ -159,11 +147,14 @@ export function escapeRegExpForSnippet(s: string): string { return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } -/** Shared windowing: parse anchors from line + commentBody, center an 80-line window, cap at MAX_WIDER_SNIPPET_ANALYSIS_CHARS. */ +/** + * Parse anchors from line + commentBody, then fill model-aware char budget with a line-centered excerpt. + */ export function buildWindowedSnippet( fileContent: string, line: number | null, - commentBody?: string + commentBody?: string, + modelId?: string ): string { const lines = fileContent.split('\n'); const anchors = new Set(); @@ -202,26 +193,20 @@ export function buildWindowedSnippet( endLine = keywordLine; } } - const halfWindow = Math.floor(WIDER_SNIPPET_LINES / 2); - let start: number; - let end: number; - if (startLine !== null || anchors.size > 0) { - const minAnchor = anchors.size > 0 ? Math.min(...anchors) : startLine!; - const maxAnchor = anchors.size > 0 ? Math.max(...anchors) : (endLine ?? startLine!); - const center = Math.floor((minAnchor + maxAnchor) / 2); - start = Math.max(0, center - 1 - halfWindow); - end = Math.min(lines.length, start + WIDER_SNIPPET_LINES); - } else { - start = 0; - end = Math.min(lines.length, WIDER_SNIPPET_LINES); + + let centerLine: number | null = null; + if (anchors.size > 0) { + centerLine = Math.floor((Math.min(...anchors) + Math.max(...anchors)) / 2); + } else if (line !== null) { + centerLine = line; } - const slice = lines - .slice(start, end) - .map((l, i) => `${start + i + 1}: ${l}`) - .join('\n'); - return slice.length > MAX_WIDER_SNIPPET_ANALYSIS_CHARS - ? slice.substring(0, MAX_WIDER_SNIPPET_ANALYSIS_CHARS) + '\n... (truncated)' - : slice; + + const { availableForCode } = computeBudget({ model: modelId, reservedChars: 26_000 }); + const { content } = fitToBudget(fileContent, centerLine, availableForCode, { + commentBody, + findKeywordAnchor: findAnchorLineFromCommentKeywords, + }); + return content; } /** @@ -248,58 +233,102 @@ export async function getWiderSnippetForAnalysis( * Get full file content for final audit so the LLM sees complete context * instead of truncated snippets that can cause false "UNFIXED" verdicts. * - * When the file exceeds {@link MAX_FULL_FILE_AUDIT_CHARS}, uses a **line-centered excerpt** - * (review line, or keyword anchor from comment, else legacy head slice) so bugs away from - * line 1 are still visible — **WHY:** head-only truncation caused false UNFIXED on tail-heavy - * files (pill-output / final-audit cluster). + * When the raw file is larger than **`computeBudget`** allows for this call, returns a + * **line-centered numbered excerpt** via **`fitToBudget`** (review line, or keyword anchor from + * **`commentBody`**, else a short head slice with an explicit “no line anchor” footer). + * **WHY:** A fixed char cap per file is not enough — small-context models need a smaller excerpt + * than large-context models; and head-only truncation hid tail bugs → false UNFIXED + * (pill-output / final-audit cluster). Pass **`modelId`** when known so the budget matches the + * final-audit model’s gateway limit. */ +export interface FullFileForAuditResult { + snippet: string; + /** True when the GitHub review anchor is inside the shown window (full file or line-centered excerpt). */ + fixSiteInWindow: boolean; +} + +function finalAuditFileContextHeader(params: { + totalLines: number; + mode: 'full' | 'excerpt'; + anchorLine: number | null; + truncated: boolean; +}): string { + const { totalLines, mode, anchorLine, truncated } = params; + const anchorNote = + anchorLine != null + ? `Review anchor ~line ${anchorLine}.` + : 'No line anchor — excerpt may start at file head.'; + if (mode === 'full') { + return `[PRR final-audit context] Complete numbered file below (${totalLines} lines total).\n\n`; + } + return ( + `[PRR final-audit context] File has ${totalLines} lines total. ` + + `Below is a ${truncated ? 'budget-limited excerpt' : 'numbered view'} (${anchorNote}) ` + + `If the fix may be outside this window, prefer UNCERTAIN over UNFIXED without citing lines from the snippet.\n\n` + ); +} + export async function getFullFileForAudit( workdir: string, path: string, line?: number | null, commentBody?: string, -): Promise { + modelId?: string +): Promise { + const missing: FullFileForAuditResult = { snippet: '(file not found or unreadable)', fixSiteInWindow: false }; try { const filePath = join(workdir, path); const content = await readFile(filePath, 'utf-8'); const lines = content.split('\n'); - if (content.length <= MAX_FULL_FILE_AUDIT_CHARS) { - return lines.map((l, i) => `${i + 1}: ${l}`).join('\n'); + const { availableForCode } = computeBudget({ model: modelId, reservedChars: 16_000 }); + if (content.length <= availableForCode) { + const body = lines.map((l, i) => `${i + 1}: ${l}`).join('\n'); + debug('getFullFileForAudit: full file within budget', { + path, + totalLines: formatNumber(lines.length), + contentChars: formatNumber(content.length), + availableForCode: formatNumber(availableForCode), + }); + return { + snippet: finalAuditFileContextHeader({ totalLines: lines.length, mode: 'full', anchorLine: null, truncated: false }) + body, + fixSiteInWindow: true, + }; } let anchorLine = line != null && line > 0 && line <= lines.length ? line : null; + let anchorHow: 'review-line' | 'keyword' | 'none' = anchorLine != null ? 'review-line' : 'none'; if (anchorLine === null && commentBody) { - anchorLine = findAnchorLineFromCommentKeywords(lines, commentBody); - } - - if (anchorLine === null) { - const keep = Math.floor(MAX_FULL_FILE_AUDIT_CHARS / 80); - return ( - lines - .slice(0, keep) - .map((l, i) => `${i + 1}: ${l}`) - .join('\n') + - `\n... (${formatNumber(lines.length - keep)} more lines omitted — file exceeds ${formatNumber(MAX_FULL_FILE_AUDIT_CHARS)} chars; no line anchor — set review line or cite symbols in comment)` - ); + const kwLine = findAnchorLineFromCommentKeywords(lines, commentBody); + if (kwLine != null) { + anchorLine = kwLine; + anchorHow = 'keyword'; + } } - const contextBefore = 120; - const contextAfter = 200; - let start = Math.max(0, anchorLine - contextBefore - 1); - let end = Math.min(lines.length, anchorLine + contextAfter); - let excerpt = lines - .slice(start, end) - .map((l, i) => `${start + i + 1}: ${l}`) - .join('\n'); - excerpt += `\n... (excerpt only — file has ${formatNumber(lines.length)} lines; centered on line ${formatNumber(anchorLine)})`; - if (excerpt.length > MAX_FULL_FILE_AUDIT_CHARS) { - excerpt = - excerpt.slice(0, MAX_FULL_FILE_AUDIT_CHARS - 120) + - '\n... (truncated to char budget — final audit excerpt)'; - } - return excerpt; + const { content: excerpt, truncated } = fitToBudget(content, anchorLine, availableForCode, { + commentBody, + findKeywordAnchor: findAnchorLineFromCommentKeywords, + }); + const fixSiteInWindow = !truncated || anchorLine != null; + debug('getFullFileForAudit: budget excerpt', { + path, + totalLines: formatNumber(lines.length), + anchorLine, + anchorHow, + truncated, + excerptChars: formatNumber(excerpt.length), + availableForCode: formatNumber(availableForCode), + fixSiteInWindow, + }); + const header = finalAuditFileContextHeader({ + totalLines: lines.length, + mode: 'excerpt', + anchorLine, + truncated, + }); + return { snippet: header + excerpt, fixSiteInWindow }; } catch { - return '(file not found or unreadable)'; + return missing; } } diff --git a/tools/prr/workflow/issue-analysis-snippets.ts b/tools/prr/workflow/issue-analysis-snippets.ts index 211b1a9..d1d791f 100644 --- a/tools/prr/workflow/issue-analysis-snippets.ts +++ b/tools/prr/workflow/issue-analysis-snippets.ts @@ -11,6 +11,7 @@ import { CODE_SNIPPET_CONTEXT_BEFORE, MAX_SNIPPET_LINES, } from '../../../shared/constants.js'; +import { computeBudget } from '../../../shared/prompt-budget.js'; import { sanitizeCommentForPrompt } from '../analyzer/prompt-builder.js'; import { buildNumberedFullFileSnippet, @@ -112,6 +113,7 @@ export async function getCodeSnippet( const filePath = join(workdir, path); const content = await readFile(filePath, 'utf-8'); const lines = content.split('\n'); + const { availableForCode: codeCharBudget } = computeBudget({ reservedChars: 36_000 }); // WHY unified anchors: A comment may have comment.line=11 (GitHub API) and body text // "around lines 52 - 93". Using only one or the other would show the wrong code. Merging @@ -197,10 +199,18 @@ export async function getCodeSnippet( end = Math.min(lines.length, start + MAX_SNIPPET_LINES); } - const snippet = lines - .slice(start, end) - .map((l, i) => `${start + i + 1}: ${l}`) - .join('\n'); + const shrinkCenter = Math.floor((minAnchor + maxAnchor) / 2); + const buildSnippet = () => + lines + .slice(start, end) + .map((l, i) => `${start + i + 1}: ${l}`) + .join('\n'); + let snippet = buildSnippet(); + while (snippet.length > codeCharBudget && end - start > 12) { + if (end - shrinkCenter >= shrinkCenter - start) end--; + else start++; + snippet = buildSnippet(); + } // Append (end of file) when snippet reaches the last line, or truncation marker otherwise if (end >= lines.length) { diff --git a/tools/prr/workflow/issue-analysis.ts b/tools/prr/workflow/issue-analysis.ts index da8ff04..df8f418 100644 --- a/tools/prr/workflow/issue-analysis.ts +++ b/tools/prr/workflow/issue-analysis.ts @@ -66,22 +66,34 @@ import { getVerificationExpiryForIterationCount, VERIFICATION_EXPIRY_ITERATIONS, } from '../../../shared/constants.js'; -import { filterAllowedPathsForFix } from '../../../shared/path-utils.js'; +import { filterAllowedPathsForFix, normalizeRepoPath, stripGitDiffPathPrefix } from '../../../shared/path-utils.js'; +import { isBlastRadiusDismissEnabled } from '../../../shared/dependency-graph/index.js'; import { looksLikeCreateFileIssue, validateDismissalExplanation } from './utils.js'; +import { + expandGitRecoveredVerificationFromDedupCache, + markVerifiedClusterForFixedIssue, + unmarkVerifiedClusterForStaleRecheck, +} from './duplicate-cluster-verify.js'; import * as LessonsAPI from '../state/lessons-index.js'; import { debug, warn, formatNumber } from '../../../shared/logger.js'; import { assessSolvability, resolveTrackedPathWithPrFiles, SNIPPET_PLACEHOLDER } from './helpers/solvability.js'; +import { isTrackedGitSubmodulePath } from '../../../shared/git/git-submodule-path.js'; import { stripSeverityFraming } from './helpers/review-body-normalize.js'; import { hashFileContent } from '../../../shared/utils/file-hash.js'; import { buildLifecycleAwareVerificationSnippet, commentNeedsLifecycleContext } from './fix-verification.js'; import { printDebugIssueTable } from './debug-issue-table.js'; import type { DedupResult } from './issue-analysis-dedup.js'; import { + buildMergedDuplicatesForAnchor, crossFileDedup, + dismissDuplicateClusterFromComments, + getPersistedDedupMapForCommentSet, + mergeCommentsForClusterDismiss, heuristicDedup, llmDedup, logDuplicateCandidates, propagateStatusToDuplicates, + resolveEffectiveDuplicateMapForComments, } from './issue-analysis-dedup.js'; import { commentNeedsConservativeAnalysisContext, @@ -102,6 +114,7 @@ export { } from './issue-analysis-context.js'; export type { DedupResult } from './issue-analysis-dedup.js'; export { getCodeSnippet } from './issue-analysis-snippets.js'; +export type { FullFileForAuditResult } from './issue-analysis-snippet-helpers.js'; export { getFullFileForAudit, getWiderSnippetForAnalysis, parseLineReferencesFromBody } from './issue-analysis-snippet-helpers.js'; /** Optional options for findUnresolvedIssues (e.g. line map from git diff for post-push). */ @@ -112,6 +125,8 @@ export type FindUnresolvedIssuesOptions = { getFileContentFromRepo?: (path: string) => Promise; /** Files changed in the PR (e.g. from git diff --name-only). When comment.path is a basename, prefer matching full path so issue targets the correct file. */ changedFiles?: string[]; + /** Map path → BFS depth from changed files (imports + proximity). When set, issues get `inBlastRadius` / `blastRadiusDepth`; optional dismiss when `PRR_BLAST_RADIUS_DISMISS=1`. */ + blastRadius?: Map; }; /** If the issue requests tests or review suggests fix-in-test (e.g. "fix mocks in tests"), return [primaryPath, testPath] so allowedPaths is set at issue build. */ @@ -137,6 +152,73 @@ function getEffectiveAllowedPathsForNewIssue(comment: ReviewComment, primaryPath return merged.length > 0 ? merged : [primaryPath]; } +/** + * Annotate unresolved issues with blast-radius fields; optionally dismiss out-of-scope when + * `PRR_BLAST_RADIUS_DISMISS=1`. Runs once before final save so dismissals persist. + */ +function applyBlastRadiusToUnresolved( + unresolved: UnresolvedIssue[], + blastRadius: Map | undefined, + stateContext: StateContext, + duplicateMap?: Map, + allComments?: ReviewComment[], +): UnresolvedIssue[] { + if (!blastRadius || blastRadius.size === 0) { + return unresolved; + } + for (const issue of unresolved) { + const primary = issue.resolvedPath ?? issue.comment.path; + const k = stripGitDiffPathPrefix(normalizeRepoPath(primary)); + const d = blastRadius.get(k) ?? blastRadius.get(primary); + if (d !== undefined) { + issue.inBlastRadius = true; + issue.blastRadiusDepth = d; + } else { + issue.inBlastRadius = false; + } + } + if (!isBlastRadiusDismissEnabled()) { + return unresolved; + } + const blastReason = + 'Comment target is outside the PR dependency blast radius (imports + proximity heuristics).'; + const blastHint = + 'This file is outside the PR\'s dependency graph (blast radius). Review manually if the comment is valid.'; + const mapForBlastDismiss = resolveEffectiveDuplicateMapForComments(stateContext, duplicateMap, allComments); + const dismissCommentRows = mergeCommentsForClusterDismiss(allComments, unresolved); + const kept: UnresolvedIssue[] = []; + for (const issue of unresolved) { + if (issue.inBlastRadius === false) { + if (dismissCommentRows.length > 0) { + dismissDuplicateClusterFromComments( + stateContext, + issue.comment, + mapForBlastDismiss, + dismissCommentRows, + blastReason, + 'out-of-scope', + blastHint, + ); + } else { + const primary = issue.resolvedPath ?? issue.comment.path; + Dismissed.dismissIssue( + stateContext, + issue.comment.id, + blastReason, + 'out-of-scope', + primary, + issue.comment.line, + issue.comment.body ?? '', + blastHint, + ); + } + } else { + kept.push(issue); + } + } + return kept; +} + /** When comment.path is a basename (no directory), resolve to full path from diff if present. Prompts.log audit: fixer was sent wrong file (root reporting.py) when issue was about benchmarks/bfcl/reporting.py. */ function resolvePathFromDiff(commentPath: string, changedFiles: string[] | undefined): string | undefined { if (!changedFiles?.length || commentPath.includes('/')) return undefined; @@ -199,18 +281,34 @@ export async function findUnresolvedIssues( let dismissedNotAnIssue = 0; let dismissedPlaceholder = 0; let dismissedRemaining = 0; + /** Solvability autoVerify anchors — cluster expansion runs after dedup (see `markVerifiedClusterForFixedIssue`). */ + const pendingAutoVerifyAnchorIds: string[] = []; const iterationCount = stateContext.state?.iterations?.length ?? 0; const effectiveExpiry = getVerificationExpiryForIterationCount(iterationCount); const staleVerificationsRaw = Verification.getStaleVerifications(stateContext, effectiveExpiry); // WHY: output.log audit — don't re-check or unmark comments just recovered from git this run. + // Cluster: when dedupCache matches this comment set, mark dedup siblings verified and widen stale-skip to the cluster. const recoveredIds = stateContext.state?.recoveredFromGitCommentIds; - const recoveredSet = recoveredIds?.length ? new Set(recoveredIds) : undefined; + const allCommentIdsKey = comments.map((c) => c.id).sort().join(','); + /** When dedup cache matches this comment set, pre-dedup dismissals expand to the LLM cluster (same as post-dedup). */ + const persistedDedupMapForCommentSet = getPersistedDedupMapForCommentSet(stateContext, allCommentIdsKey); + let recoveredStaleSkipIds: string[] | undefined; if (recoveredIds?.length) { + const { staleSkipIds, addedVerified } = expandGitRecoveredVerificationFromDedupCache( + stateContext, + recoveredIds, + allCommentIdsKey, + ); + recoveredStaleSkipIds = staleSkipIds; stateContext.state!.recoveredFromGitCommentIds = undefined; + if (addedVerified) { + await State.saveState(stateContext); + } } - let staleVerifications = recoveredIds?.length - ? staleVerificationsRaw.filter((id) => !recoveredIds.includes(id)) + const recoveredSet = recoveredStaleSkipIds?.length ? new Set(recoveredStaleSkipIds) : undefined; + let staleVerifications = recoveredStaleSkipIds?.length + ? staleVerificationsRaw.filter((id) => !recoveredStaleSkipIds!.includes(id)) : staleVerificationsRaw; const changedFiles = findUnresolvedIssuesOptions?.changedFiles; if (changedFiles?.length) { @@ -266,31 +364,56 @@ export async function findUnresolvedIssues( } if (isCommentPositiveOnly(comment.body ?? '')) { - Dismissed.dismissIssue( - stateContext, - comment.id, - 'Comment is purely positive (e.g. What\'s Good) with no actionable issue — dismissing', - 'not-an-issue', - comment.path, - comment.line, - comment.body, - undefined - ); + const positiveReason = + 'Comment is purely positive (e.g. What\'s Good) with no actionable issue — dismissing'; + if (persistedDedupMapForCommentSet) { + dismissDuplicateClusterFromComments( + stateContext, + comment, + persistedDedupMapForCommentSet, + comments, + positiveReason, + 'not-an-issue', + ); + } else { + Dismissed.dismissIssue( + stateContext, + comment.id, + positiveReason, + 'not-an-issue', + comment.path, + comment.line, + comment.body, + undefined, + ); + } dismissedNotAnIssue++; continue; } if (isVercelDeploymentOrTeamComment(comment)) { - Dismissed.dismissIssue( - stateContext, - comment.id, - 'Vercel deployment/team notification — not a code review; fix via Vercel dashboard', - 'not-an-issue', - comment.path, - comment.line, - comment.body, - undefined - ); + const vercelReason = 'Vercel deployment/team notification — not a code review; fix via Vercel dashboard'; + if (persistedDedupMapForCommentSet) { + dismissDuplicateClusterFromComments( + stateContext, + comment, + persistedDedupMapForCommentSet, + comments, + vercelReason, + 'not-an-issue', + ); + } else { + Dismissed.dismissIssue( + stateContext, + comment.id, + vercelReason, + 'not-an-issue', + comment.path, + comment.line, + comment.body, + undefined, + ); + } dismissedNotAnIssue++; continue; } @@ -298,16 +421,29 @@ export async function findUnresolvedIssues( const couldNotInjectCount = stateContext.state?.couldNotInjectCountByCommentId?.[comment.id] ?? 0; const couldNotInjectThreshold = looksLikeCreateFileIssue(comment) ? COULD_NOT_INJECT_CREATE_FILE_THRESHOLD : COULD_NOT_INJECT_DISMISS_THRESHOLD; if (couldNotInjectCount >= couldNotInjectThreshold) { - Dismissed.dismissIssue( - stateContext, - comment.id, - 'Target file could not be resolved in the repository (repeated could-not-inject + no-change cycles)', - 'file-unchanged', - comment.path, - comment.line, - comment.body, - undefined - ); + const cniReason = + 'Target file could not be resolved in the repository (repeated could-not-inject + no-change cycles)'; + if (persistedDedupMapForCommentSet) { + dismissDuplicateClusterFromComments( + stateContext, + comment, + persistedDedupMapForCommentSet, + comments, + cniReason, + 'file-unchanged', + ); + } else { + Dismissed.dismissIssue( + stateContext, + comment.id, + cniReason, + 'file-unchanged', + comment.path, + comment.line, + comment.body, + undefined, + ); + } continue; } @@ -321,27 +457,35 @@ export async function findUnresolvedIssues( path: comment.path, reason: solvability.reason, }); - Verification.markVerified(stateContext, comment.id); - // Add to verifiedThisSession if available (it's set on stateContext) - if (stateContext.verifiedThisSession) { - stateContext.verifiedThisSession.add(comment.id); - } + pendingAutoVerifyAnchorIds.push(comment.id); continue; } // CRITICAL: dismissIssue ONLY — do NOT call markVerified. // If the file comes back (revert, re-add), we want to re-analyze it. const reason = solvability.reason ?? `Issue not solvable (${solvability.dismissCategory ?? 'unknown'})`; - Dismissed.dismissIssue( - stateContext, - comment.id, - reason, - solvability.dismissCategory!, - comment.path, - comment.line, - comment.body, - solvability.remediationHint - ); + if (persistedDedupMapForCommentSet) { + dismissDuplicateClusterFromComments( + stateContext, + comment, + persistedDedupMapForCommentSet, + comments, + reason, + solvability.dismissCategory!, + solvability.remediationHint, + ); + } else { + Dismissed.dismissIssue( + stateContext, + comment.id, + reason, + solvability.dismissCategory!, + comment.path, + comment.line, + comment.body, + solvability.remediationHint, + ); + } // Pill #7: Cascade dismissal to sibling sub-items when dismissing for outdated model advice // (same file+line means same underlying issue; all sub-items should be dismissed consistently) @@ -426,15 +570,58 @@ export async function findUnresolvedIssues( // Phase 3: Post-filter placeholder results for (const { comment, codeSnippet, contextHints, resolvedPath } of snippetResults) { if (codeSnippet === SNIPPET_PLACEHOLDER) { - Dismissed.dismissIssue( - stateContext, - comment.id, - 'File not found or unreadable after existence check passed', - 'stale', - comment.path, - comment.line, - comment.body - ); + const pathForSubmoduleCheck = (resolvedPath ?? comment.path).replace(/\\/g, '/'); + if (isTrackedGitSubmodulePath(workdir, pathForSubmoduleCheck)) { + const subReason = + 'Review path is a git submodule (gitlink) — no regular file text for snippets after existence check'; + const subHint = + 'Run git submodule update --init, or fix in the submodule repo / parent manifest.'; + if (persistedDedupMapForCommentSet) { + dismissDuplicateClusterFromComments( + stateContext, + comment, + persistedDedupMapForCommentSet, + comments, + subReason, + 'not-an-issue', + subHint, + ); + } else { + Dismissed.dismissIssue( + stateContext, + comment.id, + subReason, + 'not-an-issue', + comment.path, + comment.line, + comment.body, + subHint, + ); + } + dismissedNotAnIssue++; + } else { + const phStaleReason = 'File not found or unreadable after existence check passed'; + if (persistedDedupMapForCommentSet) { + dismissDuplicateClusterFromComments( + stateContext, + comment, + persistedDedupMapForCommentSet, + comments, + phStaleReason, + 'stale', + ); + } else { + Dismissed.dismissIssue( + stateContext, + comment.id, + phStaleReason, + 'stale', + comment.path, + comment.line, + comment.body, + ); + } + } dismissedPlaceholder++; continue; } @@ -542,6 +729,24 @@ export async function findUnresolvedIssues( } } + /** Persisted fallback when `dedupResult.duplicateMap` is empty (e.g. heuristic dedup threw) but `dedup-v2` cache matches. */ + const clusterMapForAnalysis = resolveEffectiveDuplicateMapForComments( + stateContext, + dedupResult.duplicateMap, + comments, + ); + + if (pendingAutoVerifyAnchorIds.length > 0) { + for (const aid of pendingAutoVerifyAnchorIds) { + markVerifiedClusterForFixedIssue( + stateContext, + aid, + clusterMapForAnalysis, + stateContext.verifiedThisSession, + ); + } + } + // Use deduplicated list for analysis const toAnalyze = dedupResult.dedupedToCheck; @@ -566,7 +771,7 @@ export async function findUnresolvedIssues( ); // Build a set for fast lookup in the status check loop (after dedup, before status split). - // HISTORY: staleVerifications forces re-check of comments verified 5+ iterations ago. + // HISTORY: staleVerifications forces re-check of comments past verification-expiry (see getVerificationExpiryForIterationCount). // Without this bypass, Phase 0 hooks would mark them 'resolved', Phase 2 hash relaxation // would return the status, and line 774 would re-dismiss them — defeating stale re-check. const staleVerificationSet = new Set(staleVerifications); @@ -580,7 +785,7 @@ export async function findUnresolvedIssues( // Both --reverify and stale verifications force fresh LLM analysis. // --reverify: user explicitly wants to re-check everything. - // staleVerifications: comment was verified 5+ iterations ago, fix may have regressed. + // staleVerifications: comment was verified long enough ago (iteration-scaled expiry), fix may have regressed. // Without this, Phase 0 hooks + Phase 2 hash relaxation would make these // bypass the LLM entirely, defeating the purpose of stale verification. const forceReanalyze = options.reverify || staleVerificationSet.has(item.comment.id); @@ -593,17 +798,12 @@ export async function findUnresolvedIssues( statusHits++; // Issue still exists — reuse persisted classification - const duplicates = dedupResult.duplicateMap.get(item.comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + item.comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment: item.comment, @@ -611,7 +811,7 @@ export async function findUnresolvedIssues( stillExists: true, explanation: validStatus.explanation, triage: { importance: validStatus.importance, ease: validStatus.ease }, - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(item.comment, item.resolvedPath ?? item.comment.path, item.codeSnippet, validStatus.explanation), resolvedPath: item.resolvedPath, }); @@ -619,12 +819,24 @@ export async function findUnresolvedIssues( // Resolved but not in verifiedFixed (stale dismissal) — re-dismiss preserving existing category const existing = Dismissed.getDismissedIssue(stateContext, item.comment.id); if (existing) { - Dismissed.dismissIssue(stateContext, item.comment.id, existing.reason ?? 'Previously dismissed', existing.category, - item.comment.path, item.comment.line, item.comment.body, existing.remediationHint); + dismissDuplicateClusterFromComments( + stateContext, + item.comment, + clusterMapForAnalysis, + comments, + existing.reason ?? 'Previously dismissed', + existing.category, + existing.remediationHint, + ); } else { - Dismissed.dismissIssue(stateContext, item.comment.id, validStatus.explanation ?? 'Resolved (no explanation recorded)', + dismissDuplicateClusterFromComments( + stateContext, + item.comment, + clusterMapForAnalysis, + comments, + validStatus.explanation ?? 'Resolved (no explanation recorded)', validStatus.classification === 'stale' ? 'stale' : 'already-fixed', - item.comment.path, item.comment.line, item.comment.body); + ); } statusHits++; } else { @@ -677,7 +889,9 @@ export async function findUnresolvedIssues( return { unresolved, recommendedModelIndex: 0, - duplicateMap: dedupResult.duplicateMap, + // Session map must match cluster expansion used above (`clusterMapForAnalysis`), not only the + // in-memory dedup rebuild — when dedup throws or yields an empty map, dedup-v2 cache still applies. + duplicateMap: clusterMapForAnalysis ?? dedupResult.duplicateMap, }; } @@ -705,42 +919,35 @@ export async function findUnresolvedIssues( const fHash = fileHashes.get(comment.path) || '__missing__'; if (result.stale) { CommentStatusAPI.markResolved(stateContext, comment.id, 'stale', result.explanation, comment.path, fHash); - propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'stale', explanation: result.explanation }); + propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'stale', explanation: result.explanation }, comments); } else if (result.exists) { CommentStatusAPI.markOpen(stateContext, comment.id, 'exists', result.explanation, 3, 3, comment.path, fHash); - propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'open', classification: 'exists', explanation: result.explanation, importance: 3, ease: 3 }); + propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'open', classification: 'exists', explanation: result.explanation, importance: 3, ease: 3 }, comments); } else { CommentStatusAPI.markResolved(stateContext, comment.id, 'fixed', result.explanation, comment.path, fHash); - propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'fixed', explanation: result.explanation }); + propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'fixed', explanation: result.explanation }, comments); } if (result.stale) { // Issue is stale (code fundamentally restructured) - dismiss without marking verified if (validateDismissalExplanation(result.explanation, comment.path, comment.line)) { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - comment.id, + comment, + clusterMapForAnalysis, + comments, result.explanation, 'stale', - comment.path, - comment.line, - comment.body ); } else { warn(`Stale issue missing valid explanation - marking as unresolved`); - // Check if this is a canonical issue with duplicates - const duplicates = dedupResult.duplicateMap.get(comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment, @@ -748,24 +955,19 @@ export async function findUnresolvedIssues( stillExists: true, explanation: 'LLM indicated issue is stale, but provided insufficient explanation', triage: { importance: 3, ease: 3 }, // Default: sequential mode has no triage - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(comment, resolvedPath ?? comment.path, codeSnippet, undefined), resolvedPath, }); } } else if (result.exists) { - // Check if this is a canonical issue with duplicates - const duplicates = dedupResult.duplicateMap.get(comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + unmarkVerifiedClusterForStaleRecheck(stateContext, comment.id, clusterMapForAnalysis, recoveredSet); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment, @@ -773,40 +975,38 @@ export async function findUnresolvedIssues( stillExists: true, explanation: result.explanation, triage: { importance: 3, ease: 3 }, // Default: sequential mode has no triage - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(comment, resolvedPath ?? comment.path, codeSnippet, result.explanation), resolvedPath, }); } else { // Issue appears to be already fixed - but we can ONLY dismiss if we have a valid explanation if (validateDismissalExplanation(result.explanation, comment.path, comment.line)) { - // Valid explanation - document why it doesn't need fixing - Verification.markVerified(stateContext, comment.id); - Dismissed.dismissIssue( + // Valid explanation - document why it doesn't need fixing (full dedup cluster) + markVerifiedClusterForFixedIssue( stateContext, comment.id, + clusterMapForAnalysis, + stateContext.verifiedThisSession, + ); + dismissDuplicateClusterFromComments( + stateContext, + comment, + clusterMapForAnalysis, + comments, result.explanation, 'already-fixed', - comment.path, - comment.line, - comment.body ); } else { // Invalid/missing explanation - treat as unresolved (potential bug) warn(`Cannot dismiss without valid explanation - marking as unresolved`); - // Check if this is a canonical issue with duplicates - const duplicates = dedupResult.duplicateMap.get(comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment, @@ -814,7 +1014,7 @@ export async function findUnresolvedIssues( stillExists: true, explanation: 'LLM indicated issue does not exist, but provided insufficient explanation to dismiss', triage: { importance: 3, ease: 3 }, // Default: sequential mode has no triage - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(comment, resolvedPath ?? comment.path, codeSnippet, undefined), resolvedPath, }); @@ -1030,18 +1230,12 @@ export async function findUnresolvedIssues( // Don't cache: LLM failure, next iteration should retry - // Check if this is a canonical issue with duplicates - const duplicates = dedupResult.duplicateMap.get(comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment, @@ -1049,7 +1243,7 @@ export async function findUnresolvedIssues( stillExists: true, explanation: 'Unable to determine status', triage: { importance: 3, ease: 3 }, // Default: fallback path - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(comment, resolvedPath ?? comment.path, snippetForFix, undefined), resolvedPath, }); @@ -1079,42 +1273,35 @@ export async function findUnresolvedIssues( const fHash = fileHashes.get(comment.path) || '__missing__'; if (effectiveResult.stale) { CommentStatusAPI.markResolved(stateContext, comment.id, 'stale', effectiveResult.explanation, comment.path, fHash); - propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'stale', explanation: effectiveResult.explanation }); + propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'stale', explanation: effectiveResult.explanation }, comments); } else if (effectiveResult.exists) { CommentStatusAPI.markOpen(stateContext, comment.id, 'exists', effectiveResult.explanation, effectiveResult.importance ?? 3, effectiveResult.ease ?? 3, comment.path, fHash); - propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'open', classification: 'exists', explanation: effectiveResult.explanation, importance: effectiveResult.importance ?? 3, ease: effectiveResult.ease ?? 3 }); + propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'open', classification: 'exists', explanation: effectiveResult.explanation, importance: effectiveResult.importance ?? 3, ease: effectiveResult.ease ?? 3 }, comments); } else { CommentStatusAPI.markResolved(stateContext, comment.id, 'fixed', effectiveResult.explanation, comment.path, fHash); - propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'fixed', explanation: effectiveResult.explanation }); + propagateStatusToDuplicates(stateContext, comment.id, dedupResult, fileHashes, { kind: 'resolved', classification: 'fixed', explanation: effectiveResult.explanation }, comments); } if (effectiveResult.stale) { // Issue is stale (code fundamentally restructured) - dismiss without marking verified if (validateDismissalExplanation(effectiveResult.explanation, comment.path, comment.line)) { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - comment.id, + comment, + clusterMapForAnalysis, + comments, effectiveResult.explanation, 'stale', - comment.path, - comment.line, - comment.body ); } else { warn(`Stale issue missing valid explanation - marking as unresolved`); - // Check if this is a canonical issue with duplicates - const duplicates = dedupResult.duplicateMap.get(comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment, @@ -1122,35 +1309,22 @@ export async function findUnresolvedIssues( stillExists: true, explanation: 'LLM indicated issue is stale, but provided insufficient explanation', triage: { importance: effectiveResult.importance, ease: effectiveResult.ease }, - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(comment, resolvedPath ?? comment.path, snippetForFix, effectiveResult.explanation), resolvedPath, }); } } else if (effectiveResult.exists) { - // Stale re-check: batch said "still exists" — if comment was previously verified, unmark so it re-enters the fix queue. + // Stale re-check: batch said "still exists" — unmark verified cluster so dupes don't stay "skip fixer". // WHY: output.log audit — push iter 2 had 2 unresolved (reporting.py) but "All 2 already verified — skipping fixer" // because they stayed in verifiedFixed; re-check had correctly said stillExists but we never unmarked. - if (Verification.isVerified(stateContext, comment.id)) { - if (recoveredSet?.has(comment.id)) { - debug('Skipping unmark (recovered from git this run)', { commentId: comment.id, path: comment.path }); - } else { - Verification.unmarkVerified(stateContext, comment.id); - debug('Unmarked verified (stale re-check said still exists)', { commentId: comment.id, path: comment.path }); - } - } - // Check if this is a canonical issue with duplicates - const duplicates = dedupResult.duplicateMap.get(comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + unmarkVerifiedClusterForStaleRecheck(stateContext, comment.id, clusterMapForAnalysis, recoveredSet); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment, @@ -1158,40 +1332,37 @@ export async function findUnresolvedIssues( stillExists: true, explanation: effectiveResult.explanation, triage: { importance: effectiveResult.importance, ease: effectiveResult.ease }, - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(comment, resolvedPath ?? comment.path, snippetForFix, effectiveResult.explanation), resolvedPath, }); } else { // Issue appears to be already fixed - but we can ONLY dismiss if we have a valid explanation if (validateDismissalExplanation(effectiveResult.explanation, comment.path, comment.line)) { - // Valid explanation - document why it doesn't need fixing - Verification.markVerified(stateContext, comment.id); - Dismissed.dismissIssue( + markVerifiedClusterForFixedIssue( stateContext, comment.id, + clusterMapForAnalysis, + stateContext.verifiedThisSession, + ); + dismissDuplicateClusterFromComments( + stateContext, + comment, + clusterMapForAnalysis, + comments, effectiveResult.explanation, 'already-fixed', - comment.path, - comment.line, - comment.body ); } else { // Invalid/missing explanation - treat as unresolved (potential bug) warn(`Cannot dismiss without valid explanation - marking as unresolved`); - // Check if this is a canonical issue with duplicates - const duplicates = dedupResult.duplicateMap.get(comment.id); - const mergedDuplicates = duplicates?.map(dupId => { - const dupItem = dedupResult.duplicateItems.get(dupId); - return dupItem ? { - commentId: dupItem.comment.id, - author: dupItem.comment.author, - body: dupItem.comment.body, - path: dupItem.comment.path, - line: dupItem.comment.line, - } : null; - }).filter((d): d is NonNullable => d !== null); + const mergedDuplicates = buildMergedDuplicatesForAnchor( + comment.id, + clusterMapForAnalysis, + dedupResult.duplicateItems, + comments, + ); unresolved.push({ comment, @@ -1199,7 +1370,7 @@ export async function findUnresolvedIssues( stillExists: true, explanation: 'LLM indicated issue does not exist, but provided insufficient explanation to dismiss', triage: { importance: effectiveResult.importance, ease: effectiveResult.ease }, - mergedDuplicates: mergedDuplicates && mergedDuplicates.length > 0 ? mergedDuplicates : undefined, + mergedDuplicates, allowedPaths: getEffectiveAllowedPathsForNewIssue(comment, resolvedPath ?? comment.path, snippetForFix, effectiveResult.explanation), resolvedPath, }); @@ -1216,17 +1387,24 @@ export async function findUnresolvedIssues( } } + const unresolvedAfterBlast = applyBlastRadiusToUnresolved( + unresolved, + findUnresolvedIssuesOptions?.blastRadius, + stateContext, + clusterMapForAnalysis, + comments, + ); await State.saveState(stateContext); await LessonsAPI.Save.save(lessonsContext); if (options.verbose) { - printDebugIssueTable('after analysis', comments, stateContext, unresolved); + printDebugIssueTable('after analysis', comments, stateContext, unresolvedAfterBlast); } - + return { - unresolved, + unresolved: unresolvedAfterBlast, recommendedModels, recommendedModelIndex, modelRecommendationReasoning, - duplicateMap: dedupResult.duplicateMap, + duplicateMap: clusterMapForAnalysis ?? dedupResult.duplicateMap, }; } diff --git a/tools/prr/workflow/iteration-cleanup.ts b/tools/prr/workflow/iteration-cleanup.ts index d94a760..8c564fe 100644 --- a/tools/prr/workflow/iteration-cleanup.ts +++ b/tools/prr/workflow/iteration-cleanup.ts @@ -83,7 +83,8 @@ export async function handleIterationCleanup( runner.name, currentModel ?? undefined, verifiedCount, - failedCount + failedCount, + fixIteration, ); // Record per-issue attempts (with file hash so chronic check only counts same-version attempts) diff --git a/tools/prr/workflow/main-loop-setup.ts b/tools/prr/workflow/main-loop-setup.ts index 6bdadcc..b086629 100644 --- a/tools/prr/workflow/main-loop-setup.ts +++ b/tools/prr/workflow/main-loop-setup.ts @@ -36,6 +36,21 @@ import { createHash } from 'crypto'; import type { FindUnresolvedIssuesOptions } from './issue-analysis.js'; import { hasChanges } from '../../../shared/git/git-clone-index.js'; import { applyCatalogModelAutoHeals } from './catalog-model-autoheal.js'; +import { setDynamicRepoTopLevelDirs } from '../../../shared/path-utils.js'; +import { assessSolvability, resolveTrackedPath } from './helpers/solvability.js'; +import { + dismissDuplicateClusterFromComments, + resolveEffectiveDuplicateMapForComments, +} from './issue-analysis-dedup.js'; +import { + buildDependencyGraph, + computeBlastRadius, + getBlastRadiusDepth, + getBlastRadiusMaxFiles, + getBlastRadiusTimeoutMs, + isBlastRadiusDisabled, + listGitTrackedFiles, +} from '../../../shared/dependency-graph/index.js'; /** * Process comments and determine if fix loop should run @@ -81,7 +96,20 @@ export async function processCommentsAndPrepareFixLoop( * the CodeRabbit check already did the exact same API call. */ prefetchedComments?: ReviewComment[], /** When set, reuse cached analysis if comment IDs, headSha, and file hashes for comment paths unchanged (output.log audit). */ - analysisCacheRef?: { current: { commentCount: number; headSha: string; commentIds?: string; fileHashesKeyDigest?: string; unresolvedIssues: UnresolvedIssue[]; comments: ReviewComment[]; duplicateMap: Map; changedFiles?: string[] } | null } + analysisCacheRef?: { + current: { + commentCount: number; + headSha: string; + commentIds?: string; + fileHashesKeyDigest?: string; + unresolvedIssues: UnresolvedIssue[]; + comments: ReviewComment[]; + duplicateMap: Map; + changedFiles?: string[]; + /** Normalized repo paths in blast radius when graph was built (for injection subset on cache hit). */ + blastRadiusPaths?: string[]; + } | null; + } ): Promise<{ comments: ReviewComment[]; unresolvedIssues: UnresolvedIssue[]; @@ -211,8 +239,17 @@ export async function processCommentsAndPrepareFixLoop( (cache.fileHashesKeyDigest != null ? cache.fileHashesKeyDigest === fileHashesKeyDigest : true); if (cacheHit) { unresolvedIssues = cache.unresolvedIssues; - duplicateMap = cache.duplicateMap; + // Re-resolve against persisted dedup-v2: cached duplicateMap may be empty from an older analysis + // path while state.dedupCache still matches this comment set (same as findUnresolvedIssues return). + duplicateMap = + resolveEffectiveDuplicateMapForComments(stateContext, cache.duplicateMap, comments) ?? + cache.duplicateMap; prChangedFiles = cache.changedFiles; + stateContext.blastRadiusPaths = + cache.blastRadiusPaths && cache.blastRadiusPaths.length > 0 ? new Set(cache.blastRadiusPaths) : undefined; + // WHY: Populate path-utils dynamic top-level segments for strict allow mode + stripGitDiffPathPrefix + // before findUnresolvedIssues runs filterAllowedPathsForFix (see shared/path-utils.ts file header). + if (prChangedFiles) setDynamicRepoTopLevelDirs(prChangedFiles); analyzeTime = 0; console.log(chalk.gray(` Reusing cached analysis (${formatNumber(comments.length)} comments, same IDs + file hashes)`)); debug('Reused analysis cache', { commentCount: comments.length, headSha: headSha.slice(0, 7), fileHashesDigest: fileHashesKeyDigest }); @@ -232,6 +269,8 @@ export async function processCommentsAndPrepareFixLoop( // Base ref may not exist (e.g. first push) } prChangedFiles = changedFiles.length > 0 ? changedFiles : undefined; + // WHY: Same as cache-hit branch — issue.allowedPaths and runner injection see consistent segments. + if (prChangedFiles) setDynamicRepoTopLevelDirs(prChangedFiles); console.log(chalk.gray(`Analyzing ${formatNumber(comments.length)} review comments...`)); const getFileContentFromRepo = async (path: string): Promise => { try { @@ -240,10 +279,45 @@ export async function processCommentsAndPrepareFixLoop( return null; } }; + + let blastRadius: Map | undefined; + stateContext.blastRadiusPaths = undefined; + // Blast radius: best-effort graph from PR changed files + imports/proximity. WHY try/catch: + // timeout, max-files, git/fs errors must not fail analysis — omit map so all issues stay in-scope + // (same behavior as PRR_DISABLE_BLAST_RADIUS). blastRadiusPaths drives llm-api injection subset only. + if (!isBlastRadiusDisabled() && changedFiles.length > 0) { + try { + const t0 = Date.now(); + const allFiles = await listGitTrackedFiles(workdir); + const graph = await buildDependencyGraph(workdir, { + maxFiles: getBlastRadiusMaxFiles(), + timeoutMs: getBlastRadiusTimeoutMs(), + }); + blastRadius = computeBlastRadius(graph, changedFiles, getBlastRadiusDepth(), allFiles); + stateContext.blastRadiusPaths = new Set(blastRadius.keys()); + debug('Blast radius', { + changedFiles: changedFiles.length, + graphNodes: graph.nodeCount, + graphEdges: graph.edgeCount, + radiusFiles: blastRadius.size, + depth: getBlastRadiusDepth(), + buildTimeMs: Date.now() - t0, + }); + } catch (e) { + console.warn( + chalk.yellow('Blast radius graph build failed; all issues treated as in-scope (no deprioritization).'), + ); + debug('Blast radius error', { error: e instanceof Error ? e.message : String(e) }); + blastRadius = undefined; + stateContext.blastRadiusPaths = undefined; + } + } + const analysisResult = await findUnresolvedIssues(comments, comments.length, { lineMap: lineMap.size > 0 ? lineMap : undefined, getFileContentFromRepo, changedFiles: prChangedFiles, + blastRadius, }); unresolvedIssues = analysisResult.unresolved; duplicateMap = analysisResult.duplicateMap; @@ -258,6 +332,7 @@ export async function processCommentsAndPrepareFixLoop( comments: [...comments], duplicateMap: new Map(duplicateMap), changedFiles: prChangedFiles, + blastRadiusPaths: blastRadius && blastRadius.size > 0 ? [...blastRadius.keys()] : undefined, }; } } @@ -284,7 +359,8 @@ export async function processCommentsAndPrepareFixLoop( spinner, getCodeSnippet, stateContext, - workdir + workdir, + duplicateMap, ); if (newCommentsResult.hasNewComments) { comments.length = 0; @@ -308,40 +384,52 @@ export async function processCommentsAndPrepareFixLoop( spinner, getCodeSnippet, getFullFile, - workdir // Pill cycle 2 #4: Pass workdir for Rule 6 validation + workdir, // Pill cycle 2 #4: Pass workdir for Rule 6 validation + duplicateMap, ); if (auditResult.failedAudit.length > 0) { // runFinalAudit() already unmarked every failed-audit comment (single place — avoids duplicate unmark logs). // Re-run solvability on audit-failed items so we don't re-enter with unsolvable issues (e.g. (PR comment), deleted file). - const { assessSolvability } = await import('./helpers/solvability.js'); unresolvedIssues.length = 0; const failedItems = auditResult.failedAudit; + const effectiveDupForAuditReentry = resolveEffectiveDuplicateMapForComments( + stateContext, + duplicateMap, + comments, + ); let reEnterCount = 0; for (let i = 0; i < failedItems.length; i++) { const { comment, explanation } = failedItems[i]; const solvability = assessSolvability(workdir, comment, stateContext); + // File ops + dismiss record: resolved repo path when basename-only review path maps to one file. + const primaryPath = + comment.path != null && comment.path !== '' + ? resolveTrackedPath(workdir, comment.path, comment.body ?? '') ?? comment.path + : (comment.path ?? ''); if (!solvability.solvable) { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - comment.id, + comment, + effectiveDupForAuditReentry, + comments, solvability.reason ?? explanation, solvability.dismissCategory ?? 'not-an-issue', - comment.path, - comment.line, - comment.body ?? '', - solvability.remediationHint + solvability.remediationHint, ); - debug('Audit re-entry: dismissed unsolvable issue', { commentId: comment.id, reason: solvability.reason }); + debug('Audit re-entry: dismissed unsolvable issue (cluster)', { commentId: comment.id, reason: solvability.reason }); continue; } - const codeSnippet = await getCodeSnippet(comment.path, comment.line, comment.body); + const codeSnippet = await getCodeSnippet(primaryPath, comment.line, comment.body); + const resolvedPath = + comment.path != null && primaryPath !== comment.path ? primaryPath : undefined; unresolvedIssues.push({ comment, codeSnippet, stillExists: true, explanation, triage: { importance: 2, ease: 3 }, + ...(resolvedPath ? { resolvedPath } : {}), }); reEnterCount++; } diff --git a/tools/prr/workflow/no-changes-verification.ts b/tools/prr/workflow/no-changes-verification.ts index 56e46a6..e5703f4 100644 --- a/tools/prr/workflow/no-changes-verification.ts +++ b/tools/prr/workflow/no-changes-verification.ts @@ -25,6 +25,8 @@ import { parseResultCode, parseOtherFileFromResultDetail, isReferencePathInComme import type { ReviewComment } from '../github/types.js'; import { getMentionedTestFilePaths, getTestPathForSourceFileIssue, reviewSuggestsFixInTest, reviewTargetsMentionedTestFile } from '../analyzer/prompt-builder.js'; import * as Dismissed from '../state/state-dismissed.js'; +import type { DismissedIssue } from '../state/types.js'; +import { resolveEffectiveDuplicateMapForComments } from './issue-analysis-dedup.js'; /** * Number of issues to spot-check before committing to full verification. @@ -90,6 +92,77 @@ function persistInferredTestTargets( return []; } +/** + * After cluster dismiss attempts, only remove queued rows that are verified or dismissed. + * WHY: We used to splice every cluster id out of the queue even when `dismissIssue` was skipped + * (no row in `comments`), which left those threads neither verified nor dismissed → empty queue + + * BUG DETECTED repopulate (eliza #6702 audit). + */ +function filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues: UnresolvedIssue[], + clusterIds: string[], + stateContext: StateContext, +): UnresolvedIssue[] { + const clusterSet = new Set(clusterIds); + return unresolvedIssues.filter((i) => { + if (!clusterSet.has(i.comment.id)) return true; + return ( + !Verification.isVerified(stateContext, i.comment.id) && + !Dismissed.isCommentDismissed(stateContext, i.comment.id) + ); + }); +} + +/** + * Row for `dismissIssue` when a cluster id is missing from the fetched `comments` list. + * Prefer full API row, then any queued issue, anchor, else same file/line/body as anchor with `id`. + */ +function resolveCommentRowForClusterDismiss( + cid: string, + anchorIssue: UnresolvedIssue, + comments: ReviewComment[] | undefined, + unresolvedIssues: UnresolvedIssue[], + clusterSet: Set, +): ReviewComment | undefined { + const fromList = comments?.find((co) => co.id === cid); + if (fromList) return fromList; + const fromQueue = unresolvedIssues.find((i) => i.comment.id === cid)?.comment; + if (fromQueue) return fromQueue; + if (cid === anchorIssue.comment.id) return anchorIssue.comment; + if (!clusterSet.has(cid)) return undefined; + return { ...anchorIssue.comment, id: cid }; +} + +/** + * Dismiss the full LLM dedup cluster for no-changes paths (CANNOT_FIX exhaust, hidden-target, etc.). + * Mirrors ALREADY_FIXED cluster handling — single-row dismiss left siblings unaccounted (BUG DETECTED repopulate). + */ +function dismissNoChangesCluster( + stateContext: StateContext, + anchorIssue: UnresolvedIssue, + duplicateMap: Map | undefined, + comments: ReviewComment[] | undefined, + unresolvedIssues: UnresolvedIssue[], + dismissText: string, + category: DismissedIssue['category'], + remediationHint?: string, +): string[] { + const clusterIds = getDuplicateClusterCommentIds(anchorIssue.comment.id, duplicateMap); + const clusterSet = new Set(clusterIds); + for (const cid of clusterIds) { + if (Verification.isVerified(stateContext, cid) || Dismissed.isCommentDismissed(stateContext, cid)) { + continue; + } + const c = resolveCommentRowForClusterDismiss(cid, anchorIssue, comments, unresolvedIssues, clusterSet); + if (!c) { + debug('no-changes cluster dismiss: skip (no row resolvable)', { commentId: cid }); + continue; + } + Dismissed.dismissIssue(stateContext, cid, dismissText, category, c.path, c.line, c.body, remediationHint); + } + return clusterIds; +} + /** * Handle no-changes scenario after fixer runs. * @@ -103,10 +176,10 @@ function persistInferredTestTargets( * 4. Track no-changes for performance stats * 5. Return whether to continue, break, or proceed to rotation * - * Pass `comments` + `duplicateMap` so single-issue **ALREADY_FIXED** and **ALREADY_FIXED any-threshold** - * dismiss the **entire dedup cluster** (`getDuplicateClusterCommentIds`). **WHY:** Auto-verify on real - * fixes already marks duplicates when one canonical change lands; the no-change path used to dismiss - * only the queued row, leaving cluster siblings neither verified nor dismissed → BUG DETECTED repopulate. + * Pass `comments` + `duplicateMap` so **ALREADY_FIXED** paths, **CANNOT_FIX** exhaust, and **hidden-target** + * dismissals use the **full dedup cluster** (`getDuplicateClusterCommentIds` + `resolveCommentRowForClusterDismiss`). + * **WHY:** Auto-verify on real fixes already marks duplicates when one canonical change lands; single-row dismiss + * left cluster siblings neither verified nor dismissed → BUG DETECTED repopulate. */ export async function handleNoChangesWithVerification( unresolvedIssues: UnresolvedIssue[], @@ -130,6 +203,7 @@ export async function handleNoChangesWithVerification( updatedUnresolvedIssues: UnresolvedIssue[]; progressMade: number; }> { + const dupForCluster = resolveEffectiveDuplicateMapForComments(stateContext, duplicateMap, comments); console.log(chalk.yellow(`\nNo changes made by ${runnerName}${currentModel ? ` (${currentModel})` : ''}`)); // WHY try RESULT first: Structured codes (ALREADY_FIXED, UNCLEAR, WRONG_LOCATION, etc.) allow @@ -149,17 +223,22 @@ export async function handleNoChangesWithVerification( // Prompts.log audit: single-issue ALREADY_FIXED with no code blocks was re-sent (duplicate 78k prompt). Dismiss immediately so we don't retry the same prompt. if (unresolvedIssues.length === 1) { const detailMsg = detail || 'fixer confirmed no changes needed'; - const clusterIds = getDuplicateClusterCommentIds(firstIssueAf.comment.id, duplicateMap); + const clusterIds = getDuplicateClusterCommentIds(firstIssueAf.comment.id, dupForCluster); + const clusterSet = new Set(clusterIds); const dismissText = `ALREADY_FIXED — ${detailMsg}`; for (const cid of clusterIds) { if (Verification.isVerified(stateContext, cid) || Dismissed.isCommentDismissed(stateContext, cid)) { continue; } - const c = - comments?.find((co) => co.id === cid) ?? - (cid === firstIssueAf.comment.id ? firstIssueAf.comment : undefined); + const c = resolveCommentRowForClusterDismiss( + cid, + firstIssueAf, + comments, + unresolvedIssues, + clusterSet, + ); if (!c) { - debug('ALREADY_FIXED cluster: skip dismiss (no comment row)', { commentId: cid }); + debug('ALREADY_FIXED cluster: skip dismiss (no row resolvable)', { commentId: cid }); continue; } Dismissed.dismissIssue( @@ -173,13 +252,16 @@ export async function handleNoChangesWithVerification( undefined, ); } - const clusterSet = new Set(clusterIds); Performance.recordModelNoChanges(stateContext, runnerName, currentModel); return { shouldBreak: false, shouldContinue: false, verifiedCount: 0, - updatedUnresolvedIssues: unresolvedIssues.filter((i) => !clusterSet.has(i.comment.id)), + updatedUnresolvedIssues: filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues, + clusterIds, + stateContext, + ), progressMade: 0, }; } @@ -203,16 +285,20 @@ export async function handleNoChangesWithVerification( debug('ALREADY_FIXED any-counter', { commentId: firstIssueAf.comment.id, anyCount, threshold: ALREADY_FIXED_ANY_THRESHOLD }); if (anyCount >= ALREADY_FIXED_ANY_THRESHOLD) { debug('ALREADY_FIXED dismiss: any-threshold reached', { commentId: firstIssueAf.comment.id, anyCount }); - const clusterIds = getDuplicateClusterCommentIds(firstIssueAf.comment.id, duplicateMap); + const clusterIds = getDuplicateClusterCommentIds(firstIssueAf.comment.id, dupForCluster); const dismissText = `ALREADY_FIXED ${anyCount}× (multiple models) — dismissing as already-fixed`; const clusterSet = new Set(clusterIds); for (const cid of clusterIds) { if (Verification.isVerified(stateContext, cid) || Dismissed.isCommentDismissed(stateContext, cid)) { continue; } - const c = - comments?.find((co) => co.id === cid) ?? - (cid === firstIssueAf.comment.id ? firstIssueAf.comment : undefined); + const c = resolveCommentRowForClusterDismiss( + cid, + firstIssueAf, + comments, + unresolvedIssues, + clusterSet, + ); if (!c) continue; Dismissed.dismissIssue(stateContext, cid, dismissText, 'already-fixed', c.path, c.line, c.body, undefined); } @@ -221,27 +307,54 @@ export async function handleNoChangesWithVerification( shouldBreak: false, shouldContinue: false, verifiedCount: 0, - updatedUnresolvedIssues: unresolvedIssues.filter((i) => !clusterSet.has(i.comment.id)), + updatedUnresolvedIssues: filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues, + clusterIds, + stateContext, + ), progressMade: 0, }; } if (consecutive >= ALREADY_FIXED_EXHAUST_THRESHOLD) { - Dismissed.dismissIssue( - stateContext, - firstIssueAf.comment.id, - `ALREADY_FIXED ${consecutive}× with same explanation — dismissing as not-an-issue`, - 'not-an-issue', - firstIssueAf.comment.path, - firstIssueAf.comment.line, - firstIssueAf.comment.body, - undefined - ); + const clusterIdsEx = getDuplicateClusterCommentIds(firstIssueAf.comment.id, dupForCluster); + const clusterSetEx = new Set(clusterIdsEx); + const dismissTextEx = `ALREADY_FIXED ${consecutive}× with same explanation — dismissing as not-an-issue`; + for (const cid of clusterIdsEx) { + if (Verification.isVerified(stateContext, cid) || Dismissed.isCommentDismissed(stateContext, cid)) { + continue; + } + const c = resolveCommentRowForClusterDismiss( + cid, + firstIssueAf, + comments, + unresolvedIssues, + clusterSetEx, + ); + if (!c) { + debug('ALREADY_FIXED exhaust cluster: skip dismiss (no row resolvable)', { commentId: cid }); + continue; + } + Dismissed.dismissIssue( + stateContext, + cid, + dismissTextEx, + 'not-an-issue', + c.path, + c.line, + c.body, + undefined, + ); + } Performance.recordModelNoChanges(stateContext, runnerName, currentModel); return { shouldBreak: false, shouldContinue: false, verifiedCount: 0, - updatedUnresolvedIssues: unresolvedIssues.filter((i) => i.comment.id !== firstIssueAf.comment.id), + updatedUnresolvedIssues: filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues, + clusterIdsEx, + stateContext, + ), progressMade: 0, }; } @@ -264,22 +377,26 @@ export async function handleNoChangesWithVerification( const consecutive = state.cannotFixConsecutiveByCommentId[firstIssue0.comment.id]; debug('CANNOT_FIX consecutive count', { commentId: firstIssue0.comment.id, count: consecutive }); if (consecutive >= CANNOT_FIX_EXHAUST_THRESHOLD) { - Dismissed.dismissIssue( + const cannotFixDismiss = `CANNOT_FIX ${consecutive}× — ${(structuredResult.resultDetail ?? '').trim().substring(0, 120) || 'not fixable via code changes'}`; + const clusterIdsCf = dismissNoChangesCluster( stateContext, - firstIssue0.comment.id, - `CANNOT_FIX ${consecutive}× — ${(structuredResult.resultDetail ?? '').trim().substring(0, 120) || 'not fixable via code changes'}`, + firstIssue0, + dupForCluster, + comments, + unresolvedIssues, + cannotFixDismiss, 'not-an-issue', - firstIssue0.comment.path, - firstIssue0.comment.line, - firstIssue0.comment.body ?? '', - undefined ); Performance.recordModelNoChanges(stateContext, runnerName, currentModel); return { shouldBreak: false, shouldContinue: false, verifiedCount: 0, - updatedUnresolvedIssues: unresolvedIssues.filter((i) => i.comment.id !== firstIssue0.comment.id), + updatedUnresolvedIssues: filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues, + clusterIdsCf, + stateContext, + ), progressMade: 0, }; } @@ -306,22 +423,26 @@ export async function handleNoChangesWithVerification( const inferredTargets = persistInferredTestTargets(firstIssue0, detail, workdir, stateContext); const missingTargetCount = stateContext.state?.missingTargetFileCountByCommentId?.[firstIssue0.comment.id] ?? 0; if (inferredTargets.length === 0 && missingTargetCount >= 2) { - Dismissed.dismissIssue( + const hiddenTargetMsg = `Hidden target file could not be inferred after ${missingTargetCount} attempts — review points to a test file that is not identifiable from current context`; + const clusterIdsHt = dismissNoChangesCluster( stateContext, - firstIssue0.comment.id, - `Hidden target file could not be inferred after ${missingTargetCount} attempts — review points to a test file that is not identifiable from current context`, + firstIssue0, + dupForCluster, + comments, + unresolvedIssues, + hiddenTargetMsg, 'remaining', - getIssuePrimaryPath(firstIssue0), - firstIssue0.comment.line, - firstIssue0.comment.body ?? '', - undefined ); Performance.recordModelNoChanges(stateContext, runnerName, currentModel); return { shouldBreak: false, shouldContinue: false, verifiedCount: 0, - updatedUnresolvedIssues: unresolvedIssues.filter((i) => i.comment.id !== firstIssue0.comment.id), + updatedUnresolvedIssues: filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues, + clusterIdsHt, + stateContext, + ), progressMade: 0, }; } @@ -346,22 +467,26 @@ export async function handleNoChangesWithVerification( const inferredTargets = persistInferredTestTargets(firstIssue0, detail, workdir, stateContext); const missingTargetCount = stateContext.state?.missingTargetFileCountByCommentId?.[firstIssue0.comment.id] ?? 0; if (inferredTargets.length === 0 && missingTargetCount >= 2) { - Dismissed.dismissIssue( + const hiddenTargetMsgU = `Hidden target file could not be inferred after ${missingTargetCount} attempts — review points to a test file that is not identifiable from current context`; + const clusterIdsHu = dismissNoChangesCluster( stateContext, - firstIssue0.comment.id, - `Hidden target file could not be inferred after ${missingTargetCount} attempts — review points to a test file that is not identifiable from current context`, + firstIssue0, + dupForCluster, + comments, + unresolvedIssues, + hiddenTargetMsgU, 'remaining', - getIssuePrimaryPath(firstIssue0), - firstIssue0.comment.line, - firstIssue0.comment.body ?? '', - undefined ); Performance.recordModelNoChanges(stateContext, runnerName, currentModel); return { shouldBreak: false, shouldContinue: false, verifiedCount: 0, - updatedUnresolvedIssues: unresolvedIssues.filter((i) => i.comment.id !== firstIssue0.comment.id), + updatedUnresolvedIssues: filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues, + clusterIdsHu, + stateContext, + ), progressMade: 0, }; } @@ -414,22 +539,26 @@ export async function handleNoChangesWithVerification( const inferredTargets = persistInferredTestTargets(firstIssue1, detail, workdir, stateContext); const missingTargetCount = state.missingTargetFileCountByCommentId?.[firstIssue1.comment.id] ?? 0; if (inferredTargets.length === 0 && missingTargetCount >= 2) { - Dismissed.dismissIssue( + const hiddenTargetMsgW = `Hidden target file could not be inferred after ${missingTargetCount} attempts — review points to a test file that is not identifiable from current context`; + const clusterIdsHw = dismissNoChangesCluster( stateContext, - firstIssue1.comment.id, - `Hidden target file could not be inferred after ${missingTargetCount} attempts — review points to a test file that is not identifiable from current context`, + firstIssue1, + dupForCluster, + comments, + unresolvedIssues, + hiddenTargetMsgW, 'remaining', - getIssuePrimaryPath(firstIssue1), - firstIssue1.comment.line, - firstIssue1.comment.body ?? '', - undefined ); Performance.recordModelNoChanges(stateContext, runnerName, currentModel); return { shouldBreak: false, shouldContinue: false, verifiedCount: 0, - updatedUnresolvedIssues: unresolvedIssues.filter((i) => i.comment.id !== firstIssue1.comment.id), + updatedUnresolvedIssues: filterUnresolvedKeepUnaccountedClusterMembers( + unresolvedIssues, + clusterIdsHw, + stateContext, + ), progressMade: 0, }; } @@ -600,7 +729,15 @@ export async function handleNoChangesWithVerification( // (falls through to the end of this block) } else { // Full verification (spot-check passed) - const fullResult = await verifyAllIssues(unresolvedIssues, llm, stateContext, runnerName, currentModel, verifiedThisSession); + const fullResult = await verifyAllIssues( + unresolvedIssues, + llm, + stateContext, + runnerName, + currentModel, + verifiedThisSession, + dupForCluster, + ); if (fullResult) { return fullResult; } @@ -608,7 +745,15 @@ export async function handleNoChangesWithVerification( } } else { // Small number of issues — verify all directly (no spot-check needed) - const fullResult = await verifyAllIssues(unresolvedIssues, llm, stateContext, runnerName, currentModel, verifiedThisSession); + const fullResult = await verifyAllIssues( + unresolvedIssues, + llm, + stateContext, + runnerName, + currentModel, + verifiedThisSession, + dupForCluster, + ); if (fullResult) { return fullResult; } @@ -710,7 +855,9 @@ async function verifyAllIssues( stateContext: StateContext, runnerName: string, currentModel: string | undefined, - verifiedThisSession: Set + verifiedThisSession: Set, + /** When set, verifying one cluster member marks the full dedup cluster (same as fix-verification / ALREADY_FIXED dismiss). */ + duplicateMap?: Map, ): Promise<{ shouldBreak: boolean; shouldContinue: boolean; @@ -743,8 +890,13 @@ async function verifyAllIssues( if (result && !result.exists) { verifiedAsFixed++; - Verification.markVerified(stateContext, issue.comment.id); - verifiedThisSession.add(issue.comment.id); + const anchorId = issue.comment.id; + const clusterIds = getDuplicateClusterCommentIds(anchorId, duplicateMap); + for (const cid of clusterIds) { + if (Verification.isVerified(stateContext, cid)) continue; + Verification.markVerified(stateContext, cid, cid === anchorId ? undefined : anchorId); + verifiedThisSession.add(cid); + } { const primaryPath = getIssuePrimaryPath(issue); console.log(chalk.greenBright(` ✓ RESOLVED: ${primaryPath}${issue.comment.line != null ? `:${issue.comment.line}` : ''} — ${result.explanation}`)); diff --git a/tools/prr/workflow/post-verification-handling.ts b/tools/prr/workflow/post-verification-handling.ts index 692ca7e..c485e0d 100644 --- a/tools/prr/workflow/post-verification-handling.ts +++ b/tools/prr/workflow/post-verification-handling.ts @@ -46,9 +46,19 @@ export async function handlePostVerification( lessonsContext: LessonsContext, options: CLIOptions, currentRunnerName: string, - trySingleIssueFix: (issues: UnresolvedIssue[], git: SimpleGit, verified?: Set) => Promise, + trySingleIssueFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verified?: Set, + comments?: ReviewComment[], + ) => Promise, tryRotation: (failureErrorType?: string) => boolean, - tryDirectLLMFix: (issues: UnresolvedIssue[], git: SimpleGit, verified?: Set) => Promise, + tryDirectLLMFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verified?: Set, + comments?: ReviewComment[], + ) => Promise, executeBailOut: (issues: UnresolvedIssue[], comments: ReviewComment[]) => Promise ): Promise<{ shouldBreak: boolean; diff --git a/tools/prr/workflow/push-iteration-loop.ts b/tools/prr/workflow/push-iteration-loop.ts index 4fe3192..7b87178 100644 --- a/tools/prr/workflow/push-iteration-loop.ts +++ b/tools/prr/workflow/push-iteration-loop.ts @@ -41,6 +41,11 @@ import * as Bailout from '../state/state-bailout.js'; import * as LessonsAPI from '../state/lessons-index.js'; import { assessSolvability, recheckSolvability } from './helpers/solvability.js'; import type { FindUnresolvedIssuesOptions } from './issue-analysis.js'; +import { + dismissDuplicateClusterFromComments, + getClusterIdsAccountedOnState, + resolveEffectiveDuplicateMapForComments, +} from './issue-analysis-dedup.js'; import { looksLikeCreateFileIssue } from './utils.js'; /** Git and GitHub context for a push iteration */ @@ -89,7 +94,19 @@ export interface PushIterationContexts { * Cache of last analysis result (comment IDs + headSha + file hashes → unresolved, duplicateMap). * When comment set and file content for comment paths unchanged, reuse to skip expensive findUnresolvedIssues (output.log audit). */ - lastAnalysisCacheRef?: { current: { commentCount: number; headSha: string; commentIds?: string; fileHashesKeyDigest?: string; unresolvedIssues: UnresolvedIssue[]; comments: ReviewComment[]; duplicateMap: Map; changedFiles?: string[] } | null }; + lastAnalysisCacheRef?: { + current: { + commentCount: number; + headSha: string; + commentIds?: string; + fileHashesKeyDigest?: string; + unresolvedIssues: UnresolvedIssue[]; + comments: ReviewComment[]; + duplicateMap: Map; + changedFiles?: string[]; + blastRadiusPaths?: string[]; + } | null; + }; /** Thread IDs we have already replied to this run (one reply per thread). */ repliedThreadIds: Set; } @@ -109,9 +126,19 @@ export interface PushIterationCallbacks { getCurrentModel: () => string | undefined; getRunner: () => Runner; parseNoChangesExplanation: (output: string) => string | null; - trySingleIssueFix: (issues: UnresolvedIssue[], git: SimpleGit, verifiedThisSession?: Set) => Promise; + trySingleIssueFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verifiedThisSession?: Set, + comments?: ReviewComment[], + ) => Promise; tryRotation: (failureErrorType?: string) => boolean; - tryDirectLLMFix: (issues: UnresolvedIssue[], git: SimpleGit, verifiedThisSession?: Set) => Promise; + tryDirectLLMFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verifiedThisSession?: Set, + comments?: ReviewComment[], + ) => Promise; executeBailOut: (issues: UnresolvedIssue[], comments: ReviewComment[]) => Promise; /** Called when a runner fails with tool_config (e.g. unknown option) so it's skipped for rest of run */ onDisableRunner?: (runnerName: string) => void; @@ -200,6 +227,8 @@ export async function executePushIteration( ); const { comments, unresolvedIssues, duplicateMap, changedFiles: prChangedFiles } = loopResult; + stateContext.prChangedFilesForRecovery = prChangedFiles; + stateContext.duplicateMapForSession = duplicateMap; debug('Push iteration: comments processed', { pushIteration, commentCount: comments.length, @@ -267,6 +296,7 @@ export async function executePushIteration( checkForNewBotReviews, getCodeSnippet, getCurrentModel, config.githubToken, workdir, prChangedFiles, + duplicateMap, ); if (preChecks.shouldBreak) { @@ -278,6 +308,12 @@ export async function executePushIteration( prInfoRef.current.headSha = preChecks.updatedHeadSha; } + const effectiveDuplicateMap = resolveEffectiveDuplicateMapForComments( + stateContext, + duplicateMap, + comments, + ); + // Dismiss issues that hit couldNotInject threshold (file unresolved in repo + no-change cycles). // WHY: The threshold is also checked in findUnresolvedIssues, but that only runs at the start of // a push iteration. Inside the fix loop we keep retrying single-issue focus without re-running @@ -289,9 +325,12 @@ export async function executePushIteration( }); if (couldNotInjectDismiss.length > 0) { const reason = 'Target file could not be resolved in the repository (repeated could-not-inject + no-change cycles)'; - const dismissedIds = new Set(couldNotInjectDismiss.map((i) => i.comment.id)); + const dismissedIds = new Set(); for (const issue of couldNotInjectDismiss) { - Dismissed.dismissIssue(stateContext, issue.comment.id, reason, 'file-unchanged', getIssuePrimaryPath(issue), issue.comment.line, issue.comment.body, undefined); + dismissDuplicateClusterFromComments(stateContext, issue.comment, effectiveDuplicateMap, comments, reason, 'file-unchanged'); + for (const cid of getClusterIdsAccountedOnState(stateContext, issue.comment.id, effectiveDuplicateMap)) { + dismissedIds.add(cid); + } } unresolvedIssues.splice(0, unresolvedIssues.length, ...unresolvedIssues.filter((i) => !dismissedIds.has(i.comment.id))); console.log(chalk.yellow(` ${formatNumber(couldNotInjectDismiss.length)} issue(s) dismissed (file not in repo after repeated could-not-inject + no-change cycles)`)); @@ -313,9 +352,12 @@ export async function executePushIteration( ); if (deleteEntirelyDismiss.length > 0) { const reason = 'Requires file deletion (use or resolve manually)'; - const dismissedIds = new Set(deleteEntirelyDismiss.map((i) => i.comment.id)); + const dismissedIds = new Set(); for (const issue of deleteEntirelyDismiss) { - Dismissed.dismissIssue(stateContext, issue.comment.id, reason, 'remaining', getIssuePrimaryPath(issue), issue.comment.line, issue.comment.body, undefined); + dismissDuplicateClusterFromComments(stateContext, issue.comment, effectiveDuplicateMap, comments, reason, 'remaining'); + for (const cid of getClusterIdsAccountedOnState(stateContext, issue.comment.id, effectiveDuplicateMap)) { + dismissedIds.add(cid); + } } unresolvedIssues.splice(0, unresolvedIssues.length, ...unresolvedIssues.filter((i) => !dismissedIds.has(i.comment.id))); console.log(chalk.yellow(` ${formatNumber(deleteEntirelyDismiss.length)} issue(s) dismissed (requires file deletion after ${DELETE_ENTIRELY_DISMISS_THRESHOLD}+ verifier verdicts)`)); @@ -341,7 +383,7 @@ export async function executePushIteration( }); if (wrongFileIssues.length > 0) { debug('Trying single-issue first for issues with wrong-file history (1–2 attempts)', { count: wrongFileIssues.length }); - const singleFixed = await trySingleIssueFix(wrongFileIssues, git, verifiedThisSession); + const singleFixed = await trySingleIssueFix(wrongFileIssues, git, verifiedThisSession, comments); if (singleFixed) { unresolvedIssues.splice(0, unresolvedIssues.length, ...unresolvedIssues.filter((i) => !verifiedThisSession.has(i.comment.id))); if (unresolvedIssues.length === 0) { @@ -374,7 +416,7 @@ export async function executePushIteration( rapidFailureCount, lastFailureTime, consecutiveFailures, modelFailuresInCycle, progressThisCycle, getCurrentModel, parseNoChangesExplanation, trySingleIssueFix, tryRotation, tryDirectLLMFix, executeBailOut, fixIteration, - duplicateMap, + effectiveDuplicateMap, callbacks.onDisableRunner ); @@ -418,7 +460,21 @@ export async function executePushIteration( // WHY: Verification result is what we need; fetch has no shared mutable state with it. // Best-effort fetch so a network blip does not fail the iteration. const [verifyResult] = await Promise.all([ - ResolverProc.verifyFixes(git, unresolvedIssues, stateContext, lessonsContext, llm, verifiedThisSession, options.noBatch, duplicateMap, workdir, getCurrentModel, getRunner, filesModifiedThisRun), + ResolverProc.verifyFixes( + git, + unresolvedIssues, + stateContext, + lessonsContext, + llm, + verifiedThisSession, + options.noBatch, + effectiveDuplicateMap, + workdir, + getCurrentModel, + getRunner, + filesModifiedThisRun, + comments, + ), git.fetch().catch(() => {}), ]); const { verifiedCount, failedCount, changedIssues, unchangedIssues, changedFiles } = verifyResult; @@ -507,38 +563,35 @@ export async function executePushIteration( for (const issue of stillUnresolved) { const solvability = assessSolvability(gitCtx.workdir, issue.comment, stateContext); if (!solvability.solvable && solvability.dismissCategory === 'chronic-failure') { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - issue.comment.id, + issue.comment, + effectiveDuplicateMap, + comments, solvability.reason ?? 'Chronic failure — too many fix attempts with no success', 'chronic-failure', - getIssuePrimaryPath(issue), - issue.comment.line, - issue.comment.body ); - chronicDismissed.push(issue.comment.id); + chronicDismissed.push(...getClusterIdsAccountedOnState(stateContext, issue.comment.id, effectiveDuplicateMap)); } else if (!solvability.solvable && solvability.dismissCategory === 'already-fixed') { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - issue.comment.id, + issue.comment, + effectiveDuplicateMap, + comments, solvability.reason ?? 'Multiple models reported already fixed — dismissing', 'already-fixed', - getIssuePrimaryPath(issue), - issue.comment.line, - issue.comment.body ); - alreadyFixedDismissed.push(issue.comment.id); + alreadyFixedDismissed.push(...getClusterIdsAccountedOnState(stateContext, issue.comment.id, effectiveDuplicateMap)); } else if (!solvability.solvable && solvability.dismissCategory === 'remaining') { - Dismissed.dismissIssue( + dismissDuplicateClusterFromComments( stateContext, - issue.comment.id, + issue.comment, + effectiveDuplicateMap, + comments, solvability.reason ?? 'Repeated failures — dismissing for human follow-up', 'remaining', - getIssuePrimaryPath(issue), - issue.comment.line, - issue.comment.body ); - remainingDismissed.push(issue.comment.id); + remainingDismissed.push(...getClusterIdsAccountedOnState(stateContext, issue.comment.id, effectiveDuplicateMap)); } } if (chronicDismissed.length > 0) { @@ -578,11 +631,13 @@ export async function executePushIteration( const getCodeSnippetFn = (path: string, line: number | null, body?: string) => ResolverProc.getCodeSnippet(gitCtx.workdir, path, line, body); const refreshResult = await recheckSolvability( - unresolvedIssues, - changedFiles, - gitCtx.workdir, - stateContext, - getCodeSnippetFn + unresolvedIssues, + changedFiles, + gitCtx.workdir, + stateContext, + getCodeSnippetFn, + effectiveDuplicateMap, + comments, ); if (refreshResult.dismissed > 0) { console.log(chalk.yellow(` ${refreshResult.dismissed} issue(s) became stale (files deleted by fixer)`)); diff --git a/tools/prr/workflow/repository.ts b/tools/prr/workflow/repository.ts index 2e0a89e..d6a4407 100644 --- a/tools/prr/workflow/repository.ts +++ b/tools/prr/workflow/repository.ts @@ -103,7 +103,9 @@ export async function cloneOrUpdateRepository( } /** - * Recover verification state from git commit messages + * Recover verification state from git commit messages. + * Dedup siblings are expanded on the first `findUnresolvedIssues` pass when **`state.dedupCache`** + * matches the current PR comment id set — see **`expandGitRecoveredVerificationFromDedupCache`** (`duplicate-cluster-verify.ts`). */ export async function recoverVerificationState( git: SimpleGit, @@ -130,7 +132,9 @@ export async function recoverVerificationState( console.log(chalk.cyan(`Recovered ${formatNumber(n)} previously committed ${pluralize(n, 'fix', 'fixes')} from git history`)); for (const commentId of committedFixes) { if (!Verification.isVerified(stateContext, commentId)) { - Verification.markVerified(stateContext, commentId, Verification.PRR_GIT_RECOVERY_VERIFIED_MARKER); + Verification.markVerified(stateContext, commentId, Verification.PRR_GIT_RECOVERY_VERIFIED_MARKER, { + skipSessionTracking: true, + }); } } // WHY: So the first analysis skips stale re-check and unmark for these IDs (output.log audit). diff --git a/tools/prr/workflow/restore-from-base.ts b/tools/prr/workflow/restore-from-base.ts index ca615be..38be39c 100644 --- a/tools/prr/workflow/restore-from-base.ts +++ b/tools/prr/workflow/restore-from-base.ts @@ -8,7 +8,7 @@ import { resolve } from 'path'; import { writeFileSync } from 'fs'; import { debug } from '../../../shared/logger.js'; import { PROTECTED_DIRS } from '../../../shared/git/git-commit-core.js'; -import type { UnresolvedIssue } from '../analyzer/types.js'; +import { getIssuePrimaryPath, type UnresolvedIssue } from '../analyzer/types.js'; /** * Parse fixer/LLM output for "restore from base" or "file corrupted" intent. @@ -33,12 +33,12 @@ export function parseRestoreFromBaseIntent( if (path && !path.includes('..') && path.length < 300) return path; } - // Fallback: single unresolved issue's file - if (unresolvedIssues.length === 1) return unresolvedIssues[0].comment.path; + // Fallback: single unresolved issue's file (canonical path when basename was resolved). + if (unresolvedIssues.length === 1) return getIssuePrimaryPath(unresolvedIssues[0]); if (unresolvedIssues.length > 1) { // Prefer a path that appears in the output (e.g. "restore lib/privy-sync.ts from base") for (const issue of unresolvedIssues) { - const p = issue.comment.path; + const p = getIssuePrimaryPath(issue); if (output.includes(p)) return p; } return null; diff --git a/tools/prr/workflow/run-orchestrator.ts b/tools/prr/workflow/run-orchestrator.ts index f1cc8fb..b4bf064 100644 --- a/tools/prr/workflow/run-orchestrator.ts +++ b/tools/prr/workflow/run-orchestrator.ts @@ -88,9 +88,19 @@ export interface RunCallbacks { getCodeSnippet: (path: string, line: number | null, commentBody?: string) => Promise; printUnresolvedIssues: (issues: UnresolvedIssue[]) => void; parseNoChangesExplanation: (output: string) => string | null; - trySingleIssueFix: (issues: UnresolvedIssue[], git: SimpleGit, verifiedThisSession?: Set) => Promise; + trySingleIssueFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verifiedThisSession?: Set, + comments?: ReviewComment[], + ) => Promise; tryRotation: (failureErrorType?: string) => boolean; - tryDirectLLMFix: (issues: UnresolvedIssue[], git: SimpleGit, verifiedThisSession?: Set) => Promise; + tryDirectLLMFix: ( + issues: UnresolvedIssue[], + git: SimpleGit, + verifiedThisSession?: Set, + comments?: ReviewComment[], + ) => Promise; executeBailOut: (issues: UnresolvedIssue[], comments: ReviewComment[]) => Promise; onDisableRunner?: (runnerName: string) => void; /** Reset model rotation to first model for this push iteration (pushIteration > 1). WHY: Each push cycle gets best model first instead of retrying the model that may have just 500'd or timed out. */ diff --git a/tools/prr/workflow/startup.ts b/tools/prr/workflow/startup.ts index e19b9f4..2cf87e5 100644 --- a/tools/prr/workflow/startup.ts +++ b/tools/prr/workflow/startup.ts @@ -19,6 +19,9 @@ import chalk from 'chalk'; import { warn, info, debug, debugStep, formatDuration, formatNumber } from '../../../shared/logger.js'; import { getWorkdirInfo, ensureWorkdir } from '../../../shared/git/workdir.js'; +/** One stale-inline warning per process per (repo, PR, HEAD, bot review SHA) — pill-output #619. */ +const codeRabbitStaleInlineWarned = new Set(); + /** * Display PR status including CI checks, bot reviews, and overall activity */ @@ -200,11 +203,15 @@ export async function checkCodeRabbitStatus( crResult.botReviewCommitSha !== headSha ) { staleInlineReviewVsHead = true; - console.log( - chalk.yellow( - ` ⚠ CodeRabbit's latest review targets \`${crResult.botReviewCommitSha.substring(0, 7)}\`; PR HEAD is \`${headSha.substring(0, 7)}\` — inline comments may be stale until the bot re-reviews.`, - ), - ); + const staleKey = `${owner}\0${repo}\0${String(prNumber)}\0${headSha}\0${crResult.botReviewCommitSha}`; + if (!codeRabbitStaleInlineWarned.has(staleKey)) { + codeRabbitStaleInlineWarned.add(staleKey); + console.log( + chalk.yellow( + ` ⚠ CodeRabbit's latest review targets \`${crResult.botReviewCommitSha.substring(0, 7)}\`; PR HEAD is \`${headSha.substring(0, 7)}\` — inline comments may be stale until the bot re-reviews.`, + ), + ); + } } // Check for bot rate-limit signals (e.g. CodeRabbit posting "review paused") diff --git a/tools/prr/workflow/thread-replies.ts b/tools/prr/workflow/thread-replies.ts index a4184cd..8464df0 100644 --- a/tools/prr/workflow/thread-replies.ts +++ b/tools/prr/workflow/thread-replies.ts @@ -25,10 +25,12 @@ const DISMISSED_CATEGORIES_BASE = new Set([ 'false-positive', 'remaining', 'exhausted', - 'path-unresolved', // e.g. .d.ts fragment — reply so thread has visible feedback + 'path-unresolved', // ambiguous basename / cannot pick one file — reply so thread has visible feedback + 'path-fragment', // extension-only / bare .d.ts — reply so thread has visible feedback 'missing-file', // file not found — reply so thread has visible feedback 'duplicate', 'file-unchanged', + 'out-of-scope', // blast radius (opt-in dismiss) — manual review if comment still valid ]); /** Categories that receive a dismissed-thread reply for this process (base set + optional chronic-failure). */ @@ -76,6 +78,62 @@ function getErrorDetails(err: unknown): { status?: number; message: string; body return { status, message, body }; } +/** GitHub REST cap for review reply bodies (leave margin below 65,536). */ +const REVIEW_REPLY_BODY_MAX_CHARS = 60_000; + +function clampReplyBodyForGitHub(body: string): string { + if (body.length <= REVIEW_REPLY_BODY_MAX_CHARS) return body; + return `${body.slice(0, REVIEW_REPLY_BODY_MAX_CHARS - 24)}\n[body truncated]`; +} + +/** + * When true, 422 is almost certainly stale thread / diff position / comment id — a shorter body will not help. + * Skip the second API call (pill-output audits: redundant fallback still 422s). + */ +function threadReply422SkipShortBodyRetry(err: unknown): boolean { + const { body } = getErrorDetails(err); + if (body != null && typeof body === 'object' && !Array.isArray(body) && 'errors' in body) { + const errors = (body as { errors?: unknown }).errors; + if (Array.isArray(errors)) { + for (const raw of errors) { + if (!raw || typeof raw !== 'object') continue; + const e = raw as { field?: string; resource?: string; code?: string }; + const field = (e.field ?? '').toLowerCase(); + const resource = (e.resource ?? '').toLowerCase(); + if (field === 'body' || field.endsWith('_body')) return false; + if (resource.includes('pullrequestreviewcomment') || resource.includes('pull_request_review')) return true; + if ( + field === 'in_reply_to' || + field === 'commit_id' || + field === 'path' || + field === 'position' || + field === 'line' || + field === 'side' || + field === 'subject_type' || + field === 'diff_hunk' + ) { + return true; + } + } + } + } + const s = + typeof body === 'string' + ? body + : body != null + ? JSON.stringify(body) + : ''; + const lower = s.toLowerCase(); + if (/\bfield["']?\s*:\s*["']body["']/.test(s) || /\bcode["']?\s*:\s*["']too_large["']/.test(s)) { + return false; + } + return ( + /pullrequestreviewcomment|in_reply_to|"field":"commit_id"|"field":"path"|"field":"position"|"field":"line"|"field":"side"|diff_hunk/.test( + lower, + ) + ); +} + /** * Post reply; on 422/Validation Failed log full error body and retry once with shortened message. * WHY full error: GitHub's reason (body format, thread state) is in the response; we log it so we can fix. @@ -92,8 +150,10 @@ async function postReplyWithRetry( body: string, fallbackBody: string ): Promise<{ ok: boolean; is422?: boolean }> { + const primary = clampReplyBodyForGitHub(body); + const fallback = clampReplyBodyForGitHub(fallbackBody); try { - await github.replyToReviewThread(owner, repo, prNumber, databaseId, body); + await github.replyToReviewThread(owner, repo, prNumber, databaseId, primary); return { ok: true }; } catch (err) { const { status, message, body: errBody } = getErrorDetails(err); @@ -103,9 +163,13 @@ async function postReplyWithRetry( } else { debug('Failed to post reply', { threadId, error: message }); } - if (fallbackBody !== body) { + const skipShortRetry = validationFailed && threadReply422SkipShortBodyRetry(err); + if (skipShortRetry) { + debug('Skipping short-body reply retry — 422 looks like thread/diff/comment state, not body length', { threadId }); + } + if (!skipShortRetry && fallback !== primary) { try { - await github.replyToReviewThread(owner, repo, prNumber, databaseId, fallbackBody); + await github.replyToReviewThread(owner, repo, prNumber, databaseId, fallback); return { ok: true }; } catch (retryErr) { const retryDetails = getErrorDetails(retryErr); @@ -121,20 +185,26 @@ async function postReplyWithRetry( } } -/** Return type: when replyToThreads is true, returns counts for user-visible summary on high failure rate (output.log audit). */ +/** Return type: when replyToThreads is true, returns counts for user-visible summary (output.log audit / 422 storms). */ export interface PostThreadRepliesResult { attempted: number; replied: number; + /** Failures where GitHub returned 422 / validation (stale thread, bad anchor, etc.). */ + failed422: number; + /** Failures for other reasons (network, 403, non-422 errors). */ + failedOther: number; + /** Candidates not attempted after we stopped on consecutive all-422 batches. */ + skippedDueTo422Stop: number; } -/** Consecutive 422s after which we stop attempting further replies (avoids retry storm; output.log audit). */ -const MAX_CONSECUTIVE_422_BEFORE_STOP = 3; +/** Consecutive batches where **every** reply in the batch failed with 422 — then stop (avoids parallel 422 miscount; pill-output). */ +const MAX_CONSECUTIVE_ALL_422_BATCHES_BEFORE_STOP = 3; /** * Post a reply on each review thread that was verified-fixed or dismissed (with reply). * Skips ic-* threads (issue comments); skips threads already in repliedThreadIds. * Updates repliedThreadIds in-place after each successful reply. - * On 3 consecutive 422 Validation Failed, stops attempting more replies and returns counts. + * On 3 consecutive batches where every reply in the batch returns 422, stops attempting more replies (serial batch accounting; pill-output). * Caller may print a summary when replied/attempted is very low (e.g. <10%). */ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise { @@ -174,12 +244,20 @@ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise commentToThread.get(commentId) ?? commentToThread.get(commentId.toLowerCase()); const threadsRepliedThisCall: string[] = []; - const botLogin = process.env.PRR_BOT_LOGIN?.trim() || undefined; let attempted = 0; let replied = 0; - let consecutive422 = 0; + let failed422 = 0; + let failedOther = 0; + let skippedDueTo422Stop = 0; + let consecutiveAll422Batches = 0; let stopReplyDueTo422 = false; + const tallyFailure = (result: { ok: boolean; is422?: boolean }): void => { + if (result.ok) return; + if (result.is422 === true) failed422++; + else failedOther++; + }; + // Collect candidate thread IDs we might reply to (for batched cross-run idempotency check). const candidateThreadIds = new Set(); for (const commentId of verifiedCommentIds) { @@ -192,9 +270,21 @@ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise if (entry && !repliedThreadIds.has(entry.threadId)) candidateThreadIds.add(entry.threadId); } + let botLogin = process.env.PRR_BOT_LOGIN?.trim() || undefined; + if (!botLogin && candidateThreadIds.size > 0) { + botLogin = await github.getAuthenticatedLogin(); + } + // Batch-fetch "already replied by us" for all candidates in parallel (one API call per thread, parallelized). // WHY parallel: Sequential getThreadComments would make latency linear in thread count; Promise.all keeps wall-clock time low. const alreadyRepliedByUsMap = new Map(); + if (!botLogin && candidateThreadIds.size > 0) { + console.warn( + chalk.yellow( + ' Thread replies: could not determine bot login (set PRR_BOT_LOGIN or use a token allowed to call GET /user); cross-run idempotency is off.', + ), + ); + } if (botLogin && candidateThreadIds.size > 0) { const results = await Promise.all( Array.from(candidateThreadIds, async (threadId) => { @@ -224,6 +314,20 @@ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise verifiedReplies.push({ entry, body: `Fixed in \`${short}\`.` }); } + /** How many dismissed-thread replies would still be attempted (uses current repliedThreadIds — call after verified phase updates). */ + const countDismissedReplyCandidates = (): number => { + let n = 0; + for (const d of dismissedIssues) { + if (!dismissedWithReply.has(d.category)) continue; + const entry = getThreadEntry(d.commentId); + if (!entry) continue; + if (repliedThreadIds.has(entry.threadId)) continue; + if (alreadyRepliedByUsMap.get(entry.threadId) === true) continue; + n++; + } + return n; + }; + // Process verified replies with concurrency limit (3 parallel) const REPLY_CONCURRENCY = 3; for (let i = 0; i < verifiedReplies.length && !stopReplyDueTo422; i += REPLY_CONCURRENCY) { @@ -234,35 +338,37 @@ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise const result = await postReplyWithRetry(github, owner, repo, prNumber, entry.databaseId, entry.threadId, body, 'Addressed.'); if (result.ok) { replied++; - consecutive422 = 0; repliedThreadIds.add(entry.threadId); threadsRepliedThisCall.push(entry.threadId); debug('Posted fixed reply on thread', { threadId: entry.threadId }); } else { - if (result.is422) { - consecutive422++; - if (consecutive422 >= MAX_CONSECUTIVE_422_BEFORE_STOP) { - console.log( - chalk.yellow( - `Stopping thread replies after ${formatNumber(MAX_CONSECUTIVE_422_BEFORE_STOP)} consecutive 422s (Validation Failed).`, - ), - ); - stopReplyDueTo422 = true; - } - } else { - consecutive422 = 0; - } + tallyFailure(result); } return result; }) ); - // Check if any result triggered stop - if (results.some(r => r.is422 && consecutive422 >= MAX_CONSECUTIVE_422_BEFORE_STOP)) { + const anyOk = results.some((r) => r.ok); + const all422 = + results.length > 0 && results.every((r) => !r.ok && r.is422 === true); + if (anyOk) consecutiveAll422Batches = 0; + else if (all422) consecutiveAll422Batches++; + else consecutiveAll422Batches = 0; + if (consecutiveAll422Batches >= MAX_CONSECUTIVE_ALL_422_BATCHES_BEFORE_STOP) { + const nextIdx = i + batch.length; + skippedDueTo422Stop = verifiedReplies.length - nextIdx + countDismissedReplyCandidates(); + console.log( + chalk.yellow( + `Stopping thread replies after ${formatNumber(MAX_CONSECUTIVE_ALL_422_BATCHES_BEFORE_STOP)} consecutive batches where every reply returned 422 (Validation Failed). ` + + `Posted ${formatNumber(replied)} of ${formatNumber(attempted)} so far; ${formatNumber(skippedDueTo422Stop)} thread(s) not attempted. ` + + `Often caused by comments anchored on an old commit (re-run after bots re-review) or threads GitHub no longer accepts replies on — see docs/THREAD-REPLIES.md.`, + ), + ); + stopReplyDueTo422 = true; break; } } - // Pill #10: Batch dismissed replies with concurrency limit + // Build dismissed list after verified replies so repliedThreadIds matches threads we already "Fixed in …" (avoid duplicate queue entries). const dismissedReplies: Array<{ entry: { threadId: string; databaseId: number }; body: string }> = []; for (const d of dismissedIssues) { if (!dismissedWithReply.has(d.category)) continue; @@ -280,14 +386,19 @@ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise body = 'Could not auto-fix (wrong file or repeated failures); manual review recommended.'; } else if (d.category === 'chronic-failure') { body = 'Could not auto-verify after repeated failures; batch-dismissed. Manual review if still needed.'; - } else if (d.category === 'path-unresolved') { - body = 'Could not auto-fix (path unresolved); manual review recommended.'; + } else if (d.category === 'path-unresolved' || d.category === 'path-fragment') { + body = + d.category === 'path-fragment' + ? 'Could not auto-fix (path fragment — not a single file); manual review recommended.' + : 'Could not auto-fix (path unresolved); manual review recommended.'; } else if (d.category === 'missing-file') { body = 'Could not auto-fix (file not found); manual review recommended.'; } else if (d.category === 'duplicate') { body = 'Treated as duplicate of another comment; no separate fix.'; } else if (d.category === 'file-unchanged') { body = 'No change in this file this run; manual review if still needed.'; + } else if (d.category === 'out-of-scope') { + body = 'Outside PR scope — manual review recommended.'; } else { body = `Dismissed: ${oneLine(d.reason)}`; } @@ -303,30 +414,32 @@ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise const result = await postReplyWithRetry(github, owner, repo, prNumber, entry.databaseId, entry.threadId, body, 'No change needed.'); if (result.ok) { replied++; - consecutive422 = 0; repliedThreadIds.add(entry.threadId); threadsRepliedThisCall.push(entry.threadId); debug('Posted dismissed reply on thread', { threadId: entry.threadId }); } else { - if (result.is422) { - consecutive422++; - if (consecutive422 >= MAX_CONSECUTIVE_422_BEFORE_STOP) { - console.log( - chalk.yellow( - `Stopping thread replies after ${formatNumber(MAX_CONSECUTIVE_422_BEFORE_STOP)} consecutive 422s (Validation Failed).`, - ), - ); - stopReplyDueTo422 = true; - } - } else { - consecutive422 = 0; - } + tallyFailure(result); } return result; }) ); - // Check if any result triggered stop - if (results.some(r => r.is422 && consecutive422 >= MAX_CONSECUTIVE_422_BEFORE_STOP)) { + const anyOk = results.some((r) => r.ok); + const all422 = + results.length > 0 && results.every((r) => !r.ok && r.is422 === true); + if (anyOk) consecutiveAll422Batches = 0; + else if (all422) consecutiveAll422Batches++; + else consecutiveAll422Batches = 0; + if (consecutiveAll422Batches >= MAX_CONSECUTIVE_ALL_422_BATCHES_BEFORE_STOP) { + const nextIdx = i + batch.length; + skippedDueTo422Stop = dismissedReplies.length - nextIdx; + console.log( + chalk.yellow( + `Stopping thread replies after ${formatNumber(MAX_CONSECUTIVE_ALL_422_BATCHES_BEFORE_STOP)} consecutive batches where every reply returned 422 (Validation Failed). ` + + `Posted ${formatNumber(replied)} of ${formatNumber(attempted)} so far; ${formatNumber(skippedDueTo422Stop)} thread(s) not attempted. ` + + `Often caused by comments anchored on an old commit (re-run after bots re-review) or threads GitHub no longer accepts replies on — see docs/THREAD-REPLIES.md.`, + ), + ); + stopReplyDueTo422 = true; break; } } @@ -343,5 +456,22 @@ export async function postThreadReplies(opts: PostThreadRepliesOptions): Promise } } - return { attempted, replied }; + if (attempted > 0) { + const pieces: string[] = [ + `${formatNumber(replied)} of ${formatNumber(attempted)} thread reply attempt(s) succeeded`, + ]; + if (failed422 > 0) pieces.push(`${formatNumber(failed422)} Validation Failed (422)`); + if (failedOther > 0) pieces.push(`${formatNumber(failedOther)} other failure(s)`); + if (skippedDueTo422Stop > 0) { + pieces.push(`${formatNumber(skippedDueTo422Stop)} not attempted (stopped after repeated 422 batches)`); + } + const line = ` Thread replies: ${pieces.join('; ')}.`; + if (replied === attempted && skippedDueTo422Stop === 0) { + console.log(chalk.gray(line)); + } else { + console.log(chalk.yellow(line)); + } + } + + return { attempted, replied, failed422, failedOther, skippedDueTo422Stop }; }