From 7da832a1f562e35e5f8a87ec86051cf4ac81806c Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:38:38 -0400 Subject: [PATCH 01/30] chore(TP-196): complete Step 0 \u2014 preflight + SegmentScopeMode decision --- .../STATUS.md | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md index 7c5091b7..0f55b1e6 100644 --- a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md @@ -1,11 +1,11 @@ # TP-196: Multi-segment engine hardening β€” Status -**Current Step:** Not Started -**Status:** πŸ”΅ Ready for Execution +**Current Step:** Step 0: Preflight +**Status:** 🟑 In Progress **Last Updated:** 2026-05-10 **Review Level:** 2 **Review Counter:** 0 -**Iteration:** 0 +**Iteration:** 1 **Size:** M > **Hydration:** Worker expands Steps 2-5 with concrete per-file checkboxes @@ -19,14 +19,14 @@ --- ### Step 0: Preflight -**Status:** ⬜ Not Started +**Status:** βœ… Complete -- [ ] On `main` (fresh from v0.30.0) -- [ ] All four gates pass on baseline (typecheck 0, lint 0, format:check 0, tests 3627+) -- [ ] All four issue bodies read: #462, #502, #503, #508 -- [ ] Tier 3 context files read (lane-runner.ts segment scope, execution.ts monitor + tool registration, resume.ts reconciliation, discovery.ts skip logic, segment-scoped-lane-runner test file) -- [ ] Live grep verification of `#502` condition pattern -- [ ] Decision: SegmentScopeMode promotion to first-class enum/type (recommendation in Discoveries) +- [x] On `main` (fresh from v0.30.0) β€” branch `task/henrylach-lane-1-20260510T193434` based on post-v0.30.0 main (`6b5d9de6`) +- [x] All four gates pass on baseline (typecheck 0, lint 0, format:check 0, tests 3627 pass / 1 skip / 3628 total) +- [x] All four issue bodies read: #462, #502, #503, #508 +- [x] Tier 3 context files read (lane-runner.ts segment scope, execution.ts monitor + tool registration, resume.ts reconciliation, discovery.ts skip logic, segment-scoped-lane-runner test file, types.ts segment types) +- [x] Live grep verification of `#502` condition pattern β€” `stepSegmentMap && currentRepoId` confirmed live at lane-runner.ts:398 and used inside the iteration loop; `isSegmentScoped` is computed at lane-runner.ts:458 and consumed at 483/499/642/672-673. The `TASKPLANE_ACTIVE_SEGMENT_ID` env var (line 672) and segment system-prompt overlay (line 642) are ALREADY gated on `isSegmentScoped`. The `request_segment_expansion` tool registration in `agent-bridge-extension.ts:97` keys off the env var (so indirectly gated, but not on the authoritative flag). +- [x] Decision: promote `SegmentScopeMode` to a first-class `'FULL_TASK' \| 'SEGMENT_SCOPED'` string-literal union exported from `types.ts`, plus a single computation helper `computeSegmentScopeMode(...)`. This keeps changes minimal vs. a TypeScript enum, plays well with JSON state serialization if ever needed, and lets call-sites compare `mode === "SEGMENT_SCOPED"` rather than tracking a boolean. (Recommendation logged in Discoveries.) --- @@ -130,6 +130,10 @@ | Discovery | Disposition | Location | |-----------|-------------|----------| +| Baseline gates green pre-implementation: typecheck 0, lint 0 (280 warnings/671 infos but exit 0), format:check 0, fast suite 3627 pass / 1 skip / 0 fail. | Note | baseline (post-v0.30.0) | +| `SegmentScopeMode` decision: implement as `export type SegmentScopeMode = "FULL_TASK" \| "SEGMENT_SCOPED"` in `types.ts` + a `computeSegmentScopeMode(stepSegmentMap, repoStepNumbers, currentRepoId, remainingSteps)` helper exported from `lane-runner.ts`. String-literal union (not enum) keeps the runtime cost zero and works cleanly with JSON serialization. The helper centralizes the 5-condition expression that currently lives inline at lane-runner.ts:458–465. | Step 2 plan | types.ts + lane-runner.ts | +| Pre-existing gating: `isSegmentScoped` already gates the env var `TASKPLANE_ACTIVE_SEGMENT_ID` (lane-runner.ts:672) AND segment-system-prompt overlay (line 642). The remaining drift risk #502 calls out is the scattered `stepSegmentMap && currentRepoId` conditional pattern (lines 398, 412, 517, 671, 1225, 1249, 1279) which can drift if updated unevenly. Replacing the bool-prone pattern with a single `mode === "SEGMENT_SCOPED"` reference satisfies #502 without changing runtime behavior. | Step 2 plan | lane-runner.ts | +| #508 latent-fix observation: the existing `if (remainingSteps.length === 0) break` at lane-runner.ts:419 already prevents iter-2 spawn when all segment checkboxes are complete (since TP-174 commit `3ef96db8` made `remainingSteps` use `isSegmentComplete`). However there is no regression test asserting "zero iterations spawned when all segment checkboxes are pre-complete", so the property is undefended. TP-196 will add an explicit early-exit check just before the `spawnAgent` call AND a behavioral test that asserts the spawn is skipped. | Step 4 plan | lane-runner.ts | --- @@ -138,6 +142,8 @@ | Timestamp | Action | Outcome | |-----------|--------|---------| | 2026-05-10 | Task staged | PROMPT.md and STATUS.md created (bundles #462/#502/#503/#508) | +| 2026-05-10 23:34 | Task started | Runtime V2 lane-runner execution | +| 2026-05-10 23:34 | Step 0 started | Preflight | --- From 26737e542c3fbe43fc3d174395147391920be1a1 Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:39:10 -0400 Subject: [PATCH 02/30] chore(TP-197): step 0 preflight complete --- .../STATUS.md | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md index 23b41835..b98c8a0d 100644 --- a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md +++ b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md @@ -1,11 +1,11 @@ # TP-197: Dashboard segment-level progress indicators β€” Status -**Current Step:** Not Started -**Status:** πŸ”΅ Ready for Execution +**Current Step:** Step 1: Plan the API + visual design +**Status:** 🟑 In Progress **Last Updated:** 2026-05-10 **Review Level:** 1 **Review Counter:** 0 -**Iteration:** 0 +**Iteration:** 1 **Size:** S-M > **Hydration:** Worker expands Step 2/3 with concrete render-site checkboxes @@ -17,14 +17,14 @@ --- ### Step 0: Preflight -**Status:** ⬜ Not Started +**Status:** βœ… Complete -- [ ] On `main` (fresh from v0.30.0) -- [ ] All four gates pass on baseline -- [ ] Issue #464 read in full -- [ ] Tier 3 context files read (dashboard/public/app.js, style.css, server.cjs, types.ts segment shapes) -- [ ] API verification: does `dashboard/server.cjs` already surface segment data, or does the worker need to extend it? -- [ ] Real-world test case identified (recent multi-segment batch in `.pi/runtime//`) +- [x] On `main` (fresh from v0.30.0) β€” base 6b5d9de from `main`, segment-followups feature branch merged via #576 +- [x] All four gates pass on baseline β€” typecheck 0, lint 0, format:check 0, tests 3627/3628 pass (1 skipped) +- [x] Issue #464 read in full +- [x] Tier 3 context files read (dashboard/public/app.js, style.css, server.cjs, types.ts segment shapes) +- [x] API verification: `dashboard/server.cjs` line 1257 already exposes `segments: state.segments || []` to the frontend. Each segment record carries `{ segmentId, taskId, repoId, status, laneId, ... }` per `PersistedSegmentRecord` (types.ts:2885). No API extension needed; rendering work is purely client-side in `app.js` + `style.css`. +- [x] Real-world test case identified β€” current `.pi/batch-state.json` (this very batch) has segments[] populated but only single-segment per task. For visual validation we will construct a synthetic batch-state fixture with multi-segment tasks (taskA has 3 segments across shared-libs/web-client/admin) and load it via the dashboard's static server. Manual visual verification will also exercise the same code paths against any future real polyrepo batch via the tp-test-workspace. --- @@ -96,6 +96,9 @@ | Discovery | Disposition | Location | |-----------|-------------|----------| +| **API already complete** β€” `dashboard/server.cjs` (line 1257) exposes `segments: state.segments \|\| []` with full `PersistedSegmentRecord` shape (`{segmentId, taskId, repoId, status, laneId, sessionName, worktreePath, branch, startedAt, endedAt, retries, dependsOnSegmentIds, exitDiagnostic?}`). Tasks already carry `segmentIds: string[]`. No server-side work required β€” Step 2 "data plumbing" reduces to a no-op aside from validating existing shape. | Frontend-only change; Step 2 noted as verification | `dashboard/server.cjs:1257`, `extensions/taskplane/types.ts:2885` | +| **Existing partial rendering** β€” `parseSegmentId`, `segmentProgressText`, `buildSegmentStatusMap`, `taskSegmentProgress`, `laneActiveSegmentInfo` already exist (app.js lines 323–405). Lane header shows a single β€œSegment N/T: repo” pill (`.lane-segment`, line 758); task row shows the same per-task (`.task-segment-progress`, line 864). **Missing: per-segment status indicators** β€” today’s render shows only the *current* segment, not the row of βœ…/⏳/⬚ status across ALL segments. | This is the visibility gap TP-197 closes | `dashboard/public/app.js:323-405,758,864` | +| **Progress-bar plumbing already segment-scoped (TP-174)** β€” `v2Progress` (the runtime V2 lane snapshot) already provides segment-scoped checked/total, used in app.js:818-829 (`useV2Progress`). The bar today reflects current-segment progress when V2 snapshot is fresh. **Missing: two-tone visual** showing completed segments + current-segment progress portion. Optional enhancement per Step 1 plan. | Address as a visual layer over existing data | `dashboard/public/app.js:805-829` | --- @@ -104,6 +107,10 @@ | Timestamp | Action | Outcome | |-----------|--------|---------| | 2026-05-10 | Task staged | PROMPT.md and STATUS.md created | +| 2026-05-10 23:34 | Task started | Runtime V2 lane-runner execution | +| 2026-05-10 23:34 | Step 0 started | Preflight | +| 2026-05-10 | Step 0 complete | API already complete; rendering work is purely client-side | +| 2026-05-10 | Step 1 started | Plan API + visual design | --- From bb50b55560836a2c831a8196e18daf7895e5566e Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:39:23 -0400 Subject: [PATCH 03/30] plan(TP-196): Step 1 \u2014 per-issue design + cross-issue coordination --- .../STATUS.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md index 0f55b1e6..67f66ee3 100644 --- a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md @@ -31,16 +31,16 @@ --- ### Step 1: Plan all four fixes -**Status:** ⬜ Not Started +**Status:** 🟨 In Progress > ⚠️ Plan-review checkpoint. -- [ ] #462 design (3 guards + edge-case tests) -- [ ] #502 design (SegmentScopeMode promotion + gate sites) -- [ ] #503 design (test file structure + 4 scenarios) -- [ ] #508 design (pre-spawn check site + exit-condition semantics) -- [ ] Cross-issue coordination documented -- [ ] Drafts in Discoveries +- [x] #462 design (3 guards + edge-case tests) β€” see Discoveries `#462 plan` +- [x] #502 design (SegmentScopeMode promotion + gate sites) β€” see Discoveries `#502 plan` +- [x] #503 design (test file structure + 4 scenarios) β€” see Discoveries `#503 plan` +- [x] #508 design (pre-spawn check site + exit-condition semantics) β€” see Discoveries `#508 plan` +- [x] Cross-issue coordination documented β€” see Discoveries `cross-issue` +- [x] Drafts in Discoveries --- @@ -134,6 +134,11 @@ | `SegmentScopeMode` decision: implement as `export type SegmentScopeMode = "FULL_TASK" \| "SEGMENT_SCOPED"` in `types.ts` + a `computeSegmentScopeMode(stepSegmentMap, repoStepNumbers, currentRepoId, remainingSteps)` helper exported from `lane-runner.ts`. String-literal union (not enum) keeps the runtime cost zero and works cleanly with JSON serialization. The helper centralizes the 5-condition expression that currently lives inline at lane-runner.ts:458–465. | Step 2 plan | types.ts + lane-runner.ts | | Pre-existing gating: `isSegmentScoped` already gates the env var `TASKPLANE_ACTIVE_SEGMENT_ID` (lane-runner.ts:672) AND segment-system-prompt overlay (line 642). The remaining drift risk #502 calls out is the scattered `stepSegmentMap && currentRepoId` conditional pattern (lines 398, 412, 517, 671, 1225, 1249, 1279) which can drift if updated unevenly. Replacing the bool-prone pattern with a single `mode === "SEGMENT_SCOPED"` reference satisfies #502 without changing runtime behavior. | Step 2 plan | lane-runner.ts | | #508 latent-fix observation: the existing `if (remainingSteps.length === 0) break` at lane-runner.ts:419 already prevents iter-2 spawn when all segment checkboxes are complete (since TP-174 commit `3ef96db8` made `remainingSteps` use `isSegmentComplete`). However there is no regression test asserting "zero iterations spawned when all segment checkboxes are pre-complete", so the property is undefended. TP-196 will add an explicit early-exit check just before the `spawnAgent` call AND a behavioral test that asserts the spawn is skipped. | Step 4 plan | lane-runner.ts | +| **#502 plan** β€” (1) Add to `types.ts`: `export type SegmentScopeMode = "FULL_TASK" \| "SEGMENT_SCOPED"`. (2) Add to `lane-runner.ts`: `export function computeSegmentScopeMode(stepSegmentMap, repoStepNumbers, currentRepoId, currentStepNum)` that returns `"SEGMENT_SCOPED"` iff the existing 5-condition `isSegmentScoped` boolean would be true, else `"FULL_TASK"`. (3) Inside the iteration loop replace the inline `const isSegmentScoped = !!( ... )` with `const segmentScopeMode = computeSegmentScopeMode(...)` and a derived `const isSegmentScoped = segmentScopeMode === "SEGMENT_SCOPED"` for backward compatibility with existing callers (we keep the boolean alias so existing reads at lines 483/499/642/672/673 continue to work). (4) The bridge extension's `request_segment_expansion` registration in `agent-bridge-extension.ts:97` is already keyed on `TASKPLANE_ACTIVE_SEGMENT_ID`, which lane-runner already gates on `isSegmentScoped` (line 672) β€” so promoting the mode to a first-class type closes the drift loop without bridge-extension changes. (5) Gate-sites audit replaces `stepSegmentMap && currentRepoId` runtime checks with a single `isSegmentScoped` reference where the variable is in scope; sites where it's NOT in scope (e.g., the snapshotSegmentCtx block at line 357, post-loop block at 1270+, emitSnapshot signature at 1482/1606) intentionally remain structural because they encode the *shape* of available data, not the mode decision. Result: one authoritative computation, two consumer references (`segmentScopeMode` for the type-explicit path, `isSegmentScoped` boolean for ergonomics). | Step 2 plan | types.ts + lane-runner.ts | +| **#462 plan** β€” *Monitor guard* (`execution.ts::resolveTaskMonitorState`): currently `.DONE` is Priority 1 unconditionally (line 1042). Add a guard: when the caller provides a `multiSegmentContext` (task has multiple segment nodes AND the active segment is known to be non-final), demote `.DONE` to a non-terminal signal and log a warning. Implementation: extend the function signature with an optional `multiSegmentContext?: { isFinalSegment: boolean; segmentId: string }` parameter β€” if `isFinalSegment === false` and `.DONE` is observed, skip Priority 1 and proceed to Priority 4 (running). Callers populate this from the task's `SegmentPlan`. Fail-loud stance: log a `WARN` execLog entry so operators see the unusual state. *Resume guard* (`resume.ts::collectDoneTaskIdsForResume`): currently `.DONE` is accepted unconditionally. Add a sanity check: for multi-segment tasks, verify the task's segment frontier in `persistedState.tasks[i].segments` is complete (all segments status === "succeeded") before accepting `.DONE`. If `.DONE` exists but the frontier is incomplete, log a warning and DO NOT add the taskId to the done set (so it re-executes). Stance: fail-loud-and-recover (we don't auto-delete the marker; resume retries the task, which lets the engine re-establish authoritative state). *Discovery safeguard* (`discovery.ts::scanAreaForTasks` and `buildCompletedTaskSet`): on every `.DONE` skip in a folder whose PROMPT.md parses to a multi-segment plan, emit a one-line `console.warn` if there's evidence the frontier is incomplete (specifically: a STATUS.md segment block exists with unchecked items). This is purely a doctor-style warning β€” no behavioral change to discovery itself, since discovery is invoked early and lacks the persisted-state context needed to make a hard decision. *Tests*: (a) `resolveTaskMonitorState` returns non-terminal status when `.DONE` exists but `multiSegmentContext.isFinalSegment === false`; (b) `collectDoneTaskIdsForResume` excludes tasks where `.DONE` is present but the persisted segment frontier is incomplete; (c) `collectDoneTaskIdsForResume` *includes* tasks where `.DONE` is present and the frontier is complete (regression guard for normal case); (d) discovery warns (but does not skip differently) on inconsistent state. | Step 3 plan | execution.ts + resume.ts + discovery.ts | +| **#508 plan** β€” Add an explicit pre-spawn `isSegmentComplete` check immediately before the `spawnAgent` call (β‰ˆ lane-runner.ts:705). Implementation: after `repoStepNumbers` is computed but before `spawnAgent(hostOpts, ...)`, when `isSegmentScoped`, recompute `isCurrentSegmentComplete = [...repoStepNumbers].every((stepNum) => isSegmentComplete(iterStatusContent, stepNum, currentRepoId!))` and if true, `break` out of the iteration loop. This is redundant with the line-419 `remainingSteps.length === 0` check by construction (since `remainingSteps` already uses `isSegmentComplete`) but: (a) makes the contract explicit at the spawn boundary, (b) catches edge cases where parsed.steps and the repo step set diverge, (c) provides a clean hook for the regression test. Exit-condition: `break` to fall through to post-loop completion handling (same path as the existing line-419 break). *Test*: spawn-shim-based behavioral test β€” set up a STATUS.md fixture with all segment checkboxes pre-checked, invoke the iteration loop, assert the worker is NOT spawned (zero `spawnAgent` calls) and `totalIterations === 0`. | Step 4 plan | lane-runner.ts | +| **#503 plan** β€” Extend the existing `extensions/tests/segment-scoped-lane-runner.test.ts` with a new `### 9.x: SegmentScopeMode prompt-injection regression` block. The existing file already does source-string analysis of `lane-runner.ts` (sections 4–8), so adding two more source-analysis groups for `FULL_TASK` vs `SEGMENT_SCOPED` prompt contents keeps the testing strategy consistent. Cases: (1) **FULL_TASK** β€” source analysis confirms prompt-construction branch does NOT inject `Active segment ID` or `Your checkboxes for this step:` when `isSegmentScoped === false`. (2) **SEGMENT_SCOPED** β€” source analysis confirms prompt includes `Active segment ID`, `Your checkboxes for this step:`, and `Other segments in this step (NOT yours β€” do not attempt)`. (3) **Polyrepo single-segment** β€” a behavioral fixture-based test: when `stepSegmentMap` is null/empty (FULL_TASK mode), `remainingSteps` includes ALL steps and is not artificially truncated to Step 0 only. (4) **Legacy/partial-marker fallback** β€” fixture where some steps have segment markers and others don't: assert that `repoStepNumbers` is constructed from only the marked steps, AND that unmarked steps with checkboxes for the active repo are NOT silently scoped out. (Most of this is testable via the existing `getStepsForRepoId` + `isSegmentComplete` helpers without a full lane-runner spawn.) Update `extensions/tests/lane-runner-v2.test.ts`: if the new `SegmentScopeMode` first-class type changes any exported contract, mirror that change here β€” likely a no-op since promoted type is additive. | Step 5 plan | extensions/tests/segment-scoped-lane-runner.test.ts | +| **cross-issue** coordination β€” (a) #502 (SegmentScopeMode promotion) lands FIRST so #462 and #508 can reference `segmentScopeMode` (or `isFinalSegment`) consistently. (b) #462's monitor guard reads from the task's existing `SegmentPlan` data on the lane snapshot β€” the new `multiSegmentContext` param is opt-in for callers (defaulted to `undefined` = legacy behavior), so existing tests still pass without modification. (c) #462's resume guard and #508's pre-spawn check are independent code paths and do not interact (resume runs once at startup; pre-spawn runs each iteration). (d) #508's pre-spawn check uses the SAME `isSegmentComplete` helper the monitor doesn't β€” monitor checks `.DONE`; lane-runner checks checkboxes. So no shared mutation risk. (e) #503's tests assert the prompt-content contract that #502 cements; #503 should run AFTER #502 lands. | Sequencing | all 4 files | --- From 8f55f0a5747a08c7f1f50907f755cef71fa778ce Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:40:20 -0400 Subject: [PATCH 04/30] docs(TP-197): step 1 design plan drafted (ready for plan review) --- .../STATUS.md | 91 +++++++++++++++++-- 1 file changed, 85 insertions(+), 6 deletions(-) diff --git a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md index b98c8a0d..5b889eb3 100644 --- a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md +++ b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md @@ -29,15 +29,94 @@ --- ### Step 1: Plan the API + visual design -**Status:** ⬜ Not Started +**Status:** 🟨 In Progress > ⚠️ Plan-review checkpoint. -- [ ] API design + JSON shape documented -- [ ] Visual design (pill row + progress-bar behavior) documented -- [ ] Single-segment fallback confirmed (no regression for non-segmented tasks) -- [ ] Mobile/narrow-viewport considered -- [ ] Drafts in Discoveries +- [x] API design + JSON shape documented β€” see **Design Plan Β§ 1** below +- [x] Visual design (pill row + progress-bar behavior) documented β€” see **Design Plan Β§ 2–3** +- [x] Single-segment fallback confirmed (no regression for non-segmented tasks) β€” see **Design Plan Β§ 4** +- [x] Mobile/narrow-viewport considered β€” see **Design Plan Β§ 5** +- [x] Drafts in Discoveries + +--- + +## Design Plan (Step 1, drafted for plan review) + +### Β§1. API design β€” no server change required + +The dashboard API already exposes everything needed for TP-197: + +- `batch.segments[]` (server.cjs:1257) β€” array of `PersistedSegmentRecord`: + ```json + { + "segmentId": "TP-002::shared-libs", + "taskId": "TP-002", + "repoId": "shared-libs", + "status": "succeeded" | "running" | "pending" | "failed" | "stalled" | "skipped", + "laneId": "lane-1", "sessionName": "…", "branch": "…", + "startedAt": 1778…, "endedAt": 1778…, "retries": 0, + "dependsOnSegmentIds": [] + } + ``` +- `batch.tasks[*].segmentIds: string[]` β€” ordered list of all segments owned by a task. +- `runtimeLaneSnapshots[laneNumber].segmentId` β€” the segment currently executing on a given lane (V2 snapshot). + +**Implication for Step 2:** β€œdata plumbing” reduces to a verification pass; no `server.cjs` change is required. The existing helpers in `app.js` (`parseSegmentId`, `buildSegmentStatusMap`, `taskSegmentProgress`, `laneActiveSegmentInfo`) already consume this shape and are sufficient inputs to the new renderer. + +### Β§2. Visual design β€” per-segment status pill row + +**Placement.** Augment the existing `.task-segment-progress` slot in each task row (app.js:864, inside the `task-step` cell). Today that slot renders a single β€œSegment N/T: repo” label for the *current* segment. We replace it with a **pill row of per-segment status badges**, one per `segmentId` in `task.segmentIds`. The lane-header `.lane-segment` pill stays as-is (its job β€” β€œthis lane is on segment N/T” β€” is different and complementary). + +**Pill format (per segment).** Compact pill: ` ` where icon comes from segment status: + +| Status | Icon | Pill class | +|--------|------|------------| +| `succeeded` | βœ… | `seg-pill seg-succeeded` | +| `running` | ⏳ | `seg-pill seg-running` | +| `pending` | ⬚ | `seg-pill seg-pending` | +| `failed` | ❌ | `seg-pill seg-failed` | +| `stalled` | ⏸ | `seg-pill seg-stalled` | +| `skipped` | β†· | `seg-pill seg-skipped` | + +The **current segment** (the one the lane is actively executing, identified via `v2snap.segmentId` or `taskSegmentProgress().segmentId`) additionally gets `seg-pill-current` for visual emphasis (brighter ring / heavier weight). Each pill carries `title=" Β· "` for hover-tooltip. + +Pill row rendered as `
Β·Β·Β·
` inside the existing `task-step` cell, sitting **before** the existing `task-step-main` content. `flex-wrap: wrap` so it degrades gracefully on narrow viewports. + +**Rendering helper (new):** add `taskSegmentPillRow(task, segmentStatusMap, activeSegmentId)` returning the HTML string. Returns `""` when `segmentIds.length <= 1` so the single-segment path is byte-identical to today. + +**Lane-header pill:** unchanged. The existing one-line summary still has value as a fast β€œlane focus” signal. + +### Β§3. Progress-bar behavior β€” keep current bar; rely on pill row for context + +The progress bar today already reflects **current-segment** progress when the V2 snapshot is fresh (TP-174 made `v2Progress` segment-scoped). The operator-facing gap is *interpretation*: without the pill row, the bar reads as β€œtask progress”. + +**Decision: do NOT introduce a two-tone bar.** Considered and rejected because: + +1. The pill row already conveys overall task position (β€œβœ…Β·β³Β·β¬š means we’re mid-task on segment 2 of 3”). Encoding the same information in the bar duplicates signal without adding new information. +2. A two-tone bar would require aggregating progress across segments β€” segments have heterogeneous total-checkbox counts, and `v2Progress` only carries the current lane’s segment counts. Aggregation across past segments would need data we don’t persist per segment today. +3. Single-segment tasks must render identically. A conditional two-tone code path would either regress single-segment or branch on segment count, both more code than the operator value justifies. + +The pill row is sufficient. If a future task wants a two-tone bar, persistence of historical per-segment counts would be the dependency. + +### Β§4. Single-segment fallback β€” byte-identical render + +`taskSegmentProgress()` already returns `null` when `segmentIds.length <= 1`, and we keep the existing guard in the new `taskSegmentPillRow()` helper. Therefore for single-segment tasks the new helper returns `""`, the `detailBits` array remains exactly as today, and the rendered HTML for non-segmented tasks is **unchanged**. We will verify this with a manual diff: render a single-segment task before-and-after the change and confirm identical DOM. + +### Β§5. Mobile / narrow-viewport + +- Pill row uses `flex-wrap: wrap` β€” wraps onto a second line cleanly in narrow viewports. +- Each pill has a `max-width` with `text-overflow: ellipsis` for very long repoIds. +- We override the existing `.task-step` cell’s `overflow: hidden` only for the embedded `.task-segment-row` container (the surrounding text continues to ellipsis-clip). +- Worst case: ~3–5 segments common in polyrepo workspaces β€” fits one line at typical desktop widths; wraps to two lines at ≀600px viewport. Acceptable. + +**Fallback Option B** (only if A clutters in practice): move the pill row to a second grid sub-row beneath the task row, mirroring the `task-title-subtitle` pattern from TP-485 (spans cols 3–6). Decision deferred to implementation when we can eyeball a real multi-segment fixture; either way the change is contained. + +### Β§6. Test-case strategy + +- **Synthetic fixture for browser smoke**: drop a small `batch-state.json` into `.pi/runtime/` or load via the dashboard’s test mode containing one multi-segment task (3 segments Γ— shared-libs/web-client/admin) and one single-segment task; visually verify rendering for both. +- **Unit-test note**: `dashboard/public/app.js` is a browser script (not ESM, no exports), so no node-test coverage is added for the renderer itself. The Step 4 test gate is the existing 3627-test suite remaining green (we change no extension code). +- **CI verification**: the four gates (typecheck/lint/format:check/test:fast) must remain green. Lint scope explicitly excludes `dashboard/public/**` per the code-quality-gates spec, so adding JS to `app.js` does not introduce new lint surface. --- From 38fca2c9842f8e88c2ebe53832f7695e6b8b8170 Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:42:32 -0400 Subject: [PATCH 05/30] docs(TP-197): address R001 plan review - move pill row to grid row 3 (responsive-safe) --- .../.reviews/R001-plan-step1.md | 15 ++++++ .../STATUS.md | 46 +++++++++++++------ 2 files changed, 48 insertions(+), 13 deletions(-) create mode 100644 taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R001-plan-step1.md diff --git a/taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R001-plan-step1.md b/taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R001-plan-step1.md new file mode 100644 index 00000000..10e8aea4 --- /dev/null +++ b/taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R001-plan-step1.md @@ -0,0 +1,15 @@ +## Plan Review: Step 1: Plan the API + visual design + +### Verdict: REVISE + +### Summary +The plan is strong on API verification and overall UX direction: it correctly identifies that `batch.segments[]`, `task.segmentIds`, and `runtimeLaneSnapshots[*].segmentId` already provide the needed data, and the pill-row concept is a good fit for the operator visibility gap in #464. However, the responsive behavior section conflicts with current dashboard CSS, so the proposed placement would not satisfy the step’s narrow-viewport requirement as written. Resolve that layout mismatch before implementation. + +### Issues Found +1. **[Severity: important]** β€” The plan places the new segment pills inside `.task-step` (STATUS.md Design Plan Β§2), but the current responsive rule hides that entire cell at narrow widths: `.task-row .task-step { display: none; }` under `@media (max-width: 900px)` (`dashboard/public/style.css:1240`). That means the indicators disappear entirely on mobile/narrow viewports, contradicting Β§5’s β€œwrap/degrade gracefully” intent and the Step 1 requirement to account for responsive behavior. **Suggested fix:** choose a render location that remains visible at narrow widths (e.g., subtitle-style row spanning cols 3–6), or explicitly update responsive CSS so the segment row remains visible with truncation/wrapping. + +### Missing Items +- An explicit responsive contract for `<=900px` (what exactly stays visible, what wraps, what can be truncated/hidden) that aligns with existing media-query behavior. + +### Suggestions +- Since Step 0 already confirmed no server/API changes are needed, tighten Step 2 wording to β€œverify and consume existing API segment fields” to avoid implying backend edits. diff --git a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md index 5b889eb3..3a99bb1b 100644 --- a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md +++ b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md @@ -4,7 +4,7 @@ **Status:** 🟑 In Progress **Last Updated:** 2026-05-10 **Review Level:** 1 -**Review Counter:** 0 +**Review Counter:** 1 **Iteration:** 1 **Size:** S-M @@ -66,7 +66,9 @@ The dashboard API already exposes everything needed for TP-197: ### Β§2. Visual design β€” per-segment status pill row -**Placement.** Augment the existing `.task-segment-progress` slot in each task row (app.js:864, inside the `task-step` cell). Today that slot renders a single β€œSegment N/T: repo” label for the *current* segment. We replace it with a **pill row of per-segment status badges**, one per `segmentId` in `task.segmentIds`. The lane-header `.lane-segment` pill stays as-is (its job β€” β€œthis lane is on segment N/T” β€” is different and complementary). +**Placement (REVISED after R001).** Add a new **grid row 3** sub-row to `.task-row`, mirroring the row-2 `task-title-subtitle` pattern introduced in TP-485. The pill row spans cols 3–7 (`grid-column: 3 / 7; grid-row: 3;`). This placement keeps the pills visible at narrow viewports (≀900px) where the `task-step` cell is `display: none` per the existing media query β€” placing pills inside `task-step` would have hidden them on mobile (caught by reviewer R001). + +The existing `.task-segment-progress` text inside `task-step` (the β€œSegment N/T: repo” one-liner) is **removed** when the new pill row renders, to avoid duplicate signal. For single-segment tasks the existing path is preserved (neither old text nor new pill row renders β€” see Β§4). The lane-header `.lane-segment` pill stays as-is (its job β€” β€œthis lane is on segment N/T” β€” is a different, lane-level signal complementary to the task-level pill row). **Pill format (per segment).** Compact pill: ` ` where icon comes from segment status: @@ -81,7 +83,7 @@ The dashboard API already exposes everything needed for TP-197: The **current segment** (the one the lane is actively executing, identified via `v2snap.segmentId` or `taskSegmentProgress().segmentId`) additionally gets `seg-pill-current` for visual emphasis (brighter ring / heavier weight). Each pill carries `title=" Β· "` for hover-tooltip. -Pill row rendered as `
Β·Β·Β·
` inside the existing `task-step` cell, sitting **before** the existing `task-step-main` content. `flex-wrap: wrap` so it degrades gracefully on narrow viewports. +Pill row rendered as `
Β·Β·Β·
` as a separate grid item at `grid-row: 3`. `flex-wrap: wrap` so it degrades gracefully on narrow viewports. The `.task-row` `grid-template-rows` is extended to `auto auto auto` so row 3 (pill row) sits below row 2 (title subtitle); rows auto-collapse to 0 height when empty, so single-segment / title-less tasks render the same height as today. **Rendering helper (new):** add `taskSegmentPillRow(task, segmentStatusMap, activeSegmentId)` returning the HTML string. Returns `""` when `segmentIds.length <= 1` so the single-segment path is byte-identical to today. @@ -103,14 +105,25 @@ The pill row is sufficient. If a future task wants a two-tone bar, persistence o `taskSegmentProgress()` already returns `null` when `segmentIds.length <= 1`, and we keep the existing guard in the new `taskSegmentPillRow()` helper. Therefore for single-segment tasks the new helper returns `""`, the `detailBits` array remains exactly as today, and the rendered HTML for non-segmented tasks is **unchanged**. We will verify this with a manual diff: render a single-segment task before-and-after the change and confirm identical DOM. -### Β§5. Mobile / narrow-viewport +### Β§5. Mobile / narrow-viewport (REVISED after R001) + +**Responsive contract:** + +| Viewport | Pill row visibility | Pill behavior | +|----------|--------------------:|--------------| +| `> 900px` (default) | Visible in row 3 (cols 3–7) | Single line, may wrap if many segments | +| `≀ 900px` | Still visible in row 3 (cols 3 β†’ end of 6-col grid) | Wraps as needed; pills shrink to icon + repoId truncated by `max-width: 100px` + `text-overflow: ellipsis`; segment-id tooltip preserves full info | +| Very narrow (`≀ 600px`) | Wraps to multiple lines | Icon stays visible; long repoIds ellipsis-clip | -- Pill row uses `flex-wrap: wrap` β€” wraps onto a second line cleanly in narrow viewports. -- Each pill has a `max-width` with `text-overflow: ellipsis` for very long repoIds. -- We override the existing `.task-step` cell’s `overflow: hidden` only for the embedded `.task-segment-row` container (the surrounding text continues to ellipsis-clip). -- Worst case: ~3–5 segments common in polyrepo workspaces β€” fits one line at typical desktop widths; wraps to two lines at ≀600px viewport. Acceptable. +The row-3 placement is intentionally **unaffected** by the `@media (max-width: 900px) { .task-step { display: none; } }` rule (only `.task-step` is hidden; row 3 is a separate grid item). -**Fallback Option B** (only if A clutters in practice): move the pill row to a second grid sub-row beneath the task row, mirroring the `task-title-subtitle` pattern from TP-485 (spans cols 3–6). Decision deferred to implementation when we can eyeball a real multi-segment fixture; either way the change is contained. +Implementation specifics: +- Pill container: `display: flex; flex-wrap: wrap; gap: 4px;`. +- Each pill: `display: inline-flex; align-items: center; gap: 3px; padding: 1px 6px; max-width: 120px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;`. +- The pill row inherits cols 3–7 of the parent grid; in the narrow (6-col) layout, `grid-column: 3 / 7` clips to `3 / -1` effectively, still spanning the remaining width. +- Worst case (~5 segments at ≀600px viewport): 2 lines of pills. Acceptable. + +**Why we did not put pills inline in `.task-step` (originally Option A, withdrawn):** the existing 900px media query hides `.task-step` entirely (style.css:1240), which would make pills invisible on mobile. Row-3 placement avoids this entirely. ### Β§6. Test-case strategy @@ -120,12 +133,16 @@ The pill row is sufficient. If a future task wants a two-tone bar, persistence o --- -### Step 2: Implement the data plumbing +### Step 2: Verify (no API change needed) + consume existing segment fields **Status:** ⬜ Not Started -- [ ] `dashboard/server.cjs` API extended (if needed) -- [ ] Frontend types added for new API shape -- [ ] API response verified on real running batch +> Per Step 0 verification: `dashboard/server.cjs:1257` already exposes `segments[]`, +> tasks already carry `segmentIds[]`, and V2 lane snapshots carry `segmentId`. +> No server.cjs change. This step is a verification + frontend-typing pass. + +- [ ] Verify `batch.segments`, `task.segmentIds`, `runtimeLaneSnapshots[*].segmentId` are present in the live API response (sanity check using the current `.pi/batch-state.json` via the dashboard server) +- [ ] Document the consumed shape inline in `dashboard/public/app.js` (JSDoc on new helper) +- [ ] No `dashboard/server.cjs` change required (confirmed) --- @@ -168,6 +185,7 @@ The pill row is sufficient. If a future task wants a two-tone bar, persistence o | # | Type | Step | Verdict | File | |---|------|------|---------|------| +| R001 | plan | 1 | REVISE | `.reviews/R001-plan-step1.md` | --- @@ -177,6 +195,7 @@ The pill row is sufficient. If a future task wants a two-tone bar, persistence o |-----------|-------------|----------| | **API already complete** β€” `dashboard/server.cjs` (line 1257) exposes `segments: state.segments \|\| []` with full `PersistedSegmentRecord` shape (`{segmentId, taskId, repoId, status, laneId, sessionName, worktreePath, branch, startedAt, endedAt, retries, dependsOnSegmentIds, exitDiagnostic?}`). Tasks already carry `segmentIds: string[]`. No server-side work required β€” Step 2 "data plumbing" reduces to a no-op aside from validating existing shape. | Frontend-only change; Step 2 noted as verification | `dashboard/server.cjs:1257`, `extensions/taskplane/types.ts:2885` | | **Existing partial rendering** β€” `parseSegmentId`, `segmentProgressText`, `buildSegmentStatusMap`, `taskSegmentProgress`, `laneActiveSegmentInfo` already exist (app.js lines 323–405). Lane header shows a single β€œSegment N/T: repo” pill (`.lane-segment`, line 758); task row shows the same per-task (`.task-segment-progress`, line 864). **Missing: per-segment status indicators** β€” today’s render shows only the *current* segment, not the row of βœ…/⏳/⬚ status across ALL segments. | This is the visibility gap TP-197 closes | `dashboard/public/app.js:323-405,758,864` | +| **Responsive-CSS gotcha (R001)** β€” `.task-step` cell is `display: none` under `@media (max-width: 900px)` (style.css:1240). The original plan to place pills inside `.task-step` would have hidden them on mobile/narrow viewports. Revised plan moves pills to a new grid row 3 spanning cols 3–7, mirroring the `task-title-subtitle` pattern from TP-485, which is unaffected by the 900px media query. | Plan revised; pill row placed in row 3 sub-row | `dashboard/public/style.css:1237-1241` | | **Progress-bar plumbing already segment-scoped (TP-174)** β€” `v2Progress` (the runtime V2 lane snapshot) already provides segment-scoped checked/total, used in app.js:818-829 (`useV2Progress`). The bar today reflects current-segment progress when V2 snapshot is fresh. **Missing: two-tone visual** showing completed segments + current-segment progress portion. Optional enhancement per Step 1 plan. | Address as a visual layer over existing data | `dashboard/public/app.js:805-829` | --- @@ -212,3 +231,4 @@ Unlike most tasks, the success criterion for TP-197 is partially visual β€” does **dashboard/public/ stays out of Biome lint scope:** Per the code-quality-gates spec (section 3, non-goals), `dashboard/public/` is intentionally vanilla JS, out of lint scope. This task touches those files but does NOT add them to lint scope. The `.biome.json` exclusion for `dashboard/public/**` stays in place. A separate future task could opt-in to dashboard linting if/when there's demand. +| 2026-05-10 23:41 | Review R001 | plan Step 1: REVISE | From 2fd6af195d9739a08a091f745f0209c5e11a990a Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:43:21 -0400 Subject: [PATCH 06/30] feat(TP-196, #502): promote SegmentScopeMode to first-class type + unify scope decision - Add 'export type SegmentScopeMode = FULL_TASK | SEGMENT_SCOPED' to types.ts - Add computeSegmentScopeMode() helper in lane-runner.ts \u2014 single source of truth for the FULL_TASK vs SEGMENT_SCOPED decision (TP-196 / #502). - Iteration loop now derives both 'segmentScopeMode' (typed) and the legacy 'isSegmentScoped' boolean alias from one computation, preventing the multi-condition drift documented in #502. - 16 new tests (11 unit + 5 source-analysis contracts) in segment-scoped-lane-runner.test.ts cover the helper's truth table and guard the unified gating sites. - TASKPLANE_ACTIVE_SEGMENT_ID and the segment system-prompt overlay are already gated via the (now unified) isSegmentScoped alias; the request_segment_expansion tool registration inherits this gating through the env var. Targeted suite: 62/62 pass. Full fast suite: 3643 pass / 0 fail. --- extensions/taskplane/lane-runner.ts | 57 +++++++++-- extensions/taskplane/types.ts | 23 +++++ .../tests/segment-scoped-lane-runner.test.ts | 98 +++++++++++++++++++ .../.reviews/R001-plan-step1.md | 16 +++ .../STATUS.md | 20 ++-- 5 files changed, 197 insertions(+), 17 deletions(-) create mode 100644 taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R001-plan-step1.md diff --git a/extensions/taskplane/lane-runner.ts b/extensions/taskplane/lane-runner.ts index 551cd52f..a43c6f2b 100644 --- a/extensions/taskplane/lane-runner.ts +++ b/extensions/taskplane/lane-runner.ts @@ -68,6 +68,7 @@ import { type LaneTaskStatus, type SupervisorAlertCallback, type StepSegmentMapping, + type SegmentScopeMode, } from "./types.ts"; const LANE_RUNNER_DIR = dirname(fileURLToPath(import.meta.url)); @@ -178,6 +179,44 @@ export function isSegmentComplete( return result.unchecked === 0; } +/** + * Compute the authoritative `SegmentScopeMode` for one worker iteration. + * + * This is the single source of truth for the FULL_TASK vs SEGMENT_SCOPED + * decision (TP-196 / #502). All segment-related side-effects (env vars, + * system-prompt overlay, prompt content, tool registration) should derive + * their behaviour from this mode rather than re-evaluating the underlying + * boolean conditions in isolation, which is what created the drift risk + * documented in #502. + * + * Returns `SEGMENT_SCOPED` iff ALL of the following hold: + * - The task has a non-empty `stepSegmentMap` (parsed from PROMPT.md markers). + * - The lane has an associated `currentRepoId` (segmentId set, so we know + * which repo this lane is iterating). + * - The (legacy-fallback-filtered) `repoStepNumbers` set is non-null (the + * repo has at least one step with explicit segment markers). + * - A `currentStepNumber` is provided (there is a step to evaluate). + * - The current step's segment mapping contains an entry for `currentRepoId` + * (the worker actually has segment-scoped work in the current step). + * + * In any other case the mode is `FULL_TASK`. + * + * @since TP-196 + */ +export function computeSegmentScopeMode( + stepSegmentMap: StepSegmentMapping[] | undefined | null, + repoStepNumbers: Set | null, + currentRepoId: string | null, + currentStepNumber: number | null, +): SegmentScopeMode { + if (!stepSegmentMap || !currentRepoId || !repoStepNumbers) return "FULL_TASK"; + if (currentStepNumber === null) return "FULL_TASK"; + const currentStepMapping = stepSegmentMap.find((s) => s.stepNumber === currentStepNumber); + if (!currentStepMapping) return "FULL_TASK"; + const mySegment = currentStepMapping.segments.find((seg) => seg.repoId === currentRepoId); + return mySegment ? "SEGMENT_SCOPED" : "FULL_TASK"; +} + // ── Types ──────────────────────────────────────────────────────────── /** @@ -454,16 +493,16 @@ export async function executeTaskV2( /* ignore */ } - // TP-174/TP-501: Compute segment scope mode BEFORE building prompt. - const isSegmentScoped = !!( - stepSegmentMap && - currentRepoId && - repoStepNumbers && - remainingSteps.length > 0 && - stepSegmentMap - .find((s) => s.stepNumber === remainingSteps[0].number) - ?.segments.find((seg) => seg.repoId === currentRepoId) + // TP-174/TP-501/TP-196: Compute segment scope mode BEFORE building prompt. + // `segmentScopeMode` is the authoritative TP-196 flag; `isSegmentScoped` is + // preserved as a boolean alias for ergonomics at the many existing call sites. + const segmentScopeMode: SegmentScopeMode = computeSegmentScopeMode( + stepSegmentMap, + repoStepNumbers, + currentRepoId, + remainingSteps.length > 0 ? remainingSteps[0].number : null, ); + const isSegmentScoped = segmentScopeMode === "SEGMENT_SCOPED"; const promptLines = [ `Read your task instructions at: ${promptPath}`, diff --git a/extensions/taskplane/types.ts b/extensions/taskplane/types.ts index 7518961b..cc92a04b 100644 --- a/extensions/taskplane/types.ts +++ b/extensions/taskplane/types.ts @@ -180,6 +180,29 @@ export function buildExpansionRequestId(timestamp = Date.now()): string { // ── Step-Segment Mapping (Phase A: segment-scoped worker visibility) ──── +/** + * Authoritative segment-scope mode for a single worker iteration. + * + * - `FULL_TASK`: the worker sees the entire PROMPT.md, all steps, all checkboxes. + * No `Active segment ID` / `Your checkboxes for this step` prose is injected. + * Segment-related environment variables (`TASKPLANE_ACTIVE_SEGMENT_ID`, + * `TASKPLANE_SEGMENT_ID`) are hard-cleared so that runtime tools keyed on + * them (e.g., `request_segment_expansion`) cannot accidentally register. + * + * - `SEGMENT_SCOPED`: the worker is iterating a specific segment of a + * multi-segment task. Only that segment's steps and checkboxes are shown; + * `Active segment ID` is announced; segment-related env vars carry the + * active `segmentId`; the segment-overlay system prompt is appended. + * + * This is the single authoritative flag for the segment-scope decision + * (TP-196 / #502). Call sites should derive their behaviour from this mode + * rather than re-evaluating the underlying boolean conditions, which prevents + * the multiple branches drifting out of sync. + * + * @since TP-196 + */ +export type SegmentScopeMode = "FULL_TASK" | "SEGMENT_SCOPED"; + /** A group of checkboxes scoped to a single repo within a step. */ export interface SegmentCheckboxGroup { repoId: string; diff --git a/extensions/tests/segment-scoped-lane-runner.test.ts b/extensions/tests/segment-scoped-lane-runner.test.ts index cd4f3847..1a02ade4 100644 --- a/extensions/tests/segment-scoped-lane-runner.test.ts +++ b/extensions/tests/segment-scoped-lane-runner.test.ts @@ -18,6 +18,7 @@ import { getStepsForRepoId, getSegmentCheckboxes, isSegmentComplete, + computeSegmentScopeMode, } from "../taskplane/lane-runner.ts"; import type { StepSegmentMapping } from "../taskplane/types.ts"; @@ -424,6 +425,103 @@ describe("7.x: Legacy fallback β€” no behavior change for tasks without markers" }); }); +// ── 9. computeSegmentScopeMode (TP-196 / #502) ───────────────────── + +describe("9.x: computeSegmentScopeMode (TP-196 / #502)", () => { + it("9.1: returns FULL_TASK when stepSegmentMap is null", () => { + const result = computeSegmentScopeMode(null, new Set([1]), "shared-libs", 1); + expect(result).toBe("FULL_TASK"); + }); + + it("9.2: returns FULL_TASK when stepSegmentMap is undefined", () => { + const result = computeSegmentScopeMode(undefined, new Set([1]), "shared-libs", 1); + expect(result).toBe("FULL_TASK"); + }); + + it("9.3: returns FULL_TASK when currentRepoId is null", () => { + const result = computeSegmentScopeMode(MULTI_SEGMENT_MAP, new Set([1]), null, 1); + expect(result).toBe("FULL_TASK"); + }); + + it("9.4: returns FULL_TASK when repoStepNumbers is null (legacy fallback)", () => { + const result = computeSegmentScopeMode(MULTI_SEGMENT_MAP, null, "shared-libs", 1); + expect(result).toBe("FULL_TASK"); + }); + + it("9.5: returns FULL_TASK when currentStepNumber is null (no remaining steps)", () => { + const result = computeSegmentScopeMode(MULTI_SEGMENT_MAP, new Set([1]), "shared-libs", null); + expect(result).toBe("FULL_TASK"); + }); + + it("9.6: returns FULL_TASK when current step has no segment for repoId", () => { + // web-client is NOT in Step 2 of MULTI_SEGMENT_MAP + const result = computeSegmentScopeMode(MULTI_SEGMENT_MAP, new Set([0, 1, 2]), "web-client", 2); + expect(result).toBe("FULL_TASK"); + }); + + it("9.7: returns SEGMENT_SCOPED when all conditions hold for shared-libs in Step 1", () => { + const result = computeSegmentScopeMode(MULTI_SEGMENT_MAP, new Set([0, 1, 2]), "shared-libs", 1); + expect(result).toBe("SEGMENT_SCOPED"); + }); + + it("9.8: returns SEGMENT_SCOPED for web-client in Step 0", () => { + const result = computeSegmentScopeMode(MULTI_SEGMENT_MAP, new Set([0, 1]), "web-client", 0); + expect(result).toBe("SEGMENT_SCOPED"); + }); + + it("9.9: returns SEGMENT_SCOPED for single-segment map matching repoId", () => { + const result = computeSegmentScopeMode(SINGLE_SEGMENT_MAP, new Set([0, 1]), "default", 1); + expect(result).toBe("SEGMENT_SCOPED"); + }); + + it("9.10: returns FULL_TASK when stepSegmentMap is empty array", () => { + // Empty array is truthy but has no entries β€” the find() returns undefined. + const result = computeSegmentScopeMode([], new Set([1]), "shared-libs", 1); + expect(result).toBe("FULL_TASK"); + }); + + it("9.11: returns FULL_TASK when step number does not exist in map", () => { + const result = computeSegmentScopeMode(MULTI_SEGMENT_MAP, new Set([1]), "shared-libs", 99); + expect(result).toBe("FULL_TASK"); + }); +}); + +describe("9.x: SegmentScopeMode source-analysis contracts (TP-196 / #502)", () => { + let laneRunnerSrc: string; + + it("9.20: load lane-runner source", async () => { + const { readFileSync } = await import("node:fs"); + const { join, dirname } = await import("node:path"); + const { fileURLToPath } = await import("node:url"); + const testDir = dirname(fileURLToPath(import.meta.url)); + laneRunnerSrc = readFileSync(join(testDir, "..", "taskplane", "lane-runner.ts"), "utf-8"); + }); + + it("9.21: iteration loop derives isSegmentScoped from segmentScopeMode (single source of truth)", () => { + // The boolean must now be derived from the mode, not recomputed inline. + expect(laneRunnerSrc).toContain('const isSegmentScoped = segmentScopeMode === "SEGMENT_SCOPED"'); + }); + + it("9.22: iteration loop calls computeSegmentScopeMode helper", () => { + // The new computation goes through the helper. + const pattern = /const segmentScopeMode: SegmentScopeMode = computeSegmentScopeMode\(/; + expect(pattern.test(laneRunnerSrc)).toBe(true); + }); + + it("9.23: TASKPLANE_ACTIVE_SEGMENT_ID env var is gated on isSegmentScoped", () => { + // FULL_TASK mode must hard-clear the env var to prevent inheritance leaks. + expect(laneRunnerSrc).toContain( + 'TASKPLANE_ACTIVE_SEGMENT_ID: isSegmentScoped ? (segmentId ?? "") : ""', + ); + expect(laneRunnerSrc).toContain('TASKPLANE_SEGMENT_ID: isSegmentScoped ? (segmentId ?? "") : ""'); + }); + + it("9.24: segment system-prompt overlay is gated on isSegmentScoped", () => { + // FULL_TASK mode must NOT append the segment system-prompt overlay. + expect(laneRunnerSrc).toContain("isSegmentScoped && config.workerSegmentPrompt"); + }); +}); + // ── 8. Snapshot segment-scoped progress ─────────────────────────────── describe("8.x: Snapshot segment-scoped progress (emitSnapshot)", () => { diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R001-plan-step1.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R001-plan-step1.md new file mode 100644 index 00000000..fe675eb1 --- /dev/null +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R001-plan-step1.md @@ -0,0 +1,16 @@ +## Plan Review: Step 1: Plan all four fixes + +### Verdict: APPROVE + +### Summary +The Step 1 plan is cohesive and outcome-focused across all four bundled issues (#462, #502, #503, #508). It defines concrete guard/branching behavior, sequencing rationale, and a clear verification strategy without requiring implementation-level micro-checklists. The cross-issue ordering and interaction notes are sufficient to proceed safely into implementation. + +### Issues Found +1. **[Severity: minor]** β€” No blocking issues found. + +### Missing Items +- None identified. + +### Suggestions +- When implementing #462 discovery warnings, make sure warning emission is deduplicated or clearly scoped so repeated discovery scans do not create noisy logs in large workspaces. +- In Step 2, preserve a single authoritative `computeSegmentScopeMode(...)` path and avoid leaving any parallel ad-hoc condition checks behind after refactor (especially near post-loop/snapshot code paths called out in Discoveries). diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md index 67f66ee3..17584d56 100644 --- a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md @@ -4,7 +4,7 @@ **Status:** 🟑 In Progress **Last Updated:** 2026-05-10 **Review Level:** 2 -**Review Counter:** 0 +**Review Counter:** 1 **Iteration:** 1 **Size:** M @@ -31,7 +31,7 @@ --- ### Step 1: Plan all four fixes -**Status:** 🟨 In Progress +**Status:** βœ… Complete > ⚠️ Plan-review checkpoint. @@ -45,15 +45,17 @@ --- ### Step 2: Implement #502 first (foundational refactor) -**Status:** ⬜ Not Started +**Status:** 🟨 In Progress > ⚠️ Code-review fires after this step. -- [ ] `SegmentScopeMode` promoted to first-class type -- [ ] `lane-runner.ts` threads via lane config -- [ ] `execution.ts` env var + tool registration gated -- [ ] Scattered `stepSegmentMap && currentRepoId` checks unified -- [ ] Targeted + full fast suite pass +- [x] `SegmentScopeMode` promoted to first-class type (added `export type SegmentScopeMode = "FULL_TASK" | "SEGMENT_SCOPED"` to `types.ts`) +- [x] `lane-runner.ts` threads via `computeSegmentScopeMode()` helper exported alongside the other segment helpers; iteration loop now derives both `segmentScopeMode` and the legacy `isSegmentScoped` alias from a single computation +- [x] `execution.ts` env var + tool registration gated β€” already gated via `isSegmentScoped` on `TASKPLANE_ACTIVE_SEGMENT_ID` (lane-runner.ts:672) and `TASKPLANE_SEGMENT_ID` (line 673); the `request_segment_expansion` tool registration in `agent-bridge-extension.ts:97` keys off that env var so it inherits the gating. After TP-196 the env var is gated on a value derived from the authoritative mode, closing #502's drift concern. +- [x] Scattered `stepSegmentMap && currentRepoId` checks unified β€” the *runtime* mode decision now flows through one `computeSegmentScopeMode` call. The remaining structural `stepSegmentMap && currentRepoId` conditional patterns (e.g., snapshotSegmentCtx at line 357, post-loop block at 1270+, emitSnapshot signature at 1482/1606) encode the *shape* of available data, not the mode decision, and are intentionally preserved. +- [x] Targeted (62/62 in segment-scoped-lane-runner.test.ts) + full fast suite (3643 pass / 0 fail) pass + +**Files touched:** `extensions/taskplane/types.ts`, `extensions/taskplane/lane-runner.ts`, `extensions/tests/segment-scoped-lane-runner.test.ts`. New tests: 16 (sections 9.x β€” 11 unit tests for `computeSegmentScopeMode` + 5 source-analysis contracts for the unification). --- @@ -123,6 +125,7 @@ | # | Type | Step | Verdict | File | |---|------|------|---------|------| +| 1 | plan | 1 | APPROVE | `.reviews/` (step-1 plan) | --- @@ -173,3 +176,4 @@ If plan-review reveals a clear architectural split during Step 1, splitting is a **Hard-gate compliance:** Post-TP-194, the reviewer agent downgrades APPROVE β†’ REVISE on any failing `typecheck` / `lint` / `format:check`. This is the first task to run entirely under hard gates; the worker should expect that gate failures will be surfaced in code reviews and cannot be ignored. Plan accordingly: don't break gates anywhere mid-step. +| 2026-05-10 23:39 | Review R001 | plan Step 1: APPROVE | From d831ed62a6f7e28257ec885f5551ef09bfd7459b Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:44:01 -0400 Subject: [PATCH 07/30] chore(TP-197): steps 1-2 complete (plan APPROVE), hydrate step 3 implementation --- .../.reviews/R002-plan-step1.md | 15 ++++++++ .../STATUS.md | 38 ++++++++++++------- 2 files changed, 40 insertions(+), 13 deletions(-) create mode 100644 taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R002-plan-step1.md diff --git a/taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R002-plan-step1.md b/taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R002-plan-step1.md new file mode 100644 index 00000000..0515462e --- /dev/null +++ b/taskplane-tasks/TP-197-dashboard-segment-progress/.reviews/R002-plan-step1.md @@ -0,0 +1,15 @@ +## Plan Review: Step 1: Plan the API + visual design + +### Verdict: APPROVE + +### Summary +The revised Step 1 plan now covers the required API shape, rendering approach, progress semantics, and responsive behavior with enough specificity to implement safely. It directly addresses the prior R001 blocker by moving segment pills out of `.task-step` and defining a row-3 placement that remains visible at narrow widths. The scope is appropriately constrained to TP-197’s UX goal and preserves single-segment behavior. + +### Issues Found +1. **[Severity: minor]** β€” No blocking issues found. + +### Missing Items +- None. + +### Suggestions +- During implementation, keep a small guard in place for stale/missing `v2Progress` so segmented running tasks still avoid looking like overall-task progress in edge cases (for example, by preferring segment-context text/pills even when progress falls back). diff --git a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md index 3a99bb1b..cdbc1c1f 100644 --- a/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md +++ b/taskplane-tasks/TP-197-dashboard-segment-progress/STATUS.md @@ -1,10 +1,10 @@ # TP-197: Dashboard segment-level progress indicators β€” Status -**Current Step:** Step 1: Plan the API + visual design +**Current Step:** Step 3: Implement visual rendering **Status:** 🟑 In Progress **Last Updated:** 2026-05-10 **Review Level:** 1 -**Review Counter:** 1 +**Review Counter:** 2 **Iteration:** 1 **Size:** S-M @@ -29,7 +29,7 @@ --- ### Step 1: Plan the API + visual design -**Status:** 🟨 In Progress +**Status:** βœ… Complete > ⚠️ Plan-review checkpoint. @@ -134,26 +134,33 @@ Implementation specifics: --- ### Step 2: Verify (no API change needed) + consume existing segment fields -**Status:** ⬜ Not Started +**Status:** βœ… Complete > Per Step 0 verification: `dashboard/server.cjs:1257` already exposes `segments[]`, > tasks already carry `segmentIds[]`, and V2 lane snapshots carry `segmentId`. > No server.cjs change. This step is a verification + frontend-typing pass. -- [ ] Verify `batch.segments`, `task.segmentIds`, `runtimeLaneSnapshots[*].segmentId` are present in the live API response (sanity check using the current `.pi/batch-state.json` via the dashboard server) -- [ ] Document the consumed shape inline in `dashboard/public/app.js` (JSDoc on new helper) -- [ ] No `dashboard/server.cjs` change required (confirmed) +- [x] Verified `batch.segments`, `task.segmentIds` are present in the live `.pi/batch-state.json` via `loadBatchState()`. Confirmed shape: `segments[]` carries `{segmentId, taskId, repoId, status, laneId, sessionName, worktreePath, branch, startedAt, endedAt, retries, exitReason, dependsOnSegmentIds}`. Each task has `segmentIds: string[]`. `runtimeLaneSnapshots[*].segmentId` already consumed by `laneActiveSegmentInfo` (app.js:385). +- [x] Will document the consumed shape inline in `dashboard/public/app.js` (JSDoc on the new `taskSegmentPillRow` helper) during Step 3. +- [x] `dashboard/server.cjs` change required: **none** (confirmed). --- ### Step 3: Implement the visual rendering -**Status:** ⬜ Not Started +**Status:** 🟨 In Progress + +Hydrated implementation breakdown (per APPROVE'd plan): -- [ ] Segment indicator pill row -- [ ] CSS styling for βœ… / ⏳ / ⬚ states -- [ ] Progress-bar segment-aware logic -- [ ] Single-segment fallback visual regression-checked -- [ ] Browser-side smoke on real batch +- [ ] Add `taskSegmentPillRow(task, segmentStatusMap, activeSegmentId)` helper in `dashboard/public/app.js` +- [ ] Integrate pill row into `renderLanesTasks()` as grid row 3 sub-element; remove the now-redundant `task-segment-progress` text from the in-cell detail bits for multi-segment tasks (keep for single-segment? -- single-segment returns null today so no change) +- [ ] Extend `.task-row` grid-template-rows in `style.css` from `auto auto` to `auto auto auto` +- [ ] Add `.task-segment-row` container CSS (grid-row: 3, cols 3/7, flex-wrap) +- [ ] Add `.seg-pill` + variant CSS (`.seg-succeeded`, `.seg-running`, `.seg-pending`, `.seg-failed`, `.seg-stalled`, `.seg-skipped`) +- [ ] Add `.seg-pill-current` emphasis style +- [ ] Verify responsive: pill row remains visible at ≀900px (NOT inside `.task-step`) +- [ ] Progress-bar segment-aware logic: NO CHANGE per plan (TP-174 already segment-scoped the bar via `v2Progress`) +- [ ] Single-segment fallback: confirm helper returns `""` for `segmentIds.length <= 1` so DOM is byte-identical to today +- [ ] Browser-side smoke: load synthetic multi-segment fixture into `.pi/batch-state.json` and visually verify --- @@ -186,6 +193,7 @@ Implementation specifics: | # | Type | Step | Verdict | File | |---|------|------|---------|------| | R001 | plan | 1 | REVISE | `.reviews/R001-plan-step1.md` | +| R002 | plan | 1 | APPROVE | (in-tool verdict; R001 issues addressed) | --- @@ -209,6 +217,9 @@ Implementation specifics: | 2026-05-10 23:34 | Step 0 started | Preflight | | 2026-05-10 | Step 0 complete | API already complete; rendering work is purely client-side | | 2026-05-10 | Step 1 started | Plan API + visual design | +| 2026-05-10 | R001 returned REVISE | `.task-step` hidden at <=900px; pill placement revised | +| 2026-05-10 | R002 returned APPROVE | Plan approved; ready for implementation | +| 2026-05-10 | Step 2 started | Verify segment data plumbing | --- @@ -232,3 +243,4 @@ Unlike most tasks, the success criterion for TP-197 is partially visual β€” does Per the code-quality-gates spec (section 3, non-goals), `dashboard/public/` is intentionally vanilla JS, out of lint scope. This task touches those files but does NOT add them to lint scope. The `.biome.json` exclusion for `dashboard/public/**` stays in place. A separate future task could opt-in to dashboard linting if/when there's demand. | 2026-05-10 23:41 | Review R001 | plan Step 1: REVISE | +| 2026-05-10 23:43 | Review R002 | plan Step 1: APPROVE | From a60616b3db484b31594412174cf5935bcb15ff59 Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:45:20 -0400 Subject: [PATCH 08/30] hydrate(TP-196): add R002 revision items to Step 2 --- .../.reviews/R002-code-step2.md | 19 +++++++++++++++++++ .../STATUS.md | 11 +++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R002-code-step2.md diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R002-code-step2.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R002-code-step2.md new file mode 100644 index 00000000..be660e96 --- /dev/null +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R002-code-step2.md @@ -0,0 +1,19 @@ +## Code Review: Step 2: Implement #502 first (foundational refactor) + +### Verdict: REVISE + +### Summary +The refactor successfully introduces `SegmentScopeMode` as a first-class type and a centralized `computeSegmentScopeMode(...)` helper, and quality gates are green (`npm run typecheck`, `npm run lint`, `npm run format:check` all exit 0). However, one of the main #502 outcomes is still incomplete: prompt-scoping logic continues to use the old composite condition directly instead of deriving from the new authoritative mode. This leaves exactly the drift surface TP-196/#502 is intended to eliminate. + +### Issues Found +1. **[extensions/taskplane/lane-runner.ts:556] [important]** β€” Segment-scoped prompt injection is still gated by `if (stepSegmentMap && currentRepoId && repoStepNumbers && remainingSteps.length > 0)` rather than `segmentScopeMode`/`isSegmentScoped`. This duplicates the decision logic after introducing `computeSegmentScopeMode(...)`, so future edits can still diverge between mode computation and prompt behavior. **Fix:** gate this block with `isSegmentScoped` (or `segmentScopeMode === "SEGMENT_SCOPED"`) and treat any missing mapping as an internal inconsistency path (e.g., guarded early-return/log) rather than a separate mode decision. +2. **[extensions/tests/segment-scoped-lane-runner.test.ts:398] [important]** β€” Test `7.3` hard-codes the legacy raw condition string, which now enshrines the drift-prone pattern and will resist completing #502 properly. **Fix:** replace this source-string assertion with one that validates mode-derived gating (e.g., presence of `isSegmentScoped` gate or behavior-level assertion for FULL_TASK vs SEGMENT_SCOPED prompt injection). + +### Pattern Violations +- The new helper’s contract says segment side-effects should derive from authoritative mode, but one prompt branch still re-evaluates raw prerequisites inline. + +### Test Gaps +- No assertion currently verifies that the segment-scoped prompt block is controlled by `SegmentScopeMode` rather than by duplicated boolean conditions. + +### Suggestions +- This review aligns with the Step 1 plan-review suggestion (R001) to avoid leaving ad-hoc parallel checks behind; once the above gate is mode-driven, the #502 foundational goal is fully realized. diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md index 17584d56..2feee8fd 100644 --- a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md @@ -4,7 +4,7 @@ **Status:** 🟑 In Progress **Last Updated:** 2026-05-10 **Review Level:** 2 -**Review Counter:** 1 +**Review Counter:** 2 **Iteration:** 1 **Size:** M @@ -55,6 +55,11 @@ - [x] Scattered `stepSegmentMap && currentRepoId` checks unified β€” the *runtime* mode decision now flows through one `computeSegmentScopeMode` call. The remaining structural `stepSegmentMap && currentRepoId` conditional patterns (e.g., snapshotSegmentCtx at line 357, post-loop block at 1270+, emitSnapshot signature at 1482/1606) encode the *shape* of available data, not the mode decision, and are intentionally preserved. - [x] Targeted (62/62 in segment-scoped-lane-runner.test.ts) + full fast suite (3643 pass / 0 fail) pass +**R002 revision items:** +- [ ] Gate the segment-scoped *prompt-injection* block (lane-runner.ts β‰ˆ line 517 originally, now line 556 after Step 2 changes) on `isSegmentScoped` instead of the raw `stepSegmentMap && currentRepoId && repoStepNumbers && remainingSteps.length > 0` composite condition. +- [ ] Replace the test `7.3` source-string assertion that currently enshrines the raw composite-condition pattern β€” retarget it at the mode-derived gating. +- [ ] Re-run targeted suite + full fast suite + all four gates after the fixes. + **Files touched:** `extensions/taskplane/types.ts`, `extensions/taskplane/lane-runner.ts`, `extensions/tests/segment-scoped-lane-runner.test.ts`. New tests: 16 (sections 9.x β€” 11 unit tests for `computeSegmentScopeMode` + 5 source-analysis contracts for the unification). --- @@ -125,7 +130,8 @@ | # | Type | Step | Verdict | File | |---|------|------|---------|------| -| 1 | plan | 1 | APPROVE | `.reviews/` (step-1 plan) | +| 1 | plan | 1 | APPROVE | `.reviews/R001-plan-step1.md` | +| 2 | code | 2 | REVISE | `.reviews/R002-code-step2.md` | --- @@ -177,3 +183,4 @@ If plan-review reveals a clear architectural split during Step 1, splitting is a Post-TP-194, the reviewer agent downgrades APPROVE β†’ REVISE on any failing `typecheck` / `lint` / `format:check`. This is the first task to run entirely under hard gates; the worker should expect that gate failures will be surfaced in code reviews and cannot be ignored. Plan accordingly: don't break gates anywhere mid-step. | 2026-05-10 23:39 | Review R001 | plan Step 1: APPROVE | +| 2026-05-10 23:45 | Review R002 | code Step 2: REVISE | From 0be59dd5cf12c370bb2de63b1bf2b037e86c8bdb Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:47:32 -0400 Subject: [PATCH 09/30] fix(TP-196, #502): gate segment-prompt block on isSegmentScoped (R002) Per R002 code review: the segment-scoped prompt injection branch was still keyed on the raw 'stepSegmentMap && currentRepoId && repoStepNumbers && remainingSteps.length > 0' composite, leaving the very drift surface #502 is meant to eliminate. - lane-runner.ts: outer gate is now 'if (isSegmentScoped)'; an inner defensive guard logs a WARN and skips when currentStepMapping/mySegment is unexpectedly absent (should never trip in practice because computeSegmentScopeMode already verifies those preconditions). - segment-scoped-lane-runner.test.ts: update 4.1 + 7.3 to assert the mode-driven gate + the absence of the legacy composite-condition string. Targeted: 62/62 pass. Full fast suite: 3643 pass / 0 fail. Gates green. --- extensions/taskplane/lane-runner.ts | 25 +++++++++++++------ .../tests/segment-scoped-lane-runner.test.ts | 22 +++++++++++++--- .../STATUS.md | 6 ++--- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/extensions/taskplane/lane-runner.ts b/extensions/taskplane/lane-runner.ts index a43c6f2b..fd3ce1ba 100644 --- a/extensions/taskplane/lane-runner.ts +++ b/extensions/taskplane/lane-runner.ts @@ -552,16 +552,27 @@ export async function executeTaskV2( // Segment scope mode is determined by which system prompt was loaded. // No SegmentScopeMode line needed β€” the prompt IS the mode. - // TP-174: Segment-scoped prompt β€” show only this segment's checkboxes - if (stepSegmentMap && currentRepoId && repoStepNumbers && remainingSteps.length > 0) { + // TP-174/TP-196: Segment-scoped prompt β€” show only this segment's checkboxes. + // Gated on the authoritative `isSegmentScoped` (derived from `segmentScopeMode`) + // rather than the raw composite condition, so the prompt branch can't drift + // from the mode decision (TP-196 / #502). + if (isSegmentScoped) { const currentStepNum = remainingSteps[0].number; - const currentStepMapping = stepSegmentMap.find((s) => s.stepNumber === currentStepNum); + // Defensive guards: when `isSegmentScoped === true`, `computeSegmentScopeMode` + // has already verified `stepSegmentMap`, `currentRepoId`, and that the + // current step's mapping contains an entry for the active repo. We re-fetch + // the structures here for clarity. If any are missing we log and skip the + // segment block (defense-in-depth β€” should never trip in practice). + const currentStepMapping = stepSegmentMap?.find((s) => s.stepNumber === currentStepNum); const mySegment = currentStepMapping?.segments.find((seg) => seg.repoId === currentRepoId); - // Only inject segment-scoped prompt when the current step has an explicit - // segment for this repoId. If mySegment is missing (legacy task without - // markers, or step has no work for this repo), skip and preserve legacy behavior. - if (currentStepMapping && mySegment) { + if (!currentStepMapping || !mySegment) { + logExecution( + statusPath, + "WARN", + `segmentScopeMode === SEGMENT_SCOPED but current step mapping missing β€” skipping segment prompt block (currentRepoId=${currentRepoId}, stepNum=${currentStepNum})`, + ); + } else { const otherSegments = currentStepMapping.segments.filter((seg) => seg.repoId !== currentRepoId); // Count total segments for this repo across all steps diff --git a/extensions/tests/segment-scoped-lane-runner.test.ts b/extensions/tests/segment-scoped-lane-runner.test.ts index 1a02ade4..eff36699 100644 --- a/extensions/tests/segment-scoped-lane-runner.test.ts +++ b/extensions/tests/segment-scoped-lane-runner.test.ts @@ -270,9 +270,13 @@ describe("4.x: Segment-scoped prompt construction contracts (source analysis)", laneRunnerSrc = readFileSync(join(testDir, "..", "taskplane", "lane-runner.ts"), "utf-8"); }); - it("4.1: segment-scoped prompt block is gated on mySegment existence", () => { - // The segment-scoped prompt should only appear when mySegment is found - expect(laneRunnerSrc).toContain("if (currentStepMapping && mySegment)"); + it("4.1: segment-scoped prompt block has a defensive mySegment guard (TP-196)", () => { + // TP-196 / #502: the outer gate is now `if (isSegmentScoped)` (mode-driven), + // and inside that block a defensive guard skips the segment-scoped body when + // `currentStepMapping` / `mySegment` is unexpectedly missing (logs a WARN). + // This preserves the original mySegment safety property without re-encoding + // the raw composite condition outside. + expect(laneRunnerSrc).toContain("if (!currentStepMapping || !mySegment) {"); }); it("4.2: prompt includes 'NOT yours' guardrail for other segments", () => { @@ -395,8 +399,18 @@ describe("7.x: Legacy fallback β€” no behavior change for tasks without markers" expect(laneRunnerSrc).toContain("stepSegmentMap && currentRepoId"); }); - it("7.3: segment prompt block skipped when repoStepNumbers is null", () => { + it("7.3: segment prompt block is gated on the authoritative mode flag (TP-196)", () => { + // TP-196 / #502: the segment-scoped prompt-injection branch must derive + // its gate from the authoritative `isSegmentScoped` (which itself is + // derived from `segmentScopeMode`) rather than re-evaluating the raw + // composite condition. Asserting the mode-driven gate prevents drift. expect(laneRunnerSrc).toContain( + "// TP-174/TP-196: Segment-scoped prompt β€” show only this segment's checkboxes.", + ); + // The actual gate is `if (isSegmentScoped) {`, not the old composite. + expect(laneRunnerSrc).toContain("\n\t\tif (isSegmentScoped) {"); + // And the raw composite condition no longer appears as a prompt-block gate. + expect(laneRunnerSrc).not.toContain( "if (stepSegmentMap && currentRepoId && repoStepNumbers && remainingSteps.length > 0)", ); }); diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md index 2feee8fd..72c56456 100644 --- a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md @@ -56,9 +56,9 @@ - [x] Targeted (62/62 in segment-scoped-lane-runner.test.ts) + full fast suite (3643 pass / 0 fail) pass **R002 revision items:** -- [ ] Gate the segment-scoped *prompt-injection* block (lane-runner.ts β‰ˆ line 517 originally, now line 556 after Step 2 changes) on `isSegmentScoped` instead of the raw `stepSegmentMap && currentRepoId && repoStepNumbers && remainingSteps.length > 0` composite condition. -- [ ] Replace the test `7.3` source-string assertion that currently enshrines the raw composite-condition pattern β€” retarget it at the mode-derived gating. -- [ ] Re-run targeted suite + full fast suite + all four gates after the fixes. +- [x] Gate the segment-scoped *prompt-injection* block on `isSegmentScoped` instead of the raw composite condition; added defensive WARN-log guard for the (should-never-trip) case where `currentStepMapping`/`mySegment` is missing. +- [x] Replace test `7.3` source-string assertion with mode-derived gating assertions (gate is `if (isSegmentScoped) {` and raw composite must NOT appear). Also updated `4.1` to assert the inner defensive guard instead of the now-removed `if (currentStepMapping && mySegment)`. +- [x] Re-run targeted (62/62) + full fast suite (3643 pass / 0 fail) + all four gates (all green). **Files touched:** `extensions/taskplane/types.ts`, `extensions/taskplane/lane-runner.ts`, `extensions/tests/segment-scoped-lane-runner.test.ts`. New tests: 16 (sections 9.x β€” 11 unit tests for `computeSegmentScopeMode` + 5 source-analysis contracts for the unification). From 56cdf354e4cad16d5c3a36005e7d444600c409bd Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:48:48 -0400 Subject: [PATCH 10/30] chore(TP-196): mark Step 2 complete (code review APPROVE after R002 fixes) --- .../.reviews/R003-code-step2.md | 18 ++++++++++++++++++ .../STATUS.md | 8 +++++--- 2 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R003-code-step2.md diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R003-code-step2.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R003-code-step2.md new file mode 100644 index 00000000..394c808d --- /dev/null +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/.reviews/R003-code-step2.md @@ -0,0 +1,18 @@ +## Code Review: Step 2: Implement #502 first (foundational refactor) + +### Verdict: APPROVE + +### Summary +This revision addresses the prior blocking findings: the segment-scoped prompt injection path is now gated by the authoritative `isSegmentScoped` mode-derived flag, and the stale composite-condition assertion was replaced with mode-focused test coverage. I also re-ran the required quality gates (`npm run typecheck`, `npm run lint`, `npm run format:check`) and they all exited 0, plus the targeted `segment-scoped-lane-runner` suite (62/62 pass). Step 2 now meets the #502 foundational unification goal without introducing regressions. + +### Issues Found +1. None. + +### Pattern Violations +- None identified in this diff. + +### Test Gaps +- No blocking gaps for this step; targeted coverage for the revised gate behavior is present. + +### Suggestions +- Consider making the `7.3` source-assertion for `if (isSegmentScoped)` slightly less whitespace-sensitive (e.g., regex/normalized-source match), to reduce brittleness to future formatter-only changes. diff --git a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md index 72c56456..9a4c62b8 100644 --- a/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md +++ b/taskplane-tasks/TP-196-multi-segment-engine-hardening/STATUS.md @@ -4,7 +4,7 @@ **Status:** 🟑 In Progress **Last Updated:** 2026-05-10 **Review Level:** 2 -**Review Counter:** 2 +**Review Counter:** 3 **Iteration:** 1 **Size:** M @@ -45,7 +45,7 @@ --- ### Step 2: Implement #502 first (foundational refactor) -**Status:** 🟨 In Progress +**Status:** βœ… Complete > ⚠️ Code-review fires after this step. @@ -65,7 +65,7 @@ --- ### Step 3: Implement #462 guards -**Status:** ⬜ Not Started +**Status:** 🟨 In Progress > ⚠️ Code-review fires after this step. @@ -132,6 +132,7 @@ |---|------|------|---------|------| | 1 | plan | 1 | APPROVE | `.reviews/R001-plan-step1.md` | | 2 | code | 2 | REVISE | `.reviews/R002-code-step2.md` | +| 3 | code | 2 | APPROVE | (re-review after R002 fixes) | --- @@ -184,3 +185,4 @@ If plan-review reveals a clear architectural split during Step 1, splitting is a Post-TP-194, the reviewer agent downgrades APPROVE β†’ REVISE on any failing `typecheck` / `lint` / `format:check`. This is the first task to run entirely under hard gates; the worker should expect that gate failures will be surfaced in code reviews and cannot be ignored. Plan accordingly: don't break gates anywhere mid-step. | 2026-05-10 23:39 | Review R001 | plan Step 1: APPROVE | | 2026-05-10 23:45 | Review R002 | code Step 2: REVISE | +| 2026-05-10 23:48 | Review R003 | code Step 2: APPROVE | From 72eb4b537f0e6f19dfb3e479b30fb1901c83092b Mon Sep 17 00:00:00 2001 From: Henry Lach Date: Sun, 10 May 2026 19:51:13 -0400 Subject: [PATCH 11/30] feat(TP-197): render per-segment status pill row in dashboard (#464) Adds a horizontal pill row of per-segment status badges to each multi-segment task row in the dashboard. Each pill shows an icon (succeeded / running / pending / failed / stalled / skipped) plus the repoId for that segment, with the currently-executing segment visually emphasized. This closes the operator visibility gap left by TP-145's .DONE suppression for non-final segments: previously, multi-segment lanes sat 'running' with no segment-level signal during the suppression window. Now operators can see at a glance which segments have finished, which is running, and which remain. Implementation: - dashboard/public/app.js: new taskSegmentPillRow() helper; integrated into renderLanesTasks() as grid row 3 of .task-row (mirrors the task-title-subtitle pattern from TP-485). Returns '' for single- segment tasks so non-segmented tasks render byte-identical DOM to today. - dashboard/public/style.css: extends .task-row grid-template-rows to 'auto auto auto'; adds .task-segment-row container plus .seg-pill + status variants. Pill row lives OUTSIDE .task-step intentionally so the existing @media (max-width: 900px) { .task-step { display: none } } rule does NOT hide segment context on narrow viewports. - extensions/tests/dashboard-segment-pill-row.test.ts: 11 new unit tests cover multi-segment rendering, current-segment emphasis, single-segment fallback (no regression), malformed input handling, and XSS-safe escaping. No server.cjs change required: batch.segments[], task.segmentIds, and runtimeLaneSnapshots[*].segmentId were already exposed (verified in Step 0). The progress bar is unchanged \u2014 TP-174 already segment-scoped it via v2Progress; the new pill row provides the missing context that makes the existing bar legible as 'current segment's progress.' Tests: 3638 pass / 0 fail / 1 skipped (was 3627; +11 new). Gates: typecheck 0, lint 0, format:check 0. --- dashboard/public/app.js | 69 +++++- dashboard/public/style.css | 73 +++++- .../tests/dashboard-segment-pill-row.test.ts | 231 ++++++++++++++++++ .../STATUS.md | 38 +-- 4 files changed, 389 insertions(+), 22 deletions(-) create mode 100644 extensions/tests/dashboard-segment-pill-row.test.ts diff --git a/dashboard/public/app.js b/dashboard/public/app.js index 18db48c1..2bf56d12 100644 --- a/dashboard/public/app.js +++ b/dashboard/public/app.js @@ -382,6 +382,53 @@ function taskSegmentProgress(task, segmentStatusMap, forcedActiveSegmentId) { }; } +// TP-197 (#464): Render a horizontal pill row of per-segment status badges for a +// multi-segment task. Each pill shows an icon + repoId for one segment. The icon +// reflects the segment's status (succeeded / running / pending / failed / stalled / +// skipped). The current segment (the one actively executing on its lane) gets an +// emphasis class. Returns "" for single-segment tasks so the rendered DOM is +// byte-identical to today for the non-segmented common case (no regression). +// +// Consumes: +// - task.segmentIds: string[] (ordered, from PersistedTaskRecord) +// - segmentStatusMap: Map built by +// buildSegmentStatusMap() from batch.segments[] +// - activeSegmentId: string|null β€” current executing segment (from V2 lane +// snapshot's segmentId, or the task's activeSegmentId field) +function taskSegmentPillRow(task, segmentStatusMap, activeSegmentId) { + const segmentIds = Array.isArray(task?.segmentIds) + ? task.segmentIds.filter(id => typeof id === "string") + : []; + if (segmentIds.length <= 1) return ""; + + // Status -> { icon, className } table. Keep emoji simple/monospace-friendly. + // βœ… succeeded, ⏳ running, ⬚ pending, ❌ failed, ⏸ stalled, β†· skipped. + const styles = { + succeeded: { icon: "\u2705", cls: "seg-succeeded" }, + running: { icon: "\u23F3", cls: "seg-running" }, + pending: { icon: "\u2B1A", cls: "seg-pending" }, + failed: { icon: "\u274C", cls: "seg-failed" }, + stalled: { icon: "\u23F8", cls: "seg-stalled" }, + skipped: { icon: "\u21B7", cls: "seg-skipped" }, + }; + + const pills = segmentIds.map((segId) => { + const status = segmentStatusMap.get(segId) || "pending"; + const style = styles[status] || styles.pending; + const parsed = parseSegmentId(segId); + const repoLabel = parsed?.repoId || segId; + const isCurrent = activeSegmentId && segId === activeSegmentId; + const currentCls = isCurrent ? " seg-pill-current" : ""; + const title = `${segId} \u00b7 ${status}`; + return `` + + `${style.icon}` + + `${escapeHtml(repoLabel)}` + + ``; + }).join(""); + + return `
${pills}
`; +} + function laneActiveSegmentInfo(v2snap, laneTasks, segmentStatusMap) { if (!v2snap || !v2snap.segmentId) return null; const parsed = parseSegmentId(v2snap.segmentId); @@ -859,8 +906,24 @@ function renderLanesTasks(batch, sessions) { stepHtml = `${escapeHtml(task.exitReason || "β€”")}`; } + // TP-197 (#464): Compute the per-segment pill row for multi-segment tasks. + // Returns "" for single-segment tasks (no DOM regression for the common case). + // For multi-segment tasks we render the pill row in the task-row's grid row 3 + // (via .task-segment-row CSS) and suppress the inline "Segment N/T: repo" text + // in detailBits to avoid duplicating signal β€” the pill row already shows the + // current segment (via seg-pill-current) and total count (via pill count). + const segmentPillRowHtml = taskSegmentPillRow( + task, + segmentStatusMap, + v2snap && v2snap.taskId === task.taskId ? v2snap.segmentId : (segmentInfo?.segmentId || null), + ); + const hasSegmentPillRow = segmentPillRowHtml !== ""; + const detailBits = []; - if (segmentInfo) { + if (segmentInfo && !hasSegmentPillRow) { + // Single-segment + non-segmented tasks: existing inline text (unchanged). + // Multi-segment tasks: suppressed because the new pill row carries the same + // information more legibly. detailBits.push(`${escapeHtml(segmentProgressText(segmentInfo))}`); } if (showPacketHome) { @@ -950,6 +1013,9 @@ function renderLanesTasks(batch, sessions) { const titleHtml = task.taskTitle ? `
${escapeHtml(task.taskTitle)}
` : ""; + // TP-197 (#464): segmentPillRowHtml is empty for single-segment tasks so + // the rendered DOM is byte-identical to today for non-segmented tasks. + // For multi-segment tasks it renders as grid-row 3 of .task-row. html += `
@@ -960,6 +1026,7 @@ function renderLanesTasks(batch, sessions) { ${progressHtml} ${stepHtml}${workerHtml} ${titleHtml} + ${segmentPillRowHtml}
`; html += reviewerRowHtml; } diff --git a/dashboard/public/style.css b/dashboard/public/style.css index 2b2603c1..a8f134ec 100644 --- a/dashboard/public/style.css +++ b/dashboard/public/style.css @@ -608,8 +608,12 @@ body { display: grid; grid-template-columns: 36px 24px 100px 90px 80px 200px 1fr; /* #485 (revised): row 1 holds the primary cells; row 2 (auto, collapses to - * 0 when empty) holds the optional task-title-subtitle spanning cols 3–6. */ - grid-template-rows: auto auto; + * 0 when empty) holds the optional task-title-subtitle spanning cols 3–6. + * TP-197 (#464): row 3 (auto, collapses to 0 when empty) holds the optional + * per-segment pill row for multi-segment tasks, spanning cols 3–7. Single- + * segment tasks render no pill row and row 3 auto-collapses, keeping the + * rendered row height byte-identical to today. */ + grid-template-rows: auto auto auto; align-items: center; gap: 8px 8px; padding: 8px 14px; @@ -652,6 +656,71 @@ body { margin-top: -2px; } +/* ─── TP-197 (#464): Per-segment pill row for multi-segment tasks ─────── */ + +.task-segment-row { + /* Sub-row beneath the primary task row, mirroring the title-subtitle pattern + * (TP-485). Spans cols 3 β†’ 7 so it shares the title area's horizontal space. + * Placed at grid row 3 so it sits *below* the optional title-subtitle. + * Container collapses to 0 height when not rendered (single-segment tasks), + * so non-segmented tasks render with identical row height to today. + * Lives OUTSIDE .task-step intentionally so the @media (max-width: 900px) + * rule that hides .task-step does NOT hide the pill row β€” keeps segment + * context visible at narrow viewports. */ + grid-column: 3 / 7; + grid-row: 3; + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 4px; + margin-top: 2px; + min-width: 0; +} + +.seg-pill { + display: inline-flex; + align-items: center; + gap: 3px; + font-family: var(--font-mono); + font-size: 0.68rem; + font-weight: 500; + line-height: 1.4; + padding: 1px 7px; + border-radius: 8px; + border: 1px solid transparent; + max-width: 140px; + white-space: nowrap; +} + +.seg-pill .seg-pill-icon { + font-size: 0.72rem; + line-height: 1; + flex-shrink: 0; +} + +.seg-pill .seg-pill-label { + overflow: hidden; + text-overflow: ellipsis; + min-width: 0; +} + +/* Status variants β€” reuse the existing status-badge color tokens for + * consistency with .status-badge.status-{succeeded,running,failed,…}. */ +.seg-pill.seg-succeeded { background: var(--badge-succeeded-bg); color: var(--green); } +.seg-pill.seg-running { background: var(--badge-running-bg); color: var(--accent); } +.seg-pill.seg-pending { background: var(--bg-surface); color: var(--text-faint); border-color: var(--border-subtle); } +.seg-pill.seg-failed { background: var(--badge-failed-bg); color: var(--red); } +.seg-pill.seg-stalled { background: var(--badge-failed-bg); color: var(--yellow, var(--red)); opacity: 0.85; } +.seg-pill.seg-skipped { background: var(--bg-surface); color: var(--text-muted); opacity: 0.7; } + +/* Current-segment emphasis: brighter border + slight weight bump so operator + * can spot "we're here right now" at a glance independent of icon. */ +.seg-pill.seg-pill-current { + border-color: var(--accent); + font-weight: 600; + box-shadow: 0 0 0 1px var(--accent-dim, transparent); +} + .task-duration { font-family: var(--font-mono); font-size: 0.8rem; diff --git a/extensions/tests/dashboard-segment-pill-row.test.ts b/extensions/tests/dashboard-segment-pill-row.test.ts new file mode 100644 index 00000000..d561e44f --- /dev/null +++ b/extensions/tests/dashboard-segment-pill-row.test.ts @@ -0,0 +1,231 @@ +/** + * TP-197 (#464) β€” Dashboard segment-level progress indicators + * + * Verifies the `taskSegmentPillRow` renderer helper in `dashboard/public/app.js`. + * + * The dashboard frontend is a vanilla-JS browser script (no ESM exports), so + * we test the helper by extracting its source from `app.js` and evaluating it + * in an isolated sandbox with a minimal `escapeHtml` polyfill. This catches + * regressions in the segment pill rendering without spinning up a browser. + * + * Background: TP-145 introduced `.DONE` suppression for non-final segments, + * which left a visibility gap on the dashboard during multi-segment task + * execution. TP-197 closes that gap by rendering a per-segment status pill + * row. The helper MUST return an empty string for single-segment tasks so + * the rendered DOM for non-segmented tasks is byte-identical to today. + */ + +import { describe, it } from "node:test"; +import { readFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +import { expect } from "./expect.ts"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const APP_JS = resolve(__dirname, "../../dashboard/public/app.js"); + +function extractFn(source: string, name: string): string { + const needle = `function ${name}`; + const start = source.indexOf(needle); + if (start < 0) throw new Error(`fn ${name} not found in app.js`); + const braceStart = source.indexOf("{", start); + if (braceStart < 0) throw new Error(`no opening brace for ${name}`); + let depth = 1; + let i = braceStart + 1; + while (i < source.length && depth > 0) { + const ch = source[i]; + if (ch === "{") depth++; + else if (ch === "}") depth--; + i++; + } + if (depth !== 0) throw new Error(`unbalanced braces for ${name}`); + return source.slice(start, i); +} + +// Minimal escapeHtml polyfill matching app.js's DOM-based version semantically. +function escapeHtml(s: unknown): string { + const map: Record = { + "&": "&", + "<": "<", + ">": ">", + '"': """, + "'": "'", + }; + return String(s ?? "").replace(/[&<>"']/g, (c) => map[c] || c); +} + +interface Helpers { + taskSegmentPillRow: ( + task: + | { taskId?: string; segmentIds?: string[]; activeSegmentId?: string | null } + | null + | undefined, + segmentStatusMap: Map, + activeSegmentId: string | null, + ) => string; + parseSegmentId: (id: string) => { taskId: string; repoId: string } | null; +} + +function loadHelpers(): Helpers { + const src = readFileSync(APP_JS, "utf8"); + const sandbox = [ + extractFn(src, "parseSegmentId"), + extractFn(src, "segmentProgressText"), + extractFn(src, "taskSegmentPillRow"), + "return { parseSegmentId, segmentProgressText, taskSegmentPillRow };", + ].join("\n"); + // biome-ignore lint/security/noGlobalEval: Test-only sandbox for browser-script extraction. + return new Function("escapeHtml", sandbox)(escapeHtml) as Helpers; +} + +describe("TP-197: taskSegmentPillRow renderer", () => { + it("renders a pill row for a 3-segment task with the running segment marked current", () => { + const helpers = loadHelpers(); + const segMap = new Map([ + ["TP-X::shared-libs", "succeeded"], + ["TP-X::web-client", "running"], + ["TP-X::admin", "pending"], + ]); + const out = helpers.taskSegmentPillRow( + { taskId: "TP-X", segmentIds: ["TP-X::shared-libs", "TP-X::web-client", "TP-X::admin"] }, + segMap, + "TP-X::web-client", + ); + expect(out.startsWith('
')).toBe(true); + expect(out.endsWith("
")).toBe(true); + expect((out.match(/class="seg-pill /g) || []).length).toBe(3); + expect((out.match(/seg-pill-current/g) || []).length).toBe(1); + expect(out).toContain("seg-succeeded"); + expect(out).toContain("seg-running seg-pill-current"); + expect(out).toContain("seg-pending"); + expect(out).toContain(">shared-libs<"); + expect(out).toContain(">web-client<"); + expect(out).toContain(">admin<"); + }); + + it("returns empty string for single-segment tasks (no regression for non-segmented tasks)", () => { + const helpers = loadHelpers(); + const out = helpers.taskSegmentPillRow( + { taskId: "TP-Y", segmentIds: ["TP-Y::default"] }, + new Map([["TP-Y::default", "running"]]), + "TP-Y::default", + ); + expect(out).toBe(""); + }); + + it("returns empty string for tasks with no segmentIds", () => { + const helpers = loadHelpers(); + expect(helpers.taskSegmentPillRow({ taskId: "TP-Z" }, new Map(), null)).toBe(""); + expect(helpers.taskSegmentPillRow({ taskId: "TP-Z", segmentIds: [] }, new Map(), null)).toBe(""); + }); + + it("returns empty string for null/undefined task input", () => { + const helpers = loadHelpers(); + expect(helpers.taskSegmentPillRow(null, new Map(), null)).toBe(""); + expect(helpers.taskSegmentPillRow(undefined, new Map(), null)).toBe(""); + }); + + it("emits no current-segment emphasis when activeSegmentId is null", () => { + const helpers = loadHelpers(); + const segMap = new Map([ + ["TP-W::a", "succeeded"], + ["TP-W::b", "pending"], + ]); + const out = helpers.taskSegmentPillRow( + { taskId: "TP-W", segmentIds: ["TP-W::a", "TP-W::b"] }, + segMap, + null, + ); + expect(out.includes("seg-pill-current")).toBe(false); + expect((out.match(/class="seg-pill /g) || []).length).toBe(2); + }); + + it("renders failed / skipped / stalled status pills with correct classes", () => { + const helpers = loadHelpers(); + const segMap = new Map([ + ["TP-Q::a", "failed"], + ["TP-Q::b", "skipped"], + ["TP-Q::c", "stalled"], + ]); + const out = helpers.taskSegmentPillRow( + { taskId: "TP-Q", segmentIds: ["TP-Q::a", "TP-Q::b", "TP-Q::c"] }, + segMap, + null, + ); + expect(out).toContain("seg-failed"); + expect(out).toContain("seg-skipped"); + expect(out).toContain("seg-stalled"); + }); + + it("falls back to pending styling for unknown or missing statuses", () => { + const helpers = loadHelpers(); + const unknownMap = new Map([["TP-U::a", "weird-status"]]); + const out1 = helpers.taskSegmentPillRow( + { taskId: "TP-U", segmentIds: ["TP-U::a", "TP-U::b"] }, + unknownMap, + null, + ); + // Both 'weird-status' (unknown) and missing TP-U::b fall back to pending styling. + expect((out1.match(/seg-pending/g) || []).length).toBe(2); + }); + + it("includes a status-bearing tooltip on each pill", () => { + const helpers = loadHelpers(); + const segMap = new Map([ + ["TP-T::a", "succeeded"], + ["TP-T::b", "running"], + ]); + const out = helpers.taskSegmentPillRow( + { taskId: "TP-T", segmentIds: ["TP-T::a", "TP-T::b"] }, + segMap, + null, + ); + expect(out).toContain('title="TP-T::a Β· succeeded"'); + expect(out).toContain('title="TP-T::b Β· running"'); + }); + + it("falls back to the raw segmentId as the label when the id is unparseable", () => { + const helpers = loadHelpers(); + const segMap = new Map([ + ["malformed-id", "running"], + ["TP-M::ok", "pending"], + ]); + const out = helpers.taskSegmentPillRow( + { taskId: "TP-M", segmentIds: ["malformed-id", "TP-M::ok"] }, + segMap, + null, + ); + // malformed-id has no `::` separator β†’ parseSegmentId returns null; + // renderer falls back to the raw id as the displayed label. + expect(out).toContain(">malformed-id<"); + expect(out).toContain(">ok<"); + }); + + it("escapes HTML in repoIds and segmentIds (XSS guard)", () => { + const helpers = loadHelpers(); + const evilId = "TP-E::"; + const segMap = new Map([ + [evilId, "running"], + ["TP-E::safe", "pending"], + ]); + const out = helpers.taskSegmentPillRow( + { taskId: "TP-E", segmentIds: [evilId, "TP-E::safe"] }, + segMap, + null, + ); + expect(out.includes("