diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad426234c0..b9fe6e68f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,7 @@ jobs: sparse-checkout: | .github config + dashboard docs instructions prompts diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ae1e764e3..4c935eee7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ checkpoint, and status-only commits are intentionally omitted. durable spam audit records without blocking users or mutating repositories. - Added a light privacy reminder and stronger screenshot-or-video nudge to real behavior proof review guidance. - Added agent-led real behavior proof judgement so ClawSweeper can inspect linked screenshots, videos, logs, and terminal output with a read-only GitHub token, explain the proof verdict in the review comment, tell contributors how to trigger a fresh review after adding proof, and sync `proof: sufficient` when the evidence is convincing. +- Added a durable review-context budget ledger to generated reports so prompt section sizes, hydrated counts, and truncation state are visible after each run. - Added a real behavior proof assessment to PR reviews so missing, mock-only, or insufficient contributor proof blocks pass/automerge markers and asks for screenshots, terminal output, redacted logs, recordings, linked artifacts, or copied live output instead. 
/**
 * One row of the review-context budget ledger: how many items of a single
 * context section are present and how large that section is when serialized.
 */
interface ReviewContextLedgerEntry {
  /** Stable key of the context section (for example `"pullFiles"`). */
  section: string;
  /** Human-readable name used when the ledger is rendered. */
  label: string;
  /** Number of items of this section present in the context object. */
  entries: number;
  /** Length of the section value serialized as 2-space-indented JSON. */
  chars: number;
  /** Total count taken from `context.counts`, when it is a finite number. */
  total?: number;
  /** Hydrated count taken from `context.counts`, when it is a finite number. */
  hydrated?: number;
  /** Truncation flag taken from `context.counts`, when one is provided. */
  truncated?: boolean;
}

/**
 * Returns the character length of `value` pretty-printed as JSON with a
 * 2-space indent.
 *
 * `JSON.stringify` yields `undefined` (not a string) for `undefined`,
 * functions, and symbols; those inputs are reported as 0 chars instead of
 * throwing on `.length`.
 */
function promptJsonChars(value: unknown): number {
  // The lib typing says `string`, but the runtime result can be `undefined`.
  const serialized: string | undefined = JSON.stringify(value, null, 2);
  return serialized?.length ?? 0;
}

/**
 * Normalizes a raw count to a non-negative integer.
 *
 * @returns the rounded, clamped count, or `undefined` when `value` is missing
 *   or not a finite number.
 */
function reviewContextLedgerCount(value: number | undefined): number | undefined {
  if (value === undefined || !Number.isFinite(value)) return undefined;
  return Math.max(0, Math.round(value));
}

/**
 * Builds a single ledger row.
 *
 * `entries` is always present (non-finite input falls back to 0). `total` and
 * `hydrated` are only set when they are finite numbers, and `truncated` only
 * when it is not `undefined`, so absent counts stay absent on the row.
 */
function reviewContextLedgerEntry(options: {
  section: string;
  label: string;
  value: unknown;
  entries: number;
  total?: number | undefined;
  hydrated?: number | undefined;
  truncated?: boolean | undefined;
}): ReviewContextLedgerEntry {
  const entry: ReviewContextLedgerEntry = {
    section: options.section,
    label: options.label,
    // Guard against NaN/Infinity so the rendered ledger never shows "NaN entries".
    entries: reviewContextLedgerCount(options.entries) ?? 0,
    chars: promptJsonChars(options.value),
  };
  const total = reviewContextLedgerCount(options.total);
  if (total !== undefined) entry.total = total;
  const hydrated = reviewContextLedgerCount(options.hydrated);
  if (hydrated !== undefined) entry.hydrated = hydrated;
  if (options.truncated !== undefined) entry.truncated = options.truncated;
  return entry;
}

/** Returns the length of `value`, treating a missing array as empty. */
function arrayEntries(value: unknown[] | undefined): number {
  return value?.length ?? 0;
}

/**
 * Produces the ordered budget ledger for a review context.
 *
 * Rows are emitted in a fixed order (issue, comments, timeline, closing PRs,
 * related items, pull request, PR files, PR commits, PR review comments,
 * counts). Rows with no present entries and no positive reported total are
 * dropped.
 */
function reviewContextLedger(context: ItemContext): ReviewContextLedgerEntry[] {
  const counts = context.counts;
  const entries = [
    reviewContextLedgerEntry({
      section: "issue",
      label: "issue",
      value: context.issue,
      entries: 1,
    }),
    reviewContextLedgerEntry({
      section: "comments",
      label: "comments",
      value: context.comments,
      entries: arrayEntries(context.comments),
      total: counts?.comments,
      hydrated: counts?.commentsHydrated,
      truncated: counts?.commentsTruncated,
    }),
    reviewContextLedgerEntry({
      section: "timeline",
      label: "timeline events",
      value: context.timeline,
      entries: arrayEntries(context.timeline),
      total: counts?.timeline,
    }),
    reviewContextLedgerEntry({
      section: "closingPullRequests",
      label: "closing PRs",
      value: context.closingPullRequests ?? [],
      entries: arrayEntries(context.closingPullRequests),
      total: counts?.closingPullRequests,
    }),
    reviewContextLedgerEntry({
      section: "relatedItems",
      label: "related items",
      value: context.relatedItems ?? [],
      entries: arrayEntries(context.relatedItems),
      total: counts?.relatedItems,
    }),
    reviewContextLedgerEntry({
      section: "pullRequest",
      label: "pull request",
      value: context.pullRequest ?? null,
      // Mirror the `?? null` normalization above: null and undefined both mean "absent".
      entries: context.pullRequest == null ? 0 : 1,
    }),
    reviewContextLedgerEntry({
      section: "pullFiles",
      label: "PR files",
      value: context.pullFiles ?? [],
      entries: arrayEntries(context.pullFiles),
      total: counts?.pullFiles,
      hydrated: counts?.pullFilesHydrated,
      truncated: counts?.pullFilesTruncated,
    }),
    reviewContextLedgerEntry({
      section: "pullCommits",
      label: "PR commits",
      value: context.pullCommits ?? [],
      entries: arrayEntries(context.pullCommits),
      total: counts?.pullCommits,
      hydrated: counts?.pullCommitsHydrated,
      truncated: counts?.pullCommitsTruncated,
    }),
    reviewContextLedgerEntry({
      section: "pullReviewComments",
      label: "PR review comments",
      value: context.pullReviewComments ?? [],
      entries: arrayEntries(context.pullReviewComments),
      total: counts?.pullReviewComments,
      hydrated: counts?.pullReviewCommentsHydrated,
      truncated: counts?.pullReviewCommentsTruncated,
    }),
    reviewContextLedgerEntry({
      section: "counts",
      label: "context counts",
      value: counts ?? {},
      entries: Object.keys(counts ?? {}).length,
    }),
  ];
  return entries.filter((entry) => entry.entries > 0 || (entry.total ?? 0) > 0);
}

/** Test-only export of {@link reviewContextLedger}. */
export function reviewContextLedgerForTest(context: ItemContext): ReviewContextLedgerEntry[] {
  return reviewContextLedger(context);
}

/**
 * Formats the count portion of a ledger row.
 *
 * When a total or hydrated count is known the text is
 * `"<hydrated>/<total> hydrated"` (plus `", truncated"` when flagged), with a
 * missing side falling back to `entries`; otherwise it is
 * `"<n> entry"` / `"<n> entries"`.
 */
function reviewContextLedgerCountText(entry: ReviewContextLedgerEntry): string {
  if (entry.total !== undefined || entry.hydrated !== undefined) {
    const total = entry.total ?? entry.entries;
    const hydrated = entry.hydrated ?? entry.entries;
    const suffix = entry.truncated ? ", truncated" : "";
    return `${hydrated}/${total} hydrated${suffix}`;
  }
  return `${entry.entries} ${entry.entries === 1 ? "entry" : "entries"}`;
}

/**
 * Renders the ledger as a Markdown bullet list, one line per row:
 * `- <label>: <count text>, <chars> chars`.
 */
function renderReviewContextBudget(context: ItemContext): string {
  return reviewContextLedger(context)
    .map(
      (entry) => `- ${entry.label}: ${reviewContextLedgerCountText(entry)}, ${entry.chars} chars`,
    )
    .join("\n");
}

/** Test-only export of {@link renderReviewContextBudget}. */
export function renderReviewContextBudgetForTest(context: ItemContext): string {
  return renderReviewContextBudget(context);
}
// Verifies that the ledger keeps sections in their fixed order, drops empty
// sections, carries over the reported totals/hydration flags, and that the
// rendered budget line for PR files uses the "hydrated/total" form.
test("review context ledger records ordered section budgets", () => {
  const context = {
    issue: { number: 123, title: "Sample PR" },
    comments: [{ author: "alice", body: "Please review this." }],
    timeline: [{ event: "committed", sha: "abc123" }],
    relatedItems: [{ number: 122, title: "Related issue" }],
    pullRequest: { number: 123, additions: 12 },
    pullFiles: [
      { filename: "src/example.ts", patch: "line\n".repeat(20) },
      { filename: "test/example.test.ts", patch: "test\n".repeat(20) },
    ],
    pullCommits: [{ sha: "abc123", message: "fix example" }],
    pullReviewComments: [],
    counts: {
      comments: 10,
      commentsHydrated: 1,
      commentsTruncated: true,
      timeline: 1,
      relatedItems: 1,
      pullFiles: 120,
      pullFilesHydrated: 2,
      pullFilesTruncated: true,
      pullCommits: 1,
      pullCommitsHydrated: 1,
      pullCommitsTruncated: false,
      pullReviewComments: 0,
      pullReviewCommentsHydrated: 0,
      pullReviewCommentsTruncated: false,
    },
  };

  const ledger = reviewContextLedgerForTest(context);

  // Project each row onto the fields under test, in ledger order.
  const observedRows = ledger.map((row) => [
    row.section,
    row.entries,
    row.total,
    row.hydrated,
    row.truncated,
  ]);
  const expectedRows = [
    ["issue", 1, undefined, undefined, undefined],
    ["comments", 1, 10, 1, true],
    ["timeline", 1, 1, undefined, undefined],
    ["relatedItems", 1, 1, undefined, undefined],
    ["pullRequest", 1, undefined, undefined, undefined],
    ["pullFiles", 2, 120, 2, true],
    ["pullCommits", 1, 1, 1, false],
    ["counts", 14, undefined, undefined, undefined],
  ];
  assert.deepEqual(observedRows, expectedRows);

  // The char budget is the pretty-printed JSON length of the section value.
  const pullFilesRow = ledger.find((row) => row.section === "pullFiles");
  const expectedPullFilesChars = JSON.stringify(context.pullFiles, null, 2).length;
  assert.equal(pullFilesRow?.chars, expectedPullFilesChars);

  const renderedBudget = renderReviewContextBudgetForTest(context);
  assert.match(renderedBudget, /- PR files: 2\/120 hydrated, truncated, \d+ chars/);
});