Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2488db4
docs: design for agent rate-limit and session-cap handling
wesm May 5, 2026
2da3099
docs: revise rate-limit spec per review
wesm May 5, 2026
4e99855
docs: tighten rollout and test-helper wording in rate-limit spec
wesm May 5, 2026
aba56aa
docs: implementation plan for agent rate-limit and session-cap handling
wesm May 5, 2026
2115905
feat(agentlimit): create package with Kind and Classification types
wesm May 5, 2026
d580130
feat(agentlimit): port reset-duration parsing from worker.parseQuotaC…
wesm May 5, 2026
c00b2e9
feat(agentlimit): add absolute reset-time parser with same-day rollover
wesm May 5, 2026
c4bf92c
fix(agentlimit): correct DST handling and equality rollover in ParseR…
wesm May 5, 2026
44f74f7
feat(agentlimit): add Classify with nine production quota patterns
wesm May 5, 2026
62e7aca
refactor(daemon): route quota detection through internal/agentlimit
wesm May 5, 2026
b76a774
fix(agentlimit): address roborev review findings 18483/18487/18498
wesm May 5, 2026
b34ea49
test(daemon): cover session-limit cooldown via injected classifier
wesm May 5, 2026
9d12ba8
test(daemon): rename SessionLimit test to reflect what it verifies
wesm May 5, 2026
de71ad3
feat(daemon): log WARN for unclassified agent errors
wesm May 5, 2026
073f816
feat(fix): add classifier hook and agentLimitError plumbing
wesm May 5, 2026
7f744d9
feat(fix): abort fixSingleJob on KindQuota or KindSession
wesm May 5, 2026
7823a65
feat(fix): abort runFixBatch on KindQuota or KindSession
wesm May 5, 2026
62f00dd
fix: address roborev review findings 18506 and 18513
wesm May 5, 2026
a0bb883
docs: update CLAUDE.md cooldown reference to point at agentlimit
wesm May 5, 2026
a3f244b
remove specs
wesm May 5, 2026
3fecb4b
refactor: fold agentlimit into agent; classify retry errors
wesm May 5, 2026
a7ac2c1
chore: tell review agents to stop flagging build errors
wesm May 5, 2026
657052b
docs: mark LimitKindSession as a prepared path, not armed for production
wesm May 5, 2026
333420a
fix: warn on dirty tree at retry-abort and slice lowered string
wesm May 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .roborev.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,16 @@ findings with summary/front matter in a way that makes verdict detection less
reliable, the fix should be to tighten the review prompts/templates and output
contract. Do not ask for increasingly broad deterministic heuristics to parse
arbitrary narrative text.

## Compilation, imports, and build errors

Do not flag suspected compile errors, missing imports, undeclared
identifiers, type mismatches, or other build-level issues. The local
toolchain (go build, go vet, golangci-lint) and the pre-commit hook
catch these before any commit lands; if the diff actually fails to
compile, the PR cannot merge regardless of what the review says.
Reviews see only the diff, not the rest of the package, so claims like
"function X is not defined" or "package Y is not imported" are almost
always wrong — the symbol exists elsewhere in the file or package.
Focus reviews on logic, architecture, and behavior; trust the build.
"""
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ The internal `lookupFieldByTag()` helper resolves these via reflection on the st

- **Retries**: Up to 3 retries for transient failures (`db.RetryJob`). Resets status to queued.
- **Failover**: After retries exhausted (or on quota errors), switches to backup agent via `db.FailoverJob`. Resets retry_count, sets backup agent/model.
- **Cooldown**: Quota exhaustion errors (`isQuotaError`) trigger per-agent cooldown (default 30 min, parsed from error message). Cooldowns are tracked in-memory with RWMutex.
- **Cooldown**: Quota exhaustion errors (classified by `agent.ClassifyLimit` as `LimitKindQuota`) trigger per-agent cooldown (default 30 min, parsed from the error message via `agent.ParseResetDuration`/`ParseResetTime`). Cooldowns are tracked in-memory with RWMutex. `LimitKindSession` follows the same cooldown path, but no production rule emits it yet — a Claude session-cap rule is pending a captured error message.

### Workflow derivation for failover

Expand Down
158 changes: 158 additions & 0 deletions cmd/roborev/fix.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ Examples:
minSeverity: minSeverity,
quiet: quiet,
resume: resume,
classify: agent.ClassifyLimit,
}

roots, err := resolveCurrentRepoRoots()
Expand Down Expand Up @@ -228,13 +229,91 @@ type fixOptions struct {
minSeverity string
quiet bool
resume bool

// classify is the rate-limit classifier. Defaults to
// agent.ClassifyLimit in the production cobra command's RunE; tests
// inject a stub to drive deterministic KindQuota / KindSession
// outcomes without depending on real agent error wording.
classify agent.LimitClassifier
}

// agentLimitError is returned by the fix loop when the configured agent
// hits a quota or session limit. The fix command surfaces it as the
// process exit error so users see the reset time and a hint to retry.
type agentLimitError struct {
// Classification is the classifier result that triggered the abort.
// Error() renders it into the user-facing message.
Classification agent.LimitClassification
}

// Error implements the error interface. The message is rendered
// relative to the wall clock at call time, so the "in <duration>" part
// reflects when the error is printed rather than when it was created.
func (e *agentLimitError) Error() string {
	now := time.Now()
	msg := formatAgentLimitMessage(e.Classification, now)
	return msg
}

// formatAgentLimitMessage renders the user-facing abort text for a limit
// classification. The clock is passed in as now so tests can assert on
// the output without depending on time.Now.
//
// The label ("quota limit" / "session limit" / "rate limit") comes from
// cls.Kind via agentLimitLabel, so a quota abort is never reported as a
// session cap.
//
// Three message shapes are produced:
//   - ResetAt is set and still ahead of now: "Cooldown until <clock> (in <dur>)".
//   - ResetAt is unset and CooldownFor is positive: "Cooldown for ~<dur>".
//   - otherwise: "unknown reset time", followed by the original error
//     flattened to one line and truncated to 200 characters.
func formatAgentLimitMessage(cls agent.LimitClassification, now time.Time) string {
	kindLabel := agentLimitLabel(cls.Kind)
	hasResetAt := !cls.ResetAt.IsZero()

	// An absolute reset time takes precedence over a relative cooldown.
	var wait time.Duration
	if hasResetAt {
		wait = cls.ResetAt.Sub(now)
	} else if cls.CooldownFor > 0 {
		wait = cls.CooldownFor
	}

	// No usable wait: fall back to echoing the original error text.
	if wait <= 0 {
		oneLine := strings.ReplaceAll(cls.Message, "\n", " ")
		return fmt.Sprintf(
			"agent %s hit a %s (unknown reset time). "+
				"Re-run later, or pass --agent <other> to switch. "+
				"Original error: %s",
			cls.Agent,
			kindLabel,
			truncateString(oneLine, 200),
		)
	}

	rounded := wait.Round(time.Minute)
	if hasResetAt {
		return fmt.Sprintf(
			"agent %s hit a %s. Cooldown until %s (in %s). "+
				"Re-run after that, or pass --agent <other> to switch.",
			cls.Agent,
			kindLabel,
			cls.ResetAt.Format("3:04 PM"),
			rounded,
		)
	}
	return fmt.Sprintf(
		"agent %s hit a %s. Cooldown for ~%s. "+
			"Re-run after that, or pass --agent <other> to switch.",
		cls.Agent,
		kindLabel,
		rounded,
	)
}

// agentLimitLabel maps a limit kind to the noun phrase used in abort
// messages. Session and quota kinds get their own labels; every other
// kind (including transient limits) is reported as a generic
// "rate limit".
func agentLimitLabel(k agent.LimitKind) string {
	if k == agent.LimitKindSession {
		return "session limit"
	}
	if k == agent.LimitKindQuota {
		return "quota limit"
	}
	return "rate limit"
}

// fixJobParams configures a fixJobDirect operation.
type fixJobParams struct {
// RepoRoot is the repository path handed to the agent and to the git
// helpers that check for new commits and uncommitted changes.
RepoRoot string
// Agent is the agent supplied by the caller for this fix.
Agent agent.Agent
Output io.Writer // agent streaming output (nil = discard)
// Classify is the rate-limit classifier used for the commit-retry
// path. nil defaults to agent.ClassifyLimit. Tests inject a stub.
Classify agent.LimitClassifier
}

// fixJobResult contains the outcome of a fix operation.
Expand Down Expand Up @@ -320,6 +399,28 @@ func fixJobDirect(ctx context.Context, params fixJobParams, prompt string) (*fix
}
}
if _, retryErr := retryAgent.Review(ctx, params.RepoRoot, "HEAD", buildGenericCommitPrompt(), out); retryErr != nil {
// Classify the retry error so quota/session limits abort
// instead of being demoted to a warning — otherwise the fix
// loop keeps invoking the exhausted agent on every following
// job until the queue is empty.
classify := params.Classify
if classify == nil {
classify = agent.ClassifyLimit
}
cls := classify(agent.CanonicalName(retryAgent.Name()), retryErr.Error())
if cls.Kind == agent.LimitKindQuota || cls.Kind == agent.LimitKindSession {
// The first agent call left uncommitted changes; the
// retry that would have committed them was aborted by
// the limit. Surface the dirty-tree state so the user
// can decide whether to commit manually before the
// cooldown expires — the success path emits the same
// warning, and bare cooldown text would otherwise hide
// the regression.
if hasChanges, _ := git.HasUncommittedChanges(params.RepoRoot); hasChanges {
fmt.Fprintln(out, "Warning: Changes were made but not committed. Please review and commit manually.")
}
return nil, &agentLimitError{Classification: cls}
}
fmt.Fprintf(out, "Warning: commit agent failed: %v\n", retryErr)
}
if sha, ok := detectNewCommit(params.RepoRoot, headBefore); ok {
Expand Down Expand Up @@ -421,6 +522,14 @@ func runFixWithSeen(cmd *cobra.Command, jobIDs []int64, opts fixOptions, seen ma
if isConnectionError(err) {
return fmt.Errorf("daemon connection lost: %w", err)
}
// Agent quota/session-limit aborts must propagate even in
// discovery mode — otherwise the re-query loop keeps
// invoking the exhausted agent until every queued job is
// burned through with the same error.
var lim *agentLimitError
if errors.As(err, &lim) {
return err
}
// In discovery mode (seen != nil), log a warning and
// continue best-effort. For explicit job IDs (seen ==
// nil), return the error so the CLI exits non-zero.
Expand Down Expand Up @@ -828,6 +937,9 @@ func jobVerdict(job *storage.ReviewJob, review *storage.Review) string {
}

func fixSingleJob(cmd *cobra.Command, repoRoot string, jobID int64, opts fixOptions, tracker *fixSessionTracker) error {
if opts.classify == nil {
opts.classify = agent.ClassifyLimit
}
ctx := cmd.Context()
if ctx == nil {
ctx = context.Background()
Expand Down Expand Up @@ -927,6 +1039,7 @@ func fixSingleJob(cmd *cobra.Command, repoRoot string, jobID int64, opts fixOpti
RepoRoot: repoRoot,
Agent: currentAgent,
Output: capture,
Classify: opts.classify,
}, buildGenericFixPrompt(review.Output, minSev, comments))
// Flush capture FIRST so session extraction completes before reading SessionID.
capture.Flush()
Expand All @@ -935,6 +1048,27 @@ func fixSingleJob(cmd *cobra.Command, repoRoot string, jobID int64, opts fixOpti
}
if err != nil {
tracker.Reset()
// fixJobDirect already returns *agentLimitError for retry-path
// quota/session aborts; preserve it instead of re-classifying
// its user-facing message string.
var lim *agentLimitError
if errors.As(err, &lim) {
return err
}
cls := opts.classify(agent.CanonicalName(currentAgent.Name()), err.Error())
switch cls.Kind {
case agent.LimitKindQuota, agent.LimitKindSession:
return &agentLimitError{Classification: cls}
case agent.LimitKindNone:
if err.Error() != "" && !opts.quiet {
flat := strings.ReplaceAll(err.Error(), "\n", " ")
cmd.PrintErrf(
"warning: unclassified agent error from %s: %s\n",
currentAgent.Name(),
truncateString(flat, 200),
)
}
}
return err
}
tracker.Capture(capture.SessionID())
Expand Down Expand Up @@ -998,6 +1132,9 @@ type batchEntry struct {
// runFixBatch discovers jobs (or uses provided IDs), splits them into batches
// respecting max prompt size, and runs each batch as a single agent invocation.
func runFixBatch(cmd *cobra.Command, jobIDs []int64, branch string, allBranches, explicitBranch, newestFirst bool, batchSize int, opts fixOptions, tracker *fixSessionTracker) error {
if opts.classify == nil {
opts.classify = agent.ClassifyLimit
}
if err := ensureDaemon(); err != nil {
return err
}
Expand Down Expand Up @@ -1171,6 +1308,7 @@ func runFixBatch(cmd *cobra.Command, jobIDs []int64, branch string, allBranches,
RepoRoot: roots.worktreeRoot,
Agent: currentAgent,
Output: capture,
Classify: opts.classify,
}, prompt)
// Flush capture FIRST so session extraction completes before reading SessionID.
capture.Flush()
Expand All @@ -1179,6 +1317,26 @@ func runFixBatch(cmd *cobra.Command, jobIDs []int64, branch string, allBranches,
}
if err != nil {
tracker.Reset()
// Preserve a retry-path agentLimitError without
// re-classifying its user-facing message string.
var lim *agentLimitError
if errors.As(err, &lim) {
return err
}
cls := opts.classify(agent.CanonicalName(currentAgent.Name()), err.Error())
switch cls.Kind {
case agent.LimitKindQuota, agent.LimitKindSession:
return &agentLimitError{Classification: cls}
case agent.LimitKindNone:
if err.Error() != "" && !opts.quiet {
flat := strings.ReplaceAll(err.Error(), "\n", " ")
cmd.PrintErrf(
"warning: unclassified agent error from %s: %s\n",
currentAgent.Name(),
truncateString(flat, 200),
)
}
}
cmd.Printf("Warning: error in batch %d: %v\n", i+1, err)
continue
}
Expand Down
Loading
Loading