diff --git a/internal/daemon/ci_poller.go b/internal/daemon/ci_poller.go index b8a76775..96db2808 100644 --- a/internal/daemon/ci_poller.go +++ b/internal/daemon/ci_poller.go @@ -768,6 +768,14 @@ func (p *CIPoller) maybeDispatchAutoDesignForCI(ctx context.Context, repo *stora files, filesErr := gitpkg.GetFilesChanged(repo.RootPath, headSHA) diff, diffErr := gitpkg.GetDiff(repo.RootPath, headSHA) inputsIncomplete := filesErr != nil || diffErr != nil + var generatedFiles []string + if !inputsIncomplete { + var generatedErr error + generatedFiles, generatedErr = gitpkg.GetGeneratedFiles(repo.RootPath, headSHA, files) + if generatedErr != nil { + log.Printf("CI poller: git.GetGeneratedFiles(%s) failed, continuing without generated-file category: %v", headSHA, generatedErr) + } + } var commitID int64 subject := "" @@ -793,11 +801,12 @@ func (p *CIPoller) maybeDispatchAutoDesignForCI(ctx context.Context, repo *stora } in := autotype.Input{ - RepoPath: repo.RootPath, - GitRef: headSHA, - Diff: diff, - Message: classifierCommitMessage(repo.RootPath, headSHA, subject), - ChangedFiles: files, + RepoPath: repo.RootPath, + GitRef: headSHA, + Diff: diff, + Message: classifierCommitMessage(repo.RootPath, headSHA, subject), + ChangedFiles: files, + GeneratedFiles: generatedFiles, } d, err := autotype.Classify(ctx, in, hh, autotype.ErrOnClassifier{}) diff --git a/internal/daemon/e2e_auto_design_test.go b/internal/daemon/e2e_auto_design_test.go index 81e91162..be544320 100644 --- a/internal/daemon/e2e_auto_design_test.go +++ b/internal/daemon/e2e_auto_design_test.go @@ -214,6 +214,34 @@ func TestE2EAutoDesign_HeuristicSkip_DocsOnly(t *testing.T) { assert.EqualValues(1, snap.SkippedHeuristic) } +func TestE2EAutoDesign_HeuristicSkip_GeneratedOnly(t *testing.T) { + e := newAutoDesignE2E(t) + + require.NoError(t, os.WriteFile(filepath.Join(e.repo.Path(), ".gitattributes"), + []byte("internal/daemon_client/*.gen.go linguist-generated=true\n"), 0o644)) + e.repo.RunGit("add", ".gitattributes") + e.repo.RunGit("commit", "-m", "chore: mark generated client") + + var body strings.Builder + for range 600 { + body.WriteString("// generated line\n") + } + sha := e.repo.CommitFile("internal/daemon_client/client.gen.go", body.String(), + "feat: refresh generated client") + + e.enqueueReviewFor(sha, "feat: refresh generated client") + + got := e.eventuallyFindAutoDesign(sha) + assert := assert.New(t) + assert.Equal(storage.JobStatusSkipped, got.Status) + assert.Equal("review", got.JobType) + assert.Contains(got.SkipReason, "generated") + + snap := AutoDesignMetricsSnapshot() + assert.EqualValues(1, snap.SkippedHeuristic) + assert.EqualValues(0, snap.TriggeredHeuristic) +} + func TestE2EAutoDesign_HeuristicSkip_ConventionalPrefix(t *testing.T) { // Separate coverage for the skip_message_patterns branch (docs:, // chore:, etc.) — asserting that a conventional-prefix commit diff --git a/internal/daemon/server_auto_design.go b/internal/daemon/server_auto_design.go index b4f6c05d..1f419e44 100644 --- a/internal/daemon/server_auto_design.go +++ b/internal/daemon/server_auto_design.go @@ -146,6 +146,7 @@ func (s *Server) maybeDispatchAutoDesign(ctx context.Context, parent *storage.Re // file/diff lookup doesn't apply. diff := derefString(parent.DiffContent) var files []string + var generatedFiles []string if diff == "" && parent.JobType == storage.JobTypeReview && parent.GitRef != "" && parent.GitRef != "dirty" { inputsIncomplete := false var err error @@ -163,17 +164,24 @@ func (s *Server) maybeDispatchAutoDesign(ctx context.Context, parent *storage.Re files = nil } } + if !inputsIncomplete { + generatedFiles, err = git.GetGeneratedFiles(parent.RepoPath, parent.GitRef, files) + if err != nil { + log.Printf("auto-design: git.GetGeneratedFiles(%s) failed, continuing without generated-file category: %v", parent.GitRef, err) + } + } if inputsIncomplete { return s.enqueueClassifyJob(parent) } } in := autotype.Input{ - RepoPath: parent.RepoPath, - GitRef: parent.GitRef, - Diff: diff, - Message: classifierCommitMessage(parent.RepoPath, parent.GitRef, parent.CommitSubject), - ChangedFiles: files, + RepoPath: parent.RepoPath, + GitRef: parent.GitRef, + Diff: diff, + Message: classifierCommitMessage(parent.RepoPath, parent.GitRef, parent.CommitSubject), + ChangedFiles: files, + GeneratedFiles: generatedFiles, } d, err := autotype.Classify(ctx, in, hh, autotype.ErrOnClassifier{}) diff --git a/internal/git/git.go b/internal/git/git.go index 7e1f97aa..8499280a 100644 --- a/internal/git/git.go +++ b/internal/git/git.go @@ -14,6 +14,8 @@ import ( "strings" "time" "unicode/utf8" + + "github.com/bmatcuk/doublestar/v4" ) // normalizeMSYSPath converts MSYS-style paths (e.g., /c/Users/...) to Windows paths (C:\Users\...). @@ -258,6 +260,79 @@ func GetFilesChanged( return files, nil } +// GetGeneratedFiles returns the subset of files that should be categorized as +// generated. It combines repo-authored Linguist attributes with roborev's +// built-in generated-artifact patterns such as lockfiles. +func GetGeneratedFiles(repoPath, source string, files []string) ([]string, error) { + if len(files) == 0 { + return nil, nil + } + + generated := make(map[string]struct{}, len(files)) + for _, file := range files { + if isBuiltinGeneratedPath(file) { + generated[file] = struct{}{} + } + } + + attrGenerated, err := linguistGeneratedFiles(repoPath, source, files) + if err != nil { + return nil, err + } + for _, file := range attrGenerated { + generated[file] = struct{}{} + } + + out := make([]string, 0, len(generated)) + for _, file := range files { + if _, ok := generated[file]; ok { + out = append(out, file) + } + } + return out, nil +} + +func linguistGeneratedFiles(repoPath, source string, files []string) ([]string, error) { + args := []string{"check-attr", "-z"} + if source != "" { + args = append(args, "--source", source) + } + args = append(args, "--stdin", "linguist-generated") + + cmd := exec.Command("git", args...) + cmd.Dir = repoPath + cmd.Stdin = strings.NewReader(strings.Join(files, "\x00") + "\x00") + + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("git check-attr linguist-generated: %w", err) + } + + parts := strings.Split(string(out), "\x00") + generated := make([]string, 0) + for i := 0; i+2 < len(parts); i += 3 { + file, attr, value := parts[i], parts[i+1], strings.ToLower(parts[i+2]) + if attr != "linguist-generated" { + continue + } + if value == "set" || value == "true" { + generated = append(generated, file) + } + } + return generated, nil +} + +func isBuiltinGeneratedPath(file string) bool { + path := filepath.ToSlash(file) + for _, pattern := range builtinGeneratedGlobs { + ok, err := doublestar.Match(pattern, path) + if err == nil && ok { + return true + } + } + return false +} + // GetStat returns the stat summary for a commit func GetStat(repoPath, sha string) (string, error) { cmd := exec.Command("git", "show", "--stat", sha, "--format=") @@ -712,49 +787,59 @@ func GetDirtyDiff( return result.String(), nil } -// excludedPathPatterns contains pathspec patterns for files that should be excluded from diffs. -// These are typically generated files that add noise to code reviews. -// Uses :(exclude,glob)**/ form so patterns match at any directory depth. -// Directory patterns use :(exclude) without glob since git recognizes them as trees. -var excludedPathPatterns = []string{ +// builtinGeneratedGlobs contains file patterns that are generated artifacts +// even without a Linguist attribute in the repo. +var builtinGeneratedGlobs = []string{ // JavaScript / Node - ":(exclude,glob)**/package-lock.json", - ":(exclude,glob)**/yarn.lock", - ":(exclude,glob)**/pnpm-lock.yaml", - ":(exclude,glob)**/bun.lockb", - ":(exclude,glob)**/bun.lock", + "**/package-lock.json", + "**/yarn.lock", + "**/pnpm-lock.yaml", + "**/bun.lockb", + "**/bun.lock", // Python - ":(exclude,glob)**/uv.lock", - ":(exclude,glob)**/poetry.lock", - ":(exclude,glob)**/Pipfile.lock", - ":(exclude,glob)**/pdm.lock", + "**/uv.lock", + "**/poetry.lock", + "**/Pipfile.lock", + "**/pdm.lock", // Go - ":(exclude,glob)**/go.sum", + "**/go.sum", // Rust - ":(exclude,glob)**/Cargo.lock", - ":(exclude,glob)**/cargo.lock", // lowercase for case-insensitive filesystems + "**/Cargo.lock", + "**/cargo.lock", // lowercase for case-insensitive filesystems // Ruby - ":(exclude,glob)**/Gemfile.lock", + "**/Gemfile.lock", // PHP - ":(exclude,glob)**/composer.lock", + "**/composer.lock", // .NET - ":(exclude,glob)**/packages.lock.json", + "**/packages.lock.json", // Dart / Flutter - ":(exclude,glob)**/pubspec.lock", + "**/pubspec.lock", // Elixir - ":(exclude,glob)**/mix.lock", + "**/mix.lock", // Swift - ":(exclude,glob)**/Package.resolved", + "**/Package.resolved", // iOS / macOS - ":(exclude,glob)**/Podfile.lock", + "**/Podfile.lock", // Nix - ":(exclude,glob)**/flake.lock", + "**/flake.lock", // Directories — trailing /** matches all files inside at any depth - ":(exclude,glob)**/.beads/**", - ":(exclude,glob)**/.gocache/**", - ":(exclude,glob)**/.cache/**", + "**/.beads/**", + "**/.gocache/**", + "**/.cache/**", } +// excludedPathPatterns contains pathspec patterns for files that should be excluded from diffs. +// These are typically generated files that add noise to code reviews. +// Uses :(exclude,glob)**/ form so patterns match at any directory depth. +// Directory patterns use :(exclude) without glob since git recognizes them as trees. +var excludedPathPatterns = func() []string { + out := make([]string, 0, len(builtinGeneratedGlobs)) + for _, pattern := range builtinGeneratedGlobs { + out = append(out, ":(exclude,glob)"+pattern) + } + return out +}() + // FormatExcludeArgs converts user-provided exclude patterns (filenames // or globs) into git pathspec arguments. Plain names without path // separators get both **/name (file match) and **/name/** (directory diff --git a/internal/git/git_test.go b/internal/git/git_test.go index 9d3d65f1..efaf27d5 100644 --- a/internal/git/git_test.go +++ b/internal/git/git_test.go @@ -1088,6 +1088,25 @@ func TestGetDiffExcludesGeneratedFiles(t *testing.T) { }) } +func TestGetGeneratedFiles(t *testing.T) { + repo := NewTestRepoWithCommit(t) + repo.WriteFile(".gitattributes", "internal/daemon_client/*.gen.go linguist-generated=true\n") + repo.WriteFile("internal/daemon_client/client.gen.go", "generated\n") + repo.WriteFile("package-lock.json", "lock\n") + repo.WriteFile("src/keep.go", "package src\n") + repo.CommitAll("add generated files") + + files, err := GetFilesChanged(repo.Dir, repo.HeadSHA()) + require.NoError(t, err) + + generated, err := GetGeneratedFiles(repo.Dir, repo.HeadSHA(), files) + require.NoError(t, err) + assert.ElementsMatch(t, []string{ + "internal/daemon_client/client.gen.go", + "package-lock.json", + }, generated) +} + func TestGetDiffExcludesSlashedDirectory(t *testing.T) { repo := NewTestRepoWithCommit(t) repo.WriteFile("keep.txt", "keep\n") diff --git a/internal/review/autotype/autotype.go b/internal/review/autotype/autotype.go index 7213a944..2cfc84d6 100644 --- a/internal/review/autotype/autotype.go +++ b/internal/review/autotype/autotype.go @@ -93,11 +93,12 @@ type Decision struct { // Input is what the heuristics and classifier operate on. type Input struct { - RepoPath string - GitRef string - Diff string - Message string - ChangedFiles []string + RepoPath string + GitRef string + Diff string + Message string + ChangedFiles []string + GeneratedFiles []string } // Classifier returns a yes/no + reason for ambiguous cases. Implementations @@ -128,6 +129,10 @@ func Classify(ctx context.Context, in Input, h Heuristics, cls Classifier) (Deci } } + if allGenerated(in.ChangedFiles, in.GeneratedFiles) { + return heuristic(false, "generated change"), nil + } + for _, f := range in.ChangedFiles { ok, err := AnyMatch(h.TriggerPaths, f) if err != nil { @@ -186,3 +191,19 @@ func Classify(ctx context.Context, in Input, h Heuristics, cls Classifier) (Deci Reason: sanitizeReason(reason), }, nil } + +func allGenerated(files, generated []string) bool { + if len(files) == 0 || len(generated) == 0 { + return false + } + generatedSet := make(map[string]struct{}, len(generated)) + for _, f := range generated { + generatedSet[f] = struct{}{} + } + for _, f := range files { + if _, ok := generatedSet[f]; !ok { + return false + } + } + return true +} diff --git a/internal/review/autotype/classify_test.go b/internal/review/autotype/classify_test.go index 9f24fb05..6ff168e2 100644 --- a/internal/review/autotype/classify_test.go +++ b/internal/review/autotype/classify_test.go @@ -95,6 +95,19 @@ func TestClassify_SkipAllMatchPaths(t *testing.T) { assert.Contains(t, d.Reason, "doc/test-only") } +func TestClassify_SkipAllGeneratedFiles(t *testing.T) { + d, err := Classify(context.Background(), Input{ + ChangedFiles: []string{"internal/daemon_client/client.gen.go", "package-lock.json"}, + GeneratedFiles: []string{"internal/daemon_client/client.gen.go", "package-lock.json"}, + Diff: strings.Repeat("+line\n", 600), + Message: "feat: refresh generated client", + }, newTestHeuristics(), nil) + require.NoError(t, err) + assert.False(t, d.Run) + assert.Equal(t, MethodHeuristic, d.Method) + assert.Contains(t, d.Reason, "generated") +} + func TestClassify_SkipMessage(t *testing.T) { d, err := Classify(context.Background(), Input{ ChangedFiles: []string{"src/foo.go"},