Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions internal/daemon/ci_poller.go
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,14 @@ func (p *CIPoller) maybeDispatchAutoDesignForCI(ctx context.Context, repo *stora
files, filesErr := gitpkg.GetFilesChanged(repo.RootPath, headSHA)
diff, diffErr := gitpkg.GetDiff(repo.RootPath, headSHA)
inputsIncomplete := filesErr != nil || diffErr != nil
var generatedFiles []string
if !inputsIncomplete {
var generatedErr error
generatedFiles, generatedErr = gitpkg.GetGeneratedFiles(repo.RootPath, headSHA, files)
if generatedErr != nil {
log.Printf("CI poller: git.GetGeneratedFiles(%s) failed, continuing without generated-file category: %v", headSHA, generatedErr)
}
}

var commitID int64
subject := ""
Expand All @@ -793,11 +801,12 @@ func (p *CIPoller) maybeDispatchAutoDesignForCI(ctx context.Context, repo *stora
}

in := autotype.Input{
RepoPath: repo.RootPath,
GitRef: headSHA,
Diff: diff,
Message: classifierCommitMessage(repo.RootPath, headSHA, subject),
ChangedFiles: files,
RepoPath: repo.RootPath,
GitRef: headSHA,
Diff: diff,
Message: classifierCommitMessage(repo.RootPath, headSHA, subject),
ChangedFiles: files,
GeneratedFiles: generatedFiles,
}

d, err := autotype.Classify(ctx, in, hh, autotype.ErrOnClassifier{})
Expand Down
28 changes: 28 additions & 0 deletions internal/daemon/e2e_auto_design_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,34 @@ func TestE2EAutoDesign_HeuristicSkip_DocsOnly(t *testing.T) {
assert.EqualValues(1, snap.SkippedHeuristic)
}

func TestE2EAutoDesign_HeuristicSkip_GeneratedOnly(t *testing.T) {
e := newAutoDesignE2E(t)

require.NoError(t, os.WriteFile(filepath.Join(e.repo.Path(), ".gitattributes"),
[]byte("internal/daemon_client/*.gen.go linguist-generated=true\n"), 0o644))
e.repo.RunGit("add", ".gitattributes")
e.repo.RunGit("commit", "-m", "chore: mark generated client")

var body strings.Builder
for range 600 {
body.WriteString("// generated line\n")
}
sha := e.repo.CommitFile("internal/daemon_client/client.gen.go", body.String(),
"feat: refresh generated client")

e.enqueueReviewFor(sha, "feat: refresh generated client")

got := e.eventuallyFindAutoDesign(sha)
assert := assert.New(t)
assert.Equal(storage.JobStatusSkipped, got.Status)
assert.Equal("review", got.JobType)
assert.Contains(got.SkipReason, "generated")

snap := AutoDesignMetricsSnapshot()
assert.EqualValues(1, snap.SkippedHeuristic)
assert.EqualValues(0, snap.TriggeredHeuristic)
}

func TestE2EAutoDesign_HeuristicSkip_ConventionalPrefix(t *testing.T) {
// Separate coverage for the skip_message_patterns branch (docs:,
// chore:, etc.) — asserting that a conventional-prefix commit
Expand Down
18 changes: 13 additions & 5 deletions internal/daemon/server_auto_design.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ func (s *Server) maybeDispatchAutoDesign(ctx context.Context, parent *storage.Re
// file/diff lookup doesn't apply.
diff := derefString(parent.DiffContent)
var files []string
var generatedFiles []string
if diff == "" && parent.JobType == storage.JobTypeReview && parent.GitRef != "" && parent.GitRef != "dirty" {
inputsIncomplete := false
var err error
Expand All @@ -163,17 +164,24 @@ func (s *Server) maybeDispatchAutoDesign(ctx context.Context, parent *storage.Re
files = nil
}
}
if !inputsIncomplete {
generatedFiles, err = git.GetGeneratedFiles(parent.RepoPath, parent.GitRef, files)
if err != nil {
log.Printf("auto-design: git.GetGeneratedFiles(%s) failed, continuing without generated-file category: %v", parent.GitRef, err)
}
}
if inputsIncomplete {
return s.enqueueClassifyJob(parent)
}
}

in := autotype.Input{
RepoPath: parent.RepoPath,
GitRef: parent.GitRef,
Diff: diff,
Message: classifierCommitMessage(parent.RepoPath, parent.GitRef, parent.CommitSubject),
ChangedFiles: files,
RepoPath: parent.RepoPath,
GitRef: parent.GitRef,
Diff: diff,
Message: classifierCommitMessage(parent.RepoPath, parent.GitRef, parent.CommitSubject),
ChangedFiles: files,
GeneratedFiles: generatedFiles,
}

d, err := autotype.Classify(ctx, in, hh, autotype.ErrOnClassifier{})
Expand Down
141 changes: 113 additions & 28 deletions internal/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"strings"
"time"
"unicode/utf8"

"github.com/bmatcuk/doublestar/v4"
)

// normalizeMSYSPath converts MSYS-style paths (e.g., /c/Users/...) to Windows paths (C:\Users\...).
Expand Down Expand Up @@ -258,6 +260,79 @@ func GetFilesChanged(
return files, nil
}

// GetGeneratedFiles returns the subset of files that should be categorized as
// generated. It combines repo-authored Linguist attributes with roborev's
// built-in generated-artifact patterns such as lockfiles.
func GetGeneratedFiles(repoPath, source string, files []string) ([]string, error) {
if len(files) == 0 {
return nil, nil
}

generated := make(map[string]struct{}, len(files))
for _, file := range files {
if isBuiltinGeneratedPath(file) {
generated[file] = struct{}{}
}
}

attrGenerated, err := linguistGeneratedFiles(repoPath, source, files)
if err != nil {
return nil, err
}
for _, file := range attrGenerated {
generated[file] = struct{}{}
}

out := make([]string, 0, len(generated))
for _, file := range files {
if _, ok := generated[file]; ok {
out = append(out, file)
}
}
return out, nil
}

func linguistGeneratedFiles(repoPath, source string, files []string) ([]string, error) {
args := []string{"check-attr", "-z"}
if source != "" {
args = append(args, "--source", source)
}
args = append(args, "--stdin", "linguist-generated")

cmd := exec.Command("git", args...)
cmd.Dir = repoPath
cmd.Stdin = strings.NewReader(strings.Join(files, "\x00") + "\x00")

out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("git check-attr linguist-generated: %w", err)
}

parts := strings.Split(string(out), "\x00")
generated := make([]string, 0)
for i := 0; i+2 < len(parts); i += 3 {
file, attr, value := parts[i], parts[i+1], strings.ToLower(parts[i+2])
if attr != "linguist-generated" {
continue
}
if value == "set" || value == "true" {
generated = append(generated, file)
}
}
return generated, nil
}

func isBuiltinGeneratedPath(file string) bool {
path := filepath.ToSlash(file)
for _, pattern := range builtinGeneratedGlobs {
ok, err := doublestar.Match(pattern, path)
if err == nil && ok {
return true
}
}
return false
}

// GetStat returns the stat summary for a commit
func GetStat(repoPath, sha string) (string, error) {
cmd := exec.Command("git", "show", "--stat", sha, "--format=")
Expand Down Expand Up @@ -712,49 +787,59 @@ func GetDirtyDiff(
return result.String(), nil
}

// excludedPathPatterns contains pathspec patterns for files that should be excluded from diffs.
// These are typically generated files that add noise to code reviews.
// Uses :(exclude,glob)**/ form so patterns match at any directory depth.
// Directory patterns use :(exclude) without glob since git recognizes them as trees.
var excludedPathPatterns = []string{
// builtinGeneratedGlobs contains file patterns that are generated artifacts
// even without a Linguist attribute in the repo.
var builtinGeneratedGlobs = []string{
// JavaScript / Node
":(exclude,glob)**/package-lock.json",
":(exclude,glob)**/yarn.lock",
":(exclude,glob)**/pnpm-lock.yaml",
":(exclude,glob)**/bun.lockb",
":(exclude,glob)**/bun.lock",
"**/package-lock.json",
"**/yarn.lock",
"**/pnpm-lock.yaml",
"**/bun.lockb",
"**/bun.lock",
// Python
":(exclude,glob)**/uv.lock",
":(exclude,glob)**/poetry.lock",
":(exclude,glob)**/Pipfile.lock",
":(exclude,glob)**/pdm.lock",
"**/uv.lock",
"**/poetry.lock",
"**/Pipfile.lock",
"**/pdm.lock",
// Go
":(exclude,glob)**/go.sum",
"**/go.sum",
// Rust
":(exclude,glob)**/Cargo.lock",
":(exclude,glob)**/cargo.lock", // lowercase for case-insensitive filesystems
"**/Cargo.lock",
"**/cargo.lock", // lowercase for case-insensitive filesystems
// Ruby
":(exclude,glob)**/Gemfile.lock",
"**/Gemfile.lock",
// PHP
":(exclude,glob)**/composer.lock",
"**/composer.lock",
// .NET
":(exclude,glob)**/packages.lock.json",
"**/packages.lock.json",
// Dart / Flutter
":(exclude,glob)**/pubspec.lock",
"**/pubspec.lock",
// Elixir
":(exclude,glob)**/mix.lock",
"**/mix.lock",
// Swift
":(exclude,glob)**/Package.resolved",
"**/Package.resolved",
// iOS / macOS
":(exclude,glob)**/Podfile.lock",
"**/Podfile.lock",
// Nix
":(exclude,glob)**/flake.lock",
"**/flake.lock",
// Directories — trailing /** matches all files inside at any depth
":(exclude,glob)**/.beads/**",
":(exclude,glob)**/.gocache/**",
":(exclude,glob)**/.cache/**",
"**/.beads/**",
"**/.gocache/**",
"**/.cache/**",
}

// excludedPathPatterns contains pathspec patterns for files that should be excluded from diffs.
// These are typically generated files that add noise to code reviews.
// Uses :(exclude,glob)**/ form so patterns match at any directory depth.
// Directory patterns use :(exclude) without glob since git recognizes them as trees.
var excludedPathPatterns = func() []string {
out := make([]string, 0, len(builtinGeneratedGlobs))
for _, pattern := range builtinGeneratedGlobs {
out = append(out, ":(exclude,glob)"+pattern)
}
return out
}()

// FormatExcludeArgs converts user-provided exclude patterns (filenames
// or globs) into git pathspec arguments. Plain names without path
// separators get both **/name (file match) and **/name/** (directory
Expand Down
19 changes: 19 additions & 0 deletions internal/git/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,25 @@ func TestGetDiffExcludesGeneratedFiles(t *testing.T) {
})
}

func TestGetGeneratedFiles(t *testing.T) {
repo := NewTestRepoWithCommit(t)
repo.WriteFile(".gitattributes", "internal/daemon_client/*.gen.go linguist-generated=true\n")
repo.WriteFile("internal/daemon_client/client.gen.go", "generated\n")
repo.WriteFile("package-lock.json", "lock\n")
repo.WriteFile("src/keep.go", "package src\n")
repo.CommitAll("add generated files")

files, err := GetFilesChanged(repo.Dir, repo.HeadSHA())
require.NoError(t, err)

generated, err := GetGeneratedFiles(repo.Dir, repo.HeadSHA(), files)
require.NoError(t, err)
assert.ElementsMatch(t, []string{
"internal/daemon_client/client.gen.go",
"package-lock.json",
}, generated)
}

func TestGetDiffExcludesSlashedDirectory(t *testing.T) {
repo := NewTestRepoWithCommit(t)
repo.WriteFile("keep.txt", "keep\n")
Expand Down
31 changes: 26 additions & 5 deletions internal/review/autotype/autotype.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,12 @@ type Decision struct {

// Input is what the heuristics and classifier operate on.
type Input struct {
RepoPath string
GitRef string
Diff string
Message string
ChangedFiles []string
RepoPath string
GitRef string
Diff string
Message string
ChangedFiles []string
GeneratedFiles []string
}

// Classifier returns a yes/no + reason for ambiguous cases. Implementations
Expand Down Expand Up @@ -128,6 +129,10 @@ func Classify(ctx context.Context, in Input, h Heuristics, cls Classifier) (Deci
}
}

if allGenerated(in.ChangedFiles, in.GeneratedFiles) {
return heuristic(false, "generated change"), nil
}

for _, f := range in.ChangedFiles {
ok, err := AnyMatch(h.TriggerPaths, f)
if err != nil {
Expand Down Expand Up @@ -186,3 +191,19 @@ func Classify(ctx context.Context, in Input, h Heuristics, cls Classifier) (Deci
Reason: sanitizeReason(reason),
}, nil
}

func allGenerated(files, generated []string) bool {
if len(files) == 0 || len(generated) == 0 {
return false
}
generatedSet := make(map[string]struct{}, len(generated))
for _, f := range generated {
generatedSet[f] = struct{}{}
}
for _, f := range files {
if _, ok := generatedSet[f]; !ok {
return false
}
}
return true
}
13 changes: 13 additions & 0 deletions internal/review/autotype/classify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@ func TestClassify_SkipAllMatchPaths(t *testing.T) {
assert.Contains(t, d.Reason, "doc/test-only")
}

func TestClassify_SkipAllGeneratedFiles(t *testing.T) {
d, err := Classify(context.Background(), Input{
ChangedFiles: []string{"internal/daemon_client/client.gen.go", "package-lock.json"},
GeneratedFiles: []string{"internal/daemon_client/client.gen.go", "package-lock.json"},
Diff: strings.Repeat("+line\n", 600),
Message: "feat: refresh generated client",
}, newTestHeuristics(), nil)
require.NoError(t, err)
assert.False(t, d.Run)
assert.Equal(t, MethodHeuristic, d.Method)
assert.Contains(t, d.Reason, "generated")
}

func TestClassify_SkipMessage(t *testing.T) {
d, err := Classify(context.Background(), Input{
ChangedFiles: []string{"src/foo.go"},
Expand Down
Loading