From a142070b3049aa2d4d723df16074256c7cc8bc81 Mon Sep 17 00:00:00 2001 From: Preetam Dwivedi Date: Thu, 26 Feb 2026 14:33:13 -0800 Subject: [PATCH 1/2] feat(scorer): add scorer extension with heuristic and composite implementations Add scorer extension with ChangeStats entity and Scorer interface. Heuristic scorer buckets a single metric into probability ranges. Composite scorer combines multiple scorers via reduce functions (Min, Max, Avg). Both constructors validate inputs, return errors for nil functions or empty scorers, and return the Scorer interface. Individual stat functions provided for all ChangeStats fields. --- extension/scorer/BUILD.bazel | 13 +++ extension/scorer/composite/BUILD.bazel | 21 ++++ extension/scorer/composite/scorer.go | 65 +++++++++++ extension/scorer/composite/scorer_test.go | 134 ++++++++++++++++++++++ extension/scorer/heuristic/BUILD.bazel | 20 ++++ extension/scorer/heuristic/scorer.go | 78 +++++++++++++ extension/scorer/heuristic/scorer_test.go | 131 +++++++++++++++++++++ extension/scorer/mock/BUILD.bazel | 27 +++++ extension/scorer/scorer.go | 33 ++++++ 9 files changed, 522 insertions(+) create mode 100644 extension/scorer/BUILD.bazel create mode 100644 extension/scorer/composite/BUILD.bazel create mode 100644 extension/scorer/composite/scorer.go create mode 100644 extension/scorer/composite/scorer_test.go create mode 100644 extension/scorer/heuristic/BUILD.bazel create mode 100644 extension/scorer/heuristic/scorer.go create mode 100644 extension/scorer/heuristic/scorer_test.go create mode 100644 extension/scorer/mock/BUILD.bazel create mode 100644 extension/scorer/scorer.go diff --git a/extension/scorer/BUILD.bazel b/extension/scorer/BUILD.bazel new file mode 100644 index 00000000..e351e691 --- /dev/null +++ b/extension/scorer/BUILD.bazel @@ -0,0 +1,13 @@ +load("@rules_go//go:def.bzl", "go_library") + +exports_files( + ["scorer.go"], + visibility = ["//extension/scorer/mock:__pkg__"], +) + +go_library( + name = "scorer", + srcs = ["scorer.go"], + importpath = "github.com/uber/submitqueue/extension/scorer", + visibility = ["//visibility:public"], +) diff --git a/extension/scorer/composite/BUILD.bazel b/extension/scorer/composite/BUILD.bazel new file mode 100644 index 00000000..1cbe490b --- /dev/null +++ b/extension/scorer/composite/BUILD.bazel @@ -0,0 +1,21 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "composite", + srcs = ["scorer.go"], + importpath = "github.com/uber/submitqueue/extension/scorer/composite", + visibility = ["//visibility:public"], + deps = ["//extension/scorer"], +) + +go_test( + name = "composite_test", + srcs = ["scorer_test.go"], + embed = [":composite"], + deps = [ + "//extension/scorer", + "//extension/scorer/heuristic", + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/extension/scorer/composite/scorer.go b/extension/scorer/composite/scorer.go new file mode 100644 index 00000000..15e549e3 --- /dev/null +++ b/extension/scorer/composite/scorer.go @@ -0,0 +1,65 @@ +package composite + +import ( + "context" + "fmt" + "slices" + + "github.com/uber/submitqueue/extension/scorer" +) + +// ReduceFunc combines multiple scores into a single score. +type ReduceFunc func([]float64) float64 + +// Min returns the minimum value from scores. +func Min(scores []float64) float64 { return slices.Min(scores) } + +// Max returns the maximum value from scores. +func Max(scores []float64) float64 { return slices.Max(scores) } + +// Avg returns the arithmetic mean of scores. +func Avg(scores []float64) float64 { + var sum float64 + for _, s := range scores { + sum += s + } + return sum / float64(len(scores)) +} + +// compositeScorer combines multiple scorers into a single score using a reduce function. +type compositeScorer struct { + // scorers is the list of child scorers to evaluate. + scorers []scorer.Scorer + // reduce combines individual scores into a single value. + reduce ReduceFunc +} + +// New creates a composite Scorer that evaluates all child scorers and combines +// their results using the given reduce function. +// Returns an error if scorers is empty or reduce is nil. +func New(scorers []scorer.Scorer, reduce ReduceFunc) (scorer.Scorer, error) { + if len(scorers) == 0 { + return nil, fmt.Errorf("composite.New: scorers must not be empty") + } + if reduce == nil { + return nil, fmt.Errorf("composite.New: reduce must not be nil") + } + return &compositeScorer{ + scorers: scorers, + reduce: reduce, + }, nil +} + +// Score evaluates all child scorers and combines their results using the reduce function. +// If any child scorer returns an error, that error is returned immediately. +func (c *compositeScorer) Score(ctx context.Context, stats scorer.ChangeStats) (float64, error) { + scores := make([]float64, len(c.scorers)) + for i, s := range c.scorers { + score, err := s.Score(ctx, stats) + if err != nil { + return 0, err + } + scores[i] = score + } + return c.reduce(scores), nil +} diff --git a/extension/scorer/composite/scorer_test.go b/extension/scorer/composite/scorer_test.go new file mode 100644 index 00000000..558455e8 --- /dev/null +++ b/extension/scorer/composite/scorer_test.go @@ -0,0 +1,134 @@ +package composite + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber/submitqueue/extension/scorer" + "github.com/uber/submitqueue/extension/scorer/heuristic" +) + +func TestScorer_Score(t *testing.T) { + fileScorer, err := heuristic.New([]heuristic.Bucket{ + {Min: 0, Max: 5, Score: 0.9}, + {Min: 6, Max: 50, Score: 0.7}, + {Min: 51, Max: 1000, Score: 0.4}, + }, heuristic.FilesChanged) + require.NoError(t, err) + + depScorer, err := heuristic.New([]heuristic.Bucket{ + {Min: 0, Max: 10, Score: 0.95}, + {Min: 11, Max: 100, Score: 0.6}, + {Min: 101, Max: 10000, Score: 0.3}, + }, heuristic.DependencyCount) + require.NoError(t, err) + + linesScorer, err := heuristic.New([]heuristic.Bucket{ + {Min: 0, Max: 100, Score: 0.85}, + {Min: 101, Max: 500, Score: 0.5}, + }, heuristic.LinesAdded) + require.NoError(t, err) + + tests := []struct { + name string + scorers []scorer.Scorer + reduce ReduceFunc + stats scorer.ChangeStats + want float64 + wantErr bool + }{ + { + name: "min of two scorers", + scorers: []scorer.Scorer{fileScorer, depScorer}, + reduce: Min, + stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 50}, + want: 0.6, + }, + { + name: "max of two scorers", + scorers: []scorer.Scorer{fileScorer, depScorer}, + reduce: Max, + stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 50}, + want: 0.9, + }, + { + name: "avg of two scorers", + scorers: []scorer.Scorer{fileScorer, depScorer}, + reduce: Avg, + stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 50}, + want: 0.75, + }, + { + name: "single scorer passthrough", + scorers: []scorer.Scorer{fileScorer}, + reduce: Avg, + stats: scorer.ChangeStats{FilesChanged: 3}, + want: 0.9, + }, + { + name: "child scorer error propagates", + scorers: []scorer.Scorer{fileScorer, depScorer}, + reduce: Min, + stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: -1}, + wantErr: true, + }, + { + name: "avg of three scorers", + scorers: []scorer.Scorer{fileScorer, depScorer, linesScorer}, + reduce: Avg, + stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 5, LinesAdded: 50}, + want: 0.9, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s, err := New(tt.scorers, tt.reduce) + require.NoError(t, err) + got, err := s.Score(context.Background(), tt.stats) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.InDelta(t, tt.want, got, 1e-9) + }) + } +} + +// errorScorer always returns an error. +type errorScorer struct{} + +func (e *errorScorer) Score(_ context.Context, _ scorer.ChangeStats) (float64, error) { + return 0, fmt.Errorf("scorer failed") +} + +func TestScorer_Score_ErrorFromFirstScorer(t *testing.T) { + h, err := heuristic.New([]heuristic.Bucket{ + {Min: 0, Max: 1000, Score: 0.9}, + }, heuristic.FilesChanged) + require.NoError(t, err) + + s, err := New([]scorer.Scorer{&errorScorer{}, h}, Min) + require.NoError(t, err) + _, err = s.Score(context.Background(), scorer.ChangeStats{}) + require.Error(t, err) +} + +func TestNew_EmptyScorers(t *testing.T) { + _, err := New([]scorer.Scorer{}, Min) + require.Error(t, err) +} + +func TestNew_NilReduce(t *testing.T) { + h, err := heuristic.New([]heuristic.Bucket{ + {Min: 0, Max: 1000, Score: 0.9}, + }, heuristic.FilesChanged) + require.NoError(t, err) + + _, err = New([]scorer.Scorer{h}, nil) + require.Error(t, err) +} diff --git a/extension/scorer/heuristic/BUILD.bazel b/extension/scorer/heuristic/BUILD.bazel new file mode 100644 index 00000000..1c7d8c0a --- /dev/null +++ b/extension/scorer/heuristic/BUILD.bazel @@ -0,0 +1,20 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "heuristic", + srcs = ["scorer.go"], + importpath = "github.com/uber/submitqueue/extension/scorer/heuristic", + visibility = ["//visibility:public"], + deps = ["//extension/scorer"], +) + +go_test( + name = "heuristic_test", + srcs = ["scorer_test.go"], + embed = [":heuristic"], + deps = [ + "//extension/scorer", + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/extension/scorer/heuristic/scorer.go b/extension/scorer/heuristic/scorer.go new file mode 100644 index 00000000..72dee245 --- /dev/null +++ b/extension/scorer/heuristic/scorer.go @@ -0,0 +1,78 @@ +package heuristic + +import ( + "context" + "fmt" + + "github.com/uber/submitqueue/extension/scorer" +) + +// Bucket defines a range [Min, Max] mapped to a probability Score. +type Bucket struct { + // Min is the inclusive lower bound of the range. + Min int + // Max is the inclusive upper bound of the range. + Max int + // Score is the probability returned when the metric falls within this bucket. + Score float64 +} + +// StatFunc extracts a single numeric value from ChangeStats for bucketing. +type StatFunc func(scorer.ChangeStats) int + +// FilesChanged returns the number of files changed from ChangeStats. +func FilesChanged(stats scorer.ChangeStats) int { return stats.FilesChanged } + +// LinesAdded returns the number of lines added from ChangeStats. +func LinesAdded(stats scorer.ChangeStats) int { return stats.LinesAdded } + +// LinesDeleted returns the number of lines deleted from ChangeStats. +func LinesDeleted(stats scorer.ChangeStats) int { return stats.LinesDeleted } + +// LinesModified returns the number of lines modified from ChangeStats. +func LinesModified(stats scorer.ChangeStats) int { return stats.LinesModified } + +// BuildTargetsAdded returns the number of build targets added from ChangeStats. +func BuildTargetsAdded(stats scorer.ChangeStats) int { return stats.BuildTargetsAdded } + +// BuildTargetsRemoved returns the number of build targets removed from ChangeStats. +func BuildTargetsRemoved(stats scorer.ChangeStats) int { return stats.BuildTargetsRemoved } + +// BuildTargetsChanged returns the number of build targets modified from ChangeStats. +func BuildTargetsChanged(stats scorer.ChangeStats) int { return stats.BuildTargetsChanged } + +// DependencyCount returns the number of downstream dependencies affected from ChangeStats. +func DependencyCount(stats scorer.ChangeStats) int { return stats.DependencyCount } + +// heuristicScorer computes a success probability by bucketing a metric extracted from ChangeStats. +// It follows the Java HeuristicsBasedSuccessPredictor pattern. +type heuristicScorer struct { + // buckets is the list of ranges to match against. + buckets []Bucket + // statFunc extracts the numeric metric from ChangeStats. + statFunc StatFunc +} + +// New creates a new heuristic Scorer with the given buckets and stat function. +// Returns an error if statFunc is nil. +func New(buckets []Bucket, statFunc StatFunc) (scorer.Scorer, error) { + if statFunc == nil { + return nil, fmt.Errorf("heuristic.New: statFunc must not be nil") + } + return &heuristicScorer{ + buckets: buckets, + statFunc: statFunc, + }, nil +} + +// Score returns the probability score for the first bucket whose range [Min, Max] +// contains the extracted metric value. Returns an error if no bucket matches. +func (s *heuristicScorer) Score(_ context.Context, stats scorer.ChangeStats) (float64, error) { + metric := s.statFunc(stats) + for _, b := range s.buckets { + if metric >= b.Min && metric <= b.Max { + return b.Score, nil + } + } + return 0, fmt.Errorf("no bucket matches metric value %d", metric) +} diff --git a/extension/scorer/heuristic/scorer_test.go b/extension/scorer/heuristic/scorer_test.go new file mode 100644 index 00000000..f29a4dc3 --- /dev/null +++ b/extension/scorer/heuristic/scorer_test.go @@ -0,0 +1,131 @@ +package heuristic + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber/submitqueue/extension/scorer" +) + +func TestScorer_Score(t *testing.T) { + tests := []struct { + name string + buckets []Bucket + statFunc StatFunc + stats scorer.ChangeStats + want float64 + wantErr bool + }{ + { + name: "single bucket covering all values", + buckets: []Bucket{ + {Min: 0, Max: 1000, Score: 0.9}, + }, + statFunc: FilesChanged, + stats: scorer.ChangeStats{FilesChanged: 5}, + want: 0.9, + }, + { + name: "multiple buckets with different ranges", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + {Min: 21, Max: 100, Score: 0.5}, + }, + statFunc: FilesChanged, + stats: scorer.ChangeStats{FilesChanged: 10}, + want: 0.75, + }, + { + name: "exact min boundary", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + }, + statFunc: FilesChanged, + stats: scorer.ChangeStats{FilesChanged: 6}, + want: 0.75, + }, + { + name: "exact max boundary", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + }, + statFunc: FilesChanged, + stats: scorer.ChangeStats{FilesChanged: 5}, + want: 0.95, + }, + { + name: "no matching bucket", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 10, Max: 20, Score: 0.75}, + }, + statFunc: FilesChanged, + stats: scorer.ChangeStats{FilesChanged: 7}, + wantErr: true, + }, + { + name: "lines added stat function", + buckets: []Bucket{ + {Min: 0, Max: 100, Score: 0.9}, + {Min: 101, Max: 500, Score: 0.7}, + }, + statFunc: LinesAdded, + stats: scorer.ChangeStats{LinesAdded: 200}, + want: 0.7, + }, + { + name: "lines deleted stat function", + buckets: []Bucket{ + {Min: 0, Max: 50, Score: 0.95}, + {Min: 51, Max: 200, Score: 0.8}, + }, + statFunc: LinesDeleted, + stats: scorer.ChangeStats{LinesDeleted: 10}, + want: 0.95, + }, + { + name: "zero-value change stats", + buckets: []Bucket{ + {Min: 0, Max: 0, Score: 1.0}, + {Min: 1, Max: 100, Score: 0.8}, + }, + statFunc: FilesChanged, + stats: scorer.ChangeStats{}, + want: 1.0, + }, + { + name: "first matching bucket wins", + buckets: []Bucket{ + {Min: 0, Max: 10, Score: 0.9}, + {Min: 5, Max: 15, Score: 0.7}, + }, + statFunc: FilesChanged, + stats: scorer.ChangeStats{FilesChanged: 7}, + want: 0.9, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s, err := New(tt.buckets, tt.statFunc) + require.NoError(t, err) + got, err := s.Score(context.Background(), tt.stats) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestNew_NilStatFunc(t *testing.T) { + _, err := New([]Bucket{{Min: 0, Max: 10, Score: 0.85}}, nil) + require.Error(t, err) +} diff --git a/extension/scorer/mock/BUILD.bazel b/extension/scorer/mock/BUILD.bazel new file mode 100644 index 00000000..097b1138 --- /dev/null +++ b/extension/scorer/mock/BUILD.bazel @@ -0,0 +1,27 @@ +load("@rules_go//extras:gomock.bzl", "gomock") +load("@rules_go//go:def.bzl", "go_library") + +_MOCKGEN = "@org_uber_go_mock//mockgen" + +gomock( + name = "mock_scorer_src", + out = "scorer_mock.go", + mockgen_tool = _MOCKGEN, + package = "mock", + source = "//extension/scorer:scorer.go", + source_importpath = "github.com/uber/submitqueue/extension/scorer", +) + +# gazelle:ignore +go_library( + name = "mock", + srcs = [ + ":mock_scorer_src", + ], + importpath = "github.com/uber/submitqueue/extension/scorer/mock", + visibility = ["//visibility:public"], + deps = [ + "//extension/scorer", + "@org_uber_go_mock//gomock", + ], +) diff --git a/extension/scorer/scorer.go b/extension/scorer/scorer.go new file mode 100644 index 00000000..4e28ff19 --- /dev/null +++ b/extension/scorer/scorer.go @@ -0,0 +1,33 @@ +package scorer + +//go:generate mockgen -source=scorer.go -destination=mock/scorer.go -package=mock + +import "context" + +// ChangeStats contains aggregate statistics about a code change used for scoring. +type ChangeStats struct { + // FilesChanged is the number of files modified. + FilesChanged int + // LinesAdded is the total lines added across all files. + LinesAdded int + // LinesDeleted is the total lines deleted across all files. + LinesDeleted int + // LinesModified is the total lines modified across all files. + LinesModified int + + // BuildTargetsAdded is the number of build targets added. + BuildTargetsAdded int + // BuildTargetsRemoved is the number of build targets removed. + BuildTargetsRemoved int + // BuildTargetsChanged is the number of build targets modified. + BuildTargetsChanged int + // DependencyCount is the number of downstream dependencies affected. + DependencyCount int +} + +// Scorer computes a success probability score for a change based on its characteristics. +type Scorer interface { + // Score returns a probability between 0.0 and 1.0 indicating the likelihood + // of a successful land for a change with the given statistics. + Score(ctx context.Context, stats ChangeStats) (float64, error) +} From addda5f272355bf4e886703eafe8c13cbbfde4a4 Mon Sep 17 00:00:00 2001 From: Preetam Dwivedi Date: Sun, 1 Mar 2026 10:35:56 -0800 Subject: [PATCH 2/2] refactor(scorer): address PR #97 review comments - Generalize Scorer interface to Score(ctx, entity.Change) instead of Score(ctx, ChangeStats), making it agnostic to input data shape - Heuristic scorer takes a ValueFunc to extract numeric value from Change - Composite scorer uses named scorers (map[string]Scorer) so ReduceFunc receives map[string]float64 for domain-aware aggregation - Constructors panic on contract violations instead of returning errors - Add extension/scorer/README.md Co-Authored-By: Claude Opus 4.6 --- extension/scorer/BUILD.bazel | 1 + extension/scorer/README.md | 64 +++++++++ extension/scorer/composite/BUILD.bazel | 7 +- extension/scorer/composite/scorer.go | 67 +++++---- extension/scorer/composite/scorer_test.go | 168 +++++++++++----------- extension/scorer/heuristic/BUILD.bazel | 7 +- extension/scorer/heuristic/scorer.go | 68 ++++----- extension/scorer/heuristic/scorer_test.go | 96 ++++++------- extension/scorer/mock/BUILD.bazel | 1 + extension/scorer/scorer.go | 29 +--- 10 files changed, 274 insertions(+), 234 deletions(-) create mode 100644 extension/scorer/README.md diff --git a/extension/scorer/BUILD.bazel b/extension/scorer/BUILD.bazel index e351e691..e52e8eff 100644 --- a/extension/scorer/BUILD.bazel +++ b/extension/scorer/BUILD.bazel @@ -10,4 +10,5 @@ go_library( srcs = ["scorer.go"], importpath = "github.com/uber/submitqueue/extension/scorer", visibility = ["//visibility:public"], + deps = ["//entity"], ) diff --git a/extension/scorer/README.md b/extension/scorer/README.md new file mode 100644 index 00000000..21c4ee60 --- /dev/null +++ b/extension/scorer/README.md @@ -0,0 +1,64 @@ +# Scorer + +Vendor-agnostic interface for computing success probability scores for code changes. + +## Interface + +### Scorer + +Computes a success probability for a given change. + +```go +type Scorer interface { + Score(ctx context.Context, change entity.Change) (float64, error) +} +``` + +- **change**: A `entity.Change` identifying the code change to score. +- **Score**: Returns a probability between 0.0 and 1.0 indicating the likelihood of a successful land. Returns an error if scoring fails. + +## Implementations + +### Heuristic + +Scores a change by extracting a numeric value via a `ValueFunc` and matching it against ordered buckets. Each bucket maps a `[Min, Max]` range to a probability. + +```go +s := heuristic.New( + []heuristic.Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + {Min: 21, Max: 100, Score: 0.5}, + }, + func(ctx context.Context, change entity.Change) (int, error) { + // resolve the change into a numeric metric + return filesChanged, nil + }, +) + +score, err := s.Score(ctx, change) +``` + +### Composite + +Combines multiple named scorers into a single score using a reduce function. The reduce function receives a `map[string]float64` mapping scorer names to their scores, enabling domain-aware aggregation. + +Built-in reduce functions: `Min`, `Max`, `Avg`. + +```go +s := composite.New( + map[string]scorer.Scorer{ + "files": fileScorer, + "deps": depScorer, + }, + composite.Min, +) + +score, err := s.Score(ctx, change) +``` + +## Implementing a Backend + +1. Create `extension/scorer/{backend}/` directory +2. Implement the `Scorer` interface +3. Accept `entity.Change` and resolve it into whatever data the implementation needs diff --git a/extension/scorer/composite/BUILD.bazel b/extension/scorer/composite/BUILD.bazel index 1cbe490b..3016a608 100644 --- a/extension/scorer/composite/BUILD.bazel +++ b/extension/scorer/composite/BUILD.bazel @@ -5,7 +5,10 @@ go_library( srcs = ["scorer.go"], importpath = "github.com/uber/submitqueue/extension/scorer/composite", visibility = ["//visibility:public"], - deps = ["//extension/scorer"], + deps = [ + "//entity", + "//extension/scorer", + ], ) go_test( @@ -13,8 +16,8 @@ go_test( srcs = ["scorer_test.go"], embed = [":composite"], deps = [ + "//entity", "//extension/scorer", - "//extension/scorer/heuristic", "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", ], diff --git a/extension/scorer/composite/scorer.go b/extension/scorer/composite/scorer.go index 15e549e3..e1c7eba9 100644 --- a/extension/scorer/composite/scorer.go +++ b/extension/scorer/composite/scorer.go @@ -2,64 +2,83 @@ package composite import ( "context" - "fmt" - "slices" + "github.com/uber/submitqueue/entity" "github.com/uber/submitqueue/extension/scorer" ) -// ReduceFunc combines multiple scores into a single score. -type ReduceFunc func([]float64) float64 +// ReduceFunc combines named scores into a single score. +type ReduceFunc func(map[string]float64) float64 // Min returns the minimum value from scores. -func Min(scores []float64) float64 { return slices.Min(scores) } +func Min(scores map[string]float64) float64 { + var min float64 + first := true + for _, v := range scores { + if first || v < min { + min = v + first = false + } + } + return min +} // Max returns the maximum value from scores. -func Max(scores []float64) float64 { return slices.Max(scores) } +func Max(scores map[string]float64) float64 { + var max float64 + first := true + for _, v := range scores { + if first || v > max { + max = v + first = false + } + } + return max +} // Avg returns the arithmetic mean of scores. -func Avg(scores []float64) float64 { +func Avg(scores map[string]float64) float64 { var sum float64 - for _, s := range scores { - sum += s + for _, v := range scores { + sum += v } return sum / float64(len(scores)) } -// compositeScorer combines multiple scorers into a single score using a reduce function. +// compositeScorer combines multiple named scorers into a single score using a reduce function. type compositeScorer struct { - // scorers is the list of child scorers to evaluate. - scorers []scorer.Scorer - // reduce combines individual scores into a single value. + // scorers maps scorer names to their implementations. + scorers map[string]scorer.Scorer + // reduce combines named scores into a single value. reduce ReduceFunc } -// New creates a composite Scorer that evaluates all child scorers and combines +// New creates a composite Scorer that evaluates all named child scorers and combines // their results using the given reduce function. -// Returns an error if scorers is empty or reduce is nil. -func New(scorers []scorer.Scorer, reduce ReduceFunc) (scorer.Scorer, error) { +// Panics if scorers is empty or reduce is nil. +func New(scorers map[string]scorer.Scorer, reduce ReduceFunc) scorer.Scorer { if len(scorers) == 0 { - return nil, fmt.Errorf("composite.New: scorers must not be empty") + panic("composite.New: scorers must not be empty") } if reduce == nil { - return nil, fmt.Errorf("composite.New: reduce must not be nil") + panic("composite.New: reduce must not be nil") } return &compositeScorer{ scorers: scorers, reduce: reduce, - }, nil + } } // Score evaluates all child scorers and combines their results using the reduce function. // If any child scorer returns an error, that error is returned immediately. -func (c *compositeScorer) Score(ctx context.Context, stats scorer.ChangeStats) (float64, error) { - scores := make([]float64, len(c.scorers)) - for i, s := range c.scorers { - score, err := s.Score(ctx, stats) +func (c *compositeScorer) Score(ctx context.Context, change entity.Change) (float64, error) { + scores := make(map[string]float64, len(c.scorers)) + for name, s := range c.scorers { + score, err := s.Score(ctx, change) if err != nil { return 0, err } - scores[i] = score + scores[name] = score } return c.reduce(scores), nil } diff --git a/extension/scorer/composite/scorer_test.go b/extension/scorer/composite/scorer_test.go index 558455e8..6ee2e690 100644 --- a/extension/scorer/composite/scorer_test.go +++ b/extension/scorer/composite/scorer_test.go @@ -7,128 +7,126 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/uber/submitqueue/entity" "github.com/uber/submitqueue/extension/scorer" - "github.com/uber/submitqueue/extension/scorer/heuristic" ) -func TestScorer_Score(t *testing.T) { - fileScorer, err := heuristic.New([]heuristic.Bucket{ - {Min: 0, Max: 5, Score: 0.9}, - {Min: 6, Max: 50, Score: 0.7}, - {Min: 51, Max: 1000, Score: 0.4}, - }, heuristic.FilesChanged) - require.NoError(t, err) +// fixedScorer always returns a fixed score. +type fixedScorer struct { + score float64 +} - depScorer, err := heuristic.New([]heuristic.Bucket{ - {Min: 0, Max: 10, Score: 0.95}, - {Min: 11, Max: 100, Score: 0.6}, - {Min: 101, Max: 10000, Score: 0.3}, - }, heuristic.DependencyCount) - require.NoError(t, err) +func (f *fixedScorer) Score(_ context.Context, _ entity.Change) (float64, error) { + return f.score, nil +} - linesScorer, err := heuristic.New([]heuristic.Bucket{ - {Min: 0, Max: 100, Score: 0.85}, - {Min: 101, Max: 500, Score: 0.5}, - }, heuristic.LinesAdded) - require.NoError(t, err) +// errorScorer always returns an error. +type errorScorer struct{} +func (e *errorScorer) Score(_ context.Context, _ entity.Change) (float64, error) { + return 0, fmt.Errorf("scorer failed") +} + +func TestScorer_Score(t *testing.T) { tests := []struct { name string - scorers []scorer.Scorer + scorers map[string]scorer.Scorer reduce ReduceFunc - stats scorer.ChangeStats want float64 - wantErr bool }{ { - name: "min of two scorers", - scorers: []scorer.Scorer{fileScorer, depScorer}, - reduce: Min, - stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 50}, - want: 0.6, + name: "min of two scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.6}, + }, + reduce: Min, + want: 0.6, }, { - name: "max of two scorers", - scorers: []scorer.Scorer{fileScorer, depScorer}, - reduce: Max, - stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 50}, - want: 0.9, + name: "max of two scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.6}, + }, + reduce: Max, + want: 0.9, }, { - name: "avg of two scorers", - scorers: []scorer.Scorer{fileScorer, depScorer}, - reduce: Avg, - stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 50}, - want: 0.75, + name: "avg of two scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.6}, + }, + reduce: Avg, + want: 0.75, }, { - name: "single scorer passthrough", - scorers: []scorer.Scorer{fileScorer}, - reduce: Avg, - stats: scorer.ChangeStats{FilesChanged: 3}, - want: 0.9, + name: "single scorer passthrough", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + }, + reduce: Avg, + want: 0.9, }, { - name: "child scorer error propagates", - scorers: []scorer.Scorer{fileScorer, depScorer}, - reduce: Min, - stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: -1}, - wantErr: true, - }, - { - name: "avg of three scorers", - scorers: []scorer.Scorer{fileScorer, depScorer, linesScorer}, - reduce: Avg, - stats: scorer.ChangeStats{FilesChanged: 3, DependencyCount: 5, LinesAdded: 50}, - want: 0.9, + name: "avg of three scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.95}, + "lines": &fixedScorer{0.85}, + }, + reduce: Avg, + want: 0.9, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s, err := New(tt.scorers, tt.reduce) - require.NoError(t, err) - got, err := s.Score(context.Background(), tt.stats) - if tt.wantErr { - require.Error(t, err) - return - } + s := New(tt.scorers, tt.reduce) + got, err := s.Score(context.Background(), entity.Change{}) require.NoError(t, err) assert.InDelta(t, tt.want, got, 1e-9) }) } } -// errorScorer always returns an error. -type errorScorer struct{} - -func (e *errorScorer) Score(_ context.Context, _ scorer.ChangeStats) (float64, error) { - return 0, fmt.Errorf("scorer failed") -} - -func TestScorer_Score_ErrorFromFirstScorer(t *testing.T) { - h, err := heuristic.New([]heuristic.Bucket{ - {Min: 0, Max: 1000, Score: 0.9}, - }, heuristic.FilesChanged) - require.NoError(t, err) - - s, err := New([]scorer.Scorer{&errorScorer{}, h}, Min) - require.NoError(t, err) - _, err = s.Score(context.Background(), scorer.ChangeStats{}) +func TestScorer_Score_ChildError(t *testing.T) { + s := New(map[string]scorer.Scorer{ + "error": &errorScorer{}, + "files": &fixedScorer{0.9}, + }, Min) + _, err := s.Score(context.Background(), entity.Change{}) require.Error(t, err) } func TestNew_EmptyScorers(t *testing.T) { - _, err := New([]scorer.Scorer{}, Min) - require.Error(t, err) + assert.Panics(t, func() { + New(map[string]scorer.Scorer{}, Min) + }) } func TestNew_NilReduce(t *testing.T) { - h, err := heuristic.New([]heuristic.Bucket{ - {Min: 0, Max: 1000, Score: 0.9}, - }, heuristic.FilesChanged) - require.NoError(t, err) + assert.Panics(t, func() { + New(map[string]scorer.Scorer{"files": &fixedScorer{0.9}}, nil) + }) +} - _, err = New([]scorer.Scorer{h}, nil) - require.Error(t, err) +func TestReduceFunc_ReceivesNames(t *testing.T) { + var receivedNames []string + custom := func(scores map[string]float64) float64 { + for name := range scores { + receivedNames = append(receivedNames, name) + } + return scores["files"] + } + + s := New(map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.95}, + }, custom) + got, err := s.Score(context.Background(), entity.Change{}) + require.NoError(t, err) + assert.Equal(t, 0.9, got) + assert.ElementsMatch(t, []string{"files", "deps"}, receivedNames) } diff --git a/extension/scorer/heuristic/BUILD.bazel b/extension/scorer/heuristic/BUILD.bazel index 1c7d8c0a..f75806d6 100644 --- a/extension/scorer/heuristic/BUILD.bazel +++ b/extension/scorer/heuristic/BUILD.bazel @@ -5,7 +5,10 @@ go_library( srcs = ["scorer.go"], importpath = "github.com/uber/submitqueue/extension/scorer/heuristic", visibility = ["//visibility:public"], - deps = ["//extension/scorer"], + deps = [ + "//entity", + "//extension/scorer", + ], ) go_test( @@ -13,7 +16,7 @@ go_test( srcs = ["scorer_test.go"], embed = [":heuristic"], deps = [ - "//extension/scorer", + "//entity", "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", ], diff --git a/extension/scorer/heuristic/scorer.go b/extension/scorer/heuristic/scorer.go index 72dee245..06144f4b 100644 --- a/extension/scorer/heuristic/scorer.go +++ b/extension/scorer/heuristic/scorer.go @@ -4,9 +4,13 @@ import ( "context" "fmt" + "github.com/uber/submitqueue/entity" "github.com/uber/submitqueue/extension/scorer" ) +// ValueFunc extracts a single numeric value from a Change for bucketing. +type ValueFunc func(context.Context, entity.Change) (int, error) + // Bucket defines a range [Min, Max] mapped to a probability Score. type Bucket struct { // Min is the inclusive lower bound of the range. @@ -17,62 +21,38 @@ type Bucket struct { Score float64 } -// StatFunc extracts a single numeric value from ChangeStats for bucketing. -type StatFunc func(scorer.ChangeStats) int - -// FilesChanged returns the number of files changed from ChangeStats. -func FilesChanged(stats scorer.ChangeStats) int { return stats.FilesChanged } - -// LinesAdded returns the number of lines added from ChangeStats. -func LinesAdded(stats scorer.ChangeStats) int { return stats.LinesAdded } - -// LinesDeleted returns the number of lines deleted from ChangeStats. -func LinesDeleted(stats scorer.ChangeStats) int { return stats.LinesDeleted } - -// LinesModified returns the number of lines modified from ChangeStats. -func LinesModified(stats scorer.ChangeStats) int { return stats.LinesModified } - -// BuildTargetsAdded returns the number of build targets added from ChangeStats. -func BuildTargetsAdded(stats scorer.ChangeStats) int { return stats.BuildTargetsAdded } - -// BuildTargetsRemoved returns the number of build targets removed from ChangeStats. -func BuildTargetsRemoved(stats scorer.ChangeStats) int { return stats.BuildTargetsRemoved } - -// BuildTargetsChanged returns the number of build targets modified from ChangeStats. -func BuildTargetsChanged(stats scorer.ChangeStats) int { return stats.BuildTargetsChanged } - -// DependencyCount returns the number of downstream dependencies affected from ChangeStats. -func DependencyCount(stats scorer.ChangeStats) int { return stats.DependencyCount } - -// heuristicScorer computes a success probability by bucketing a metric extracted from ChangeStats. +// heuristicScorer computes a success probability by bucketing a metric extracted from a Change. // It follows the Java HeuristicsBasedSuccessPredictor pattern. type heuristicScorer struct { // buckets is the list of ranges to match against. buckets []Bucket - // statFunc extracts the numeric metric from ChangeStats. - statFunc StatFunc + // valueFunc extracts the numeric value from a Change. + valueFunc ValueFunc } -// New creates a new heuristic Scorer with the given buckets and stat function. -// Returns an error if statFunc is nil. -func New(buckets []Bucket, statFunc StatFunc) (scorer.Scorer, error) { - if statFunc == nil { - return nil, fmt.Errorf("heuristic.New: statFunc must not be nil") +// New creates a new heuristic Scorer with the given buckets and value function. +// Panics if valueFunc is nil. +func New(buckets []Bucket, valueFunc ValueFunc) scorer.Scorer { + if valueFunc == nil { + panic("heuristic.New: valueFunc must not be nil") } return &heuristicScorer{ - buckets: buckets, - statFunc: statFunc, - }, nil + buckets: buckets, + valueFunc: valueFunc, + } } -// Score returns the probability score for the first bucket whose range [Min, Max] -// contains the extracted metric value. Returns an error if no bucket matches. -func (s *heuristicScorer) Score(_ context.Context, stats scorer.ChangeStats) (float64, error) { - metric := s.statFunc(stats) +// Score extracts the value from the change, then returns the probability score for the first +// bucket whose range [Min, Max] contains the value. Returns an error if no bucket matches. +func (s *heuristicScorer) Score(ctx context.Context, change entity.Change) (float64, error) { + value, err := s.valueFunc(ctx, change) + if err != nil { + return 0, err + } for _, b := range s.buckets { - if metric >= b.Min && metric <= b.Max { + if value >= b.Min && value <= b.Max { return b.Score, nil } } - return 0, fmt.Errorf("no bucket matches metric value %d", metric) + return 0, fmt.Errorf("no bucket matches value %d", value) } diff --git a/extension/scorer/heuristic/scorer_test.go b/extension/scorer/heuristic/scorer_test.go index f29a4dc3..6f371082 100644 --- a/extension/scorer/heuristic/scorer_test.go +++ b/extension/scorer/heuristic/scorer_test.go @@ -6,26 +6,31 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/uber/submitqueue/extension/scorer" + "github.com/uber/submitqueue/entity" ) +// staticValue returns a ValueFunc that always returns the given value. +func staticValue(value int) ValueFunc { + return func(_ context.Context, _ entity.Change) (int, error) { + return value, nil + } +} + func TestScorer_Score(t *testing.T) { tests := []struct { - name string - buckets []Bucket - statFunc StatFunc - stats scorer.ChangeStats - want float64 - wantErr bool + name string + buckets []Bucket + valueFunc ValueFunc + want float64 + wantErr bool }{ { name: "single bucket covering all values", buckets: []Bucket{ {Min: 0, Max: 1000, Score: 0.9}, }, - statFunc: FilesChanged, - stats: scorer.ChangeStats{FilesChanged: 5}, - want: 0.9, + valueFunc: staticValue(5), + want: 0.9, }, { name: "multiple buckets with different ranges", @@ -34,9 +39,8 @@ func TestScorer_Score(t *testing.T) { {Min: 6, Max: 20, Score: 0.75}, {Min: 21, Max: 100, Score: 0.5}, }, - statFunc: FilesChanged, - stats: scorer.ChangeStats{FilesChanged: 10}, - want: 0.75, + valueFunc: staticValue(10), + want: 0.75, }, { name: "exact min boundary", @@ -44,9 +48,8 @@ func TestScorer_Score(t *testing.T) { {Min: 0, Max: 5, Score: 0.95}, {Min: 6, Max: 20, Score: 0.75}, }, - statFunc: FilesChanged, - stats: scorer.ChangeStats{FilesChanged: 6}, - want: 0.75, + valueFunc: staticValue(6), + want: 0.75, }, { name: "exact max boundary", @@ -54,9 +57,8 @@ func TestScorer_Score(t *testing.T) { {Min: 0, Max: 5, Score: 0.95}, {Min: 6, Max: 20, Score: 0.75}, }, - statFunc: FilesChanged, - stats: scorer.ChangeStats{FilesChanged: 5}, - want: 0.95, + valueFunc: staticValue(5), + want: 0.95, }, { name: "no matching bucket", @@ -64,39 +66,17 @@ func TestScorer_Score(t *testing.T) { {Min: 0, Max: 5, Score: 0.95}, {Min: 10, Max: 20, Score: 0.75}, }, - statFunc: FilesChanged, - stats: scorer.ChangeStats{FilesChanged: 7}, - wantErr: true, - }, - { - name: "lines added stat function", - buckets: []Bucket{ - {Min: 0, Max: 100, Score: 0.9}, - {Min: 101, Max: 500, Score: 0.7}, - }, - statFunc: LinesAdded, - stats: scorer.ChangeStats{LinesAdded: 200}, - want: 0.7, + valueFunc: staticValue(7), + wantErr: true, }, { - name: "lines deleted stat function", - buckets: []Bucket{ - {Min: 0, Max: 50, Score: 0.95}, - {Min: 51, Max: 200, Score: 0.8}, - }, - statFunc: LinesDeleted, - stats: scorer.ChangeStats{LinesDeleted: 10}, - want: 0.95, - }, - { - name: "zero-value change stats", + name: "zero metric value", buckets: []Bucket{ {Min: 0, Max: 0, Score: 1.0}, {Min: 1, Max: 100, Score: 0.8}, }, - statFunc: FilesChanged, - stats: scorer.ChangeStats{}, - want: 1.0, + valueFunc: staticValue(0), + want: 1.0, }, { name: "first matching bucket wins", @@ -104,17 +84,15 @@ func TestScorer_Score(t *testing.T) { {Min: 0, Max: 10, Score: 0.9}, {Min: 5, Max: 15, Score: 0.7}, }, - statFunc: FilesChanged, - stats: scorer.ChangeStats{FilesChanged: 7}, - want: 0.9, + valueFunc: staticValue(7), + want: 0.9, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s, err := New(tt.buckets, tt.statFunc) - require.NoError(t, err) - got, err := s.Score(context.Background(), tt.stats) + s := New(tt.buckets, tt.valueFunc) + got, err := s.Score(context.Background(), entity.Change{}) if tt.wantErr { require.Error(t, err) return @@ -125,7 +103,17 @@ func TestScorer_Score(t *testing.T) { } } -func TestNew_NilStatFunc(t *testing.T) { - _, err := New([]Bucket{{Min: 0, Max: 10, Score: 0.85}}, nil) +func TestScorer_Score_ValueFuncError(t *testing.T) { + failing := func(_ context.Context, _ entity.Change) (int, error) { + return 0, assert.AnError + } + s := New([]Bucket{{Min: 0, Max: 10, Score: 0.9}}, failing) + _, err := s.Score(context.Background(), entity.Change{}) require.Error(t, err) } + +func TestNew_NilValueFunc(t *testing.T) { + assert.Panics(t, func() { + New([]Bucket{{Min: 0, Max: 10, Score: 0.85}}, nil) + }) +} diff --git a/extension/scorer/mock/BUILD.bazel b/extension/scorer/mock/BUILD.bazel index 097b1138..bc12de38 100644 --- a/extension/scorer/mock/BUILD.bazel +++ b/extension/scorer/mock/BUILD.bazel @@ -21,6 +21,7 @@ go_library( importpath = "github.com/uber/submitqueue/extension/scorer/mock", visibility = ["//visibility:public"], deps = [ + "//entity", "//extension/scorer", "@org_uber_go_mock//gomock", ], diff --git a/extension/scorer/scorer.go b/extension/scorer/scorer.go index 4e28ff19..ec1e38fb 100644 --- a/extension/scorer/scorer.go +++ b/extension/scorer/scorer.go @@ -2,32 +2,15 @@ package scorer //go:generate mockgen -source=scorer.go -destination=mock/scorer.go -package=mock -import "context" +import ( + "context" -// ChangeStats contains aggregate statistics about a code change used for scoring. -type ChangeStats struct { - // FilesChanged is the number of files modified. - FilesChanged int - // LinesAdded is the total lines added across all files. - LinesAdded int - // LinesDeleted is the total lines deleted across all files. - LinesDeleted int - // LinesModified is the total lines modified across all files. - LinesModified int - - // BuildTargetsAdded is the number of build targets added. - BuildTargetsAdded int - // BuildTargetsRemoved is the number of build targets removed. - BuildTargetsRemoved int - // BuildTargetsChanged is the number of build targets modified. - BuildTargetsChanged int - // DependencyCount is the number of downstream dependencies affected. - DependencyCount int -} + "github.com/uber/submitqueue/entity" +) // Scorer computes a success probability score for a change based on its characteristics. type Scorer interface { // Score returns a probability between 0.0 and 1.0 indicating the likelihood - // of a successful land for a change with the given statistics. - Score(ctx context.Context, stats ChangeStats) (float64, error) + // of a successful land for the given change. + Score(ctx context.Context, change entity.Change) (float64, error) }