diff --git a/extension/scorer/BUILD.bazel b/extension/scorer/BUILD.bazel new file mode 100644 index 00000000..e52e8eff --- /dev/null +++ b/extension/scorer/BUILD.bazel @@ -0,0 +1,14 @@ +load("@rules_go//go:def.bzl", "go_library") + +exports_files( + ["scorer.go"], + visibility = ["//extension/scorer/mock:__pkg__"], +) + +go_library( + name = "scorer", + srcs = ["scorer.go"], + importpath = "github.com/uber/submitqueue/extension/scorer", + visibility = ["//visibility:public"], + deps = ["//entity"], +) diff --git a/extension/scorer/README.md b/extension/scorer/README.md new file mode 100644 index 00000000..21c4ee60 --- /dev/null +++ b/extension/scorer/README.md @@ -0,0 +1,64 @@ +# Scorer + +Vendor-agnostic interface for computing success probability scores for code changes. + +## Interface + +### Scorer + +Computes a success probability for a given change. + +```go +type Scorer interface { + Score(ctx context.Context, change entity.Change) (float64, error) +} +``` + +- **change**: An `entity.Change` identifying the code change to score. +- **Score**: Returns a probability between 0.0 and 1.0 indicating the likelihood of a successful land. Returns an error if scoring fails. + +## Implementations + +### Heuristic + +Scores a change by extracting a numeric value via a `ValueFunc` and matching it against ordered buckets. Each bucket maps a `[Min, Max]` range to a probability. + +```go +s := heuristic.New( + []heuristic.Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + {Min: 21, Max: 100, Score: 0.5}, + }, + func(ctx context.Context, change entity.Change) (int, error) { + // resolve the change into a numeric metric + return filesChanged, nil + }, +) + +score, err := s.Score(ctx, change) +``` + +### Composite + +Combines multiple named scorers into a single score using a reduce function. The reduce function receives a `map[string]float64` mapping scorer names to their scores, enabling domain-aware aggregation. 
// ReduceFunc combines named scores into a single score.
// The argument maps each child scorer's name to the score it produced.
type ReduceFunc func(map[string]float64) float64

// Min returns the minimum value from scores.
// It returns 0 when scores is empty or nil.
func Min(scores map[string]float64) float64 {
	// lowest is deliberately not called "min": that name would shadow the
	// predeclared min function available since Go 1.21.
	var lowest float64
	seen := false
	for _, v := range scores {
		// The first value always seeds lowest; later values replace it only
		// when strictly smaller.
		if !seen || v < lowest {
			lowest = v
			seen = true
		}
	}
	return lowest
}

// Max returns the maximum value from scores.
+func Max(scores map[string]float64) float64 { + var max float64 + first := true + for _, v := range scores { + if first || v > max { + max = v + first = false + } + } + return max +} + +// Avg returns the arithmetic mean of scores. +func Avg(scores map[string]float64) float64 { + var sum float64 + for _, v := range scores { + sum += v + } + return sum / float64(len(scores)) +} + +// compositeScorer combines multiple named scorers into a single score using a reduce function. +type compositeScorer struct { + // scorers maps scorer names to their implementations. + scorers map[string]scorer.Scorer + // reduce combines named scores into a single value. + reduce ReduceFunc +} + +// New creates a composite Scorer that evaluates all named child scorers and combines +// their results using the given reduce function. +// Panics if scorers is empty or reduce is nil. +func New(scorers map[string]scorer.Scorer, reduce ReduceFunc) scorer.Scorer { + if len(scorers) == 0 { + panic("composite.New: scorers must not be empty") + } + if reduce == nil { + panic("composite.New: reduce must not be nil") + } + return &compositeScorer{ + scorers: scorers, + reduce: reduce, + } +} + +// Score evaluates all child scorers and combines their results using the reduce function. +// If any child scorer returns an error, that error is returned immediately. 
+func (c *compositeScorer) Score(ctx context.Context, change entity.Change) (float64, error) { + scores := make(map[string]float64, len(c.scorers)) + for name, s := range c.scorers { + score, err := s.Score(ctx, change) + if err != nil { + return 0, err + } + scores[name] = score + } + return c.reduce(scores), nil +} diff --git a/extension/scorer/composite/scorer_test.go b/extension/scorer/composite/scorer_test.go new file mode 100644 index 00000000..6ee2e690 --- /dev/null +++ b/extension/scorer/composite/scorer_test.go @@ -0,0 +1,132 @@ +package composite + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber/submitqueue/entity" + "github.com/uber/submitqueue/extension/scorer" +) + +// fixedScorer always returns a fixed score. +type fixedScorer struct { + score float64 +} + +func (f *fixedScorer) Score(_ context.Context, _ entity.Change) (float64, error) { + return f.score, nil +} + +// errorScorer always returns an error. 
+type errorScorer struct{} + +func (e *errorScorer) Score(_ context.Context, _ entity.Change) (float64, error) { + return 0, fmt.Errorf("scorer failed") +} + +func TestScorer_Score(t *testing.T) { + tests := []struct { + name string + scorers map[string]scorer.Scorer + reduce ReduceFunc + want float64 + }{ + { + name: "min of two scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.6}, + }, + reduce: Min, + want: 0.6, + }, + { + name: "max of two scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.6}, + }, + reduce: Max, + want: 0.9, + }, + { + name: "avg of two scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.6}, + }, + reduce: Avg, + want: 0.75, + }, + { + name: "single scorer passthrough", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + }, + reduce: Avg, + want: 0.9, + }, + { + name: "avg of three scorers", + scorers: map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.95}, + "lines": &fixedScorer{0.85}, + }, + reduce: Avg, + want: 0.9, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := New(tt.scorers, tt.reduce) + got, err := s.Score(context.Background(), entity.Change{}) + require.NoError(t, err) + assert.InDelta(t, tt.want, got, 1e-9) + }) + } +} + +func TestScorer_Score_ChildError(t *testing.T) { + s := New(map[string]scorer.Scorer{ + "error": &errorScorer{}, + "files": &fixedScorer{0.9}, + }, Min) + _, err := s.Score(context.Background(), entity.Change{}) + require.Error(t, err) +} + +func TestNew_EmptyScorers(t *testing.T) { + assert.Panics(t, func() { + New(map[string]scorer.Scorer{}, Min) + }) +} + +func TestNew_NilReduce(t *testing.T) { + assert.Panics(t, func() { + New(map[string]scorer.Scorer{"files": &fixedScorer{0.9}}, nil) + }) +} + +func TestReduceFunc_ReceivesNames(t *testing.T) { + var receivedNames []string + 
custom := func(scores map[string]float64) float64 { + for name := range scores { + receivedNames = append(receivedNames, name) + } + return scores["files"] + } + + s := New(map[string]scorer.Scorer{ + "files": &fixedScorer{0.9}, + "deps": &fixedScorer{0.95}, + }, custom) + got, err := s.Score(context.Background(), entity.Change{}) + require.NoError(t, err) + assert.Equal(t, 0.9, got) + assert.ElementsMatch(t, []string{"files", "deps"}, receivedNames) +} diff --git a/extension/scorer/heuristic/BUILD.bazel b/extension/scorer/heuristic/BUILD.bazel new file mode 100644 index 00000000..f75806d6 --- /dev/null +++ b/extension/scorer/heuristic/BUILD.bazel @@ -0,0 +1,23 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "heuristic", + srcs = ["scorer.go"], + importpath = "github.com/uber/submitqueue/extension/scorer/heuristic", + visibility = ["//visibility:public"], + deps = [ + "//entity", + "//extension/scorer", + ], +) + +go_test( + name = "heuristic_test", + srcs = ["scorer_test.go"], + embed = [":heuristic"], + deps = [ + "//entity", + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/extension/scorer/heuristic/scorer.go b/extension/scorer/heuristic/scorer.go new file mode 100644 index 00000000..06144f4b --- /dev/null +++ b/extension/scorer/heuristic/scorer.go @@ -0,0 +1,58 @@ +package heuristic + +import ( + "context" + "fmt" + + "github.com/uber/submitqueue/entity" + "github.com/uber/submitqueue/extension/scorer" +) + +// ValueFunc extracts a single numeric value from a Change for bucketing. +type ValueFunc func(context.Context, entity.Change) (int, error) + +// Bucket defines a range [Min, Max] mapped to a probability Score. +type Bucket struct { + // Min is the inclusive lower bound of the range. + Min int + // Max is the inclusive upper bound of the range. + Max int + // Score is the probability returned when the metric falls within this bucket. 
+ Score float64 +} + +// heuristicScorer computes a success probability by bucketing a metric extracted from a Change. +// It follows the Java HeuristicsBasedSuccessPredictor pattern. +type heuristicScorer struct { + // buckets is the list of ranges to match against. + buckets []Bucket + // valueFunc extracts the numeric value from a Change. + valueFunc ValueFunc +} + +// New creates a new heuristic Scorer with the given buckets and value function. +// Panics if valueFunc is nil. +func New(buckets []Bucket, valueFunc ValueFunc) scorer.Scorer { + if valueFunc == nil { + panic("heuristic.New: valueFunc must not be nil") + } + return &heuristicScorer{ + buckets: buckets, + valueFunc: valueFunc, + } +} + +// Score extracts the value from the change, then returns the probability score for the first +// bucket whose range [Min, Max] contains the value. Returns an error if no bucket matches. +func (s *heuristicScorer) Score(ctx context.Context, change entity.Change) (float64, error) { + value, err := s.valueFunc(ctx, change) + if err != nil { + return 0, err + } + for _, b := range s.buckets { + if value >= b.Min && value <= b.Max { + return b.Score, nil + } + } + return 0, fmt.Errorf("no bucket matches value %d", value) +} diff --git a/extension/scorer/heuristic/scorer_test.go b/extension/scorer/heuristic/scorer_test.go new file mode 100644 index 00000000..6f371082 --- /dev/null +++ b/extension/scorer/heuristic/scorer_test.go @@ -0,0 +1,119 @@ +package heuristic + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber/submitqueue/entity" +) + +// staticValue returns a ValueFunc that always returns the given value. 
+func staticValue(value int) ValueFunc { + return func(_ context.Context, _ entity.Change) (int, error) { + return value, nil + } +} + +func TestScorer_Score(t *testing.T) { + tests := []struct { + name string + buckets []Bucket + valueFunc ValueFunc + want float64 + wantErr bool + }{ + { + name: "single bucket covering all values", + buckets: []Bucket{ + {Min: 0, Max: 1000, Score: 0.9}, + }, + valueFunc: staticValue(5), + want: 0.9, + }, + { + name: "multiple buckets with different ranges", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + {Min: 21, Max: 100, Score: 0.5}, + }, + valueFunc: staticValue(10), + want: 0.75, + }, + { + name: "exact min boundary", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + }, + valueFunc: staticValue(6), + want: 0.75, + }, + { + name: "exact max boundary", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 6, Max: 20, Score: 0.75}, + }, + valueFunc: staticValue(5), + want: 0.95, + }, + { + name: "no matching bucket", + buckets: []Bucket{ + {Min: 0, Max: 5, Score: 0.95}, + {Min: 10, Max: 20, Score: 0.75}, + }, + valueFunc: staticValue(7), + wantErr: true, + }, + { + name: "zero metric value", + buckets: []Bucket{ + {Min: 0, Max: 0, Score: 1.0}, + {Min: 1, Max: 100, Score: 0.8}, + }, + valueFunc: staticValue(0), + want: 1.0, + }, + { + name: "first matching bucket wins", + buckets: []Bucket{ + {Min: 0, Max: 10, Score: 0.9}, + {Min: 5, Max: 15, Score: 0.7}, + }, + valueFunc: staticValue(7), + want: 0.9, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := New(tt.buckets, tt.valueFunc) + got, err := s.Score(context.Background(), entity.Change{}) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestScorer_Score_ValueFuncError(t *testing.T) { + failing := func(_ context.Context, _ entity.Change) (int, error) { + return 0, 
assert.AnError + } + s := New([]Bucket{{Min: 0, Max: 10, Score: 0.9}}, failing) + _, err := s.Score(context.Background(), entity.Change{}) + require.Error(t, err) +} + +func TestNew_NilValueFunc(t *testing.T) { + assert.Panics(t, func() { + New([]Bucket{{Min: 0, Max: 10, Score: 0.85}}, nil) + }) +} diff --git a/extension/scorer/mock/BUILD.bazel b/extension/scorer/mock/BUILD.bazel new file mode 100644 index 00000000..bc12de38 --- /dev/null +++ b/extension/scorer/mock/BUILD.bazel @@ -0,0 +1,28 @@ +load("@rules_go//extras:gomock.bzl", "gomock") +load("@rules_go//go:def.bzl", "go_library") + +_MOCKGEN = "@org_uber_go_mock//mockgen" + +gomock( + name = "mock_scorer_src", + out = "scorer_mock.go", + mockgen_tool = _MOCKGEN, + package = "mock", + source = "//extension/scorer:scorer.go", + source_importpath = "github.com/uber/submitqueue/extension/scorer", +) + +# gazelle:ignore +go_library( + name = "mock", + srcs = [ + ":mock_scorer_src", + ], + importpath = "github.com/uber/submitqueue/extension/scorer/mock", + visibility = ["//visibility:public"], + deps = [ + "//entity", + "//extension/scorer", + "@org_uber_go_mock//gomock", + ], +) diff --git a/extension/scorer/scorer.go b/extension/scorer/scorer.go new file mode 100644 index 00000000..ec1e38fb --- /dev/null +++ b/extension/scorer/scorer.go @@ -0,0 +1,16 @@ +package scorer + +//go:generate mockgen -source=scorer.go -destination=mock/scorer.go -package=mock + +import ( + "context" + + "github.com/uber/submitqueue/entity" +) + +// Scorer computes a success probability score for a change based on its characteristics. +type Scorer interface { + // Score returns a probability between 0.0 and 1.0 indicating the likelihood + // of a successful land for the given change. + Score(ctx context.Context, change entity.Change) (float64, error) +}