Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions core/speculation/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
load("@rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "speculation",
srcs = ["speculation.go"],
importpath = "github.com/uber/submitqueue/core/speculation",
visibility = ["//visibility:public"],
deps = ["//entity"],
)

go_test(
name = "speculation_test",
srcs = ["speculation_test.go"],
embed = [":speculation"],
deps = [
"//entity",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
],
)
107 changes: 107 additions & 0 deletions core/speculation/speculation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package speculation

import (
"fmt"
"math/bits"
"sort"

"github.com/uber/submitqueue/entity"
)

// MaxDependencies is the maximum number of predecessor dependencies allowed
// when generating a speculation tree.
//
// Speculation produces 2^N paths (the power set of predecessors). Growth is
// exponential, so a cap is required to prevent memory exhaustion:
//
// N=10 → 1,024 paths (~100 KB)
// N=15 → 32,768 paths (~3 MB)
// N=20 → 1,048,576 paths (~100 MB)
// N=25 → 33,554,432 paths (~3 GB) — OOM on most machines
//
// The current implementation uses bitmask iteration (one int per subset),
// which limits N to 62 on 64-bit systems before the bit shift overflows.
// An iterative or recursive approach would remove the bitmask ceiling, but
// the exponential memory growth remains the binding constraint regardless
// of algorithm — 2^N paths must all be held in memory.
//
// In practice, a submit queue batch rarely has more than a handful of
// predecessors. A limit of 10 (1,024 paths) is generous for real workloads.
const MaxDependencies = 10

// GenerateTree takes the current batch ID and its ordered dependencies (sorted by
// arrival time), and generates a SpeculationTree for the current batch containing
// all possible speculation paths (power set of predecessors).
//
// Each path represents a possible future: a subset of predecessors that succeed
// (forming the base) with the current batch as the head being tested.
//
// For N dependencies, this produces 2^N speculation paths.
// Paths are sorted most-optimistic first (most predecessors included) to
// least-optimistic (fewest predecessors included).
// All paths are initialized with Action = SpeculationPathActionSchedule.
//
// Returns an error if len(dependencyIDs) exceeds MaxDependencies.
func GenerateTree(currentID string, dependencyIDs []string) (entity.SpeculationTree, error) {
if len(dependencyIDs) > MaxDependencies {
return entity.SpeculationTree{}, fmt.Errorf(
"dependency count %d exceeds maximum %d", len(dependencyIDs), MaxDependencies,
)
}

// Defensive copy to avoid mutation of caller's slice.
deps := make([]string, len(dependencyIDs))
copy(deps, dependencyIDs)

n := len(deps)

// We enumerate every subset of dependencies using bitmask iteration.
// An N-bit integer has 2^N possible values (0 to 2^N-1), and each value
// represents a unique subset: bit i being set means dependency i is
// included in the base (assumed to have succeeded).
//
// Example with deps = [B1, B2, B3]:
// mask=0 (000) → base=[] — all predecessors failed
// mask=1 (001) → base=[B1] — only B1 succeeded
// mask=2 (010) → base=[B2] — only B2 succeeded
// mask=3 (011) → base=[B1, B2] — B1 and B2 succeeded
// mask=4 (100) → base=[B3] — only B3 succeeded
// mask=5 (101) → base=[B1, B3] — B1 and B3 succeeded
// mask=6 (110) → base=[B2, B3] — B2 and B3 succeeded
// mask=7 (111) → base=[B1, B2, B3] — all predecessors succeeded
//
// Because we iterate bit positions low-to-high (i=0,1,...,N-1), included
// dependencies are appended in their original arrival order.
totalPaths := 1 << n // 2^N
speculations := make([]entity.SpeculationInfo, 0, totalPaths)

for mask := range totalPaths {
// bits.OnesCount gives the number of set bits, which equals the
// number of dependencies included in this subset.
base := make([]string, 0, bits.OnesCount(uint(mask)))
for i := range n {
if mask&(1<<i) != 0 {
base = append(base, deps[i])
}
}

speculations = append(speculations, entity.SpeculationInfo{
Path: entity.SpeculationPath{
Base: base,
Head: currentID,
},
Action: entity.SpeculationPathActionSchedule,
Score: 0,
})
}

// Sort most-optimistic first: more predecessors included = higher optimism.
sort.SliceStable(speculations, func(i, j int) bool {
return len(speculations[i].Path.Base) > len(speculations[j].Path.Base)
})

return entity.SpeculationTree{
BatchID: currentID,
Speculations: speculations,
}, nil
}
161 changes: 161 additions & 0 deletions core/speculation/speculation_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package speculation

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/uber/submitqueue/entity"
)

func TestGenerateTree(t *testing.T) {
tests := []struct {
name string
currentID string
dependencyIDs []string
wantPaths []entity.SpeculationPath
}{
{
name: "no dependencies produces single path",
currentID: "B1",
dependencyIDs: nil,
wantPaths: []entity.SpeculationPath{
{Base: []string{}, Head: "B1"},
},
},
{
name: "one dependency produces two paths",
currentID: "B2",
dependencyIDs: []string{"B1"},
wantPaths: []entity.SpeculationPath{
{Base: []string{"B1"}, Head: "B2"}, // optimistic: B1 succeeded
{Base: []string{}, Head: "B2"}, // pessimistic: B1 failed
},
},
{
name: "two dependencies produces four paths",
currentID: "B3",
dependencyIDs: []string{"B1", "B2"},
wantPaths: []entity.SpeculationPath{
{Base: []string{"B1", "B2"}, Head: "B3"}, // both succeeded
{Base: []string{"B1"}, Head: "B3"}, // only B1 succeeded
{Base: []string{"B2"}, Head: "B3"}, // only B2 succeeded
{Base: []string{}, Head: "B3"}, // all failed
},
},
{
name: "three dependencies produces eight paths matching ERD example",
currentID: "B4",
dependencyIDs: []string{"B1", "B2", "B3"},
wantPaths: []entity.SpeculationPath{
{Base: []string{"B1", "B2", "B3"}, Head: "B4"}, // all succeeded
{Base: []string{"B1", "B2"}, Head: "B4"}, // B1, B2 succeeded
{Base: []string{"B1", "B3"}, Head: "B4"}, // B1, B3 succeeded
{Base: []string{"B2", "B3"}, Head: "B4"}, // B2, B3 succeeded
{Base: []string{"B1"}, Head: "B4"}, // only B1 succeeded
{Base: []string{"B2"}, Head: "B4"}, // only B2 succeeded
{Base: []string{"B3"}, Head: "B4"}, // only B3 succeeded
{Base: []string{}, Head: "B4"}, // all failed
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tree, err := GenerateTree(tt.currentID, tt.dependencyIDs)
require.NoError(t, err)

assert.Equal(t, tt.currentID, tree.BatchID)
require.Len(t, tree.Speculations, len(tt.wantPaths))

for i, spec := range tree.Speculations {
assert.Equal(t, tt.wantPaths[i], spec.Path, "path at index %d", i)
}
})
}
}

func TestGenerateTree_Ordering(t *testing.T) {
tree, err := GenerateTree("B5", []string{"B1", "B2", "B3", "B4"})
require.NoError(t, err)

require.Len(t, tree.Speculations, 16)

// Verify ordering: base length must be non-increasing (most optimistic first).
for i := 1; i < len(tree.Speculations); i++ {
assert.GreaterOrEqual(t, len(tree.Speculations[i-1].Path.Base), len(tree.Speculations[i].Path.Base),
"path at index %d should have >= base length than path at index %d", i-1, i)
}
}

func TestGenerateTree_AllActionsSchedule(t *testing.T) {
tree, err := GenerateTree("B3", []string{"B1", "B2"})
require.NoError(t, err)

for i, spec := range tree.Speculations {
assert.Equal(t, entity.SpeculationPathActionSchedule, spec.Action,
"action at index %d", i)
}
}

func TestGenerateTree_AllScoresZero(t *testing.T) {
tree, err := GenerateTree("B3", []string{"B1", "B2"})
require.NoError(t, err)

for i, spec := range tree.Speculations {
assert.Equal(t, float32(0), spec.Score, "score at index %d", i)
}
}

func TestGenerateTree_InputImmutability(t *testing.T) {
deps := []string{"B1", "B2", "B3"}
original := make([]string, len(deps))
copy(original, deps)

_, err := GenerateTree("B4", deps)
require.NoError(t, err)

assert.Equal(t, original, deps, "input dependency slice should not be mutated")
}

func TestGenerateTree_EmptyDependencySlice(t *testing.T) {
tree, err := GenerateTree("B1", []string{})
require.NoError(t, err)

require.Len(t, tree.Speculations, 1)
assert.Equal(t, entity.SpeculationPath{Base: []string{}, Head: "B1"}, tree.Speculations[0].Path)
}

func TestGenerateTree_HeadAlwaysCurrentID(t *testing.T) {
tree, err := GenerateTree("B3", []string{"B1", "B2"})
require.NoError(t, err)

for i, spec := range tree.Speculations {
assert.Equal(t, "B3", spec.Path.Head, "head at index %d", i)
}
}

func TestGenerateTree_ExceedsMaxDependencies(t *testing.T) {
deps := make([]string, MaxDependencies+1)
for i := range deps {
deps[i] = fmt.Sprintf("B%d", i+1)
}

_, err := GenerateTree("current", deps)

require.Error(t, err)
assert.Contains(t, err.Error(), "exceeds maximum")
}

func TestGenerateTree_AtMaxDependencies(t *testing.T) {
deps := make([]string, MaxDependencies)
for i := range deps {
deps[i] = fmt.Sprintf("B%d", i+1)
}

tree, err := GenerateTree("current", deps)

require.NoError(t, err)
assert.Len(t, tree.Speculations, 1<<MaxDependencies)
}
8 changes: 1 addition & 7 deletions entity/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,6 @@ func (s BuildStatus) IsTerminal() bool {
}


// SpeculationPathInfo represents the base and head commits of a speculation path used in a build.
type SpeculationPathInfo struct {
// Base is a list of batchIDs(in order) that form the base of this speculation path.
Base []string
}

// Build represents a build scheduled for a batch along a specific speculation path.
// All fields except the Status are immutable after creation.
type Build struct {
Expand All @@ -55,7 +49,7 @@ type Build struct {
// SpeculationPath is the speculation path that represents this build. For
// a given batch this path is crafted from the graph that is generated from the
// dependencies of this batch.
SpeculationPath SpeculationPathInfo
SpeculationPath SpeculationPath
// Score represents the build prediction score for this speculation path.
Score float32
// Status represents the state of the build lifecycle this build is in.
Expand Down
27 changes: 21 additions & 6 deletions entity/speculation_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,28 @@ type SpeculationPathAction string
const (
// SpeculationPathActionUnknown is the default zero value for SpeculationPathAction.
SpeculationPathActionUnknown SpeculationPathAction = ""
// TODO: Add comprehensive list of actions
// SpeculationPathActionSchedule indicates a build should be scheduled for this path.
SpeculationPathActionSchedule SpeculationPathAction = "schedule"
// SpeculationPathActionCancel indicates this path should be cancelled or not built.
SpeculationPathActionCancel SpeculationPathAction = "cancel"
// SpeculationPathActionLand indicates this path succeeded and should proceed to merge.
SpeculationPathActionLand SpeculationPathAction = "land"
)

// SpeculationPath represents a speculation path through the dependency graph.
// Base contains the predecessor batch IDs assumed to have succeeded (in arrival order),
// and Head is the batch being tested.
type SpeculationPath struct {
// Base is the ordered list of predecessor batch IDs assumed to have succeeded.
Base []string
// Head is the batch ID being tested along this path.
Head string
}

// SpeculationInfo represents metadata about a single speculation path, including the path through the dependency graph, its current state, and the predicted build score.
type SpeculationInfo struct {
// Path represents the speculation path; which is an ordered list of batches.
Path []string
// Path represents the speculation path as a base/head pair.
Path SpeculationPath
// Action is a state that this path is in.
Action SpeculationPathAction
// Score is score for this speculation path.
Expand All @@ -29,9 +44,9 @@ type SpeculationTree struct {
// For e.g - Consider batches - queueA/batch/1, queueA/batch/2, queueA/batch/3
// such that - queueA/batch/2 and queueA/batch/3 depend on queueA/batch/1
//
// Speculations for queueA/batch/1 - [{Path: []string{"queueA/batch/1"}, State: "scheduled", Score: 0.1}]
// Speculations for queueA/batch/2 - [{Path: []string{"queueA/batch/2"}, State: "scheduled", Score: 0.9}, {Path: []string{"queueA/batch/1", "queueA/batch/2"}, State: "scheduled", Score: 0.3}]
// Speculations for queueA/batch/3 - [{Path: []string{"queueA/batch/3"}, State: "scheduled", Score: 0.9}, {Path: []string{"queueA/batch/1", "queueA/batch/3"}, State: "scheduled", Score: 0.3}]
// Speculations for queueA/batch/1 - [{Path: {Base: [], Head: "queueA/batch/1"}, Action: "scheduled", Score: 0.1}]
// Speculations for queueA/batch/2 - [{Path: {Base: [], Head: "queueA/batch/2"}, Action: "scheduled", Score: 0.9}, {Path: {Base: ["queueA/batch/1"], Head: "queueA/batch/2"}, Action: "scheduled", Score: 0.3}]
// Speculations for queueA/batch/3 - [{Path: {Base: [], Head: "queueA/batch/3"}, Action: "scheduled", Score: 0.9}, {Path: {Base: ["queueA/batch/1"], Head: "queueA/batch/3"}, Action: "scheduled", Score: 0.3}]
//
Speculations []SpeculationInfo
}
1 change: 1 addition & 0 deletions orchestrator/controller/speculate/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//core/consumer",
"//core/speculation",
"//entity",
"//entity/queue",
"@com_github_uber_go_tally_v4//:tally",
Expand Down
Loading