uber · behinddwalls · Feb 25, 2026
@@ -0,0 +1,20 @@
+load("@rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "speculation",
+    srcs = ["speculation.go"],
+    importpath = "github.com/uber/submitqueue/core/speculation",
+    visibility = ["//visibility:public"],
+    deps = ["//entity"],
+)
+
+go_test(
+    name = "speculation_test",
+    srcs = ["speculation_test.go"],
+    embed = [":speculation"],
+    deps = [
+        "//entity",
+        "@com_github_stretchr_testify//assert",
+        "@com_github_stretchr_testify//require",
+    ],
+)
@@ -0,0 +1,107 @@
+package speculation
+
+import (
+	"fmt"
+	"math/bits"
+	"sort"
+
+	"github.com/uber/submitqueue/entity"
+)
+
+// MaxDependencies is the maximum number of predecessor dependencies allowed
+// when generating a speculation tree.
+//
+// Speculation produces 2^N paths (the power set of predecessors). Growth is
+// exponential, so a cap is required to prevent memory exhaustion:
+//
+//	N=10  →      1,024 paths  (~100 KB)
+//	N=15  →     32,768 paths  (~3 MB)
+//	N=20  →  1,048,576 paths  (~100 MB)
+//	N=25  → 33,554,432 paths  (~3 GB)   — OOM on most machines
+//
+// The current implementation uses bitmask iteration (one int per subset),
+// which limits N to 62 on 64-bit systems before the bit shift overflows.
+// An iterative or recursive approach would remove the bitmask ceiling, but
+// the exponential memory growth remains the binding constraint regardless
+// of algorithm — 2^N paths must all be held in memory.
+//
+// In practice, a submit queue batch rarely has more than a handful of
+// predecessors. A limit of 10 (1,024 paths) is generous for real workloads.
+const MaxDependencies = 10
+
+// GenerateTree takes the current batch ID and its ordered dependencies (sorted by
+// arrival time), and generates a SpeculationTree for the current batch containing
+// all possible speculation paths (power set of predecessors).
+//
+// Each path represents a possible future: a subset of predecessors that succeed
+// (forming the base) with the current batch as the head being tested.
+//
+// For N dependencies, this produces 2^N speculation paths.
+// Paths are sorted most-optimistic first (most predecessors included) to
+// least-optimistic (fewest predecessors included).
+// All paths are initialized with Action = SpeculationPathActionSchedule.
+//
+// Returns an error if len(dependencyIDs) exceeds MaxDependencies.
+func GenerateTree(currentID string, dependencyIDs []string) (entity.SpeculationTree, error) {
+	if len(dependencyIDs) > MaxDependencies {
+		return entity.SpeculationTree{}, fmt.Errorf(
+			"dependency count %d exceeds maximum %d", len(dependencyIDs), MaxDependencies,
+		)
+	}
+
+	// Defensive copy to avoid mutation of caller's slice.
+	deps := make([]string, len(dependencyIDs))
+	copy(deps, dependencyIDs)
+
+	n := len(deps)
+
+	// We enumerate every subset of dependencies using bitmask iteration.
+	// An N-bit integer has 2^N possible values (0 to 2^N-1), and each value
+	// represents a unique subset: bit i being set means dependency i is
+	// included in the base (assumed to have succeeded).
+	//
+	// Example with deps = [B1, B2, B3]:
+	//   mask=0 (000) → base=[]              — all predecessors failed
+	//   mask=1 (001) → base=[B1]            — only B1 succeeded
+	//   mask=2 (010) → base=[B2]            — only B2 succeeded
+	//   mask=3 (011) → base=[B1, B2]        — B1 and B2 succeeded
+	//   mask=4 (100) → base=[B3]            — only B3 succeeded
+	//   mask=5 (101) → base=[B1, B3]        — B1 and B3 succeeded
+	//   mask=6 (110) → base=[B2, B3]        — B2 and B3 succeeded
+	//   mask=7 (111) → base=[B1, B2, B3]    — all predecessors succeeded
+	//
+	// Because we iterate bit positions low-to-high (i=0,1,...,N-1), included
+	// dependencies are appended in their original arrival order.
+	totalPaths := 1 << n // 2^N
+	speculations := make([]entity.SpeculationInfo, 0, totalPaths)
+
+	for mask := range totalPaths {
+		// bits.OnesCount gives the number of set bits, which equals the
+		// number of dependencies included in this subset.
+		base := make([]string, 0, bits.OnesCount(uint(mask)))
+		for i := range n {
+			if mask&(1<<i) != 0 {
+				base = append(base, deps[i])
+			}
+		}
+
+		speculations = append(speculations, entity.SpeculationInfo{
+			Path: entity.SpeculationPath{
+				Base: base,
+				Head: currentID,
+			},
+			Action: entity.SpeculationPathActionSchedule,
+			Score:  0,
+		})
+	}
+
+	// Sort most-optimistic first: more predecessors included = higher optimism.
+	sort.SliceStable(speculations, func(i, j int) bool {
+		return len(speculations[i].Path.Base) > len(speculations[j].Path.Base)
+	})
+
+	return entity.SpeculationTree{
+		BatchID:      currentID,
+		Speculations: speculations,
+	}, nil
+}
@@ -0,0 +1,161 @@
+package speculation
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/uber/submitqueue/entity"
+)
+
+func TestGenerateTree(t *testing.T) {
+	tests := []struct {
+		name          string
+		currentID     string
+		dependencyIDs []string
+		wantPaths     []entity.SpeculationPath
+	}{
+		{
+			name:          "no dependencies produces single path",
+			currentID:     "B1",
+			dependencyIDs: nil,
+			wantPaths: []entity.SpeculationPath{
+				{Base: []string{}, Head: "B1"},
+			},
+		},
+		{
+			name:          "one dependency produces two paths",
+			currentID:     "B2",
+			dependencyIDs: []string{"B1"},
+			wantPaths: []entity.SpeculationPath{
+				{Base: []string{"B1"}, Head: "B2"}, // optimistic: B1 succeeded
+				{Base: []string{}, Head: "B2"},       // pessimistic: B1 failed
+			},
+		},
+		{
+			name:          "two dependencies produces four paths",
+			currentID:     "B3",
+			dependencyIDs: []string{"B1", "B2"},
+			wantPaths: []entity.SpeculationPath{
+				{Base: []string{"B1", "B2"}, Head: "B3"}, // both succeeded
+				{Base: []string{"B1"}, Head: "B3"},        // only B1 succeeded
+				{Base: []string{"B2"}, Head: "B3"},        // only B2 succeeded
+				{Base: []string{}, Head: "B3"},              // all failed
+			},
+		},
+		{
+			name:          "three dependencies produces eight paths matching ERD example",
+			currentID:     "B4",
+			dependencyIDs: []string{"B1", "B2", "B3"},
+			wantPaths: []entity.SpeculationPath{
+				{Base: []string{"B1", "B2", "B3"}, Head: "B4"}, // all succeeded
+				{Base: []string{"B1", "B2"}, Head: "B4"},        // B1, B2 succeeded
+				{Base: []string{"B1", "B3"}, Head: "B4"},        // B1, B3 succeeded
+				{Base: []string{"B2", "B3"}, Head: "B4"},        // B2, B3 succeeded
+				{Base: []string{"B1"}, Head: "B4"},               // only B1 succeeded
+				{Base: []string{"B2"}, Head: "B4"},               // only B2 succeeded
+				{Base: []string{"B3"}, Head: "B4"},               // only B3 succeeded
+				{Base: []string{}, Head: "B4"},                     // all failed
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tree, err := GenerateTree(tt.currentID, tt.dependencyIDs)
+			require.NoError(t, err)
+
+			assert.Equal(t, tt.currentID, tree.BatchID)
+			require.Len(t, tree.Speculations, len(tt.wantPaths))
+
+			for i, spec := range tree.Speculations {
+				assert.Equal(t, tt.wantPaths[i], spec.Path, "path at index %d", i)
+			}
+		})
+	}
+}
+
+func TestGenerateTree_Ordering(t *testing.T) {
+	tree, err := GenerateTree("B5", []string{"B1", "B2", "B3", "B4"})
+	require.NoError(t, err)
+
+	require.Len(t, tree.Speculations, 16)
+
+	// Verify ordering: base length must be non-increasing (most optimistic first).
+	for i := 1; i < len(tree.Speculations); i++ {
+		assert.GreaterOrEqual(t, len(tree.Speculations[i-1].Path.Base), len(tree.Speculations[i].Path.Base),
+			"path at index %d should have >= base length than path at index %d", i-1, i)
+	}
+}
+
+func TestGenerateTree_AllActionsSchedule(t *testing.T) {
+	tree, err := GenerateTree("B3", []string{"B1", "B2"})
+	require.NoError(t, err)
+
+	for i, spec := range tree.Speculations {
+		assert.Equal(t, entity.SpeculationPathActionSchedule, spec.Action,
+			"action at index %d", i)
+	}
+}
+
+func TestGenerateTree_AllScoresZero(t *testing.T) {
+	tree, err := GenerateTree("B3", []string{"B1", "B2"})
+	require.NoError(t, err)
+
+	for i, spec := range tree.Speculations {
+		assert.Equal(t, float32(0), spec.Score, "score at index %d", i)
+	}
+}
+
+func TestGenerateTree_InputImmutability(t *testing.T) {
+	deps := []string{"B1", "B2", "B3"}
+	original := make([]string, len(deps))
+	copy(original, deps)
+
+	_, err := GenerateTree("B4", deps)
+	require.NoError(t, err)
+
+	assert.Equal(t, original, deps, "input dependency slice should not be mutated")
+}
+
+func TestGenerateTree_EmptyDependencySlice(t *testing.T) {
+	tree, err := GenerateTree("B1", []string{})
+	require.NoError(t, err)
+
+	require.Len(t, tree.Speculations, 1)
+	assert.Equal(t, entity.SpeculationPath{Base: []string{}, Head: "B1"}, tree.Speculations[0].Path)
+}
+
+func TestGenerateTree_HeadAlwaysCurrentID(t *testing.T) {
+	tree, err := GenerateTree("B3", []string{"B1", "B2"})
+	require.NoError(t, err)
+
+	for i, spec := range tree.Speculations {
+		assert.Equal(t, "B3", spec.Path.Head, "head at index %d", i)
+	}
+}
+
+func TestGenerateTree_ExceedsMaxDependencies(t *testing.T) {
+	deps := make([]string, MaxDependencies+1)
+	for i := range deps {
+		deps[i] = fmt.Sprintf("B%d", i+1)
+	}
+
+	_, err := GenerateTree("current", deps)
+
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "exceeds maximum")
+}
+
+func TestGenerateTree_AtMaxDependencies(t *testing.T) {
+	deps := make([]string, MaxDependencies)
+	for i := range deps {
+		deps[i] = fmt.Sprintf("B%d", i+1)
+	}
+
+	tree, err := GenerateTree("current", deps)
+
+	require.NoError(t, err)
+	assert.Len(t, tree.Speculations, 1<<MaxDependencies)
+}
@@ -38,12 +38,6 @@ func (s BuildStatus) IsTerminal() bool {
 }
 
 
-// SpeculationPathInfo represents the base and head commits of a speculation path used in a build.
-type SpeculationPathInfo struct {
-	// Base is a list of batchIDs(in order) that form the base of this speculation path.
-	Base []string
-}
-
 // Build represents a build scheduled for a batch along a specific speculation path.
 // All fields except the Status are immutable after creation.
 type Build struct {
@@ -55,7 +49,7 @@ type Build struct {
 	// SpeculationPath is the speculation path that represents this build. For
 	// a given batch this path is crafted from the graph that is generated from the
 	// dependencies of this batch.
-	SpeculationPath SpeculationPathInfo
+	SpeculationPath SpeculationPath
 	// Score represents the build prediction score for this speculation path.
 	Score float32
 	// Status represents the state of the build lifecycle this build is in.

@@ -6,13 +6,28 @@ type SpeculationPathAction string
 const (
 	// SpeculationPathActionUnknown is the default zero value for SpeculationPathAction.
 	SpeculationPathActionUnknown SpeculationPathAction = ""
-	// TODO: Add comprehensive list of actions
+	// SpeculationPathActionSchedule indicates a build should be scheduled for this path.
+	SpeculationPathActionSchedule SpeculationPathAction = "schedule"
+	// SpeculationPathActionCancel indicates this path should be cancelled or not built.
+	SpeculationPathActionCancel SpeculationPathAction = "cancel"
+	// SpeculationPathActionLand indicates this path succeeded and should proceed to merge.
+	SpeculationPathActionLand SpeculationPathAction = "land"
 )
 
+// SpeculationPath represents a speculation path through the dependency graph.
+// Base contains the predecessor batch IDs assumed to have succeeded (in arrival order),
+// and Head is the batch being tested.
+type SpeculationPath struct {
+	// Base is the ordered list of predecessor batch IDs assumed to have succeeded.
+	Base []string
+	// Head is the batch ID being tested along this path.
+	Head string
+}
+
 // SpeculationInfo represents metadata about a single speculation path, including the path through the dependency graph, its current state, and the predicted build score.
 type SpeculationInfo struct {
-	// Path represents the speculation path; which is an ordered list of batches.
-	Path []string
+	// Path represents the speculation path as a base/head pair.
+	Path SpeculationPath
 	// Action is a state that this path is in.
 	Action SpeculationPathAction
 	// Score is score for this speculation path.
@@ -29,9 +44,9 @@ type SpeculationTree struct {
 	// For e.g - Consider batches - queueA/batch/1, queueA/batch/2, queueA/batch/3
 	// such that - queueA/batch/2 and queueA/batch/3 depend on queueA/batch/1
 	//
-	// Speculations for queueA/batch/1 - [{Path: []string{"queueA/batch/1"}, State: "scheduled", Score: 0.1}]
-	// Speculations for queueA/batch/2 - [{Path: []string{"queueA/batch/2"}, State: "scheduled", Score: 0.9}, {Path: []string{"queueA/batch/1", "queueA/batch/2"}, State: "scheduled", Score: 0.3}]
-	// Speculations for queueA/batch/3 - [{Path: []string{"queueA/batch/3"}, State: "scheduled", Score: 0.9}, {Path: []string{"queueA/batch/1", "queueA/batch/3"}, State: "scheduled", Score: 0.3}]
+	// Speculations for queueA/batch/1 - [{Path: {Base: [], Head: "queueA/batch/1"}, Action: "scheduled", Score: 0.1}]
+	// Speculations for queueA/batch/2 - [{Path: {Base: [], Head: "queueA/batch/2"}, Action: "scheduled", Score: 0.9}, {Path: {Base: ["queueA/batch/1"], Head: "queueA/batch/2"}, Action: "scheduled", Score: 0.3}]
+	// Speculations for queueA/batch/3 - [{Path: {Base: [], Head: "queueA/batch/3"}, Action: "scheduled", Score: 0.9}, {Path: {Base: ["queueA/batch/1"], Head: "queueA/batch/3"}, Action: "scheduled", Score: 0.3}]
 	//
 	Speculations []SpeculationInfo
 }
@@ -7,6 +7,7 @@ go_library(
     visibility = ["//visibility:public"],
     deps = [
         "//core/consumer",
+        "//core/speculation",
         "//entity",
         "//entity/queue",
         "@com_github_uber_go_tally_v4//:tally",