diff --git a/core/itg/graph/BUILD.bazel b/core/itg/graph/BUILD.bazel new file mode 100644 index 0000000..a9281f7 --- /dev/null +++ b/core/itg/graph/BUILD.bazel @@ -0,0 +1,33 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "graph", + srcs = [ + "graph.go", + "invalidate.go", + "update.go", + ], + importpath = "github.com/uber/tango/core/itg/graph", + visibility = ["//visibility:public"], + deps = [ + "//core/targethasher", + "@com_github_bazelbuild_buildtools//build_proto", + "@com_github_deckarep_golang_set_v2//:golang-set", + ], +) + +go_test( + name = "graph_test", + srcs = [ + "graph_test.go", + "invalidate_test.go", + "update_test.go", + ], + embed = [":graph"], + deps = [ + "//core/targethasher", + "@com_github_bazelbuild_buildtools//build_proto", + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/core/itg/graph/graph.go b/core/itg/graph/graph.go new file mode 100644 index 0000000..66c2436 --- /dev/null +++ b/core/itg/graph/graph.go @@ -0,0 +1,339 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "log" + "maps" + "slices" + + buildpb "github.com/bazelbuild/buildtools/build_proto" + "github.com/uber/tango/core/targethasher" +) + +// IntSet is a set of integers. +type IntSet map[int]struct{} + +// NewIntSet creates a new, empty IntSet. +func NewIntSet() IntSet { return make(IntSet) } + +// Insert adds i to the set. +func (s IntSet) Insert(i int) { s[i] = struct{}{} } + +// Delete removes i from the set. +func (s IntSet) Delete(i int) { delete(s, i) } + +// Contains reports whether i is in the set. +func (s IntSet) Contains(i int) bool { _, ok := s[i]; return ok } + +// UnsortedList returns the elements of the set as an unsorted slice. +func (s IntSet) UnsortedList() []int { + result := make([]int, 0, len(s)) + for k := range s { + result = append(result, k) + } + return result +} + +// Copy returns a shallow copy of the set. +func (s IntSet) Copy() IntSet { + c := make(IntSet, len(s)) + for k := range s { + c[k] = struct{}{} + } + return c +} + +// StringSet is a set of strings. +type StringSet map[string]struct{} + +// NewStringSet creates a StringSet from the given values. +func NewStringSet(vals ...string) StringSet { + s := make(StringSet, len(vals)) + for _, v := range vals { + s[v] = struct{}{} + } + return s +} + +// Insert adds v to the set. +func (s StringSet) Insert(v string) { s[v] = struct{}{} } + +// Contains reports whether v is in the set. +func (s StringSet) Contains(v string) bool { _, ok := s[v]; return ok } + +// UnsortedList returns the elements of the set as an unsorted slice. +func (s StringSet) UnsortedList() []string { + result := make([]string, 0, len(s)) + for k := range s { + result = append(result, k) + } + return result +} + +// OptimizedTarget is a representation of a Target that is optimized for storage. +type OptimizedTarget struct { + ID int `json:"id"` + Hash []byte `json:"hash"` + HashWithoutDeps []byte `json:"hashWithoutDeps"` + RuleType int `json:"ruleTypeID"` + Deps IntSet `json:"deps"` + ReverseDeps IntSet `json:"reverseDeps"` + Tags []int `json:"tagIDs"` + Root bool `json:"root"` + External bool `json:"external"` + Attributes map[int]int `json:"attributes"` +} + +// OptimizedGraph is a representation of a dependency graph that is optimized for storage. +type OptimizedGraph struct { + OptimizedTargets map[int]*OptimizedTarget `json:"optimizedTargets"` + ExternalRuleTargets map[int]*OptimizedTarget `json:"externalRuleTargets"` + + NextTargetID int `json:"nextTargetID"` + TargetNameToID map[string]int `json:"targetNameToID"` + TargetIDToString map[int]string `json:"targetIDToString"` + RuleTypeToID map[string]int `json:"ruleTypeToID"` + RuleTypeIDToString map[int]string `json:"ruleTypeIDToString"` + TagToID map[string]int `json:"tagToID"` + TagIDToString map[int]string `json:"tagIDToString"` + AttrNameToID map[string]int `json:"attrNameToID"` + AttrNameIDToString map[int]string `json:"attrNameIDToString"` + AttrValueToID map[string]int `json:"attrValueToID"` + AttrValueIDToString map[int]string `json:"attrValueIDToString"` +} + +// Copy makes a deep copy of OptimizedTarget. +func (t *OptimizedTarget) Copy() *OptimizedTarget { + return &OptimizedTarget{ + ID: t.ID, + Hash: slices.Clone(t.Hash), + HashWithoutDeps: slices.Clone(t.HashWithoutDeps), + RuleType: t.RuleType, + Deps: t.Deps.Copy(), + ReverseDeps: t.ReverseDeps.Copy(), + Tags: slices.Clone(t.Tags), + Root: t.Root, + External: t.External, + Attributes: maps.Clone(t.Attributes), + } +} + +// OptimizeGraph converts a map of Targets into an OptimizedGraph. +func OptimizeGraph(targets map[string]*targethasher.Target) *OptimizedGraph { + g := &OptimizedGraph{ + OptimizedTargets: make(map[int]*OptimizedTarget, len(targets)), + ExternalRuleTargets: make(map[int]*OptimizedTarget, len(targets)), + NextTargetID: 0, + TargetNameToID: make(map[string]int, len(targets)), + TargetIDToString: make(map[int]string, len(targets)), + RuleTypeToID: make(map[string]int), + RuleTypeIDToString: make(map[int]string), + TagToID: make(map[string]int), + TagIDToString: make(map[int]string), + AttrNameToID: make(map[string]int), + AttrNameIDToString: make(map[int]string), + AttrValueToID: make(map[string]int), + AttrValueIDToString: make(map[int]string), + } + for _, target := range targets { + g.AddTarget(target) + } + + for id, target := range g.OptimizedTargets { + for depID := range target.Deps { + child, ok := g.OptimizedTargets[depID] + if !ok { + continue + } + child.ReverseDeps.Insert(id) + } + } + + return g +} + +// Copy makes a deep copy of the OptimizedGraph. +func (g *OptimizedGraph) Copy() *OptimizedGraph { + optimizedTargetsCopy := make(map[int]*OptimizedTarget, len(g.OptimizedTargets)) + for id, target := range g.OptimizedTargets { + optimizedTargetsCopy[id] = target.Copy() + } + + externalRuleTargetsCopy := make(map[int]*OptimizedTarget, len(g.ExternalRuleTargets)) + for id, target := range g.ExternalRuleTargets { + externalRuleTargetsCopy[id] = target.Copy() + } + + return &OptimizedGraph{ + OptimizedTargets: optimizedTargetsCopy, + ExternalRuleTargets: externalRuleTargetsCopy, + NextTargetID: g.NextTargetID, + TargetNameToID: maps.Clone(g.TargetNameToID), + TargetIDToString: maps.Clone(g.TargetIDToString), + RuleTypeToID: maps.Clone(g.RuleTypeToID), + RuleTypeIDToString: maps.Clone(g.RuleTypeIDToString), + TagToID: maps.Clone(g.TagToID), + TagIDToString: maps.Clone(g.TagIDToString), + AttrNameToID: maps.Clone(g.AttrNameToID), + AttrNameIDToString: maps.Clone(g.AttrNameIDToString), + AttrValueToID: maps.Clone(g.AttrValueToID), + AttrValueIDToString: maps.Clone(g.AttrValueIDToString), + } +} + +// AddTarget adds a Target to the graph. +func (g *OptimizedGraph) AddTarget(target *targethasher.Target) { + id := g.getOrGenerateTargetID(target.Name) + depIDs := NewIntSet() + for _, dep := range target.Deps { + depIDs.Insert(g.getOrGenerateTargetID(dep)) + } + optimizedTarget := &OptimizedTarget{ + ID: id, + Hash: target.Hash, + HashWithoutDeps: target.HashWithoutDeps, + External: target.External, + RuleType: getOrGenerateRecordReverse(target.RuleType, g.RuleTypeToID, g.RuleTypeIDToString), + Deps: depIDs, + ReverseDeps: NewIntSet(), + } + + isExternalRuleTarget := target.RuleType == targethasher.ExternalRuleType + if !isExternalRuleTarget { + tagIDs := make([]int, len(target.Tags)) + for i, tag := range target.Tags { + tagIDs[i] = getOrGenerateRecordReverse(tag, g.TagToID, g.TagIDToString) + } + + attributes := make(map[int]int, len(target.Attributes)) + for _, attr := range target.Attributes { + attrNameID := getOrGenerateRecordReverse(attr.GetName(), g.AttrNameToID, g.AttrNameIDToString) + attrValueID := getOrGenerateRecordReverse(attr.GetStringValue(), g.AttrValueToID, g.AttrValueIDToString) + attributes[attrNameID] = attrValueID + } + optimizedTarget.HashWithoutDeps = target.HashWithoutDeps + optimizedTarget.Tags = tagIDs + optimizedTarget.Root = target.Root + optimizedTarget.External = target.External + optimizedTarget.Attributes = attributes + } + + g.OptimizedTargets[id] = optimizedTarget + if isExternalRuleTarget { + g.ExternalRuleTargets[id] = optimizedTarget + } +} + +// GetReverseDepsAsTargets returns the reverse dependencies of the given targets. +func (g *OptimizedGraph) GetReverseDepsAsTargets(targetNames []string) []targethasher.Target { + reverseDeps := make([]targethasher.Target, 0, 2*len(targetNames)) + visited := NewIntSet() + reverseDepIDs := make([]int, 0, len(targetNames)) + for _, targetName := range targetNames { + targetID, ok := g.TargetNameToID[targetName] + if !ok { + log.Printf("starting target %s is not in the graph", targetName) + continue + } + reverseDepIDs = g.getNewReverseDeps(targetID, reverseDepIDs, visited) + } + for _, reverseDepID := range reverseDepIDs { + reverseDeps = append(reverseDeps, g.OptimizedTargetToTarget(reverseDepID)) + } + return reverseDeps +} + +// OptimizedTargetToTarget converts an OptimizedTarget back to a Target. +func (g *OptimizedGraph) OptimizedTargetToTarget(targetID int) targethasher.Target { + name, ok := g.TargetIDToString[targetID] + if !ok { + return targethasher.Target{} + } + + optimizedTarget, ok := g.OptimizedTargets[targetID] + if !ok { + return targethasher.Target{Name: name} + } + + target := targethasher.Target{ + Name: name, + Hash: optimizedTarget.Hash, + HashWithoutDeps: optimizedTarget.HashWithoutDeps, + RuleType: g.RuleTypeIDToString[optimizedTarget.RuleType], + Deps: make([]string, 0, len(optimizedTarget.Deps)), + Tags: make([]string, len(optimizedTarget.Tags)), + Root: optimizedTarget.Root, + External: optimizedTarget.External, + Attributes: make([]*buildpb.Attribute, 0, len(optimizedTarget.Attributes)), + } + + for depID := range optimizedTarget.Deps { + target.Deps = append(target.Deps, g.TargetIDToString[depID]) + } + + for i, tagID := range optimizedTarget.Tags { + target.Tags[i] = g.TagIDToString[tagID] + } + + for nameID, valID := range optimizedTarget.Attributes { + n := g.AttrNameIDToString[nameID] + v := g.AttrValueIDToString[valID] + target.Attributes = append(target.Attributes, &buildpb.Attribute{ + Name: &n, + StringValue: &v, + }) + } + + return target +} + +func (g *OptimizedGraph) getNewReverseDeps(targetID int, reverseDeps []int, visited IntSet) []int { + if visited.Contains(targetID) { + return reverseDeps + } + + visited.Insert(targetID) + reverseDeps = append(reverseDeps, targetID) + for reverseDep := range g.OptimizedTargets[targetID].ReverseDeps { + reverseDeps = g.getNewReverseDeps(reverseDep, reverseDeps, visited) + } + return reverseDeps +} + +func (g *OptimizedGraph) getOrGenerateTargetID(targetName string) int { + if id, ok := g.TargetNameToID[targetName]; ok { + return id + } + + id := g.NextTargetID + g.NextTargetID++ + g.TargetNameToID[targetName] = id + g.TargetIDToString[id] = targetName + return id +} + +func getOrGenerate(key string, m map[string]int) int { + if _, ok := m[key]; !ok { + m[key] = len(m) + } + return m[key] +} + +func getOrGenerateRecordReverse(key string, m map[string]int, reverseM map[int]string) int { + id := getOrGenerate(key, m) + reverseM[id] = key + return m[key] +} diff --git a/core/itg/graph/graph_test.go b/core/itg/graph/graph_test.go new file mode 100644 index 0000000..774654f --- /dev/null +++ b/core/itg/graph/graph_test.go @@ -0,0 +1,351 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "sort" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber/tango/core/targethasher" +) + +// --- IntSet --- + +func TestIntSet(t *testing.T) { + t.Parallel() + + t.Run("insert and contains", func(t *testing.T) { + t.Parallel() + s := NewIntSet() + s.Insert(1) + s.Insert(2) + assert.True(t, s.Contains(1)) + assert.True(t, s.Contains(2)) + assert.False(t, s.Contains(3)) + }) + + t.Run("delete removes element", func(t *testing.T) { + t.Parallel() + s := NewIntSet() + s.Insert(5) + s.Delete(5) + assert.False(t, s.Contains(5)) + }) + + t.Run("delete on absent element is safe", func(t *testing.T) { + t.Parallel() + s := NewIntSet() + s.Delete(99) // should not panic + assert.False(t, s.Contains(99)) + }) + + t.Run("UnsortedList has all elements", func(t *testing.T) { + t.Parallel() + s := NewIntSet() + s.Insert(3) + s.Insert(1) + s.Insert(2) + list := s.UnsortedList() + sort.Ints(list) + assert.Equal(t, []int{1, 2, 3}, list) + }) + + t.Run("Copy is independent of original", func(t *testing.T) { + t.Parallel() + s := NewIntSet() + s.Insert(10) + c := s.Copy() + + // Mutate original and copy independently + s.Insert(20) + c.Insert(30) + + assert.True(t, s.Contains(20)) + assert.False(t, s.Contains(30)) + assert.False(t, c.Contains(20)) + assert.True(t, c.Contains(30)) + }) +} + +// --- StringSet --- + +func TestStringSet(t *testing.T) { + t.Parallel() + + t.Run("NewStringSet initializes with values", func(t *testing.T) { + t.Parallel() + s := NewStringSet("a", "b", "c") + assert.True(t, s.Contains("a")) + assert.True(t, s.Contains("b")) + assert.True(t, s.Contains("c")) + assert.False(t, s.Contains("d")) + }) + + t.Run("NewStringSet empty", func(t *testing.T) { + t.Parallel() + s := NewStringSet() + assert.False(t, s.Contains("x")) + assert.Empty(t, s.UnsortedList()) + }) + + t.Run("Insert adds element", func(t *testing.T) { + t.Parallel() + s := NewStringSet() + s.Insert("hello") + assert.True(t, s.Contains("hello")) + }) + + t.Run("UnsortedList has all elements", func(t *testing.T) { + t.Parallel() + s := NewStringSet("x", "y", "z") + list := s.UnsortedList() + sort.Strings(list) + assert.Equal(t, []string{"x", "y", "z"}, list) + }) +} + +// --- OptimizeGraph --- + +func TestOptimizeGraph(t *testing.T) { + t.Parallel() + + t.Run("nil targets produces empty graph", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(nil) + assert.Empty(t, g.OptimizedTargets) + assert.Empty(t, g.ExternalRuleTargets) + }) + + t.Run("single target is registered with ID mappings", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: []byte{1}}, + } + g := OptimizeGraph(targets) + + id, ok := g.TargetNameToID["//pkg:a"] + require.True(t, ok) + assert.Equal(t, "//pkg:a", g.TargetIDToString[id]) + assert.Equal(t, []byte{1}, g.OptimizedTargets[id].Hash) + }) + + t.Run("dependency wires reverse dep", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library"}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Deps: []string{"//pkg:a"}}, + } + g := OptimizeGraph(targets) + + aID := g.TargetNameToID["//pkg:a"] + bID := g.TargetNameToID["//pkg:b"] + + assert.True(t, g.OptimizedTargets[bID].Deps.Contains(aID), "b should dep on a") + assert.True(t, g.OptimizedTargets[aID].ReverseDeps.Contains(bID), "a should have b as reverse dep") + }) + + t.Run("external rule target tracked in ExternalRuleTargets", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//external:repo": {Name: "//external:repo", RuleType: targethasher.ExternalRuleType}, + } + g := OptimizeGraph(targets) + + id := g.TargetNameToID["//external:repo"] + assert.Contains(t, g.ExternalRuleTargets, id) + assert.Contains(t, g.OptimizedTargets, id) + }) + + t.Run("rule type and tag ID mappings are populated", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Tags: []string{"manual"}}, + } + g := OptimizeGraph(targets) + + _, ok := g.RuleTypeToID["go_library"] + assert.True(t, ok, "rule type should be in RuleTypeToID") + _, ok = g.TagToID["manual"] + assert.True(t, ok, "tag should be in TagToID") + }) +} + +// --- OptimizedTarget.Copy --- + +func TestOptimizedTargetCopy(t *testing.T) { + t.Parallel() + + original := &OptimizedTarget{ + ID: 1, + Hash: []byte{0xAA, 0xBB}, + HashWithoutDeps: []byte{0xCC}, + RuleType: 2, + Deps: IntSet{3: {}, 4: {}}, + ReverseDeps: IntSet{5: {}}, + Tags: []int{6, 7}, + Root: true, + External: false, + Attributes: map[int]int{8: 9}, + } + + c := original.Copy() + + // Values are the same + assert.Equal(t, original.ID, c.ID) + assert.Equal(t, original.Hash, c.Hash) + assert.Equal(t, original.HashWithoutDeps, c.HashWithoutDeps) + assert.Equal(t, original.Root, c.Root) + + // Mutating copy's sets/slices does not affect original + c.Deps.Insert(99) + c.ReverseDeps.Insert(99) + c.Tags = append(c.Tags, 99) + c.Attributes[99] = 99 + c.Hash[0] = 0xFF + + assert.False(t, original.Deps.Contains(99)) + assert.False(t, original.ReverseDeps.Contains(99)) + assert.Len(t, original.Tags, 2) + assert.NotContains(t, original.Attributes, 99) + assert.Equal(t, byte(0xAA), original.Hash[0]) +} + +// --- OptimizedGraph.Copy --- + +func TestOptimizedGraphCopy(t *testing.T) { + t.Parallel() + + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: []byte{1}}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Deps: []string{"//pkg:a"}}, + } + g := OptimizeGraph(targets) + c := g.Copy() + + // Adding a target to the copy's map does not affect the original + aID := g.TargetNameToID["//pkg:a"] + delete(c.OptimizedTargets, aID) + assert.Contains(t, g.OptimizedTargets, aID, "deleting from copy should not affect original") + + // Mutating a target in the copy does not affect the original + bID := g.TargetNameToID["//pkg:b"] + c.OptimizedTargets[bID].Hash = []byte{0xFF} + assert.NotEqual(t, []byte{0xFF}, g.OptimizedTargets[bID].Hash) +} + +// --- GetReverseDepsAsTargets --- + +func TestGetReverseDepsAsTargets(t *testing.T) { + t.Parallel() + + t.Run("starting target included in results", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library"}, + }) + result := g.GetReverseDepsAsTargets([]string{"//pkg:a"}) + require.Len(t, result, 1) + assert.Equal(t, "//pkg:a", result[0].Name) + }) + + t.Run("transitive reverse deps all returned", func(t *testing.T) { + t.Parallel() + // a ← b ← c; starting from a should yield a, b, c + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library"}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Deps: []string{"//pkg:a"}}, + "//pkg:c": {Name: "//pkg:c", RuleType: "go_library", Deps: []string{"//pkg:b"}}, + } + g := OptimizeGraph(targets) + result := g.GetReverseDepsAsTargets([]string{"//pkg:a"}) + + names := make(map[string]struct{}, len(result)) + for _, t := range result { + names[t.Name] = struct{}{} + } + assert.Contains(t, names, "//pkg:a") + assert.Contains(t, names, "//pkg:b") + assert.Contains(t, names, "//pkg:c") + }) + + t.Run("unrelated targets not returned", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library"}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Deps: []string{"//pkg:a"}}, + "//pkg:unrelated": {Name: "//pkg:unrelated", RuleType: "go_library"}, + } + g := OptimizeGraph(targets) + result := g.GetReverseDepsAsTargets([]string{"//pkg:a"}) + + names := make(map[string]struct{}, len(result)) + for _, t := range result { + names[t.Name] = struct{}{} + } + assert.NotContains(t, names, "//pkg:unrelated") + }) + + t.Run("unknown target is skipped", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(nil) + result := g.GetReverseDepsAsTargets([]string{"//pkg:missing"}) + assert.Empty(t, result) + }) +} + +// --- OptimizedTargetToTarget --- + +func TestOptimizedTargetToTarget(t *testing.T) { + t.Parallel() + + t.Run("unknown target ID returns zero value", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(nil) + result := g.OptimizedTargetToTarget(999) + assert.Equal(t, targethasher.Target{}, result) + }) + + t.Run("known target fields round-trip", func(t *testing.T) { + t.Parallel() + hash := []byte{0xDE, 0xAD, 0xBE, 0xEF} + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: hash, Root: true, Tags: []string{"manual"}}, + } + g := OptimizeGraph(targets) + aID := g.TargetNameToID["//pkg:a"] + + result := g.OptimizedTargetToTarget(aID) + assert.Equal(t, "//pkg:a", result.Name) + assert.Equal(t, "go_library", result.RuleType) + assert.Equal(t, hash, result.Hash) + assert.True(t, result.Root) + assert.Contains(t, result.Tags, "manual") + }) + + t.Run("dep names are resolved in result", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:dep": {Name: "//pkg:dep", RuleType: "go_library"}, + "//pkg:lib": {Name: "//pkg:lib", RuleType: "go_library", Deps: []string{"//pkg:dep"}}, + } + g := OptimizeGraph(targets) + libID := g.TargetNameToID["//pkg:lib"] + + result := g.OptimizedTargetToTarget(libID) + assert.Contains(t, result.Deps, "//pkg:dep") + }) +} diff --git a/core/itg/graph/invalidate.go b/core/itg/graph/invalidate.go new file mode 100644 index 0000000..f4f3224 --- /dev/null +++ b/core/itg/graph/invalidate.go @@ -0,0 +1,238 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "context" + "fmt" + "strings" + + "github.com/uber/tango/core/targethasher" +) + +// getTargetByID looks up a target by ID in both OptimizedTargets and ExternalRuleTargets. +func (g *OptimizedGraph) getTargetByID(id int) *OptimizedTarget { + if target := g.OptimizedTargets[id]; target != nil { + return target + } + return g.ExternalRuleTargets[id] +} + +// updateInvalidateTargets updates and invalidates targets. +func (g *OptimizedGraph) updateInvalidateTargets( + ctx context.Context, + deletedSrcFileTargets StringSet, + changedPkgs StringSet, + deletedPkgs StringSet, + targets map[string]*targethasher.Target, + allInvalidated IntSet, +) error { + for deletedSrcFileTarget := range deletedSrcFileTargets { + if toDeleteID, ok := g.TargetNameToID[deletedSrcFileTarget]; ok { + if err := g.removeTarget(g.OptimizedTargets[toDeleteID], allInvalidated); err != nil { + return err + } + } + } + + if err := g.checkDeletedTargets(changedPkgs, deletedPkgs, targets, allInvalidated); err != nil { + return err + } + + topoSortedTargets, err := topoSort(ctx, targets) + if err != nil { + return err + } + for _, t := range topoSortedTargets { + if err := g.upsertTarget(t, allInvalidated); err != nil { + return err + } + } + return nil +} + +// removeTarget removes the target from the graph. +func (g *OptimizedGraph) removeTarget(target *OptimizedTarget, invalidated IntSet) error { + id := target.ID + for reverseDepID := range target.ReverseDeps { + reverseDepTarget, ok := g.OptimizedTargets[reverseDepID] + if !ok { + return fmt.Errorf("target %d not found", reverseDepID) + } + reverseDepTarget.Deps.Delete(id) + } + for depID := range target.Deps { + depTarget, ok := g.OptimizedTargets[depID] + if !ok { + return fmt.Errorf("target %d not found", depID) + } + depTarget.ReverseDeps.Delete(id) + if len(depTarget.ReverseDeps) == 0 && targethasher.CanBeRoot(g.RuleTypeIDToString[depTarget.RuleType]) { + depTarget.Root = true + } + } + delete(g.OptimizedTargets, id) + delete(g.ExternalRuleTargets, id) + delete(g.TargetNameToID, g.TargetIDToString[id]) + delete(g.TargetIDToString, id) + invalidated.Delete(id) + return g.invalidateHashRecursively(target.ReverseDeps, invalidated) +} + +// invalidateHashRecursively invalidates hashes of the given targets and all their reverse deps. +func (g *OptimizedGraph) invalidateHashRecursively(ids IntSet, invalidated IntSet) error { + candidates := make([]int, 0, len(ids)) + for id := range ids { + candidates = append(candidates, id) + } + curIdx := 0 + for curIdx < len(candidates) { + targetID := candidates[curIdx] + curIdx++ + target, ok := g.OptimizedTargets[targetID] + if !ok || target.Hash == nil { + continue + } + + target.Hash = nil + invalidated.Insert(targetID) + for id := range target.ReverseDeps { + candidates = append(candidates, id) + } + } + return nil +} + +// checkDeletedTargets removes targets that have been deleted from the graph. +func (g *OptimizedGraph) checkDeletedTargets(changedPkgs StringSet, deletedPkgs StringSet, targets map[string]*targethasher.Target, invalidated IntSet) error { + targetIDsInQuery := NewIntSet() + for name := range targets { + if id, ok := g.TargetNameToID[name]; ok { + targetIDsInQuery.Insert(id) + } + } + for _, target := range g.OptimizedTargets { + pkgName := getPackage(g.TargetIDToString[target.ID]) + if deletedPkgs.Contains(pkgName) || (changedPkgs.Contains(pkgName) && !targetIDsInQuery.Contains(target.ID)) { + if err := g.removeTarget(target, invalidated); err != nil { + return err + } + } + } + return nil +} + +// upsertTarget inserts or updates a target in the graph. +func (g *OptimizedGraph) upsertTarget(target *targethasher.Target, invalidated IntSet) error { + id := g.getOrGenerateTargetID(target.Name) + ruleTypeID := getOrGenerateRecordReverse(target.RuleType, g.RuleTypeToID, g.RuleTypeIDToString) + depIDs := NewIntSet() + for _, dep := range target.Deps { + depID, ok := g.TargetNameToID[dep] + if !ok { + return fmt.Errorf("dependency %s of target %s not found", dep, target.Name) + } + depIDs.Insert(depID) + + depTarget := g.getTargetByID(depID) + if depTarget == nil { + return fmt.Errorf("dependency target %s (id=%d) not found in graph", dep, depID) + } + if depTarget.ReverseDeps == nil { + depTarget.ReverseDeps = NewIntSet() + } + depTarget.ReverseDeps.Insert(id) + depTarget.Root = false + } + + tagIDs := make([]int, len(target.Tags)) + for i, tag := range target.Tags { + tagIDs[i] = getOrGenerateRecordReverse(tag, g.TagToID, g.TagIDToString) + } + + attributes := make(map[int]int, len(target.Attributes)) + for _, attr := range target.Attributes { + attrNameID := getOrGenerateRecordReverse(attr.GetName(), g.AttrNameToID, g.AttrNameIDToString) + attrValueID := getOrGenerateRecordReverse(attr.GetStringValue(), g.AttrValueToID, g.AttrValueIDToString) + attributes[attrNameID] = attrValueID + } + + if oldTarget, ok := g.OptimizedTargets[id]; ok { + for oldDep := range oldTarget.Deps { + if !depIDs.Contains(oldDep) { + oldDepTarget := g.getTargetByID(oldDep) + if oldDepTarget == nil { + continue + } + oldDepTarget.ReverseDeps.Delete(id) + if len(oldDepTarget.ReverseDeps) == 0 && targethasher.CanBeRoot(g.RuleTypeIDToString[oldDepTarget.RuleType]) { + oldDepTarget.Root = true + } + } + } + oldTarget.Hash = target.Hash + oldTarget.HashWithoutDeps = target.HashWithoutDeps + oldTarget.RuleType = ruleTypeID + oldTarget.Deps = depIDs + oldTarget.Tags = tagIDs + oldTarget.External = target.External + oldTarget.Attributes = attributes + } else { + g.OptimizedTargets[id] = &OptimizedTarget{ + ID: id, + Hash: target.Hash, + HashWithoutDeps: target.HashWithoutDeps, + RuleType: ruleTypeID, + Deps: depIDs, + ReverseDeps: NewIntSet(), + Tags: tagIDs, + Root: targethasher.CanBeRoot(target.RuleType), + External: target.External, + Attributes: attributes, + } + } + + if err := g.invalidateHashRecursively(g.OptimizedTargets[id].ReverseDeps, invalidated); err != nil { + return err + } + if target.Hash == nil { + invalidated.Insert(id) + } + return nil +} + +func getPackage(targetName string) string { + before, _, _ := strings.Cut(targetName, ":") + return strings.TrimPrefix(before, "//") +} + +func topoSort(ctx context.Context, targets map[string]*targethasher.Target) ([]*targethasher.Target, error) { + var err error + roots := targethasher.GetTopologicalRootsAndIdentifyBuildableRoots(targets) + + targetNames := make([]string, 0, len(targets)) + visited := make(map[string]struct{}, len(targets)) + for _, name := range roots { + targetNames, err = targethasher.ToposortRecursively(ctx, targets, name, targetNames, visited) + if err != nil { + return nil, err + } + } + topoSortedTargets := make([]*targethasher.Target, 0, len(targetNames)) + for _, name := range targetNames { + topoSortedTargets = append(topoSortedTargets, targets[name]) + } + return topoSortedTargets, nil +} diff --git a/core/itg/graph/invalidate_test.go b/core/itg/graph/invalidate_test.go new file mode 100644 index 0000000..fffdd02 --- /dev/null +++ b/core/itg/graph/invalidate_test.go @@ -0,0 +1,249 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber/tango/core/targethasher" +) + +// --- getPackage --- + +func TestGetPackage(t *testing.T) { + t.Parallel() + + tests := []struct { + input string + want string + }{ + {"//pkg/sub:target", "pkg/sub"}, + {"//:target", ""}, + {"//external:repo", "external"}, + {"//a/b/c/d:foo", "a/b/c/d"}, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, getPackage(tt.input)) + }) + } +} + +// --- invalidateHashRecursively --- + +func TestInvalidateHashRecursively(t *testing.T) { + t.Parallel() + + t.Run("sets hashes nil and adds to invalidated set", func(t *testing.T) { + t.Parallel() + // a ← b ← c; invalidate a's reverse deps → b and c become nil + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: []byte{1}}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Hash: []byte{2}, Deps: []string{"//pkg:a"}}, + "//pkg:c": {Name: "//pkg:c", RuleType: "go_library", Hash: []byte{3}, Deps: []string{"//pkg:b"}}, + } + g := OptimizeGraph(targets) + aID := g.TargetNameToID["//pkg:a"] + bID := g.TargetNameToID["//pkg:b"] + cID := g.TargetNameToID["//pkg:c"] + + invalidated := NewIntSet() + require.NoError(t, g.invalidateHashRecursively(g.OptimizedTargets[aID].ReverseDeps, invalidated)) + + // b and c should be invalidated (they are reverse deps of a) + assert.Nil(t, g.OptimizedTargets[bID].Hash) + assert.Nil(t, g.OptimizedTargets[cID].Hash) + assert.True(t, invalidated.Contains(bID)) + assert.True(t, invalidated.Contains(cID)) + + // a itself was not passed; its hash should be unchanged + assert.NotNil(t, g.OptimizedTargets[aID].Hash) + assert.False(t, invalidated.Contains(aID)) + }) + + t.Run("targets with nil hash are skipped", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library"}, // Hash=nil + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Deps: []string{"//pkg:a"}}, // Hash=nil + } + g := OptimizeGraph(targets) + aID := g.TargetNameToID["//pkg:a"] + + invalidated := NewIntSet() + require.NoError(t, g.invalidateHashRecursively(g.OptimizedTargets[aID].ReverseDeps, invalidated)) + + // b has a nil hash and is thus skipped; invalidated stays empty + assert.Empty(t, invalidated.UnsortedList()) + }) +} + +// --- removeTarget --- + +func TestRemoveTarget(t *testing.T) { + t.Parallel() + + t.Run("target removed from all maps", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: []byte{1}}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Hash: []byte{2}, Deps: []string{"//pkg:a"}}, + } + g := OptimizeGraph(targets) + bID := g.TargetNameToID["//pkg:b"] + + require.NoError(t, g.removeTarget(g.OptimizedTargets[bID], NewIntSet())) + + assert.NotContains(t, g.OptimizedTargets, bID) + assert.NotContains(t, g.TargetNameToID, "//pkg:b") + assert.NotContains(t, g.TargetIDToString, bID) + }) + + t.Run("dep's reverse dep reference is cleaned up", func(t *testing.T) { + t.Parallel() + targets := map[string]*targethasher.Target{ + "//pkg:dep": {Name: "//pkg:dep", RuleType: "go_library", Hash: []byte{1}}, + "//pkg:lib": {Name: "//pkg:lib", RuleType: "go_library", Hash: []byte{2}, Deps: []string{"//pkg:dep"}}, + } + g := OptimizeGraph(targets) + depID := g.TargetNameToID["//pkg:dep"] + libID := g.TargetNameToID["//pkg:lib"] + + // Confirm lib is initially in dep's reverse deps + assert.True(t, g.OptimizedTargets[depID].ReverseDeps.Contains(libID)) + + require.NoError(t, g.removeTarget(g.OptimizedTargets[libID], NewIntSet())) + + assert.False(t, g.OptimizedTargets[depID].ReverseDeps.Contains(libID)) + }) + + t.Run("dep becomes root when its last reverse dep is removed", func(t *testing.T) { + t.Parallel() + // go_library can be root; a is blocked by b. Removing b should make a root. + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: []byte{1}, Root: false}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Hash: []byte{2}, Deps: []string{"//pkg:a"}}, + } + g := OptimizeGraph(targets) + bID := g.TargetNameToID["//pkg:b"] + aID := g.TargetNameToID["//pkg:a"] + + require.NoError(t, g.removeTarget(g.OptimizedTargets[bID], NewIntSet())) + + assert.True(t, g.OptimizedTargets[aID].Root) + }) + + t.Run("reverse dep hashes are invalidated", func(t *testing.T) { + t.Parallel() + // c depends on b; removing b should invalidate c's hash. + targets := map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: []byte{1}}, + "//pkg:b": {Name: "//pkg:b", RuleType: "go_library", Hash: []byte{2}, Deps: []string{"//pkg:a"}}, + "//pkg:c": {Name: "//pkg:c", RuleType: "go_library", Hash: []byte{3}, Deps: []string{"//pkg:b"}}, + } + g := OptimizeGraph(targets) + bID := g.TargetNameToID["//pkg:b"] + cID := g.TargetNameToID["//pkg:c"] + + invalidated := NewIntSet() + require.NoError(t, g.removeTarget(g.OptimizedTargets[bID], invalidated)) + + assert.Nil(t, g.OptimizedTargets[cID].Hash, "c's hash should be invalidated") + assert.True(t, invalidated.Contains(cID)) + }) +} + +// --- upsertTarget --- + +func TestUpsertTarget(t *testing.T) { + t.Parallel() + + t.Run("inserts new target and wires reverse dep", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:dep": {Name: "//pkg:dep", RuleType: "go_library", Hash: []byte{1}}, + }) + depID := g.TargetNameToID["//pkg:dep"] + + newTarget := &targethasher.Target{ + Name: "//pkg:lib", + RuleType: "go_library", + HashWithoutDeps: []byte{2}, + Deps: []string{"//pkg:dep"}, + } + require.NoError(t, g.upsertTarget(newTarget, NewIntSet())) + + libID, ok := g.TargetNameToID["//pkg:lib"] + require.True(t, ok) + assert.True(t, g.OptimizedTargets[libID].Deps.Contains(depID)) + assert.True(t, g.OptimizedTargets[depID].ReverseDeps.Contains(libID)) + }) + + t.Run("missing dep returns error", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(nil) + newTarget := &targethasher.Target{ + Name: "//pkg:lib", + Deps: []string{"//pkg:missing"}, + } + err := g.upsertTarget(newTarget, NewIntSet()) + assert.Error(t, err) + }) + + t.Run("updating target swaps dep references", func(t *testing.T) { + t.Parallel() + // lib initially depends on dep1; update it to depend on dep2 instead. + targets := map[string]*targethasher.Target{ + "//pkg:dep1": {Name: "//pkg:dep1", RuleType: "go_library", Hash: []byte{1}}, + "//pkg:dep2": {Name: "//pkg:dep2", RuleType: "go_library", Hash: []byte{2}}, + "//pkg:lib": {Name: "//pkg:lib", RuleType: "go_library", Hash: []byte{3}, Deps: []string{"//pkg:dep1"}}, + } + g := OptimizeGraph(targets) + dep1ID := g.TargetNameToID["//pkg:dep1"] + dep2ID := g.TargetNameToID["//pkg:dep2"] + libID := g.TargetNameToID["//pkg:lib"] + + updated := &targethasher.Target{ + Name: "//pkg:lib", + Deps: []string{"//pkg:dep2"}, + } + require.NoError(t, g.upsertTarget(updated, NewIntSet())) + + assert.True(t, g.OptimizedTargets[libID].Deps.Contains(dep2ID), "lib should now dep on dep2") + assert.False(t, g.OptimizedTargets[libID].Deps.Contains(dep1ID), "lib should no longer dep on dep1") + assert.False(t, g.OptimizedTargets[dep1ID].ReverseDeps.Contains(libID), "dep1 should lose lib as reverse dep") + assert.True(t, g.OptimizedTargets[dep2ID].ReverseDeps.Contains(libID), "dep2 should gain lib as reverse dep") + }) + + t.Run("target with nil hash added to invalidated set", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:dep": {Name: "//pkg:dep", RuleType: "go_library", Hash: []byte{1}}, + }) + newTarget := &targethasher.Target{ + Name: "//pkg:lib", + Hash: nil, // no hash yet + Deps: []string{"//pkg:dep"}, + } + invalidated := NewIntSet() + require.NoError(t, g.upsertTarget(newTarget, invalidated)) + + libID := g.TargetNameToID["//pkg:lib"] + assert.True(t, invalidated.Contains(libID), "target with nil hash should be in invalidated set") + }) +} diff --git a/core/itg/graph/update.go b/core/itg/graph/update.go new file mode 100644 index 0000000..4c1e353 --- /dev/null +++ b/core/itg/graph/update.go @@ -0,0 +1,243 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "context" + "crypto/sha1" + "fmt" + "slices" + "strings" + + buildpb "github.com/bazelbuild/buildtools/build_proto" + set "github.com/deckarep/golang-set/v2" + "github.com/uber/tango/core/targethasher" +) + +// TODO: properly read it from changed_target_config.yaml +var sequentialHashTargets = []string{"@io_bazel_rules_go//:go_context_data"} + +// UpdateGraphInput contains the parameters to update the graph. +type UpdateGraphInput struct { + DeletedSrcFiles StringSet + ChangedPkgs StringSet + DeletedPkgs StringSet + QueryResult *buildpb.QueryResult + WorkspaceRoot string + FullHashRepos StringSet +} + +// UpdateGraph updates the dependency relationships and hashes of targets in the graph. +func (g *OptimizedGraph) UpdateGraph( + ctx context.Context, + sourceHasher targethasher.SourceHasher, + input UpdateGraphInput, +) error { + rawQueryResults := input.QueryResult + + // translate raw query results to targethasher.Target + warns := make(map[string]error) + targets, err := targethasher.GetInternalTargetsWithoutHashAndRootInfo(ctx, rawQueryResults) + if err != nil { + return err + } + + fullHashReposSet := set.NewSet(input.FullHashRepos.UnsortedList()...) + + // HashExternalTargets adds external rule targets and hashes them + if err := targethasher.HashExternalTargets(ctx, rawQueryResults, targets, sourceHasher, input.WorkspaceRoot, fullHashReposSet, warns); err != nil { + return err + } + + allInvalidated := NewIntSet() + // update external rule targets in the graph + for _, target := range targets { + if target.RuleType == targethasher.ExternalRuleType { + if err := g.upsertExternalRuleTarget(target, allInvalidated); err != nil { + return err + } + } + } + // retrieve all external targets and convert to targethasher.Target, needed by the hasher to hash source files + for id, target := range g.ExternalRuleTargets { + name := g.TargetIDToString[id] + if _, exists := targets[name]; !exists { + targets[name] = &targethasher.Target{ + Name: name, + RuleType: targethasher.ExternalRuleType, + Hash: target.Hash, + HashWithoutDeps: target.HashWithoutDeps, + External: target.External, + } + } + } + // compute hashes for source file, package group, and rule common targets + if err := computeAvailableHashes(sourceHasher, targets); err != nil { + return err + } + + // update and invalidate targets + if err := g.updateInvalidateTargets(ctx, input.DeletedSrcFiles, input.ChangedPkgs, input.DeletedPkgs, targets, allInvalidated); err != nil { + return err + } + + // prioritize computing hashes of targets that could create cycles + for _, target := range sequentialHashTargets { + id, ok := g.TargetNameToID[target] + if !ok || !allInvalidated.Contains(id) { + continue + } + if _, err := g.computeHashes(ctx, id); err != nil { + return err + } + } + + // update hashes for invalidated targets + for id := range allInvalidated { + if _, err := g.computeHashes(ctx, id); err != nil { + return err + } + } + return nil +} + +// upsertExternalRuleTarget inserts or updates an external rule target and invalidates reverse deps. +func (g *OptimizedGraph) upsertExternalRuleTarget(target *targethasher.Target, invalidated IntSet) error { + id := g.getOrGenerateTargetID(target.Name) + ruleTypeID := getOrGenerateRecordReverse(target.RuleType, g.RuleTypeToID, g.RuleTypeIDToString) + depIDs := NewIntSet() + for _, dep := range target.Deps { + depIDs.Insert(g.getOrGenerateTargetID(dep)) + } + + if oldTarget, ok := g.OptimizedTargets[id]; ok { + oldTarget.Hash = target.Hash + oldTarget.Deps = depIDs + oldTarget.HashWithoutDeps = target.HashWithoutDeps + oldTarget.External = target.External + return g.invalidateHashRecursively(oldTarget.ReverseDeps, invalidated) + } + + optimizedTarget := &OptimizedTarget{ + ID: id, + Hash: target.Hash, + HashWithoutDeps: target.HashWithoutDeps, + RuleType: ruleTypeID, + Deps: depIDs, + ReverseDeps: NewIntSet(), + External: target.External, + } + g.OptimizedTargets[id] = optimizedTarget + g.ExternalRuleTargets[id] = optimizedTarget + return nil +} + +// computeAvailableHashes computes hashes that are available without dep traversal. +func computeAvailableHashes( + hasher targethasher.SourceHasher, + targets map[string]*targethasher.Target, +) error { + for name, target := range targets { + var hash []byte + var hashWithoutDeps []byte + switch target.RuleType { + case targethasher.GeneratedFileType: + break + case targethasher.PackageGroup: + h := sha1.New() + h.Write([]byte(name)) + hash = h.Sum(nil) + case targethasher.SourceFileType: + h, err := hasher.HashSourceFile(target.SourceFile) + if err != nil { + return err + } + hash = h + case targethasher.ExternalRuleType: + continue + default: + noDepsHasher := sha1.New() + targethasher.HashRuleCommon(target.Rule, noDepsHasher) + hashWithoutDeps = noDepsHasher.Sum(nil) + } + if hash != nil { + target.Hash = hash + } + if hashWithoutDeps != nil { + target.HashWithoutDeps = hashWithoutDeps + } + } + return nil +} + +// computeHashes computes hashes recursively for the given target ID. +func (g *OptimizedGraph) computeHashes(ctx context.Context, id int) ([]byte, error) { + if ctx.Err() != nil { + return nil, ctx.Err() + } + + if externalRuleTarget, ok := g.ExternalRuleTargets[id]; ok { + return externalRuleTarget.Hash, nil + } + + target, ok := g.OptimizedTargets[id] + if !ok { + return nil, fmt.Errorf("target %d not found in graph", id) + } + if target.Hash != nil { + return target.Hash, nil + } + + // mark as visiting to handle cycles + target.Hash = []byte{} + var hash []byte + switch g.RuleTypeIDToString[target.RuleType] { + case targethasher.SourceFileType, targethasher.PackageGroup: + return nil, fmt.Errorf("source file or package group target %s should already have hash", g.TargetIDToString[id]) + case targethasher.GeneratedFileType: + var singleDep int + for dep := range target.Deps { + singleDep = dep + break + } + dephash, err := g.computeHashes(ctx, singleDep) + if err != nil { + return nil, err + } + hash = dephash + default: + if target.HashWithoutDeps == nil { + return nil, fmt.Errorf("rule target %s should already have rule hash", g.TargetIDToString[id]) + } + h := sha1.New() + h.Write(target.HashWithoutDeps) + depIDs := target.Deps.UnsortedList() + slices.SortStableFunc(depIDs, func(i, j int) int { + return strings.Compare(g.TargetIDToString[i], g.TargetIDToString[j]) + }) + for _, dep := range depIDs { + dephash, err := g.computeHashes(ctx, dep) + if err != nil { + return nil, err + } + h.Write(dephash) + } + hash = h.Sum(nil) + } + if hash != nil { + target.Hash = hash + } + return hash, nil +} diff --git a/core/itg/graph/update_test.go b/core/itg/graph/update_test.go new file mode 100644 index 0000000..720c951 --- /dev/null +++ b/core/itg/graph/update_test.go @@ -0,0 +1,230 @@ +// Copyright (c) 2025 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graph + +import ( + "context" + "crypto/sha1" + "testing" + + buildpb "github.com/bazelbuild/buildtools/build_proto" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber/tango/core/targethasher" +) + +// fakeSourceHasher is a test double for targethasher.SourceHasher. +type fakeSourceHasher struct { + result []byte + err error +} + +func (f *fakeSourceHasher) HashSourceFile(_ *buildpb.SourceFile) ([]byte, error) { + return f.result, f.err +} + +// --- computeAvailableHashes --- + +func TestComputeAvailableHashes(t *testing.T) { + t.Parallel() + + t.Run("source file hash comes from hasher", func(t *testing.T) { + t.Parallel() + expected := []byte{0xAB, 0xCD} + hasher := &fakeSourceHasher{result: expected} + name := "//pkg:file.go" + targets := map[string]*targethasher.Target{ + name: { + Name: name, + RuleType: targethasher.SourceFileType, + SourceFile: &buildpb.SourceFile{}, + }, + } + + require.NoError(t, computeAvailableHashes(hasher, targets)) + assert.Equal(t, expected, targets[name].Hash) + }) + + t.Run("package group hashed by name", func(t *testing.T) { + t.Parallel() + name := "//pkg:__pkg__" + targets := map[string]*targethasher.Target{ + name: {Name: name, RuleType: targethasher.PackageGroup}, + } + + require.NoError(t, computeAvailableHashes(&fakeSourceHasher{}, targets)) + + h := sha1.New() + h.Write([]byte(name)) + assert.Equal(t, h.Sum(nil), targets[name].Hash) + }) + + t.Run("external rule type is skipped", func(t *testing.T) { + t.Parallel() + name := "//external:repo" + targets := map[string]*targethasher.Target{ + name: {Name: name, RuleType: targethasher.ExternalRuleType}, + } + + require.NoError(t, computeAvailableHashes(&fakeSourceHasher{}, targets)) + assert.Nil(t, targets[name].Hash, "external rule targets should not get a hash here") + }) + + t.Run("generated file gets no hash at this stage", func(t *testing.T) { + t.Parallel() + name := "//pkg:gen.go" + targets := map[string]*targethasher.Target{ + name: {Name: name, RuleType: targethasher.GeneratedFileType}, + } + + require.NoError(t, computeAvailableHashes(&fakeSourceHasher{}, targets)) + assert.Nil(t, targets[name].Hash, "generated file hash is resolved later") + }) + + t.Run("rule target gets HashWithoutDeps set", func(t *testing.T) { + t.Parallel() + name := "//pkg:lib" + ruleName := name + ruleClass := "go_library" + targets := map[string]*targethasher.Target{ + name: { + Name: name, + RuleType: "go_library", + Rule: &buildpb.Rule{Name: &ruleName, RuleClass: &ruleClass}, + }, + } + + require.NoError(t, computeAvailableHashes(&fakeSourceHasher{}, targets)) + assert.NotNil(t, targets[name].HashWithoutDeps, "rule should have HashWithoutDeps after hashing") + assert.Nil(t, targets[name].Hash, "full hash is not computed here — deps are needed") + }) + + t.Run("source hasher error is propagated", func(t *testing.T) { + t.Parallel() + hasher := &fakeSourceHasher{err: assert.AnError} + targets := map[string]*targethasher.Target{ + "//pkg:f": {Name: "//pkg:f", RuleType: targethasher.SourceFileType, SourceFile: &buildpb.SourceFile{}}, + } + + err := computeAvailableHashes(hasher, targets) + assert.Error(t, err) + }) +} + +// --- computeHashes --- + +func TestComputeHashes(t *testing.T) { + t.Parallel() + + t.Run("context cancelled returns error", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library"}, + }) + aID := g.TargetNameToID["//pkg:a"] + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err := g.computeHashes(ctx, aID) + assert.ErrorIs(t, err, context.Canceled) + }) + + t.Run("target with existing hash returned immediately", func(t *testing.T) { + t.Parallel() + hash := []byte{0xDE, 0xAD} + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:a": {Name: "//pkg:a", RuleType: "go_library", Hash: hash}, + }) + aID := g.TargetNameToID["//pkg:a"] + + got, err := g.computeHashes(context.Background(), aID) + require.NoError(t, err) + assert.Equal(t, hash, got) + }) + + t.Run("missing target ID returns error", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(nil) + + _, err := g.computeHashes(context.Background(), 9999) + assert.Error(t, err) + }) + + t.Run("external rule target returns existing hash", func(t *testing.T) { + t.Parallel() + hash := []byte{0xCA, 0xFE} + g := OptimizeGraph(map[string]*targethasher.Target{ + "//external:repo": {Name: "//external:repo", RuleType: targethasher.ExternalRuleType, Hash: hash}, + }) + id := g.TargetNameToID["//external:repo"] + + got, err := g.computeHashes(context.Background(), id) + require.NoError(t, err) + assert.Equal(t, hash, got) + }) + + t.Run("source file with nil hash returns error", func(t *testing.T) { + t.Parallel() + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:f.go": {Name: "//pkg:f.go", RuleType: targethasher.SourceFileType}, // Hash intentionally nil + }) + id := g.TargetNameToID["//pkg:f.go"] + + _, err := g.computeHashes(context.Background(), id) + assert.Error(t, err, "source file should already have its hash set") + }) + + t.Run("generated file inherits dep hash", func(t *testing.T) { + t.Parallel() + depHash := []byte{0x01, 0x02, 0x03} + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:rule": {Name: "//pkg:rule", RuleType: "go_library", Hash: []byte{0xFF}}, + "//pkg:gen": {Name: "//pkg:gen", RuleType: targethasher.GeneratedFileType, Deps: []string{"//pkg:rule"}}, + }) + ruleID := g.TargetNameToID["//pkg:rule"] + genID := g.TargetNameToID["//pkg:gen"] + + // Override the rule hash directly so we know what to expect + g.OptimizedTargets[ruleID].Hash = depHash + g.OptimizedTargets[genID].Hash = nil // force recompute + + got, err := g.computeHashes(context.Background(), genID) + require.NoError(t, err) + assert.Equal(t, depHash, got, "generated file should inherit its dep's hash") + }) + + t.Run("rule target hash combines HashWithoutDeps and dep hashes", func(t *testing.T) { + t.Parallel() + hwod := []byte{0x03, 0x04} + depHash := []byte{0x01, 0x02} + g := OptimizeGraph(map[string]*targethasher.Target{ + "//pkg:dep": {Name: "//pkg:dep", RuleType: "go_library", Hash: depHash}, + "//pkg:lib": {Name: "//pkg:lib", RuleType: "go_library", HashWithoutDeps: hwod, Deps: []string{"//pkg:dep"}}, + }) + libID := g.TargetNameToID["//pkg:lib"] + // lib has no final hash yet (only HashWithoutDeps) + g.OptimizedTargets[libID].Hash = nil + + got, err := g.computeHashes(context.Background(), libID) + require.NoError(t, err) + + // Expected: sha1(hwod || depHash) (single dep, already sorted) + h := sha1.New() + h.Write(hwod) + h.Write(depHash) + assert.Equal(t, h.Sum(nil), got) + }) +}