diff --git a/controller/gettargetgraph.go b/controller/gettargetgraph.go index 80fe748..82ecdae 100644 --- a/controller/gettargetgraph.go +++ b/controller/gettargetgraph.go @@ -116,7 +116,7 @@ func (c *controller) getGraph(ctx context.Context, buildDescription *pb.BuildDes return nil, err } logger.Info("getGraph: treehash found") - treehashPath := common.GetGraphByTreeHash(buildDescription.GetRemote(), string(treehashBytes), requestOptions) + treehashPath := common.GetGraphByTreeHash(buildDescription.GetRemote(), string(treehashBytes), buildDescription.GetStrategy(), requestOptions) // Download the target graph based on treehash. storageStart := time.Now() graphReader, err := storage.NewGraphReader(ctx, c.storage, treehashPath) diff --git a/core/common/utils.go b/core/common/utils.go index 43020a8..4dd3b7a 100644 --- a/core/common/utils.go +++ b/core/common/utils.go @@ -53,17 +53,28 @@ func ToShortRemote(remote string) string { } // GetGraphByTreeHash returns the cache path for the target graph by treehash. +// strategy is part of the key because different computation strategies (e.g. +// SHELL vs NATIVE) can produce different graphs from the same tree state. // requestOptions is folded into the key when any of its fields affect computation // (today: extra_exclude_files_regex). Empty/nil ⇒ legacy path unchanged. -func GetGraphByTreeHash(remote, treehash string, requestOptions *tangopb.RequestOptions) string { - return filepath.Join("graph", ToShortRemote(remote), treehash) + HashRequestOptions(requestOptions) +func GetGraphByTreeHash(remote, treehash string, strategy tangopb.ComputationStrategy, requestOptions *tangopb.RequestOptions) string { + path := filepath.Join(ToShortRemote(remote), "graphs", treehash, strategy.String()) + if hash := HashRequestOptions(requestOptions); hash != "" { + path += "_requests-options-" + hash + } + return path } // GetTreehashCachePath returns the cache path for the treehash mapping. -// The git treehash is purely a function of git state, so requestOptions is not -// part of this key. +// The git treehash is purely a function of git state (base SHA + applied +// requests), so neither requestOptions nor the computation strategy is part +// of this key. func GetTreehashCachePath(buildDescription *tangopb.BuildDescription) string { - return filepath.Join("treehash", ToShortRemote(buildDescription.Remote), fmt.Sprintf("treehash-map-%s", buildDescription.BaseSha), GetReqsHash(buildDescription.Requests)) + "-" + buildDescription.Strategy.String() + path := filepath.Join(ToShortRemote(buildDescription.Remote), "treehashes", fmt.Sprintf("base-sha-%s", buildDescription.BaseSha)) + if len(buildDescription.Requests) > 0 { + path += "_request-urls-" + GetReqURLsHash(buildDescription.Requests) + } + return path } // GetComparedTargetsCachePath returns the cache path for a compared target graph result. @@ -72,7 +83,11 @@ func GetTreehashCachePath(buildDescription *tangopb.BuildDescription) string { // requestOptions is folded into the key when any of its fields affect computation. // Empty/nil ⇒ legacy path unchanged. func GetComparedTargetsCachePath(remote, treehash1, treehash2 string, requestOptions *tangopb.RequestOptions) string { - return filepath.Join("compared-targets", ToShortRemote(remote), treehash1, treehash2) + HashRequestOptions(requestOptions) + path := filepath.Join(ToShortRemote(remote), "compared-targets", treehash1+"_"+treehash2) + if hash := HashRequestOptions(requestOptions); hash != "" { + path += "_requests-options-" + hash + } + return path } // GetChangedTargetsAndEdgesCachePath returns the cache path for a GetChangedTargetsAndEdges result. @@ -81,13 +96,17 @@ func GetComparedTargetsCachePath(remote, treehash1, treehash2 string, requestOpt // requestOptions is folded into the key when any of its fields affect computation. // Empty/nil ⇒ legacy path unchanged. func GetChangedTargetsAndEdgesCachePath(remote, treehash1, treehash2 string, requestOptions *tangopb.RequestOptions) string { - return filepath.Join("compared-targets-and-edges", ToShortRemote(remote), treehash1, treehash2) + HashRequestOptions(requestOptions) + path := filepath.Join(ToShortRemote(remote), "compared-targets-and-edges", treehash1+"_"+treehash2) + if hash := HashRequestOptions(requestOptions); hash != "" { + path += "_requests-options-" + hash + } + return path } -// GetReqsHash returns a fixed-length MD5 hash of the sorted request URLs. +// GetReqURLsHash returns a fixed-length MD5 hash of the sorted request URLs. // Each URL's bytes are fed into the digest individually (no separator), matching // the Java MessageDigest.update(str.getBytes()) per-string behavior. -func GetReqsHash(requests []*tangopb.Request) string { +func GetReqURLsHash(requests []*tangopb.Request) string { if len(requests) == 0 { return "" } @@ -104,9 +123,9 @@ func GetReqsHash(requests []*tangopb.Request) string { } // HashRequestOptions returns "" when no field of RequestOptions contributes to -// the cache (preserves legacy paths), otherwise "-" deterministically -// computed from the fields. As new fields are added to RequestOptions, fold -// them into the digest here. +// the cache (preserves legacy paths), otherwise the md5 hex digest of those +// fields. As new fields are added to RequestOptions, fold them into the digest +// here. func HashRequestOptions(opts *tangopb.RequestOptions) string { if opts == nil { return "" @@ -122,7 +141,7 @@ func HashRequestOptions(opts *tangopb.RequestOptions) string { for _, r := range sorted { h.Write([]byte(r)) } - return fmt.Sprintf("-%x", h.Sum(nil)) + return fmt.Sprintf("%x", h.Sum(nil)) } // ResultToGetTargetGraphResponse converts a Result to a GetTargetGraphResponse diff --git a/core/common/utils_test.go b/core/common/utils_test.go index f79ac29..c61be13 100644 --- a/core/common/utils_test.go +++ b/core/common/utils_test.go @@ -61,20 +61,24 @@ func TestGetGraphByTreeHash(t *testing.T) { t.Parallel() remote := "git@github:uber/tango" treehash := "abcd1234" + strategy := pb.COMPUTATION_STRATEGY_NATIVE - // Nil/empty options ⇒ legacy path (regression: cache compatibility). - got := GetGraphByTreeHash(remote, treehash, nil) - assert.Equal(t, filepath.Join("graph", "uber/tango", treehash), got) - assert.Equal(t, got, GetGraphByTreeHash(remote, treehash, &pb.RequestOptions{})) + // Nil/empty options ⇒ no request-options suffix. + got := GetGraphByTreeHash(remote, treehash, strategy, nil) + assert.Equal(t, filepath.Join("uber/tango", "graphs", treehash, strategy.String()), got) + assert.Equal(t, got, GetGraphByTreeHash(remote, treehash, strategy, &pb.RequestOptions{})) + + // Different strategies ⇒ different keys. + assert.NotEqual(t, got, GetGraphByTreeHash(remote, treehash, pb.COMPUTATION_STRATEGY_SHELL, nil)) // Non-empty options ⇒ suffix appended; different lists ⇒ different keys. - withFoo := GetGraphByTreeHash(remote, treehash, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"foo.*"}}) + withFoo := GetGraphByTreeHash(remote, treehash, strategy, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"foo.*"}}) assert.NotEqual(t, got, withFoo) - assert.NotEqual(t, withFoo, GetGraphByTreeHash(remote, treehash, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"bar.*"}})) + assert.NotEqual(t, withFoo, GetGraphByTreeHash(remote, treehash, strategy, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"bar.*"}})) // Order-independence: sort before hashing. assert.Equal(t, - GetGraphByTreeHash(remote, treehash, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"a", "b"}}), - GetGraphByTreeHash(remote, treehash, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"b", "a"}}), + GetGraphByTreeHash(remote, treehash, strategy, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"a", "b"}}), + GetGraphByTreeHash(remote, treehash, strategy, &pb.RequestOptions{ExtraExcludeFilesRegex: []string{"b", "a"}}), ) } @@ -94,11 +98,11 @@ func TestGetTreehashCachePath(t *testing.T) { h := md5.New() h.Write([]byte("custom://foo/bar")) h.Write([]byte("github://org/repo/pull/1")) - want := filepath.Join("treehash", "uber/tango", "treehash-map-deadbeef", fmt.Sprintf("%x", h.Sum(nil))) + "-" + pb.COMPUTATION_STRATEGY_INVALID.String() + want := filepath.Join("uber/tango", "treehashes", "base-sha-deadbeef") + "_request-urls-" + fmt.Sprintf("%x", h.Sum(nil)) assert.Equal(t, want, got) } -func TestGetReqsHash(t *testing.T) { +func TestGetReqURLsHash(t *testing.T) { t.Parallel() md5hex := func(strs ...string) string { h := md5.New() @@ -136,7 +140,7 @@ func TestGetReqsHash(t *testing.T) { for _, tt := range tests { tt := tt t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.want, GetReqsHash(tt.in)) + assert.Equal(t, tt.want, GetReqURLsHash(tt.in)) }) } } @@ -144,7 +148,7 @@ func TestGetReqsHash(t *testing.T) { func TestGetComparedTargetsCachePath(t *testing.T) { t.Parallel() got := GetComparedTargetsCachePath("git@github:uber/tango", "abc", "def", nil) - assert.Equal(t, filepath.Join("compared-targets", "uber/tango", "abc", "def"), got) + assert.Equal(t, filepath.Join("uber/tango", "compared-targets", "abc_def"), got) // Nil/empty options ⇒ legacy path. assert.Equal(t, got, GetComparedTargetsCachePath("git@github:uber/tango", "abc", "def", &pb.RequestOptions{})) @@ -156,7 +160,7 @@ func TestGetComparedTargetsCachePath(t *testing.T) { func TestGetChangedTargetsAndEdgesCachePath(t *testing.T) { t.Parallel() got := GetChangedTargetsAndEdgesCachePath("git@github:uber/tango", "abc", "def", nil) - assert.Equal(t, filepath.Join("compared-targets-and-edges", "uber/tango", "abc", "def"), got) + assert.Equal(t, filepath.Join("uber/tango", "compared-targets-and-edges", "abc_def"), got) // Must be distinct from the GetChangedTargets cache path. assert.NotEqual(t, GetComparedTargetsCachePath("git@github:uber/tango", "abc", "def", nil), got) diff --git a/orchestrator/native_orchestrator.go b/orchestrator/native_orchestrator.go index b148a2b..784917e 100644 --- a/orchestrator/native_orchestrator.go +++ b/orchestrator/native_orchestrator.go @@ -123,7 +123,7 @@ func (b *nativeOrchestrator) GetTargetGraph(ctx context.Context, param GetTarget b.logger.Errorw("Treehash computation failed", zap.Any("request build description", param.Req.BuildDescription), zap.Error(err)) return nil, err } - treehashPath := common.GetGraphByTreeHash(param.Req.BuildDescription.Remote, treehash, param.Req.GetRequestOptions()) + treehashPath := common.GetGraphByTreeHash(param.Req.BuildDescription.Remote, treehash, param.Req.BuildDescription.GetStrategy(), param.Req.GetRequestOptions()) if !param.BypassCache { graphReader, err := storage.NewGraphReader(ctx, b.storage, treehashPath) if err == nil {