diff --git a/README.md b/README.md index 5e4c2ef..0b556ab 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ Every post-incident guide from CrowdStrike, Wiz, Snyk, and Microsoft tells you t - **Advisory database** — built-in + auto-updated database of known-compromised actions - **Standard BOM formats** — output as CycloneDX 1.5 or SPDX 2.3 for integration with Dependency-Track, Grype, and other tooling - **SHA verification** — optionally verify that pinned SHAs are actually reachable from the upstream repo, catching fork-sourced and force-pushed-away commits (`--verify-shas`) +- **Ref resolution** — optionally resolve tag and branch refs to the commit SHA they point to at BOM-generation time, turning a mutable-tag BOM into a stable evidentiary record (`--resolve-refs`) - **CI gate** — exits non-zero when compromised actions are found or (with `--fail-on-warnings`) when any advisory warning is emitted - **Fast** — caches resolved actions locally, uses `raw.githubusercontent.com` to avoid API rate limits @@ -138,6 +139,33 @@ abom scan . --verify-shas --fail-on-warnings --github-token $GITHUB_TOKEN **Rate limit caveat:** `--verify-shas` makes an extra API call per unique SHA. Anonymous requests are capped at 60/hour — set `--github-token` (or `GITHUB_TOKEN`) for a realistic 5000/hour budget. +## Resolving tag and branch refs + +Git tags are mutable. A workflow pinned to `actions/checkout@v4` today may resolve to a different commit next week if the maintainer re-points the tag. This means a BOM with tag refs is only a semi-reliable record of what actually ran. Branches (`main`, `master`) are even more mutable. + +For teams generating BOMs as audit or compliance evidence, `--resolve-refs` calls the GitHub commits API to look up the commit SHA each tag or branch currently points to and records it in `resolved_sha` alongside the original ref. The original pinning is preserved so contributor intent stays visible. + +```bash +abom scan . 
--resolve-refs --github-token $GITHUB_TOKEN +``` + +Output (JSON) gets a stable record of what was actually resolved at generation time: + +```json +{ + "uses": "actions/checkout@v4", + "ref": "v4", + "ref_type": "tag", + "resolved_sha": "34e114876b0b11c390a56381ad16ebd13914f8d5" +} +``` + +**Scope:** tag and branch refs only. SHA-pinned refs are already immutable and skipped. Docker and local actions are also skipped. + +**Interaction with `--verify-shas`:** orthogonal. `--verify-shas` walks SHA-pinned refs; `--resolve-refs` populates `resolved_sha` for tag and branch refs. Running both together populates both signals. + +**Rate limit caveat:** same as `--verify-shas` — one API call per unique tag or branch ref, so a token is effectively required for any workflow with more than a handful of tagged actions. + ## How detection works `abom` finds compromised dependencies through three layers that grep will never reach: @@ -179,6 +207,7 @@ Current advisories: | `--check` | | Flag known-compromised actions | `false` | | `--depth` | `-d` | Max recursion depth for transitive deps | `10` | | `--verify-shas` | | Verify pinned SHAs are reachable from upstream repo refs | `false` | +| `--resolve-refs` | | Resolve tag and branch refs to current commit SHAs | `false` | | `--fail-on-warnings` | | Exit `2` if any warnings were emitted | `false` | | `--github-token` | | GitHub token for API requests (also reads `GITHUB_TOKEN`) | | | `--no-network` | | Skip resolving transitive dependencies (local parsing only) | `false` | diff --git a/cmd/check.go b/cmd/check.go index 5b4b872..3a8427c 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -30,6 +30,9 @@ func runCheck(cmd *cobra.Command, args []string) error { if verifyShas && offline { return fmt.Errorf("--verify-shas requires network; remove --offline") } + if resolveRefs && offline { + return fmt.Errorf("--resolve-refs requires network; remove --offline") + } col := &warnings.Collector{} @@ -39,6 +42,12 @@ func runCheck(cmd 
*cobra.Command, args []string) error { Message: "--verify-shas running anonymously; 60 API calls/hour, set --github-token for realistic limits", }) } + if resolveRefs && githubToken == "" { + col.Emit(warnings.Warning{ + Category: warnings.CategoryRateLimit, + Message: "--resolve-refs running anonymously; 60 API calls/hour, set --github-token for realistic limits", + }) + } var r io.Reader @@ -70,6 +79,13 @@ func runCheck(cmd *cobra.Command, args []string) error { abom.CollectActions() + if resolveRefs { + if !quiet { + fmt.Fprintln(os.Stderr, "Resolving tag and branch refs to commit SHAs...") + } + resolver.ResolveABOMRefs(&abom, resolver.NewGitHubRefResolver(githubToken), col) + } + if verifyShas { if !quiet { fmt.Fprintln(os.Stderr, "Verifying pinned SHAs against upstream refs...") diff --git a/cmd/root.go b/cmd/root.go index a8e8197..1fd4018 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -14,6 +14,7 @@ var ( noCache bool offline bool verifyShas bool + resolveRefs bool failOnWarnings bool version = "dev" ) @@ -75,6 +76,7 @@ func init() { rootCmd.PersistentFlags().BoolVar(&noCache, "no-cache", false, "Force fresh advisory fetch, skip cache") rootCmd.PersistentFlags().BoolVar(&offline, "offline", false, "Skip advisory fetch, use built-in data only") rootCmd.PersistentFlags().BoolVar(&verifyShas, "verify-shas", false, "Verify SHA-pinned actions are reachable from upstream repo refs (requires --github-token for realistic rate limits; requires network)") + rootCmd.PersistentFlags().BoolVar(&resolveRefs, "resolve-refs", false, "Resolve tag and branch refs to the commit SHA they currently point to, stored alongside the original ref (requires --github-token for realistic rate limits; requires network)") rootCmd.PersistentFlags().BoolVar(&failOnWarnings, "fail-on-warnings", false, "Exit 2 if any warnings were emitted during the run") rootCmd.Version = version } diff --git a/cmd/scan.go b/cmd/scan.go index 8c64ca7..9b2ad71 100644 --- a/cmd/scan.go +++ b/cmd/scan.go @@ 
-50,6 +50,12 @@ func runScan(cmd *cobra.Command, args []string) error { if verifyShas && noNetwork { return fmt.Errorf("--verify-shas requires network; remove --no-network") } + if resolveRefs && offline { + return fmt.Errorf("--resolve-refs requires network; remove --offline") + } + if resolveRefs && noNetwork { + return fmt.Errorf("--resolve-refs requires network; remove --no-network") + } col := &warnings.Collector{} @@ -59,6 +65,12 @@ func runScan(cmd *cobra.Command, args []string) error { Message: "--verify-shas running anonymously; 60 API calls/hour, set --github-token for realistic limits", }) } + if resolveRefs && githubToken == "" { + col.Emit(warnings.Warning{ + Category: warnings.CategoryRateLimit, + Message: "--resolve-refs running anonymously; 60 API calls/hour, set --github-token for realistic limits", + }) + } if !quiet { fmt.Fprintf(os.Stderr, "Scanning %s...\n", target) @@ -145,6 +157,13 @@ func runScan(cmd *cobra.Command, args []string) error { abom.CollectActions() + if resolveRefs { + if !quiet { + fmt.Fprintln(os.Stderr, "Resolving tag and branch refs to commit SHAs...") + } + resolver.ResolveABOMRefs(abom, resolver.NewGitHubRefResolver(githubToken), col) + } + if verifyShas { if !quiet { fmt.Fprintln(os.Stderr, "Verifying pinned SHAs against upstream refs...") diff --git a/pkg/resolver/resolve_refs.go b/pkg/resolver/resolve_refs.go new file mode 100644 index 0000000..95966ec --- /dev/null +++ b/pkg/resolver/resolve_refs.go @@ -0,0 +1,159 @@ +package resolver + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/julietsecurity/abom/pkg/model" + "github.com/julietsecurity/abom/pkg/warnings" +) + +// RefResolver resolves a tag or branch reference to the commit SHA it +// currently points to. +type RefResolver interface { + // ResolveRef returns the commit SHA for owner/repo@ref. Returns an error + // for network failures, 404, rate limiting, etc.
// ErrResolveRateLimit signals that GitHub rejected a request because a rate
// limit was exceeded: HTTP 429, or HTTP 403 carrying rate-limit markers.
// Callers should stop issuing further resolve calls. Compare with errors.Is.
var ErrResolveRateLimit = errors.New("rate limited")

// GitHubRefResolver resolves refs via the GitHub commits API. The commits
// endpoint accepts tags, branches, and SHAs, and returns the resolved commit
// object, so one call handles all ref types.
type GitHubRefResolver struct {
	client *http.Client
	token  string
}

// NewGitHubRefResolver returns a resolver whose HTTP client times out after
// 30 seconds. An empty token makes every request anonymous.
func NewGitHubRefResolver(token string) *GitHubRefResolver {
	return &GitHubRefResolver{
		client: &http.Client{Timeout: 30 * time.Second},
		token:  token,
	}
}

// ResolveRef returns the commit SHA that owner/repo@ref currently points to.
//
// Errors:
//   - ErrResolveRateLimit when GitHub answers 429, or 403 with rate-limit
//     markers (see refForbiddenIsRateLimit);
//   - "ref not found" for 404 and 422;
//   - an "access forbidden" error for any other 403, so that a single
//     inaccessible repository does not look like an exhausted quota;
//   - a wrapped transport or parse error otherwise.
func (r *GitHubRefResolver) ResolveRef(owner, repo, ref string) (string, error) {
	// Upper bound on how much of an unread body is drained before closing.
	const maxDrain = 64 << 10

	endpoint := fmt.Sprintf("https://api.github.com/repos/%s/%s/commits/%s", owner, repo, ref)

	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Accept", "application/vnd.github.v3+json")
	if r.token != "" {
		req.Header.Set("Authorization", "token "+r.token)
	}

	resp, err := r.client.Do(req)
	if err != nil {
		return "", fmt.Errorf("HTTP request failed: %w", err)
	}
	defer func() {
		// Drain a bounded amount so the keep-alive connection can be reused
		// for the next ref; the copy error is irrelevant because the body is
		// being discarded anyway.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrain))
		resp.Body.Close()
	}()

	switch resp.StatusCode {
	case http.StatusOK:
		return refCommitSHA(resp.Body)
	case http.StatusNotFound, http.StatusUnprocessableEntity:
		return "", fmt.Errorf("ref not found")
	case http.StatusTooManyRequests:
		return "", ErrResolveRateLimit
	case http.StatusForbidden:
		if refForbiddenIsRateLimit(resp) {
			return "", ErrResolveRateLimit
		}
		return "", fmt.Errorf("access forbidden (HTTP %d)", resp.StatusCode)
	default:
		return "", fmt.Errorf("unexpected status %d", resp.StatusCode)
	}
}

// refForbiddenIsRateLimit reports whether a 403 response looks like a rate
// limit rather than a permission problem. It checks the X-RateLimit-Remaining
// and Retry-After headers first and falls back to looking for "rate limit" in
// the (bounded) response body.
func refForbiddenIsRateLimit(resp *http.Response) bool {
	if resp.Header.Get("X-RateLimit-Remaining") == "0" || resp.Header.Get("Retry-After") != "" {
		return true
	}
	// Best effort: a read error still leaves whatever prefix was received,
	// which is all this heuristic needs.
	msg, _ := io.ReadAll(io.LimitReader(resp.Body, 64<<10))
	return strings.Contains(strings.ToLower(string(msg)), "rate limit")
}

// refCommitSHA extracts the top-level "sha" field from a commit response.
// The body is decoded as a token stream and decoding stops as soon as the
// field is found, so a payload larger than the 1 MiB read cap still resolves
// as long as "sha" appears within the cap.
func refCommitSHA(body io.Reader) (string, error) {
	dec := json.NewDecoder(io.LimitReader(body, 1<<20))

	first, err := dec.Token()
	if err != nil {
		return "", fmt.Errorf("parsing commit response: %w", err)
	}
	if d, ok := first.(json.Delim); !ok || d != '{' {
		return "", fmt.Errorf("parsing commit response: expected a JSON object")
	}

	for dec.More() {
		keyTok, err := dec.Token()
		if err != nil {
			return "", fmt.Errorf("parsing commit response: %w", err)
		}
		key, _ := keyTok.(string)

		// encoding/json matches struct tags case-insensitively; keep that.
		if !strings.EqualFold(key, "sha") {
			var skipped json.RawMessage
			if err := dec.Decode(&skipped); err != nil {
				return "", fmt.Errorf("parsing commit response: %w", err)
			}
			continue
		}

		var sha string
		if err := dec.Decode(&sha); err != nil {
			return "", fmt.Errorf("parsing commit response: %w", err)
		}
		if sha == "" {
			return "", fmt.Errorf("no sha in commit response")
		}
		return sha, nil
	}
	return "", fmt.Errorf("no sha in commit response")
}
ResolveABOMRefs iterates the deduplicated action list and resolves each +// tag- or branch-pinned reference to its current commit SHA. Stores the +// result in ActionRef.ResolvedSHA. +// +// Dedup is keyed on owner/repo@ref so subdirectory variants of the same +// action collapse into a single API call. +// +// Once the resolver observes a rate-limit response, subsequent resolutions +// are skipped for the remainder of the run. +func ResolveABOMRefs(abom *model.ABOM, r RefResolver, col *warnings.Collector) { + if abom == nil || r == nil || col == nil { + return + } + + type cacheKey struct { + owner, repo, ref string + } + cache := make(map[cacheKey]string) + var rateLimited bool + + for _, ref := range abom.Actions { + if ref.RefType != model.RefTypeTag && ref.RefType != model.RefTypeBranch { + continue + } + switch ref.ActionType { + case model.ActionTypeDocker, model.ActionTypeLocal: + continue + } + if ref.Owner == "" || ref.Repo == "" || ref.Ref == "" { + continue + } + + key := cacheKey{ref.Owner, ref.Repo, ref.Ref} + if sha, ok := cache[key]; ok { + ref.ResolvedSHA = sha + continue + } + + if rateLimited { + continue + } + + sha, err := r.ResolveRef(ref.Owner, ref.Repo, ref.Ref) + if err != nil { + if err == ErrResolveRateLimit { + rateLimited = true + col.Emit(warnings.Warning{ + Category: warnings.CategoryRateLimit, + Message: "GitHub rate limit hit during ref resolution; remaining refs skipped", + Err: err, + }) + continue + } + col.Emit(warnings.Warning{ + Category: warnings.CategoryRefResolve, + Subject: refResolveSubject(ref), + Message: "could not resolve ref to a commit SHA", + Err: err, + }) + continue + } + + cache[key] = sha + ref.ResolvedSHA = sha + } +} + +func refResolveSubject(ref *model.ActionRef) string { + if ref.Owner != "" && ref.Repo != "" && ref.Ref != "" { + return fmt.Sprintf("%s/%s@%s", ref.Owner, ref.Repo, ref.Ref) + } + return ref.Raw +} diff --git a/pkg/resolver/resolve_refs_test.go b/pkg/resolver/resolve_refs_test.go new file 
mode 100644 index 0000000..183d881 --- /dev/null +++ b/pkg/resolver/resolve_refs_test.go @@ -0,0 +1,267 @@ +package resolver + +import ( + "fmt" + "testing" + + "github.com/julietsecurity/abom/pkg/model" + "github.com/julietsecurity/abom/pkg/warnings" +) + +// mockRefResolver returns canned SHAs keyed on owner/repo@ref. +type mockRefResolver struct { + results map[string]string // owner/repo@ref -> sha or "" to signal 404 + calls map[string]int + err error // global error (transport / rate limit) +} + +func newMockRefResolver() *mockRefResolver { + return &mockRefResolver{ + results: make(map[string]string), + calls: make(map[string]int), + } +} + +func (m *mockRefResolver) ResolveRef(owner, repo, ref string) (string, error) { + key := fmt.Sprintf("%s/%s@%s", owner, repo, ref) + m.calls[key]++ + if m.err != nil { + return "", m.err + } + sha, ok := m.results[key] + if !ok { + return "", fmt.Errorf("ref not found") + } + return sha, nil +} + +const resolvedSHA1 = "abcdef1234567890abcdef1234567890abcdef12" +const resolvedSHA2 = "0123456789abcdef0123456789abcdef01234567" + +func newTagAction(owner, repo, path, tag string) *model.ActionRef { + return &model.ActionRef{ + Owner: owner, + Repo: repo, + Path: path, + Ref: tag, + RefType: model.RefTypeTag, + ActionType: model.ActionTypeStandard, + } +} + +func newBranchAction(owner, repo, branch string) *model.ActionRef { + return &model.ActionRef{ + Owner: owner, + Repo: repo, + Ref: branch, + RefType: model.RefTypeBranch, + ActionType: model.ActionTypeStandard, + } +} + +func TestResolveABOMRefs_Tag(t *testing.T) { + m := newMockRefResolver() + m.results["actions/checkout@v4"] = resolvedSHA1 + + ref := newTagAction("actions", "checkout", "", "v4") + abom := &model.ABOM{Actions: []*model.ActionRef{ref}} + + col := &warnings.Collector{} + ResolveABOMRefs(abom, m, col) + + if ref.ResolvedSHA != resolvedSHA1 { + t.Errorf("ResolvedSHA = %q, want %q", ref.ResolvedSHA, resolvedSHA1) + } + if col.Count() != 0 { + 
t.Errorf("expected 0 warnings, got %d", col.Count()) + } +} + +func TestResolveABOMRefs_Branch(t *testing.T) { + m := newMockRefResolver() + m.results["actions/checkout@main"] = resolvedSHA1 + + ref := newBranchAction("actions", "checkout", "main") + abom := &model.ABOM{Actions: []*model.ActionRef{ref}} + + col := &warnings.Collector{} + ResolveABOMRefs(abom, m, col) + + if ref.ResolvedSHA != resolvedSHA1 { + t.Errorf("ResolvedSHA = %q, want %q", ref.ResolvedSHA, resolvedSHA1) + } +} + +func TestResolveABOMRefs_SHASkipped(t *testing.T) { + m := newMockRefResolver() + + ref := &model.ActionRef{ + Owner: "actions", + Repo: "checkout", + Ref: resolvedSHA1, + RefType: model.RefTypeSHA, + ActionType: model.ActionTypeStandard, + } + abom := &model.ABOM{Actions: []*model.ActionRef{ref}} + + col := &warnings.Collector{} + ResolveABOMRefs(abom, m, col) + + if len(m.calls) != 0 { + t.Errorf("expected no API calls for SHA ref, got %d", len(m.calls)) + } + if ref.ResolvedSHA != "" { + t.Errorf("SHA ref should not have ResolvedSHA set, got %q", ref.ResolvedSHA) + } +} + +func TestResolveABOMRefs_DockerAndLocal_Skipped(t *testing.T) { + m := newMockRefResolver() + + docker := &model.ActionRef{ + Raw: "docker://alpine:3.18", + RefType: model.RefTypeTag, + ActionType: model.ActionTypeDocker, + } + local := &model.ActionRef{ + Raw: "./local-action", + RefType: model.RefTypeTag, + ActionType: model.ActionTypeLocal, + } + abom := &model.ABOM{Actions: []*model.ActionRef{docker, local}} + + col := &warnings.Collector{} + ResolveABOMRefs(abom, m, col) + + if len(m.calls) != 0 { + t.Errorf("expected no API calls for docker/local, got %d", len(m.calls)) + } +} + +func TestResolveABOMRefs_Dedup(t *testing.T) { + m := newMockRefResolver() + m.results["actions/checkout@v4"] = resolvedSHA1 + + a := newTagAction("actions", "checkout", "", "v4") + b := newTagAction("actions", "checkout", "sub", "v4") + b.ActionType = model.ActionTypeSubdirectory + + abom := &model.ABOM{Actions: 
[]*model.ActionRef{a, b}} + col := &warnings.Collector{} + ResolveABOMRefs(abom, m, col) + + if m.calls["actions/checkout@v4"] != 1 { + t.Errorf("expected 1 API call, got %d", m.calls["actions/checkout@v4"]) + } + if a.ResolvedSHA != resolvedSHA1 || b.ResolvedSHA != resolvedSHA1 { + t.Errorf("both refs should share resolved SHA: a=%q b=%q", a.ResolvedSHA, b.ResolvedSHA) + } +} + +func TestResolveABOMRefs_RefNotFound_Warning(t *testing.T) { + m := newMockRefResolver() + // No entry for this ref, mockRefResolver returns "ref not found" error + + ref := newTagAction("actions", "checkout", "", "v999") + abom := &model.ABOM{Actions: []*model.ActionRef{ref}} + + col := &warnings.Collector{} + ResolveABOMRefs(abom, m, col) + + if col.Count() != 1 { + t.Fatalf("expected 1 warning, got %d", col.Count()) + } + w := col.All()[0] + if w.Category != warnings.CategoryRefResolve { + t.Errorf("expected RefResolve, got %s", w.Category) + } + if ref.ResolvedSHA != "" { + t.Errorf("failed resolve should leave ResolvedSHA empty, got %q", ref.ResolvedSHA) + } +} + +func TestResolveABOMRefs_TransportError_RefResolveCategory(t *testing.T) { + m := newMockRefResolver() + m.err = fmt.Errorf("network unreachable") + + ref := newTagAction("actions", "checkout", "", "v4") + abom := &model.ABOM{Actions: []*model.ActionRef{ref}} + + col := &warnings.Collector{} + ResolveABOMRefs(abom, m, col) + + if col.Count() != 1 { + t.Fatalf("expected 1 warning, got %d", col.Count()) + } + w := col.All()[0] + if w.Category != warnings.CategoryRefResolve { + t.Errorf("expected RefResolve for transport error, got %s", w.Category) + } +} + +func TestResolveABOMRefs_MidRunRateLimit(t *testing.T) { + m := newMockRefResolver() + m.results["a/b@v1"] = resolvedSHA1 + // c/d@v1 will hit rate limit via global err triggered partway through. + // Since we can't easily toggle global err mid-iteration, simulate by + // returning ErrResolveRateLimit from the second call onward. 
+ + // Simpler approach: use a dedicated rate-limit resolver + var callCount int + rl := rateLimitOnCallN{ + results: m.results, + n: 2, // rate-limit on 2nd call + count: &callCount, + } + + abom := &model.ABOM{ + Actions: []*model.ActionRef{ + newTagAction("a", "b", "", "v1"), + newTagAction("c", "d", "", "v1"), + newTagAction("e", "f", "", "v1"), + }, + } + col := &warnings.Collector{} + ResolveABOMRefs(abom, rl, col) + + // Expect exactly one rate-limit warning for c/d, and e/f skipped. + if col.Count() != 1 { + t.Fatalf("expected 1 warning, got %d: %+v", col.Count(), col.All()) + } + if col.All()[0].Category != warnings.CategoryRateLimit { + t.Errorf("expected RateLimit, got %s", col.All()[0].Category) + } + if callCount != 2 { + t.Errorf("expected 2 calls before skip, got %d", callCount) + } + if abom.Actions[0].ResolvedSHA != resolvedSHA1 { + t.Errorf("first ref should have been resolved, got %q", abom.Actions[0].ResolvedSHA) + } + if abom.Actions[2].ResolvedSHA != "" { + t.Errorf("third ref should be skipped, got %q", abom.Actions[2].ResolvedSHA) + } +} + +type rateLimitOnCallN struct { + results map[string]string + n int + count *int +} + +func (r rateLimitOnCallN) ResolveRef(owner, repo, ref string) (string, error) { + *r.count++ + if *r.count == r.n { + return "", ErrResolveRateLimit + } + key := fmt.Sprintf("%s/%s@%s", owner, repo, ref) + if sha, ok := r.results[key]; ok { + return sha, nil + } + return resolvedSHA2, nil +} + +func TestResolveABOMRefs_NilCollector_NoPanic(t *testing.T) { + m := newMockRefResolver() + abom := &model.ABOM{Actions: []*model.ActionRef{newTagAction("a", "b", "", "v1")}} + ResolveABOMRefs(abom, m, nil) +} diff --git a/pkg/warnings/warnings.go b/pkg/warnings/warnings.go index 27eb354..6c917f7 100644 --- a/pkg/warnings/warnings.go +++ b/pkg/warnings/warnings.go @@ -16,6 +16,7 @@ type Category string const ( CategorySHAUnreachable Category = "sha-unreachable" CategoryRateLimit Category = "rate-limit" + CategoryRefResolve 
Category = "ref-resolve" ) type Warning struct {