From 336dd0e37c3508a738eebdc05a1b22f601a48807 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 17:45:42 +0200 Subject: [PATCH 01/23] docs: add archive explode implementation plan Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- .../2026-06-29-explode-archive-materials.md | 1105 +++++++++++++++++ 1 file changed, 1105 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-29-explode-archive-materials.md diff --git a/docs/superpowers/plans/2026-06-29-explode-archive-materials.md b/docs/superpowers/plans/2026-06-29-explode-archive-materials.md new file mode 100644 index 000000000..12449e749 --- /dev/null +++ b/docs/superpowers/plans/2026-06-29-explode-archive-materials.md @@ -0,0 +1,1105 @@ +# Explode Archive Materials in `chainloop att add` Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Let `chainloop attestation add --kind --value ` unpack a `.zip`/`.tar`/`.tar.gz`/`.tgz` and record every contained regular file as its own material of kind ``, atomically. + +**Architecture:** A new stdlib-only archive helper in the `materials` package detects archives and walks their entries with resource guards. The `Crafter` gains an atomic batch method that crafts every entry into the in-memory crafting state and persists **once**. `AttestationAdd.Run` routes to that method when `--kind` is set, the value is an archive, and the kind is not "archive-native" (e.g. `ZAP_DAST_ZIP`); all other cases keep the existing single-material path unchanged. + +**Tech Stack:** Go 1.26.4, `archive/zip`, `archive/tar`, `compress/gzip` (stdlib only), protovalidate, testify suites. + +## Global Constraints + +- **Out of scope (explicit):** per-entry **auto-detection** when `--kind` is omitted. The explode path triggers **only** when `--kind` is explicitly provided. With no `--kind`, behavior is unchanged (the whole archive goes through the existing single-material path). +- **Go version:** 1.26.4. Stdlib only for archives — no new dependency. Formats: `.zip`, `.tar`, `.tar.gz`, `.tgz` (no bzip2/xz in this iteration). +- **Material names are DNS-1123:** must match `^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` (validated by protovalidate in `materials.Craft`). Derived names MUST be sanitized; raw basenames like `scan.json` are invalid as-is. +- **`--name` on explode path:** used as a **prefix** — derived name is `-`; when `--name` is empty, the bare sanitized basename is used. +- **Annotations:** every `--annotation key=value` applies to **every** extracted material. +- **Atomicity:** all qualifying entries are added or none. State is persisted exactly once, after all entries craft successfully. +- **Guards (defaults):** max entries `10000`, max total uncompressed size `1 GiB` (`1 << 30`), enforced by streaming (do not trust declared sizes). +- **Recursion:** nested archives are NOT recursed — treated as regular files. +- **License header:** every modified/created file's header ends with the current year `2026` (range if it already had an earlier start year, e.g. `2024-2026`). +- **Commits:** signed (`-S`), DCO (`-s`), Conventional Commits, and an `Assisted-by: Claude Code` trailer on every commit. No co-authored-by lines. +- **Quality gates:** `gofmt`, `golangci-lint`, table-driven tests, TDD (failing test first). +- **Module path:** `github.com/chainloop-dev/chainloop`. + +--- + +## File Structure + +- `pkg/attestation/crafter/materials/archive.go` — **NEW.** Archive detection, entry walking with guards, name sanitization/allocation, archive-native kind allowlist. +- `pkg/attestation/crafter/materials/archive_test.go` — **NEW.** Tests for the above. +- `pkg/attestation/crafter/materials/testdata/` — **NEW fixtures** generated by a test helper (no binary blobs checked in by hand). +- `pkg/attestation/crafter/crafter.go` — **MODIFY.** Extract `stageMaterial` from `addMaterial`; add atomic `AddMaterialsFromArchive`. +- `pkg/attestation/crafter/crafter_test.go` — **MODIFY.** Tests for `AddMaterialsFromArchive`. +- `app/cli/pkg/action/attestation_add.go` — **MODIFY.** Route to the explode path; `Run` returns `[]*AttestationStatusMaterial`; carry guard limits. +- `app/cli/cmd/attestation_add.go` — **MODIFY.** Add guard flags; render N materials. + +--- + +## Task 1: Archive detection + +**Files:** +- Create: `pkg/attestation/crafter/materials/archive.go` +- Test: `pkg/attestation/crafter/materials/archive_test.go` + +**Interfaces:** +- Produces: + - `type ArchiveFormat int` with `ArchiveNone, ArchiveZip, ArchiveTar, ArchiveTarGz` + - `func DetectArchive(path string) (ArchiveFormat, error)` — extension-first, magic-byte backstop; returns `ArchiveNone` (nil error) for non-archives. + +- [ ] **Step 1: Write the failing test** + +Create `pkg/attestation/crafter/materials/archive_test.go` (standard `2026` license header; package `materials`): + +```go +package materials + +import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeZip creates a zip at dir/name containing the given files (name->content). +func writeZip(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + zw := zip.NewWriter(f) + for n, c := range files { + w, err := zw.Create(n) + require.NoError(t, err) + _, err = w.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) + return p +} + +// writeTarGz creates a .tar.gz at dir/name containing the given regular files. +func writeTarGz(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + gw := gzip.NewWriter(f) + tw := tar.NewWriter(gw) + for n, c := range files { + require.NoError(t, tw.WriteHeader(&tar.Header{Name: n, Mode: 0o600, Size: int64(len(c)), Typeflag: tar.TypeReg})) + _, err = tw.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, tw.Close()) + require.NoError(t, gw.Close()) + return p +} + +func TestDetectArchive(t *testing.T) { + dir := t.TempDir() + zipPath := writeZip(t, dir, "a.zip", map[string]string{"x.txt": "hi"}) + tgzPath := writeTarGz(t, dir, "a.tar.gz", map[string]string{"x.txt": "hi"}) + + plain := filepath.Join(dir, "app.bin") + require.NoError(t, os.WriteFile(plain, []byte("not an archive"), 0o600)) + + // A .zip renamed without extension — magic bytes must still detect it. + noExt := filepath.Join(dir, "noext") + require.NoError(t, os.WriteFile(noExt, mustRead(t, zipPath), 0o600)) + + tests := []struct { + name string + path string + want ArchiveFormat + }{ + {"zip by extension", zipPath, ArchiveZip}, + {"tar.gz by extension", tgzPath, ArchiveTarGz}, + {"plain file", plain, ArchiveNone}, + {"zip without extension via magic", noExt, ArchiveZip}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := DetectArchive(tc.path) + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func mustRead(t *testing.T, p string) []byte { + t.Helper() + b, err := os.ReadFile(p) + require.NoError(t, err) + return b +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./pkg/attestation/crafter/materials/ -run TestDetectArchive -v` +Expected: FAIL — `undefined: DetectArchive` / `ArchiveZip`. + +- [ ] **Step 3: Write minimal implementation** + +Create `pkg/attestation/crafter/materials/archive.go` (license header `Copyright 2026 The Chainloop Authors.`): + +```go +package materials + +import ( + "bytes" + "fmt" + "os" + "strings" +) + +// ArchiveFormat identifies a supported archive container. +type ArchiveFormat int + +const ( + ArchiveNone ArchiveFormat = iota + ArchiveZip + ArchiveTar + ArchiveTarGz +) + +// DetectArchive reports whether path is a supported archive and, if so, its +// format. Detection is by extension first; for files whose extension does not +// match, magic bytes are used as a backstop so renamed archives are still +// caught. A non-archive returns (ArchiveNone, nil). +func DetectArchive(path string) (ArchiveFormat, error) { + lower := strings.ToLower(path) + switch { + case strings.HasSuffix(lower, ".zip"): + return ArchiveZip, nil + case strings.HasSuffix(lower, ".tar.gz"), strings.HasSuffix(lower, ".tgz"): + return ArchiveTarGz, nil + case strings.HasSuffix(lower, ".tar"): + return ArchiveTar, nil + } + + return detectByMagic(path) +} + +func detectByMagic(path string) (ArchiveFormat, error) { + f, err := os.Open(path) + if err != nil { + return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) + } + defer f.Close() + + // 512 bytes is enough for the gzip/zip magic and the tar "ustar" marker at + // offset 257. + header := make([]byte, 512) + n, _ := f.Read(header) + header = header[:n] + + switch { + case bytes.HasPrefix(header, []byte("PK\x03\x04")), bytes.HasPrefix(header, []byte("PK\x05\x06")): + return ArchiveZip, nil + case bytes.HasPrefix(header, []byte{0x1f, 0x8b}): + return ArchiveTarGz, nil + case len(header) >= 262 && bytes.Equal(header[257:262], []byte("ustar")): + return ArchiveTar, nil + } + + return ArchiveNone, nil +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `go test ./pkg/attestation/crafter/materials/ -run TestDetectArchive -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +gofmt -w pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go +git add pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go +git commit -S -s -m "feat(cli): detect archive materials for att add explode path + +Assisted-by: Claude Code" +``` + +--- + +## Task 2: Archive entry walking with guards + +**Files:** +- Modify: `pkg/attestation/crafter/materials/archive.go` +- Test: `pkg/attestation/crafter/materials/archive_test.go` + +**Interfaces:** +- Consumes: `ArchiveFormat`, `DetectArchive` (Task 1). +- Produces: + - `type ArchiveLimits struct { MaxEntries int; MaxTotalSize int64 }` + - `func DefaultArchiveLimits() ArchiveLimits` → `{10000, 1 << 30}` + - `var ErrArchiveTooLarge`, `var ErrTooManyEntries` + - `func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error` + - Calls `yield(innerPath, reader)` for each **regular** file. `innerPath` is the entry's path inside the archive (caller derives the basename for naming). + - Skips directories, symlinks, hardlinks, and zero-length entries. + - Rejects entries whose cleaned path escapes the archive root (zip-slip guard). + - Enforces `MaxEntries` (count of yielded entries) and `MaxTotalSize` (running uncompressed bytes streamed), returning the sentinel errors when exceeded. The byte cap is enforced **while streaming**, not from declared sizes. + - A `yield` error aborts the walk and is returned wrapped with the entry name. + +- [ ] **Step 1: Write the failing test** + +Append to `archive_test.go`: + +```go +func TestWalkArchiveEntries(t *testing.T) { + dir := t.TempDir() + + t.Run("yields regular files, skips dirs", func(t *testing.T) { + // Build a zip with a directory entry + two files. + p := filepath.Join(dir, "files.zip") + f, err := os.Create(p) + require.NoError(t, err) + zw := zip.NewWriter(f) + _, err = zw.Create("nested/") // directory entry + require.NoError(t, err) + for _, n := range []string{"a.json", "nested/b.json"} { + w, err := zw.Create(n) + require.NoError(t, err) + _, err = w.Write([]byte("{}")) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) + require.NoError(t, f.Close()) + + var got []string + err = WalkArchiveEntries(p, ArchiveZip, DefaultArchiveLimits(), func(name string, r io.Reader) error { + b, _ := io.ReadAll(r) + assert.Equal(t, "{}", string(b)) + got = append(got, name) + return nil + }) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"a.json", "nested/b.json"}, got) + }) + + t.Run("max entries exceeded", func(t *testing.T) { + p := writeTarGz(t, dir, "many.tar.gz", map[string]string{"a": "1", "b": "2", "c": "3"}) + err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 2, MaxTotalSize: 1 << 30}, func(string, io.Reader) error { return nil }) + require.ErrorIs(t, err, ErrTooManyEntries) + }) + + t.Run("max total size exceeded while streaming", func(t *testing.T) { + p := writeTarGz(t, dir, "big.tar.gz", map[string]string{"a": strings.Repeat("x", 1000)}) + err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 100, MaxTotalSize: 100}, func(_ string, r io.Reader) error { + _, err := io.ReadAll(r) + return err + }) + require.ErrorIs(t, err, ErrArchiveTooLarge) + }) +} +``` + +Add `"io"` and `"strings"` to the test imports. + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./pkg/attestation/crafter/materials/ -run TestWalkArchiveEntries -v` +Expected: FAIL — `undefined: WalkArchiveEntries` / `ErrTooManyEntries` / `ErrArchiveTooLarge`. + +- [ ] **Step 3: Write minimal implementation** + +Append to `archive.go` (add imports `archive/tar`, `archive/zip`, `compress/gzip`, `errors`, `io`, `path`): + +```go +var ( + // ErrTooManyEntries is returned when an archive has more qualifying entries + // than the configured maximum. + ErrTooManyEntries = errors.New("archive exceeds the maximum number of entries") + // ErrArchiveTooLarge is returned when the running uncompressed size of an + // archive exceeds the configured maximum. + ErrArchiveTooLarge = errors.New("archive exceeds the maximum uncompressed size") +) + +// ArchiveLimits bounds archive expansion to guard against zip bombs. +type ArchiveLimits struct { + MaxEntries int + MaxTotalSize int64 +} + +// DefaultArchiveLimits returns the safe defaults: 10000 entries and 1 GiB +// total uncompressed size. +func DefaultArchiveLimits() ArchiveLimits { + return ArchiveLimits{MaxEntries: 10000, MaxTotalSize: 1 << 30} +} + +// capReader wraps a reader and fails once the shared running total exceeds max, +// so we never trust an archive's declared sizes. +type capReader struct { + r io.Reader + total *int64 + max int64 +} + +func (c *capReader) Read(p []byte) (int, error) { + n, err := c.r.Read(p) + *c.total += int64(n) + if *c.total > c.max { + return n, ErrArchiveTooLarge + } + return n, err +} + +// WalkArchiveEntries calls yield for every regular file in the archive, +// enforcing the limits and skipping directories, symlinks, hardlinks, empty +// entries, and path-traversal entries. +func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error { + var total int64 + count := 0 + visit := func(name string, size int64, r io.Reader) error { + if !safeArchivePath(name) { + return fmt.Errorf("unsafe entry path %q in archive", name) + } + count++ + if count > limits.MaxEntries { + return ErrTooManyEntries + } + if err := yield(name, &capReader{r: r, total: &total, max: limits.MaxTotalSize}); err != nil { + return fmt.Errorf("processing entry %q: %w", name, err) + } + return nil + } + + switch format { + case ArchiveZip: + return walkZip(path, visit) + case ArchiveTar: + return walkTar(path, false, visit) + case ArchiveTarGz: + return walkTar(path, true, visit) + default: + return fmt.Errorf("unsupported archive format") + } +} + +// safeArchivePath rejects absolute paths and any path that escapes the +// extraction root via "..". +func safeArchivePath(name string) bool { + clean := path.Clean("/" + strings.ReplaceAll(name, "\\", "/")) + return !strings.Contains(clean, "/../") && clean != "/.." +} + +func walkZip(p string, visit func(name string, size int64, r io.Reader) error) error { + zr, err := zip.OpenReader(p) + if err != nil { + return fmt.Errorf("opening zip: %w", err) + } + defer zr.Close() + + for _, f := range zr.File { + if f.FileInfo().IsDir() || f.Mode()&os.ModeSymlink != 0 || f.UncompressedSize64 == 0 { + continue + } + rc, err := f.Open() + if err != nil { + return fmt.Errorf("opening entry %q: %w", f.Name, err) + } + err = visit(f.Name, int64(f.UncompressedSize64), rc) + rc.Close() + if err != nil { + return err + } + } + return nil +} + +func walkTar(p string, gzipped bool, visit func(name string, size int64, r io.Reader) error) error { + f, err := os.Open(p) + if err != nil { + return fmt.Errorf("opening tar: %w", err) + } + defer f.Close() + + var src io.Reader = f + if gzipped { + gz, err := gzip.NewReader(f) + if err != nil { + return fmt.Errorf("opening gzip: %w", err) + } + defer gz.Close() + src = gz + } + + tr := tar.NewReader(src) + for { + hdr, err := tr.Next() + if errors.Is(err, io.EOF) { + return nil + } + if err != nil { + return fmt.Errorf("reading tar: %w", err) + } + if hdr.Typeflag != tar.TypeReg || hdr.Size == 0 { + continue + } + if err := visit(hdr.Name, hdr.Size, tr); err != nil { + return err + } + } +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `go test ./pkg/attestation/crafter/materials/ -run TestWalkArchiveEntries -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +gofmt -w pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go +git add pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go +git commit -S -s -m "feat(cli): walk archive entries with zip-bomb and traversal guards + +Assisted-by: Claude Code" +``` + +--- + +## Task 3: Archive-native kind allowlist + name derivation + +**Files:** +- Modify: `pkg/attestation/crafter/materials/archive.go` +- Test: `pkg/attestation/crafter/materials/archive_test.go` + +**Interfaces:** +- Produces: + - `func IsArchiveNativeKind(kind string) bool` — true for kinds whose value is meant to be the archive itself; initial set: `ZAP_DAST_ZIP`. + - `func SanitizeMaterialName(s string) string` — lower-cases and collapses runs outside `[a-z0-9]` to a single `-`, trims leading/trailing `-`; falls back to `"material"` if empty. Result matches DNS-1123. + - `type NameAllocator struct{ ... }`, `func NewNameAllocator(existing []string) *NameAllocator`, `func (a *NameAllocator) Allocate(prefix, base string) string` — returns a unique DNS-1123 name. First use of a name is clean; collisions get `-1`, `-2`, …. When `prefix != ""` the name is `-`, else ``. + +- [ ] **Step 1: Write the failing test** + +Append to `archive_test.go`: + +```go +func TestSanitizeMaterialName(t *testing.T) { + tests := []struct{ in, want string }{ + {"scan.json", "scan-json"}, + {"results.XML", "results-xml"}, + {"weird__name!!", "weird-name"}, + {"___", "material"}, + } + for _, tc := range tests { + assert.Equal(t, tc.want, SanitizeMaterialName(tc.in)) + } +} + +func TestNameAllocator(t *testing.T) { + a := NewNameAllocator([]string{"existing"}) + + assert.Equal(t, "scan-json", a.Allocate("", "scan.json")) + assert.Equal(t, "scan-json-1", a.Allocate("", "scan.json")) // collision + assert.Equal(t, "results-xml", a.Allocate("", "results.xml")) + assert.Equal(t, "existing-1", a.Allocate("", "existing")) // collides with pre-existing + assert.Equal(t, "sboms-a-json", a.Allocate("sboms", "a.json")) // prefix +} + +func TestIsArchiveNativeKind(t *testing.T) { + assert.True(t, IsArchiveNativeKind("ZAP_DAST_ZIP")) + assert.False(t, IsArchiveNativeKind("SBOM_CYCLONEDX_JSON")) + assert.False(t, IsArchiveNativeKind("ARTIFACT")) +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./pkg/attestation/crafter/materials/ -run 'TestSanitizeMaterialName|TestNameAllocator|TestIsArchiveNativeKind' -v` +Expected: FAIL — undefined symbols. + +- [ ] **Step 3: Write minimal implementation** + +Append to `archive.go` (add `schemaapi "github.com/chainloop-dev/chainloop/app/controlplane/api/workflowcontract/v1"` to imports): + +```go +// archiveNativeKinds lists material kinds whose value is the archive itself. +// For these, --kind short-circuits the explode path and the archive is +// recorded whole. Extend this set as new "the archive is the material" kinds +// are added. +var archiveNativeKinds = map[string]struct{}{ + schemaapi.CraftingSchema_Material_ZAP_DAST_ZIP.String(): {}, +} + +// IsArchiveNativeKind reports whether kind treats the archive as a single +// material (recorded whole) rather than something to explode. +func IsArchiveNativeKind(kind string) bool { + _, ok := archiveNativeKinds[kind] + return ok +} + +// SanitizeMaterialName converts s into a valid DNS-1123 material name: +// lowercase, with every run of characters outside [a-z0-9] collapsed to a +// single "-" and leading/trailing "-" trimmed. Falls back to "material". +func SanitizeMaterialName(s string) string { + var b strings.Builder + pendingHyphen := false + for _, r := range strings.ToLower(s) { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') { + if pendingHyphen && b.Len() > 0 { + b.WriteByte('-') + } + b.WriteRune(r) + pendingHyphen = false + } else { + pendingHyphen = true + } + } + if b.Len() == 0 { + return "material" + } + return b.String() +} + +// NameAllocator hands out unique DNS-1123 material names, suffixing collisions +// with -1, -2, …. It is seeded with names already present in the attestation +// so derived names never overwrite existing materials. +type NameAllocator struct { + used map[string]struct{} +} + +// NewNameAllocator seeds the allocator with existing material names. +func NewNameAllocator(existing []string) *NameAllocator { + used := make(map[string]struct{}, len(existing)) + for _, e := range existing { + used[e] = struct{}{} + } + return &NameAllocator{used: used} +} + +// Allocate returns a unique name derived from base (and optional prefix). +func (a *NameAllocator) Allocate(prefix, base string) string { + name := SanitizeMaterialName(base) + if prefix != "" { + name = SanitizeMaterialName(prefix) + "-" + name + } + + candidate := name + for i := 1; ; i++ { + if _, taken := a.used[candidate]; !taken { + a.used[candidate] = struct{}{} + return candidate + } + candidate = fmt.Sprintf("%s-%d", name, i) + } +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `go test ./pkg/attestation/crafter/materials/ -run 'TestSanitizeMaterialName|TestNameAllocator|TestIsArchiveNativeKind' -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +gofmt -w pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go +git add pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go +git commit -S -s -m "feat(cli): add archive-native allowlist and entry name allocation + +Assisted-by: Claude Code" +``` + +--- + +## Task 4: Extract `stageMaterial` from `addMaterial` (no behavior change) + +This is a pure refactor that makes a single atomic commit possible: split the +"craft + validate + evaluate policies + attach to in-memory state" work from the +final persistence write. + +**Files:** +- Modify: `pkg/attestation/crafter/crafter.go:684-794` + +**Interfaces:** +- Produces: + - `func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error)` — everything `addMaterial` did **except** the final `c.stateManager.Write`. Mutates `c.CraftingState` in memory. + - `addMaterial` keeps its signature/behavior; now `stageMaterial` + a single `Write`. + +- [ ] **Step 1: Refactor — split the method** + +Replace the body of `addMaterial` (lines 684-794) so the crafting work moves into `stageMaterial`. `stageMaterial` is the current body up to and including step "5 - Attach it to state" (the `c.CraftingState.Attestation.Materials[m.Name] = mt` line). `addMaterial` becomes: + +```go +// addMaterial crafts a single material and persists the crafting state. +func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { + mt, err := c.stageMaterial(ctx, m, value, casBackend, runtimeAnnotations, opts...) + if err != nil { + return nil, err + } + + if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { + return nil, fmt.Errorf("failed to persist crafting state: %w", err) + } + + c.Logger.Debug().Str("key", m.Name).Msg("added to state") + return mt, nil +} +``` + +And `stageMaterial` holds the moved code (note: it no longer takes `attestationID`, since only the `Write` used it): + +```go +// stageMaterial crafts a material into the in-memory crafting state WITHOUT +// persisting it. Callers must call stateManager.Write to commit. Splitting the +// write out lets the archive explode path craft many entries and commit once. +func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { + // ... moved body: addOpts handling, materials.Craft, annotations merge, + // annotation completeness check, protovalidate, policy-evaluation dedup + + // VerifyMaterial (group and regular), attach to c.CraftingState.Attestation.Materials. + // Ends by returning mt, nil — NO stateManager.Write here. +} +``` + +- [ ] **Step 2: Run existing crafter tests to verify no behavior change** + +Run: `go test ./pkg/attestation/crafter/... 2>&1 | tail -20` +Expected: PASS (same set of tests that passed before the refactor). If integration-gated, use `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/...`. + +- [ ] **Step 3: Commit** + +```bash +gofmt -w pkg/attestation/crafter/crafter.go +git add pkg/attestation/crafter/crafter.go +git commit -S -s -m "refactor(crafter): split material crafting from state persistence + +Assisted-by: Claude Code" +``` + +--- + +## Task 5: Atomic `AddMaterialsFromArchive` on the crafter + +**Files:** +- Modify: `pkg/attestation/crafter/crafter.go` +- Test: `pkg/attestation/crafter/crafter_test.go` + +**Interfaces:** +- Consumes: `materials.DetectArchive`, `materials.WalkArchiveEntries`, `materials.NewNameAllocator`, `materials.ArchiveLimits` (Tasks 1-3); `c.stageMaterial` (Task 4). +- Produces: + - `func (c *Crafter) AddMaterialsFromArchive(ctx context.Context, attestationID, kind, namePrefix, archivePath string, format materials.ArchiveFormat, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, limits materials.ArchiveLimits, opts ...AddOpt) ([]*api.Attestation_Material, error)` + - Validates `kind` against `schemaapi.CraftingSchema_Material_MaterialType_value` (same as `AddMaterialContractFree`). + - Seeds a `NameAllocator` with existing `c.CraftingState.Attestation.Materials` keys. + - For each entry: copy to a temp file named with the entry basename, build a `schemaapi.CraftingSchema_Material{Optional: true, Type: kind, Name: }`, call `c.stageMaterial`. Accumulate results. + - Errors abort before any `Write` (state on disk unchanged); the error names the archive and inner entry. + - Errors if **zero** qualifying entries were added. + - After all succeed: a single `c.stateManager.Write`. + +- [ ] **Step 1: Write the failing test** + +Append to `crafter_test.go` a test in the existing suite style. Use a real `ARTIFACT` kind so no external services are needed (inline CAS backend, dry run). Build the zip with the test helper from Task 1 (or inline). Sketch: + +```go +func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { + t := s.T() + runner := crafter.NewGithubAction(s.dryRun, s.Logger) + c, err := newInitializedCrafter(t, "testdata/contracts/empty.cue", &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(t, err) + + dir := t.TempDir() + zipPath := filepath.Join(dir, "bundle.zip") + zf, err := os.Create(zipPath) + require.NoError(t, err) + zw := zip.NewWriter(zf) + for _, n := range []string{"a.txt", "b.txt"} { + w, _ := zw.Create(n) + _, _ = w.Write([]byte("hello " + n)) + } + require.NoError(t, zw.Close()) + require.NoError(t, zf.Close()) + + backend := &casclient.CASBackend{Name: "not-set"} + got, err := c.AddMaterialsFromArchive(context.Background(), "", "ARTIFACT", "", zipPath, materials.ArchiveZip, backend, nil, materials.DefaultArchiveLimits()) + require.NoError(t, err) + require.Len(t, got, 2) + + st := c.CraftingState.GetAttestation().GetMaterials() + assert.Contains(t, st, "a-txt") + assert.Contains(t, st, "b-txt") + + // Atomic failure: too-tight limit adds nothing. + c2, err := newInitializedCrafter(t, "testdata/contracts/empty.cue", &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(t, err) + _, err = c2.AddMaterialsFromArchive(context.Background(), "", "ARTIFACT", "", zipPath, materials.ArchiveZip, backend, nil, materials.ArchiveLimits{MaxEntries: 1, MaxTotalSize: 1 << 30}) + require.ErrorIs(t, err, materials.ErrTooManyEntries) + assert.Empty(t, c2.CraftingState.GetAttestation().GetMaterials()) +} +``` + +Confirm the argument order matches the signature in **Interfaces** (note `attestationID` is the first non-ctx arg). Adjust the contract fixture path to one that already exists in `crafter_test.go` (look for the constant the other suite tests pass to `newInitializedCrafter`). + +- [ ] **Step 2: Run test to verify it fails** + +Run: `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/ -run 'TestSuite/TestAddMaterialsFromArchiveAtomic' -v` +Expected: FAIL — `c.AddMaterialsFromArchive undefined`. + +- [ ] **Step 3: Write minimal implementation** + +Add to `crafter.go` (imports: `io`, `os`, `path/filepath`, and the `materials` package is already imported): + +```go +// AddMaterialsFromArchive explodes an archive and records each contained +// regular file as its own material of the given kind, atomically: either all +// qualifying entries are added and the state is persisted once, or nothing is +// written. Entry names are derived from their basenames (DNS-1123 sanitized, +// collision-suffixed); when namePrefix is set, names are "-". +func (c *Crafter) AddMaterialsFromArchive(ctx context.Context, attestationID, kind, namePrefix, archivePath string, format materials.ArchiveFormat, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, limits materials.ArchiveLimits, opts ...AddOpt) ([]*api.Attestation_Material, error) { + if err := c.requireStateLoaded(); err != nil { + return nil, fmt.Errorf("adding materials from archive: %w", err) + } + + if _, found := schemaapi.CraftingSchema_Material_MaterialType_value[kind]; !found { + return nil, fmt.Errorf("%q kind not found. Available options are %q", kind, schemaapi.ListAvailableMaterialKind()) + } + kindType := schemaapi.CraftingSchema_Material_MaterialType(schemaapi.CraftingSchema_Material_MaterialType_value[kind]) + + existing := make([]string, 0, len(c.CraftingState.GetAttestation().GetMaterials())) + for name := range c.CraftingState.GetAttestation().GetMaterials() { + existing = append(existing, name) + } + allocator := materials.NewNameAllocator(existing) + + tmpDir, err := os.MkdirTemp("", "chainloop-explode-*") + if err != nil { + return nil, fmt.Errorf("creating temp dir: %w", err) + } + defer os.RemoveAll(tmpDir) + + var added []*api.Attestation_Material + err = materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error { + entryPath := filepath.Join(tmpDir, filepath.Base(name)) + out, err := os.Create(entryPath) + if err != nil { + return fmt.Errorf("staging entry to disk: %w", err) + } + if _, err := io.Copy(out, r); err != nil { + out.Close() + return err // may be materials.ErrArchiveTooLarge + } + out.Close() + + m := &schemaapi.CraftingSchema_Material{ + Optional: true, + Type: kindType, + Name: allocator.Allocate(namePrefix, filepath.Base(name)), + } + mt, err := c.stageMaterial(ctx, m, entryPath, casBackend, runtimeAnnotations, opts...) + if err != nil { + return err + } + added = append(added, mt) + + // Remove the per-entry temp file as we go to bound disk usage. + _ = os.Remove(entryPath) + return nil + }) + if err != nil { + return nil, fmt.Errorf("exploding archive %q: %w", archivePath, err) + } + + if len(added) == 0 { + return nil, fmt.Errorf("archive %q contained no addable files", archivePath) + } + + if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { + return nil, fmt.Errorf("failed to persist crafting state: %w", err) + } + + c.Logger.Info().Int("materials", len(added)).Str("archive", filepath.Base(archivePath)).Msg("archive exploded into materials") + return added, nil +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/ -run 'TestSuite/TestAddMaterialsFromArchiveAtomic' -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +gofmt -w pkg/attestation/crafter/crafter.go pkg/attestation/crafter/crafter_test.go +git add pkg/attestation/crafter/crafter.go pkg/attestation/crafter/crafter_test.go +git commit -S -s -m "feat(crafter): add atomic AddMaterialsFromArchive + +Assisted-by: Claude Code" +``` + +--- + +## Task 6: Route the explode path in `AttestationAdd.Run` + +**Files:** +- Modify: `app/cli/pkg/action/attestation_add.go:33-179` + +**Interfaces:** +- Consumes: `crafter.AddMaterialsFromArchive`, `materials.DetectArchive`, `materials.IsArchiveNativeKind`, `materials.ArchiveLimits` (Tasks 1-5). +- Produces: + - `AttestationAddOpts` and `AttestationAdd` gain `MaxExtractEntries int` and `MaxExtractSize int64`. + - `Run` signature changes to return `([]*AttestationStatusMaterial, error)`. Normal path returns a 1-element slice; explode path returns N. + +- [ ] **Step 1: Write the failing test** + +Action-layer tests need a loaded crafter; the crafter behavior is already covered in Task 5. Here, assert the **routing decision** with a small table test against a helper extracted from `Run`. Add to a new `app/cli/pkg/action/attestation_add_routing_test.go`: + +```go +package action + +import ( + "testing" + + "github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestShouldExplode(t *testing.T) { + dir := t.TempDir() + zipPath := writeTestZip(t, dir, "s.zip") // helper writing a 1-file zip + + tests := []struct { + name, kind, value string + wantExplode bool + }{ + {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true}, + {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false}, + {"no kind", "", zipPath, false}, + {"kind + non-archive", "ARTIFACT", "/etc/hostname", false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + format, explode, err := shouldExplode(tc.kind, tc.value) + require.NoError(t, err) + assert.Equal(t, tc.wantExplode, explode) + if explode { + assert.NotEqual(t, materials.ArchiveNone, format) + } + }) + } +} +``` + +Add a `writeTestZip` helper in the test file (mirror Task 1's `writeZip`). + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./app/cli/pkg/action/ -run TestShouldExplode -v` +Expected: FAIL — `undefined: shouldExplode`. + +- [ ] **Step 3: Write the implementation** + +Add the helper to `attestation_add.go`: + +```go +// shouldExplode decides whether an att-add should explode the value into many +// materials: only when --kind is set, the value is a supported archive, and the +// kind is not archive-native (e.g. ZAP_DAST_ZIP, which is recorded whole). +func shouldExplode(materialType, value string) (materials.ArchiveFormat, bool, error) { + if materialType == "" || materials.IsArchiveNativeKind(materialType) { + return materials.ArchiveNone, false, nil + } + format, err := materials.DetectArchive(value) + if err != nil { + return materials.ArchiveNone, false, err + } + return format, format != materials.ArchiveNone, nil +} +``` + +Add the limit fields to `AttestationAddOpts` and `AttestationAdd`, wire them in `NewAttestationAdd` (defaulting to `materials.DefaultArchiveLimits()` values when zero), then change `Run`: + +```go +func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) ([]*AttestationStatusMaterial, error) { + // ... unchanged crafter setup, runtime inputs, init/load checks, CAS backend ... + + addOpts := runtimeInputAddOpts(runtimeInputs) + + // Explode path: --kind set, value is a (non-archive-native) archive. + format, explode, err := shouldExplode(materialType, materialValue) + if err != nil { + return nil, fmt.Errorf("detecting archive: %w", err) + } + if explode { + limits := materials.ArchiveLimits{MaxEntries: action.maxExtractEntries, MaxTotalSize: action.maxExtractSize} + mts, err := crafter.AddMaterialsFromArchive(ctx, attestationID, materialType, materialName, materialValue, format, casBackend, annotations, limits, addOpts...) + if err != nil { + return nil, fmt.Errorf("adding materials from archive: %w", err) + } + results := make([]*AttestationStatusMaterial, 0, len(mts)) + for _, mt := range mts { + r, err := attMaterialToAction(mt) + if err != nil { + return nil, fmt.Errorf("converting material to action: %w", err) + } + results = append(results, r) + } + return results, nil + } + + // ... existing single-material switch, but wrap the single result: + // return []*AttestationStatusMaterial{materialResult}, nil +} +``` + +Note: policy-input-from-file evidence (`addPolicyInputEvidence`) is only reachable on the single-material path; the explode path does not accept `--policy-input-from-file` mapping to N materials (out of scope — leave the existing single-path logic untouched). + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `go test ./app/cli/pkg/action/ -run TestShouldExplode -v && go build ./app/cli/...` +Expected: PASS and the action package builds (the `Run` callers are updated in Task 7; until then `go build ./app/cli/...` may fail at the cmd caller — if so, do Task 7 before building, and run only the `-run TestShouldExplode` test here). + +- [ ] **Step 5: Commit** + +```bash +gofmt -w app/cli/pkg/action/attestation_add.go app/cli/pkg/action/attestation_add_routing_test.go +git add app/cli/pkg/action/attestation_add.go app/cli/pkg/action/attestation_add_routing_test.go +git commit -S -s -m "feat(cli): route att add to archive explode path + +Assisted-by: Claude Code" +``` + +--- + +## Task 7: CLI flags + multi-material rendering + +**Files:** +- Modify: `app/cli/cmd/attestation_add.go` + +**Interfaces:** +- Consumes: `AttestationAdd.Run` now returns `[]*AttestationStatusMaterial` (Task 6). +- Produces: two flags — `--max-extract-entries` (int, default `10000`) and `--max-extract-size` (string, e.g. `1GiB`, parsed with `bytefmt`/existing helper, default `1 << 30`) — passed into `AttestationAddOpts`. The `RunE` loops the returned slice and renders each material with its policy evaluations. + +- [ ] **Step 1: Add the flags and wire them** + +In `newAttestationAddCmd`, declare `var maxExtractEntries int` and `var maxExtractSize string`, register: + +```go +cmd.Flags().IntVar(&maxExtractEntries, "max-extract-entries", 10000, "max number of files to extract when --value is an archive") +cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GiB", "max total uncompressed size to extract when --value is an archive") +``` + +Parse `maxExtractSize` (reuse `bytefmt.ToBytes`, already a dependency — see `materials.go` import `code.cloudfoundry.org/bytefmt`) and set `MaxExtractEntries` / `MaxExtractSize` on `AttestationAddOpts`. + +- [ ] **Step 2: Update the result rendering** + +Change the `a.Run(...)` call site to receive a slice and render each entry: + +```go +resp, err := a.Run(cmd.Context(), attestationID, name, rawValuePath, kind, annotations, policyInputFiles) +if err != nil { + return err +} + +logger.Info().Int("materials", len(resp)).Msg("material(s) added to attestation") + +policies, err := a.GetPolicyEvaluations(cmd.Context(), attestationID) +if err != nil { + return err +} + +for _, m := range resp { + if err := output.EncodeOutput(flagOutputFormat, m, func(s *action.AttestationStatusMaterial) error { + return displayMaterialInfo(s, policies[m.Name]) + }); err != nil { + return err + } +} +return nil +``` + +- [ ] **Step 3: Build and smoke-test compilation** + +Run: `go build ./app/cli/... && go vet ./app/cli/...` +Expected: builds clean. + +- [ ] **Step 4: Commit** + +```bash +gofmt -w app/cli/cmd/attestation_add.go +git add app/cli/cmd/attestation_add.go +git commit -S -s -m "feat(cli): add archive extraction guard flags and multi-material output + +Assisted-by: Claude Code" +``` + +--- + +## Task 8: End-to-end behavior tests + docs regen + gates + +**Files:** +- Modify: `pkg/attestation/crafter/crafter_test.go` (collision, name-prefix, skip dirs/symlinks, zip-slip, mixed kinds-as-same-kind cases) +- Regen: CLI docs via `make generate` + +- [ ] **Step 1: Add table-driven behavior tests** + +Extend the Task-5 suite test (or add a sibling) covering, table-driven, the **spec's** behavior matrix on the explode path (all with `ARTIFACT`/`STRING` kind so no network needed): + +- Name collision: a zip with `scan.json` and `nested/scan.json` → materials `scan-json` and `scan-json-1`. +- `--name` prefix: namePrefix `sboms` + `a.json` → `sboms-a-json`. +- Directory + symlink entries → skipped (only regular files become materials). +- Zip-slip entry (`../evil`) → `WalkArchiveEntries` returns an unsafe-path error and nothing is written. +- Guard: `MaxTotalSize` smaller than the content → `ErrArchiveTooLarge`, nothing written. +- `.tar.gz` input via `ArchiveTarGz` → N materials. + +- [ ] **Step 2: Run the full package tests** + +Run: `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/... ./app/cli/...` +Expected: PASS. + +- [ ] **Step 3: Regenerate CLI docs** + +Run: `make generate` +Expected: updates the `add` command docs to include `--max-extract-entries` / `--max-extract-size`. Review the diff. + +- [ ] **Step 4: Lint** + +Run: `make -C app/cli lint && golangci-lint run ./pkg/attestation/crafter/...` +Expected: no findings. Fix any. + +- [ ] **Step 5: Commit** + +```bash +gofmt -w pkg/attestation/crafter/crafter_test.go +git add pkg/attestation/crafter/crafter_test.go app/cli/docs/ +git commit -S -s -m "test(crafter): cover archive explode behavior; regen CLI docs + +Assisted-by: Claude Code" +``` + +--- + +## Self-Review + +**Spec coverage:** +- Trigger/detection (extension + magic) → Task 1. Archive-native short-circuit → Task 3 (`IsArchiveNativeKind`) + Task 6 routing. +- Behavior matrix → Task 6 routing + Task 8 tests. (Row "no `--kind`" intentionally falls to the unchanged single path per the out-of-scope constraint.) +- Naming (basename, collision suffix, sanitization) → Task 3 (`SanitizeMaterialName` + `NameAllocator`), exercised in Task 8. **Correction vs. doc:** names are DNS-1123-sanitized (`scan.json` → `scan-json`), since raw basenames are invalid material names. +- `--name` as prefix → Task 3 (`Allocate(prefix, base)`) + Task 6 (passes `materialName`). +- Annotations to every entry → Task 5 (`runtimeAnnotations` passed per `stageMaterial`). +- Kind per entry (same `--kind`) → Task 5. (Per-entry auto-detection is out of scope.) +- Skipped entries (dirs/symlinks/empty) + zip-slip guard → Task 2, tested Task 8. +- Atomicity → Tasks 4-5 (stage-then-single-write); failure leaves disk state unchanged, tested Task 5/8. +- Resource guards (max entries/size, streaming) → Task 2 (`capReader`, `ArchiveLimits`), flags in Task 7. +- Recursion not supported → nested archives are regular files (no special handling); covered by "treated as one material" since a nested `.zip` entry is crafted as the given kind. +- Implementation sketch (archive util, action branch, cmd flags, allowlist) → Tasks 1-3, 6, 7, 3 respectively. +- Test plan → Tasks 1-3, 5, 8. + +**Open questions resolved:** `--name` → prefix (Task 3/6). Guard flag names/defaults → `--max-extract-entries=10000`, `--max-extract-size=1GiB` (Task 7). Formats → zip/tar/tar.gz/tgz only (Global Constraints). + +**Placeholder scan:** none — every code step shows the code; the one prose-only step (Task 4 moved body) describes an exact mechanical move of existing lines. + +**Type consistency:** `AddMaterialsFromArchive` signature is identical in Task 5 (definition) and Task 6 (call). `shouldExplode` returns `(materials.ArchiveFormat, bool, error)` in both Task 6 steps. `Run` returns `[]*AttestationStatusMaterial` in Tasks 6 and 7. `NameAllocator.Allocate(prefix, base)` order matches across Tasks 3, 5, 8. From 5b658e4e5dba20a815a6ff4c509535c9dea91276 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 17:47:05 +0200 Subject: [PATCH 02/23] feat(cli): detect archive materials for att add explode path Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 Signed-off-by: Javier Rodriguez --- pkg/attestation/crafter/materials/archive.go | 76 +++++++++++++ .../crafter/materials/archive_test.go | 102 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 pkg/attestation/crafter/materials/archive.go create mode 100644 pkg/attestation/crafter/materials/archive_test.go diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go new file mode 100644 index 000000000..059bc50e0 --- /dev/null +++ b/pkg/attestation/crafter/materials/archive.go @@ -0,0 +1,76 @@ +// +// Copyright 2026 The Chainloop Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package materials + +import ( + "bytes" + "fmt" + "os" + "strings" +) + +// ArchiveFormat identifies a supported archive container. +type ArchiveFormat int + +const ( + ArchiveNone ArchiveFormat = iota + ArchiveZip + ArchiveTar + ArchiveTarGz +) + +// DetectArchive reports whether path is a supported archive and, if so, its +// format. Detection is by extension first; for files whose extension does not +// match, magic bytes are used as a backstop so renamed archives are still +// caught. A non-archive returns (ArchiveNone, nil). +func DetectArchive(path string) (ArchiveFormat, error) { + lower := strings.ToLower(path) + switch { + case strings.HasSuffix(lower, ".zip"): + return ArchiveZip, nil + case strings.HasSuffix(lower, ".tar.gz"), strings.HasSuffix(lower, ".tgz"): + return ArchiveTarGz, nil + case strings.HasSuffix(lower, ".tar"): + return ArchiveTar, nil + } + + return detectByMagic(path) +} + +func detectByMagic(path string) (ArchiveFormat, error) { + f, err := os.Open(path) + if err != nil { + return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) + } + defer f.Close() + + // 512 bytes is enough for the gzip/zip magic and the tar "ustar" marker at + // offset 257. + header := make([]byte, 512) + n, _ := f.Read(header) + header = header[:n] + + switch { + case bytes.HasPrefix(header, []byte("PK\x03\x04")), bytes.HasPrefix(header, []byte("PK\x05\x06")): + return ArchiveZip, nil + case bytes.HasPrefix(header, []byte{0x1f, 0x8b}): + return ArchiveTarGz, nil + case len(header) >= 262 && bytes.Equal(header[257:262], []byte("ustar")): + return ArchiveTar, nil + } + + return ArchiveNone, nil +} diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go new file mode 100644 index 000000000..445860f45 --- /dev/null +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -0,0 +1,102 @@ +// Copyright 2026 The Chainloop Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package materials + +import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeZip creates a zip at dir/name containing the given files (name->content). +func writeZip(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + zw := zip.NewWriter(f) + for n, c := range files { + w, err := zw.Create(n) + require.NoError(t, err) + _, err = w.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) + return p +} + +// writeTarGz creates a .tar.gz at dir/name containing the given regular files. +func writeTarGz(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + gw := gzip.NewWriter(f) + tw := tar.NewWriter(gw) + for n, c := range files { + require.NoError(t, tw.WriteHeader(&tar.Header{Name: n, Mode: 0o600, Size: int64(len(c)), Typeflag: tar.TypeReg})) + _, err = tw.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, tw.Close()) + require.NoError(t, gw.Close()) + return p +} + +func TestDetectArchive(t *testing.T) { + dir := t.TempDir() + zipPath := writeZip(t, dir, "a.zip", map[string]string{"x.txt": "hi"}) + tgzPath := writeTarGz(t, dir, "a.tar.gz", map[string]string{"x.txt": "hi"}) + + plain := filepath.Join(dir, "app.bin") + require.NoError(t, os.WriteFile(plain, []byte("not an archive"), 0o600)) + + // A .zip renamed without extension — magic bytes must still detect it. + noExt := filepath.Join(dir, "noext") + require.NoError(t, os.WriteFile(noExt, mustRead(t, zipPath), 0o600)) + + tests := []struct { + name string + path string + want ArchiveFormat + }{ + {"zip by extension", zipPath, ArchiveZip}, + {"tar.gz by extension", tgzPath, ArchiveTarGz}, + {"plain file", plain, ArchiveNone}, + {"zip without extension via magic", noExt, ArchiveZip}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := DetectArchive(tc.path) + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func mustRead(t *testing.T, p string) []byte { + t.Helper() + b, err := os.ReadFile(p) + require.NoError(t, err) + return b +} From 38800a6289c7705395ab2101878fa5c0d47ed363 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 17:50:33 +0200 Subject: [PATCH 03/23] feat(cli): walk archive entries with zip-bomb and traversal guards Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/materials/archive.go | 142 ++++++++++++++++++ .../crafter/materials/archive_test.go | 49 ++++++ 2 files changed, 191 insertions(+) diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index 059bc50e0..e9ac73357 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -16,9 +16,15 @@ package materials import ( + "archive/tar" + "archive/zip" "bytes" + "compress/gzip" + "errors" "fmt" + "io" "os" + "path" "strings" ) @@ -74,3 +80,139 @@ func detectByMagic(path string) (ArchiveFormat, error) { return ArchiveNone, nil } + +var ( + // ErrTooManyEntries is returned when an archive has more qualifying entries + // than the configured maximum. + ErrTooManyEntries = errors.New("archive exceeds the maximum number of entries") + // ErrArchiveTooLarge is returned when the running uncompressed size of an + // archive exceeds the configured maximum. + ErrArchiveTooLarge = errors.New("archive exceeds the maximum uncompressed size") +) + +// ArchiveLimits bounds archive expansion to guard against zip bombs. +type ArchiveLimits struct { + MaxEntries int + MaxTotalSize int64 +} + +// DefaultArchiveLimits returns the safe defaults: 10000 entries and 1 GiB +// total uncompressed size. +func DefaultArchiveLimits() ArchiveLimits { + return ArchiveLimits{MaxEntries: 10000, MaxTotalSize: 1 << 30} +} + +// capReader wraps a reader and fails once the shared running total exceeds max, +// so we never trust an archive's declared sizes. +type capReader struct { + r io.Reader + total *int64 + max int64 +} + +func (c *capReader) Read(p []byte) (int, error) { + n, err := c.r.Read(p) + *c.total += int64(n) + if *c.total > c.max { + return n, ErrArchiveTooLarge + } + return n, err +} + +// WalkArchiveEntries calls yield for every regular file in the archive, +// enforcing the limits and skipping directories, symlinks, hardlinks, empty +// entries, and path-traversal entries. +func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error { + var total int64 + count := 0 + visit := func(name string, size int64, r io.Reader) error { + if !safeArchivePath(name) { + return fmt.Errorf("unsafe entry path %q in archive", name) + } + count++ + if count > limits.MaxEntries { + return ErrTooManyEntries + } + if err := yield(name, &capReader{r: r, total: &total, max: limits.MaxTotalSize}); err != nil { + return fmt.Errorf("processing entry %q: %w", name, err) + } + return nil + } + + switch format { + case ArchiveZip: + return walkZip(path, visit) + case ArchiveTar: + return walkTar(path, false, visit) + case ArchiveTarGz: + return walkTar(path, true, visit) + default: + return fmt.Errorf("unsupported archive format") + } +} + +// safeArchivePath rejects absolute paths and any path that escapes the +// extraction root via "..". +func safeArchivePath(name string) bool { + clean := path.Clean("/" + strings.ReplaceAll(name, "\\", "/")) + return !strings.Contains(clean, "/../") && clean != "/.." +} + +func walkZip(p string, visit func(name string, size int64, r io.Reader) error) error { + zr, err := zip.OpenReader(p) + if err != nil { + return fmt.Errorf("opening zip: %w", err) + } + defer zr.Close() + + for _, f := range zr.File { + if f.FileInfo().IsDir() || f.Mode()&os.ModeSymlink != 0 || f.UncompressedSize64 == 0 { + continue + } + rc, err := f.Open() + if err != nil { + return fmt.Errorf("opening entry %q: %w", f.Name, err) + } + err = visit(f.Name, int64(f.UncompressedSize64), rc) + rc.Close() + if err != nil { + return err + } + } + return nil +} + +func walkTar(p string, gzipped bool, visit func(name string, size int64, r io.Reader) error) error { + f, err := os.Open(p) + if err != nil { + return fmt.Errorf("opening tar: %w", err) + } + defer f.Close() + + var src io.Reader = f + if gzipped { + gz, err := gzip.NewReader(f) + if err != nil { + return fmt.Errorf("opening gzip: %w", err) + } + defer gz.Close() + src = gz + } + + tr := tar.NewReader(src) + for { + hdr, err := tr.Next() + if errors.Is(err, io.EOF) { + return nil + } + if err != nil { + return fmt.Errorf("reading tar: %w", err) + } + if hdr.Typeflag != tar.TypeReg || hdr.Size == 0 { + continue + } + if err := visit(hdr.Name, hdr.Size, tr); err != nil { + return err + } + } +} diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index 445860f45..e1c0832ef 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -18,8 +18,10 @@ import ( "archive/tar" "archive/zip" "compress/gzip" + "io" "os" "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -100,3 +102,50 @@ func mustRead(t *testing.T, p string) []byte { require.NoError(t, err) return b } + +func TestWalkArchiveEntries(t *testing.T) { + dir := t.TempDir() + + t.Run("yields regular files, skips dirs", func(t *testing.T) { + // Build a zip with a directory entry + two files. + p := filepath.Join(dir, "files.zip") + f, err := os.Create(p) + require.NoError(t, err) + zw := zip.NewWriter(f) + _, err = zw.Create("nested/") // directory entry + require.NoError(t, err) + for _, n := range []string{"a.json", "nested/b.json"} { + w, err := zw.Create(n) + require.NoError(t, err) + _, err = w.Write([]byte("{}")) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) + require.NoError(t, f.Close()) + + var got []string + err = WalkArchiveEntries(p, ArchiveZip, DefaultArchiveLimits(), func(name string, r io.Reader) error { + b, _ := io.ReadAll(r) + assert.Equal(t, "{}", string(b)) + got = append(got, name) + return nil + }) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"a.json", "nested/b.json"}, got) + }) + + t.Run("max entries exceeded", func(t *testing.T) { + p := writeTarGz(t, dir, "many.tar.gz", map[string]string{"a": "1", "b": "2", "c": "3"}) + err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 2, MaxTotalSize: 1 << 30}, func(string, io.Reader) error { return nil }) + require.ErrorIs(t, err, ErrTooManyEntries) + }) + + t.Run("max total size exceeded while streaming", func(t *testing.T) { + p := writeTarGz(t, dir, "big.tar.gz", map[string]string{"a": strings.Repeat("x", 1000)}) + err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 100, MaxTotalSize: 100}, func(_ string, r io.Reader) error { + _, err := io.ReadAll(r) + return err + }) + require.ErrorIs(t, err, ErrArchiveTooLarge) + }) +} From 002ada3da9026b40a83ab6a0b30e1b0d24e729b2 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 17:54:53 +0200 Subject: [PATCH 04/23] fix(cli): reject absolute and traversal paths in archive walk Strengthen path validation in archive extraction to explicitly reject: - Absolute paths (e.g., /etc/passwd) - Relative path traversal attempts (e.g., ../ or foo/../..) Added direct unit test of safeArchivePath and integration test via tar with traversal entry to verify rejection. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/materials/archive.go | 12 +++++- .../crafter/materials/archive_test.go | 39 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index e9ac73357..c1f1e371a 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -154,7 +154,17 @@ func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, // safeArchivePath rejects absolute paths and any path that escapes the // extraction root via "..". func safeArchivePath(name string) bool { - clean := path.Clean("/" + strings.ReplaceAll(name, "\\", "/")) + normalized := strings.ReplaceAll(name, "\\", "/") + // Reject absolute paths + if strings.HasPrefix(normalized, "/") { + return false + } + // Reject any path containing ".." which could escape the root + if strings.Contains(normalized, "..") { + return false + } + // Further validation: ensure no traversal after normalization + clean := path.Clean("/" + normalized) return !strings.Contains(clean, "/../") && clean != "/.." } diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index e1c0832ef..13ade4190 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -148,4 +148,43 @@ func TestWalkArchiveEntries(t *testing.T) { }) require.ErrorIs(t, err, ErrArchiveTooLarge) }) + + t.Run("rejects traversal via tar with .. entries", func(t *testing.T) { + // tar allows .. in header, so we can test via tar. + p := filepath.Join(dir, "evil.tar.gz") + f, err := os.Create(p) + require.NoError(t, err) + gw := gzip.NewWriter(f) + tw := tar.NewWriter(gw) + require.NoError(t, tw.WriteHeader(&tar.Header{Name: "../escape.txt", Mode: 0o600, Size: 1, Typeflag: tar.TypeReg})) + _, err = tw.Write([]byte("x")) + require.NoError(t, err) + require.NoError(t, tw.Close()) + require.NoError(t, gw.Close()) + require.NoError(t, f.Close()) + + err = WalkArchiveEntries(p, ArchiveTarGz, DefaultArchiveLimits(), func(string, io.Reader) error { return nil }) + require.Error(t, err, "entry ../escape.txt must be rejected") + }) +} + +func TestSafeArchivePath(t *testing.T) { + tests := []struct { + name string + path string + want bool + }{ + {"absolute path", "/etc/passwd", false}, + {"path traversal", "../escape.txt", false}, + {"nested path traversal", "foo/../../../etc/passwd", false}, + {"valid nested path", "a/b.txt", true}, + {"valid simple path", "file.txt", true}, + {"valid with subdirs", "nested/dir/file.txt", true}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := safeArchivePath(tc.path) + assert.Equal(t, tc.want, got) + }) + } } From c5069f1735354b5c96f22af2597cb84f1c063c99 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 17:57:04 +0200 Subject: [PATCH 05/23] feat(cli): add archive-native allowlist and entry name allocation Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/materials/archive.go | 73 +++++++++++++++++++ .../crafter/materials/archive_test.go | 28 +++++++ 2 files changed, 101 insertions(+) diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index c1f1e371a..639d08277 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -26,6 +26,8 @@ import ( "os" "path" "strings" + + schemaapi "github.com/chainloop-dev/chainloop/app/controlplane/api/workflowcontract/v1" ) // ArchiveFormat identifies a supported archive container. @@ -226,3 +228,74 @@ func walkTar(p string, gzipped bool, visit func(name string, size int64, r io.Re } } } + +// archiveNativeKinds lists material kinds whose value is the archive itself. +// For these, --kind short-circuits the explode path and the archive is +// recorded whole. Extend this set as new "the archive is the material" kinds +// are added. +var archiveNativeKinds = map[string]struct{}{ + schemaapi.CraftingSchema_Material_ZAP_DAST_ZIP.String(): {}, +} + +// IsArchiveNativeKind reports whether kind treats the archive as a single +// material (recorded whole) rather than something to explode. +func IsArchiveNativeKind(kind string) bool { + _, ok := archiveNativeKinds[kind] + return ok +} + +// SanitizeMaterialName converts s into a valid DNS-1123 material name: +// lowercase, with every run of characters outside [a-z0-9] collapsed to a +// single "-" and leading/trailing "-" trimmed. Falls back to "material". +func SanitizeMaterialName(s string) string { + var b strings.Builder + pendingHyphen := false + for _, r := range strings.ToLower(s) { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') { + if pendingHyphen && b.Len() > 0 { + b.WriteByte('-') + } + b.WriteRune(r) + pendingHyphen = false + } else { + pendingHyphen = true + } + } + if b.Len() == 0 { + return "material" + } + return b.String() +} + +// NameAllocator hands out unique DNS-1123 material names, suffixing collisions +// with -1, -2, …. It is seeded with names already present in the attestation +// so derived names never overwrite existing materials. +type NameAllocator struct { + used map[string]struct{} +} + +// NewNameAllocator seeds the allocator with existing material names. +func NewNameAllocator(existing []string) *NameAllocator { + used := make(map[string]struct{}, len(existing)) + for _, e := range existing { + used[e] = struct{}{} + } + return &NameAllocator{used: used} +} + +// Allocate returns a unique name derived from base (and optional prefix). +func (a *NameAllocator) Allocate(prefix, base string) string { + name := SanitizeMaterialName(base) + if prefix != "" { + name = SanitizeMaterialName(prefix) + "-" + name + } + + candidate := name + for i := 1; ; i++ { + if _, taken := a.used[candidate]; !taken { + a.used[candidate] = struct{}{} + return candidate + } + candidate = fmt.Sprintf("%s-%d", name, i) + } +} diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index 13ade4190..31a5cc246 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -188,3 +188,31 @@ func TestSafeArchivePath(t *testing.T) { }) } } + +func TestSanitizeMaterialName(t *testing.T) { + tests := []struct{ in, want string }{ + {"scan.json", "scan-json"}, + {"results.XML", "results-xml"}, + {"weird__name!!", "weird-name"}, + {"___", "material"}, + } + for _, tc := range tests { + assert.Equal(t, tc.want, SanitizeMaterialName(tc.in)) + } +} + +func TestNameAllocator(t *testing.T) { + a := NewNameAllocator([]string{"existing"}) + + assert.Equal(t, "scan-json", a.Allocate("", "scan.json")) + assert.Equal(t, "scan-json-1", a.Allocate("", "scan.json")) // collision + assert.Equal(t, "results-xml", a.Allocate("", "results.xml")) + assert.Equal(t, "existing-1", a.Allocate("", "existing")) // collides with pre-existing + assert.Equal(t, "sboms-a-json", a.Allocate("sboms", "a.json")) // prefix +} + +func TestIsArchiveNativeKind(t *testing.T) { + assert.True(t, IsArchiveNativeKind("ZAP_DAST_ZIP")) + assert.False(t, IsArchiveNativeKind("SBOM_CYCLONEDX_JSON")) + assert.False(t, IsArchiveNativeKind("ARTIFACT")) +} From e0dfe32d2a04a33ae89c6f3076ed0c16c328de26 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 18:00:49 +0200 Subject: [PATCH 06/23] refactor(crafter): split material crafting from state persistence Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index 5a2db615b..f7f973729 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -680,8 +680,10 @@ func (c *Crafter) AddMaterialContactFreeWithAutoDetectedKind(ctx context.Context return nil, fmt.Errorf("failed to auto-discover material kind: %w", err) } -// addMaterials adds the incoming material m to the crafting state -func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { +// stageMaterial crafts a material into the in-memory crafting state WITHOUT +// persisting it. Callers must call stateManager.Write to commit. Splitting the +// write out lets the archive explode path craft many entries and commit once. +func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { addOptions := &addOpts{} for _, opt := range opts { opt(addOptions) @@ -784,7 +786,16 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M } c.CraftingState.Attestation.Materials[m.Name] = mt - // 6 - Persist state + return mt, nil +} + +// addMaterial crafts a single material and persists the crafting state. +func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { + mt, err := c.stageMaterial(ctx, m, value, casBackend, runtimeAnnotations, opts...) + if err != nil { + return nil, err + } + if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { return nil, fmt.Errorf("failed to persist crafting state: %w", err) } From 12cf2f41b8b23687e5f80ae87d31cea9290a8d34 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 18:07:00 +0200 Subject: [PATCH 07/23] feat(crafter): add AddMaterialsFromArchive for atomic archive expansion Adds an exported AddMaterialsFromArchive method on the Crafter that walks every entry in a zip/tar/tar.gz archive, stages each entry as an independent material via stageMaterial, and commits the in-memory state in a single stateManager.Write call. If any entry fails, previously staged entries are rolled back so no partial state is ever persisted. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter.go | 110 ++++++++++++++++++ pkg/attestation/crafter/crafter_test.go | 67 +++++++++++ .../crafter/testdata/two-files.zip | Bin 0 -> 245 bytes 3 files changed, 177 insertions(+) create mode 100644 pkg/attestation/crafter/testdata/two-files.zip diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index f7f973729..b8e5dcd2a 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -19,9 +19,11 @@ import ( "context" "errors" "fmt" + "io" "maps" "net/url" "os" + "path/filepath" "slices" "strings" "time" @@ -804,6 +806,114 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M return mt, nil } +// AddMaterialsFromArchive expands an archive and stages every entry as an +// independent material, committing all of them atomically in a single +// stateManager.Write call. If any entry fails, no state is persisted and the +// in-memory materials map is rolled back. +// +// Parameters: +// - kind: the material type string for every entry (must be a valid +// CraftingSchema_Material_MaterialType name). +// - namePrefix: optional prefix prepended to each derived entry name. +// - archivePath: path to the archive on disk. +// - format: archive format (ArchiveZip / ArchiveTar / ArchiveTarGz). +// - limits: guard against zip-bomb expansion. +func (c *Crafter) AddMaterialsFromArchive( + ctx context.Context, + attestationID, kind, namePrefix, archivePath string, + format materials.ArchiveFormat, + casBackend *casclient.CASBackend, + runtimeAnnotations map[string]string, + limits materials.ArchiveLimits, + opts ...AddOpt, +) ([]*api.Attestation_Material, error) { + if err := c.requireStateLoaded(); err != nil { + return nil, fmt.Errorf("adding materials from archive: %w", err) + } + + // Validate kind up front so we fail fast before touching disk. + kindVal, found := schemaapi.CraftingSchema_Material_MaterialType_value[kind] + if !found { + return nil, fmt.Errorf("%q kind not found. Available options are %q", kind, schemaapi.ListAvailableMaterialKind()) + } + materialKind := schemaapi.CraftingSchema_Material_MaterialType(kindVal) + + // Seed the name allocator with existing material keys so we never collide. + existingKeys := make([]string, 0, len(c.CraftingState.Attestation.GetMaterials())) + for k := range c.CraftingState.Attestation.GetMaterials() { + existingKeys = append(existingKeys, k) + } + allocator := materials.NewNameAllocator(existingKeys) + + // Create a temporary directory for per-entry files; cleaned up on return. + tmpDir, err := os.MkdirTemp("", "chainloop-archive-*") + if err != nil { + return nil, fmt.Errorf("creating temp dir for archive expansion: %w", err) + } + defer os.RemoveAll(tmpDir) + + // Track which material names we staged so we can roll back on error. + var stagedNames []string + var result []*api.Attestation_Material + + walkErr := materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error { + base := filepath.Base(name) + matName := allocator.Allocate(namePrefix, base) + + // Write the entry to a temp file so material crafters can open it by path. + tmp, err := os.CreateTemp(tmpDir, "entry-*") + if err != nil { + return fmt.Errorf("creating temp file for entry %q: %w", name, err) + } + tmpPath := tmp.Name() + + if _, err := io.Copy(tmp, r); err != nil { + tmp.Close() + return fmt.Errorf("writing entry %q to temp file: %w", name, err) + } + tmp.Close() + + m := &schemaapi.CraftingSchema_Material{ + Optional: true, + Type: materialKind, + Name: matName, + } + + mt, err := c.stageMaterial(ctx, m, tmpPath, casBackend, runtimeAnnotations, opts...) + if err != nil { + return fmt.Errorf("staging entry %q as material %q: %w", name, matName, err) + } + + stagedNames = append(stagedNames, matName) + result = append(result, mt) + return nil + }) + + if walkErr != nil { + // Roll back any in-memory staging: remove the material map entries we just added. + for _, n := range stagedNames { + delete(c.CraftingState.Attestation.Materials, n) + } + return nil, fmt.Errorf("expanding archive %q: %w", archivePath, walkErr) + } + + if len(result) == 0 { + return nil, fmt.Errorf("archive %q contains no processable entries", archivePath) + } + + // All entries staged successfully; persist once. + if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { + // Roll back in-memory state. + for _, n := range stagedNames { + delete(c.CraftingState.Attestation.Materials, n) + } + return nil, fmt.Errorf("failed to persist crafting state: %w", err) + } + + c.Logger.Debug().Int("count", len(result)).Str("archive", archivePath).Msg("added archive materials to state") + return result, nil +} + // projectContext returns the project name and version from the workflow // metadata so policy verifiers can pass them to the engine. Either may be // empty (e.g. dry-run before workflow metadata is populated); built-ins diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 1a8feb196..79e7905a4 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -672,6 +672,73 @@ func (s *crafterSuite) TestAddMaterialsAutomatic() { } } +func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { + const zipFixture = "testdata/two-files.zip" + + s.Run("happy path: two files produce two materials", func() { + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + // Nil uploader causes inline storage — no network required. + backend := &casclient.CASBackend{} + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", + "ARTIFACT", + "entry", + zipFixture, + materials.ArchiveZip, + backend, + nil, + materials.DefaultArchiveLimits(), + ) + + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 2) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 2) + + // Both derived names must be present (sanitized base names with prefix). + _, hasAlpha := stateMap["entry-alpha-txt"] + _, hasBeta := stateMap["entry-beta-txt"] + assert.True(s.T(), hasAlpha, "expected material entry-alpha-txt in state") + assert.True(s.T(), hasBeta, "expected material entry-beta-txt in state") + }) + + s.Run("atomicity: over-tight limit leaves state empty", func() { + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + backend := &casclient.CASBackend{} + + // MaxEntries:1 causes ErrTooManyEntries after the second entry. + tightLimits := materials.ArchiveLimits{MaxEntries: 1, MaxTotalSize: 1 << 30} + + _, err = c.AddMaterialsFromArchive( + context.Background(), + "", + "ARTIFACT", + "entry", + zipFixture, + materials.ArchiveZip, + backend, + nil, + tightLimits, + ) + + require.Error(s.T(), err) + assert.ErrorIs(s.T(), err, materials.ErrTooManyEntries) + + // Atomicity: no materials must have been committed. + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Empty(s.T(), stateMap, "state must be empty after a failed archive expansion") + }) +} + func loadSchema(path string) (*schemaapi.CraftingSchema, error) { // Extract json formatted data content, err := os.ReadFile(filepath.Clean(path)) diff --git a/pkg/attestation/crafter/testdata/two-files.zip b/pkg/attestation/crafter/testdata/two-files.zip new file mode 100644 index 0000000000000000000000000000000000000000..4b118a655be83a7369acb7168168e289b9d67fc1 GIT binary patch literal 245 zcmWIWW@Zs#U|`^2SU2HrjNR;|Z-jw7K_KP?;>4VSj6}VXijvbFXHT9C_s~9dKGfrk z<_V7{Obh`iIzN45_%8(1Appc2K%A6Xf}zn%*AuLdkx7IZx1B)EV9>w_qTntF@J7{! gZXrYy149F&6_5$DKERum4J5+^gt Date: Mon, 29 Jun 2026 18:11:05 +0200 Subject: [PATCH 08/23] fix(crafter): roll back policy evaluations and name temp files by entry on archive expand Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter.go | 30 +++++++++++++++---------- pkg/attestation/crafter/crafter_test.go | 3 +++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index b8e5dcd2a..60c8c5a0d 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -852,20 +852,29 @@ func (c *Crafter) AddMaterialsFromArchive( } defer os.RemoveAll(tmpDir) - // Track which material names we staged so we can roll back on error. + // Snapshot checkpoints for atomic rollback on any error path. var stagedNames []string var result []*api.Attestation_Material + policyEvalCheckpoint := len(c.CraftingState.Attestation.PolicyEvaluations) + + rollback := func() { + for _, n := range stagedNames { + delete(c.CraftingState.Attestation.Materials, n) + } + c.CraftingState.Attestation.PolicyEvaluations = c.CraftingState.Attestation.PolicyEvaluations[:policyEvalCheckpoint] + } walkErr := materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error { base := filepath.Base(name) matName := allocator.Allocate(namePrefix, base) - // Write the entry to a temp file so material crafters can open it by path. - tmp, err := os.CreateTemp(tmpDir, "entry-*") + // Write the entry to a temp file named after the entry basename so + // material crafters can identify it by name when opening it by path. + tmpPath := filepath.Join(tmpDir, filepath.Base(name)) + tmp, err := os.Create(tmpPath) if err != nil { return fmt.Errorf("creating temp file for entry %q: %w", name, err) } - tmpPath := tmp.Name() if _, err := io.Copy(tmp, r); err != nil { tmp.Close() @@ -890,10 +899,9 @@ func (c *Crafter) AddMaterialsFromArchive( }) if walkErr != nil { - // Roll back any in-memory staging: remove the material map entries we just added. - for _, n := range stagedNames { - delete(c.CraftingState.Attestation.Materials, n) - } + // Roll back any in-memory staging: remove material map entries and + // truncate policy evaluations back to the pre-call checkpoint. + rollback() return nil, fmt.Errorf("expanding archive %q: %w", archivePath, walkErr) } @@ -903,10 +911,8 @@ func (c *Crafter) AddMaterialsFromArchive( // All entries staged successfully; persist once. if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { - // Roll back in-memory state. - for _, n := range stagedNames { - delete(c.CraftingState.Attestation.Materials, n) - } + // Roll back in-memory state including policy evaluations. + rollback() return nil, fmt.Errorf("failed to persist crafting state: %w", err) } diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 79e7905a4..158281e65 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -736,6 +736,9 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { // Atomicity: no materials must have been committed. stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Empty(s.T(), stateMap, "state must be empty after a failed archive expansion") + + // Atomicity: policy evaluations must also be rolled back. + assert.Empty(s.T(), c.CraftingState.GetAttestation().GetPolicyEvaluations(), "policy evaluations must be rolled back after a failed archive expansion") }) } From 820268a36adeb253103363dadab89bcd79f0789d Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 18:15:35 +0200 Subject: [PATCH 09/23] feat(cli): route att add to archive explode path Add MaxExtractEntries/MaxExtractSize to AttestationAddOpts and wire them into AttestationAdd with defaults from materials.DefaultArchiveLimits(). Change Run to return ([]*AttestationStatusMaterial, error) and insert an explode branch before the single-material switch: when --kind is set and the value is a non-archive-native archive, delegate to crafter.AddMaterialsFromArchive and return N results; otherwise the single-material path continues unchanged and its result is wrapped in a 1-element slice. Add shouldExplode helper and TestShouldExplode. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- app/cli/pkg/action/attestation_add.go | 60 ++++++++++++++- .../action/attestation_add_routing_test.go | 76 +++++++++++++++++++ 2 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 app/cli/pkg/action/attestation_add_routing_test.go diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go index 3778a9a94..84fb8cd11 100644 --- a/app/cli/pkg/action/attestation_add.go +++ b/app/cli/pkg/action/attestation_add.go @@ -41,6 +41,12 @@ type AttestationAddOpts struct { LocalStatePath string // NoStrictValidation skips strict schema validation NoStrictValidation bool + // MaxExtractEntries limits the number of entries extracted from an archive. + // Zero defaults to materials.DefaultArchiveLimits().MaxEntries. + MaxExtractEntries int + // MaxExtractSize limits the total uncompressed bytes extracted from an archive. + // Zero defaults to materials.DefaultArchiveLimits().MaxTotalSize. + MaxExtractSize int64 } type newCrafterOpts struct { @@ -55,6 +61,8 @@ type AttestationAdd struct { casCAPath string connectionInsecure bool localStatePath string + maxExtractEntries int + maxExtractSize int64 *newCrafterOpts } @@ -68,6 +76,16 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) { opts = append(opts, crafter.WithNoStrictValidation(cfg.NoStrictValidation)) } + defaults := materials.DefaultArchiveLimits() + maxEntries := cfg.MaxExtractEntries + if maxEntries == 0 { + maxEntries = defaults.MaxEntries + } + maxSize := cfg.MaxExtractSize + if maxSize == 0 { + maxSize = defaults.MaxTotalSize + } + return &AttestationAdd{ ActionsOpts: cfg.ActionsOpts, newCrafterOpts: &newCrafterOpts{cpConnection: cfg.CPConnection, opts: opts}, @@ -75,12 +93,14 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) { casCAPath: cfg.CASCAPath, connectionInsecure: cfg.ConnectionInsecure, localStatePath: cfg.LocalStatePath, + maxExtractEntries: maxEntries, + maxExtractSize: maxSize, }, nil } var ErrAttestationNotInitialized = errors.New("attestation not yet initialized") -func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) (*AttestationStatusMaterial, error) { +func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) ([]*AttestationStatusMaterial, error) { // initialize the crafter. If attestation-id is provided we assume the attestation is performed using remote state crafter, err := newCrafter(&newCrafterStateOpts{enableRemoteState: (attestationID != ""), localStatePath: action.localStatePath}, action.CPConnection, action.opts...) if err != nil { @@ -132,6 +152,28 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa // 3. If materialType is not empty, add material contract free with materialType and materialName addOpts := runtimeInputAddOpts(runtimeInputs) + // Explode path: --kind set, value is a (non-archive-native) archive. + format, explode, err := shouldExplode(materialType, materialValue) + if err != nil { + return nil, fmt.Errorf("detecting archive: %w", err) + } + if explode { + limits := materials.ArchiveLimits{MaxEntries: action.maxExtractEntries, MaxTotalSize: action.maxExtractSize} + mts, err := crafter.AddMaterialsFromArchive(ctx, attestationID, materialType, materialName, materialValue, format, casBackend, annotations, limits, addOpts...) + if err != nil { + return nil, fmt.Errorf("adding materials from archive: %w", err) + } + results := make([]*AttestationStatusMaterial, 0, len(mts)) + for _, mt := range mts { + r, err := attMaterialToAction(mt) + if err != nil { + return nil, fmt.Errorf("converting material to action: %w", err) + } + results = append(results, r) + } + return results, nil + } + var mt *api.Attestation_Material switch { case materialName == "" && materialType == "": @@ -175,7 +217,21 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa return nil, fmt.Errorf("converting material to action: %w", err) } - return materialResult, nil + return []*AttestationStatusMaterial{materialResult}, nil +} + +// shouldExplode decides whether an att-add should explode the value into many +// materials: only when --kind is set, the value is a supported archive, and the +// kind is not archive-native (e.g. ZAP_DAST_ZIP, which is recorded whole). +func shouldExplode(materialType, value string) (materials.ArchiveFormat, bool, error) { + if materialType == "" || materials.IsArchiveNativeKind(materialType) { + return materials.ArchiveNone, false, nil + } + format, err := materials.DetectArchive(value) + if err != nil { + return materials.ArchiveNone, false, err + } + return format, format != materials.ArchiveNone, nil } // runtimeInputAddOpts wraps the runtime inputs as crafter add options, or diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go new file mode 100644 index 000000000..3f061a238 --- /dev/null +++ b/app/cli/pkg/action/attestation_add_routing_test.go @@ -0,0 +1,76 @@ +// +// Copyright 2026 The Chainloop Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package action + +import ( + "archive/zip" + "os" + "path/filepath" + "testing" + + "github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeTestZip creates a zip archive at dir/name containing a single file +// "entry.txt" and returns its path. +func writeTestZip(t *testing.T, dir, name string) string { + t.Helper() + path := filepath.Join(dir, name) + f, err := os.Create(path) + require.NoError(t, err) + defer f.Close() + + w := zip.NewWriter(f) + entry, err := w.Create("entry.txt") + require.NoError(t, err) + _, err = entry.Write([]byte("hello")) + require.NoError(t, err) + require.NoError(t, w.Close()) + return path +} + +func TestShouldExplode(t *testing.T) { + dir := t.TempDir() + zipPath := writeTestZip(t, dir, "s.zip") + + // non-archive: a plain temp file with an unrecognised extension + plainPath := filepath.Join(dir, "plain.bin") + require.NoError(t, os.WriteFile(plainPath, []byte("not an archive"), 0600)) + + tests := []struct { + name string + kind string + value string + wantExplode bool + }{ + {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true}, + {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false}, + {"no kind", "", zipPath, false}, + {"kind + non-archive", "ARTIFACT", plainPath, false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + format, explode, err := shouldExplode(tc.kind, tc.value) + require.NoError(t, err) + assert.Equal(t, tc.wantExplode, explode) + if explode { + assert.NotEqual(t, materials.ArchiveNone, format) + } + }) + } +} From 2e1450d8322d6bd35edcdd63fabd5d0e76bd47d5 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 18:20:10 +0200 Subject: [PATCH 10/23] feat(cli): add archive extraction guard flags and multi-material output Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- app/cli/cmd/attestation_add.go | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/app/cli/cmd/attestation_add.go b/app/cli/cmd/attestation_add.go index 37dd62353..fe52bad5b 100644 --- a/app/cli/cmd/attestation_add.go +++ b/app/cli/cmd/attestation_add.go @@ -20,6 +20,7 @@ import ( "fmt" "os" + "code.cloudfoundry.org/bytefmt" "github.com/jedib0t/go-pretty/v6/table" "github.com/muesli/reflow/wrap" "github.com/spf13/cobra" @@ -40,6 +41,8 @@ func newAttestationAddCmd() *cobra.Command { var annotationsFlag []string var noStrictValidation bool var policyInputFromFileFlag []string + var maxExtractEntries int + var maxExtractSize string // OCI registry credentials can be passed as flags or environment variables var registryServer, registryUsername, registryPassword string @@ -74,6 +77,11 @@ func newAttestationAddCmd() *cobra.Command { chainloop attestation add --name sigcheck --value sigcheckResult.csv --kind SYSINTERNALS_SIGCHECK \ --policy-input-from-file ignored_paths=exception.csv:Path`, RunE: func(cmd *cobra.Command, _ []string) error { + maxExtractSizeBytes, err := bytefmt.ToBytes(maxExtractSize) + if err != nil { + return fmt.Errorf("invalid --max-extract-size %q: %w", maxExtractSize, err) + } + a, err := action.NewAttestationAdd( &action.AttestationAddOpts{ ActionsOpts: ActionOpts, @@ -85,6 +93,8 @@ func newAttestationAddCmd() *cobra.Command { RegistryPassword: registryPassword, LocalStatePath: attestationLocalStatePath, NoStrictValidation: noStrictValidation, + MaxExtractEntries: maxExtractEntries, + MaxExtractSize: int64(maxExtractSizeBytes), }, ) if err != nil { @@ -122,22 +132,26 @@ func newAttestationAddCmd() *cobra.Command { return fmt.Errorf("loading resource: %w", err) } } - // TODO: take the material output and show render it resp, err := a.Run(cmd.Context(), attestationID, name, rawValuePath, kind, annotations, policyInputFiles) if err != nil { return err } - logger.Info().Msg("material added to attestation") + logger.Info().Int("materials", len(resp)).Msg("material(s) added to attestation") policies, err := a.GetPolicyEvaluations(cmd.Context(), attestationID) if err != nil { return err } - return output.EncodeOutput(flagOutputFormat, resp, func(s *action.AttestationStatusMaterial) error { - return displayMaterialInfo(s, policies[resp.Name]) - }) + for _, m := range resp { + if err := output.EncodeOutput(flagOutputFormat, m, func(s *action.AttestationStatusMaterial) error { + return displayMaterialInfo(s, policies[m.Name]) + }); err != nil { + return err + } + } + return nil }, ) }, @@ -166,6 +180,10 @@ func newAttestationAddCmd() *cobra.Command { cmd.Flags().StringVar(®istryUsername, "registry-username", "", fmt.Sprintf("registry username, ($%s)", registryUsernameEnvVarName)) cmd.Flags().StringVar(®istryPassword, "registry-password", "", fmt.Sprintf("registry password, ($%s)", registryPasswordEnvVarName)) + // Archive extraction guards + cmd.Flags().IntVar(&maxExtractEntries, "max-extract-entries", 10000, "max number of files to extract when --value is an archive") + cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GB", "max total uncompressed size to extract when --value is an archive") + if registryServer == "" { registryServer = os.Getenv(registryServerEnvVarName) } From aeb31029197e851c1c6a1089b779106d2d5dcb1e Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 18:27:25 +0200 Subject: [PATCH 11/23] test(crafter): cover archive explode behavior; regen CLI docs Add TestAddMaterialsFromArchiveBehavior to crafter_test.go covering five end-to-end scenarios via AddMaterialsFromArchive: name collision suffixing (scan-json / scan-json-1), name prefix, skipping dirs and symlinks in tar.gz, path-traversal rejection with atomic rollback, and tar.gz happy path with two materials. Fixtures are built programmatically with buildZip/buildTarGz helpers using t.TempDir(); no binary fixtures committed. Also regenerate app/cli/documentation/cli-reference.mdx to include the --max-extract-entries and --max-extract-size flags added in earlier tasks. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- app/cli/documentation/cli-reference.mdx | 2 + pkg/attestation/crafter/crafter_test.go | 205 ++++++++++++++++++++++++ 2 files changed, 207 insertions(+) diff --git a/app/cli/documentation/cli-reference.mdx b/app/cli/documentation/cli-reference.mdx index b4bf901dd..19857e915 100755 --- a/app/cli/documentation/cli-reference.mdx +++ b/app/cli/documentation/cli-reference.mdx @@ -258,6 +258,8 @@ Options --attestation-id string Unique identifier of the in-progress attestation -h, --help help for add --kind string kind of the material to be recorded: ["ARTIFACT" "ASYNCAPI_SPEC" "ATTESTATION" "BLACKDUCK_SCA_JSON" "CERTCC_DRANZER" "CHAINLOOP_AI_AGENT_CONFIG" "CHAINLOOP_AI_CODING_SESSION" "CHAINLOOP_PR_INFO" "CHAINLOOP_RUNNER_CONTEXT" "CONTAINER_IMAGE" "CSAF_INFORMATIONAL_ADVISORY" "CSAF_SECURITY_ADVISORY" "CSAF_SECURITY_INCIDENT_RESPONSE" "CSAF_VEX" "EVIDENCE" "GHAS_CODE_SCAN" "GHAS_DEPENDENCY_SCAN" "GHAS_SECRET_SCAN" "GITLAB_SECURITY_REPORT" "GITLEAKS_JSON" "GRAPHQL_SPEC" "HELM_CHART" "JACOCO_XML" "JUNIT_XML" "OPENAPI_SPEC" "OPENVEX" "OSSF_SCORECARD_JSON" "RADAMSA_CRASHES" "RADAMSA_REPORT" "SARIF" "SBOM_CYCLONEDX_JSON" "SBOM_SPDX_JSON" "SLSA_PROVENANCE" "STRING" "SYSINTERNALS_ACCESSCHK" "SYSINTERNALS_SIGCHECK" "TWISTCLI_SCAN_JSON" "YELP_DETECT_SECRETS_BASELINE" "ZAP_DAST_ZIP"] +--max-extract-entries int max number of files to extract when --value is an archive (default 10000) +--max-extract-size string max total uncompressed size to extract when --value is an archive (default "1GB") --name string name of the material as shown in the contract --no-strict-validation skip strict schema validation for structured materials (SBOM_CYCLONEDX_JSON, OPENAPI_SPEC, ASYNCAPI_SPEC, OSSF_SCORECARD_JSON) --policy-input-from-file stringArray feed a policy input from a column of a CSV or JSON file, in the format =[:] (e.g. ignored_paths=exception.csv:Path); is a single top-level column/field name and defaults to the input name; repeatable. The file is also recorded as EVIDENCE. diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 158281e65..97c4eca81 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -16,6 +16,9 @@ package crafter_test import ( + "archive/tar" + "archive/zip" + "compress/gzip" "context" "fmt" "os" @@ -742,6 +745,208 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { }) } +// buildZip creates a zip archive at the given path containing the provided +// files (entry name → content). All entries are regular files. +func buildZip(t *testing.T, path string, files map[string]string) { + t.Helper() + f, err := os.Create(path) + require.NoError(t, err) + defer f.Close() + zw := zip.NewWriter(f) + for name, content := range files { + w, err := zw.Create(name) + require.NoError(t, err) + _, err = w.Write([]byte(content)) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) +} + +// buildTarGz creates a .tar.gz archive at path containing regular files, +// directory entries, and symlinks as described by the parameters. +func buildTarGz(t *testing.T, path string, regular map[string]string, dirs []string, symlinks map[string]string) { + t.Helper() + f, err := os.Create(path) + require.NoError(t, err) + defer f.Close() + gw := gzip.NewWriter(f) + tw := tar.NewWriter(gw) + + for name, content := range regular { + hdr := &tar.Header{ + Name: name, + Typeflag: tar.TypeReg, + Mode: 0o600, + Size: int64(len(content)), + } + require.NoError(t, tw.WriteHeader(hdr)) + _, err = tw.Write([]byte(content)) + require.NoError(t, err) + } + for _, name := range dirs { + hdr := &tar.Header{ + Name: name, + Typeflag: tar.TypeDir, + Mode: 0o700, + } + require.NoError(t, tw.WriteHeader(hdr)) + } + for name, target := range symlinks { + hdr := &tar.Header{ + Name: name, + Typeflag: tar.TypeSymlink, + Linkname: target, + } + require.NoError(t, tw.WriteHeader(hdr)) + } + + require.NoError(t, tw.Close()) + require.NoError(t, gw.Close()) +} + +func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { + const contract = "testdata/contracts/empty_generic.yaml" + backend := &casclient.CASBackend{} + + s.Run("name collision: both names present with suffix", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "collide.zip") + buildZip(s.T(), p, map[string]string{ + "scan.json": `{"a":1}`, + "nested/scan.json": `{"b":2}`, + }) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveZip, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 2) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 2) + _, hasScanJSON := stateMap["scan-json"] + _, hasScanJSON1 := stateMap["scan-json-1"] + assert.True(s.T(), hasScanJSON, "expected material scan-json in state") + assert.True(s.T(), hasScanJSON1, "expected material scan-json-1 in state (collision suffix)") + }) + + s.Run("name prefix: prefix prepended to sanitized entry name", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "prefix.zip") + buildZip(s.T(), p, map[string]string{ + "a.json": `{"x":1}`, + }) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "sboms", p, + materials.ArchiveZip, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 1) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + _, found := stateMap["sboms-a-json"] + assert.True(s.T(), found, "expected material sboms-a-json in state") + }) + + s.Run("skip dirs and symlinks in tar.gz: only regular file becomes material", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "mixed.tar.gz") + buildTarGz(s.T(), p, + map[string]string{"real.txt": "hello"}, + []string{"adir/"}, + map[string]string{"link.txt": "real.txt"}, + ) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveTarGz, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 1, "only the regular file must become a material") + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 1) + _, hasReal := stateMap["real-txt"] + assert.True(s.T(), hasReal, "expected material real-txt in state") + }) + + s.Run("traversal rejection: ../escape.txt entry causes error and empty state", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "evil.tar.gz") + buildTarGz(s.T(), p, + map[string]string{"../escape.txt": "evil"}, + nil, nil, + ) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + _, err = c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveTarGz, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.Error(s.T(), err, "path-traversal entry must cause an error") + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Empty(s.T(), stateMap, "state must be empty after traversal rejection (atomic rollback)") + }) + + s.Run("tar.gz happy path: two regular files produce two materials", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "two.tar.gz") + buildTarGz(s.T(), p, + map[string]string{ + "alpha.txt": "aaa", + "beta.txt": "bbb", + }, + nil, nil, + ) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveTarGz, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 2) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 2) + _, hasAlpha := stateMap["alpha-txt"] + _, hasBeta := stateMap["beta-txt"] + assert.True(s.T(), hasAlpha, "expected material alpha-txt in state") + assert.True(s.T(), hasBeta, "expected material beta-txt in state") + }) +} + func loadSchema(path string) (*schemaapi.CraftingSchema, error) { // Extract json formatted data content, err := os.ReadFile(filepath.Clean(path)) From e7a581b53bbd532552cfeb899b252f82206a6ec9 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 18:31:32 +0200 Subject: [PATCH 12/23] fix(crafter): export ErrUnsafeEntry sentinel and assert it in archive tests Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter_test.go | 2 ++ pkg/attestation/crafter/materials/archive.go | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 97c4eca81..fa8f23caa 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -858,6 +858,7 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { assert.Len(s.T(), mts, 1) stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 1) _, found := stateMap["sboms-a-json"] assert.True(s.T(), found, "expected material sboms-a-json in state") }) @@ -909,6 +910,7 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { materials.DefaultArchiveLimits(), ) require.Error(s.T(), err, "path-traversal entry must cause an error") + assert.ErrorIs(s.T(), err, materials.ErrUnsafeEntry) stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Empty(s.T(), stateMap, "state must be empty after traversal rejection (atomic rollback)") diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index 639d08277..b7657e1d2 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -90,6 +90,8 @@ var ( // ErrArchiveTooLarge is returned when the running uncompressed size of an // archive exceeds the configured maximum. ErrArchiveTooLarge = errors.New("archive exceeds the maximum uncompressed size") + // ErrUnsafeEntry is returned when an archive entry's path is absolute or escapes the extraction root. + ErrUnsafeEntry = errors.New("unsafe entry path in archive") ) // ArchiveLimits bounds archive expansion to guard against zip bombs. @@ -129,7 +131,7 @@ func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, count := 0 visit := func(name string, size int64, r io.Reader) error { if !safeArchivePath(name) { - return fmt.Errorf("unsafe entry path %q in archive", name) + return fmt.Errorf("%w: %q", ErrUnsafeEntry, name) } count++ if count > limits.MaxEntries { From f5bf99c53be904479a1e2ca6aa8eba9d481212f0 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Mon, 29 Jun 2026 18:44:22 +0200 Subject: [PATCH 13/23] fix(cli): correct archive detection for non-file values and tighten guards - detectByMagic now returns (ArchiveNone, nil) when os.Open fails so non-file material values (STRING, CONTAINER_IMAGE) never produce a spurious "no such file" error through the shouldExplode path. - safeArchivePath drops the over-broad strings.Contains(name, "..") early-return that wrongly rejected legitimate filenames like foo..bar.json; traversal detection now relies solely on path.Clean against a virtual root (/root/), which is the only reliable check. - Add a Warn log when --policy-input-from-file is supplied with an archive value so users know evidence cross-links are skipped on the explode path. - Name per-entry temp files with the allocated unique material name (matName) instead of filepath.Base(name) to eliminate basename collisions; remove each temp file immediately after staging to keep disk usage bounded. - Extend TestDetectArchive with .tar and .tgz cases; add writeTar helper mirroring writeTarGz without the gzip layer. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: ef6d3cdb-5a23-445c-b39a-510b659023e4 --- .superpowers/sdd/final-fix-report.md | 228 ++++++++++++++++++ app/cli/pkg/action/attestation_add.go | 3 + .../action/attestation_add_routing_test.go | 4 + pkg/attestation/crafter/crafter.go | 10 +- pkg/attestation/crafter/materials/archive.go | 27 ++- .../crafter/materials/archive_test.go | 23 ++ 6 files changed, 282 insertions(+), 13 deletions(-) create mode 100644 .superpowers/sdd/final-fix-report.md diff --git a/.superpowers/sdd/final-fix-report.md b/.superpowers/sdd/final-fix-report.md new file mode 100644 index 000000000..9217a0189 --- /dev/null +++ b/.superpowers/sdd/final-fix-report.md @@ -0,0 +1,228 @@ +# Archive Detection Fix Report + +**Date**: 2026-06-29 +**Branch**: zip-files +**Commit**: (see below) + +--- + +## Summary + +Five fixes applied to the archive detection and extraction pipeline in Chainloop. + +--- + +## Fix 1 (CRITICAL) — Non-file material values must not error in detection + +### Problem +`detectByMagic` in `pkg/attestation/crafter/materials/archive.go` called `os.Open(path)` and returned a hard error when the file didn't exist. This broke material kinds like `STRING` and `CONTAINER_IMAGE` whose `--value` is not a file path. + +### Before +```go +func detectByMagic(path string) (ArchiveFormat, error) { + f, err := os.Open(path) + if err != nil { + return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) + } + ... +} +``` + +### After +```go +func detectByMagic(path string) (ArchiveFormat, error) { + f, err := os.Open(path) + if err != nil { + // If the file doesn't exist, the value is not a file path at all (e.g. + // "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE). + // Treat it as a non-archive rather than propagating the error so callers + // that pass non-file values are not surprised. + return ArchiveNone, nil + } + ... +} +``` + +### TDD Evidence + +**RED** (before fix): +``` +=== RUN TestShouldExplode/kind_STRING_non-file_value + attestation_add_routing_test.go:73: + Error: Received unexpected error: + opening "hello world": open hello world: no such file or directory +=== RUN TestShouldExplode/kind_CONTAINER_IMAGE_non-file_value + attestation_add_routing_test.go:73: + Error: Received unexpected error: + opening "registry.example.com/app:v1": open registry.example.com/app:v1: no such file or directory +FAIL +``` + +**GREEN** (after fix): +``` +=== RUN TestShouldExplode/kind_STRING_non-file_value --- PASS +=== RUN TestShouldExplode/kind_CONTAINER_IMAGE_non-file_value --- PASS +PASS +ok github.com/chainloop-dev/chainloop/app/cli/pkg/action 0.026s +``` + +--- + +## Fix 2 (IMPORTANT) — safeArchivePath over-broad ".." rejection + +### Problem +`safeArchivePath` used `strings.Contains(normalized, "..")` which rejected any path with `".."` as a substring — including legitimate filenames like `foo..bar.json`. + +### Before +```go +func safeArchivePath(name string) bool { + normalized := strings.ReplaceAll(name, "\\", "/") + if strings.HasPrefix(normalized, "/") { + return false + } + // Reject any path containing ".." which could escape the root + if strings.Contains(normalized, "..") { + return false + } + clean := path.Clean("/" + normalized) + return !strings.Contains(clean, "/../") && clean != "/.." +} +``` + +### After +```go +func safeArchivePath(name string) bool { + normalized := strings.ReplaceAll(name, "\\", "/") + if strings.HasPrefix(normalized, "/") { + return false + } + // Canonicalise against a virtual root and check that the result stays + // within it. path.Clean will resolve ".." components so a path like + // "a/../../etc/passwd" becomes "/etc/passwd" which does not start with + // the virtual prefix "/root/"; a safe path like "a/b.txt" becomes + // "/root/a/b.txt" which does. + const root = "/root" + clean := path.Clean(root + "/" + normalized) + return strings.HasPrefix(clean, root+"/") || clean == root +} +``` + +### TDD Evidence + +**RED** (before fix — `foo..bar.json` case): +``` +=== RUN TestSafeArchivePath/double_dot_in_filename_is_ok + archive_test.go:189: + Error: Not equal: + expected: true + actual : false +FAIL +``` + +**GREEN** (after fix): +``` +=== RUN TestSafeArchivePath/double_dot_in_filename_is_ok --- PASS +=== RUN TestSafeArchivePath/path_traversal --- PASS +=== RUN TestSafeArchivePath/escape_via_nested_double_dot --- PASS +PASS +ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials 0.012s +``` + +--- + +## Fix 3 (IMPORTANT) — Warn when --policy-input-from-file is used on the explode path + +### Change +Added a warning log in `AttestationAdd.Run` (`app/cli/pkg/action/attestation_add.go`) inside the `if explode {` branch, before calling `AddMaterialsFromArchive`. + +```go +if explode { + if len(policyInputFiles) > 0 { + action.Logger.Warn().Msg("--policy-input-from-file is ignored when expanding an archive; evidence cross-links are not recorded for exploded materials") + } + ... +} +``` + +--- + +## Fix 4 (MINOR) — Bound temp disk + avoid basename collision + +### Problem +In `AddMaterialsFromArchive` (`pkg/attestation/crafter/crafter.go`), the temp file was named `filepath.Join(tmpDir, filepath.Base(name))`. Two archive entries with the same basename (e.g. `a/x.json` and `b/x.json`) would collide. Also, temp files were not removed until the deferred `os.RemoveAll(tmpDir)` at return. + +### Before +```go +tmpPath := filepath.Join(tmpDir, filepath.Base(name)) +// ... io.Copy ... +mt, err := c.stageMaterial(ctx, m, tmpPath, ...) +``` + +### After +```go +// Use the allocated unique material name for the temp file to avoid basename collisions. +tmpPath := filepath.Join(tmpDir, matName) +// ... io.Copy ... +mt, err := c.stageMaterial(ctx, m, tmpPath, ...) +// Remove the temp file immediately after staging to keep disk usage bounded. +os.Remove(tmpPath) //nolint:errcheck // best-effort cleanup +``` + +--- + +## Fix 5 (MINOR) — Add .tar / .tgz detection tests + +Added `writeTar` helper (uncompressed tar, mirroring `writeTarGz` without the gzip layer) and two new test cases to `TestDetectArchive`: + +- `{"tar by extension", tarPath, ArchiveTar}` +- `{"tgz by extension", tgzShortPath, ArchiveTarGz}` + +--- + +## Test Results + +### Focused tests +``` +go test ./app/cli/pkg/action/ -run TestShouldExplode -v +PASS ok github.com/chainloop-dev/chainloop/app/cli/pkg/action 0.028s + +go test ./pkg/attestation/crafter/materials/ -run 'TestDetectArchive|TestSafeArchivePath|TestWalkArchiveEntries' -v +PASS ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials 0.016s + +SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/ -run 'TestSuite/TestAddMaterialsFromArchive' -v +PASS ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter 0.037s +``` + +### Full regression +``` +SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/... ./app/cli/... +ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter 1.529s +ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials 2.643s +ok github.com/chainloop-dev/chainloop/app/cli/pkg/action 0.048s +... (all packages pass) +``` + +### Build +``` +go build ./app/cli/... # exit 0 — no output +``` + +--- + +## Files Changed + +1. `pkg/attestation/crafter/materials/archive.go` — Fix 1 (detectByMagic), Fix 2 (safeArchivePath) +2. `pkg/attestation/crafter/materials/archive_test.go` — Fix 2 new test cases, Fix 5 new detection tests +3. `app/cli/pkg/action/attestation_add.go` — Fix 3 (warning log) +4. `app/cli/pkg/action/attestation_add_routing_test.go` — Fix 1 new test cases +5. `pkg/attestation/crafter/crafter.go` — Fix 4 (temp file naming + cleanup) + +--- + +## Self-Review + +- Fix 1: The new `detectByMagic` behaviour is correct — if the value is not a file path, we silently return `ArchiveNone` which means `shouldExplode` returns `false, false, nil`. Non-archive kinds proceed through the normal add path. +- Fix 2: The virtual-root canonicalisation is the correct approach. `path.Clean("/root/" + name)` will resolve all `..` components against `/root`; if the result doesn't start with `/root/` the path escaped. +- Fix 3: Warning is placed before `AddMaterialsFromArchive` so it fires regardless of the archive content. +- Fix 4: `matName` is unique per entry by construction (NameAllocator), so no collision. `os.Remove` after staging keeps disk bounded; the `defer os.RemoveAll(tmpDir)` remains as a safety net. +- Fix 5: `writeTar` is a minimal helper that mirrors `writeTarGz` without the gzip wrapper. diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go index 84fb8cd11..939261aa0 100644 --- a/app/cli/pkg/action/attestation_add.go +++ b/app/cli/pkg/action/attestation_add.go @@ -158,6 +158,9 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa return nil, fmt.Errorf("detecting archive: %w", err) } if explode { + if len(policyInputFiles) > 0 { + action.Logger.Warn().Msg("--policy-input-from-file is ignored when expanding an archive; evidence cross-links are not recorded for exploded materials") + } limits := materials.ArchiveLimits{MaxEntries: action.maxExtractEntries, MaxTotalSize: action.maxExtractSize} mts, err := crafter.AddMaterialsFromArchive(ctx, attestationID, materialType, materialName, materialValue, format, casBackend, annotations, limits, addOpts...) if err != nil { diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go index 3f061a238..8104f9a7f 100644 --- a/app/cli/pkg/action/attestation_add_routing_test.go +++ b/app/cli/pkg/action/attestation_add_routing_test.go @@ -62,6 +62,10 @@ func TestShouldExplode(t *testing.T) { {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false}, {"no kind", "", zipPath, false}, {"kind + non-archive", "ARTIFACT", plainPath, false}, + // Non-file values must never return an error — STRING and CONTAINER_IMAGE + // carry values that are not file paths at all. + {"kind STRING non-file value", "STRING", "hello world", false}, + {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", false}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index 60c8c5a0d..09e643afb 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -868,9 +868,10 @@ func (c *Crafter) AddMaterialsFromArchive( base := filepath.Base(name) matName := allocator.Allocate(namePrefix, base) - // Write the entry to a temp file named after the entry basename so - // material crafters can identify it by name when opening it by path. - tmpPath := filepath.Join(tmpDir, filepath.Base(name)) + // Use the allocated unique material name for the temp file so that two + // archive entries with the same basename (e.g. "a/x.json" and "b/x.json") + // never collide in the shared tmpDir. + tmpPath := filepath.Join(tmpDir, matName) tmp, err := os.Create(tmpPath) if err != nil { return fmt.Errorf("creating temp file for entry %q: %w", name, err) @@ -889,6 +890,9 @@ func (c *Crafter) AddMaterialsFromArchive( } mt, err := c.stageMaterial(ctx, m, tmpPath, casBackend, runtimeAnnotations, opts...) + // Remove the temp file immediately after staging to keep disk usage bounded; + // the deferred os.RemoveAll(tmpDir) is the safety net. + os.Remove(tmpPath) //nolint:errcheck // best-effort cleanup if err != nil { return fmt.Errorf("staging entry %q as material %q: %w", name, matName, err) } diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index b7657e1d2..9af400b4c 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -61,7 +61,11 @@ func DetectArchive(path string) (ArchiveFormat, error) { func detectByMagic(path string) (ArchiveFormat, error) { f, err := os.Open(path) if err != nil { - return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) + // If the file doesn't exist, the value is not a file path at all (e.g. + // "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE). + // Treat it as a non-archive rather than propagating the error so callers + // that pass non-file values are not surprised. + return ArchiveNone, nil } defer f.Close() @@ -156,20 +160,23 @@ func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, } // safeArchivePath rejects absolute paths and any path that escapes the -// extraction root via "..". +// extraction root via ".." path components. A filename that merely contains +// ".." as a substring (e.g. "foo..bar.json") is accepted; only actual path +// components equal to ".." are rejected. func safeArchivePath(name string) bool { normalized := strings.ReplaceAll(name, "\\", "/") - // Reject absolute paths + // Reject absolute paths. if strings.HasPrefix(normalized, "/") { return false } - // Reject any path containing ".." which could escape the root - if strings.Contains(normalized, "..") { - return false - } - // Further validation: ensure no traversal after normalization - clean := path.Clean("/" + normalized) - return !strings.Contains(clean, "/../") && clean != "/.." + // Canonicalise against a virtual root and check that the result stays + // within it. path.Clean will resolve ".." components so a path like + // "a/../../etc/passwd" becomes "/etc/passwd" which does not start with + // the virtual prefix "/root/"; a safe path like "a/b.txt" becomes + // "/root/a/b.txt" which does. + const root = "/root" + clean := path.Clean(root + "/" + normalized) + return strings.HasPrefix(clean, root+"/") || clean == root } func walkZip(p string, visit func(name string, size int64, r io.Reader) error) error { diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index 31a5cc246..234b1bae7 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -65,10 +65,29 @@ func writeTarGz(t *testing.T, dir, name string, files map[string]string) string return p } +// writeTar creates an uncompressed .tar at dir/name containing the given regular files. +func writeTar(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + tw := tar.NewWriter(f) + for n, c := range files { + require.NoError(t, tw.WriteHeader(&tar.Header{Name: n, Mode: 0o600, Size: int64(len(c)), Typeflag: tar.TypeReg})) + _, err = tw.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, tw.Close()) + return p +} + func TestDetectArchive(t *testing.T) { dir := t.TempDir() zipPath := writeZip(t, dir, "a.zip", map[string]string{"x.txt": "hi"}) tgzPath := writeTarGz(t, dir, "a.tar.gz", map[string]string{"x.txt": "hi"}) + tarPath := writeTar(t, dir, "a.tar", map[string]string{"x.txt": "hi"}) + tgzShortPath := writeTarGz(t, dir, "a.tgz", map[string]string{"x.txt": "hi"}) plain := filepath.Join(dir, "app.bin") require.NoError(t, os.WriteFile(plain, []byte("not an archive"), 0o600)) @@ -84,6 +103,8 @@ func TestDetectArchive(t *testing.T) { }{ {"zip by extension", zipPath, ArchiveZip}, {"tar.gz by extension", tgzPath, ArchiveTarGz}, + {"tar by extension", tarPath, ArchiveTar}, + {"tgz by extension", tgzShortPath, ArchiveTarGz}, {"plain file", plain, ArchiveNone}, {"zip without extension via magic", noExt, ArchiveZip}, } @@ -177,6 +198,8 @@ func TestSafeArchivePath(t *testing.T) { {"absolute path", "/etc/passwd", false}, {"path traversal", "../escape.txt", false}, {"nested path traversal", "foo/../../../etc/passwd", false}, + {"double dot in filename is ok", "foo..bar.json", true}, + {"escape via nested double dot", "a/../../etc/passwd", false}, {"valid nested path", "a/b.txt", true}, {"valid simple path", "file.txt", true}, {"valid with subdirs", "nested/dir/file.txt", true}, From a61e165372bf79630a9d950f726bb015ee076168 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Tue, 30 Jun 2026 08:05:02 +0200 Subject: [PATCH 14/23] fix(cli): address archive-explode code review findings Render multi-material explode output as a single JSON array so --output json stays a parseable document; only the table renderer is emitted per material. Drop the unused size parameter from the archive visit signature, document the intentional zero-length-entry skips and the zip symlink-detection limitation, and correct the --max-extract-size default label to 1GiB. Build the crafter archive test fixture in-process and drop the checked-in binary blob and SDD process artifacts. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- .superpowers/sdd/final-fix-report.md | 228 ---- app/cli/cmd/attestation_add.go | 22 +- app/cli/documentation/cli-reference.mdx | 2 +- .../2026-06-29-explode-archive-materials.md | 1105 ----------------- pkg/attestation/crafter/crafter_test.go | 4 +- pkg/attestation/crafter/materials/archive.go | 21 +- .../crafter/testdata/two-files.zip | Bin 245 -> 0 bytes 7 files changed, 35 insertions(+), 1347 deletions(-) delete mode 100644 .superpowers/sdd/final-fix-report.md delete mode 100644 docs/superpowers/plans/2026-06-29-explode-archive-materials.md delete mode 100644 pkg/attestation/crafter/testdata/two-files.zip diff --git a/.superpowers/sdd/final-fix-report.md b/.superpowers/sdd/final-fix-report.md deleted file mode 100644 index 9217a0189..000000000 --- a/.superpowers/sdd/final-fix-report.md +++ /dev/null @@ -1,228 +0,0 @@ -# Archive Detection Fix Report - -**Date**: 2026-06-29 -**Branch**: zip-files -**Commit**: (see below) - ---- - -## Summary - -Five fixes applied to the archive detection and extraction pipeline in Chainloop. - ---- - -## Fix 1 (CRITICAL) — Non-file material values must not error in detection - -### Problem -`detectByMagic` in `pkg/attestation/crafter/materials/archive.go` called `os.Open(path)` and returned a hard error when the file didn't exist. This broke material kinds like `STRING` and `CONTAINER_IMAGE` whose `--value` is not a file path. - -### Before -```go -func detectByMagic(path string) (ArchiveFormat, error) { - f, err := os.Open(path) - if err != nil { - return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) - } - ... -} -``` - -### After -```go -func detectByMagic(path string) (ArchiveFormat, error) { - f, err := os.Open(path) - if err != nil { - // If the file doesn't exist, the value is not a file path at all (e.g. - // "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE). - // Treat it as a non-archive rather than propagating the error so callers - // that pass non-file values are not surprised. - return ArchiveNone, nil - } - ... -} -``` - -### TDD Evidence - -**RED** (before fix): -``` -=== RUN TestShouldExplode/kind_STRING_non-file_value - attestation_add_routing_test.go:73: - Error: Received unexpected error: - opening "hello world": open hello world: no such file or directory -=== RUN TestShouldExplode/kind_CONTAINER_IMAGE_non-file_value - attestation_add_routing_test.go:73: - Error: Received unexpected error: - opening "registry.example.com/app:v1": open registry.example.com/app:v1: no such file or directory -FAIL -``` - -**GREEN** (after fix): -``` -=== RUN TestShouldExplode/kind_STRING_non-file_value --- PASS -=== RUN TestShouldExplode/kind_CONTAINER_IMAGE_non-file_value --- PASS -PASS -ok github.com/chainloop-dev/chainloop/app/cli/pkg/action 0.026s -``` - ---- - -## Fix 2 (IMPORTANT) — safeArchivePath over-broad ".." rejection - -### Problem -`safeArchivePath` used `strings.Contains(normalized, "..")` which rejected any path with `".."` as a substring — including legitimate filenames like `foo..bar.json`. - -### Before -```go -func safeArchivePath(name string) bool { - normalized := strings.ReplaceAll(name, "\\", "/") - if strings.HasPrefix(normalized, "/") { - return false - } - // Reject any path containing ".." which could escape the root - if strings.Contains(normalized, "..") { - return false - } - clean := path.Clean("/" + normalized) - return !strings.Contains(clean, "/../") && clean != "/.." -} -``` - -### After -```go -func safeArchivePath(name string) bool { - normalized := strings.ReplaceAll(name, "\\", "/") - if strings.HasPrefix(normalized, "/") { - return false - } - // Canonicalise against a virtual root and check that the result stays - // within it. path.Clean will resolve ".." components so a path like - // "a/../../etc/passwd" becomes "/etc/passwd" which does not start with - // the virtual prefix "/root/"; a safe path like "a/b.txt" becomes - // "/root/a/b.txt" which does. - const root = "/root" - clean := path.Clean(root + "/" + normalized) - return strings.HasPrefix(clean, root+"/") || clean == root -} -``` - -### TDD Evidence - -**RED** (before fix — `foo..bar.json` case): -``` -=== RUN TestSafeArchivePath/double_dot_in_filename_is_ok - archive_test.go:189: - Error: Not equal: - expected: true - actual : false -FAIL -``` - -**GREEN** (after fix): -``` -=== RUN TestSafeArchivePath/double_dot_in_filename_is_ok --- PASS -=== RUN TestSafeArchivePath/path_traversal --- PASS -=== RUN TestSafeArchivePath/escape_via_nested_double_dot --- PASS -PASS -ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials 0.012s -``` - ---- - -## Fix 3 (IMPORTANT) — Warn when --policy-input-from-file is used on the explode path - -### Change -Added a warning log in `AttestationAdd.Run` (`app/cli/pkg/action/attestation_add.go`) inside the `if explode {` branch, before calling `AddMaterialsFromArchive`. - -```go -if explode { - if len(policyInputFiles) > 0 { - action.Logger.Warn().Msg("--policy-input-from-file is ignored when expanding an archive; evidence cross-links are not recorded for exploded materials") - } - ... -} -``` - ---- - -## Fix 4 (MINOR) — Bound temp disk + avoid basename collision - -### Problem -In `AddMaterialsFromArchive` (`pkg/attestation/crafter/crafter.go`), the temp file was named `filepath.Join(tmpDir, filepath.Base(name))`. Two archive entries with the same basename (e.g. `a/x.json` and `b/x.json`) would collide. Also, temp files were not removed until the deferred `os.RemoveAll(tmpDir)` at return. - -### Before -```go -tmpPath := filepath.Join(tmpDir, filepath.Base(name)) -// ... io.Copy ... -mt, err := c.stageMaterial(ctx, m, tmpPath, ...) -``` - -### After -```go -// Use the allocated unique material name for the temp file to avoid basename collisions. -tmpPath := filepath.Join(tmpDir, matName) -// ... io.Copy ... -mt, err := c.stageMaterial(ctx, m, tmpPath, ...) -// Remove the temp file immediately after staging to keep disk usage bounded. -os.Remove(tmpPath) //nolint:errcheck // best-effort cleanup -``` - ---- - -## Fix 5 (MINOR) — Add .tar / .tgz detection tests - -Added `writeTar` helper (uncompressed tar, mirroring `writeTarGz` without the gzip layer) and two new test cases to `TestDetectArchive`: - -- `{"tar by extension", tarPath, ArchiveTar}` -- `{"tgz by extension", tgzShortPath, ArchiveTarGz}` - ---- - -## Test Results - -### Focused tests -``` -go test ./app/cli/pkg/action/ -run TestShouldExplode -v -PASS ok github.com/chainloop-dev/chainloop/app/cli/pkg/action 0.028s - -go test ./pkg/attestation/crafter/materials/ -run 'TestDetectArchive|TestSafeArchivePath|TestWalkArchiveEntries' -v -PASS ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials 0.016s - -SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/ -run 'TestSuite/TestAddMaterialsFromArchive' -v -PASS ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter 0.037s -``` - -### Full regression -``` -SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/... ./app/cli/... -ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter 1.529s -ok github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials 2.643s -ok github.com/chainloop-dev/chainloop/app/cli/pkg/action 0.048s -... (all packages pass) -``` - -### Build -``` -go build ./app/cli/... # exit 0 — no output -``` - ---- - -## Files Changed - -1. `pkg/attestation/crafter/materials/archive.go` — Fix 1 (detectByMagic), Fix 2 (safeArchivePath) -2. `pkg/attestation/crafter/materials/archive_test.go` — Fix 2 new test cases, Fix 5 new detection tests -3. `app/cli/pkg/action/attestation_add.go` — Fix 3 (warning log) -4. `app/cli/pkg/action/attestation_add_routing_test.go` — Fix 1 new test cases -5. `pkg/attestation/crafter/crafter.go` — Fix 4 (temp file naming + cleanup) - ---- - -## Self-Review - -- Fix 1: The new `detectByMagic` behaviour is correct — if the value is not a file path, we silently return `ArchiveNone` which means `shouldExplode` returns `false, false, nil`. Non-archive kinds proceed through the normal add path. -- Fix 2: The virtual-root canonicalisation is the correct approach. `path.Clean("/root/" + name)` will resolve all `..` components against `/root`; if the result doesn't start with `/root/` the path escaped. -- Fix 3: Warning is placed before `AddMaterialsFromArchive` so it fires regardless of the archive content. -- Fix 4: `matName` is unique per entry by construction (NameAllocator), so no collision. `os.Remove` after staging keeps disk bounded; the `defer os.RemoveAll(tmpDir)` remains as a safety net. -- Fix 5: `writeTar` is a minimal helper that mirrors `writeTarGz` without the gzip wrapper. diff --git a/app/cli/cmd/attestation_add.go b/app/cli/cmd/attestation_add.go index fe52bad5b..911ec6651 100644 --- a/app/cli/cmd/attestation_add.go +++ b/app/cli/cmd/attestation_add.go @@ -144,14 +144,22 @@ func newAttestationAddCmd() *cobra.Command { return err } - for _, m := range resp { - if err := output.EncodeOutput(flagOutputFormat, m, func(s *action.AttestationStatusMaterial) error { - return displayMaterialInfo(s, policies[m.Name]) - }); err != nil { - return err + // The explode path can return several materials. Render JSON as a + // single array so the output stays a parseable document; only the + // table renderer is emitted per material. + switch flagOutputFormat { + case output.FormatJSON: + return output.EncodeJSON(resp) + case output.FormatTable: + for _, m := range resp { + if err := displayMaterialInfo(m, policies[m.Name]); err != nil { + return err + } } + return nil + default: + return output.ErrOutputFormatNotImplemented } - return nil }, ) }, @@ -182,7 +190,7 @@ func newAttestationAddCmd() *cobra.Command { // Archive extraction guards cmd.Flags().IntVar(&maxExtractEntries, "max-extract-entries", 10000, "max number of files to extract when --value is an archive") - cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GB", "max total uncompressed size to extract when --value is an archive") + cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GiB", "max total uncompressed size to extract when --value is an archive") if registryServer == "" { registryServer = os.Getenv(registryServerEnvVarName) diff --git a/app/cli/documentation/cli-reference.mdx b/app/cli/documentation/cli-reference.mdx index 19857e915..3dfe7522e 100755 --- a/app/cli/documentation/cli-reference.mdx +++ b/app/cli/documentation/cli-reference.mdx @@ -259,7 +259,7 @@ Options -h, --help help for add --kind string kind of the material to be recorded: ["ARTIFACT" "ASYNCAPI_SPEC" "ATTESTATION" "BLACKDUCK_SCA_JSON" "CERTCC_DRANZER" "CHAINLOOP_AI_AGENT_CONFIG" "CHAINLOOP_AI_CODING_SESSION" "CHAINLOOP_PR_INFO" "CHAINLOOP_RUNNER_CONTEXT" "CONTAINER_IMAGE" "CSAF_INFORMATIONAL_ADVISORY" "CSAF_SECURITY_ADVISORY" "CSAF_SECURITY_INCIDENT_RESPONSE" "CSAF_VEX" "EVIDENCE" "GHAS_CODE_SCAN" "GHAS_DEPENDENCY_SCAN" "GHAS_SECRET_SCAN" "GITLAB_SECURITY_REPORT" "GITLEAKS_JSON" "GRAPHQL_SPEC" "HELM_CHART" "JACOCO_XML" "JUNIT_XML" "OPENAPI_SPEC" "OPENVEX" "OSSF_SCORECARD_JSON" "RADAMSA_CRASHES" "RADAMSA_REPORT" "SARIF" "SBOM_CYCLONEDX_JSON" "SBOM_SPDX_JSON" "SLSA_PROVENANCE" "STRING" "SYSINTERNALS_ACCESSCHK" "SYSINTERNALS_SIGCHECK" "TWISTCLI_SCAN_JSON" "YELP_DETECT_SECRETS_BASELINE" "ZAP_DAST_ZIP"] --max-extract-entries int max number of files to extract when --value is an archive (default 10000) ---max-extract-size string max total uncompressed size to extract when --value is an archive (default "1GB") +--max-extract-size string max total uncompressed size to extract when --value is an archive (default "1GiB") --name string name of the material as shown in the contract --no-strict-validation skip strict schema validation for structured materials (SBOM_CYCLONEDX_JSON, OPENAPI_SPEC, ASYNCAPI_SPEC, OSSF_SCORECARD_JSON) --policy-input-from-file stringArray feed a policy input from a column of a CSV or JSON file, in the format =[:] (e.g. ignored_paths=exception.csv:Path); is a single top-level column/field name and defaults to the input name; repeatable. The file is also recorded as EVIDENCE. diff --git a/docs/superpowers/plans/2026-06-29-explode-archive-materials.md b/docs/superpowers/plans/2026-06-29-explode-archive-materials.md deleted file mode 100644 index 12449e749..000000000 --- a/docs/superpowers/plans/2026-06-29-explode-archive-materials.md +++ /dev/null @@ -1,1105 +0,0 @@ -# Explode Archive Materials in `chainloop att add` Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Let `chainloop attestation add --kind --value ` unpack a `.zip`/`.tar`/`.tar.gz`/`.tgz` and record every contained regular file as its own material of kind ``, atomically. - -**Architecture:** A new stdlib-only archive helper in the `materials` package detects archives and walks their entries with resource guards. The `Crafter` gains an atomic batch method that crafts every entry into the in-memory crafting state and persists **once**. `AttestationAdd.Run` routes to that method when `--kind` is set, the value is an archive, and the kind is not "archive-native" (e.g. `ZAP_DAST_ZIP`); all other cases keep the existing single-material path unchanged. - -**Tech Stack:** Go 1.26.4, `archive/zip`, `archive/tar`, `compress/gzip` (stdlib only), protovalidate, testify suites. - -## Global Constraints - -- **Out of scope (explicit):** per-entry **auto-detection** when `--kind` is omitted. The explode path triggers **only** when `--kind` is explicitly provided. With no `--kind`, behavior is unchanged (the whole archive goes through the existing single-material path). -- **Go version:** 1.26.4. Stdlib only for archives — no new dependency. Formats: `.zip`, `.tar`, `.tar.gz`, `.tgz` (no bzip2/xz in this iteration). -- **Material names are DNS-1123:** must match `^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` (validated by protovalidate in `materials.Craft`). Derived names MUST be sanitized; raw basenames like `scan.json` are invalid as-is. -- **`--name` on explode path:** used as a **prefix** — derived name is `-`; when `--name` is empty, the bare sanitized basename is used. -- **Annotations:** every `--annotation key=value` applies to **every** extracted material. -- **Atomicity:** all qualifying entries are added or none. State is persisted exactly once, after all entries craft successfully. -- **Guards (defaults):** max entries `10000`, max total uncompressed size `1 GiB` (`1 << 30`), enforced by streaming (do not trust declared sizes). -- **Recursion:** nested archives are NOT recursed — treated as regular files. -- **License header:** every modified/created file's header ends with the current year `2026` (range if it already had an earlier start year, e.g. `2024-2026`). -- **Commits:** signed (`-S`), DCO (`-s`), Conventional Commits, and an `Assisted-by: Claude Code` trailer on every commit. No co-authored-by lines. -- **Quality gates:** `gofmt`, `golangci-lint`, table-driven tests, TDD (failing test first). -- **Module path:** `github.com/chainloop-dev/chainloop`. - ---- - -## File Structure - -- `pkg/attestation/crafter/materials/archive.go` — **NEW.** Archive detection, entry walking with guards, name sanitization/allocation, archive-native kind allowlist. -- `pkg/attestation/crafter/materials/archive_test.go` — **NEW.** Tests for the above. -- `pkg/attestation/crafter/materials/testdata/` — **NEW fixtures** generated by a test helper (no binary blobs checked in by hand). -- `pkg/attestation/crafter/crafter.go` — **MODIFY.** Extract `stageMaterial` from `addMaterial`; add atomic `AddMaterialsFromArchive`. -- `pkg/attestation/crafter/crafter_test.go` — **MODIFY.** Tests for `AddMaterialsFromArchive`. -- `app/cli/pkg/action/attestation_add.go` — **MODIFY.** Route to the explode path; `Run` returns `[]*AttestationStatusMaterial`; carry guard limits. -- `app/cli/cmd/attestation_add.go` — **MODIFY.** Add guard flags; render N materials. - ---- - -## Task 1: Archive detection - -**Files:** -- Create: `pkg/attestation/crafter/materials/archive.go` -- Test: `pkg/attestation/crafter/materials/archive_test.go` - -**Interfaces:** -- Produces: - - `type ArchiveFormat int` with `ArchiveNone, ArchiveZip, ArchiveTar, ArchiveTarGz` - - `func DetectArchive(path string) (ArchiveFormat, error)` — extension-first, magic-byte backstop; returns `ArchiveNone` (nil error) for non-archives. - -- [ ] **Step 1: Write the failing test** - -Create `pkg/attestation/crafter/materials/archive_test.go` (standard `2026` license header; package `materials`): - -```go -package materials - -import ( - "archive/tar" - "archive/zip" - "compress/gzip" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// writeZip creates a zip at dir/name containing the given files (name->content). -func writeZip(t *testing.T, dir, name string, files map[string]string) string { - t.Helper() - p := filepath.Join(dir, name) - f, err := os.Create(p) - require.NoError(t, err) - defer f.Close() - zw := zip.NewWriter(f) - for n, c := range files { - w, err := zw.Create(n) - require.NoError(t, err) - _, err = w.Write([]byte(c)) - require.NoError(t, err) - } - require.NoError(t, zw.Close()) - return p -} - -// writeTarGz creates a .tar.gz at dir/name containing the given regular files. -func writeTarGz(t *testing.T, dir, name string, files map[string]string) string { - t.Helper() - p := filepath.Join(dir, name) - f, err := os.Create(p) - require.NoError(t, err) - defer f.Close() - gw := gzip.NewWriter(f) - tw := tar.NewWriter(gw) - for n, c := range files { - require.NoError(t, tw.WriteHeader(&tar.Header{Name: n, Mode: 0o600, Size: int64(len(c)), Typeflag: tar.TypeReg})) - _, err = tw.Write([]byte(c)) - require.NoError(t, err) - } - require.NoError(t, tw.Close()) - require.NoError(t, gw.Close()) - return p -} - -func TestDetectArchive(t *testing.T) { - dir := t.TempDir() - zipPath := writeZip(t, dir, "a.zip", map[string]string{"x.txt": "hi"}) - tgzPath := writeTarGz(t, dir, "a.tar.gz", map[string]string{"x.txt": "hi"}) - - plain := filepath.Join(dir, "app.bin") - require.NoError(t, os.WriteFile(plain, []byte("not an archive"), 0o600)) - - // A .zip renamed without extension — magic bytes must still detect it. - noExt := filepath.Join(dir, "noext") - require.NoError(t, os.WriteFile(noExt, mustRead(t, zipPath), 0o600)) - - tests := []struct { - name string - path string - want ArchiveFormat - }{ - {"zip by extension", zipPath, ArchiveZip}, - {"tar.gz by extension", tgzPath, ArchiveTarGz}, - {"plain file", plain, ArchiveNone}, - {"zip without extension via magic", noExt, ArchiveZip}, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - got, err := DetectArchive(tc.path) - require.NoError(t, err) - assert.Equal(t, tc.want, got) - }) - } -} - -func mustRead(t *testing.T, p string) []byte { - t.Helper() - b, err := os.ReadFile(p) - require.NoError(t, err) - return b -} -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `go test ./pkg/attestation/crafter/materials/ -run TestDetectArchive -v` -Expected: FAIL — `undefined: DetectArchive` / `ArchiveZip`. - -- [ ] **Step 3: Write minimal implementation** - -Create `pkg/attestation/crafter/materials/archive.go` (license header `Copyright 2026 The Chainloop Authors.`): - -```go -package materials - -import ( - "bytes" - "fmt" - "os" - "strings" -) - -// ArchiveFormat identifies a supported archive container. -type ArchiveFormat int - -const ( - ArchiveNone ArchiveFormat = iota - ArchiveZip - ArchiveTar - ArchiveTarGz -) - -// DetectArchive reports whether path is a supported archive and, if so, its -// format. Detection is by extension first; for files whose extension does not -// match, magic bytes are used as a backstop so renamed archives are still -// caught. A non-archive returns (ArchiveNone, nil). -func DetectArchive(path string) (ArchiveFormat, error) { - lower := strings.ToLower(path) - switch { - case strings.HasSuffix(lower, ".zip"): - return ArchiveZip, nil - case strings.HasSuffix(lower, ".tar.gz"), strings.HasSuffix(lower, ".tgz"): - return ArchiveTarGz, nil - case strings.HasSuffix(lower, ".tar"): - return ArchiveTar, nil - } - - return detectByMagic(path) -} - -func detectByMagic(path string) (ArchiveFormat, error) { - f, err := os.Open(path) - if err != nil { - return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) - } - defer f.Close() - - // 512 bytes is enough for the gzip/zip magic and the tar "ustar" marker at - // offset 257. - header := make([]byte, 512) - n, _ := f.Read(header) - header = header[:n] - - switch { - case bytes.HasPrefix(header, []byte("PK\x03\x04")), bytes.HasPrefix(header, []byte("PK\x05\x06")): - return ArchiveZip, nil - case bytes.HasPrefix(header, []byte{0x1f, 0x8b}): - return ArchiveTarGz, nil - case len(header) >= 262 && bytes.Equal(header[257:262], []byte("ustar")): - return ArchiveTar, nil - } - - return ArchiveNone, nil -} -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `go test ./pkg/attestation/crafter/materials/ -run TestDetectArchive -v` -Expected: PASS. - -- [ ] **Step 5: Commit** - -```bash -gofmt -w pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go -git add pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go -git commit -S -s -m "feat(cli): detect archive materials for att add explode path - -Assisted-by: Claude Code" -``` - ---- - -## Task 2: Archive entry walking with guards - -**Files:** -- Modify: `pkg/attestation/crafter/materials/archive.go` -- Test: `pkg/attestation/crafter/materials/archive_test.go` - -**Interfaces:** -- Consumes: `ArchiveFormat`, `DetectArchive` (Task 1). -- Produces: - - `type ArchiveLimits struct { MaxEntries int; MaxTotalSize int64 }` - - `func DefaultArchiveLimits() ArchiveLimits` → `{10000, 1 << 30}` - - `var ErrArchiveTooLarge`, `var ErrTooManyEntries` - - `func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error` - - Calls `yield(innerPath, reader)` for each **regular** file. `innerPath` is the entry's path inside the archive (caller derives the basename for naming). - - Skips directories, symlinks, hardlinks, and zero-length entries. - - Rejects entries whose cleaned path escapes the archive root (zip-slip guard). - - Enforces `MaxEntries` (count of yielded entries) and `MaxTotalSize` (running uncompressed bytes streamed), returning the sentinel errors when exceeded. The byte cap is enforced **while streaming**, not from declared sizes. - - A `yield` error aborts the walk and is returned wrapped with the entry name. - -- [ ] **Step 1: Write the failing test** - -Append to `archive_test.go`: - -```go -func TestWalkArchiveEntries(t *testing.T) { - dir := t.TempDir() - - t.Run("yields regular files, skips dirs", func(t *testing.T) { - // Build a zip with a directory entry + two files. - p := filepath.Join(dir, "files.zip") - f, err := os.Create(p) - require.NoError(t, err) - zw := zip.NewWriter(f) - _, err = zw.Create("nested/") // directory entry - require.NoError(t, err) - for _, n := range []string{"a.json", "nested/b.json"} { - w, err := zw.Create(n) - require.NoError(t, err) - _, err = w.Write([]byte("{}")) - require.NoError(t, err) - } - require.NoError(t, zw.Close()) - require.NoError(t, f.Close()) - - var got []string - err = WalkArchiveEntries(p, ArchiveZip, DefaultArchiveLimits(), func(name string, r io.Reader) error { - b, _ := io.ReadAll(r) - assert.Equal(t, "{}", string(b)) - got = append(got, name) - return nil - }) - require.NoError(t, err) - assert.ElementsMatch(t, []string{"a.json", "nested/b.json"}, got) - }) - - t.Run("max entries exceeded", func(t *testing.T) { - p := writeTarGz(t, dir, "many.tar.gz", map[string]string{"a": "1", "b": "2", "c": "3"}) - err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 2, MaxTotalSize: 1 << 30}, func(string, io.Reader) error { return nil }) - require.ErrorIs(t, err, ErrTooManyEntries) - }) - - t.Run("max total size exceeded while streaming", func(t *testing.T) { - p := writeTarGz(t, dir, "big.tar.gz", map[string]string{"a": strings.Repeat("x", 1000)}) - err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 100, MaxTotalSize: 100}, func(_ string, r io.Reader) error { - _, err := io.ReadAll(r) - return err - }) - require.ErrorIs(t, err, ErrArchiveTooLarge) - }) -} -``` - -Add `"io"` and `"strings"` to the test imports. - -- [ ] **Step 2: Run test to verify it fails** - -Run: `go test ./pkg/attestation/crafter/materials/ -run TestWalkArchiveEntries -v` -Expected: FAIL — `undefined: WalkArchiveEntries` / `ErrTooManyEntries` / `ErrArchiveTooLarge`. - -- [ ] **Step 3: Write minimal implementation** - -Append to `archive.go` (add imports `archive/tar`, `archive/zip`, `compress/gzip`, `errors`, `io`, `path`): - -```go -var ( - // ErrTooManyEntries is returned when an archive has more qualifying entries - // than the configured maximum. - ErrTooManyEntries = errors.New("archive exceeds the maximum number of entries") - // ErrArchiveTooLarge is returned when the running uncompressed size of an - // archive exceeds the configured maximum. - ErrArchiveTooLarge = errors.New("archive exceeds the maximum uncompressed size") -) - -// ArchiveLimits bounds archive expansion to guard against zip bombs. -type ArchiveLimits struct { - MaxEntries int - MaxTotalSize int64 -} - -// DefaultArchiveLimits returns the safe defaults: 10000 entries and 1 GiB -// total uncompressed size. -func DefaultArchiveLimits() ArchiveLimits { - return ArchiveLimits{MaxEntries: 10000, MaxTotalSize: 1 << 30} -} - -// capReader wraps a reader and fails once the shared running total exceeds max, -// so we never trust an archive's declared sizes. -type capReader struct { - r io.Reader - total *int64 - max int64 -} - -func (c *capReader) Read(p []byte) (int, error) { - n, err := c.r.Read(p) - *c.total += int64(n) - if *c.total > c.max { - return n, ErrArchiveTooLarge - } - return n, err -} - -// WalkArchiveEntries calls yield for every regular file in the archive, -// enforcing the limits and skipping directories, symlinks, hardlinks, empty -// entries, and path-traversal entries. -func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error { - var total int64 - count := 0 - visit := func(name string, size int64, r io.Reader) error { - if !safeArchivePath(name) { - return fmt.Errorf("unsafe entry path %q in archive", name) - } - count++ - if count > limits.MaxEntries { - return ErrTooManyEntries - } - if err := yield(name, &capReader{r: r, total: &total, max: limits.MaxTotalSize}); err != nil { - return fmt.Errorf("processing entry %q: %w", name, err) - } - return nil - } - - switch format { - case ArchiveZip: - return walkZip(path, visit) - case ArchiveTar: - return walkTar(path, false, visit) - case ArchiveTarGz: - return walkTar(path, true, visit) - default: - return fmt.Errorf("unsupported archive format") - } -} - -// safeArchivePath rejects absolute paths and any path that escapes the -// extraction root via "..". -func safeArchivePath(name string) bool { - clean := path.Clean("/" + strings.ReplaceAll(name, "\\", "/")) - return !strings.Contains(clean, "/../") && clean != "/.." -} - -func walkZip(p string, visit func(name string, size int64, r io.Reader) error) error { - zr, err := zip.OpenReader(p) - if err != nil { - return fmt.Errorf("opening zip: %w", err) - } - defer zr.Close() - - for _, f := range zr.File { - if f.FileInfo().IsDir() || f.Mode()&os.ModeSymlink != 0 || f.UncompressedSize64 == 0 { - continue - } - rc, err := f.Open() - if err != nil { - return fmt.Errorf("opening entry %q: %w", f.Name, err) - } - err = visit(f.Name, int64(f.UncompressedSize64), rc) - rc.Close() - if err != nil { - return err - } - } - return nil -} - -func walkTar(p string, gzipped bool, visit func(name string, size int64, r io.Reader) error) error { - f, err := os.Open(p) - if err != nil { - return fmt.Errorf("opening tar: %w", err) - } - defer f.Close() - - var src io.Reader = f - if gzipped { - gz, err := gzip.NewReader(f) - if err != nil { - return fmt.Errorf("opening gzip: %w", err) - } - defer gz.Close() - src = gz - } - - tr := tar.NewReader(src) - for { - hdr, err := tr.Next() - if errors.Is(err, io.EOF) { - return nil - } - if err != nil { - return fmt.Errorf("reading tar: %w", err) - } - if hdr.Typeflag != tar.TypeReg || hdr.Size == 0 { - continue - } - if err := visit(hdr.Name, hdr.Size, tr); err != nil { - return err - } - } -} -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `go test ./pkg/attestation/crafter/materials/ -run TestWalkArchiveEntries -v` -Expected: PASS. - -- [ ] **Step 5: Commit** - -```bash -gofmt -w pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go -git add pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go -git commit -S -s -m "feat(cli): walk archive entries with zip-bomb and traversal guards - -Assisted-by: Claude Code" -``` - ---- - -## Task 3: Archive-native kind allowlist + name derivation - -**Files:** -- Modify: `pkg/attestation/crafter/materials/archive.go` -- Test: `pkg/attestation/crafter/materials/archive_test.go` - -**Interfaces:** -- Produces: - - `func IsArchiveNativeKind(kind string) bool` — true for kinds whose value is meant to be the archive itself; initial set: `ZAP_DAST_ZIP`. - - `func SanitizeMaterialName(s string) string` — lower-cases and collapses runs outside `[a-z0-9]` to a single `-`, trims leading/trailing `-`; falls back to `"material"` if empty. Result matches DNS-1123. - - `type NameAllocator struct{ ... }`, `func NewNameAllocator(existing []string) *NameAllocator`, `func (a *NameAllocator) Allocate(prefix, base string) string` — returns a unique DNS-1123 name. First use of a name is clean; collisions get `-1`, `-2`, …. When `prefix != ""` the name is `-`, else ``. - -- [ ] **Step 1: Write the failing test** - -Append to `archive_test.go`: - -```go -func TestSanitizeMaterialName(t *testing.T) { - tests := []struct{ in, want string }{ - {"scan.json", "scan-json"}, - {"results.XML", "results-xml"}, - {"weird__name!!", "weird-name"}, - {"___", "material"}, - } - for _, tc := range tests { - assert.Equal(t, tc.want, SanitizeMaterialName(tc.in)) - } -} - -func TestNameAllocator(t *testing.T) { - a := NewNameAllocator([]string{"existing"}) - - assert.Equal(t, "scan-json", a.Allocate("", "scan.json")) - assert.Equal(t, "scan-json-1", a.Allocate("", "scan.json")) // collision - assert.Equal(t, "results-xml", a.Allocate("", "results.xml")) - assert.Equal(t, "existing-1", a.Allocate("", "existing")) // collides with pre-existing - assert.Equal(t, "sboms-a-json", a.Allocate("sboms", "a.json")) // prefix -} - -func TestIsArchiveNativeKind(t *testing.T) { - assert.True(t, IsArchiveNativeKind("ZAP_DAST_ZIP")) - assert.False(t, IsArchiveNativeKind("SBOM_CYCLONEDX_JSON")) - assert.False(t, IsArchiveNativeKind("ARTIFACT")) -} -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `go test ./pkg/attestation/crafter/materials/ -run 'TestSanitizeMaterialName|TestNameAllocator|TestIsArchiveNativeKind' -v` -Expected: FAIL — undefined symbols. - -- [ ] **Step 3: Write minimal implementation** - -Append to `archive.go` (add `schemaapi "github.com/chainloop-dev/chainloop/app/controlplane/api/workflowcontract/v1"` to imports): - -```go -// archiveNativeKinds lists material kinds whose value is the archive itself. -// For these, --kind short-circuits the explode path and the archive is -// recorded whole. Extend this set as new "the archive is the material" kinds -// are added. -var archiveNativeKinds = map[string]struct{}{ - schemaapi.CraftingSchema_Material_ZAP_DAST_ZIP.String(): {}, -} - -// IsArchiveNativeKind reports whether kind treats the archive as a single -// material (recorded whole) rather than something to explode. -func IsArchiveNativeKind(kind string) bool { - _, ok := archiveNativeKinds[kind] - return ok -} - -// SanitizeMaterialName converts s into a valid DNS-1123 material name: -// lowercase, with every run of characters outside [a-z0-9] collapsed to a -// single "-" and leading/trailing "-" trimmed. Falls back to "material". -func SanitizeMaterialName(s string) string { - var b strings.Builder - pendingHyphen := false - for _, r := range strings.ToLower(s) { - if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') { - if pendingHyphen && b.Len() > 0 { - b.WriteByte('-') - } - b.WriteRune(r) - pendingHyphen = false - } else { - pendingHyphen = true - } - } - if b.Len() == 0 { - return "material" - } - return b.String() -} - -// NameAllocator hands out unique DNS-1123 material names, suffixing collisions -// with -1, -2, …. It is seeded with names already present in the attestation -// so derived names never overwrite existing materials. -type NameAllocator struct { - used map[string]struct{} -} - -// NewNameAllocator seeds the allocator with existing material names. -func NewNameAllocator(existing []string) *NameAllocator { - used := make(map[string]struct{}, len(existing)) - for _, e := range existing { - used[e] = struct{}{} - } - return &NameAllocator{used: used} -} - -// Allocate returns a unique name derived from base (and optional prefix). -func (a *NameAllocator) Allocate(prefix, base string) string { - name := SanitizeMaterialName(base) - if prefix != "" { - name = SanitizeMaterialName(prefix) + "-" + name - } - - candidate := name - for i := 1; ; i++ { - if _, taken := a.used[candidate]; !taken { - a.used[candidate] = struct{}{} - return candidate - } - candidate = fmt.Sprintf("%s-%d", name, i) - } -} -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `go test ./pkg/attestation/crafter/materials/ -run 'TestSanitizeMaterialName|TestNameAllocator|TestIsArchiveNativeKind' -v` -Expected: PASS. - -- [ ] **Step 5: Commit** - -```bash -gofmt -w pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go -git add pkg/attestation/crafter/materials/archive.go pkg/attestation/crafter/materials/archive_test.go -git commit -S -s -m "feat(cli): add archive-native allowlist and entry name allocation - -Assisted-by: Claude Code" -``` - ---- - -## Task 4: Extract `stageMaterial` from `addMaterial` (no behavior change) - -This is a pure refactor that makes a single atomic commit possible: split the -"craft + validate + evaluate policies + attach to in-memory state" work from the -final persistence write. - -**Files:** -- Modify: `pkg/attestation/crafter/crafter.go:684-794` - -**Interfaces:** -- Produces: - - `func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error)` — everything `addMaterial` did **except** the final `c.stateManager.Write`. Mutates `c.CraftingState` in memory. - - `addMaterial` keeps its signature/behavior; now `stageMaterial` + a single `Write`. - -- [ ] **Step 1: Refactor — split the method** - -Replace the body of `addMaterial` (lines 684-794) so the crafting work moves into `stageMaterial`. `stageMaterial` is the current body up to and including step "5 - Attach it to state" (the `c.CraftingState.Attestation.Materials[m.Name] = mt` line). `addMaterial` becomes: - -```go -// addMaterial crafts a single material and persists the crafting state. -func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { - mt, err := c.stageMaterial(ctx, m, value, casBackend, runtimeAnnotations, opts...) - if err != nil { - return nil, err - } - - if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { - return nil, fmt.Errorf("failed to persist crafting state: %w", err) - } - - c.Logger.Debug().Str("key", m.Name).Msg("added to state") - return mt, nil -} -``` - -And `stageMaterial` holds the moved code (note: it no longer takes `attestationID`, since only the `Write` used it): - -```go -// stageMaterial crafts a material into the in-memory crafting state WITHOUT -// persisting it. Callers must call stateManager.Write to commit. Splitting the -// write out lets the archive explode path craft many entries and commit once. -func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { - // ... moved body: addOpts handling, materials.Craft, annotations merge, - // annotation completeness check, protovalidate, policy-evaluation dedup + - // VerifyMaterial (group and regular), attach to c.CraftingState.Attestation.Materials. - // Ends by returning mt, nil — NO stateManager.Write here. -} -``` - -- [ ] **Step 2: Run existing crafter tests to verify no behavior change** - -Run: `go test ./pkg/attestation/crafter/... 2>&1 | tail -20` -Expected: PASS (same set of tests that passed before the refactor). If integration-gated, use `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/...`. - -- [ ] **Step 3: Commit** - -```bash -gofmt -w pkg/attestation/crafter/crafter.go -git add pkg/attestation/crafter/crafter.go -git commit -S -s -m "refactor(crafter): split material crafting from state persistence - -Assisted-by: Claude Code" -``` - ---- - -## Task 5: Atomic `AddMaterialsFromArchive` on the crafter - -**Files:** -- Modify: `pkg/attestation/crafter/crafter.go` -- Test: `pkg/attestation/crafter/crafter_test.go` - -**Interfaces:** -- Consumes: `materials.DetectArchive`, `materials.WalkArchiveEntries`, `materials.NewNameAllocator`, `materials.ArchiveLimits` (Tasks 1-3); `c.stageMaterial` (Task 4). -- Produces: - - `func (c *Crafter) AddMaterialsFromArchive(ctx context.Context, attestationID, kind, namePrefix, archivePath string, format materials.ArchiveFormat, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, limits materials.ArchiveLimits, opts ...AddOpt) ([]*api.Attestation_Material, error)` - - Validates `kind` against `schemaapi.CraftingSchema_Material_MaterialType_value` (same as `AddMaterialContractFree`). - - Seeds a `NameAllocator` with existing `c.CraftingState.Attestation.Materials` keys. - - For each entry: copy to a temp file named with the entry basename, build a `schemaapi.CraftingSchema_Material{Optional: true, Type: kind, Name: }`, call `c.stageMaterial`. Accumulate results. - - Errors abort before any `Write` (state on disk unchanged); the error names the archive and inner entry. - - Errors if **zero** qualifying entries were added. - - After all succeed: a single `c.stateManager.Write`. - -- [ ] **Step 1: Write the failing test** - -Append to `crafter_test.go` a test in the existing suite style. Use a real `ARTIFACT` kind so no external services are needed (inline CAS backend, dry run). Build the zip with the test helper from Task 1 (or inline). Sketch: - -```go -func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { - t := s.T() - runner := crafter.NewGithubAction(s.dryRun, s.Logger) - c, err := newInitializedCrafter(t, "testdata/contracts/empty.cue", &v1.WorkflowMetadata{}, true, "", runner) - require.NoError(t, err) - - dir := t.TempDir() - zipPath := filepath.Join(dir, "bundle.zip") - zf, err := os.Create(zipPath) - require.NoError(t, err) - zw := zip.NewWriter(zf) - for _, n := range []string{"a.txt", "b.txt"} { - w, _ := zw.Create(n) - _, _ = w.Write([]byte("hello " + n)) - } - require.NoError(t, zw.Close()) - require.NoError(t, zf.Close()) - - backend := &casclient.CASBackend{Name: "not-set"} - got, err := c.AddMaterialsFromArchive(context.Background(), "", "ARTIFACT", "", zipPath, materials.ArchiveZip, backend, nil, materials.DefaultArchiveLimits()) - require.NoError(t, err) - require.Len(t, got, 2) - - st := c.CraftingState.GetAttestation().GetMaterials() - assert.Contains(t, st, "a-txt") - assert.Contains(t, st, "b-txt") - - // Atomic failure: too-tight limit adds nothing. - c2, err := newInitializedCrafter(t, "testdata/contracts/empty.cue", &v1.WorkflowMetadata{}, true, "", runner) - require.NoError(t, err) - _, err = c2.AddMaterialsFromArchive(context.Background(), "", "ARTIFACT", "", zipPath, materials.ArchiveZip, backend, nil, materials.ArchiveLimits{MaxEntries: 1, MaxTotalSize: 1 << 30}) - require.ErrorIs(t, err, materials.ErrTooManyEntries) - assert.Empty(t, c2.CraftingState.GetAttestation().GetMaterials()) -} -``` - -Confirm the argument order matches the signature in **Interfaces** (note `attestationID` is the first non-ctx arg). Adjust the contract fixture path to one that already exists in `crafter_test.go` (look for the constant the other suite tests pass to `newInitializedCrafter`). - -- [ ] **Step 2: Run test to verify it fails** - -Run: `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/ -run 'TestSuite/TestAddMaterialsFromArchiveAtomic' -v` -Expected: FAIL — `c.AddMaterialsFromArchive undefined`. - -- [ ] **Step 3: Write minimal implementation** - -Add to `crafter.go` (imports: `io`, `os`, `path/filepath`, and the `materials` package is already imported): - -```go -// AddMaterialsFromArchive explodes an archive and records each contained -// regular file as its own material of the given kind, atomically: either all -// qualifying entries are added and the state is persisted once, or nothing is -// written. Entry names are derived from their basenames (DNS-1123 sanitized, -// collision-suffixed); when namePrefix is set, names are "-". -func (c *Crafter) AddMaterialsFromArchive(ctx context.Context, attestationID, kind, namePrefix, archivePath string, format materials.ArchiveFormat, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, limits materials.ArchiveLimits, opts ...AddOpt) ([]*api.Attestation_Material, error) { - if err := c.requireStateLoaded(); err != nil { - return nil, fmt.Errorf("adding materials from archive: %w", err) - } - - if _, found := schemaapi.CraftingSchema_Material_MaterialType_value[kind]; !found { - return nil, fmt.Errorf("%q kind not found. Available options are %q", kind, schemaapi.ListAvailableMaterialKind()) - } - kindType := schemaapi.CraftingSchema_Material_MaterialType(schemaapi.CraftingSchema_Material_MaterialType_value[kind]) - - existing := make([]string, 0, len(c.CraftingState.GetAttestation().GetMaterials())) - for name := range c.CraftingState.GetAttestation().GetMaterials() { - existing = append(existing, name) - } - allocator := materials.NewNameAllocator(existing) - - tmpDir, err := os.MkdirTemp("", "chainloop-explode-*") - if err != nil { - return nil, fmt.Errorf("creating temp dir: %w", err) - } - defer os.RemoveAll(tmpDir) - - var added []*api.Attestation_Material - err = materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error { - entryPath := filepath.Join(tmpDir, filepath.Base(name)) - out, err := os.Create(entryPath) - if err != nil { - return fmt.Errorf("staging entry to disk: %w", err) - } - if _, err := io.Copy(out, r); err != nil { - out.Close() - return err // may be materials.ErrArchiveTooLarge - } - out.Close() - - m := &schemaapi.CraftingSchema_Material{ - Optional: true, - Type: kindType, - Name: allocator.Allocate(namePrefix, filepath.Base(name)), - } - mt, err := c.stageMaterial(ctx, m, entryPath, casBackend, runtimeAnnotations, opts...) - if err != nil { - return err - } - added = append(added, mt) - - // Remove the per-entry temp file as we go to bound disk usage. - _ = os.Remove(entryPath) - return nil - }) - if err != nil { - return nil, fmt.Errorf("exploding archive %q: %w", archivePath, err) - } - - if len(added) == 0 { - return nil, fmt.Errorf("archive %q contained no addable files", archivePath) - } - - if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { - return nil, fmt.Errorf("failed to persist crafting state: %w", err) - } - - c.Logger.Info().Int("materials", len(added)).Str("archive", filepath.Base(archivePath)).Msg("archive exploded into materials") - return added, nil -} -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/ -run 'TestSuite/TestAddMaterialsFromArchiveAtomic' -v` -Expected: PASS. - -- [ ] **Step 5: Commit** - -```bash -gofmt -w pkg/attestation/crafter/crafter.go pkg/attestation/crafter/crafter_test.go -git add pkg/attestation/crafter/crafter.go pkg/attestation/crafter/crafter_test.go -git commit -S -s -m "feat(crafter): add atomic AddMaterialsFromArchive - -Assisted-by: Claude Code" -``` - ---- - -## Task 6: Route the explode path in `AttestationAdd.Run` - -**Files:** -- Modify: `app/cli/pkg/action/attestation_add.go:33-179` - -**Interfaces:** -- Consumes: `crafter.AddMaterialsFromArchive`, `materials.DetectArchive`, `materials.IsArchiveNativeKind`, `materials.ArchiveLimits` (Tasks 1-5). -- Produces: - - `AttestationAddOpts` and `AttestationAdd` gain `MaxExtractEntries int` and `MaxExtractSize int64`. - - `Run` signature changes to return `([]*AttestationStatusMaterial, error)`. Normal path returns a 1-element slice; explode path returns N. - -- [ ] **Step 1: Write the failing test** - -Action-layer tests need a loaded crafter; the crafter behavior is already covered in Task 5. Here, assert the **routing decision** with a small table test against a helper extracted from `Run`. Add to a new `app/cli/pkg/action/attestation_add_routing_test.go`: - -```go -package action - -import ( - "testing" - - "github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestShouldExplode(t *testing.T) { - dir := t.TempDir() - zipPath := writeTestZip(t, dir, "s.zip") // helper writing a 1-file zip - - tests := []struct { - name, kind, value string - wantExplode bool - }{ - {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true}, - {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false}, - {"no kind", "", zipPath, false}, - {"kind + non-archive", "ARTIFACT", "/etc/hostname", false}, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - format, explode, err := shouldExplode(tc.kind, tc.value) - require.NoError(t, err) - assert.Equal(t, tc.wantExplode, explode) - if explode { - assert.NotEqual(t, materials.ArchiveNone, format) - } - }) - } -} -``` - -Add a `writeTestZip` helper in the test file (mirror Task 1's `writeZip`). - -- [ ] **Step 2: Run test to verify it fails** - -Run: `go test ./app/cli/pkg/action/ -run TestShouldExplode -v` -Expected: FAIL — `undefined: shouldExplode`. - -- [ ] **Step 3: Write the implementation** - -Add the helper to `attestation_add.go`: - -```go -// shouldExplode decides whether an att-add should explode the value into many -// materials: only when --kind is set, the value is a supported archive, and the -// kind is not archive-native (e.g. ZAP_DAST_ZIP, which is recorded whole). -func shouldExplode(materialType, value string) (materials.ArchiveFormat, bool, error) { - if materialType == "" || materials.IsArchiveNativeKind(materialType) { - return materials.ArchiveNone, false, nil - } - format, err := materials.DetectArchive(value) - if err != nil { - return materials.ArchiveNone, false, err - } - return format, format != materials.ArchiveNone, nil -} -``` - -Add the limit fields to `AttestationAddOpts` and `AttestationAdd`, wire them in `NewAttestationAdd` (defaulting to `materials.DefaultArchiveLimits()` values when zero), then change `Run`: - -```go -func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) ([]*AttestationStatusMaterial, error) { - // ... unchanged crafter setup, runtime inputs, init/load checks, CAS backend ... - - addOpts := runtimeInputAddOpts(runtimeInputs) - - // Explode path: --kind set, value is a (non-archive-native) archive. - format, explode, err := shouldExplode(materialType, materialValue) - if err != nil { - return nil, fmt.Errorf("detecting archive: %w", err) - } - if explode { - limits := materials.ArchiveLimits{MaxEntries: action.maxExtractEntries, MaxTotalSize: action.maxExtractSize} - mts, err := crafter.AddMaterialsFromArchive(ctx, attestationID, materialType, materialName, materialValue, format, casBackend, annotations, limits, addOpts...) - if err != nil { - return nil, fmt.Errorf("adding materials from archive: %w", err) - } - results := make([]*AttestationStatusMaterial, 0, len(mts)) - for _, mt := range mts { - r, err := attMaterialToAction(mt) - if err != nil { - return nil, fmt.Errorf("converting material to action: %w", err) - } - results = append(results, r) - } - return results, nil - } - - // ... existing single-material switch, but wrap the single result: - // return []*AttestationStatusMaterial{materialResult}, nil -} -``` - -Note: policy-input-from-file evidence (`addPolicyInputEvidence`) is only reachable on the single-material path; the explode path does not accept `--policy-input-from-file` mapping to N materials (out of scope — leave the existing single-path logic untouched). - -- [ ] **Step 4: Run tests to verify they pass** - -Run: `go test ./app/cli/pkg/action/ -run TestShouldExplode -v && go build ./app/cli/...` -Expected: PASS and the action package builds (the `Run` callers are updated in Task 7; until then `go build ./app/cli/...` may fail at the cmd caller — if so, do Task 7 before building, and run only the `-run TestShouldExplode` test here). - -- [ ] **Step 5: Commit** - -```bash -gofmt -w app/cli/pkg/action/attestation_add.go app/cli/pkg/action/attestation_add_routing_test.go -git add app/cli/pkg/action/attestation_add.go app/cli/pkg/action/attestation_add_routing_test.go -git commit -S -s -m "feat(cli): route att add to archive explode path - -Assisted-by: Claude Code" -``` - ---- - -## Task 7: CLI flags + multi-material rendering - -**Files:** -- Modify: `app/cli/cmd/attestation_add.go` - -**Interfaces:** -- Consumes: `AttestationAdd.Run` now returns `[]*AttestationStatusMaterial` (Task 6). -- Produces: two flags — `--max-extract-entries` (int, default `10000`) and `--max-extract-size` (string, e.g. `1GiB`, parsed with `bytefmt`/existing helper, default `1 << 30`) — passed into `AttestationAddOpts`. The `RunE` loops the returned slice and renders each material with its policy evaluations. - -- [ ] **Step 1: Add the flags and wire them** - -In `newAttestationAddCmd`, declare `var maxExtractEntries int` and `var maxExtractSize string`, register: - -```go -cmd.Flags().IntVar(&maxExtractEntries, "max-extract-entries", 10000, "max number of files to extract when --value is an archive") -cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GiB", "max total uncompressed size to extract when --value is an archive") -``` - -Parse `maxExtractSize` (reuse `bytefmt.ToBytes`, already a dependency — see `materials.go` import `code.cloudfoundry.org/bytefmt`) and set `MaxExtractEntries` / `MaxExtractSize` on `AttestationAddOpts`. - -- [ ] **Step 2: Update the result rendering** - -Change the `a.Run(...)` call site to receive a slice and render each entry: - -```go -resp, err := a.Run(cmd.Context(), attestationID, name, rawValuePath, kind, annotations, policyInputFiles) -if err != nil { - return err -} - -logger.Info().Int("materials", len(resp)).Msg("material(s) added to attestation") - -policies, err := a.GetPolicyEvaluations(cmd.Context(), attestationID) -if err != nil { - return err -} - -for _, m := range resp { - if err := output.EncodeOutput(flagOutputFormat, m, func(s *action.AttestationStatusMaterial) error { - return displayMaterialInfo(s, policies[m.Name]) - }); err != nil { - return err - } -} -return nil -``` - -- [ ] **Step 3: Build and smoke-test compilation** - -Run: `go build ./app/cli/... && go vet ./app/cli/...` -Expected: builds clean. - -- [ ] **Step 4: Commit** - -```bash -gofmt -w app/cli/cmd/attestation_add.go -git add app/cli/cmd/attestation_add.go -git commit -S -s -m "feat(cli): add archive extraction guard flags and multi-material output - -Assisted-by: Claude Code" -``` - ---- - -## Task 8: End-to-end behavior tests + docs regen + gates - -**Files:** -- Modify: `pkg/attestation/crafter/crafter_test.go` (collision, name-prefix, skip dirs/symlinks, zip-slip, mixed kinds-as-same-kind cases) -- Regen: CLI docs via `make generate` - -- [ ] **Step 1: Add table-driven behavior tests** - -Extend the Task-5 suite test (or add a sibling) covering, table-driven, the **spec's** behavior matrix on the explode path (all with `ARTIFACT`/`STRING` kind so no network needed): - -- Name collision: a zip with `scan.json` and `nested/scan.json` → materials `scan-json` and `scan-json-1`. -- `--name` prefix: namePrefix `sboms` + `a.json` → `sboms-a-json`. -- Directory + symlink entries → skipped (only regular files become materials). -- Zip-slip entry (`../evil`) → `WalkArchiveEntries` returns an unsafe-path error and nothing is written. -- Guard: `MaxTotalSize` smaller than the content → `ErrArchiveTooLarge`, nothing written. -- `.tar.gz` input via `ArchiveTarGz` → N materials. - -- [ ] **Step 2: Run the full package tests** - -Run: `SKIP_INTEGRATION=true go test ./pkg/attestation/crafter/... ./app/cli/...` -Expected: PASS. - -- [ ] **Step 3: Regenerate CLI docs** - -Run: `make generate` -Expected: updates the `add` command docs to include `--max-extract-entries` / `--max-extract-size`. Review the diff. - -- [ ] **Step 4: Lint** - -Run: `make -C app/cli lint && golangci-lint run ./pkg/attestation/crafter/...` -Expected: no findings. Fix any. - -- [ ] **Step 5: Commit** - -```bash -gofmt -w pkg/attestation/crafter/crafter_test.go -git add pkg/attestation/crafter/crafter_test.go app/cli/docs/ -git commit -S -s -m "test(crafter): cover archive explode behavior; regen CLI docs - -Assisted-by: Claude Code" -``` - ---- - -## Self-Review - -**Spec coverage:** -- Trigger/detection (extension + magic) → Task 1. Archive-native short-circuit → Task 3 (`IsArchiveNativeKind`) + Task 6 routing. -- Behavior matrix → Task 6 routing + Task 8 tests. (Row "no `--kind`" intentionally falls to the unchanged single path per the out-of-scope constraint.) -- Naming (basename, collision suffix, sanitization) → Task 3 (`SanitizeMaterialName` + `NameAllocator`), exercised in Task 8. **Correction vs. doc:** names are DNS-1123-sanitized (`scan.json` → `scan-json`), since raw basenames are invalid material names. -- `--name` as prefix → Task 3 (`Allocate(prefix, base)`) + Task 6 (passes `materialName`). -- Annotations to every entry → Task 5 (`runtimeAnnotations` passed per `stageMaterial`). -- Kind per entry (same `--kind`) → Task 5. (Per-entry auto-detection is out of scope.) -- Skipped entries (dirs/symlinks/empty) + zip-slip guard → Task 2, tested Task 8. -- Atomicity → Tasks 4-5 (stage-then-single-write); failure leaves disk state unchanged, tested Task 5/8. -- Resource guards (max entries/size, streaming) → Task 2 (`capReader`, `ArchiveLimits`), flags in Task 7. -- Recursion not supported → nested archives are regular files (no special handling); covered by "treated as one material" since a nested `.zip` entry is crafted as the given kind. -- Implementation sketch (archive util, action branch, cmd flags, allowlist) → Tasks 1-3, 6, 7, 3 respectively. -- Test plan → Tasks 1-3, 5, 8. - -**Open questions resolved:** `--name` → prefix (Task 3/6). Guard flag names/defaults → `--max-extract-entries=10000`, `--max-extract-size=1GiB` (Task 7). Formats → zip/tar/tar.gz/tgz only (Global Constraints). - -**Placeholder scan:** none — every code step shows the code; the one prose-only step (Task 4 moved body) describes an exact mechanical move of existing lines. - -**Type consistency:** `AddMaterialsFromArchive` signature is identical in Task 5 (definition) and Task 6 (call). `shouldExplode` returns `(materials.ArchiveFormat, bool, error)` in both Task 6 steps. `Run` returns `[]*AttestationStatusMaterial` in Tasks 6 and 7. `NameAllocator.Allocate(prefix, base)` order matches across Tasks 3, 5, 8. diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index fa8f23caa..bd633150c 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -676,7 +676,9 @@ func (s *crafterSuite) TestAddMaterialsAutomatic() { } func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { - const zipFixture = "testdata/two-files.zip" + // Build the fixture in-process so no binary blob is checked in. + zipFixture := filepath.Join(s.T().TempDir(), "two-files.zip") + buildZip(s.T(), zipFixture, map[string]string{"alpha.txt": "alpha", "beta.txt": "beta"}) s.Run("happy path: two files produce two materials", func() { runner := runners.NewGeneric() diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index 9af400b4c..1eccef7fb 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -133,7 +133,7 @@ func (c *capReader) Read(p []byte) (int, error) { func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error { var total int64 count := 0 - visit := func(name string, size int64, r io.Reader) error { + visit := func(name string, r io.Reader) error { if !safeArchivePath(name) { return fmt.Errorf("%w: %q", ErrUnsafeEntry, name) } @@ -179,7 +179,7 @@ func safeArchivePath(name string) bool { return strings.HasPrefix(clean, root+"/") || clean == root } -func walkZip(p string, visit func(name string, size int64, r io.Reader) error) error { +func walkZip(p string, visit func(name string, r io.Reader) error) error { zr, err := zip.OpenReader(p) if err != nil { return fmt.Errorf("opening zip: %w", err) @@ -187,6 +187,14 @@ func walkZip(p string, visit func(name string, size int64, r io.Reader) error) e defer zr.Close() for _, f := range zr.File { + // Skip directories, symlinks, and empty entries: they carry no file + // content worth recording as a material. Empty-entry skipping is + // intentional per the explode design (an empty evidence file produces + // no material). Note: symlink detection relies on Unix mode bits stored + // in the zip; archives written without Unix metadata won't carry the + // symlink bit, so such a symlink would be treated as a regular file + // (its content being the stored target path). Tar symlinks are detected + // reliably via the typeflag below. if f.FileInfo().IsDir() || f.Mode()&os.ModeSymlink != 0 || f.UncompressedSize64 == 0 { continue } @@ -194,7 +202,7 @@ func walkZip(p string, visit func(name string, size int64, r io.Reader) error) e if err != nil { return fmt.Errorf("opening entry %q: %w", f.Name, err) } - err = visit(f.Name, int64(f.UncompressedSize64), rc) + err = visit(f.Name, rc) rc.Close() if err != nil { return err @@ -203,7 +211,7 @@ func walkZip(p string, visit func(name string, size int64, r io.Reader) error) e return nil } -func walkTar(p string, gzipped bool, visit func(name string, size int64, r io.Reader) error) error { +func walkTar(p string, gzipped bool, visit func(name string, r io.Reader) error) error { f, err := os.Open(p) if err != nil { return fmt.Errorf("opening tar: %w", err) @@ -229,10 +237,13 @@ func walkTar(p string, gzipped bool, visit func(name string, size int64, r io.Re if err != nil { return fmt.Errorf("reading tar: %w", err) } + // Only regular files become materials; directories, symlinks, hardlinks + // and other special entries are skipped via the typeflag. Empty entries + // are skipped intentionally (an empty evidence file produces no material). if hdr.Typeflag != tar.TypeReg || hdr.Size == 0 { continue } - if err := visit(hdr.Name, hdr.Size, tr); err != nil { + if err := visit(hdr.Name, tr); err != nil { return err } } diff --git a/pkg/attestation/crafter/testdata/two-files.zip b/pkg/attestation/crafter/testdata/two-files.zip deleted file mode 100644 index 4b118a655be83a7369acb7168168e289b9d67fc1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 245 zcmWIWW@Zs#U|`^2SU2HrjNR;|Z-jw7K_KP?;>4VSj6}VXijvbFXHT9C_s~9dKGfrk z<_V7{Obh`iIzN45_%8(1Appc2K%A6Xf}zn%*AuLdkx7IZx1B)EV9>w_qTntF@J7{! gZXrYy149F&6_5$DKERum4J5+^gt Date: Tue, 30 Jun 2026 09:52:39 +0200 Subject: [PATCH 15/23] fix(cli): address archive-explode review comments Validate --max-extract-size before the uint64->int64 cast so oversized values are rejected instead of wrapping negative. Check the per-entry temp file Close error before staging so a failed flush never produces an incomplete material, and write each entry into its own temp subdirectory under its original basename so the recorded artifact filename keeps the real name rather than the sanitized material key. Reject Windows drive-letter absolute paths in safeArchivePath, and only swallow not-found errors in detectByMagic so permission/I/O errors surface. Treat non-positive extraction limits as use-default. Assert the exact detected archive format and the preserved filename in tests. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- app/cli/cmd/attestation_add.go | 6 ++++ app/cli/pkg/action/attestation_add.go | 4 +-- .../action/attestation_add_routing_test.go | 15 +++++----- pkg/attestation/crafter/crafter.go | 24 ++++++++++----- pkg/attestation/crafter/crafter_test.go | 9 ++++-- pkg/attestation/crafter/materials/archive.go | 29 ++++++++++++++----- .../crafter/materials/archive_test.go | 2 ++ 7 files changed, 63 insertions(+), 26 deletions(-) diff --git a/app/cli/cmd/attestation_add.go b/app/cli/cmd/attestation_add.go index 911ec6651..363def5cf 100644 --- a/app/cli/cmd/attestation_add.go +++ b/app/cli/cmd/attestation_add.go @@ -18,6 +18,7 @@ package cmd import ( "errors" "fmt" + "math" "os" "code.cloudfoundry.org/bytefmt" @@ -81,6 +82,11 @@ func newAttestationAddCmd() *cobra.Command { if err != nil { return fmt.Errorf("invalid --max-extract-size %q: %w", maxExtractSize, err) } + // Guard against the uint64->int64 cast wrapping negative, which would + // later surface as a misleading "archive too large" error. + if maxExtractSizeBytes > math.MaxInt64 { + return fmt.Errorf("--max-extract-size %q is too large", maxExtractSize) + } a, err := action.NewAttestationAdd( &action.AttestationAddOpts{ diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go index 939261aa0..198ce8641 100644 --- a/app/cli/pkg/action/attestation_add.go +++ b/app/cli/pkg/action/attestation_add.go @@ -78,11 +78,11 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) { defaults := materials.DefaultArchiveLimits() maxEntries := cfg.MaxExtractEntries - if maxEntries == 0 { + if maxEntries <= 0 { maxEntries = defaults.MaxEntries } maxSize := cfg.MaxExtractSize - if maxSize == 0 { + if maxSize <= 0 { maxSize = defaults.MaxTotalSize } diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go index 8104f9a7f..f40f01dbe 100644 --- a/app/cli/pkg/action/attestation_add_routing_test.go +++ b/app/cli/pkg/action/attestation_add_routing_test.go @@ -57,15 +57,16 @@ func TestShouldExplode(t *testing.T) { kind string value string wantExplode bool + wantFormat materials.ArchiveFormat }{ - {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true}, - {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false}, - {"no kind", "", zipPath, false}, - {"kind + non-archive", "ARTIFACT", plainPath, false}, + {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true, materials.ArchiveZip}, + {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false, materials.ArchiveNone}, + {"no kind", "", zipPath, false, materials.ArchiveNone}, + {"kind + non-archive", "ARTIFACT", plainPath, false, materials.ArchiveNone}, // Non-file values must never return an error — STRING and CONTAINER_IMAGE // carry values that are not file paths at all. - {"kind STRING non-file value", "STRING", "hello world", false}, - {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", false}, + {"kind STRING non-file value", "STRING", "hello world", false, materials.ArchiveNone}, + {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", false, materials.ArchiveNone}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -73,7 +74,7 @@ func TestShouldExplode(t *testing.T) { require.NoError(t, err) assert.Equal(t, tc.wantExplode, explode) if explode { - assert.NotEqual(t, materials.ArchiveNone, format) + assert.Equal(t, tc.wantFormat, format) } }) } diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index 09e643afb..8c72a4a93 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -868,10 +868,15 @@ func (c *Crafter) AddMaterialsFromArchive( base := filepath.Base(name) matName := allocator.Allocate(namePrefix, base) - // Use the allocated unique material name for the temp file so that two - // archive entries with the same basename (e.g. "a/x.json" and "b/x.json") - // never collide in the shared tmpDir. - tmpPath := filepath.Join(tmpDir, matName) + // Give each entry its own temp subdirectory (named by the unique material + // name) so two entries sharing a basename (e.g. "a/x.json" and "b/x.json") + // never collide, while the temp file itself keeps the original basename so + // the recorded material metadata preserves the real filename. + entryDir, err := os.MkdirTemp(tmpDir, matName+"-*") + if err != nil { + return fmt.Errorf("creating temp dir for entry %q: %w", name, err) + } + tmpPath := filepath.Join(entryDir, base) tmp, err := os.Create(tmpPath) if err != nil { return fmt.Errorf("creating temp file for entry %q: %w", name, err) @@ -881,7 +886,10 @@ func (c *Crafter) AddMaterialsFromArchive( tmp.Close() return fmt.Errorf("writing entry %q to temp file: %w", name, err) } - tmp.Close() + // Check the Close error so a failed flush does not stage an incomplete file. + if err := tmp.Close(); err != nil { + return fmt.Errorf("closing temp file for entry %q: %w", name, err) + } m := &schemaapi.CraftingSchema_Material{ Optional: true, @@ -890,9 +898,9 @@ func (c *Crafter) AddMaterialsFromArchive( } mt, err := c.stageMaterial(ctx, m, tmpPath, casBackend, runtimeAnnotations, opts...) - // Remove the temp file immediately after staging to keep disk usage bounded; - // the deferred os.RemoveAll(tmpDir) is the safety net. - os.Remove(tmpPath) //nolint:errcheck // best-effort cleanup + // Remove the entry's temp subdir immediately after staging to keep disk + // usage bounded; the deferred os.RemoveAll(tmpDir) is the safety net. + os.RemoveAll(entryDir) //nolint:errcheck // best-effort cleanup if err != nil { return fmt.Errorf("staging entry %q as material %q: %w", name, matName, err) } diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index bd633150c..51fcc3aec 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -707,10 +707,15 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { assert.Len(s.T(), stateMap, 2) // Both derived names must be present (sanitized base names with prefix). - _, hasAlpha := stateMap["entry-alpha-txt"] - _, hasBeta := stateMap["entry-beta-txt"] + alpha, hasAlpha := stateMap["entry-alpha-txt"] + beta, hasBeta := stateMap["entry-beta-txt"] assert.True(s.T(), hasAlpha, "expected material entry-alpha-txt in state") assert.True(s.T(), hasBeta, "expected material entry-beta-txt in state") + + // The recorded artifact filename must preserve the original entry + // basename, not the sanitized material key. + assert.Equal(s.T(), "alpha.txt", alpha.GetArtifact().GetName()) + assert.Equal(s.T(), "beta.txt", beta.GetArtifact().GetName()) }) s.Run("atomicity: over-tight limit leaves state empty", func() { diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index 1eccef7fb..32c4662c6 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -23,6 +23,7 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path" "strings" @@ -61,11 +62,14 @@ func DetectArchive(path string) (ArchiveFormat, error) { func detectByMagic(path string) (ArchiveFormat, error) { f, err := os.Open(path) if err != nil { - // If the file doesn't exist, the value is not a file path at all (e.g. - // "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE). - // Treat it as a non-archive rather than propagating the error so callers - // that pass non-file values are not surprised. - return ArchiveNone, nil + // A not-found error means the value is not a file path at all (e.g. + // "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE); + // treat that as a non-archive so callers passing non-file values are not + // surprised. Any other error (permissions, I/O) is real and must surface. + if errors.Is(err, fs.ErrNotExist) { + return ArchiveNone, nil + } + return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) } defer f.Close() @@ -165,8 +169,9 @@ func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, // components equal to ".." are rejected. func safeArchivePath(name string) bool { normalized := strings.ReplaceAll(name, "\\", "/") - // Reject absolute paths. - if strings.HasPrefix(normalized, "/") { + // Reject absolute paths, including Windows drive-letter (e.g. "C:/x") and + // UNC paths (which normalize to a leading "/"). + if strings.HasPrefix(normalized, "/") || hasWindowsDriveLetter(normalized) { return false } // Canonicalise against a virtual root and check that the result stays @@ -179,6 +184,16 @@ func safeArchivePath(name string) bool { return strings.HasPrefix(clean, root+"/") || clean == root } +// hasWindowsDriveLetter reports whether name begins with a Windows drive-letter +// prefix such as "C:" or "c:/", which denotes an absolute path on Windows. +func hasWindowsDriveLetter(name string) bool { + if len(name) < 2 || name[1] != ':' { + return false + } + c := name[0] + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +} + func walkZip(p string, visit func(name string, r io.Reader) error) error { zr, err := zip.OpenReader(p) if err != nil { diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index 234b1bae7..0691effce 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -196,6 +196,8 @@ func TestSafeArchivePath(t *testing.T) { want bool }{ {"absolute path", "/etc/passwd", false}, + {"windows drive-letter backslash", "C:\\Windows\\system32", false}, + {"windows drive-letter forward slash", "c:/windows/system32", false}, {"path traversal", "../escape.txt", false}, {"nested path traversal", "foo/../../../etc/passwd", false}, {"double dot in filename is ok", "foo..bar.json", true}, From 43eddced8a58013ed1cd4a5936a3cae1ad070e6b Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Tue, 30 Jun 2026 11:45:14 +0200 Subject: [PATCH 16/23] fix(crafter): treat ENOTDIR as a non-archive in detectByMagic A non-file material value whose first path segment is an existing regular file (e.g. a CONTAINER_IMAGE ref like "registry/app:v1") yields ENOTDIR on open rather than not-found. Swallow it alongside fs.ErrNotExist so such values detect as non-archive instead of failing detection; genuine permission/I/O errors still surface. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/materials/archive.go | 13 ++++++++----- pkg/attestation/crafter/materials/archive_test.go | 5 +++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index 32c4662c6..faf108c41 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -27,6 +27,7 @@ import ( "os" "path" "strings" + "syscall" schemaapi "github.com/chainloop-dev/chainloop/app/controlplane/api/workflowcontract/v1" ) @@ -62,11 +63,13 @@ func DetectArchive(path string) (ArchiveFormat, error) { func detectByMagic(path string) (ArchiveFormat, error) { f, err := os.Open(path) if err != nil { - // A not-found error means the value is not a file path at all (e.g. - // "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE); - // treat that as a non-archive so callers passing non-file values are not - // surprised. Any other error (permissions, I/O) is real and must surface. - if errors.Is(err, fs.ErrNotExist) { + // These errors mean the value is not a file path at all (e.g. "hello + // world" for STRING, or "registry/app:v1" for CONTAINER_IMAGE where + // "registry" happens to be a regular file in the working directory, which + // yields ENOTDIR); treat them as a non-archive so callers passing non-file + // values are not surprised. Any other error (permissions, I/O) is real and + // must surface. + if errors.Is(err, fs.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) { return ArchiveNone, nil } return ArchiveNone, fmt.Errorf("opening %q: %w", path, err) diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index 0691effce..d7d9bf86d 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -107,6 +107,11 @@ func TestDetectArchive(t *testing.T) { {"tgz by extension", tgzShortPath, ArchiveTarGz}, {"plain file", plain, ArchiveNone}, {"zip without extension via magic", noExt, ArchiveZip}, + // Non-file values must detect as non-archive without erroring. + {"non-existent value", filepath.Join(dir, "nope"), ArchiveNone}, + // A value whose first path segment is an existing regular file yields + // ENOTDIR on open (e.g. CONTAINER_IMAGE "registry/app:v1"); still a non-archive. + {"path segment is a file (ENOTDIR)", filepath.Join(plain, "app:v1"), ArchiveNone}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { From 0693694ca1725fb98b5064e8ecf5ad62e798dd4f Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Tue, 30 Jun 2026 12:38:37 +0200 Subject: [PATCH 17/23] fix(crafter): derive archive entry basename with OS-independent semantics filepath.Base treats backslash as a separator only on Windows, so an entry name embedding a literal backslash produced different material names across platforms. Add materials.ArchiveEntryBaseName, which normalizes separators and uses path.Base, and derive the per-entry basename through it so exploded material names are identical on every OS. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter.go | 4 +++- pkg/attestation/crafter/materials/archive.go | 9 +++++++++ pkg/attestation/crafter/materials/archive_test.go | 15 +++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index 8c72a4a93..27f6c3c48 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -865,7 +865,9 @@ func (c *Crafter) AddMaterialsFromArchive( } walkErr := materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error { - base := filepath.Base(name) + // Derive the basename with archive ("/") semantics so it is identical on + // every OS, regardless of separators embedded in the entry name. + base := materials.ArchiveEntryBaseName(name) matName := allocator.Allocate(namePrefix, base) // Give each entry its own temp subdirectory (named by the unique material diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index faf108c41..079b80127 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -282,6 +282,15 @@ func IsArchiveNativeKind(kind string) bool { return ok } +// ArchiveEntryBaseName returns the final element of an archive entry name using +// archive ("/") path semantics, independent of the host OS. Archive entry names +// are "/"-separated by spec; backslashes are normalized first so names produced +// on Windows resolve to the same basename everywhere (filepath.Base would treat +// "\\" as a separator only on Windows, yielding OS-dependent results). +func ArchiveEntryBaseName(name string) string { + return path.Base(strings.ReplaceAll(name, "\\", "/")) +} + // SanitizeMaterialName converts s into a valid DNS-1123 material name: // lowercase, with every run of characters outside [a-z0-9] collapsed to a // single "-" and leading/trailing "-" trimmed. Falls back to "material". diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index d7d9bf86d..a21ce3864 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -219,6 +219,21 @@ func TestSafeArchivePath(t *testing.T) { } } +func TestArchiveEntryBaseName(t *testing.T) { + tests := []struct{ name, in, want string }{ + {"simple", "scan.json", "scan.json"}, + {"forward-slash path", "nested/dir/scan.json", "scan.json"}, + {"backslash path resolves the same on any OS", "nested\\dir\\scan.json", "scan.json"}, + {"mixed separators", "a/b\\c.json", "c.json"}, + {"no directory", "report.sarif", "report.sarif"}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, ArchiveEntryBaseName(tc.in)) + }) + } +} + func TestSanitizeMaterialName(t *testing.T) { tests := []struct{ in, want string }{ {"scan.json", "scan-json"}, From 5948bea7504b1ad7f6d13991fd1a83a58f131a05 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Tue, 30 Jun 2026 16:37:40 +0200 Subject: [PATCH 18/23] feat(crafter): name exploded archive materials sequentially MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-entry material names are now sequential rather than derived from the entry basename: material-1, material-2, … with no --name, or -1, -2, … when --name is provided (used as the prefix). The original entry filename is still preserved in the recorded artifact metadata. Replaces NameAllocator.Allocate with AllocateSequential. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter.go | 8 +-- pkg/attestation/crafter/crafter_test.go | 53 +++++++++++-------- pkg/attestation/crafter/materials/archive.go | 24 +++++---- .../crafter/materials/archive_test.go | 30 ++++++++--- 4 files changed, 72 insertions(+), 43 deletions(-) diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index 27f6c3c48..38d86e7f9 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -865,10 +865,12 @@ func (c *Crafter) AddMaterialsFromArchive( } walkErr := materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error { - // Derive the basename with archive ("/") semantics so it is identical on - // every OS, regardless of separators embedded in the entry name. + // Material names are sequential ("-1", "-2", … or + // "material-N" with no prefix). The original basename is still derived + // (with archive "/" semantics, OS-independently) and used for the temp + // file so the recorded artifact filename preserves the real name. base := materials.ArchiveEntryBaseName(name) - matName := allocator.Allocate(namePrefix, base) + matName := allocator.AllocateSequential(namePrefix) // Give each entry its own temp subdirectory (named by the unique material // name) so two entries sharing a basename (e.g. "a/x.json" and "b/x.json") diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 51fcc3aec..313d57a23 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -706,16 +706,17 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 2) - // Both derived names must be present (sanitized base names with prefix). - alpha, hasAlpha := stateMap["entry-alpha-txt"] - beta, hasBeta := stateMap["entry-beta-txt"] - assert.True(s.T(), hasAlpha, "expected material entry-alpha-txt in state") - assert.True(s.T(), hasBeta, "expected material entry-beta-txt in state") - - // The recorded artifact filename must preserve the original entry - // basename, not the sanitized material key. - assert.Equal(s.T(), "alpha.txt", alpha.GetArtifact().GetName()) - assert.Equal(s.T(), "beta.txt", beta.GetArtifact().GetName()) + // Material names are sequential with the --name value as prefix, + // independent of the entry order. + m1, has1 := stateMap["entry-1"] + m2, has2 := stateMap["entry-2"] + assert.True(s.T(), has1, "expected material entry-1 in state") + assert.True(s.T(), has2, "expected material entry-2 in state") + + // The recorded artifact filename must preserve each original entry + // basename, not the sequential material key. + gotFilenames := []string{m1.GetArtifact().GetName(), m2.GetArtifact().GetName()} + assert.ElementsMatch(s.T(), []string{"alpha.txt", "beta.txt"}, gotFilenames) }) s.Run("atomicity: over-tight limit leaves state empty", func() { @@ -838,13 +839,14 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 2) - _, hasScanJSON := stateMap["scan-json"] - _, hasScanJSON1 := stateMap["scan-json-1"] - assert.True(s.T(), hasScanJSON, "expected material scan-json in state") - assert.True(s.T(), hasScanJSON1, "expected material scan-json-1 in state (collision suffix)") + // Entries sharing a basename still get distinct sequential names. + _, hasMat1 := stateMap["material-1"] + _, hasMat2 := stateMap["material-2"] + assert.True(s.T(), hasMat1, "expected material material-1 in state") + assert.True(s.T(), hasMat2, "expected material material-2 in state") }) - s.Run("name prefix: prefix prepended to sanitized entry name", func() { + s.Run("name prefix: used as the sequential name prefix", func() { dir := s.T().TempDir() p := filepath.Join(dir, "prefix.zip") buildZip(s.T(), p, map[string]string{ @@ -866,8 +868,8 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 1) - _, found := stateMap["sboms-a-json"] - assert.True(s.T(), found, "expected material sboms-a-json in state") + _, found := stateMap["sboms-1"] + assert.True(s.T(), found, "expected material sboms-1 in state") }) s.Run("skip dirs and symlinks in tar.gz: only regular file becomes material", func() { @@ -894,8 +896,10 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 1) - _, hasReal := stateMap["real-txt"] - assert.True(s.T(), hasReal, "expected material real-txt in state") + real, hasReal := stateMap["material-1"] + assert.True(s.T(), hasReal, "expected material material-1 in state") + // The original filename is still preserved in the artifact metadata. + assert.Equal(s.T(), "real.txt", real.GetArtifact().GetName()) }) s.Run("traversal rejection: ../escape.txt entry causes error and empty state", func() { @@ -949,10 +953,13 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 2) - _, hasAlpha := stateMap["alpha-txt"] - _, hasBeta := stateMap["beta-txt"] - assert.True(s.T(), hasAlpha, "expected material alpha-txt in state") - assert.True(s.T(), hasBeta, "expected material beta-txt in state") + m1, has1 := stateMap["material-1"] + m2, has2 := stateMap["material-2"] + assert.True(s.T(), has1, "expected material material-1 in state") + assert.True(s.T(), has2, "expected material material-2 in state") + // Original filenames preserved regardless of the sequential keys. + gotFilenames := []string{m1.GetArtifact().GetName(), m2.GetArtifact().GetName()} + assert.ElementsMatch(s.T(), []string{"alpha.txt", "beta.txt"}, gotFilenames) }) } diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index 079b80127..8eb6444c1 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -314,11 +314,12 @@ func SanitizeMaterialName(s string) string { return b.String() } -// NameAllocator hands out unique DNS-1123 material names, suffixing collisions -// with -1, -2, …. It is seeded with names already present in the attestation -// so derived names never overwrite existing materials. +// NameAllocator hands out sequential, unique DNS-1123 material names of the +// form "-" (n starting at 1). It is seeded with names already present +// in the attestation so derived names never overwrite existing materials. type NameAllocator struct { used map[string]struct{} + seq int } // NewNameAllocator seeds the allocator with existing material names. @@ -330,19 +331,22 @@ func NewNameAllocator(existing []string) *NameAllocator { return &NameAllocator{used: used} } -// Allocate returns a unique name derived from base (and optional prefix). -func (a *NameAllocator) Allocate(prefix, base string) string { - name := SanitizeMaterialName(base) +// AllocateSequential returns the next unused "-" material name, where +// n is a counter that advances across calls and skips names already in use. +// prefix is sanitized to DNS-1123; an empty or symbol-only prefix yields the +// base "material" (so entries are named material-1, material-2, …). +func (a *NameAllocator) AllocateSequential(prefix string) string { + base := "material" if prefix != "" { - name = SanitizeMaterialName(prefix) + "-" + name + base = SanitizeMaterialName(prefix) } - candidate := name - for i := 1; ; i++ { + for { + a.seq++ + candidate := fmt.Sprintf("%s-%d", base, a.seq) if _, taken := a.used[candidate]; !taken { a.used[candidate] = struct{}{} return candidate } - candidate = fmt.Sprintf("%s-%d", name, i) } } diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index a21ce3864..e0cb6f459 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -246,14 +246,30 @@ func TestSanitizeMaterialName(t *testing.T) { } } -func TestNameAllocator(t *testing.T) { - a := NewNameAllocator([]string{"existing"}) +func TestNameAllocatorSequential(t *testing.T) { + t.Run("default prefix numbers from 1", func(t *testing.T) { + a := NewNameAllocator(nil) + assert.Equal(t, "material-1", a.AllocateSequential("")) + assert.Equal(t, "material-2", a.AllocateSequential("")) + assert.Equal(t, "material-3", a.AllocateSequential("")) + }) + + t.Run("custom prefix is sanitized and numbered", func(t *testing.T) { + a := NewNameAllocator(nil) + assert.Equal(t, "q3-scans-1", a.AllocateSequential("Q3 Scans")) + assert.Equal(t, "q3-scans-2", a.AllocateSequential("Q3 Scans")) + }) - assert.Equal(t, "scan-json", a.Allocate("", "scan.json")) - assert.Equal(t, "scan-json-1", a.Allocate("", "scan.json")) // collision - assert.Equal(t, "results-xml", a.Allocate("", "results.xml")) - assert.Equal(t, "existing-1", a.Allocate("", "existing")) // collides with pre-existing - assert.Equal(t, "sboms-a-json", a.Allocate("sboms", "a.json")) // prefix + t.Run("skips names already present in the attestation", func(t *testing.T) { + a := NewNameAllocator([]string{"material-1", "material-2"}) + assert.Equal(t, "material-3", a.AllocateSequential("")) + assert.Equal(t, "material-4", a.AllocateSequential("")) + }) + + t.Run("symbol-only prefix falls back to material", func(t *testing.T) { + a := NewNameAllocator(nil) + assert.Equal(t, "material-1", a.AllocateSequential("!!!")) + }) } func TestIsArchiveNativeKind(t *testing.T) { From 713274f0dcacec64dece43427b6b2b14dab41d9a Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Tue, 30 Jun 2026 16:51:46 +0200 Subject: [PATCH 19/23] feat(crafter): use 0-indexed sequential names for exploded materials MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exploded archive materials are now numbered from 0 (material-0, material-1, … or -0, -1, …) instead of 1. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter_test.go | 32 +++++++++---------- pkg/attestation/crafter/materials/archive.go | 8 ++--- .../crafter/materials/archive_test.go | 12 +++---- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 313d57a23..81ae1317b 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -706,12 +706,12 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 2) - // Material names are sequential with the --name value as prefix, - // independent of the entry order. - m1, has1 := stateMap["entry-1"] - m2, has2 := stateMap["entry-2"] - assert.True(s.T(), has1, "expected material entry-1 in state") - assert.True(s.T(), has2, "expected material entry-2 in state") + // Material names are sequential (0-indexed) with the --name value as + // prefix, independent of the entry order. + m1, has1 := stateMap["entry-0"] + m2, has2 := stateMap["entry-1"] + assert.True(s.T(), has1, "expected material entry-0 in state") + assert.True(s.T(), has2, "expected material entry-1 in state") // The recorded artifact filename must preserve each original entry // basename, not the sequential material key. @@ -840,10 +840,10 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 2) // Entries sharing a basename still get distinct sequential names. + _, hasMat0 := stateMap["material-0"] _, hasMat1 := stateMap["material-1"] - _, hasMat2 := stateMap["material-2"] + assert.True(s.T(), hasMat0, "expected material material-0 in state") assert.True(s.T(), hasMat1, "expected material material-1 in state") - assert.True(s.T(), hasMat2, "expected material material-2 in state") }) s.Run("name prefix: used as the sequential name prefix", func() { @@ -868,8 +868,8 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 1) - _, found := stateMap["sboms-1"] - assert.True(s.T(), found, "expected material sboms-1 in state") + _, found := stateMap["sboms-0"] + assert.True(s.T(), found, "expected material sboms-0 in state") }) s.Run("skip dirs and symlinks in tar.gz: only regular file becomes material", func() { @@ -896,8 +896,8 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 1) - real, hasReal := stateMap["material-1"] - assert.True(s.T(), hasReal, "expected material material-1 in state") + real, hasReal := stateMap["material-0"] + assert.True(s.T(), hasReal, "expected material material-0 in state") // The original filename is still preserved in the artifact metadata. assert.Equal(s.T(), "real.txt", real.GetArtifact().GetName()) }) @@ -953,10 +953,10 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 2) - m1, has1 := stateMap["material-1"] - m2, has2 := stateMap["material-2"] - assert.True(s.T(), has1, "expected material material-1 in state") - assert.True(s.T(), has2, "expected material material-2 in state") + m1, has1 := stateMap["material-0"] + m2, has2 := stateMap["material-1"] + assert.True(s.T(), has1, "expected material material-0 in state") + assert.True(s.T(), has2, "expected material material-1 in state") // Original filenames preserved regardless of the sequential keys. gotFilenames := []string{m1.GetArtifact().GetName(), m2.GetArtifact().GetName()} assert.ElementsMatch(s.T(), []string{"alpha.txt", "beta.txt"}, gotFilenames) diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index 8eb6444c1..cd5ad377d 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -332,9 +332,9 @@ func NewNameAllocator(existing []string) *NameAllocator { } // AllocateSequential returns the next unused "-" material name, where -// n is a counter that advances across calls and skips names already in use. -// prefix is sanitized to DNS-1123; an empty or symbol-only prefix yields the -// base "material" (so entries are named material-1, material-2, …). +// n is a zero-based counter that advances across calls and skips names already +// in use. prefix is sanitized to DNS-1123; an empty or symbol-only prefix yields +// the base "material" (so entries are named material-0, material-1, …). func (a *NameAllocator) AllocateSequential(prefix string) string { base := "material" if prefix != "" { @@ -342,8 +342,8 @@ func (a *NameAllocator) AllocateSequential(prefix string) string { } for { - a.seq++ candidate := fmt.Sprintf("%s-%d", base, a.seq) + a.seq++ if _, taken := a.used[candidate]; !taken { a.used[candidate] = struct{}{} return candidate diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index e0cb6f459..227594883 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -247,28 +247,28 @@ func TestSanitizeMaterialName(t *testing.T) { } func TestNameAllocatorSequential(t *testing.T) { - t.Run("default prefix numbers from 1", func(t *testing.T) { + t.Run("default prefix numbers from 0", func(t *testing.T) { a := NewNameAllocator(nil) + assert.Equal(t, "material-0", a.AllocateSequential("")) assert.Equal(t, "material-1", a.AllocateSequential("")) assert.Equal(t, "material-2", a.AllocateSequential("")) - assert.Equal(t, "material-3", a.AllocateSequential("")) }) t.Run("custom prefix is sanitized and numbered", func(t *testing.T) { a := NewNameAllocator(nil) + assert.Equal(t, "q3-scans-0", a.AllocateSequential("Q3 Scans")) assert.Equal(t, "q3-scans-1", a.AllocateSequential("Q3 Scans")) - assert.Equal(t, "q3-scans-2", a.AllocateSequential("Q3 Scans")) }) t.Run("skips names already present in the attestation", func(t *testing.T) { - a := NewNameAllocator([]string{"material-1", "material-2"}) + a := NewNameAllocator([]string{"material-0", "material-1"}) + assert.Equal(t, "material-2", a.AllocateSequential("")) assert.Equal(t, "material-3", a.AllocateSequential("")) - assert.Equal(t, "material-4", a.AllocateSequential("")) }) t.Run("symbol-only prefix falls back to material", func(t *testing.T) { a := NewNameAllocator(nil) - assert.Equal(t, "material-1", a.AllocateSequential("!!!")) + assert.Equal(t, "material-0", a.AllocateSequential("!!!")) }) } From daf76a3b0e49648f4cb7383cc40c6c2351fa6d14 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Tue, 30 Jun 2026 16:53:22 +0200 Subject: [PATCH 20/23] style(crafter): extract defaultMaterialName constant Avoid the repeated "material" string literal (goconst). Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/materials/archive.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index cd5ad377d..ff5e2bd68 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -291,6 +291,10 @@ func ArchiveEntryBaseName(name string) string { return path.Base(strings.ReplaceAll(name, "\\", "/")) } +// defaultMaterialName is the fallback base used when a name cannot be derived +// (empty/symbol-only input or prefix). +const defaultMaterialName = "material" + // SanitizeMaterialName converts s into a valid DNS-1123 material name: // lowercase, with every run of characters outside [a-z0-9] collapsed to a // single "-" and leading/trailing "-" trimmed. Falls back to "material". @@ -309,7 +313,7 @@ func SanitizeMaterialName(s string) string { } } if b.Len() == 0 { - return "material" + return defaultMaterialName } return b.String() } @@ -336,7 +340,7 @@ func NewNameAllocator(existing []string) *NameAllocator { // in use. prefix is sanitized to DNS-1123; an empty or symbol-only prefix yields // the base "material" (so entries are named material-0, material-1, …). func (a *NameAllocator) AllocateSequential(prefix string) string { - base := "material" + base := defaultMaterialName if prefix != "" { base = SanitizeMaterialName(prefix) } From 8d2a365e45a10d73289e4ac9ae93bed525a55fd8 Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Tue, 30 Jun 2026 17:03:35 +0200 Subject: [PATCH 21/23] test(crafter): rename test var shadowing builtin real Fixes revive redefines-builtin-id on the archive behavior test. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- pkg/attestation/crafter/crafter_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 81ae1317b..28e32a000 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -896,10 +896,10 @@ func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { stateMap := c.CraftingState.GetAttestation().GetMaterials() assert.Len(s.T(), stateMap, 1) - real, hasReal := stateMap["material-0"] + realMat, hasReal := stateMap["material-0"] assert.True(s.T(), hasReal, "expected material material-0 in state") // The original filename is still preserved in the artifact metadata. - assert.Equal(s.T(), "real.txt", real.GetArtifact().GetName()) + assert.Equal(s.T(), "real.txt", realMat.GetArtifact().GetName()) }) s.Run("traversal rejection: ../escape.txt entry causes error and empty state", func() { From a25c29036504ee09e2c50186fe99fdb52528be9a Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Wed, 1 Jul 2026 14:11:52 +0200 Subject: [PATCH 22/23] refactor(cli): simplify archive-explode output and routing Drop the redundant explode bool from shouldExplode (derivable from the returned ArchiveFormat) and branch on the format at the call site. Reuse output.EncodeOutput for the multi-material result instead of a hand-rolled format switch, adding []*AttestationStatusMaterial to the tabulated-data union so the slice renders as a single JSON array. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- app/cli/cmd/attestation_add.go | 17 ++++------- app/cli/cmd/output/output.go | 1 + app/cli/pkg/action/attestation_add.go | 14 ++++----- .../action/attestation_add_routing_test.go | 29 +++++++++---------- 4 files changed, 25 insertions(+), 36 deletions(-) diff --git a/app/cli/cmd/attestation_add.go b/app/cli/cmd/attestation_add.go index 363def5cf..78c12dcfa 100644 --- a/app/cli/cmd/attestation_add.go +++ b/app/cli/cmd/attestation_add.go @@ -150,22 +150,17 @@ func newAttestationAddCmd() *cobra.Command { return err } - // The explode path can return several materials. Render JSON as a - // single array so the output stays a parseable document; only the - // table renderer is emitted per material. - switch flagOutputFormat { - case output.FormatJSON: - return output.EncodeJSON(resp) - case output.FormatTable: - for _, m := range resp { + // The explode path can return several materials. EncodeOutput + // renders the whole slice as a single JSON array (a parseable + // document) and the table renderer per material. + return output.EncodeOutput(flagOutputFormat, resp, func(mats []*action.AttestationStatusMaterial) error { + for _, m := range mats { if err := displayMaterialInfo(m, policies[m.Name]); err != nil { return err } } return nil - default: - return output.ErrOutputFormatNotImplemented - } + }) }, ) }, diff --git a/app/cli/cmd/output/output.go b/app/cli/cmd/output/output.go index bb46a31dc..a2e5cfdcc 100644 --- a/app/cli/cmd/output/output.go +++ b/app/cli/cmd/output/output.go @@ -56,6 +56,7 @@ type tabulatedData interface { *action.APITokenItem | []*action.APITokenItem | *action.AttestationStatusMaterial | + []*action.AttestationStatusMaterial | *action.ListMembershipResult | *action.PolicyLintResult } diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go index 198ce8641..6193bea88 100644 --- a/app/cli/pkg/action/attestation_add.go +++ b/app/cli/pkg/action/attestation_add.go @@ -153,11 +153,11 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa addOpts := runtimeInputAddOpts(runtimeInputs) // Explode path: --kind set, value is a (non-archive-native) archive. - format, explode, err := shouldExplode(materialType, materialValue) + format, err := shouldExplode(materialType, materialValue) if err != nil { return nil, fmt.Errorf("detecting archive: %w", err) } - if explode { + if format != materials.ArchiveNone { if len(policyInputFiles) > 0 { action.Logger.Warn().Msg("--policy-input-from-file is ignored when expanding an archive; evidence cross-links are not recorded for exploded materials") } @@ -226,15 +226,11 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa // shouldExplode decides whether an att-add should explode the value into many // materials: only when --kind is set, the value is a supported archive, and the // kind is not archive-native (e.g. ZAP_DAST_ZIP, which is recorded whole). -func shouldExplode(materialType, value string) (materials.ArchiveFormat, bool, error) { +func shouldExplode(materialType, value string) (materials.ArchiveFormat, error) { if materialType == "" || materials.IsArchiveNativeKind(materialType) { - return materials.ArchiveNone, false, nil + return materials.ArchiveNone, nil } - format, err := materials.DetectArchive(value) - if err != nil { - return materials.ArchiveNone, false, err - } - return format, format != materials.ArchiveNone, nil + return materials.DetectArchive(value) } // runtimeInputAddOpts wraps the runtime inputs as crafter add options, or diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go index f40f01dbe..648e9db4c 100644 --- a/app/cli/pkg/action/attestation_add_routing_test.go +++ b/app/cli/pkg/action/attestation_add_routing_test.go @@ -53,29 +53,26 @@ func TestShouldExplode(t *testing.T) { require.NoError(t, os.WriteFile(plainPath, []byte("not an archive"), 0600)) tests := []struct { - name string - kind string - value string - wantExplode bool - wantFormat materials.ArchiveFormat + name string + kind string + value string + wantFormat materials.ArchiveFormat }{ - {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true, materials.ArchiveZip}, - {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false, materials.ArchiveNone}, - {"no kind", "", zipPath, false, materials.ArchiveNone}, - {"kind + non-archive", "ARTIFACT", plainPath, false, materials.ArchiveNone}, + // A non-ArchiveNone format means the value will be exploded. + {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, materials.ArchiveZip}, + {"archive-native kind", "ZAP_DAST_ZIP", zipPath, materials.ArchiveNone}, + {"no kind", "", zipPath, materials.ArchiveNone}, + {"kind + non-archive", "ARTIFACT", plainPath, materials.ArchiveNone}, // Non-file values must never return an error — STRING and CONTAINER_IMAGE // carry values that are not file paths at all. - {"kind STRING non-file value", "STRING", "hello world", false, materials.ArchiveNone}, - {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", false, materials.ArchiveNone}, + {"kind STRING non-file value", "STRING", "hello world", materials.ArchiveNone}, + {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", materials.ArchiveNone}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - format, explode, err := shouldExplode(tc.kind, tc.value) + format, err := shouldExplode(tc.kind, tc.value) require.NoError(t, err) - assert.Equal(t, tc.wantExplode, explode) - if explode { - assert.Equal(t, tc.wantFormat, format) - } + assert.Equal(t, tc.wantFormat, format) }) } } From e358fb8f8324760c391822c6214a2397e34c6e7b Mon Sep 17 00:00:00 2001 From: Javier Rodriguez Date: Wed, 1 Jul 2026 14:20:26 +0200 Subject: [PATCH 23/23] feat(cli): limit archive explode to an SBOM/SARIF allowlist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback: explode only kinds with a meaningful bundle form (SBOM_CYCLONEDX_JSON, SBOM_SPDX_JSON, SARIF) instead of exploding any archive that is not archive-native. Every other kind (ARTIFACT, EVIDENCE, ZAP_DAST_ZIP, …) records the archive whole, so a regular zip provided as a single material is never expanded. Replaces the archive-native denylist with an explodable-kind allowlist. Also consolidate name sanitization: SanitizeMaterialName now returns the bare sanitized component (empty when nothing remains) and callers supply their fallback, so the action layer's sanitizeMaterialNamePart reuses it instead of duplicating the logic. Assisted-by: Claude Code Signed-off-by: Javier Rodriguez Chainloop-Trace-Sessions: da72e107-14e9-4da1-add0-28004f542628, ef6d3cdb-5a23-445c-b39a-510b659023e4 --- app/cli/pkg/action/attestation_add.go | 37 +++++++------------ .../action/attestation_add_routing_test.go | 20 ++++++---- pkg/attestation/crafter/materials/archive.go | 37 ++++++++++--------- .../crafter/materials/archive_test.go | 17 ++++++--- 4 files changed, 56 insertions(+), 55 deletions(-) diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go index 6193bea88..6f891ecb7 100644 --- a/app/cli/pkg/action/attestation_add.go +++ b/app/cli/pkg/action/attestation_add.go @@ -224,10 +224,14 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa } // shouldExplode decides whether an att-add should explode the value into many -// materials: only when --kind is set, the value is a supported archive, and the -// kind is not archive-native (e.g. ZAP_DAST_ZIP, which is recorded whole). +// materials: only when the kind is explodable (SBOM/SARIF) and the value is a +// supported archive. It returns ArchiveNone for every other kind so a regular +// zip provided as e.g. ARTIFACT or EVIDENCE is recorded whole. func shouldExplode(materialType, value string) (materials.ArchiveFormat, error) { - if materialType == "" || materials.IsArchiveNativeKind(materialType) { + // Only explode kinds that have a meaningful "bundle of the same kind" + // archive form (SBOM, SARIF). Any other kind — including ARTIFACT and + // EVIDENCE — records the archive whole even when the value is a zip/tar. + if !materials.IsExplodableKind(materialType) { return materials.ArchiveNone, nil } return materials.DetectArchive(value) @@ -360,29 +364,14 @@ func policyInputEvidenceNames(materialName string, policyInputFiles []*PolicyInp return names } -// sanitizeMaterialNamePart lower-cases s and collapses every run of characters -// outside [a-z0-9] into a single "-", trimming leading/trailing "-", so the -// result is a valid material-name component. Falls back to "input" if nothing -// usable remains. +// sanitizeMaterialNamePart sanitizes s into a valid material-name component via +// materials.SanitizeMaterialName, falling back to "input" if nothing usable +// remains. func sanitizeMaterialNamePart(s string) string { - var b strings.Builder - pendingHyphen := false - for _, r := range strings.ToLower(s) { - if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') { - if pendingHyphen && b.Len() > 0 { - b.WriteByte('-') - } - b.WriteRune(r) - pendingHyphen = false - } else { - pendingHyphen = true - } - } - - if b.Len() == 0 { - return "input" + if name := materials.SanitizeMaterialName(s); name != "" { + return name } - return b.String() + return "input" } // GetPolicyEvaluations is a Wrapper around the getPolicyEvaluations diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go index 648e9db4c..b798d256e 100644 --- a/app/cli/pkg/action/attestation_add_routing_test.go +++ b/app/cli/pkg/action/attestation_add_routing_test.go @@ -58,15 +58,19 @@ func TestShouldExplode(t *testing.T) { value string wantFormat materials.ArchiveFormat }{ - // A non-ArchiveNone format means the value will be exploded. - {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, materials.ArchiveZip}, - {"archive-native kind", "ZAP_DAST_ZIP", zipPath, materials.ArchiveNone}, + // A non-ArchiveNone format means the value will be exploded. Only + // explodable kinds (SBOM, SARIF) explode; everything else is recorded + // whole even when the value is an archive. + {"explodable SBOM + archive", "SBOM_CYCLONEDX_JSON", zipPath, materials.ArchiveZip}, + {"explodable SARIF + archive", "SARIF", zipPath, materials.ArchiveZip}, + {"non-explodable ARTIFACT + archive", "ARTIFACT", zipPath, materials.ArchiveNone}, + {"non-explodable EVIDENCE + archive", "EVIDENCE", zipPath, materials.ArchiveNone}, + {"archive-native ZAP + archive", "ZAP_DAST_ZIP", zipPath, materials.ArchiveNone}, {"no kind", "", zipPath, materials.ArchiveNone}, - {"kind + non-archive", "ARTIFACT", plainPath, materials.ArchiveNone}, - // Non-file values must never return an error — STRING and CONTAINER_IMAGE - // carry values that are not file paths at all. - {"kind STRING non-file value", "STRING", "hello world", materials.ArchiveNone}, - {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", materials.ArchiveNone}, + {"explodable kind + non-archive", "SBOM_CYCLONEDX_JSON", plainPath, materials.ArchiveNone}, + // Non-file values must never return an error — even for an explodable kind + // the value here is not a file path at all. + {"explodable kind STRING-like non-file value", "SARIF", "hello world", materials.ArchiveNone}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go index ff5e2bd68..9e3a80f70 100644 --- a/pkg/attestation/crafter/materials/archive.go +++ b/pkg/attestation/crafter/materials/archive.go @@ -267,18 +267,21 @@ func walkTar(p string, gzipped bool, visit func(name string, r io.Reader) error) } } -// archiveNativeKinds lists material kinds whose value is the archive itself. -// For these, --kind short-circuits the explode path and the archive is -// recorded whole. Extend this set as new "the archive is the material" kinds -// are added. -var archiveNativeKinds = map[string]struct{}{ - schemaapi.CraftingSchema_Material_ZAP_DAST_ZIP.String(): {}, +// explodableKinds is the allowlist of material kinds whose archive value is +// expanded into one material per entry. Every other kind (ARTIFACT, EVIDENCE, +// ZAP_DAST_ZIP, …) records the archive whole, so a customer can still provide a +// regular zip as a single material. Extend this set as more kinds gain a +// meaningful "bundle of the same kind" archive form. +var explodableKinds = map[string]struct{}{ + schemaapi.CraftingSchema_Material_SBOM_CYCLONEDX_JSON.String(): {}, + schemaapi.CraftingSchema_Material_SBOM_SPDX_JSON.String(): {}, + schemaapi.CraftingSchema_Material_SARIF.String(): {}, } -// IsArchiveNativeKind reports whether kind treats the archive as a single -// material (recorded whole) rather than something to explode. -func IsArchiveNativeKind(kind string) bool { - _, ok := archiveNativeKinds[kind] +// IsExplodableKind reports whether an archive provided for kind should be +// expanded into one material per entry (true) or recorded whole (false). +func IsExplodableKind(kind string) bool { + _, ok := explodableKinds[kind] return ok } @@ -295,9 +298,10 @@ func ArchiveEntryBaseName(name string) string { // (empty/symbol-only input or prefix). const defaultMaterialName = "material" -// SanitizeMaterialName converts s into a valid DNS-1123 material name: -// lowercase, with every run of characters outside [a-z0-9] collapsed to a -// single "-" and leading/trailing "-" trimmed. Falls back to "material". +// SanitizeMaterialName converts s into a valid DNS-1123 material-name component: +// lowercase, with every run of characters outside [a-z0-9] collapsed to a single +// "-" and leading/trailing "-" trimmed. It returns "" when nothing usable +// remains; callers supply their own fallback. func SanitizeMaterialName(s string) string { var b strings.Builder pendingHyphen := false @@ -312,9 +316,6 @@ func SanitizeMaterialName(s string) string { pendingHyphen = true } } - if b.Len() == 0 { - return defaultMaterialName - } return b.String() } @@ -341,8 +342,8 @@ func NewNameAllocator(existing []string) *NameAllocator { // the base "material" (so entries are named material-0, material-1, …). func (a *NameAllocator) AllocateSequential(prefix string) string { base := defaultMaterialName - if prefix != "" { - base = SanitizeMaterialName(prefix) + if s := SanitizeMaterialName(prefix); s != "" { + base = s } for { diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go index 227594883..127cb3266 100644 --- a/pkg/attestation/crafter/materials/archive_test.go +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -239,7 +239,8 @@ func TestSanitizeMaterialName(t *testing.T) { {"scan.json", "scan-json"}, {"results.XML", "results-xml"}, {"weird__name!!", "weird-name"}, - {"___", "material"}, + {"___", ""}, // nothing usable -> empty; callers supply their own fallback + {"", ""}, } for _, tc := range tests { assert.Equal(t, tc.want, SanitizeMaterialName(tc.in)) @@ -272,8 +273,14 @@ func TestNameAllocatorSequential(t *testing.T) { }) } -func TestIsArchiveNativeKind(t *testing.T) { - assert.True(t, IsArchiveNativeKind("ZAP_DAST_ZIP")) - assert.False(t, IsArchiveNativeKind("SBOM_CYCLONEDX_JSON")) - assert.False(t, IsArchiveNativeKind("ARTIFACT")) +func TestIsExplodableKind(t *testing.T) { + // Explodable: SBOM and SARIF bundles. + assert.True(t, IsExplodableKind("SBOM_CYCLONEDX_JSON")) + assert.True(t, IsExplodableKind("SBOM_SPDX_JSON")) + assert.True(t, IsExplodableKind("SARIF")) + // Not explodable: recorded whole even when a zip/tar is provided. + assert.False(t, IsExplodableKind("ARTIFACT")) + assert.False(t, IsExplodableKind("EVIDENCE")) + assert.False(t, IsExplodableKind("ZAP_DAST_ZIP")) + assert.False(t, IsExplodableKind("")) }