diff --git a/app/cli/cmd/attestation_add.go b/app/cli/cmd/attestation_add.go
index 37dd62353..78c12dcfa 100644
--- a/app/cli/cmd/attestation_add.go
+++ b/app/cli/cmd/attestation_add.go
@@ -18,8 +18,10 @@ package cmd
import (
"errors"
"fmt"
+ "math"
"os"
+ "code.cloudfoundry.org/bytefmt"
"github.com/jedib0t/go-pretty/v6/table"
"github.com/muesli/reflow/wrap"
"github.com/spf13/cobra"
@@ -40,6 +42,8 @@ func newAttestationAddCmd() *cobra.Command {
var annotationsFlag []string
var noStrictValidation bool
var policyInputFromFileFlag []string
+ var maxExtractEntries int
+ var maxExtractSize string
// OCI registry credentials can be passed as flags or environment variables
var registryServer, registryUsername, registryPassword string
@@ -74,6 +78,16 @@ func newAttestationAddCmd() *cobra.Command {
chainloop attestation add --name sigcheck --value sigcheckResult.csv --kind SYSINTERNALS_SIGCHECK \
--policy-input-from-file ignored_paths=exception.csv:Path`,
RunE: func(cmd *cobra.Command, _ []string) error {
+ maxExtractSizeBytes, err := bytefmt.ToBytes(maxExtractSize)
+ if err != nil {
+ return fmt.Errorf("invalid --max-extract-size %q: %w", maxExtractSize, err)
+ }
+ // Guard against the uint64->int64 cast wrapping negative, which would
+ // later surface as a misleading "archive too large" error.
+ if maxExtractSizeBytes > math.MaxInt64 {
+ return fmt.Errorf("--max-extract-size %q is too large", maxExtractSize)
+ }
+
a, err := action.NewAttestationAdd(
&action.AttestationAddOpts{
ActionsOpts: ActionOpts,
@@ -85,6 +99,8 @@ func newAttestationAddCmd() *cobra.Command {
RegistryPassword: registryPassword,
LocalStatePath: attestationLocalStatePath,
NoStrictValidation: noStrictValidation,
+ MaxExtractEntries: maxExtractEntries,
+ MaxExtractSize: int64(maxExtractSizeBytes),
},
)
if err != nil {
@@ -122,21 +138,28 @@ func newAttestationAddCmd() *cobra.Command {
return fmt.Errorf("loading resource: %w", err)
}
}
- // TODO: take the material output and show render it
resp, err := a.Run(cmd.Context(), attestationID, name, rawValuePath, kind, annotations, policyInputFiles)
if err != nil {
return err
}
- logger.Info().Msg("material added to attestation")
+ logger.Info().Int("materials", len(resp)).Msg("material(s) added to attestation")
policies, err := a.GetPolicyEvaluations(cmd.Context(), attestationID)
if err != nil {
return err
}
- return output.EncodeOutput(flagOutputFormat, resp, func(s *action.AttestationStatusMaterial) error {
- return displayMaterialInfo(s, policies[resp.Name])
+ // The explode path can return several materials. EncodeOutput
+ // renders the whole slice as a single JSON array (a parseable
+ // document) and the table renderer per material.
+ return output.EncodeOutput(flagOutputFormat, resp, func(mats []*action.AttestationStatusMaterial) error {
+ for _, m := range mats {
+ if err := displayMaterialInfo(m, policies[m.Name]); err != nil {
+ return err
+ }
+ }
+ return nil
})
},
)
@@ -166,6 +189,10 @@ func newAttestationAddCmd() *cobra.Command {
cmd.Flags().StringVar(&registryUsername, "registry-username", "", fmt.Sprintf("registry username, ($%s)", registryUsernameEnvVarName))
cmd.Flags().StringVar(&registryPassword, "registry-password", "", fmt.Sprintf("registry password, ($%s)", registryPasswordEnvVarName))
+ // Archive extraction guards
+ cmd.Flags().IntVar(&maxExtractEntries, "max-extract-entries", 10000, "max number of files to extract when --value is an archive")
+ cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GiB", "max total uncompressed size to extract when --value is an archive")
+
if registryServer == "" {
registryServer = os.Getenv(registryServerEnvVarName)
}
diff --git a/app/cli/cmd/output/output.go b/app/cli/cmd/output/output.go
index bb46a31dc..a2e5cfdcc 100644
--- a/app/cli/cmd/output/output.go
+++ b/app/cli/cmd/output/output.go
@@ -56,6 +56,7 @@ type tabulatedData interface {
*action.APITokenItem |
[]*action.APITokenItem |
*action.AttestationStatusMaterial |
+ []*action.AttestationStatusMaterial |
*action.ListMembershipResult |
*action.PolicyLintResult
}
diff --git a/app/cli/documentation/cli-reference.mdx b/app/cli/documentation/cli-reference.mdx
index b4bf901dd..3dfe7522e 100755
--- a/app/cli/documentation/cli-reference.mdx
+++ b/app/cli/documentation/cli-reference.mdx
@@ -258,6 +258,8 @@ Options
--attestation-id string Unique identifier of the in-progress attestation
-h, --help help for add
--kind string kind of the material to be recorded: ["ARTIFACT" "ASYNCAPI_SPEC" "ATTESTATION" "BLACKDUCK_SCA_JSON" "CERTCC_DRANZER" "CHAINLOOP_AI_AGENT_CONFIG" "CHAINLOOP_AI_CODING_SESSION" "CHAINLOOP_PR_INFO" "CHAINLOOP_RUNNER_CONTEXT" "CONTAINER_IMAGE" "CSAF_INFORMATIONAL_ADVISORY" "CSAF_SECURITY_ADVISORY" "CSAF_SECURITY_INCIDENT_RESPONSE" "CSAF_VEX" "EVIDENCE" "GHAS_CODE_SCAN" "GHAS_DEPENDENCY_SCAN" "GHAS_SECRET_SCAN" "GITLAB_SECURITY_REPORT" "GITLEAKS_JSON" "GRAPHQL_SPEC" "HELM_CHART" "JACOCO_XML" "JUNIT_XML" "OPENAPI_SPEC" "OPENVEX" "OSSF_SCORECARD_JSON" "RADAMSA_CRASHES" "RADAMSA_REPORT" "SARIF" "SBOM_CYCLONEDX_JSON" "SBOM_SPDX_JSON" "SLSA_PROVENANCE" "STRING" "SYSINTERNALS_ACCESSCHK" "SYSINTERNALS_SIGCHECK" "TWISTCLI_SCAN_JSON" "YELP_DETECT_SECRETS_BASELINE" "ZAP_DAST_ZIP"]
+--max-extract-entries int max number of files to extract when --value is an archive (default 10000)
+--max-extract-size string max total uncompressed size to extract when --value is an archive (default "1GiB")
--name string name of the material as shown in the contract
--no-strict-validation skip strict schema validation for structured materials (SBOM_CYCLONEDX_JSON, OPENAPI_SPEC, ASYNCAPI_SPEC, OSSF_SCORECARD_JSON)
--policy-input-from-file stringArray feed a policy input from a column of a CSV or JSON file, in the format <input>=<path>[:<column>] (e.g. ignored_paths=exception.csv:Path); <column> is a single top-level column/field name and defaults to the input name; repeatable. The file is also recorded as EVIDENCE.
diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go
index 3778a9a94..6f891ecb7 100644
--- a/app/cli/pkg/action/attestation_add.go
+++ b/app/cli/pkg/action/attestation_add.go
@@ -41,6 +41,12 @@ type AttestationAddOpts struct {
LocalStatePath string
// NoStrictValidation skips strict schema validation
NoStrictValidation bool
+ // MaxExtractEntries limits the number of entries extracted from an archive.
+ // Zero defaults to materials.DefaultArchiveLimits().MaxEntries.
+ MaxExtractEntries int
+ // MaxExtractSize limits the total uncompressed bytes extracted from an archive.
+ // Zero defaults to materials.DefaultArchiveLimits().MaxTotalSize.
+ MaxExtractSize int64
}
type newCrafterOpts struct {
@@ -55,6 +61,8 @@ type AttestationAdd struct {
casCAPath string
connectionInsecure bool
localStatePath string
+ maxExtractEntries int
+ maxExtractSize int64
*newCrafterOpts
}
@@ -68,6 +76,16 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) {
opts = append(opts, crafter.WithNoStrictValidation(cfg.NoStrictValidation))
}
+ defaults := materials.DefaultArchiveLimits()
+ maxEntries := cfg.MaxExtractEntries
+ if maxEntries <= 0 {
+ maxEntries = defaults.MaxEntries
+ }
+ maxSize := cfg.MaxExtractSize
+ if maxSize <= 0 {
+ maxSize = defaults.MaxTotalSize
+ }
+
return &AttestationAdd{
ActionsOpts: cfg.ActionsOpts,
newCrafterOpts: &newCrafterOpts{cpConnection: cfg.CPConnection, opts: opts},
@@ -75,12 +93,14 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) {
casCAPath: cfg.CASCAPath,
connectionInsecure: cfg.ConnectionInsecure,
localStatePath: cfg.LocalStatePath,
+ maxExtractEntries: maxEntries,
+ maxExtractSize: maxSize,
}, nil
}
var ErrAttestationNotInitialized = errors.New("attestation not yet initialized")
-func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) (*AttestationStatusMaterial, error) {
+func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) ([]*AttestationStatusMaterial, error) {
// initialize the crafter. If attestation-id is provided we assume the attestation is performed using remote state
crafter, err := newCrafter(&newCrafterStateOpts{enableRemoteState: (attestationID != ""), localStatePath: action.localStatePath}, action.CPConnection, action.opts...)
if err != nil {
@@ -132,6 +152,31 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa
// 3. If materialType is not empty, add material contract free with materialType and materialName
addOpts := runtimeInputAddOpts(runtimeInputs)
+ // Explode path: --kind set, value is a (non-archive-native) archive.
+ format, err := shouldExplode(materialType, materialValue)
+ if err != nil {
+ return nil, fmt.Errorf("detecting archive: %w", err)
+ }
+ if format != materials.ArchiveNone {
+ if len(policyInputFiles) > 0 {
+ action.Logger.Warn().Msg("--policy-input-from-file is ignored when expanding an archive; evidence cross-links are not recorded for exploded materials")
+ }
+ limits := materials.ArchiveLimits{MaxEntries: action.maxExtractEntries, MaxTotalSize: action.maxExtractSize}
+ mts, err := crafter.AddMaterialsFromArchive(ctx, attestationID, materialType, materialName, materialValue, format, casBackend, annotations, limits, addOpts...)
+ if err != nil {
+ return nil, fmt.Errorf("adding materials from archive: %w", err)
+ }
+ results := make([]*AttestationStatusMaterial, 0, len(mts))
+ for _, mt := range mts {
+ r, err := attMaterialToAction(mt)
+ if err != nil {
+ return nil, fmt.Errorf("converting material to action: %w", err)
+ }
+ results = append(results, r)
+ }
+ return results, nil
+ }
+
var mt *api.Attestation_Material
switch {
case materialName == "" && materialType == "":
@@ -175,7 +220,21 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa
return nil, fmt.Errorf("converting material to action: %w", err)
}
- return materialResult, nil
+ return []*AttestationStatusMaterial{materialResult}, nil
+}
+
+// shouldExplode reports which archive format, if any, an "attestation add"
+// should expand into one material per entry. A non-explodable kind always
+// yields (ArchiveNone, nil) and value is never inspected; otherwise the format
+// and error come straight from materials.DetectArchive(value).
+func shouldExplode(materialType, value string) (materials.ArchiveFormat, error) {
+ // Kinds rejected by materials.IsExplodableKind (the tests in this change
+ // cover ARTIFACT, EVIDENCE, ZAP_DAST_ZIP and the empty kind) keep the
+ // archive as one material, so detection is skipped entirely for them.
+ if !materials.IsExplodableKind(materialType) {
+ return materials.ArchiveNone, nil
+ }
+ return materials.DetectArchive(value)
}
// runtimeInputAddOpts wraps the runtime inputs as crafter add options, or
@@ -305,29 +364,14 @@ func policyInputEvidenceNames(materialName string, policyInputFiles []*PolicyInp
return names
}
-// sanitizeMaterialNamePart lower-cases s and collapses every run of characters
-// outside [a-z0-9] into a single "-", trimming leading/trailing "-", so the
-// result is a valid material-name component. Falls back to "input" if nothing
-// usable remains.
+// sanitizeMaterialNamePart sanitizes s into a valid material-name component via
+// materials.SanitizeMaterialName, falling back to "input" if nothing usable
+// remains.
func sanitizeMaterialNamePart(s string) string {
- var b strings.Builder
- pendingHyphen := false
- for _, r := range strings.ToLower(s) {
- if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') {
- if pendingHyphen && b.Len() > 0 {
- b.WriteByte('-')
- }
- b.WriteRune(r)
- pendingHyphen = false
- } else {
- pendingHyphen = true
- }
- }
-
- if b.Len() == 0 {
- return "input"
+ if name := materials.SanitizeMaterialName(s); name != "" {
+ return name
}
- return b.String()
+ return "input"
}
// GetPolicyEvaluations is a Wrapper around the getPolicyEvaluations
diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go
new file mode 100644
index 000000000..b798d256e
--- /dev/null
+++ b/app/cli/pkg/action/attestation_add_routing_test.go
@@ -0,0 +1,82 @@
+//
+// Copyright 2026 The Chainloop Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package action
+
+import (
+ "archive/zip"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// writeTestZip writes a zip archive to dir/name holding a single entry,
+// "entry.txt" with content "hello", and returns its path; errors fail the test.
+func writeTestZip(t *testing.T, dir, name string) string {
+ t.Helper()
+ path := filepath.Join(dir, name)
+ f, err := os.Create(path)
+ require.NoError(t, err)
+ defer f.Close()
+
+ w := zip.NewWriter(f)
+ entry, err := w.Create("entry.txt")
+ require.NoError(t, err)
+ _, err = entry.Write([]byte("hello"))
+ require.NoError(t, err)
+ require.NoError(t, w.Close()) // Close writes the central directory; must precede f.Close
+ return path
+}
+
+func TestShouldExplode(t *testing.T) {
+ dir := t.TempDir()
+ zipPath := writeTestZip(t, dir, "s.zip")
+
+ // non-archive: a plain temp file with an unrecognised extension
+ plainPath := filepath.Join(dir, "plain.bin")
+ require.NoError(t, os.WriteFile(plainPath, []byte("not an archive"), 0600))
+
+ tests := []struct {
+ name string
+ kind string
+ value string
+ wantFormat materials.ArchiveFormat
+ }{
+ // A non-ArchiveNone format means the value will be exploded. Only
+ // explodable kinds (SBOM, SARIF) explode; everything else is recorded
+ // whole even when the value is an archive.
+ {"explodable SBOM + archive", "SBOM_CYCLONEDX_JSON", zipPath, materials.ArchiveZip},
+ {"explodable SARIF + archive", "SARIF", zipPath, materials.ArchiveZip},
+ {"non-explodable ARTIFACT + archive", "ARTIFACT", zipPath, materials.ArchiveNone},
+ {"non-explodable EVIDENCE + archive", "EVIDENCE", zipPath, materials.ArchiveNone},
+ {"archive-native ZAP + archive", "ZAP_DAST_ZIP", zipPath, materials.ArchiveNone},
+ {"no kind", "", zipPath, materials.ArchiveNone},
+ {"explodable kind + non-archive", "SBOM_CYCLONEDX_JSON", plainPath, materials.ArchiveNone},
+ // Non-file values must never return an error — even for an explodable kind
+ // the value here is not a file path at all.
+ {"explodable kind STRING-like non-file value", "SARIF", "hello world", materials.ArchiveNone},
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ format, err := shouldExplode(tc.kind, tc.value)
+ require.NoError(t, err)
+ assert.Equal(t, tc.wantFormat, format)
+ })
+ }
+}
diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go
index 5a2db615b..38d86e7f9 100644
--- a/pkg/attestation/crafter/crafter.go
+++ b/pkg/attestation/crafter/crafter.go
@@ -19,9 +19,11 @@ import (
"context"
"errors"
"fmt"
+ "io"
"maps"
"net/url"
"os"
+ "path/filepath"
"slices"
"strings"
"time"
@@ -680,8 +682,10 @@ func (c *Crafter) AddMaterialContactFreeWithAutoDetectedKind(ctx context.Context
return nil, fmt.Errorf("failed to auto-discover material kind: %w", err)
}
-// addMaterials adds the incoming material m to the crafting state
-func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) {
+// stageMaterial crafts a material into the in-memory crafting state WITHOUT
+// persisting it. Callers must call stateManager.Write to commit. Splitting the
+// write out lets the archive explode path craft many entries and commit once.
+func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) {
addOptions := &addOpts{}
for _, opt := range opts {
opt(addOptions)
@@ -784,7 +788,16 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M
}
c.CraftingState.Attestation.Materials[m.Name] = mt
- // 6 - Persist state
+ return mt, nil
+}
+
+// addMaterial crafts a single material and persists the crafting state.
+func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) {
+ mt, err := c.stageMaterial(ctx, m, value, casBackend, runtimeAnnotations, opts...)
+ if err != nil {
+ return nil, err
+ }
+
if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil {
return nil, fmt.Errorf("failed to persist crafting state: %w", err)
}
@@ -793,6 +806,136 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M
return mt, nil
}
+// AddMaterialsFromArchive walks an archive and stages every entry as its own
+// material, persisting all of them with one stateManager.Write call. When the
+// walk or the write fails, nothing is persisted and the materials and policy
+// evaluations staged by this call are dropped from the in-memory state.
+//
+// Parameters:
+// - kind: the material type string for every entry (must be a valid
+// CraftingSchema_Material_MaterialType name).
+// - namePrefix: prefix for the sequential material names, not an entry name.
+// - archivePath: path to the archive on disk.
+// - format: archive format (ArchiveZip / ArchiveTar / ArchiveTarGz).
+// - limits: entry/size caps handed to the archive walker (zip-bomb guard).
+func (c *Crafter) AddMaterialsFromArchive(
+ ctx context.Context,
+ attestationID, kind, namePrefix, archivePath string,
+ format materials.ArchiveFormat,
+ casBackend *casclient.CASBackend,
+ runtimeAnnotations map[string]string,
+ limits materials.ArchiveLimits,
+ opts ...AddOpt,
+) ([]*api.Attestation_Material, error) {
+ if err := c.requireStateLoaded(); err != nil {
+ return nil, fmt.Errorf("adding materials from archive: %w", err)
+ }
+
+ // Validate kind up front so we fail fast before touching disk.
+ kindVal, found := schemaapi.CraftingSchema_Material_MaterialType_value[kind]
+ if !found {
+ return nil, fmt.Errorf("%q kind not found. Available options are %q", kind, schemaapi.ListAvailableMaterialKind())
+ }
+ materialKind := schemaapi.CraftingSchema_Material_MaterialType(kindVal)
+
+ // Seed the name allocator with existing material keys so we never collide.
+ existingKeys := make([]string, 0, len(c.CraftingState.Attestation.GetMaterials()))
+ for k := range c.CraftingState.Attestation.GetMaterials() {
+ existingKeys = append(existingKeys, k)
+ }
+ allocator := materials.NewNameAllocator(existingKeys)
+
+ // Create a temporary directory for per-entry files; cleaned up on return.
+ tmpDir, err := os.MkdirTemp("", "chainloop-archive-*")
+ if err != nil {
+ return nil, fmt.Errorf("creating temp dir for archive expansion: %w", err)
+ }
+ defer os.RemoveAll(tmpDir)
+
+ // Snapshot checkpoints for atomic rollback on any error path.
+ var stagedNames []string
+ var result []*api.Attestation_Material
+ policyEvalCheckpoint := len(c.CraftingState.Attestation.PolicyEvaluations)
+
+ rollback := func() {
+ for _, n := range stagedNames {
+ delete(c.CraftingState.Attestation.Materials, n)
+ }
+ c.CraftingState.Attestation.PolicyEvaluations = c.CraftingState.Attestation.PolicyEvaluations[:policyEvalCheckpoint]
+ }
+
+ walkErr := materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error {
+ // Material names are sequential ("<namePrefix>-0", "<namePrefix>-1", … or
+ // "material-N" with no prefix). The original basename is still derived
+ // (with archive "/" semantics, OS-independently) and used for the temp
+ // file so the recorded artifact filename preserves the real name.
+ base := materials.ArchiveEntryBaseName(name)
+ matName := allocator.AllocateSequential(namePrefix)
+
+ // Give each entry its own temp subdirectory (named by the unique material
+ // name) so two entries sharing a basename (e.g. "a/x.json" and "b/x.json")
+ // never collide, while the temp file itself keeps the original basename so
+ // the recorded material metadata preserves the real filename.
+ entryDir, err := os.MkdirTemp(tmpDir, matName+"-*")
+ if err != nil {
+ return fmt.Errorf("creating temp dir for entry %q: %w", name, err)
+ }
+ tmpPath := filepath.Join(entryDir, base)
+ tmp, err := os.Create(tmpPath)
+ if err != nil {
+ return fmt.Errorf("creating temp file for entry %q: %w", name, err)
+ }
+
+ if _, err := io.Copy(tmp, r); err != nil {
+ tmp.Close()
+ return fmt.Errorf("writing entry %q to temp file: %w", name, err)
+ }
+ // Check the Close error so a failed flush does not stage an incomplete file.
+ if err := tmp.Close(); err != nil {
+ return fmt.Errorf("closing temp file for entry %q: %w", name, err)
+ }
+
+ m := &schemaapi.CraftingSchema_Material{
+ Optional: true,
+ Type: materialKind,
+ Name: matName,
+ }
+
+ mt, err := c.stageMaterial(ctx, m, tmpPath, casBackend, runtimeAnnotations, opts...)
+ // Remove the entry's temp subdir immediately after staging to keep disk
+ // usage bounded; the deferred os.RemoveAll(tmpDir) is the safety net.
+ os.RemoveAll(entryDir) //nolint:errcheck // best-effort cleanup
+ if err != nil {
+ return fmt.Errorf("staging entry %q as material %q: %w", name, matName, err)
+ }
+
+ stagedNames = append(stagedNames, matName)
+ result = append(result, mt)
+ return nil
+ })
+
+ if walkErr != nil {
+ // Roll back any in-memory staging: remove material map entries and
+ // truncate policy evaluations back to the pre-call checkpoint.
+ rollback()
+ return nil, fmt.Errorf("expanding archive %q: %w", archivePath, walkErr)
+ }
+
+ if len(result) == 0 { // nothing was staged, so there is nothing to roll back
+ return nil, fmt.Errorf("archive %q contains no processable entries", archivePath)
+ }
+
+ // All entries staged successfully; persist once.
+ if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil {
+ // Roll back in-memory state including policy evaluations.
+ rollback()
+ return nil, fmt.Errorf("failed to persist crafting state: %w", err)
+ }
+
+ c.Logger.Debug().Int("count", len(result)).Str("archive", archivePath).Msg("added archive materials to state")
+ return result, nil
+}
+
// projectContext returns the project name and version from the workflow
// metadata so policy verifiers can pass them to the engine. Either may be
// empty (e.g. dry-run before workflow metadata is populated); built-ins
diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go
index 1a8feb196..28e32a000 100644
--- a/pkg/attestation/crafter/crafter_test.go
+++ b/pkg/attestation/crafter/crafter_test.go
@@ -16,6 +16,9 @@
package crafter_test
import (
+ "archive/tar"
+ "archive/zip"
+ "compress/gzip"
"context"
"fmt"
"os"
@@ -672,6 +675,294 @@ func (s *crafterSuite) TestAddMaterialsAutomatic() {
}
}
+func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() {
+ // Build the fixture in-process so no binary blob is checked in.
+ zipFixture := filepath.Join(s.T().TempDir(), "two-files.zip")
+ buildZip(s.T(), zipFixture, map[string]string{"alpha.txt": "alpha", "beta.txt": "beta"})
+
+ s.Run("happy path: two files produce two materials", func() {
+ runner := runners.NewGeneric()
+ c, err := newInitializedCrafter(s.T(), "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runner)
+ require.NoError(s.T(), err)
+
+ // Nil uploader causes inline storage — no network required.
+ backend := &casclient.CASBackend{}
+
+ mts, err := c.AddMaterialsFromArchive(
+ context.Background(),
+ "",
+ "ARTIFACT",
+ "entry",
+ zipFixture,
+ materials.ArchiveZip,
+ backend,
+ nil,
+ materials.DefaultArchiveLimits(),
+ )
+
+ require.NoError(s.T(), err)
+ assert.Len(s.T(), mts, 2)
+
+ stateMap := c.CraftingState.GetAttestation().GetMaterials()
+ assert.Len(s.T(), stateMap, 2)
+
+ // Material names are sequential (0-indexed) with the --name value as
+ // prefix, independent of the entry order.
+ m1, has1 := stateMap["entry-0"]
+ m2, has2 := stateMap["entry-1"]
+ assert.True(s.T(), has1, "expected material entry-0 in state")
+ assert.True(s.T(), has2, "expected material entry-1 in state")
+
+ // The recorded artifact filename must preserve each original entry
+ // basename, not the sequential material key.
+ gotFilenames := []string{m1.GetArtifact().GetName(), m2.GetArtifact().GetName()}
+ assert.ElementsMatch(s.T(), []string{"alpha.txt", "beta.txt"}, gotFilenames)
+ })
+
+ s.Run("atomicity: over-tight limit leaves state empty", func() {
+ runner := runners.NewGeneric()
+ c, err := newInitializedCrafter(s.T(), "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runner)
+ require.NoError(s.T(), err)
+
+ backend := &casclient.CASBackend{}
+
+ // MaxEntries:1 causes ErrTooManyEntries after the second entry.
+ tightLimits := materials.ArchiveLimits{MaxEntries: 1, MaxTotalSize: 1 << 30}
+
+ _, err = c.AddMaterialsFromArchive(
+ context.Background(),
+ "",
+ "ARTIFACT",
+ "entry",
+ zipFixture,
+ materials.ArchiveZip,
+ backend,
+ nil,
+ tightLimits,
+ )
+
+ require.Error(s.T(), err)
+ assert.ErrorIs(s.T(), err, materials.ErrTooManyEntries)
+
+ // Atomicity: no materials must have been committed.
+ stateMap := c.CraftingState.GetAttestation().GetMaterials()
+ assert.Empty(s.T(), stateMap, "state must be empty after a failed archive expansion")
+
+ // Atomicity: policy evaluations must also be rolled back.
+ assert.Empty(s.T(), c.CraftingState.GetAttestation().GetPolicyEvaluations(), "policy evaluations must be rolled back after a failed archive expansion")
+ })
+}
+
+// buildZip writes a zip archive to path with one regular-file entry per item
+// in files (entry name → content); entries follow map order, which is unspecified.
+func buildZip(t *testing.T, path string, files map[string]string) {
+ t.Helper()
+ f, err := os.Create(path)
+ require.NoError(t, err)
+ defer f.Close()
+ zw := zip.NewWriter(f)
+ for name, content := range files {
+ w, err := zw.Create(name)
+ require.NoError(t, err)
+ _, err = w.Write([]byte(content))
+ require.NoError(t, err)
+ }
+ require.NoError(t, zw.Close()) // Close writes the central directory; must precede f.Close
+}
+
+// buildTarGz writes a gzip-compressed tar to path: regular maps name → content,
+// dirs lists directory entry names, and symlinks maps link name → link target.
+func buildTarGz(t *testing.T, path string, regular map[string]string, dirs []string, symlinks map[string]string) {
+ t.Helper()
+ f, err := os.Create(path)
+ require.NoError(t, err)
+ defer f.Close()
+ gw := gzip.NewWriter(f)
+ tw := tar.NewWriter(gw)
+
+ for name, content := range regular {
+ hdr := &tar.Header{
+ Name: name,
+ Typeflag: tar.TypeReg,
+ Mode: 0o600,
+ Size: int64(len(content)),
+ }
+ require.NoError(t, tw.WriteHeader(hdr))
+ _, err = tw.Write([]byte(content))
+ require.NoError(t, err)
+ }
+ for _, name := range dirs {
+ hdr := &tar.Header{
+ Name: name,
+ Typeflag: tar.TypeDir,
+ Mode: 0o700,
+ }
+ require.NoError(t, tw.WriteHeader(hdr))
+ }
+ for name, target := range symlinks {
+ hdr := &tar.Header{
+ Name: name,
+ Typeflag: tar.TypeSymlink,
+ Linkname: target,
+ }
+ require.NoError(t, tw.WriteHeader(hdr))
+ }
+
+ require.NoError(t, tw.Close()) // close the tar first so its trailer lands in the gzip stream
+ require.NoError(t, gw.Close()) // then finish the gzip stream before f is closed
+}
+
+func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() {
+ const contract = "testdata/contracts/empty_generic.yaml"
+ backend := &casclient.CASBackend{}
+
+ s.Run("name collision: both names present with suffix", func() {
+ dir := s.T().TempDir()
+ p := filepath.Join(dir, "collide.zip")
+ buildZip(s.T(), p, map[string]string{
+ "scan.json": `{"a":1}`,
+ "nested/scan.json": `{"b":2}`,
+ })
+
+ runner := runners.NewGeneric()
+ c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+ require.NoError(s.T(), err)
+
+ mts, err := c.AddMaterialsFromArchive(
+ context.Background(),
+ "", "ARTIFACT", "", p,
+ materials.ArchiveZip, backend, nil,
+ materials.DefaultArchiveLimits(),
+ )
+ require.NoError(s.T(), err)
+ assert.Len(s.T(), mts, 2)
+
+ stateMap := c.CraftingState.GetAttestation().GetMaterials()
+ assert.Len(s.T(), stateMap, 2)
+ // Entries sharing a basename still get distinct sequential names.
+ _, hasMat0 := stateMap["material-0"]
+ _, hasMat1 := stateMap["material-1"]
+ assert.True(s.T(), hasMat0, "expected material material-0 in state")
+ assert.True(s.T(), hasMat1, "expected material material-1 in state")
+ })
+
+ s.Run("name prefix: used as the sequential name prefix", func() {
+ dir := s.T().TempDir()
+ p := filepath.Join(dir, "prefix.zip")
+ buildZip(s.T(), p, map[string]string{
+ "a.json": `{"x":1}`,
+ })
+
+ runner := runners.NewGeneric()
+ c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+ require.NoError(s.T(), err)
+
+ mts, err := c.AddMaterialsFromArchive(
+ context.Background(),
+ "", "ARTIFACT", "sboms", p,
+ materials.ArchiveZip, backend, nil,
+ materials.DefaultArchiveLimits(),
+ )
+ require.NoError(s.T(), err)
+ assert.Len(s.T(), mts, 1)
+
+ stateMap := c.CraftingState.GetAttestation().GetMaterials()
+ assert.Len(s.T(), stateMap, 1)
+ _, found := stateMap["sboms-0"]
+ assert.True(s.T(), found, "expected material sboms-0 in state")
+ })
+
+ s.Run("skip dirs and symlinks in tar.gz: only regular file becomes material", func() {
+ dir := s.T().TempDir()
+ p := filepath.Join(dir, "mixed.tar.gz")
+ buildTarGz(s.T(), p,
+ map[string]string{"real.txt": "hello"},
+ []string{"adir/"},
+ map[string]string{"link.txt": "real.txt"},
+ )
+
+ runner := runners.NewGeneric()
+ c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+ require.NoError(s.T(), err)
+
+ mts, err := c.AddMaterialsFromArchive(
+ context.Background(),
+ "", "ARTIFACT", "", p,
+ materials.ArchiveTarGz, backend, nil,
+ materials.DefaultArchiveLimits(),
+ )
+ require.NoError(s.T(), err)
+ assert.Len(s.T(), mts, 1, "only the regular file must become a material")
+
+ stateMap := c.CraftingState.GetAttestation().GetMaterials()
+ assert.Len(s.T(), stateMap, 1)
+ realMat, hasReal := stateMap["material-0"]
+ assert.True(s.T(), hasReal, "expected material material-0 in state")
+ // The original filename is still preserved in the artifact metadata.
+ assert.Equal(s.T(), "real.txt", realMat.GetArtifact().GetName())
+ })
+
+ s.Run("traversal rejection: ../escape.txt entry causes error and empty state", func() {
+ dir := s.T().TempDir()
+ p := filepath.Join(dir, "evil.tar.gz")
+ buildTarGz(s.T(), p,
+ map[string]string{"../escape.txt": "evil"},
+ nil, nil,
+ )
+
+ runner := runners.NewGeneric()
+ c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+ require.NoError(s.T(), err)
+
+ _, err = c.AddMaterialsFromArchive(
+ context.Background(),
+ "", "ARTIFACT", "", p,
+ materials.ArchiveTarGz, backend, nil,
+ materials.DefaultArchiveLimits(),
+ )
+ require.Error(s.T(), err, "path-traversal entry must cause an error")
+ assert.ErrorIs(s.T(), err, materials.ErrUnsafeEntry)
+
+ stateMap := c.CraftingState.GetAttestation().GetMaterials()
+ assert.Empty(s.T(), stateMap, "state must be empty after traversal rejection (atomic rollback)")
+ })
+
+ s.Run("tar.gz happy path: two regular files produce two materials", func() {
+ dir := s.T().TempDir()
+ p := filepath.Join(dir, "two.tar.gz")
+ buildTarGz(s.T(), p,
+ map[string]string{
+ "alpha.txt": "aaa",
+ "beta.txt": "bbb",
+ },
+ nil, nil,
+ )
+
+ runner := runners.NewGeneric()
+ c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+ require.NoError(s.T(), err)
+
+ mts, err := c.AddMaterialsFromArchive(
+ context.Background(),
+ "", "ARTIFACT", "", p,
+ materials.ArchiveTarGz, backend, nil,
+ materials.DefaultArchiveLimits(),
+ )
+ require.NoError(s.T(), err)
+ assert.Len(s.T(), mts, 2)
+
+ stateMap := c.CraftingState.GetAttestation().GetMaterials()
+ assert.Len(s.T(), stateMap, 2)
+ m1, has1 := stateMap["material-0"]
+ m2, has2 := stateMap["material-1"]
+ assert.True(s.T(), has1, "expected material material-0 in state")
+ assert.True(s.T(), has2, "expected material material-1 in state")
+ // Original filenames preserved regardless of the sequential keys.
+ gotFilenames := []string{m1.GetArtifact().GetName(), m2.GetArtifact().GetName()}
+ assert.ElementsMatch(s.T(), []string{"alpha.txt", "beta.txt"}, gotFilenames)
+ })
+}
+
func loadSchema(path string) (*schemaapi.CraftingSchema, error) {
// Extract json formatted data
content, err := os.ReadFile(filepath.Clean(path))
diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go
new file mode 100644
index 000000000..9e3a80f70
--- /dev/null
+++ b/pkg/attestation/crafter/materials/archive.go
@@ -0,0 +1,357 @@
+//
+// Copyright 2026 The Chainloop Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package materials
+
+import (
+ "archive/tar"
+ "archive/zip"
+ "bytes"
+ "compress/gzip"
+ "errors"
+ "fmt"
+ "io"
+ "io/fs"
+ "os"
+ "path"
+ "strings"
+ "syscall"
+
+ schemaapi "github.com/chainloop-dev/chainloop/app/controlplane/api/workflowcontract/v1"
+)
+
+// ArchiveFormat enumerates the archive containers this package can walk. Its
+// zero value, ArchiveNone, stands for "not an archive".
+type ArchiveFormat int
+
+// Supported archive formats. The numeric values are spelled out so that they
+// stay stable if the list is ever reordered.
+const (
+	// ArchiveNone marks a value that is not a supported archive.
+	ArchiveNone ArchiveFormat = 0
+	// ArchiveZip is a zip container.
+	ArchiveZip ArchiveFormat = 1
+	// ArchiveTar is an uncompressed tar stream.
+	ArchiveTar ArchiveFormat = 2
+	// ArchiveTarGz is a gzip-compressed tar stream.
+	ArchiveTarGz ArchiveFormat = 3
+)
+
+// DetectArchive classifies path as one of the supported archive formats. The
+// case-insensitive file extension is consulted first; when no known extension
+// matches, detection is delegated to detectByMagic so that a renamed archive
+// is still recognised. A value that is not an archive yields
+// (ArchiveNone, nil).
+func DetectArchive(path string) (ArchiveFormat, error) {
+	name := strings.ToLower(path)
+
+	if strings.HasSuffix(name, ".zip") {
+		return ArchiveZip, nil
+	}
+	if strings.HasSuffix(name, ".tar.gz") || strings.HasSuffix(name, ".tgz") {
+		return ArchiveTarGz, nil
+	}
+	if strings.HasSuffix(name, ".tar") {
+		return ArchiveTar, nil
+	}
+
+	// No recognised extension: fall back to sniffing the content.
+	return detectByMagic(path)
+}
+
+// detectByMagic inspects the leading bytes of the file at path and reports the
+// archive format they identify, or ArchiveNone when nothing matches.
+//
+// A path that does not exist, a path whose parent component is not a
+// directory, and a path that names a directory are all reported as
+// (ArchiveNone, nil). Any other failure to open or read the file is returned.
+//
+// A gzip stream is only reported as ArchiveTarGz when its decompressed payload
+// starts with a tar header; a plain gzip-compressed file (e.g. "sbom.json.gz")
+// is not an archive that can be walked.
+func detectByMagic(path string) (ArchiveFormat, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		// These errors mean the value is not a file path at all (e.g. "hello
+		// world" for STRING, or "registry/app:v1" for CONTAINER_IMAGE where
+		// "registry" happens to be a regular file in the working directory, which
+		// yields ENOTDIR); treat them as a non-archive so callers passing non-file
+		// values are not surprised. Any other error (permissions, I/O) is real and
+		// must surface.
+		if errors.Is(err, fs.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) {
+			return ArchiveNone, nil
+		}
+		return ArchiveNone, fmt.Errorf("opening %q: %w", path, err)
+	}
+	defer f.Close()
+
+	// A directory opens fine but cannot be read; it is never an archive.
+	if info, statErr := f.Stat(); statErr == nil && info.IsDir() {
+		return ArchiveNone, nil
+	}
+
+	header, err := readMagicHeader(f)
+	if err != nil {
+		return ArchiveNone, fmt.Errorf("reading %q: %w", path, err)
+	}
+
+	switch {
+	case bytes.HasPrefix(header, []byte("PK\x03\x04")), bytes.HasPrefix(header, []byte("PK\x05\x06")):
+		return ArchiveZip, nil
+	case bytes.HasPrefix(header, []byte{0x1f, 0x8b}):
+		// The gzip magic only proves compression. Replay the bytes already
+		// consumed in front of the rest of the file and look inside.
+		if gzipHoldsTar(io.MultiReader(bytes.NewReader(header), f)) {
+			return ArchiveTarGz, nil
+		}
+	case hasUstarMagic(header):
+		return ArchiveTar, nil
+	}
+
+	return ArchiveNone, nil
+}
+
+// readMagicHeader reads up to 512 bytes from r, which is enough for the
+// gzip/zip magic and the tar "ustar" marker at offset 257. Unlike a single
+// Read call it keeps reading until the buffer is full or the stream ends, and
+// it only tolerates end-of-stream errors; anything else is returned.
+func readMagicHeader(r io.Reader) ([]byte, error) {
+	buf := make([]byte, 512)
+	n, err := io.ReadFull(r, buf)
+	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
+		return nil, err
+	}
+	return buf[:n], nil
+}
+
+// hasUstarMagic reports whether header carries the tar "ustar" marker at
+// offset 257.
+func hasUstarMagic(header []byte) bool {
+	return len(header) >= 262 && bytes.Equal(header[257:262], []byte("ustar"))
+}
+
+// gzipHoldsTar reports whether r is a gzip stream whose decompressed payload
+// begins with a tar header. A stream that cannot be decompressed is reported
+// as false: it could not be walked as a tar.gz anyway.
+func gzipHoldsTar(r io.Reader) bool {
+	gz, err := gzip.NewReader(r)
+	if err != nil {
+		return false
+	}
+	defer gz.Close()
+
+	payload, err := readMagicHeader(gz)
+	return err == nil && hasUstarMagic(payload)
+}
+
+// Sentinel errors reported while walking an archive. Match them with
+// errors.Is; they are usually wrapped with the offending entry name.
+var (
+	// ErrArchiveTooLarge signals that the running uncompressed size of the
+	// archive went above the configured maximum.
+	ErrArchiveTooLarge = errors.New("archive exceeds the maximum uncompressed size")
+	// ErrTooManyEntries signals that the archive holds more qualifying entries
+	// than the configured maximum.
+	ErrTooManyEntries = errors.New("archive exceeds the maximum number of entries")
+	// ErrUnsafeEntry signals that an entry's path is absolute or escapes the
+	// extraction root.
+	ErrUnsafeEntry = errors.New("unsafe entry path in archive")
+)
+
+// ArchiveLimits caps how far an archive may expand while it is walked, as a
+// defence against zip bombs.
+type ArchiveLimits struct {
+	// MaxEntries is the largest number of qualifying entries accepted.
+	MaxEntries int
+	// MaxTotalSize is the largest cumulative uncompressed size accepted, in
+	// bytes.
+	MaxTotalSize int64
+}
+
+// DefaultArchiveLimits returns the default caps: at most 10000 entries and at
+// most 1 GiB of uncompressed data in total.
+func DefaultArchiveLimits() ArchiveLimits {
+	const (
+		defaultMaxEntries = 10000
+		oneGiB            = int64(1) << 30
+	)
+
+	var limits ArchiveLimits
+	limits.MaxEntries = defaultMaxEntries
+	limits.MaxTotalSize = oneGiB
+	return limits
+}
+
+// capReader decorates an io.Reader: every byte that passes through it is added
+// to a counter shared through the total pointer, and ErrArchiveTooLarge is
+// reported as soon as that counter goes above max. Counting the bytes actually
+// read means the sizes declared inside the archive never have to be trusted.
+type capReader struct {
+	r     io.Reader
+	total *int64
+	max   int64
+}
+
+// Read forwards to the wrapped reader and updates the shared counter. While
+// the counter is within max the wrapped reader's result is returned untouched;
+// once it is exceeded the byte count is still returned but the error is
+// replaced by ErrArchiveTooLarge.
+func (c *capReader) Read(p []byte) (int, error) {
+	read, readErr := c.r.Read(p)
+	*c.total += int64(read)
+	if *c.total <= c.max {
+		return read, readErr
+	}
+	return read, ErrArchiveTooLarge
+}
+
+// WalkArchiveEntries invokes yield once for each entry that the format-specific
+// walker (walkZip or walkTar) hands over, enforcing limits along the way.
+//
+// For every entry, in this order:
+//   - an unsafe name (see safeArchivePath) aborts the walk with an error
+//     wrapping ErrUnsafeEntry;
+//   - going above limits.MaxEntries aborts the walk with ErrTooManyEntries;
+//   - yield receives a reader that fails with ErrArchiveTooLarge once the bytes
+//     read across all entries exceed limits.MaxTotalSize.
+//
+// An error returned by yield is wrapped with the entry name and stops the
+// walk. An unknown format is an error.
+func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error {
+	var (
+		bytesRead int64
+		entries   int
+	)
+
+	handle := func(name string, r io.Reader) error {
+		if !safeArchivePath(name) {
+			return fmt.Errorf("%w: %q", ErrUnsafeEntry, name)
+		}
+
+		entries++
+		if entries > limits.MaxEntries {
+			return ErrTooManyEntries
+		}
+
+		// The byte counter is shared, so the size cap applies to the archive
+		// as a whole rather than to each entry.
+		capped := &capReader{r: r, total: &bytesRead, max: limits.MaxTotalSize}
+		if err := yield(name, capped); err != nil {
+			return fmt.Errorf("processing entry %q: %w", name, err)
+		}
+		return nil
+	}
+
+	if format == ArchiveZip {
+		return walkZip(path, handle)
+	}
+	if format == ArchiveTar || format == ArchiveTarGz {
+		return walkTar(path, format == ArchiveTarGz, handle)
+	}
+	return fmt.Errorf("unsupported archive format")
+}
+
+// safeArchivePath reports whether name is a relative path that stays inside
+// the extraction root. It rejects absolute paths (including Windows
+// drive-letter and UNC forms) and any path whose cleaned form climbs above its
+// starting directory. A filename that merely contains ".." as a substring
+// (e.g. "foo..bar.json") is accepted; only real ".." path components count.
+func safeArchivePath(name string) bool {
+	normalized := strings.ReplaceAll(name, "\\", "/")
+	// Reject absolute paths, including Windows drive-letter (e.g. "C:/x") and
+	// UNC paths (which normalize to a leading "/").
+	if strings.HasPrefix(normalized, "/") || hasWindowsDriveLetter(normalized) {
+		return false
+	}
+	// Clean the path on its own, without gluing it to a made-up root. After
+	// path.Clean, a relative path escapes its starting directory exactly when
+	// it is ".." or begins with "../". Checking against a virtual root such as
+	// "/root" is not equivalent: "../root/x" would clean back to "/root/x" and
+	// be accepted even though it leaves the root.
+	clean := path.Clean(normalized)
+	return clean != ".." && !strings.HasPrefix(clean, "../")
+}
+
+// hasWindowsDriveLetter reports whether name begins with a Windows drive-letter
+// prefix such as "C:" or "c:/", which denotes an absolute path on Windows.
+func hasWindowsDriveLetter(name string) bool {
+	if len(name) < 2 || name[1] != ':' {
+		return false
+	}
+	c := name[0]
+	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+}
+
+// walkZip opens the zip file at p and calls visit, in archive order, for every
+// non-empty regular file it contains. The reader passed to visit is closed as
+// soon as visit returns. The first error from opening the archive, opening an
+// entry, or visit stops the walk and is returned.
+func walkZip(p string, visit func(name string, r io.Reader) error) error {
+	zr, err := zip.OpenReader(p)
+	if err != nil {
+		return fmt.Errorf("opening zip: %w", err)
+	}
+	defer zr.Close()
+
+	for _, f := range zr.File {
+		// Only regular files become materials. Checking IsRegular (rather than
+		// listing directory and symlink explicitly) also drops FIFOs, devices
+		// and sockets, matching what the tar walker does via the typeflag.
+		// Empty-entry skipping is intentional per the explode design (an empty
+		// evidence file produces no material). Note: type detection relies on
+		// Unix mode bits stored in the zip; archives written without Unix
+		// metadata won't carry the symlink bit, so such a symlink would be
+		// treated as a regular file (its content being the stored target path).
+		if !f.Mode().IsRegular() || f.UncompressedSize64 == 0 {
+			continue
+		}
+		rc, err := f.Open()
+		if err != nil {
+			return fmt.Errorf("opening entry %q: %w", f.Name, err)
+		}
+		err = visit(f.Name, rc)
+		rc.Close()
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// walkTar opens the tar file at p (gunzipping it first when gzipped is true)
+// and calls visit, in archive order, for every non-empty regular file. The
+// reader passed to visit is the tar reader positioned on that entry. The first
+// error from opening, decoding, or visit stops the walk and is returned; a
+// clean end of archive returns nil.
+func walkTar(p string, gzipped bool, visit func(name string, r io.Reader) error) error {
+	file, err := os.Open(p)
+	if err != nil {
+		return fmt.Errorf("opening tar: %w", err)
+	}
+	defer file.Close()
+
+	stream := io.Reader(file)
+	if gzipped {
+		zr, err := gzip.NewReader(file)
+		if err != nil {
+			return fmt.Errorf("opening gzip: %w", err)
+		}
+		defer zr.Close()
+		stream = zr
+	}
+
+	reader := tar.NewReader(stream)
+	for {
+		header, err := reader.Next()
+		switch {
+		case errors.Is(err, io.EOF):
+			return nil
+		case err != nil:
+			return fmt.Errorf("reading tar: %w", err)
+		}
+
+		// A material is a regular file with content. Directories, symlinks,
+		// hardlinks and other special entries are filtered by the typeflag;
+		// empty files are dropped on purpose (an empty evidence file produces
+		// no material).
+		isMaterial := header.Typeflag == tar.TypeReg && header.Size != 0
+		if !isMaterial {
+			continue
+		}
+		if err := visit(header.Name, reader); err != nil {
+			return err
+		}
+	}
+}
+
+// explodableKinds holds the names of the material kinds for which an archive
+// value is expanded into one material per entry. Any kind not listed here
+// (ARTIFACT, EVIDENCE, ZAP_DAST_ZIP, …) has its archive recorded whole, so a
+// regular zip can still be provided as a single material. Add to the list as
+// more kinds gain a meaningful "bundle of the same kind" archive form.
+var explodableKinds = func() map[string]struct{} {
+	names := []string{
+		schemaapi.CraftingSchema_Material_SBOM_CYCLONEDX_JSON.String(),
+		schemaapi.CraftingSchema_Material_SBOM_SPDX_JSON.String(),
+		schemaapi.CraftingSchema_Material_SARIF.String(),
+	}
+
+	set := make(map[string]struct{}, len(names))
+	for _, n := range names {
+		set[n] = struct{}{}
+	}
+	return set
+}()
+
+// IsExplodableKind reports whether an archive supplied for the given kind is
+// expanded into one material per entry (true) or recorded whole (false).
+func IsExplodableKind(kind string) bool {
+	if _, listed := explodableKinds[kind]; listed {
+		return true
+	}
+	return false
+}
+
+// ArchiveEntryBaseName returns the last element of an archive entry name. It
+// always applies "/"-separated semantics, whatever the host OS: backslashes
+// are first rewritten to "/" so that names written on Windows yield the same
+// basename everywhere (filepath.Base would honour "\\" only on Windows and so
+// give OS-dependent answers).
+func ArchiveEntryBaseName(name string) string {
+	slashed := strings.ReplaceAll(name, `\`, "/")
+	return path.Base(slashed)
+}
+
+// defaultMaterialName is the base that AllocateSequential falls back to when
+// the caller's prefix sanitizes to the empty string (empty or symbol-only
+// input), so generated names read "material-0", "material-1", and so on.
+const defaultMaterialName = "material"
+
+// SanitizeMaterialName lowercases s, treats every run of characters outside
+// [a-z0-9] as a separator, and joins the remaining pieces with single "-"
+// characters. Separators at either end leave no trace, so the result never
+// starts or ends with "-". When no usable character remains the result is "";
+// callers supply their own fallback.
+func SanitizeMaterialName(s string) string {
+	isSeparator := func(r rune) bool {
+		alnum := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
+		return !alnum
+	}
+	pieces := strings.FieldsFunc(strings.ToLower(s), isSeparator)
+	return strings.Join(pieces, "-")
+}
+
+// NameAllocator hands out unique material names of the form "<base>-<n>",
+// where n comes from a counter that starts at 0. It remembers every name it
+// was seeded with or has handed out, so a generated name never collides with
+// one of those.
+type NameAllocator struct {
+	used map[string]struct{}
+	seq  int
+}
+
+// NewNameAllocator returns an allocator whose set of taken names is
+// initialised from existing. A nil or empty slice is fine.
+func NewNameAllocator(existing []string) *NameAllocator {
+	alloc := &NameAllocator{used: make(map[string]struct{}, len(existing))}
+	for i := range existing {
+		alloc.used[existing[i]] = struct{}{}
+	}
+	return alloc
+}
+
+// AllocateSequential returns the next free "<base>-<n>" material name and
+// marks it as taken. base is prefix passed through SanitizeMaterialName, or
+// "material" when that leaves nothing (empty or symbol-only prefix), giving
+// material-0, material-1, …. n is a zero-based counter kept on the allocator:
+// it advances on every candidate tried, so it keeps growing across calls and
+// steps over names that are already in use.
+func (a *NameAllocator) AllocateSequential(prefix string) string {
+	base := SanitizeMaterialName(prefix)
+	if base == "" {
+		base = defaultMaterialName
+	}
+
+	for {
+		name := fmt.Sprintf("%s-%d", base, a.seq)
+		a.seq++
+		if _, clash := a.used[name]; clash {
+			continue
+		}
+		a.used[name] = struct{}{}
+		return name
+	}
+}
diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go
new file mode 100644
index 000000000..127cb3266
--- /dev/null
+++ b/pkg/attestation/crafter/materials/archive_test.go
@@ -0,0 +1,286 @@
+// Copyright 2026 The Chainloop Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package materials
+
+import (
+ "archive/tar"
+ "archive/zip"
+ "compress/gzip"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// writeZip builds a zip archive at dir/name holding one entry per element of
+// files (entry name -> content) and returns the archive's path. Any failure
+// aborts the calling test.
+func writeZip(t *testing.T, dir, name string, files map[string]string) string {
+	t.Helper()
+
+	archivePath := filepath.Join(dir, name)
+	out, err := os.Create(archivePath)
+	require.NoError(t, err)
+	defer out.Close()
+
+	archive := zip.NewWriter(out)
+	for entry, body := range files {
+		entryWriter, err := archive.Create(entry)
+		require.NoError(t, err)
+		_, err = io.WriteString(entryWriter, body)
+		require.NoError(t, err)
+	}
+	require.NoError(t, archive.Close())
+
+	return archivePath
+}
+
+// writeTarGz builds a gzip-compressed tar at dir/name holding one regular-file
+// entry per element of files (entry name -> content) and returns the archive's
+// path. Any failure aborts the calling test.
+func writeTarGz(t *testing.T, dir, name string, files map[string]string) string {
+	t.Helper()
+
+	archivePath := filepath.Join(dir, name)
+	out, err := os.Create(archivePath)
+	require.NoError(t, err)
+	defer out.Close()
+
+	compressor := gzip.NewWriter(out)
+	archive := tar.NewWriter(compressor)
+	for entry, body := range files {
+		header := &tar.Header{Name: entry, Mode: 0o600, Size: int64(len(body)), Typeflag: tar.TypeReg}
+		require.NoError(t, archive.WriteHeader(header))
+		_, err = io.WriteString(archive, body)
+		require.NoError(t, err)
+	}
+	// Close inner-to-outer so the tar trailer is flushed into the gzip stream.
+	require.NoError(t, archive.Close())
+	require.NoError(t, compressor.Close())
+
+	return archivePath
+}
+
+// writeTar builds an uncompressed tar at dir/name holding one regular-file
+// entry per element of files (entry name -> content) and returns the archive's
+// path. Any failure aborts the calling test.
+func writeTar(t *testing.T, dir, name string, files map[string]string) string {
+	t.Helper()
+
+	archivePath := filepath.Join(dir, name)
+	out, err := os.Create(archivePath)
+	require.NoError(t, err)
+	defer out.Close()
+
+	archive := tar.NewWriter(out)
+	for entry, body := range files {
+		header := &tar.Header{Name: entry, Mode: 0o600, Size: int64(len(body)), Typeflag: tar.TypeReg}
+		require.NoError(t, archive.WriteHeader(header))
+		_, err = io.WriteString(archive, body)
+		require.NoError(t, err)
+	}
+	require.NoError(t, archive.Close())
+
+	return archivePath
+}
+
+// TestDetectArchive checks detection by extension, detection by magic bytes
+// for an extensionless zip, and that non-file values are reported as
+// non-archives without an error.
+func TestDetectArchive(t *testing.T) {
+	tmp := t.TempDir()
+	sample := map[string]string{"x.txt": "hi"}
+
+	zipFile := writeZip(t, tmp, "a.zip", sample)
+	tarGzFile := writeTarGz(t, tmp, "a.tar.gz", sample)
+	tarFile := writeTar(t, tmp, "a.tar", sample)
+	tgzFile := writeTarGz(t, tmp, "a.tgz", sample)
+
+	plainFile := filepath.Join(tmp, "app.bin")
+	require.NoError(t, os.WriteFile(plainFile, []byte("not an archive"), 0o600))
+
+	// A copy of the zip stored without an extension: only the magic bytes can
+	// identify it.
+	bareZip := filepath.Join(tmp, "noext")
+	require.NoError(t, os.WriteFile(bareZip, mustRead(t, zipFile), 0o600))
+
+	type detectCase struct {
+		name string
+		path string
+		want ArchiveFormat
+	}
+	cases := []detectCase{
+		{name: "zip by extension", path: zipFile, want: ArchiveZip},
+		{name: "tar.gz by extension", path: tarGzFile, want: ArchiveTarGz},
+		{name: "tar by extension", path: tarFile, want: ArchiveTar},
+		{name: "tgz by extension", path: tgzFile, want: ArchiveTarGz},
+		{name: "plain file", path: plainFile, want: ArchiveNone},
+		{name: "zip without extension via magic", path: bareZip, want: ArchiveZip},
+		// Values that are not files at all must come back as non-archives,
+		// with no error.
+		{name: "non-existent value", path: filepath.Join(tmp, "nope"), want: ArchiveNone},
+		// When the first path segment is an existing regular file, opening
+		// fails with ENOTDIR (think CONTAINER_IMAGE "registry/app:v1"); that
+		// is still a non-archive.
+		{name: "path segment is a file (ENOTDIR)", path: filepath.Join(plainFile, "app:v1"), want: ArchiveNone},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			format, err := DetectArchive(c.path)
+			require.NoError(t, err)
+			assert.Equal(t, c.want, format)
+		})
+	}
+}
+
+// mustRead returns the content of the file at p, aborting the calling test if
+// it cannot be read.
+func mustRead(t *testing.T, p string) []byte {
+	t.Helper()
+
+	data, readErr := os.ReadFile(p)
+	require.NoError(t, readErr)
+	return data
+}
+
+// TestWalkArchiveEntries covers the happy path (regular files are yielded,
+// directories are not), both expansion limits, and rejection of entries whose
+// path escapes the extraction root.
+func TestWalkArchiveEntries(t *testing.T) {
+	dir := t.TempDir()
+
+	t.Run("yields regular files, skips dirs", func(t *testing.T) {
+		// Build a zip with a directory entry + two files.
+		p := filepath.Join(dir, "files.zip")
+		f, err := os.Create(p)
+		require.NoError(t, err)
+		zw := zip.NewWriter(f)
+		_, err = zw.Create("nested/") // directory entry
+		require.NoError(t, err)
+		for _, n := range []string{"a.json", "nested/b.json"} {
+			w, err := zw.Create(n)
+			require.NoError(t, err)
+			_, err = w.Write([]byte("{}"))
+			require.NoError(t, err)
+		}
+		require.NoError(t, zw.Close())
+		require.NoError(t, f.Close())
+
+		var got []string
+		err = WalkArchiveEntries(p, ArchiveZip, DefaultArchiveLimits(), func(name string, r io.Reader) error {
+			// Propagate read failures so they fail the walk instead of
+			// showing up as a confusing content mismatch.
+			b, err := io.ReadAll(r)
+			if err != nil {
+				return err
+			}
+			assert.Equal(t, "{}", string(b))
+			got = append(got, name)
+			return nil
+		})
+		require.NoError(t, err)
+		assert.ElementsMatch(t, []string{"a.json", "nested/b.json"}, got)
+	})
+
+	t.Run("max entries exceeded", func(t *testing.T) {
+		p := writeTarGz(t, dir, "many.tar.gz", map[string]string{"a": "1", "b": "2", "c": "3"})
+		err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 2, MaxTotalSize: 1 << 30}, func(string, io.Reader) error { return nil })
+		require.ErrorIs(t, err, ErrTooManyEntries)
+	})
+
+	t.Run("max total size exceeded while streaming", func(t *testing.T) {
+		p := writeTarGz(t, dir, "big.tar.gz", map[string]string{"a": strings.Repeat("x", 1000)})
+		err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 100, MaxTotalSize: 100}, func(_ string, r io.Reader) error {
+			_, err := io.ReadAll(r)
+			return err
+		})
+		require.ErrorIs(t, err, ErrArchiveTooLarge)
+	})
+
+	t.Run("rejects traversal via tar with .. entries", func(t *testing.T) {
+		// tar allows .. in header, so we can test via tar.
+		p := writeTarGz(t, dir, "evil.tar.gz", map[string]string{"../escape.txt": "x"})
+
+		yielded := false
+		err := WalkArchiveEntries(p, ArchiveTarGz, DefaultArchiveLimits(), func(string, io.Reader) error {
+			yielded = true
+			return nil
+		})
+		// Pin the sentinel, not just "some error": an unrelated failure (e.g.
+		// a corrupt fixture) must not make this test pass.
+		require.ErrorIs(t, err, ErrUnsafeEntry, "entry ../escape.txt must be rejected")
+		assert.False(t, yielded, "an unsafe entry must never reach the callback")
+	})
+}
+
+// TestSafeArchivePath checks which entry names are accepted: absolute,
+// drive-letter and ".."-escaping paths are refused, ordinary relative paths
+// are allowed.
+func TestSafeArchivePath(t *testing.T) {
+	type pathCase struct {
+		name string
+		path string
+		want bool
+	}
+	cases := []pathCase{
+		{name: "absolute path", path: "/etc/passwd", want: false},
+		{name: "windows drive-letter backslash", path: "C:\\Windows\\system32", want: false},
+		{name: "windows drive-letter forward slash", path: "c:/windows/system32", want: false},
+		{name: "path traversal", path: "../escape.txt", want: false},
+		{name: "nested path traversal", path: "foo/../../../etc/passwd", want: false},
+		{name: "double dot in filename is ok", path: "foo..bar.json", want: true},
+		{name: "escape via nested double dot", path: "a/../../etc/passwd", want: false},
+		{name: "valid nested path", path: "a/b.txt", want: true},
+		{name: "valid simple path", path: "file.txt", want: true},
+		{name: "valid with subdirs", path: "nested/dir/file.txt", want: true},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			assert.Equal(t, c.want, safeArchivePath(c.path))
+		})
+	}
+}
+
+// TestArchiveEntryBaseName checks that the basename is derived the same way
+// for "/"-separated, "\"-separated and mixed entry names.
+func TestArchiveEntryBaseName(t *testing.T) {
+	type baseCase struct {
+		name string
+		in   string
+		want string
+	}
+	cases := []baseCase{
+		{name: "simple", in: "scan.json", want: "scan.json"},
+		{name: "forward-slash path", in: "nested/dir/scan.json", want: "scan.json"},
+		{name: "backslash path resolves the same on any OS", in: "nested\\dir\\scan.json", want: "scan.json"},
+		{name: "mixed separators", in: "a/b\\c.json", want: "c.json"},
+		{name: "no directory", in: "report.sarif", want: "report.sarif"},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			got := ArchiveEntryBaseName(c.in)
+			assert.Equal(t, c.want, got)
+		})
+	}
+}
+
+// TestSanitizeMaterialName checks lowercasing, collapsing of symbol runs into
+// a single "-", trimming at the edges, and the empty result for inputs with
+// nothing usable.
+func TestSanitizeMaterialName(t *testing.T) {
+	type sanitizeCase struct {
+		input    string
+		expected string
+	}
+	cases := []sanitizeCase{
+		{input: "scan.json", expected: "scan-json"},
+		{input: "results.XML", expected: "results-xml"},
+		{input: "weird__name!!", expected: "weird-name"},
+		// Nothing usable is left, so the result is empty; callers supply
+		// their own fallback.
+		{input: "___", expected: ""},
+		{input: "", expected: ""},
+	}
+
+	for i := range cases {
+		got := SanitizeMaterialName(cases[i].input)
+		assert.Equal(t, cases[i].expected, got)
+	}
+}
+
+// TestNameAllocatorSequential checks zero-based numbering, prefix
+// sanitization, skipping of pre-existing names, and the "material" fallback.
+func TestNameAllocatorSequential(t *testing.T) {
+	t.Run("default prefix numbers from 0", func(t *testing.T) {
+		alloc := NewNameAllocator(nil)
+		for _, want := range []string{"material-0", "material-1", "material-2"} {
+			assert.Equal(t, want, alloc.AllocateSequential(""))
+		}
+	})
+
+	t.Run("custom prefix is sanitized and numbered", func(t *testing.T) {
+		alloc := NewNameAllocator(nil)
+		for _, want := range []string{"q3-scans-0", "q3-scans-1"} {
+			assert.Equal(t, want, alloc.AllocateSequential("Q3 Scans"))
+		}
+	})
+
+	t.Run("skips names already present in the attestation", func(t *testing.T) {
+		taken := []string{"material-0", "material-1"}
+		alloc := NewNameAllocator(taken)
+		for _, want := range []string{"material-2", "material-3"} {
+			assert.Equal(t, want, alloc.AllocateSequential(""))
+		}
+	})
+
+	t.Run("symbol-only prefix falls back to material", func(t *testing.T) {
+		alloc := NewNameAllocator(nil)
+		got := alloc.AllocateSequential("!!!")
+		assert.Equal(t, "material-0", got)
+	})
+}
+
+// TestIsExplodableKind checks that only SBOM and SARIF kinds are expanded and
+// that every other kind, including the empty string, is recorded whole.
+func TestIsExplodableKind(t *testing.T) {
+	// Explodable: SBOM and SARIF bundles.
+	explodable := []string{"SBOM_CYCLONEDX_JSON", "SBOM_SPDX_JSON", "SARIF"}
+	for _, kind := range explodable {
+		assert.True(t, IsExplodableKind(kind))
+	}
+
+	// Not explodable: recorded whole even when a zip/tar is provided.
+	whole := []string{"ARTIFACT", "EVIDENCE", "ZAP_DAST_ZIP", ""}
+	for _, kind := range whole {
+		assert.False(t, IsExplodableKind(kind))
+	}
+}