diff --git a/app/cli/cmd/attestation_add.go b/app/cli/cmd/attestation_add.go
index 37dd62353..911ec6651 100644
--- a/app/cli/cmd/attestation_add.go
+++ b/app/cli/cmd/attestation_add.go
@@ -20,6 +20,7 @@ import (
"fmt"
"os"
+ "code.cloudfoundry.org/bytefmt"
"github.com/jedib0t/go-pretty/v6/table"
"github.com/muesli/reflow/wrap"
"github.com/spf13/cobra"
@@ -40,6 +41,8 @@ func newAttestationAddCmd() *cobra.Command {
var annotationsFlag []string
var noStrictValidation bool
var policyInputFromFileFlag []string
+ var maxExtractEntries int
+ var maxExtractSize string
// OCI registry credentials can be passed as flags or environment variables
var registryServer, registryUsername, registryPassword string
@@ -74,6 +77,11 @@ func newAttestationAddCmd() *cobra.Command {
chainloop attestation add --name sigcheck --value sigcheckResult.csv --kind SYSINTERNALS_SIGCHECK \
--policy-input-from-file ignored_paths=exception.csv:Path`,
RunE: func(cmd *cobra.Command, _ []string) error {
+ maxExtractSizeBytes, err := bytefmt.ToBytes(maxExtractSize)
+ if err != nil {
+ return fmt.Errorf("invalid --max-extract-size %q: %w", maxExtractSize, err)
+ }
+
a, err := action.NewAttestationAdd(
&action.AttestationAddOpts{
ActionsOpts: ActionOpts,
@@ -85,6 +93,8 @@ func newAttestationAddCmd() *cobra.Command {
RegistryPassword: registryPassword,
LocalStatePath: attestationLocalStatePath,
NoStrictValidation: noStrictValidation,
+ MaxExtractEntries: maxExtractEntries,
+ MaxExtractSize: int64(maxExtractSizeBytes),
},
)
if err != nil {
@@ -122,22 +132,34 @@ func newAttestationAddCmd() *cobra.Command {
return fmt.Errorf("loading resource: %w", err)
}
}
- // TODO: take the material output and show render it
resp, err := a.Run(cmd.Context(), attestationID, name, rawValuePath, kind, annotations, policyInputFiles)
if err != nil {
return err
}
- logger.Info().Msg("material added to attestation")
+ logger.Info().Int("materials", len(resp)).Msg("material(s) added to attestation")
policies, err := a.GetPolicyEvaluations(cmd.Context(), attestationID)
if err != nil {
return err
}
- return output.EncodeOutput(flagOutputFormat, resp, func(s *action.AttestationStatusMaterial) error {
- return displayMaterialInfo(s, policies[resp.Name])
- })
+ // The explode path can return several materials. Render JSON as a
+ // single array so the output stays a parseable document; only the
+ // table renderer is emitted per material.
+ switch flagOutputFormat {
+ case output.FormatJSON:
+ return output.EncodeJSON(resp)
+ case output.FormatTable:
+ for _, m := range resp {
+ if err := displayMaterialInfo(m, policies[m.Name]); err != nil {
+ return err
+ }
+ }
+ return nil
+ default:
+ return output.ErrOutputFormatNotImplemented
+ }
},
)
},
@@ -166,6 +188,10 @@ func newAttestationAddCmd() *cobra.Command {
cmd.Flags().StringVar(&registryUsername, "registry-username", "", fmt.Sprintf("registry username, ($%s)", registryUsernameEnvVarName))
cmd.Flags().StringVar(&registryPassword, "registry-password", "", fmt.Sprintf("registry password, ($%s)", registryPasswordEnvVarName))
+ // Archive extraction guards
+ cmd.Flags().IntVar(&maxExtractEntries, "max-extract-entries", 10000, "max number of files to extract when --value is an archive")
+ cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GiB", "max total uncompressed size to extract when --value is an archive")
+
if registryServer == "" {
registryServer = os.Getenv(registryServerEnvVarName)
}
diff --git a/app/cli/documentation/cli-reference.mdx b/app/cli/documentation/cli-reference.mdx
index b4bf901dd..3dfe7522e 100755
--- a/app/cli/documentation/cli-reference.mdx
+++ b/app/cli/documentation/cli-reference.mdx
@@ -258,6 +258,8 @@ Options
--attestation-id string Unique identifier of the in-progress attestation
-h, --help help for add
--kind string kind of the material to be recorded: ["ARTIFACT" "ASYNCAPI_SPEC" "ATTESTATION" "BLACKDUCK_SCA_JSON" "CERTCC_DRANZER" "CHAINLOOP_AI_AGENT_CONFIG" "CHAINLOOP_AI_CODING_SESSION" "CHAINLOOP_PR_INFO" "CHAINLOOP_RUNNER_CONTEXT" "CONTAINER_IMAGE" "CSAF_INFORMATIONAL_ADVISORY" "CSAF_SECURITY_ADVISORY" "CSAF_SECURITY_INCIDENT_RESPONSE" "CSAF_VEX" "EVIDENCE" "GHAS_CODE_SCAN" "GHAS_DEPENDENCY_SCAN" "GHAS_SECRET_SCAN" "GITLAB_SECURITY_REPORT" "GITLEAKS_JSON" "GRAPHQL_SPEC" "HELM_CHART" "JACOCO_XML" "JUNIT_XML" "OPENAPI_SPEC" "OPENVEX" "OSSF_SCORECARD_JSON" "RADAMSA_CRASHES" "RADAMSA_REPORT" "SARIF" "SBOM_CYCLONEDX_JSON" "SBOM_SPDX_JSON" "SLSA_PROVENANCE" "STRING" "SYSINTERNALS_ACCESSCHK" "SYSINTERNALS_SIGCHECK" "TWISTCLI_SCAN_JSON" "YELP_DETECT_SECRETS_BASELINE" "ZAP_DAST_ZIP"]
+--max-extract-entries int max number of files to extract when --value is an archive (default 10000)
+--max-extract-size string max total uncompressed size to extract when --value is an archive (default "1GiB")
--name string name of the material as shown in the contract
--no-strict-validation skip strict schema validation for structured materials (SBOM_CYCLONEDX_JSON, OPENAPI_SPEC, ASYNCAPI_SPEC, OSSF_SCORECARD_JSON)
--policy-input-from-file stringArray feed a policy input from a column of a CSV or JSON file, in the format <input>=<file>[:<column>] (e.g. ignored_paths=exception.csv:Path); <column> is a single top-level column/field name and defaults to the input name; repeatable. The file is also recorded as EVIDENCE.
diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go
index 3778a9a94..939261aa0 100644
--- a/app/cli/pkg/action/attestation_add.go
+++ b/app/cli/pkg/action/attestation_add.go
@@ -41,6 +41,12 @@ type AttestationAddOpts struct {
LocalStatePath string
// NoStrictValidation skips strict schema validation
NoStrictValidation bool
+ // MaxExtractEntries limits the number of entries extracted from an archive.
+ // Zero defaults to materials.DefaultArchiveLimits().MaxEntries.
+ MaxExtractEntries int
+ // MaxExtractSize limits the total uncompressed bytes extracted from an archive.
+ // Zero defaults to materials.DefaultArchiveLimits().MaxTotalSize.
+ MaxExtractSize int64
}
type newCrafterOpts struct {
@@ -55,6 +61,8 @@ type AttestationAdd struct {
casCAPath string
connectionInsecure bool
localStatePath string
+ maxExtractEntries int
+ maxExtractSize int64
*newCrafterOpts
}
@@ -68,6 +76,16 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) {
opts = append(opts, crafter.WithNoStrictValidation(cfg.NoStrictValidation))
}
+ defaults := materials.DefaultArchiveLimits()
+ maxEntries := cfg.MaxExtractEntries
+ if maxEntries == 0 {
+ maxEntries = defaults.MaxEntries
+ }
+ maxSize := cfg.MaxExtractSize
+ if maxSize == 0 {
+ maxSize = defaults.MaxTotalSize
+ }
+
return &AttestationAdd{
ActionsOpts: cfg.ActionsOpts,
newCrafterOpts: &newCrafterOpts{cpConnection: cfg.CPConnection, opts: opts},
@@ -75,12 +93,14 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) {
casCAPath: cfg.CASCAPath,
connectionInsecure: cfg.ConnectionInsecure,
localStatePath: cfg.LocalStatePath,
+ maxExtractEntries: maxEntries,
+ maxExtractSize: maxSize,
}, nil
}
var ErrAttestationNotInitialized = errors.New("attestation not yet initialized")
-func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) (*AttestationStatusMaterial, error) {
+func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) ([]*AttestationStatusMaterial, error) {
// initialize the crafter. If attestation-id is provided we assume the attestation is performed using remote state
crafter, err := newCrafter(&newCrafterStateOpts{enableRemoteState: (attestationID != ""), localStatePath: action.localStatePath}, action.CPConnection, action.opts...)
if err != nil {
@@ -132,6 +152,31 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa
// 3. If materialType is not empty, add material contract free with materialType and materialName
addOpts := runtimeInputAddOpts(runtimeInputs)
+ // Explode path: --kind set, value is a (non-archive-native) archive.
+ format, explode, err := shouldExplode(materialType, materialValue)
+ if err != nil {
+ return nil, fmt.Errorf("detecting archive: %w", err)
+ }
+ if explode {
+ if len(policyInputFiles) > 0 {
+ action.Logger.Warn().Msg("--policy-input-from-file is ignored when expanding an archive; evidence cross-links are not recorded for exploded materials")
+ }
+ limits := materials.ArchiveLimits{MaxEntries: action.maxExtractEntries, MaxTotalSize: action.maxExtractSize}
+ mts, err := crafter.AddMaterialsFromArchive(ctx, attestationID, materialType, materialName, materialValue, format, casBackend, annotations, limits, addOpts...)
+ if err != nil {
+ return nil, fmt.Errorf("adding materials from archive: %w", err)
+ }
+ results := make([]*AttestationStatusMaterial, 0, len(mts))
+ for _, mt := range mts {
+ r, err := attMaterialToAction(mt)
+ if err != nil {
+ return nil, fmt.Errorf("converting material to action: %w", err)
+ }
+ results = append(results, r)
+ }
+ return results, nil
+ }
+
var mt *api.Attestation_Material
switch {
case materialName == "" && materialType == "":
@@ -175,7 +220,21 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa
return nil, fmt.Errorf("converting material to action: %w", err)
}
- return materialResult, nil
+ return []*AttestationStatusMaterial{materialResult}, nil
+}
+
+// shouldExplode reports whether an attestation add must expand value into one
+// material per archive entry, together with the detected archive format.
+//
+// Expansion is requested only when all of the following hold: a kind was given
+// (materialType is non-empty), that kind is not archive-native (e.g.
+// ZAP_DAST_ZIP, which is recorded whole), and materials.DetectArchive
+// recognises value as a supported archive. In every other case the returned
+// format is materials.ArchiveNone and the boolean is false. A detection error
+// is returned as-is.
+func shouldExplode(materialType, value string) (materials.ArchiveFormat, bool, error) {
+	// Cases are evaluated in order, so the archive-native lookup is skipped
+	// when no kind was provided.
+	switch {
+	case materialType == "":
+		return materials.ArchiveNone, false, nil
+	case materials.IsArchiveNativeKind(materialType):
+		return materials.ArchiveNone, false, nil
+	}
+
+	detected, err := materials.DetectArchive(value)
+	if err != nil {
+		return materials.ArchiveNone, false, err
+	}
+
+	isArchive := detected != materials.ArchiveNone
+	return detected, isArchive, nil
}
// runtimeInputAddOpts wraps the runtime inputs as crafter add options, or
diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go
new file mode 100644
index 000000000..8104f9a7f
--- /dev/null
+++ b/app/cli/pkg/action/attestation_add_routing_test.go
@@ -0,0 +1,80 @@
+//
+// Copyright 2026 The Chainloop Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package action
+
+import (
+ "archive/zip"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// writeTestZip writes a zip archive to dir/name that holds exactly one member,
+// "entry.txt" with the content "hello", and returns the archive path. Any
+// failure aborts the calling test.
+func writeTestZip(t *testing.T, dir, name string) string {
+	t.Helper()
+
+	archivePath := filepath.Join(dir, name)
+	out, err := os.Create(archivePath)
+	require.NoError(t, err)
+	defer out.Close()
+
+	zw := zip.NewWriter(out)
+	member, err := zw.Create("entry.txt")
+	require.NoError(t, err)
+
+	_, err = member.Write([]byte("hello"))
+	require.NoError(t, err)
+
+	// Closing the zip writer finishes the archive before the path is handed back.
+	require.NoError(t, zw.Close())
+	return archivePath
+}
+
+func TestShouldExplode(t *testing.T) {
+ dir := t.TempDir()
+ zipPath := writeTestZip(t, dir, "s.zip")
+
+ // non-archive: a plain temp file with an unrecognised extension
+ plainPath := filepath.Join(dir, "plain.bin")
+ require.NoError(t, os.WriteFile(plainPath, []byte("not an archive"), 0600))
+
+ tests := []struct {
+ name string
+ kind string
+ value string
+ wantExplode bool
+ }{
+ {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true},
+ {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false},
+ {"no kind", "", zipPath, false},
+ {"kind + non-archive", "ARTIFACT", plainPath, false},
+ // Non-file values must never return an error — STRING and CONTAINER_IMAGE
+ // carry values that are not file paths at all.
+ {"kind STRING non-file value", "STRING", "hello world", false},
+ {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", false},
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ format, explode, err := shouldExplode(tc.kind, tc.value)
+ require.NoError(t, err)
+ assert.Equal(t, tc.wantExplode, explode)
+ if explode {
+ assert.NotEqual(t, materials.ArchiveNone, format)
+ }
+ })
+ }
+}
diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go
index 5a2db615b..09e643afb 100644
--- a/pkg/attestation/crafter/crafter.go
+++ b/pkg/attestation/crafter/crafter.go
@@ -19,9 +19,11 @@ import (
"context"
"errors"
"fmt"
+ "io"
"maps"
"net/url"
"os"
+ "path/filepath"
"slices"
"strings"
"time"
@@ -680,8 +682,10 @@ func (c *Crafter) AddMaterialContactFreeWithAutoDetectedKind(ctx context.Context
return nil, fmt.Errorf("failed to auto-discover material kind: %w", err)
}
-// addMaterials adds the incoming material m to the crafting state
-func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) {
+// stageMaterial crafts a material into the in-memory crafting state WITHOUT
+// persisting it. Callers must call stateManager.Write to commit. Splitting the
+// write out lets the archive explode path craft many entries and commit once.
+func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) {
addOptions := &addOpts{}
for _, opt := range opts {
opt(addOptions)
@@ -784,7 +788,16 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M
}
c.CraftingState.Attestation.Materials[m.Name] = mt
- // 6 - Persist state
+ return mt, nil
+}
+
+// addMaterial crafts a single material and persists the crafting state.
+func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) {
+ mt, err := c.stageMaterial(ctx, m, value, casBackend, runtimeAnnotations, opts...)
+ if err != nil {
+ return nil, err
+ }
+
if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil {
return nil, fmt.Errorf("failed to persist crafting state: %w", err)
}
@@ -793,6 +806,124 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M
return mt, nil
}
+// AddMaterialsFromArchive expands the archive at archivePath and records every
+// entry visited by materials.WalkArchiveEntries as an independent material of
+// the given kind. Entries are staged in memory one at a time and committed
+// with a single stateManager.Write call, so the operation is all-or-nothing:
+// if the walk, any entry, or the final write fails, the materials staged by
+// this call are removed from the in-memory state, policy evaluations appended
+// during the call are dropped, and nothing is persisted.
+//
+// Parameters:
+//   - attestationID: identifier passed to stateManager.Write.
+//   - kind: the material type string for every entry (must be a valid
+//     CraftingSchema_Material_MaterialType name).
+//   - namePrefix: optional prefix handed to the name allocator for each
+//     derived entry name.
+//   - archivePath: path to the archive on disk.
+//   - format: archive format (ArchiveZip / ArchiveTar / ArchiveTarGz).
+//   - casBackend, runtimeAnnotations, opts: forwarded unchanged to the staging
+//     of every entry.
+//   - limits: guard against zip-bomb expansion.
+//
+// Material names come from a name allocator seeded with the names already in
+// the crafting state and fed the base name of each entry. The staged materials
+// are returned in walk order. An archive that yields no entries is an error.
+func (c *Crafter) AddMaterialsFromArchive(
+	ctx context.Context,
+	attestationID, kind, namePrefix, archivePath string,
+	format materials.ArchiveFormat,
+	casBackend *casclient.CASBackend,
+	runtimeAnnotations map[string]string,
+	limits materials.ArchiveLimits,
+	opts ...AddOpt,
+) ([]*api.Attestation_Material, error) {
+	if err := c.requireStateLoaded(); err != nil {
+		return nil, fmt.Errorf("adding materials from archive: %w", err)
+	}
+
+	// Validate kind up front so we fail fast before touching disk.
+	kindVal, found := schemaapi.CraftingSchema_Material_MaterialType_value[kind]
+	if !found {
+		return nil, fmt.Errorf("%q kind not found. Available options are %q", kind, schemaapi.ListAvailableMaterialKind())
+	}
+	materialKind := schemaapi.CraftingSchema_Material_MaterialType(kindVal)
+
+	// Seed the name allocator with existing material keys so we never collide.
+	existingKeys := make([]string, 0, len(c.CraftingState.Attestation.GetMaterials()))
+	for k := range c.CraftingState.Attestation.GetMaterials() {
+		existingKeys = append(existingKeys, k)
+	}
+	allocator := materials.NewNameAllocator(existingKeys)
+
+	// Create a temporary directory for per-entry files; cleaned up on return.
+	tmpDir, err := os.MkdirTemp("", "chainloop-archive-*")
+	if err != nil {
+		return nil, fmt.Errorf("creating temp dir for archive expansion: %w", err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	// Snapshot checkpoints for atomic rollback on any error path.
+	var stagedNames []string
+	var result []*api.Attestation_Material
+	policyEvalCheckpoint := len(c.CraftingState.Attestation.PolicyEvaluations)
+
+	rollback := func() {
+		for _, n := range stagedNames {
+			delete(c.CraftingState.Attestation.Materials, n)
+		}
+		// Only ever shrink: re-slicing a slice that is already shorter than
+		// the checkpoint would extend it over stale elements or panic.
+		if len(c.CraftingState.Attestation.PolicyEvaluations) > policyEvalCheckpoint {
+			c.CraftingState.Attestation.PolicyEvaluations = c.CraftingState.Attestation.PolicyEvaluations[:policyEvalCheckpoint]
+		}
+	}
+
+	walkErr := materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error {
+		base := filepath.Base(name)
+		matName := allocator.Allocate(namePrefix, base)
+
+		// Use the allocated unique material name for the temp file so that two
+		// archive entries with the same basename (e.g. "a/x.json" and "b/x.json")
+		// never collide in the shared tmpDir.
+		tmpPath := filepath.Join(tmpDir, matName)
+		tmp, err := os.Create(tmpPath)
+		if err != nil {
+			return fmt.Errorf("creating temp file for entry %q: %w", name, err)
+		}
+
+		// Always close the file, and treat a failed close as a failed write:
+		// the entry is about to be read back and staged, so a flush error
+		// reported at close time must not be ignored.
+		_, copyErr := io.Copy(tmp, r)
+		closeErr := tmp.Close()
+		if copyErr != nil {
+			return fmt.Errorf("writing entry %q to temp file: %w", name, copyErr)
+		}
+		if closeErr != nil {
+			return fmt.Errorf("closing temp file for entry %q: %w", name, closeErr)
+		}
+
+		m := &schemaapi.CraftingSchema_Material{
+			Optional: true,
+			Type:     materialKind,
+			Name:     matName,
+		}
+
+		mt, err := c.stageMaterial(ctx, m, tmpPath, casBackend, runtimeAnnotations, opts...)
+		// Remove the temp file immediately after staging to keep disk usage bounded;
+		// the deferred os.RemoveAll(tmpDir) is the safety net.
+		os.Remove(tmpPath) //nolint:errcheck // best-effort cleanup
+		if err != nil {
+			return fmt.Errorf("staging entry %q as material %q: %w", name, matName, err)
+		}
+
+		stagedNames = append(stagedNames, matName)
+		result = append(result, mt)
+		return nil
+	})
+
+	if walkErr != nil {
+		// Roll back any in-memory staging: remove material map entries and
+		// truncate policy evaluations back to the pre-call checkpoint.
+		rollback()
+		return nil, fmt.Errorf("expanding archive %q: %w", archivePath, walkErr)
+	}
+
+	// Nothing was staged, so there is nothing to roll back here.
+	if len(result) == 0 {
+		return nil, fmt.Errorf("archive %q contains no processable entries", archivePath)
+	}
+
+	// All entries staged successfully; persist once.
+	if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil {
+		// Roll back in-memory state including policy evaluations.
+		rollback()
+		return nil, fmt.Errorf("failed to persist crafting state: %w", err)
+	}
+
+	c.Logger.Debug().Int("count", len(result)).Str("archive", archivePath).Msg("added archive materials to state")
+	return result, nil
+}
+
// projectContext returns the project name and version from the workflow
// metadata so policy verifiers can pass them to the engine. Either may be
// empty (e.g. dry-run before workflow metadata is populated); built-ins
diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go
index 1a8feb196..bd633150c 100644
--- a/pkg/attestation/crafter/crafter_test.go
+++ b/pkg/attestation/crafter/crafter_test.go
@@ -16,6 +16,9 @@
package crafter_test
import (
+ "archive/tar"
+ "archive/zip"
+ "compress/gzip"
"context"
"fmt"
"os"
@@ -672,6 +675,282 @@ func (s *crafterSuite) TestAddMaterialsAutomatic() {
}
}
+// TestAddMaterialsFromArchiveAtomic covers the two outcomes of
+// AddMaterialsFromArchive on a two-file zip: every entry is recorded when the
+// limits allow it, and nothing is left in the crafting state when they do not.
+func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() {
+	// The fixture is generated at test time, so no binary blob is checked in.
+	fixturePath := filepath.Join(s.T().TempDir(), "two-files.zip")
+	buildZip(s.T(), fixturePath, map[string]string{"alpha.txt": "alpha", "beta.txt": "beta"})
+
+	s.Run("happy path: two files produce two materials", func() {
+		t := s.T()
+		cr, err := newInitializedCrafter(t, "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runners.NewGeneric())
+		require.NoError(t, err)
+
+		// Nil uploader causes inline storage — no network required.
+		cas := &casclient.CASBackend{}
+
+		added, err := cr.AddMaterialsFromArchive(
+			context.Background(),
+			"", "ARTIFACT", "entry", fixturePath,
+			materials.ArchiveZip, cas, nil,
+			materials.DefaultArchiveLimits(),
+		)
+		require.NoError(t, err)
+		assert.Len(t, added, 2)
+
+		recorded := cr.CraftingState.GetAttestation().GetMaterials()
+		assert.Len(t, recorded, 2)
+
+		// Both derived names must be present (sanitized base names with prefix).
+		for _, want := range []string{"entry-alpha-txt", "entry-beta-txt"} {
+			_, ok := recorded[want]
+			assert.True(t, ok, "expected material "+want+" in state")
+		}
+	})
+
+	s.Run("atomicity: over-tight limit leaves state empty", func() {
+		t := s.T()
+		cr, err := newInitializedCrafter(t, "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runners.NewGeneric())
+		require.NoError(t, err)
+
+		cas := &casclient.CASBackend{}
+
+		// The fixture holds two entries, so a one-entry limit must be rejected
+		// with ErrTooManyEntries.
+		oneEntryOnly := materials.ArchiveLimits{MaxEntries: 1, MaxTotalSize: 1 << 30}
+
+		_, err = cr.AddMaterialsFromArchive(
+			context.Background(),
+			"", "ARTIFACT", "entry", fixturePath,
+			materials.ArchiveZip, cas, nil,
+			oneEntryOnly,
+		)
+		require.Error(t, err)
+		assert.ErrorIs(t, err, materials.ErrTooManyEntries)
+
+		// Atomicity: neither materials nor policy evaluations may survive the failure.
+		attestation := cr.CraftingState.GetAttestation()
+		assert.Empty(t, attestation.GetMaterials(), "state must be empty after a failed archive expansion")
+		assert.Empty(t, attestation.GetPolicyEvaluations(), "policy evaluations must be rolled back after a failed archive expansion")
+	})
+}
+
+// buildZip writes a zip archive to path with one regular-file member per item
+// of files (member name → content). Members are written in map iteration
+// order, so their order inside the archive is unspecified. Any failure aborts
+// the calling test.
+func buildZip(t *testing.T, path string, files map[string]string) {
+	t.Helper()
+
+	out, err := os.Create(path)
+	require.NoError(t, err)
+	defer out.Close()
+
+	archive := zip.NewWriter(out)
+	for memberName, body := range files {
+		member, createErr := archive.Create(memberName)
+		require.NoError(t, createErr)
+
+		_, writeErr := member.Write([]byte(body))
+		require.NoError(t, writeErr)
+	}
+
+	// Closing the zip writer finishes the archive.
+	require.NoError(t, archive.Close())
+}
+
+// buildTarGz writes a gzip-compressed tar archive to path. It emits, in this
+// order: one regular file per item of regular (name → content, mode 0600),
+// one directory header per item of dirs (mode 0700), and one symlink header
+// per item of symlinks (link name → target). Map-backed groups are written in
+// map iteration order. Any failure aborts the calling test.
+func buildTarGz(t *testing.T, path string, regular map[string]string, dirs []string, symlinks map[string]string) {
+	t.Helper()
+
+	out, err := os.Create(path)
+	require.NoError(t, err)
+	defer out.Close()
+
+	gz := gzip.NewWriter(out)
+	tw := tar.NewWriter(gz)
+
+	// putHeader writes a single tar header and fails the test on error.
+	putHeader := func(h *tar.Header) {
+		require.NoError(t, tw.WriteHeader(h))
+	}
+
+	for name, body := range regular {
+		payload := []byte(body)
+		putHeader(&tar.Header{
+			Name:     name,
+			Typeflag: tar.TypeReg,
+			Mode:     0o600,
+			Size:     int64(len(payload)),
+		})
+		_, err = tw.Write(payload)
+		require.NoError(t, err)
+	}
+
+	for _, name := range dirs {
+		putHeader(&tar.Header{
+			Name:     name,
+			Typeflag: tar.TypeDir,
+			Mode:     0o700,
+		})
+	}
+
+	for name, target := range symlinks {
+		putHeader(&tar.Header{
+			Name:     name,
+			Typeflag: tar.TypeSymlink,
+			Linkname: target,
+		})
+	}
+
+	// Close the tar stream first, then the gzip stream wrapping it.
+	require.NoError(t, tw.Close())
+	require.NoError(t, gz.Close())
+}
+
+// TestAddMaterialsFromArchiveBehavior exercises AddMaterialsFromArchive against
+// archives built on the fly. Each subtest creates its own crafter from the
+// empty generic contract, so the crafting state starts without materials, and
+// then asserts on both the returned materials and the resulting state map.
+func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() {
+	const contract = "testdata/contracts/empty_generic.yaml"
+	// A zero-value backend is shared by all subtests.
+	backend := &casclient.CASBackend{}
+
+	// Two entries share the base name "scan.json"; the second one to be
+	// allocated is expected to receive a "-1" suffix.
+	s.Run("name collision: both names present with suffix", func() {
+		dir := s.T().TempDir()
+		p := filepath.Join(dir, "collide.zip")
+		buildZip(s.T(), p, map[string]string{
+			"scan.json":        `{"a":1}`,
+			"nested/scan.json": `{"b":2}`,
+		})
+
+		runner := runners.NewGeneric()
+		c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+		require.NoError(s.T(), err)
+
+		mts, err := c.AddMaterialsFromArchive(
+			context.Background(),
+			"", "ARTIFACT", "", p,
+			materials.ArchiveZip, backend, nil,
+			materials.DefaultArchiveLimits(),
+		)
+		require.NoError(s.T(), err)
+		assert.Len(s.T(), mts, 2)
+
+		stateMap := c.CraftingState.GetAttestation().GetMaterials()
+		assert.Len(s.T(), stateMap, 2)
+		_, hasScanJSON := stateMap["scan-json"]
+		_, hasScanJSON1 := stateMap["scan-json-1"]
+		assert.True(s.T(), hasScanJSON, "expected material scan-json in state")
+		assert.True(s.T(), hasScanJSON1, "expected material scan-json-1 in state (collision suffix)")
+	})
+
+	// With the prefix "sboms", entry "a.json" is expected under "sboms-a-json".
+	s.Run("name prefix: prefix prepended to sanitized entry name", func() {
+		dir := s.T().TempDir()
+		p := filepath.Join(dir, "prefix.zip")
+		buildZip(s.T(), p, map[string]string{
+			"a.json": `{"x":1}`,
+		})
+
+		runner := runners.NewGeneric()
+		c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+		require.NoError(s.T(), err)
+
+		mts, err := c.AddMaterialsFromArchive(
+			context.Background(),
+			"", "ARTIFACT", "sboms", p,
+			materials.ArchiveZip, backend, nil,
+			materials.DefaultArchiveLimits(),
+		)
+		require.NoError(s.T(), err)
+		assert.Len(s.T(), mts, 1)
+
+		stateMap := c.CraftingState.GetAttestation().GetMaterials()
+		assert.Len(s.T(), stateMap, 1)
+		_, found := stateMap["sboms-a-json"]
+		assert.True(s.T(), found, "expected material sboms-a-json in state")
+	})
+
+	// The archive mixes a regular file, a directory header and a symlink
+	// header; only the regular file is expected to be recorded.
+	s.Run("skip dirs and symlinks in tar.gz: only regular file becomes material", func() {
+		dir := s.T().TempDir()
+		p := filepath.Join(dir, "mixed.tar.gz")
+		buildTarGz(s.T(), p,
+			map[string]string{"real.txt": "hello"},
+			[]string{"adir/"},
+			map[string]string{"link.txt": "real.txt"},
+		)
+
+		runner := runners.NewGeneric()
+		c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+		require.NoError(s.T(), err)
+
+		mts, err := c.AddMaterialsFromArchive(
+			context.Background(),
+			"", "ARTIFACT", "", p,
+			materials.ArchiveTarGz, backend, nil,
+			materials.DefaultArchiveLimits(),
+		)
+		require.NoError(s.T(), err)
+		assert.Len(s.T(), mts, 1, "only the regular file must become a material")
+
+		stateMap := c.CraftingState.GetAttestation().GetMaterials()
+		assert.Len(s.T(), stateMap, 1)
+		_, hasReal := stateMap["real-txt"]
+		assert.True(s.T(), hasReal, "expected material real-txt in state")
+	})
+
+	// An entry named "../escape.txt" must be rejected with ErrUnsafeEntry and
+	// must leave the state without materials.
+	s.Run("traversal rejection: ../escape.txt entry causes error and empty state", func() {
+		dir := s.T().TempDir()
+		p := filepath.Join(dir, "evil.tar.gz")
+		buildTarGz(s.T(), p,
+			map[string]string{"../escape.txt": "evil"},
+			nil, nil,
+		)
+
+		runner := runners.NewGeneric()
+		c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+		require.NoError(s.T(), err)
+
+		_, err = c.AddMaterialsFromArchive(
+			context.Background(),
+			"", "ARTIFACT", "", p,
+			materials.ArchiveTarGz, backend, nil,
+			materials.DefaultArchiveLimits(),
+		)
+		require.Error(s.T(), err, "path-traversal entry must cause an error")
+		assert.ErrorIs(s.T(), err, materials.ErrUnsafeEntry)
+
+		stateMap := c.CraftingState.GetAttestation().GetMaterials()
+		assert.Empty(s.T(), stateMap, "state must be empty after traversal rejection (atomic rollback)")
+	})
+
+	// Same expectation as the zip happy path, but through the tar.gz reader.
+	s.Run("tar.gz happy path: two regular files produce two materials", func() {
+		dir := s.T().TempDir()
+		p := filepath.Join(dir, "two.tar.gz")
+		buildTarGz(s.T(), p,
+			map[string]string{
+				"alpha.txt": "aaa",
+				"beta.txt":  "bbb",
+			},
+			nil, nil,
+		)
+
+		runner := runners.NewGeneric()
+		c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner)
+		require.NoError(s.T(), err)
+
+		mts, err := c.AddMaterialsFromArchive(
+			context.Background(),
+			"", "ARTIFACT", "", p,
+			materials.ArchiveTarGz, backend, nil,
+			materials.DefaultArchiveLimits(),
+		)
+		require.NoError(s.T(), err)
+		assert.Len(s.T(), mts, 2)
+
+		stateMap := c.CraftingState.GetAttestation().GetMaterials()
+		assert.Len(s.T(), stateMap, 2)
+		_, hasAlpha := stateMap["alpha-txt"]
+		_, hasBeta := stateMap["beta-txt"]
+		assert.True(s.T(), hasAlpha, "expected material alpha-txt in state")
+		assert.True(s.T(), hasBeta, "expected material beta-txt in state")
+	})
+}
+
func loadSchema(path string) (*schemaapi.CraftingSchema, error) {
// Extract json formatted data
content, err := os.ReadFile(filepath.Clean(path))
diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go
new file mode 100644
index 000000000..1eccef7fb
--- /dev/null
+++ b/pkg/attestation/crafter/materials/archive.go
@@ -0,0 +1,321 @@
+//
+// Copyright 2026 The Chainloop Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package materials
+
+import (
+ "archive/tar"
+ "archive/zip"
+ "bytes"
+ "compress/gzip"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path"
+ "strings"
+
+ schemaapi "github.com/chainloop-dev/chainloop/app/controlplane/api/workflowcontract/v1"
+)
+
+// ArchiveFormat identifies a supported archive container.
+// The zero value, ArchiveNone, means the input is not a recognised archive.
+type ArchiveFormat int
+
+const (
+ // ArchiveNone marks a value that is not a supported archive.
+ ArchiveNone ArchiveFormat = iota
+ // ArchiveZip is a ZIP container.
+ ArchiveZip
+ // ArchiveTar is an uncompressed tar stream.
+ ArchiveTar
+ // ArchiveTarGz is a gzip-compressed tar stream.
+ ArchiveTarGz
+)
+
+// DetectArchive classifies path as one of the supported archive formats.
+//
+// The file name is checked first (case-insensitively) against the known
+// archive extensions. When none of them matches, the file's leading bytes are
+// inspected instead so that a renamed archive is still recognised. A value
+// that is not an archive yields (ArchiveNone, nil).
+func DetectArchive(path string) (ArchiveFormat, error) {
+	// The suffixes are mutually exclusive, so lookup order is irrelevant.
+	bySuffix := []struct {
+		suffix string
+		format ArchiveFormat
+	}{
+		{".zip", ArchiveZip},
+		{".tar.gz", ArchiveTarGz},
+		{".tgz", ArchiveTarGz},
+		{".tar", ArchiveTar},
+	}
+
+	name := strings.ToLower(path)
+	for _, known := range bySuffix {
+		if strings.HasSuffix(name, known.suffix) {
+			return known.format, nil
+		}
+	}
+
+	// Unknown extension: fall back to content sniffing.
+	return detectByMagic(path)
+}
+
+// detectByMagic inspects the leading bytes of the file at path and maps
+// well-known signatures to an ArchiveFormat:
+//
+//   - "PK\x03\x04" / "PK\x05\x06"          -> ArchiveZip
+//   - gzip magic wrapping a tar header      -> ArchiveTarGz
+//   - "ustar" marker at offset 257          -> ArchiveTar
+//
+// Anything else, including a path that cannot be opened or read, is reported
+// as (ArchiveNone, nil). The returned error is always nil; it is kept in the
+// signature so the function can be returned directly from DetectArchive.
+func detectByMagic(path string) (ArchiveFormat, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		// If the file doesn't exist, the value is not a file path at all (e.g.
+		// "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE).
+		// Treat it as a non-archive rather than propagating the error so callers
+		// that pass non-file values are not surprised.
+		return ArchiveNone, nil
+	}
+	defer f.Close()
+
+	// 512 bytes (one tar block) is enough for the gzip/zip magic and the tar
+	// "ustar" marker at offset 257.
+	const sniffLen = 512
+
+	// prefix returns up to sniffLen leading bytes of r. io.ReadFull keeps
+	// reading across short reads; its error is deliberately ignored because a
+	// short or unreadable input simply yields fewer bytes, which then match no
+	// signature.
+	prefix := func(r io.Reader) []byte {
+		buf := make([]byte, sniffLen)
+		n, _ := io.ReadFull(r, buf)
+		return buf[:n]
+	}
+	hasTarMagic := func(b []byte) bool {
+		return len(b) >= 262 && bytes.Equal(b[257:262], []byte("ustar"))
+	}
+
+	header := prefix(f)
+	switch {
+	case bytes.HasPrefix(header, []byte("PK\x03\x04")), bytes.HasPrefix(header, []byte("PK\x05\x06")):
+		return ArchiveZip, nil
+	case bytes.HasPrefix(header, []byte{0x1f, 0x8b}):
+		// gzip can wrap any payload (e.g. "report.json.gz"). Only report a
+		// tar.gz when the decompressed stream really starts with a tar header;
+		// otherwise the file would be routed to the tar walker and fail there.
+		// The already-consumed header bytes are replayed in front of the file.
+		gz, err := gzip.NewReader(io.MultiReader(bytes.NewReader(header), f))
+		if err != nil {
+			return ArchiveNone, nil
+		}
+		defer gz.Close()
+		if hasTarMagic(prefix(gz)) {
+			return ArchiveTarGz, nil
+		}
+	case hasTarMagic(header):
+		return ArchiveTar, nil
+	}
+
+	return ArchiveNone, nil
+}
+
+// Sentinel errors reported while walking an archive. They may reach callers
+// wrapped with extra context, so match them with errors.Is.
+var (
+ // ErrTooManyEntries is returned when an archive has more qualifying entries
+ // than the configured maximum.
+ ErrTooManyEntries = errors.New("archive exceeds the maximum number of entries")
+ // ErrArchiveTooLarge is returned when the running uncompressed size of an
+ // archive exceeds the configured maximum.
+ ErrArchiveTooLarge = errors.New("archive exceeds the maximum uncompressed size")
+ // ErrUnsafeEntry is returned when an archive entry's path is absolute or
+ // escapes the extraction root.
+ ErrUnsafeEntry = errors.New("unsafe entry path in archive")
+)
+
+// ArchiveLimits caps how far an archive may expand while it is walked, as a
+// guard against zip bombs.
+type ArchiveLimits struct {
+	// MaxEntries is the largest number of entries that may be visited.
+	MaxEntries int
+	// MaxTotalSize is the largest number of uncompressed bytes that may be
+	// read across all entries.
+	MaxTotalSize int64
+}
+
+// DefaultArchiveLimits returns the safe defaults: at most 10000 entries and
+// 1 GiB of total uncompressed data.
+func DefaultArchiveLimits() ArchiveLimits {
+	const (
+		defaultEntries = 10000
+		oneGiB         = int64(1) << 30
+	)
+
+	var limits ArchiveLimits
+	limits.MaxEntries = defaultEntries
+	limits.MaxTotalSize = oneGiB
+	return limits
+}
+
+// capReader is an io.Reader decorator that adds every byte it delivers to a
+// running total shared between readers and reports ErrArchiveTooLarge as soon
+// as that total goes past max. Counting real bytes means the sizes declared
+// inside an archive never have to be trusted.
+type capReader struct {
+	r     io.Reader
+	total *int64
+	max   int64
+}
+
+// Read forwards to the wrapped reader and accounts for the bytes returned.
+// Once the shared total exceeds the limit, the bytes just read are still
+// reported but the error is replaced by ErrArchiveTooLarge.
+func (c *capReader) Read(p []byte) (int, error) {
+	read, readErr := c.r.Read(p)
+
+	running := *c.total + int64(read)
+	*c.total = running
+
+	if running > c.max {
+		return read, ErrArchiveTooLarge
+	}
+	return read, readErr
+}
+
+// WalkArchiveEntries streams the archive at path and invokes yield once for
+// every entry handed over by the format-specific walker (non-empty regular
+// files; directories, links and empty entries are filtered out there).
+//
+// The walk stops at the first problem:
+//   - an entry whose name is absolute or escapes the root fails with
+//     ErrUnsafeEntry (wrapped together with the offending name);
+//   - visiting more than limits.MaxEntries entries fails with
+//     ErrTooManyEntries;
+//   - the reader given to yield reports ErrArchiveTooLarge once more than
+//     limits.MaxTotalSize bytes have been read across all entries;
+//   - any error returned by yield is wrapped with the entry name.
+//
+// An unknown format is rejected with an error.
+func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error {
+	var (
+		bytesRead int64 // shared by every capReader handed to yield
+		entries   int
+	)
+
+	onEntry := func(name string, r io.Reader) error {
+		if !safeArchivePath(name) {
+			return fmt.Errorf("%w: %q", ErrUnsafeEntry, name)
+		}
+		if entries++; entries > limits.MaxEntries {
+			return ErrTooManyEntries
+		}
+
+		limited := &capReader{r: r, total: &bytesRead, max: limits.MaxTotalSize}
+		if err := yield(name, limited); err != nil {
+			return fmt.Errorf("processing entry %q: %w", name, err)
+		}
+		return nil
+	}
+
+	switch format {
+	case ArchiveZip:
+		return walkZip(path, onEntry)
+	case ArchiveTar, ArchiveTarGz:
+		return walkTar(path, format == ArchiveTarGz, onEntry)
+	}
+	return fmt.Errorf("unsupported archive format")
+}
+
+// safeArchivePath reports whether name is a relative path that stays inside
+// the extraction root.
+//
+// Backslashes are treated as separators so Windows-style names are judged the
+// same way. Absolute paths are rejected, and so is any path that, once its
+// "." and ".." components are resolved, still climbs above the root. A
+// filename that merely contains ".." as a substring (e.g. "foo..bar.json") is
+// accepted; only real ".." path components count.
+func safeArchivePath(name string) bool {
+	normalized := strings.ReplaceAll(name, "\\", "/")
+	// Reject absolute paths.
+	if strings.HasPrefix(normalized, "/") {
+		return false
+	}
+	// Clean the path as a relative one: every ".." that cannot be cancelled
+	// by a preceding component is kept at the front of the result, so an
+	// escaping path always cleans to ".." or "../…". Checking against a
+	// concrete virtual root instead would let "../root/x" slip through,
+	// because it resolves back into a directory with the root's own name.
+	clean := path.Clean(normalized)
+	return clean != ".." && !strings.HasPrefix(clean, "../")
+}
+
+// walkZip opens the zip file at p and calls visit with the name and content
+// reader of each entry that carries file data, in central-directory order.
+// It stops at, and returns, the first error from opening an entry or from
+// visit.
+func walkZip(p string, visit func(name string, r io.Reader) error) error {
+	archive, err := zip.OpenReader(p)
+	if err != nil {
+		return fmt.Errorf("opening zip: %w", err)
+	}
+	defer archive.Close()
+
+	for _, entry := range archive.File {
+		// Directories, symlinks and empty entries hold no content worth
+		// recording as a material, so they are passed over. Skipping empty
+		// entries is deliberate: an empty evidence file produces no material.
+		// Symlinks are recognised through the Unix mode bits stored in the
+		// zip; an archive written without Unix metadata does not carry that
+		// bit, so such a symlink is handled like a regular file whose content
+		// is the stored target path.
+		switch {
+		case entry.FileInfo().IsDir(),
+			entry.Mode()&os.ModeSymlink != 0,
+			entry.UncompressedSize64 == 0:
+			continue
+		}
+
+		content, err := entry.Open()
+		if err != nil {
+			return fmt.Errorf("opening entry %q: %w", entry.Name, err)
+		}
+		visitErr := visit(entry.Name, content)
+		// Close right away instead of deferring: a defer would keep every
+		// entry open until the whole archive has been walked.
+		content.Close()
+		if visitErr != nil {
+			return visitErr
+		}
+	}
+	return nil
+}
+
+// walkTar reads the tar stream stored at p (gunzipping it first when gzipped
+// is set) and calls visit with the name and content reader of each non-empty
+// regular file, in archive order. It returns nil at the end of the archive,
+// or the first error hit while opening, decoding, or visiting.
+func walkTar(p string, gzipped bool, visit func(name string, r io.Reader) error) error {
+	file, err := os.Open(p)
+	if err != nil {
+		return fmt.Errorf("opening tar: %w", err)
+	}
+	defer file.Close()
+
+	stream := io.Reader(file)
+	if gzipped {
+		inflater, err := gzip.NewReader(file)
+		if err != nil {
+			return fmt.Errorf("opening gzip: %w", err)
+		}
+		defer inflater.Close()
+		stream = inflater
+	}
+
+	reader := tar.NewReader(stream)
+	for {
+		header, err := reader.Next()
+		switch {
+		case errors.Is(err, io.EOF):
+			// Clean end of archive.
+			return nil
+		case err != nil:
+			return fmt.Errorf("reading tar: %w", err)
+		}
+
+		// Materials come from regular files only. The typeflag filters out
+		// directories, symlinks, hardlinks and other special entries; empty
+		// files are dropped on purpose (an empty evidence file produces no
+		// material).
+		isRegular := header.Typeflag == tar.TypeReg
+		if !isRegular || header.Size == 0 {
+			continue
+		}
+
+		if err := visit(header.Name, reader); err != nil {
+			return err
+		}
+	}
+}
+
+// archiveNativeKinds is the set of material kinds for which the archive file
+// is itself the material. Passing one of these via --kind bypasses the
+// explode path so the archive is recorded whole. Add further kinds here when
+// new "the archive is the material" kinds are introduced.
+var archiveNativeKinds = map[string]struct{}{
+	schemaapi.CraftingSchema_Material_ZAP_DAST_ZIP.String(): {},
+}
+
+// IsArchiveNativeKind reports whether kind records an archive as a single
+// material instead of exploding it into one material per entry.
+func IsArchiveNativeKind(kind string) bool {
+	if _, native := archiveNativeKinds[kind]; native {
+		return true
+	}
+	return false
+}
+
+// SanitizeMaterialName derives a DNS-1123 style material name from s. The
+// input is lower-cased, each run of characters outside [a-z0-9] becomes a
+// single "-", and no "-" is emitted at the start or the end. When nothing
+// usable remains the fixed name "material" is returned.
+func SanitizeMaterialName(s string) string {
+	var out strings.Builder
+	// gap records that separator characters were seen since the last emitted
+	// character; the hyphen is only written once another valid character
+	// follows, which keeps trailing hyphens out of the result.
+	gap := false
+	for _, c := range strings.ToLower(s) {
+		alnum := ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
+		if !alnum {
+			gap = true
+			continue
+		}
+		// out.Len() > 0 suppresses a leading hyphen.
+		if gap && out.Len() > 0 {
+			out.WriteByte('-')
+		}
+		gap = false
+		out.WriteRune(c)
+	}
+
+	if name := out.String(); name != "" {
+		return name
+	}
+	return "material"
+}
+
+// NameAllocator produces material names that are unique within one
+// attestation. A name that is already taken gets a numeric suffix (-1, -2, …).
+// Seeding the allocator with the names already present in the attestation
+// guarantees that derived names never overwrite existing materials.
+type NameAllocator struct {
+	// used is the set of names handed out or seeded so far.
+	used map[string]struct{}
+}
+
+// NewNameAllocator returns an allocator in which every name in existing is
+// already marked as taken.
+func NewNameAllocator(existing []string) *NameAllocator {
+	alloc := &NameAllocator{used: make(map[string]struct{}, len(existing))}
+	for i := range existing {
+		alloc.used[existing[i]] = struct{}{}
+	}
+	return alloc
+}
+
+// Allocate reserves and returns a unique name built from base. Both base and
+// the optional prefix are sanitized, and a non-empty prefix is joined in
+// front with "-". If the resulting name is taken, "-1", "-2", … is appended
+// until a free one is found. The returned name is recorded as used.
+func (a *NameAllocator) Allocate(prefix, base string) string {
+	stem := SanitizeMaterialName(base)
+	if prefix != "" {
+		stem = SanitizeMaterialName(prefix) + "-" + stem
+	}
+
+	inUse := func(n string) bool {
+		_, ok := a.used[n]
+		return ok
+	}
+
+	// Try the bare stem first, then stem-1, stem-2, … in order.
+	name := stem
+	for i := 1; inUse(name); i++ {
+		name = fmt.Sprintf("%s-%d", stem, i)
+	}
+
+	a.used[name] = struct{}{}
+	return name
+}
diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go
new file mode 100644
index 000000000..234b1bae7
--- /dev/null
+++ b/pkg/attestation/crafter/materials/archive_test.go
@@ -0,0 +1,241 @@
+// Copyright 2026 The Chainloop Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package materials
+
+import (
+ "archive/tar"
+ "archive/zip"
+ "compress/gzip"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// writeZip builds a zip archive at dir/name with one entry per element of
+// files (entry name -> content) and returns the archive's path. Any failure
+// aborts the calling test.
+func writeZip(t *testing.T, dir, name string, files map[string]string) string {
+	t.Helper()
+
+	target := filepath.Join(dir, name)
+	out, err := os.Create(target)
+	require.NoError(t, err)
+	defer out.Close()
+
+	archive := zip.NewWriter(out)
+	for entry, content := range files {
+		w, createErr := archive.Create(entry)
+		require.NoError(t, createErr)
+		_, writeErr := w.Write([]byte(content))
+		require.NoError(t, writeErr)
+	}
+	// Closing the writer emits the central directory.
+	require.NoError(t, archive.Close())
+
+	return target
+}
+
+// writeTarGz builds a gzip-compressed tar at dir/name holding one regular
+// file per element of files (entry name -> content) and returns its path.
+// Any failure aborts the calling test.
+func writeTarGz(t *testing.T, dir, name string, files map[string]string) string {
+	t.Helper()
+
+	target := filepath.Join(dir, name)
+	out, err := os.Create(target)
+	require.NoError(t, err)
+	defer out.Close()
+
+	compressor := gzip.NewWriter(out)
+	archive := tar.NewWriter(compressor)
+	for entry, content := range files {
+		hdr := &tar.Header{
+			Name:     entry,
+			Mode:     0o600,
+			Size:     int64(len(content)),
+			Typeflag: tar.TypeReg,
+		}
+		require.NoError(t, archive.WriteHeader(hdr))
+		_, writeErr := archive.Write([]byte(content))
+		require.NoError(t, writeErr)
+	}
+	// Close inner-to-outer so the tar trailer is flushed into the gzip stream
+	// before the gzip footer is written.
+	require.NoError(t, archive.Close())
+	require.NoError(t, compressor.Close())
+
+	return target
+}
+
+// writeTar builds an uncompressed tar at dir/name holding one regular file
+// per element of files (entry name -> content) and returns its path. Any
+// failure aborts the calling test.
+func writeTar(t *testing.T, dir, name string, files map[string]string) string {
+	t.Helper()
+
+	target := filepath.Join(dir, name)
+	out, err := os.Create(target)
+	require.NoError(t, err)
+	defer out.Close()
+
+	archive := tar.NewWriter(out)
+	for entry, content := range files {
+		hdr := &tar.Header{
+			Name:     entry,
+			Mode:     0o600,
+			Size:     int64(len(content)),
+			Typeflag: tar.TypeReg,
+		}
+		require.NoError(t, archive.WriteHeader(hdr))
+		_, writeErr := archive.Write([]byte(content))
+		require.NoError(t, writeErr)
+	}
+	require.NoError(t, archive.Close())
+
+	return target
+}
+
+// TestDetectArchive checks both detection paths of DetectArchive: by file
+// extension, and by magic bytes for files whose name gives no hint. It also
+// covers the two non-archive outcomes (a plain file and a value that is not
+// an existing path).
+func TestDetectArchive(t *testing.T) {
+	dir := t.TempDir()
+	zipPath := writeZip(t, dir, "a.zip", map[string]string{"x.txt": "hi"})
+	tgzPath := writeTarGz(t, dir, "a.tar.gz", map[string]string{"x.txt": "hi"})
+	tarPath := writeTar(t, dir, "a.tar", map[string]string{"x.txt": "hi"})
+	tgzShortPath := writeTarGz(t, dir, "a.tgz", map[string]string{"x.txt": "hi"})
+
+	plain := filepath.Join(dir, "app.bin")
+	require.NoError(t, os.WriteFile(plain, []byte("not an archive"), 0o600))
+
+	// Archives copied to extension-less names — magic bytes must still detect
+	// them.
+	copyAs := func(name, src string) string {
+		dst := filepath.Join(dir, name)
+		require.NoError(t, os.WriteFile(dst, mustRead(t, src), 0o600))
+		return dst
+	}
+	zipNoExt := copyAs("noext", zipPath)
+	tgzNoExt := copyAs("noext-tgz", tgzPath)
+	tarNoExt := copyAs("noext-tar", tarPath)
+
+	// A value that is not a file on disk (e.g. a plain string material).
+	missing := filepath.Join(dir, "does-not-exist")
+
+	tests := []struct {
+		name string
+		path string
+		want ArchiveFormat
+	}{
+		{"zip by extension", zipPath, ArchiveZip},
+		{"tar.gz by extension", tgzPath, ArchiveTarGz},
+		{"tar by extension", tarPath, ArchiveTar},
+		{"tgz by extension", tgzShortPath, ArchiveTarGz},
+		{"plain file", plain, ArchiveNone},
+		{"zip without extension via magic", zipNoExt, ArchiveZip},
+		{"tar.gz without extension via magic", tgzNoExt, ArchiveTarGz},
+		{"tar without extension via magic", tarNoExt, ArchiveTar},
+		{"non-existent path", missing, ArchiveNone},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := DetectArchive(tc.path)
+			require.NoError(t, err)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// mustRead returns the content of the file at p, aborting the calling test
+// when the file cannot be read.
+func mustRead(t *testing.T, p string) []byte {
+	t.Helper()
+
+	data, readErr := os.ReadFile(p)
+	require.NoError(t, readErr)
+
+	return data
+}
+
+// TestWalkArchiveEntries exercises WalkArchiveEntries end to end: which
+// entries are yielded, both expansion limits, and rejection of entries whose
+// path escapes the archive root.
+func TestWalkArchiveEntries(t *testing.T) {
+	dir := t.TempDir()
+
+	t.Run("yields regular files, skips dirs", func(t *testing.T) {
+		// Build a zip with a directory entry + two files.
+		p := filepath.Join(dir, "files.zip")
+		f, err := os.Create(p)
+		require.NoError(t, err)
+		zw := zip.NewWriter(f)
+		_, err = zw.Create("nested/") // directory entry
+		require.NoError(t, err)
+		for _, n := range []string{"a.json", "nested/b.json"} {
+			w, err := zw.Create(n)
+			require.NoError(t, err)
+			_, err = w.Write([]byte("{}"))
+			require.NoError(t, err)
+		}
+		require.NoError(t, zw.Close())
+		require.NoError(t, f.Close())
+
+		var got []string
+		err = WalkArchiveEntries(p, ArchiveZip, DefaultArchiveLimits(), func(name string, r io.Reader) error {
+			// Surface read failures instead of comparing against partial data.
+			b, err := io.ReadAll(r)
+			if err != nil {
+				return err
+			}
+			assert.Equal(t, "{}", string(b))
+			got = append(got, name)
+			return nil
+		})
+		require.NoError(t, err)
+		assert.ElementsMatch(t, []string{"a.json", "nested/b.json"}, got)
+	})
+
+	t.Run("max entries exceeded", func(t *testing.T) {
+		p := writeTarGz(t, dir, "many.tar.gz", map[string]string{"a": "1", "b": "2", "c": "3"})
+		err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 2, MaxTotalSize: 1 << 30}, func(string, io.Reader) error { return nil })
+		require.ErrorIs(t, err, ErrTooManyEntries)
+	})
+
+	t.Run("max total size exceeded while streaming", func(t *testing.T) {
+		p := writeTarGz(t, dir, "big.tar.gz", map[string]string{"a": strings.Repeat("x", 1000)})
+		err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 100, MaxTotalSize: 100}, func(_ string, r io.Reader) error {
+			_, err := io.ReadAll(r)
+			return err
+		})
+		require.ErrorIs(t, err, ErrArchiveTooLarge)
+	})
+
+	t.Run("rejects traversal via tar with .. entries", func(t *testing.T) {
+		// tar allows .. in header, so we can test via tar.
+		p := filepath.Join(dir, "evil.tar.gz")
+		f, err := os.Create(p)
+		require.NoError(t, err)
+		gw := gzip.NewWriter(f)
+		tw := tar.NewWriter(gw)
+		require.NoError(t, tw.WriteHeader(&tar.Header{Name: "../escape.txt", Mode: 0o600, Size: 1, Typeflag: tar.TypeReg}))
+		_, err = tw.Write([]byte("x"))
+		require.NoError(t, err)
+		require.NoError(t, tw.Close())
+		require.NoError(t, gw.Close())
+		require.NoError(t, f.Close())
+
+		yielded := false
+		err = WalkArchiveEntries(p, ArchiveTarGz, DefaultArchiveLimits(), func(string, io.Reader) error {
+			yielded = true
+			return nil
+		})
+		// Pin the specific sentinel: any other failure (e.g. a broken fixture)
+		// must not be mistaken for the traversal guard working.
+		require.ErrorIs(t, err, ErrUnsafeEntry, "entry ../escape.txt must be rejected")
+		assert.False(t, yielded, "unsafe entry must not reach the callback")
+	})
+}
+
+// TestSafeArchivePath is a table test for safeArchivePath covering absolute
+// paths, ".." traversal in several shapes, and ordinary relative names.
+func TestSafeArchivePath(t *testing.T) {
+	type testCase struct {
+		name string
+		path string
+		want bool
+	}
+
+	cases := []testCase{
+		{name: "absolute path", path: "/etc/passwd", want: false},
+		{name: "path traversal", path: "../escape.txt", want: false},
+		{name: "nested path traversal", path: "foo/../../../etc/passwd", want: false},
+		{name: "double dot in filename is ok", path: "foo..bar.json", want: true},
+		{name: "escape via nested double dot", path: "a/../../etc/passwd", want: false},
+		{name: "valid nested path", path: "a/b.txt", want: true},
+		{name: "valid simple path", path: "file.txt", want: true},
+		{name: "valid with subdirs", path: "nested/dir/file.txt", want: true},
+	}
+
+	for i := range cases {
+		c := cases[i]
+		t.Run(c.name, func(t *testing.T) {
+			assert.Equal(t, c.want, safeArchivePath(c.path))
+		})
+	}
+}
+
+// TestSanitizeMaterialName checks lower-casing, collapsing of separator runs,
+// trimming, and the "material" fallback.
+func TestSanitizeMaterialName(t *testing.T) {
+	cases := []struct {
+		in   string
+		want string
+	}{
+		{in: "scan.json", want: "scan-json"},
+		{in: "results.XML", want: "results-xml"},
+		{in: "weird__name!!", want: "weird-name"},
+		{in: "___", want: "material"},
+	}
+
+	for i := range cases {
+		got := SanitizeMaterialName(cases[i].in)
+		assert.Equal(t, cases[i].want, got)
+	}
+}
+
+// TestNameAllocator drives one allocator through a fixed sequence of requests
+// and checks the name returned at each step. The steps are order-dependent:
+// earlier allocations cause the later collisions.
+func TestNameAllocator(t *testing.T) {
+	alloc := NewNameAllocator([]string{"existing"})
+
+	steps := []struct {
+		prefix, base, want string
+	}{
+		{"", "scan.json", "scan-json"},
+		{"", "scan.json", "scan-json-1"}, // collision
+		{"", "results.xml", "results-xml"},
+		{"", "existing", "existing-1"},      // collides with pre-existing
+		{"sboms", "a.json", "sboms-a-json"}, // prefix
+	}
+
+	for _, step := range steps {
+		assert.Equal(t, step.want, alloc.Allocate(step.prefix, step.base))
+	}
+}
+
+// TestIsArchiveNativeKind checks that only the archive-native kind is
+// reported as such.
+func TestIsArchiveNativeKind(t *testing.T) {
+	expectations := []struct {
+		kind   string
+		native bool
+	}{
+		{"ZAP_DAST_ZIP", true},
+		{"SBOM_CYCLONEDX_JSON", false},
+		{"ARTIFACT", false},
+	}
+
+	for _, e := range expectations {
+		if e.native {
+			assert.True(t, IsArchiveNativeKind(e.kind))
+		} else {
+			assert.False(t, IsArchiveNativeKind(e.kind))
+		}
+	}
+}