diff --git a/app/cli/cmd/attestation_add.go b/app/cli/cmd/attestation_add.go index 37dd62353..911ec6651 100644 --- a/app/cli/cmd/attestation_add.go +++ b/app/cli/cmd/attestation_add.go @@ -20,6 +20,7 @@ import ( "fmt" "os" + "code.cloudfoundry.org/bytefmt" "github.com/jedib0t/go-pretty/v6/table" "github.com/muesli/reflow/wrap" "github.com/spf13/cobra" @@ -40,6 +41,8 @@ func newAttestationAddCmd() *cobra.Command { var annotationsFlag []string var noStrictValidation bool var policyInputFromFileFlag []string + var maxExtractEntries int + var maxExtractSize string // OCI registry credentials can be passed as flags or environment variables var registryServer, registryUsername, registryPassword string @@ -74,6 +77,11 @@ func newAttestationAddCmd() *cobra.Command { chainloop attestation add --name sigcheck --value sigcheckResult.csv --kind SYSINTERNALS_SIGCHECK \ --policy-input-from-file ignored_paths=exception.csv:Path`, RunE: func(cmd *cobra.Command, _ []string) error { + maxExtractSizeBytes, err := bytefmt.ToBytes(maxExtractSize) + if err != nil { + return fmt.Errorf("invalid --max-extract-size %q: %w", maxExtractSize, err) + } + a, err := action.NewAttestationAdd( &action.AttestationAddOpts{ ActionsOpts: ActionOpts, @@ -85,6 +93,8 @@ func newAttestationAddCmd() *cobra.Command { RegistryPassword: registryPassword, LocalStatePath: attestationLocalStatePath, NoStrictValidation: noStrictValidation, + MaxExtractEntries: maxExtractEntries, + MaxExtractSize: int64(maxExtractSizeBytes), }, ) if err != nil { @@ -122,22 +132,34 @@ func newAttestationAddCmd() *cobra.Command { return fmt.Errorf("loading resource: %w", err) } } - // TODO: take the material output and show render it resp, err := a.Run(cmd.Context(), attestationID, name, rawValuePath, kind, annotations, policyInputFiles) if err != nil { return err } - logger.Info().Msg("material added to attestation") + logger.Info().Int("materials", len(resp)).Msg("material(s) added to attestation") policies, err := a.GetPolicyEvaluations(cmd.Context(), attestationID) if err != nil { return err } - return output.EncodeOutput(flagOutputFormat, resp, func(s *action.AttestationStatusMaterial) error { - return displayMaterialInfo(s, policies[resp.Name]) - }) + // The explode path can return several materials. Render JSON as a + // single array so the output stays a parseable document; only the + // table renderer is emitted per material. + switch flagOutputFormat { + case output.FormatJSON: + return output.EncodeJSON(resp) + case output.FormatTable: + for _, m := range resp { + if err := displayMaterialInfo(m, policies[m.Name]); err != nil { + return err + } + } + return nil + default: + return output.ErrOutputFormatNotImplemented + } }, ) }, @@ -166,6 +188,10 @@ func newAttestationAddCmd() *cobra.Command { cmd.Flags().StringVar(®istryUsername, "registry-username", "", fmt.Sprintf("registry username, ($%s)", registryUsernameEnvVarName)) cmd.Flags().StringVar(®istryPassword, "registry-password", "", fmt.Sprintf("registry password, ($%s)", registryPasswordEnvVarName)) + // Archive extraction guards + cmd.Flags().IntVar(&maxExtractEntries, "max-extract-entries", 10000, "max number of files to extract when --value is an archive") + cmd.Flags().StringVar(&maxExtractSize, "max-extract-size", "1GiB", "max total uncompressed size to extract when --value is an archive") + if registryServer == "" { registryServer = os.Getenv(registryServerEnvVarName) } diff --git a/app/cli/documentation/cli-reference.mdx b/app/cli/documentation/cli-reference.mdx index b4bf901dd..3dfe7522e 100755 --- a/app/cli/documentation/cli-reference.mdx +++ b/app/cli/documentation/cli-reference.mdx @@ -258,6 +258,8 @@ Options --attestation-id string Unique identifier of the in-progress attestation -h, --help help for add --kind string kind of the material to be recorded: ["ARTIFACT" "ASYNCAPI_SPEC" "ATTESTATION" "BLACKDUCK_SCA_JSON" "CERTCC_DRANZER" "CHAINLOOP_AI_AGENT_CONFIG" "CHAINLOOP_AI_CODING_SESSION" "CHAINLOOP_PR_INFO" "CHAINLOOP_RUNNER_CONTEXT" "CONTAINER_IMAGE" "CSAF_INFORMATIONAL_ADVISORY" "CSAF_SECURITY_ADVISORY" "CSAF_SECURITY_INCIDENT_RESPONSE" "CSAF_VEX" "EVIDENCE" "GHAS_CODE_SCAN" "GHAS_DEPENDENCY_SCAN" "GHAS_SECRET_SCAN" "GITLAB_SECURITY_REPORT" "GITLEAKS_JSON" "GRAPHQL_SPEC" "HELM_CHART" "JACOCO_XML" "JUNIT_XML" "OPENAPI_SPEC" "OPENVEX" "OSSF_SCORECARD_JSON" "RADAMSA_CRASHES" "RADAMSA_REPORT" "SARIF" "SBOM_CYCLONEDX_JSON" "SBOM_SPDX_JSON" "SLSA_PROVENANCE" "STRING" "SYSINTERNALS_ACCESSCHK" "SYSINTERNALS_SIGCHECK" "TWISTCLI_SCAN_JSON" "YELP_DETECT_SECRETS_BASELINE" "ZAP_DAST_ZIP"] +--max-extract-entries int max number of files to extract when --value is an archive (default 10000) +--max-extract-size string max total uncompressed size to extract when --value is an archive (default "1GiB") --name string name of the material as shown in the contract --no-strict-validation skip strict schema validation for structured materials (SBOM_CYCLONEDX_JSON, OPENAPI_SPEC, ASYNCAPI_SPEC, OSSF_SCORECARD_JSON) --policy-input-from-file stringArray feed a policy input from a column of a CSV or JSON file, in the format =[:] (e.g. ignored_paths=exception.csv:Path); is a single top-level column/field name and defaults to the input name; repeatable. The file is also recorded as EVIDENCE. diff --git a/app/cli/pkg/action/attestation_add.go b/app/cli/pkg/action/attestation_add.go index 3778a9a94..939261aa0 100644 --- a/app/cli/pkg/action/attestation_add.go +++ b/app/cli/pkg/action/attestation_add.go @@ -41,6 +41,12 @@ type AttestationAddOpts struct { LocalStatePath string // NoStrictValidation skips strict schema validation NoStrictValidation bool + // MaxExtractEntries limits the number of entries extracted from an archive. + // Zero defaults to materials.DefaultArchiveLimits().MaxEntries. + MaxExtractEntries int + // MaxExtractSize limits the total uncompressed bytes extracted from an archive. + // Zero defaults to materials.DefaultArchiveLimits().MaxTotalSize. + MaxExtractSize int64 } type newCrafterOpts struct { @@ -55,6 +61,8 @@ type AttestationAdd struct { casCAPath string connectionInsecure bool localStatePath string + maxExtractEntries int + maxExtractSize int64 *newCrafterOpts } @@ -68,6 +76,16 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) { opts = append(opts, crafter.WithNoStrictValidation(cfg.NoStrictValidation)) } + defaults := materials.DefaultArchiveLimits() + maxEntries := cfg.MaxExtractEntries + if maxEntries == 0 { + maxEntries = defaults.MaxEntries + } + maxSize := cfg.MaxExtractSize + if maxSize == 0 { + maxSize = defaults.MaxTotalSize + } + return &AttestationAdd{ ActionsOpts: cfg.ActionsOpts, newCrafterOpts: &newCrafterOpts{cpConnection: cfg.CPConnection, opts: opts}, @@ -75,12 +93,14 @@ func NewAttestationAdd(cfg *AttestationAddOpts) (*AttestationAdd, error) { casCAPath: cfg.CASCAPath, connectionInsecure: cfg.ConnectionInsecure, localStatePath: cfg.LocalStatePath, + maxExtractEntries: maxEntries, + maxExtractSize: maxSize, }, nil } var ErrAttestationNotInitialized = errors.New("attestation not yet initialized") -func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) (*AttestationStatusMaterial, error) { +func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialName, materialValue, materialType string, annotations map[string]string, policyInputFiles []*PolicyInputFromFile) ([]*AttestationStatusMaterial, error) { // initialize the crafter. If attestation-id is provided we assume the attestation is performed using remote state crafter, err := newCrafter(&newCrafterStateOpts{enableRemoteState: (attestationID != ""), localStatePath: action.localStatePath}, action.CPConnection, action.opts...) if err != nil { @@ -132,6 +152,31 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa // 3. If materialType is not empty, add material contract free with materialType and materialName addOpts := runtimeInputAddOpts(runtimeInputs) + // Explode path: --kind set, value is a (non-archive-native) archive. + format, explode, err := shouldExplode(materialType, materialValue) + if err != nil { + return nil, fmt.Errorf("detecting archive: %w", err) + } + if explode { + if len(policyInputFiles) > 0 { + action.Logger.Warn().Msg("--policy-input-from-file is ignored when expanding an archive; evidence cross-links are not recorded for exploded materials") + } + limits := materials.ArchiveLimits{MaxEntries: action.maxExtractEntries, MaxTotalSize: action.maxExtractSize} + mts, err := crafter.AddMaterialsFromArchive(ctx, attestationID, materialType, materialName, materialValue, format, casBackend, annotations, limits, addOpts...) + if err != nil { + return nil, fmt.Errorf("adding materials from archive: %w", err) + } + results := make([]*AttestationStatusMaterial, 0, len(mts)) + for _, mt := range mts { + r, err := attMaterialToAction(mt) + if err != nil { + return nil, fmt.Errorf("converting material to action: %w", err) + } + results = append(results, r) + } + return results, nil + } + var mt *api.Attestation_Material switch { case materialName == "" && materialType == "": @@ -175,7 +220,21 @@ func (action *AttestationAdd) Run(ctx context.Context, attestationID, materialNa return nil, fmt.Errorf("converting material to action: %w", err) } - return materialResult, nil + return []*AttestationStatusMaterial{materialResult}, nil +} + +// shouldExplode decides whether an att-add should explode the value into many +// materials: only when --kind is set, the value is a supported archive, and the +// kind is not archive-native (e.g. ZAP_DAST_ZIP, which is recorded whole). +func shouldExplode(materialType, value string) (materials.ArchiveFormat, bool, error) { + if materialType == "" || materials.IsArchiveNativeKind(materialType) { + return materials.ArchiveNone, false, nil + } + format, err := materials.DetectArchive(value) + if err != nil { + return materials.ArchiveNone, false, err + } + return format, format != materials.ArchiveNone, nil } // runtimeInputAddOpts wraps the runtime inputs as crafter add options, or diff --git a/app/cli/pkg/action/attestation_add_routing_test.go b/app/cli/pkg/action/attestation_add_routing_test.go new file mode 100644 index 000000000..8104f9a7f --- /dev/null +++ b/app/cli/pkg/action/attestation_add_routing_test.go @@ -0,0 +1,80 @@ +// +// Copyright 2026 The Chainloop Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package action + +import ( + "archive/zip" + "os" + "path/filepath" + "testing" + + "github.com/chainloop-dev/chainloop/pkg/attestation/crafter/materials" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeTestZip creates a zip archive at dir/name containing a single file +// "entry.txt" and returns its path. +func writeTestZip(t *testing.T, dir, name string) string { + t.Helper() + path := filepath.Join(dir, name) + f, err := os.Create(path) + require.NoError(t, err) + defer f.Close() + + w := zip.NewWriter(f) + entry, err := w.Create("entry.txt") + require.NoError(t, err) + _, err = entry.Write([]byte("hello")) + require.NoError(t, err) + require.NoError(t, w.Close()) + return path +} + +func TestShouldExplode(t *testing.T) { + dir := t.TempDir() + zipPath := writeTestZip(t, dir, "s.zip") + + // non-archive: a plain temp file with an unrecognised extension + plainPath := filepath.Join(dir, "plain.bin") + require.NoError(t, os.WriteFile(plainPath, []byte("not an archive"), 0600)) + + tests := []struct { + name string + kind string + value string + wantExplode bool + }{ + {"kind + archive", "SBOM_CYCLONEDX_JSON", zipPath, true}, + {"archive-native kind", "ZAP_DAST_ZIP", zipPath, false}, + {"no kind", "", zipPath, false}, + {"kind + non-archive", "ARTIFACT", plainPath, false}, + // Non-file values must never return an error — STRING and CONTAINER_IMAGE + // carry values that are not file paths at all. + {"kind STRING non-file value", "STRING", "hello world", false}, + {"kind CONTAINER_IMAGE non-file value", "CONTAINER_IMAGE", "registry.example.com/app:v1", false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + format, explode, err := shouldExplode(tc.kind, tc.value) + require.NoError(t, err) + assert.Equal(t, tc.wantExplode, explode) + if explode { + assert.NotEqual(t, materials.ArchiveNone, format) + } + }) + } +} diff --git a/pkg/attestation/crafter/crafter.go b/pkg/attestation/crafter/crafter.go index 5a2db615b..09e643afb 100644 --- a/pkg/attestation/crafter/crafter.go +++ b/pkg/attestation/crafter/crafter.go @@ -19,9 +19,11 @@ import ( "context" "errors" "fmt" + "io" "maps" "net/url" "os" + "path/filepath" "slices" "strings" "time" @@ -680,8 +682,10 @@ func (c *Crafter) AddMaterialContactFreeWithAutoDetectedKind(ctx context.Context return nil, fmt.Errorf("failed to auto-discover material kind: %w", err) } -// addMaterials adds the incoming material m to the crafting state -func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { +// stageMaterial crafts a material into the in-memory crafting state WITHOUT +// persisting it. Callers must call stateManager.Write to commit. Splitting the +// write out lets the archive explode path craft many entries and commit once. +func (c *Crafter) stageMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { addOptions := &addOpts{} for _, opt := range opts { opt(addOptions) @@ -784,7 +788,16 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M } c.CraftingState.Attestation.Materials[m.Name] = mt - // 6 - Persist state + return mt, nil +} + +// addMaterial crafts a single material and persists the crafting state. +func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_Material, attestationID, value string, casBackend *casclient.CASBackend, runtimeAnnotations map[string]string, opts ...AddOpt) (*api.Attestation_Material, error) { + mt, err := c.stageMaterial(ctx, m, value, casBackend, runtimeAnnotations, opts...) + if err != nil { + return nil, err + } + if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { return nil, fmt.Errorf("failed to persist crafting state: %w", err) } @@ -793,6 +806,124 @@ func (c *Crafter) addMaterial(ctx context.Context, m *schemaapi.CraftingSchema_M return mt, nil } +// AddMaterialsFromArchive expands an archive and stages every entry as an +// independent material, committing all of them atomically in a single +// stateManager.Write call. If any entry fails, no state is persisted and the +// in-memory materials map is rolled back. +// +// Parameters: +// - kind: the material type string for every entry (must be a valid +// CraftingSchema_Material_MaterialType name). +// - namePrefix: optional prefix prepended to each derived entry name. +// - archivePath: path to the archive on disk. +// - format: archive format (ArchiveZip / ArchiveTar / ArchiveTarGz). +// - limits: guard against zip-bomb expansion. +func (c *Crafter) AddMaterialsFromArchive( + ctx context.Context, + attestationID, kind, namePrefix, archivePath string, + format materials.ArchiveFormat, + casBackend *casclient.CASBackend, + runtimeAnnotations map[string]string, + limits materials.ArchiveLimits, + opts ...AddOpt, +) ([]*api.Attestation_Material, error) { + if err := c.requireStateLoaded(); err != nil { + return nil, fmt.Errorf("adding materials from archive: %w", err) + } + + // Validate kind up front so we fail fast before touching disk. + kindVal, found := schemaapi.CraftingSchema_Material_MaterialType_value[kind] + if !found { + return nil, fmt.Errorf("%q kind not found. Available options are %q", kind, schemaapi.ListAvailableMaterialKind()) + } + materialKind := schemaapi.CraftingSchema_Material_MaterialType(kindVal) + + // Seed the name allocator with existing material keys so we never collide. + existingKeys := make([]string, 0, len(c.CraftingState.Attestation.GetMaterials())) + for k := range c.CraftingState.Attestation.GetMaterials() { + existingKeys = append(existingKeys, k) + } + allocator := materials.NewNameAllocator(existingKeys) + + // Create a temporary directory for per-entry files; cleaned up on return. + tmpDir, err := os.MkdirTemp("", "chainloop-archive-*") + if err != nil { + return nil, fmt.Errorf("creating temp dir for archive expansion: %w", err) + } + defer os.RemoveAll(tmpDir) + + // Snapshot checkpoints for atomic rollback on any error path. + var stagedNames []string + var result []*api.Attestation_Material + policyEvalCheckpoint := len(c.CraftingState.Attestation.PolicyEvaluations) + + rollback := func() { + for _, n := range stagedNames { + delete(c.CraftingState.Attestation.Materials, n) + } + c.CraftingState.Attestation.PolicyEvaluations = c.CraftingState.Attestation.PolicyEvaluations[:policyEvalCheckpoint] + } + + walkErr := materials.WalkArchiveEntries(archivePath, format, limits, func(name string, r io.Reader) error { + base := filepath.Base(name) + matName := allocator.Allocate(namePrefix, base) + + // Use the allocated unique material name for the temp file so that two + // archive entries with the same basename (e.g. "a/x.json" and "b/x.json") + // never collide in the shared tmpDir. + tmpPath := filepath.Join(tmpDir, matName) + tmp, err := os.Create(tmpPath) + if err != nil { + return fmt.Errorf("creating temp file for entry %q: %w", name, err) + } + + if _, err := io.Copy(tmp, r); err != nil { + tmp.Close() + return fmt.Errorf("writing entry %q to temp file: %w", name, err) + } + tmp.Close() + + m := &schemaapi.CraftingSchema_Material{ + Optional: true, + Type: materialKind, + Name: matName, + } + + mt, err := c.stageMaterial(ctx, m, tmpPath, casBackend, runtimeAnnotations, opts...) + // Remove the temp file immediately after staging to keep disk usage bounded; + // the deferred os.RemoveAll(tmpDir) is the safety net. + os.Remove(tmpPath) //nolint:errcheck // best-effort cleanup + if err != nil { + return fmt.Errorf("staging entry %q as material %q: %w", name, matName, err) + } + + stagedNames = append(stagedNames, matName) + result = append(result, mt) + return nil + }) + + if walkErr != nil { + // Roll back any in-memory staging: remove material map entries and + // truncate policy evaluations back to the pre-call checkpoint. + rollback() + return nil, fmt.Errorf("expanding archive %q: %w", archivePath, walkErr) + } + + if len(result) == 0 { + return nil, fmt.Errorf("archive %q contains no processable entries", archivePath) + } + + // All entries staged successfully; persist once. + if err := c.stateManager.Write(ctx, attestationID, c.CraftingState); err != nil { + // Roll back in-memory state including policy evaluations. + rollback() + return nil, fmt.Errorf("failed to persist crafting state: %w", err) + } + + c.Logger.Debug().Int("count", len(result)).Str("archive", archivePath).Msg("added archive materials to state") + return result, nil +} + // projectContext returns the project name and version from the workflow // metadata so policy verifiers can pass them to the engine. Either may be // empty (e.g. dry-run before workflow metadata is populated); built-ins diff --git a/pkg/attestation/crafter/crafter_test.go b/pkg/attestation/crafter/crafter_test.go index 1a8feb196..bd633150c 100644 --- a/pkg/attestation/crafter/crafter_test.go +++ b/pkg/attestation/crafter/crafter_test.go @@ -16,6 +16,9 @@ package crafter_test import ( + "archive/tar" + "archive/zip" + "compress/gzip" "context" "fmt" "os" @@ -672,6 +675,282 @@ func (s *crafterSuite) TestAddMaterialsAutomatic() { } } +func (s *crafterSuite) TestAddMaterialsFromArchiveAtomic() { + // Build the fixture in-process so no binary blob is checked in. + zipFixture := filepath.Join(s.T().TempDir(), "two-files.zip") + buildZip(s.T(), zipFixture, map[string]string{"alpha.txt": "alpha", "beta.txt": "beta"}) + + s.Run("happy path: two files produce two materials", func() { + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + // Nil uploader causes inline storage — no network required. + backend := &casclient.CASBackend{} + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", + "ARTIFACT", + "entry", + zipFixture, + materials.ArchiveZip, + backend, + nil, + materials.DefaultArchiveLimits(), + ) + + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 2) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 2) + + // Both derived names must be present (sanitized base names with prefix). + _, hasAlpha := stateMap["entry-alpha-txt"] + _, hasBeta := stateMap["entry-beta-txt"] + assert.True(s.T(), hasAlpha, "expected material entry-alpha-txt in state") + assert.True(s.T(), hasBeta, "expected material entry-beta-txt in state") + }) + + s.Run("atomicity: over-tight limit leaves state empty", func() { + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), "testdata/contracts/empty_generic.yaml", &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + backend := &casclient.CASBackend{} + + // MaxEntries:1 causes ErrTooManyEntries after the second entry. + tightLimits := materials.ArchiveLimits{MaxEntries: 1, MaxTotalSize: 1 << 30} + + _, err = c.AddMaterialsFromArchive( + context.Background(), + "", + "ARTIFACT", + "entry", + zipFixture, + materials.ArchiveZip, + backend, + nil, + tightLimits, + ) + + require.Error(s.T(), err) + assert.ErrorIs(s.T(), err, materials.ErrTooManyEntries) + + // Atomicity: no materials must have been committed. + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Empty(s.T(), stateMap, "state must be empty after a failed archive expansion") + + // Atomicity: policy evaluations must also be rolled back. + assert.Empty(s.T(), c.CraftingState.GetAttestation().GetPolicyEvaluations(), "policy evaluations must be rolled back after a failed archive expansion") + }) +} + +// buildZip creates a zip archive at the given path containing the provided +// files (entry name → content). All entries are regular files. +func buildZip(t *testing.T, path string, files map[string]string) { + t.Helper() + f, err := os.Create(path) + require.NoError(t, err) + defer f.Close() + zw := zip.NewWriter(f) + for name, content := range files { + w, err := zw.Create(name) + require.NoError(t, err) + _, err = w.Write([]byte(content)) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) +} + +// buildTarGz creates a .tar.gz archive at path containing regular files, +// directory entries, and symlinks as described by the parameters. +func buildTarGz(t *testing.T, path string, regular map[string]string, dirs []string, symlinks map[string]string) { + t.Helper() + f, err := os.Create(path) + require.NoError(t, err) + defer f.Close() + gw := gzip.NewWriter(f) + tw := tar.NewWriter(gw) + + for name, content := range regular { + hdr := &tar.Header{ + Name: name, + Typeflag: tar.TypeReg, + Mode: 0o600, + Size: int64(len(content)), + } + require.NoError(t, tw.WriteHeader(hdr)) + _, err = tw.Write([]byte(content)) + require.NoError(t, err) + } + for _, name := range dirs { + hdr := &tar.Header{ + Name: name, + Typeflag: tar.TypeDir, + Mode: 0o700, + } + require.NoError(t, tw.WriteHeader(hdr)) + } + for name, target := range symlinks { + hdr := &tar.Header{ + Name: name, + Typeflag: tar.TypeSymlink, + Linkname: target, + } + require.NoError(t, tw.WriteHeader(hdr)) + } + + require.NoError(t, tw.Close()) + require.NoError(t, gw.Close()) +} + +func (s *crafterSuite) TestAddMaterialsFromArchiveBehavior() { + const contract = "testdata/contracts/empty_generic.yaml" + backend := &casclient.CASBackend{} + + s.Run("name collision: both names present with suffix", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "collide.zip") + buildZip(s.T(), p, map[string]string{ + "scan.json": `{"a":1}`, + "nested/scan.json": `{"b":2}`, + }) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveZip, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 2) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 2) + _, hasScanJSON := stateMap["scan-json"] + _, hasScanJSON1 := stateMap["scan-json-1"] + assert.True(s.T(), hasScanJSON, "expected material scan-json in state") + assert.True(s.T(), hasScanJSON1, "expected material scan-json-1 in state (collision suffix)") + }) + + s.Run("name prefix: prefix prepended to sanitized entry name", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "prefix.zip") + buildZip(s.T(), p, map[string]string{ + "a.json": `{"x":1}`, + }) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "sboms", p, + materials.ArchiveZip, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 1) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 1) + _, found := stateMap["sboms-a-json"] + assert.True(s.T(), found, "expected material sboms-a-json in state") + }) + + s.Run("skip dirs and symlinks in tar.gz: only regular file becomes material", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "mixed.tar.gz") + buildTarGz(s.T(), p, + map[string]string{"real.txt": "hello"}, + []string{"adir/"}, + map[string]string{"link.txt": "real.txt"}, + ) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveTarGz, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 1, "only the regular file must become a material") + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 1) + _, hasReal := stateMap["real-txt"] + assert.True(s.T(), hasReal, "expected material real-txt in state") + }) + + s.Run("traversal rejection: ../escape.txt entry causes error and empty state", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "evil.tar.gz") + buildTarGz(s.T(), p, + map[string]string{"../escape.txt": "evil"}, + nil, nil, + ) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + _, err = c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveTarGz, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.Error(s.T(), err, "path-traversal entry must cause an error") + assert.ErrorIs(s.T(), err, materials.ErrUnsafeEntry) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Empty(s.T(), stateMap, "state must be empty after traversal rejection (atomic rollback)") + }) + + s.Run("tar.gz happy path: two regular files produce two materials", func() { + dir := s.T().TempDir() + p := filepath.Join(dir, "two.tar.gz") + buildTarGz(s.T(), p, + map[string]string{ + "alpha.txt": "aaa", + "beta.txt": "bbb", + }, + nil, nil, + ) + + runner := runners.NewGeneric() + c, err := newInitializedCrafter(s.T(), contract, &v1.WorkflowMetadata{}, true, "", runner) + require.NoError(s.T(), err) + + mts, err := c.AddMaterialsFromArchive( + context.Background(), + "", "ARTIFACT", "", p, + materials.ArchiveTarGz, backend, nil, + materials.DefaultArchiveLimits(), + ) + require.NoError(s.T(), err) + assert.Len(s.T(), mts, 2) + + stateMap := c.CraftingState.GetAttestation().GetMaterials() + assert.Len(s.T(), stateMap, 2) + _, hasAlpha := stateMap["alpha-txt"] + _, hasBeta := stateMap["beta-txt"] + assert.True(s.T(), hasAlpha, "expected material alpha-txt in state") + assert.True(s.T(), hasBeta, "expected material beta-txt in state") + }) +} + func loadSchema(path string) (*schemaapi.CraftingSchema, error) { // Extract json formatted data content, err := os.ReadFile(filepath.Clean(path)) diff --git a/pkg/attestation/crafter/materials/archive.go b/pkg/attestation/crafter/materials/archive.go new file mode 100644 index 000000000..1eccef7fb --- /dev/null +++ b/pkg/attestation/crafter/materials/archive.go @@ -0,0 +1,321 @@ +// +// Copyright 2026 The Chainloop Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package materials + +import ( + "archive/tar" + "archive/zip" + "bytes" + "compress/gzip" + "errors" + "fmt" + "io" + "os" + "path" + "strings" + + schemaapi "github.com/chainloop-dev/chainloop/app/controlplane/api/workflowcontract/v1" +) + +// ArchiveFormat identifies a supported archive container. +type ArchiveFormat int + +const ( + ArchiveNone ArchiveFormat = iota + ArchiveZip + ArchiveTar + ArchiveTarGz +) + +// DetectArchive reports whether path is a supported archive and, if so, its +// format. Detection is by extension first; for files whose extension does not +// match, magic bytes are used as a backstop so renamed archives are still +// caught. A non-archive returns (ArchiveNone, nil). +func DetectArchive(path string) (ArchiveFormat, error) { + lower := strings.ToLower(path) + switch { + case strings.HasSuffix(lower, ".zip"): + return ArchiveZip, nil + case strings.HasSuffix(lower, ".tar.gz"), strings.HasSuffix(lower, ".tgz"): + return ArchiveTarGz, nil + case strings.HasSuffix(lower, ".tar"): + return ArchiveTar, nil + } + + return detectByMagic(path) +} + +func detectByMagic(path string) (ArchiveFormat, error) { + f, err := os.Open(path) + if err != nil { + // If the file doesn't exist, the value is not a file path at all (e.g. + // "hello world" for STRING or "registry/app:v1" for CONTAINER_IMAGE). + // Treat it as a non-archive rather than propagating the error so callers + // that pass non-file values are not surprised. + return ArchiveNone, nil + } + defer f.Close() + + // 512 bytes is enough for the gzip/zip magic and the tar "ustar" marker at + // offset 257. + header := make([]byte, 512) + n, _ := f.Read(header) + header = header[:n] + + switch { + case bytes.HasPrefix(header, []byte("PK\x03\x04")), bytes.HasPrefix(header, []byte("PK\x05\x06")): + return ArchiveZip, nil + case bytes.HasPrefix(header, []byte{0x1f, 0x8b}): + return ArchiveTarGz, nil + case len(header) >= 262 && bytes.Equal(header[257:262], []byte("ustar")): + return ArchiveTar, nil + } + + return ArchiveNone, nil +} + +var ( + // ErrTooManyEntries is returned when an archive has more qualifying entries + // than the configured maximum. + ErrTooManyEntries = errors.New("archive exceeds the maximum number of entries") + // ErrArchiveTooLarge is returned when the running uncompressed size of an + // archive exceeds the configured maximum. + ErrArchiveTooLarge = errors.New("archive exceeds the maximum uncompressed size") + // ErrUnsafeEntry is returned when an archive entry's path is absolute or escapes the extraction root. + ErrUnsafeEntry = errors.New("unsafe entry path in archive") +) + +// ArchiveLimits bounds archive expansion to guard against zip bombs. +type ArchiveLimits struct { + MaxEntries int + MaxTotalSize int64 +} + +// DefaultArchiveLimits returns the safe defaults: 10000 entries and 1 GiB +// total uncompressed size. +func DefaultArchiveLimits() ArchiveLimits { + return ArchiveLimits{MaxEntries: 10000, MaxTotalSize: 1 << 30} +} + +// capReader wraps a reader and fails once the shared running total exceeds max, +// so we never trust an archive's declared sizes. +type capReader struct { + r io.Reader + total *int64 + max int64 +} + +func (c *capReader) Read(p []byte) (int, error) { + n, err := c.r.Read(p) + *c.total += int64(n) + if *c.total > c.max { + return n, ErrArchiveTooLarge + } + return n, err +} + +// WalkArchiveEntries calls yield for every regular file in the archive, +// enforcing the limits and skipping directories, symlinks, hardlinks, empty +// entries, and path-traversal entries. +func WalkArchiveEntries(path string, format ArchiveFormat, limits ArchiveLimits, yield func(name string, r io.Reader) error) error { + var total int64 + count := 0 + visit := func(name string, r io.Reader) error { + if !safeArchivePath(name) { + return fmt.Errorf("%w: %q", ErrUnsafeEntry, name) + } + count++ + if count > limits.MaxEntries { + return ErrTooManyEntries + } + if err := yield(name, &capReader{r: r, total: &total, max: limits.MaxTotalSize}); err != nil { + return fmt.Errorf("processing entry %q: %w", name, err) + } + return nil + } + + switch format { + case ArchiveZip: + return walkZip(path, visit) + case ArchiveTar: + return walkTar(path, false, visit) + case ArchiveTarGz: + return walkTar(path, true, visit) + default: + return fmt.Errorf("unsupported archive format") + } +} + +// safeArchivePath rejects absolute paths and any path that escapes the +// extraction root via ".." path components. A filename that merely contains +// ".." as a substring (e.g. "foo..bar.json") is accepted; only actual path +// components equal to ".." are rejected. +func safeArchivePath(name string) bool { + normalized := strings.ReplaceAll(name, "\\", "/") + // Reject absolute paths. + if strings.HasPrefix(normalized, "/") { + return false + } + // Canonicalise against a virtual root and check that the result stays + // within it. path.Clean will resolve ".." components so a path like + // "a/../../etc/passwd" becomes "/etc/passwd" which does not start with + // the virtual prefix "/root/"; a safe path like "a/b.txt" becomes + // "/root/a/b.txt" which does. + const root = "/root" + clean := path.Clean(root + "/" + normalized) + return strings.HasPrefix(clean, root+"/") || clean == root +} + +func walkZip(p string, visit func(name string, r io.Reader) error) error { + zr, err := zip.OpenReader(p) + if err != nil { + return fmt.Errorf("opening zip: %w", err) + } + defer zr.Close() + + for _, f := range zr.File { + // Skip directories, symlinks, and empty entries: they carry no file + // content worth recording as a material. Empty-entry skipping is + // intentional per the explode design (an empty evidence file produces + // no material). Note: symlink detection relies on Unix mode bits stored + // in the zip; archives written without Unix metadata won't carry the + // symlink bit, so such a symlink would be treated as a regular file + // (its content being the stored target path). Tar symlinks are detected + // reliably via the typeflag below. + if f.FileInfo().IsDir() || f.Mode()&os.ModeSymlink != 0 || f.UncompressedSize64 == 0 { + continue + } + rc, err := f.Open() + if err != nil { + return fmt.Errorf("opening entry %q: %w", f.Name, err) + } + err = visit(f.Name, rc) + rc.Close() + if err != nil { + return err + } + } + return nil +} + +func walkTar(p string, gzipped bool, visit func(name string, r io.Reader) error) error { + f, err := os.Open(p) + if err != nil { + return fmt.Errorf("opening tar: %w", err) + } + defer f.Close() + + var src io.Reader = f + if gzipped { + gz, err := gzip.NewReader(f) + if err != nil { + return fmt.Errorf("opening gzip: %w", err) + } + defer gz.Close() + src = gz + } + + tr := tar.NewReader(src) + for { + hdr, err := tr.Next() + if errors.Is(err, io.EOF) { + return nil + } + if err != nil { + return fmt.Errorf("reading tar: %w", err) + } + // Only regular files become materials; directories, symlinks, hardlinks + // and other special entries are skipped via the typeflag. Empty entries + // are skipped intentionally (an empty evidence file produces no material). + if hdr.Typeflag != tar.TypeReg || hdr.Size == 0 { + continue + } + if err := visit(hdr.Name, tr); err != nil { + return err + } + } +} + +// archiveNativeKinds lists material kinds whose value is the archive itself. +// For these, --kind short-circuits the explode path and the archive is +// recorded whole. Extend this set as new "the archive is the material" kinds +// are added. +var archiveNativeKinds = map[string]struct{}{ + schemaapi.CraftingSchema_Material_ZAP_DAST_ZIP.String(): {}, +} + +// IsArchiveNativeKind reports whether kind treats the archive as a single +// material (recorded whole) rather than something to explode. +func IsArchiveNativeKind(kind string) bool { + _, ok := archiveNativeKinds[kind] + return ok +} + +// SanitizeMaterialName converts s into a valid DNS-1123 material name: +// lowercase, with every run of characters outside [a-z0-9] collapsed to a +// single "-" and leading/trailing "-" trimmed. Falls back to "material". +func SanitizeMaterialName(s string) string { + var b strings.Builder + pendingHyphen := false + for _, r := range strings.ToLower(s) { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') { + if pendingHyphen && b.Len() > 0 { + b.WriteByte('-') + } + b.WriteRune(r) + pendingHyphen = false + } else { + pendingHyphen = true + } + } + if b.Len() == 0 { + return "material" + } + return b.String() +} + +// NameAllocator hands out unique DNS-1123 material names, suffixing collisions +// with -1, -2, …. It is seeded with names already present in the attestation +// so derived names never overwrite existing materials. +type NameAllocator struct { + used map[string]struct{} +} + +// NewNameAllocator seeds the allocator with existing material names. +func NewNameAllocator(existing []string) *NameAllocator { + used := make(map[string]struct{}, len(existing)) + for _, e := range existing { + used[e] = struct{}{} + } + return &NameAllocator{used: used} +} + +// Allocate returns a unique name derived from base (and optional prefix). +func (a *NameAllocator) Allocate(prefix, base string) string { + name := SanitizeMaterialName(base) + if prefix != "" { + name = SanitizeMaterialName(prefix) + "-" + name + } + + candidate := name + for i := 1; ; i++ { + if _, taken := a.used[candidate]; !taken { + a.used[candidate] = struct{}{} + return candidate + } + candidate = fmt.Sprintf("%s-%d", name, i) + } +} diff --git a/pkg/attestation/crafter/materials/archive_test.go b/pkg/attestation/crafter/materials/archive_test.go new file mode 100644 index 000000000..234b1bae7 --- /dev/null +++ b/pkg/attestation/crafter/materials/archive_test.go @@ -0,0 +1,241 @@ +// Copyright 2026 The Chainloop Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package materials + +import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "io" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeZip creates a zip at dir/name containing the given files (name->content). +func writeZip(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + zw := zip.NewWriter(f) + for n, c := range files { + w, err := zw.Create(n) + require.NoError(t, err) + _, err = w.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) + return p +} + +// writeTarGz creates a .tar.gz at dir/name containing the given regular files. +func writeTarGz(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + gw := gzip.NewWriter(f) + tw := tar.NewWriter(gw) + for n, c := range files { + require.NoError(t, tw.WriteHeader(&tar.Header{Name: n, Mode: 0o600, Size: int64(len(c)), Typeflag: tar.TypeReg})) + _, err = tw.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, tw.Close()) + require.NoError(t, gw.Close()) + return p +} + +// writeTar creates an uncompressed .tar at dir/name containing the given regular files. +func writeTar(t *testing.T, dir, name string, files map[string]string) string { + t.Helper() + p := filepath.Join(dir, name) + f, err := os.Create(p) + require.NoError(t, err) + defer f.Close() + tw := tar.NewWriter(f) + for n, c := range files { + require.NoError(t, tw.WriteHeader(&tar.Header{Name: n, Mode: 0o600, Size: int64(len(c)), Typeflag: tar.TypeReg})) + _, err = tw.Write([]byte(c)) + require.NoError(t, err) + } + require.NoError(t, tw.Close()) + return p +} + +func TestDetectArchive(t *testing.T) { + dir := t.TempDir() + zipPath := writeZip(t, dir, "a.zip", map[string]string{"x.txt": "hi"}) + tgzPath := writeTarGz(t, dir, "a.tar.gz", map[string]string{"x.txt": "hi"}) + tarPath := writeTar(t, dir, "a.tar", map[string]string{"x.txt": "hi"}) + tgzShortPath := writeTarGz(t, dir, "a.tgz", map[string]string{"x.txt": "hi"}) + + plain := filepath.Join(dir, "app.bin") + require.NoError(t, os.WriteFile(plain, []byte("not an archive"), 0o600)) + + // A .zip renamed without extension — magic bytes must still detect it. + noExt := filepath.Join(dir, "noext") + require.NoError(t, os.WriteFile(noExt, mustRead(t, zipPath), 0o600)) + + tests := []struct { + name string + path string + want ArchiveFormat + }{ + {"zip by extension", zipPath, ArchiveZip}, + {"tar.gz by extension", tgzPath, ArchiveTarGz}, + {"tar by extension", tarPath, ArchiveTar}, + {"tgz by extension", tgzShortPath, ArchiveTarGz}, + {"plain file", plain, ArchiveNone}, + {"zip without extension via magic", noExt, ArchiveZip}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := DetectArchive(tc.path) + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func mustRead(t *testing.T, p string) []byte { + t.Helper() + b, err := os.ReadFile(p) + require.NoError(t, err) + return b +} + +func TestWalkArchiveEntries(t *testing.T) { + dir := t.TempDir() + + t.Run("yields regular files, skips dirs", func(t *testing.T) { + // Build a zip with a directory entry + two files. + p := filepath.Join(dir, "files.zip") + f, err := os.Create(p) + require.NoError(t, err) + zw := zip.NewWriter(f) + _, err = zw.Create("nested/") // directory entry + require.NoError(t, err) + for _, n := range []string{"a.json", "nested/b.json"} { + w, err := zw.Create(n) + require.NoError(t, err) + _, err = w.Write([]byte("{}")) + require.NoError(t, err) + } + require.NoError(t, zw.Close()) + require.NoError(t, f.Close()) + + var got []string + err = WalkArchiveEntries(p, ArchiveZip, DefaultArchiveLimits(), func(name string, r io.Reader) error { + b, _ := io.ReadAll(r) + assert.Equal(t, "{}", string(b)) + got = append(got, name) + return nil + }) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"a.json", "nested/b.json"}, got) + }) + + t.Run("max entries exceeded", func(t *testing.T) { + p := writeTarGz(t, dir, "many.tar.gz", map[string]string{"a": "1", "b": "2", "c": "3"}) + err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 2, MaxTotalSize: 1 << 30}, func(string, io.Reader) error { return nil }) + require.ErrorIs(t, err, ErrTooManyEntries) + }) + + t.Run("max total size exceeded while streaming", func(t *testing.T) { + p := writeTarGz(t, dir, "big.tar.gz", map[string]string{"a": strings.Repeat("x", 1000)}) + err := WalkArchiveEntries(p, ArchiveTarGz, ArchiveLimits{MaxEntries: 100, MaxTotalSize: 100}, func(_ string, r io.Reader) error { + _, err := io.ReadAll(r) + return err + }) + require.ErrorIs(t, err, ErrArchiveTooLarge) + }) + + t.Run("rejects traversal via tar with .. entries", func(t *testing.T) { + // tar allows .. in header, so we can test via tar. + p := filepath.Join(dir, "evil.tar.gz") + f, err := os.Create(p) + require.NoError(t, err) + gw := gzip.NewWriter(f) + tw := tar.NewWriter(gw) + require.NoError(t, tw.WriteHeader(&tar.Header{Name: "../escape.txt", Mode: 0o600, Size: 1, Typeflag: tar.TypeReg})) + _, err = tw.Write([]byte("x")) + require.NoError(t, err) + require.NoError(t, tw.Close()) + require.NoError(t, gw.Close()) + require.NoError(t, f.Close()) + + err = WalkArchiveEntries(p, ArchiveTarGz, DefaultArchiveLimits(), func(string, io.Reader) error { return nil }) + require.Error(t, err, "entry ../escape.txt must be rejected") + }) +} + +func TestSafeArchivePath(t *testing.T) { + tests := []struct { + name string + path string + want bool + }{ + {"absolute path", "/etc/passwd", false}, + {"path traversal", "../escape.txt", false}, + {"nested path traversal", "foo/../../../etc/passwd", false}, + {"double dot in filename is ok", "foo..bar.json", true}, + {"escape via nested double dot", "a/../../etc/passwd", false}, + {"valid nested path", "a/b.txt", true}, + {"valid simple path", "file.txt", true}, + {"valid with subdirs", "nested/dir/file.txt", true}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := safeArchivePath(tc.path) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestSanitizeMaterialName(t *testing.T) { + tests := []struct{ in, want string }{ + {"scan.json", "scan-json"}, + {"results.XML", "results-xml"}, + {"weird__name!!", "weird-name"}, + {"___", "material"}, + } + for _, tc := range tests { + assert.Equal(t, tc.want, SanitizeMaterialName(tc.in)) + } +} + +func TestNameAllocator(t *testing.T) { + a := NewNameAllocator([]string{"existing"}) + + assert.Equal(t, "scan-json", a.Allocate("", "scan.json")) + assert.Equal(t, "scan-json-1", a.Allocate("", "scan.json")) // collision + assert.Equal(t, "results-xml", a.Allocate("", "results.xml")) + assert.Equal(t, "existing-1", a.Allocate("", "existing")) // collides with pre-existing + assert.Equal(t, "sboms-a-json", a.Allocate("sboms", "a.json")) // prefix +} + +func TestIsArchiveNativeKind(t *testing.T) { + assert.True(t, IsArchiveNativeKind("ZAP_DAST_ZIP")) + assert.False(t, IsArchiveNativeKind("SBOM_CYCLONEDX_JSON")) + assert.False(t, IsArchiveNativeKind("ARTIFACT")) +}