Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/BurntSushi/toml v1.6.0
github.com/anchore/syft v1.44.0
github.com/spf13/cobra v1.10.2
golang.org/x/sync v0.20.0
)

require (
Expand Down Expand Up @@ -74,7 +75,6 @@ require (
github.com/wagoodman/go-progress v0.0.0-20260303201901-10176f79b2c0 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/term v0.42.0 // indirect
golang.org/x/text v0.36.0 // indirect
Expand Down
52 changes: 43 additions & 9 deletions internal/catalog/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,25 @@ package catalog

import (
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"

"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"golang.org/x/sync/errgroup"
)

// catalogConcurrency returns the limit that catalogByGlob passes to
// errgroup's SetLimit, i.e. how many manifest parses may run at once.
//
// A positive integer in the OSSPREY_SCAN_CONCURRENCY environment variable
// (surrounding whitespace is trimmed) overrides the default. An unset, empty,
// non-numeric, zero, or negative value falls back to 8.
func catalogConcurrency() int {
// A failed Atoi and n <= 0 are both treated as "not configured".
if n, err := strconv.Atoi(strings.TrimSpace(os.Getenv("OSSPREY_SCAN_CONCURRENCY"))); err == nil && n > 0 {
return n
}
// NOTE(review): the fixed default (rather than runtime.NumCPU) follows the
// review thread below, which says the expensive part is network-bound —
// confirm before changing.
return 8

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Default should be runtime.NumCPU

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're saying the expensive part is network and CPU just spends a bunch of time waiting

}
Comment thread
Copilot marked this conversation as resolved.

// fileParser converts one matched manifest into syft packages.
type fileParser func(absPath string, loc file.Location) ([]pkg.Package, error)

Expand All @@ -25,18 +37,40 @@ func catalogByGlob(resolver file.Resolver, root, glob, label string, parse fileP
if err != nil {
return nil, fmt.Errorf("%s cataloger: glob: %w", label, err)
}
seen := make(map[string]struct{})
var out []pkg.Package
for _, loc := range locs {
// Skip vendored dependencies.

type result struct {
idx int
pkgs []pkg.Package
}
var (
mu sync.Mutex
results []result
)
g := new(errgroup.Group)
g.SetLimit(catalogConcurrency())
for i, loc := range locs {
if isVendoredPath(loc.RealPath) {
continue
}
pkgs, err := parse(filepath.Join(root, loc.RealPath), loc)
if err != nil || len(pkgs) == 0 {
continue
}
for _, p := range pkgs {
i, loc := i, loc
g.Go(func() error {
pkgs, err := parse(filepath.Join(root, loc.RealPath), loc)
if err != nil || len(pkgs) == 0 {
return nil // per-file errors are non-fatal, as before
}
mu.Lock()
results = append(results, result{idx: i, pkgs: pkgs})
mu.Unlock()
return nil
})
}
_ = g.Wait() // workers never return non-nil; Wait is just the barrier

Comment thread
ossprey-valentino marked this conversation as resolved.
sort.Slice(results, func(a, b int) bool { return results[a].idx < results[b].idx })
seen := make(map[string]struct{})
var out []pkg.Package
for _, r := range results {
for _, p := range r.pkgs {
key := p.Name + "@" + p.Version
if _, ok := seen[key]; ok {
continue
Expand Down
106 changes: 106 additions & 0 deletions internal/catalog/concurrency_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package catalog

import (
"fmt"
"path/filepath"
"strconv"
"testing"
"time"

"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/source/directorysource"
)

// TestCatalogConcurrency checks how catalogConcurrency interprets the
// OSSPREY_SCAN_CONCURRENCY environment variable: an empty value yields the
// default of 8, a positive integer is used as-is, and zero, negative, or
// non-numeric values fall back to the default.
func TestCatalogConcurrency(t *testing.T) {
	const envVar = "OSSPREY_SCAN_CONCURRENCY"

	// Empty value: default applies.
	t.Setenv(envVar, "")
	if n := catalogConcurrency(); n != 8 {
		t.Errorf("default = %d, want 8", n)
	}

	// Valid positive override.
	t.Setenv(envVar, "3")
	if n := catalogConcurrency(); n != 3 {
		t.Errorf("override = %d, want 3", n)
	}

	// Values that must be rejected in favour of the default.
	rejected := [...]string{"0", "-1", "abc"}
	for i := range rejected {
		raw := rejected[i]
		t.Setenv(envVar, raw)
		n := catalogConcurrency()
		if n == 8 {
			continue
		}
		t.Errorf("invalid(%q) = %d, want default 8", raw, n)
	}
}

// buildResolver is a test helper that opens dir as a syft directory source
// and returns that source's file resolver for the squashed scope. The source
// is closed via t.Cleanup; any setup error fails the test immediately.
func buildResolver(t *testing.T, dir string) file.Resolver {
	t.Helper()

	dirSource, srcErr := directorysource.NewFromPath(dir)
	if srcErr != nil {
		t.Fatalf("source: %v", srcErr)
	}
	// Release the source once the calling test (and its subtests) finish.
	t.Cleanup(func() { dirSource.Close() })

	resolver, resErr := dirSource.FileResolver(source.SquashedScope)
	if resErr != nil {
		t.Fatalf("resolver: %v", resErr)
	}
	return resolver
}

// TestCatalogByGlob_DeterministicUnderConcurrency verifies that catalogByGlob
// returns the same packages in the same order whether it runs with a
// concurrency limit of 1 or 8.
func TestCatalogByGlob_DeterministicUnderConcurrency(t *testing.T) {
	const n = 24

	dir := t.TempDir()
	for k := 0; k < n; k++ {
		writeFile(t, dir, fmt.Sprintf("m%02d.dep", k), "x")
	}
	resolver := buildResolver(t, dir)

	// The parser derives the package index from the two digits in the file
	// name ("mNN.dep"). Lower-numbered files sleep longer, so concurrent
	// workers tend to finish out of input order.
	parse := func(absPath string, loc file.Location) ([]pkg.Package, error) {
		name := filepath.Base(loc.RealPath)
		num, _ := strconv.Atoi(name[1:3])
		delay := time.Duration(n-num) * time.Millisecond
		time.Sleep(delay)

		p := pkg.Package{
			Name:      fmt.Sprintf("pkg%02d", num),
			Version:   "1.0.0",
			Type:      pkg.NpmPkg,
			Locations: file.NewLocationSet(loc),
		}
		return []pkg.Package{p}, nil
	}

	// run catalogs the fixture with the given concurrency setting.
	run := func(limit string) []pkg.Package {
		t.Setenv("OSSPREY_SCAN_CONCURRENCY", limit)
		pkgs, err := catalogByGlob(resolver, dir, "**/*.dep", "test", parse)
		if err != nil {
			t.Fatal(err)
		}
		return pkgs
	}
	seq := run("1")
	conc := run("8")

	if len(seq) != n || len(conc) != n {
		t.Fatalf("len seq=%d conc=%d, want %d", len(seq), len(conc), n)
	}
	for i, want := range seq {
		got := conc[i]
		if want.Name == got.Name && want.Version == got.Version {
			continue
		}
		t.Fatalf("order/content differs at %d: seq=%s conc=%s", i, want.Name, got.Name)
	}
}

// TestCatalogByGlob_DedupUnderConcurrency verifies that when several manifests
// yield the same (name, version) pair, catalogByGlob returns it only once with
// the concurrency limit set to 8.
func TestCatalogByGlob_DedupUnderConcurrency(t *testing.T) {
	dir := t.TempDir()
	manifests := [...]string{"a.dep", "b.dep", "c.dep"}
	for i := range manifests {
		writeFile(t, dir, manifests[i], "x")
	}
	resolver := buildResolver(t, dir)

	// Every manifest reports the identical package; only the location differs.
	parse := func(absPath string, loc file.Location) ([]pkg.Package, error) {
		dup := pkg.Package{
			Name:      "dup",
			Version:   "1.0.0",
			Type:      pkg.NpmPkg,
			Locations: file.NewLocationSet(loc),
		}
		return []pkg.Package{dup}, nil
	}

	t.Setenv("OSSPREY_SCAN_CONCURRENCY", "8")
	got, err := catalogByGlob(resolver, dir, "**/*.dep", "test", parse)
	if err != nil {
		t.Fatal(err)
	}
	if count := len(got); count != 1 {
		t.Fatalf("got %d packages, want 1 (deduped): %v", count, got)
	}
}
18 changes: 12 additions & 6 deletions internal/catalog/npm_cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,19 @@ func (c *NpmResolveCataloger) Catalog(ctx context.Context, resolver file.Resolve
if err != nil {
return nil, nil, nil // no npm on PATH — silently skip
}
// One shared cache for the whole scan
cache, err := os.MkdirTemp("", "ossprey-npm-cache-")
if err != nil {
return nil, nil, fmt.Errorf("npm cache: %w", err)
}
defer os.RemoveAll(cache)

parse := func(absPath string, loc file.Location) ([]pkg.Package, error) {
dir := filepath.Dir(absPath)
if hasNpmLockfile(dir) {
return nil, nil // syft's lock cataloger already resolves this project
}
return runNpmResolve(ctx, npm, absPath, loc)
return runNpmResolve(ctx, npm, cache, absPath, loc)
}
out, err := catalogByGlob(resolver, c.root, "**/package.json", "npm", parse)
return out, nil, err
Expand All @@ -61,8 +68,9 @@ func hasNpmLockfile(dir string) bool {
// with `npm install --package-lock-only` (resolution only — no node_modules,
// no install scripts), and parses the resolved versions. The temp dir keeps the
// user's working tree clean; --ignore-scripts guarantees we never execute code
// from the (potentially malicious) dependency tree we are about to scan.
func runNpmResolve(ctx context.Context, npm, packageJSON string, loc file.Location) ([]pkg.Package, error) {
// from the (potentially malicious) dependency tree we are about to scan. cache
// is the scan-wide shared npm cache.
func runNpmResolve(ctx context.Context, npm, cache, packageJSON string, loc file.Location) ([]pkg.Package, error) {
tmp, err := os.MkdirTemp("", "ossprey-npm-")
if err != nil {
return nil, err
Expand All @@ -85,9 +93,7 @@ func runNpmResolve(ctx context.Context, npm, packageJSON string, loc file.Locati
"--no-update-notifier",
)
cmd.Dir = tmp
// Keep npm's cache inside the temp dir so we neither touch the user's cache
// nor fail when HOME is read-only (Lambda).
cmd.Env = append(os.Environ(), "npm_config_cache="+filepath.Join(tmp, ".npm-cache"))
cmd.Env = append(os.Environ(), "npm_config_cache="+cache)
if out, err := cmd.CombinedOutput(); err != nil {
return nil, fmt.Errorf("npm install --package-lock-only: %w: %s", err, strings.TrimSpace(string(out)))
}
Expand Down
10 changes: 9 additions & 1 deletion internal/catalog/setuppy_cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package catalog

import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"

Expand Down Expand Up @@ -31,6 +33,12 @@ func (c *SetupPyCataloger) Catalog(ctx context.Context, resolver file.Resolver)
if err != nil {
return nil, nil, nil // no uv on PATH — silently skip
}
cache, err := os.MkdirTemp("", "ossprey-uv-cache-")
if err != nil {
return nil, nil, fmt.Errorf("uv cache: %w", err)
}
defer os.RemoveAll(cache)

parse := func(absPath string, loc file.Location) ([]pkg.Package, error) {
dir := filepath.Dir(absPath)
// Skip if pyproject.toml in same dir AND has [project] table — UVCataloger
Expand All @@ -40,7 +48,7 @@ func (c *SetupPyCataloger) Catalog(ctx context.Context, resolver file.Resolver)
return nil, nil
}
args := []string{"pip", "compile", "--universal", "--no-progress", absPath}
return runUV(ctx, uv, dir, args, loc)
return runUV(ctx, uv, cache, dir, args, loc)
}
out, err := catalogByGlob(resolver, c.root, "**/setup.py", "setup.py", parse)
return out, nil, err
Expand Down
12 changes: 10 additions & 2 deletions internal/catalog/uv_cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,17 @@ func (c *UVCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pk
if err != nil {
return nil, nil, nil // no uv on PATH — silently skip
}

cache, err := os.MkdirTemp("", "ossprey-uv-cache-")
if err != nil {
return nil, nil, fmt.Errorf("uv cache: %w", err)
}
defer os.RemoveAll(cache)

parse := func(absPath string, loc file.Location) ([]pkg.Package, error) {
dir := filepath.Dir(absPath)
args := uvArgsForPyProject(dir)
return runUV(ctx, uv, dir, args, loc)
return runUV(ctx, uv, cache, dir, args, loc)
}
out, err := catalogByGlob(resolver, c.root, "**/pyproject.toml", "uv", parse)
return out, nil, err
Expand Down Expand Up @@ -69,8 +76,9 @@ func uvArgsForPyProject(dir string) []string {
}
}

func runUV(ctx context.Context, uv, dir string, args []string, loc file.Location) ([]pkg.Package, error) {
func runUV(ctx context.Context, uv, cache, dir string, args []string, loc file.Location) ([]pkg.Package, error) {
cmd := exec.CommandContext(ctx, uv, args...)
cmd.Env = append(os.Environ(), "UV_CACHE_DIR="+cache)
stdout, err := cmd.Output()
if err != nil {
var ee *exec.ExitError
Expand Down
Loading