From 02fdb686aeedfba447d1b2d8e909ed1577dcf748 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 10:56:38 +0200 Subject: [PATCH 01/12] Document unsupported newline paths Document that treefmt does not support paths containing newlines before introducing command-backed path stream walkers. --- docs/site/getting-started/configure.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/site/getting-started/configure.md b/docs/site/getting-started/configure.md index 77f55adb..c7e7045e 100644 --- a/docs/site/getting-started/configure.md +++ b/docs/site/getting-started/configure.md @@ -398,6 +398,8 @@ Set the verbosity level of logs: The method used to traverse the files within the tree root. Currently, we support 'auto', 'git', 'jujutsu' or 'filesystem' +Paths containing newlines are unsupported. + === "Flag" ```console From 4a502a3be16823b0c9a90abb529cef749fe57f19 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 10:57:11 +0200 Subject: [PATCH 02/12] Add command-backed path stream reader Introduce a shared reader for walker commands that emit paths on stdout. Parse newline- and NUL-delimited records, normalize emitted paths inside the tree root, post-filter requested path filters, and convert existing regular files into walk.File values. Own the stdout and stderr pipes instead of using Cmd.StdoutPipe so the command can be waited for while Read drains stdout. Close cancels the command to unblock producers when callers stop before EOF, and treats post-cancel signal exits as expected. --- walk/path_stream.go | 280 +++++++++++++++++++++++++++++++++++++++ walk/path_stream_test.go | 193 +++++++++++++++++++++++++++ walk/walk.go | 8 +- 3 files changed, 480 insertions(+), 1 deletion(-) create mode 100644 walk/path_stream.go create mode 100644 walk/path_stream_test.go diff --git a/walk/path_stream.go b/walk/path_stream.go new file mode 100644 index 00000000..85d033b1 --- /dev/null +++ b/walk/path_stream.go @@ -0,0 +1,280 @@ +package walk + +import ( + "bufio" + "context" + "errors" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + + "charm.land/log/v2" + "github.com/numtide/treefmt/v2/stats" + "golang.org/x/sync/errgroup" + "mvdan.cc/sh/v3/expand" + "mvdan.cc/sh/v3/interp" +) + +type PathStreamConfig struct { + Name string + Command string + Options []string + PathFilters []string +} + +type PathStreamReader struct { + root string + cfg PathStreamConfig + + log *log.Logger + stats *stats.Stats + + cancel context.CancelFunc + eg *errgroup.Group + scanner *bufio.Scanner + + waitMu sync.Mutex + waitErr error + waitCanceled bool +} + +func (p *PathStreamReader) Read(ctx context.Context, files []*File) (n int, err error) { + // ensure we record how many files we traversed + defer func() { + p.stats.Add(stats.Traversed, n) + }() + + for n < len(files) { + select { + // exit early if the context was cancelled + case <-ctx.Done(): + return n, ctx.Err() //nolint:wrapcheck + + default: + if !p.scanner.Scan() { + if err := p.scanner.Err(); err != nil { + return n, fmt.Errorf("failed to read %s output: %w", p.cfg.Name, err) + } + + return n, io.EOF + } + + file, ok, err := p.file(p.scanner.Text()) + if err != nil { + return n, err + } else if !ok { + continue + } + + files[n] = file + n++ + } + } + + return n, nil +} + +func (p *PathStreamReader) file(record string) (*File, bool, error) { + record = strings.TrimSuffix(record, "\r") + if record == "" { + return nil, false, nil + } + + relPath, err := p.relativePath(record) + if err != nil { + return nil, false, err + } + + if !containsAnyPath(p.cfg.PathFilters, relPath) { + return nil, false, nil + } + + path := filepath.Join(p.root, relPath) + + p.log.Debugf("processing file: %s", path) + + info, err := os.Lstat(path) + switch { + case os.IsNotExist(err): + p.log.Warnf( + "Path %s was emitted by %s but no file exists at that path", + path, p.cfg.Name, + ) + + return nil, false, nil + case err != nil: + return nil, false, fmt.Errorf("failed to stat %s: %w", path, err) + case !info.Mode().IsRegular(): + return nil, false, nil + } + + return &File{ + Path: path, + RelPath: relPath, + Info: info, + }, true, nil +} + +func (p *PathStreamReader) relativePath(entry string) (string, error) { + entry = filepath.Clean(entry) + if filepath.IsAbs(entry) { + relPath, err := filepath.Rel(p.root, entry) + if err != nil { + return "", fmt.Errorf("failed to determine a relative path for %s: %w", entry, err) + } + + entry = relPath + } + + if entry == ".." || strings.HasPrefix(entry, ".."+string(os.PathSeparator)) || filepath.IsAbs(entry) { + return "", fmt.Errorf("%s emitted path %s outside the tree root %s", p.cfg.Name, entry, p.root) + } + + return entry, nil +} + +func (p *PathStreamReader) Close() error { + // Unblock the walker command if the caller stopped draining Read() before + // EOF. Without this, the command can block writing stdout while Close waits + // for it to exit. + p.cancel() + + err := p.eg.Wait() + if err != nil { + return fmt.Errorf("failed to wait for %s command to complete: %w", p.cfg.Name, err) + } + + p.waitMu.Lock() + defer p.waitMu.Unlock() + + if p.waitErr != nil && !errors.Is(p.waitErr, context.Canceled) && !p.waitCanceled { + return fmt.Errorf("failed to wait for %s command to complete: %w", p.cfg.Name, p.waitErr) + } + + return nil +} + +func splitPathRecord(data []byte, atEOF bool) (advance int, token []byte, err error) { + for i, b := range data { + if b == '\n' || b == 0 { + return i + 1, data[:i], nil + } + } + + if atEOF && len(data) > 0 { + return len(data), data, nil + } + + return 0, nil, nil +} + +func containsAnyPath(filters []string, path string) bool { + if len(filters) == 0 { + return true + } + + for _, filter := range filters { + if containsPath(filter, path) { + return true + } + } + + return false +} + +func containsPath(root string, path string) bool { + if root == "" || root == "." || path == root { + return true + } + + relPath, err := filepath.Rel(root, path) + if err != nil { + return false + } + + return relPath != ".." && + !strings.HasPrefix(relPath, ".."+string(os.PathSeparator)) && + !filepath.IsAbs(relPath) +} + +func NewPathStreamReader( + root string, + statz *stats.Stats, + cfg PathStreamConfig, +) (*PathStreamReader, error) { + env := expand.ListEnviron(os.Environ()...) + + executable, err := interp.LookPathDir(root, env, cfg.Command) + if err != nil { + return nil, fmt.Errorf("failed to find %s command %q: %w", cfg.Name, cfg.Command, err) + } + + eg := &errgroup.Group{} + + args := append([]string{}, cfg.Options...) + args = append(args, cfg.PathFilters...) + + ctx, cancel := context.WithCancel(context.Background()) + cmd := exec.CommandContext(ctx, executable, args...) + cmd.Dir = root + + // Don't use cmd.StdoutPipe here. Its docs say it is incorrect to call + // Cmd.Wait before all reads from the pipe have completed, and we wait for + // the command in a producer goroutine while Read drains stdout. + // See https://pkg.go.dev/os/exec#Cmd.StdoutPipe. + stdout, stdoutW := io.Pipe() + stderr, stderrW := io.Pipe() + + cmd.Stdout = stdoutW + cmd.Stderr = stderrW + + scanner := bufio.NewScanner(stdout) + scanner.Split(splitPathRecord) + + reader := &PathStreamReader{ + root: root, + cfg: cfg, + log: log.WithPrefix("walk | " + cfg.Name), + stats: statz, + cancel: cancel, + eg: eg, + scanner: scanner, + } + + eg.Go(func() error { + err := cmd.Run() + + reader.waitMu.Lock() + reader.waitErr = err + reader.waitCanceled = ctx.Err() != nil + reader.waitMu.Unlock() + + closeErr := stdoutW.Close() + if stderrCloseErr := stderrW.Close(); stderrCloseErr != nil && closeErr == nil { + closeErr = stderrCloseErr + } + + return closeErr + }) + + eg.Go(func() error { + l := log.WithPrefix("walk | " + cfg.Name + " | stderr") + + scanner := bufio.NewScanner(stderr) + for scanner.Scan() { + l.Debugf("%s", scanner.Text()) + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("failed to read %s stderr: %w", cfg.Name, err) + } + + return nil + }) + + return reader, nil +} diff --git a/walk/path_stream_test.go b/walk/path_stream_test.go new file mode 100644 index 00000000..d4fd4a7a --- /dev/null +++ b/walk/path_stream_test.go @@ -0,0 +1,193 @@ +package walk_test + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "testing" + "time" + + "github.com/numtide/treefmt/v2/stats" + "github.com/numtide/treefmt/v2/test" + "github.com/numtide/treefmt/v2/walk" + "github.com/stretchr/testify/require" +) + +func readPathStream( + t *testing.T, + reader *walk.PathStreamReader, + statz *stats.Stats, + expected int, +) []*walk.File { + t.Helper() + as := require.New(t) + + files := make([]*walk.File, 8) + ctx, cancel := context.WithTimeout(t.Context(), time.Second) + n, err := reader.Read(ctx, files) + + cancel() + + as.Equal(expected, n) + as.ErrorIs(err, io.EOF) + as.Equal(expected, statz.Value(stats.Traversed)) + + return files[:n] +} + +func TestPathStreamReader(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewPathStreamReader(tempDir, &statz, walk.PathStreamConfig{ + Name: "test walker", + Command: "bash", + Options: []string{ + "-c", + "printf '%s\n' go/main.go missing.txt haskell/Foo.hs", + }, + }) + as.NoError(err) + + files := readPathStream(t, reader, &statz, 2) + as.Equal("go/main.go", files[0].RelPath) + as.Equal("haskell/Foo.hs", files[1].RelPath) + as.NoError(reader.Close()) +} + +func TestPathStreamReaderSubpath(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewPathStreamReader(tempDir, &statz, walk.PathStreamConfig{ + Name: "test walker", + Command: "bash", + Options: []string{ + "-c", + "printf '%s\n' go/main.go haskell/Foo.hs", + }, + PathFilters: []string{"go"}, + }) + as.NoError(err) + + files := readPathStream(t, reader, &statz, 1) + as.Equal("go/main.go", files[0].RelPath) + as.NoError(reader.Close()) +} + +func TestPathStreamReaderPassesPathFilters(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewPathStreamReader(tempDir, &statz, walk.PathStreamConfig{ + Name: "test walker", + Command: "bash", + Options: []string{ + "-c", + `for path in "$@"; do + case "$path" in + go) printf '%s\n' go/go.mod go/main.go ;; + *) printf '%s\n' "$path" ;; + esac +done`, + "walker", + }, + PathFilters: []string{"go"}, + }) + as.NoError(err) + + files := readPathStream(t, reader, &statz, 2) + as.Equal("go/go.mod", files[0].RelPath) + as.Equal("go/main.go", files[1].RelPath) + as.NoError(reader.Close()) +} + +func TestPathStreamReaderDelimiters(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewPathStreamReader(tempDir, &statz, walk.PathStreamConfig{ + Name: "test walker", + Command: "bash", + Options: []string{ + "-c", + `printf 'go/main.go\0haskell/Foo.hs\n'`, + }, + }) + as.NoError(err) + + files := readPathStream(t, reader, &statz, 2) + as.Equal("go/main.go", files[0].RelPath) + as.Equal("haskell/Foo.hs", files[1].RelPath) + as.NoError(reader.Close()) +} + +func TestPathStreamReaderCloseUnblocks(t *testing.T) { + as := require.New(t) + + tempDir := t.TempDir() + + for i := range walk.BatchSize { + path := fmt.Sprintf("f%04d", i) + as.NoError(os.WriteFile(filepath.Join(tempDir, path), nil, 0o600)) + } + + statz := stats.New() + reader, err := walk.NewPathStreamReader(tempDir, &statz, walk.PathStreamConfig{ + Name: "test walker", + Command: "bash", + Options: []string{ + "-c", + `while true; do for path in "$@"; do printf '%s\n' "$path"; done; done`, + "walker", + }, + PathFilters: []string{"."}, + }) + as.NoError(err) + + done := make(chan error, 1) + + go func() { done <- reader.Close() }() + + select { + case err := <-done: + as.NoError(err) + case <-time.After(5 * time.Second): + t.Fatal("Close() did not return; walker command is deadlocked") + } +} + +func TestPathStreamReaderCommandFailure(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewPathStreamReader(tempDir, &statz, walk.PathStreamConfig{ + Name: "test walker", + Command: "bash", + Options: []string{ + "-c", + "printf '%s\n' go/main.go; exit 7", + }, + }) + as.NoError(err) + + files := readPathStream(t, reader, &statz, 1) + as.Equal("go/main.go", files[0].RelPath) + + err = reader.Close() + as.Error(err) + as.ErrorContains(err, "failed to wait for test walker command to complete") + as.ErrorContains(err, "exit status 7") +} diff --git a/walk/walk.go b/walk/walk.go index fd1500bf..1270493d 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -144,7 +144,13 @@ func (f *File) String() string { return f.Path } -// Reader is an interface for reading files. +// Reader produces batches of files for the formatter pipeline. +// +// Read fills the provided files slice with up to len(files) files and returns +// the number of entries written. Callers must consume only files[:n]. A reader +// may return n > 0 together with a non-nil error; callers should process the +// returned files before handling the error. Readers return io.EOF when no more +// files are available. type Reader interface { Read(ctx context.Context, files []*File) (n int, err error) Close() error From 2f52fe6087600eb11d9c14849e7691b80e260830 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 10:58:10 +0200 Subject: [PATCH 03/12] Split walker reader construction Build the uncached reader in a helper and apply the cache wrapper once in NewReader. Keep auto fallback inside uncached construction so trying built-in walkers does not recurse through cache setup. --- walk/walk.go | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/walk/walk.go b/walk/walk.go index 1270493d..36d25e1e 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -210,6 +210,27 @@ func NewReader( path string, db *bolt.DB, statz *stats.Stats, +) (Reader, error) { + reader, err := newUncachedReader(walkType, root, path, statz) + if err != nil { + return nil, err + } + + if db != nil { + // wrap with cached reader + // db will be null if --no-cache is enabled + reader, err = NewCachedReader(db, BatchSize, reader) + } + + return reader, err +} + +//nolint:ireturn +func newUncachedReader( + walkType Type, + root string, + path string, + statz *stats.Stats, ) (Reader, error) { var ( err error @@ -219,11 +240,11 @@ func NewReader( switch walkType { case Auto: // for now, we keep it simple and try git first, jujutsu second, and filesystem last - reader, err = NewReader(Git, root, path, db, statz) + reader, err = newUncachedReader(Git, root, path, statz) if err != nil { - reader, err = NewReader(Jujutsu, root, path, db, statz) + reader, err = newUncachedReader(Jujutsu, root, path, statz) if err != nil { - reader, err = NewReader(Filesystem, root, path, db, statz) + reader, err = newUncachedReader(Filesystem, root, path, statz) } } @@ -245,12 +266,6 @@ func NewReader( return nil, err } - if db != nil { - // wrap with cached reader - // db will be null if --no-cache is enabled - reader, err = NewCachedReader(db, BatchSize, reader) - } - return reader, err } From 6c8fb0e8344042ae6aab71f772011b91e08f1b2e Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 10:58:41 +0200 Subject: [PATCH 04/12] Let filesystem walker handle multiple paths Store filesystem path filters as a slice and walk them sequentially in the filesystem reader. Use one filesystem reader for multiple requested paths instead of building one reader per path. Add coverage for multi-path filesystem walking. --- walk/filesystem.go | 25 ++++++++++--- walk/filesystem_test.go | 26 ++++++++++++-- walk/walk.go | 77 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 118 insertions(+), 10 deletions(-) diff --git a/walk/filesystem.go b/walk/filesystem.go index 4b9bea86..9a84134a 100644 --- a/walk/filesystem.go +++ b/walk/filesystem.go @@ -19,7 +19,7 @@ import ( type FilesystemReader struct { log *log.Logger root string - path string + paths []string batchSize int eg *errgroup.Group @@ -35,9 +35,24 @@ func (f *FilesystemReader) process() error { close(f.filesCh) }() - // f.path is relative to the root, so we create a fully qualified version + paths := f.paths + if len(paths) == 0 { + paths = []string{""} + } + + for _, path := range paths { + if err := f.processPath(path); err != nil { + return err + } + } + + return nil +} + +func (f *FilesystemReader) processPath(pathFilter string) error { + // pathFilter is relative to the root, so we create a fully qualified version // we also clean the path up in case there are any ../../ components etc. - path := filepath.Clean(filepath.Join(f.root, f.path)) + path := filepath.Clean(filepath.Join(f.root, pathFilter)) // ensure the path is within the root if !strings.HasPrefix(path, f.root) { @@ -130,7 +145,7 @@ func (f *FilesystemReader) Close() error { // and root. func NewFilesystemReader( root string, - path string, + paths []string, statz *stats.Stats, batchSize int, ) *FilesystemReader { @@ -140,7 +155,7 @@ func NewFilesystemReader( r := FilesystemReader{ log: log.WithPrefix("walk | filesystem"), root: root, - path: path, + paths: paths, batchSize: batchSize, eg: &eg, diff --git a/walk/filesystem_test.go b/walk/filesystem_test.go index 45d7d17b..16565b33 100644 --- a/walk/filesystem_test.go +++ b/walk/filesystem_test.go @@ -56,7 +56,7 @@ func TestFilesystemReaderCancellation(t *testing.T) { tempDir := test.TempExamples(t) statz := stats.New() - r := walk.NewFilesystemReader(tempDir, "", &statz, 1024) + r := walk.NewFilesystemReader(tempDir, nil, &statz, 1024) ctx, cancel := context.WithCancel(t.Context()) cancel() @@ -72,7 +72,7 @@ func TestFilesystemReader(t *testing.T) { tempDir := test.TempExamples(t) statz := stats.New() - r := walk.NewFilesystemReader(tempDir, "", &statz, 1024) + r := walk.NewFilesystemReader(tempDir, nil, &statz, 1024) count := 0 @@ -101,3 +101,25 @@ func TestFilesystemReader(t *testing.T) { as.Equal(0, statz.Value(stats.Formatted)) as.Equal(0, statz.Value(stats.Changed)) } + +func TestFilesystemReaderSubpaths(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + statz := stats.New() + + r := walk.NewFilesystemReader(tempDir, []string{"go", "haskell/Foo.hs"}, &statz, 1024) + + ctx, cancel := context.WithTimeout(t.Context(), 100*time.Millisecond) + defer cancel() + + files := make([]*walk.File, 8) + n, err := r.Read(ctx, files) + + as.ErrorIs(err, io.EOF) + as.Equal(3, n) + as.Equal("go/go.mod", files[0].RelPath) + as.Equal("go/main.go", files[1].RelPath) + as.Equal("haskell/Foo.hs", files[2].RelPath) + as.Equal(3, statz.Value(stats.Traversed)) +} diff --git a/walk/walk.go b/walk/walk.go index 36d25e1e..697c87ce 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -216,13 +216,18 @@ func NewReader( return nil, err } + return withCache(db, reader) +} + +//nolint:ireturn +func withCache(db *bolt.DB, reader Reader) (Reader, error) { if db != nil { // wrap with cached reader // db will be null if --no-cache is enabled - reader, err = NewCachedReader(db, BatchSize, reader) + return NewCachedReader(db, BatchSize, reader) } - return reader, err + return reader, nil } //nolint:ireturn @@ -252,7 +257,12 @@ func newUncachedReader( case Stdin: return nil, errors.New("stdin walk type is not supported") case Filesystem: - reader = NewFilesystemReader(root, path, statz, BatchSize) + paths := []string(nil) + if path != "" { + paths = []string{path} + } + + reader = NewFilesystemReader(root, paths, statz, BatchSize) case Git: reader, err = NewGitReader(root, path, statz) case Jujutsu: @@ -295,6 +305,30 @@ func NewCompositeReader( return NewReader(walkType, root, "", db, statz) } + // check we have received 1 path for the stdin walk type + if walkType == Stdin { + if len(paths) != 1 { + return nil, errors.New("stdin walk requires exactly one path") + } + + path := paths[0] + + if strings.HasPrefix(path, "..") { + return nil, fmt.Errorf("path %s not inside the tree root %s", path, root) + } + + return NewStdinReader(root, path, statz), nil + } + + if walkType == Filesystem { + pathFilters, err := resolvePathFilters(root, paths) + if err != nil { + return nil, err + } + + return withCache(db, NewFilesystemReader(root, pathFilters, statz, BatchSize)) + } + readers := make([]Reader, len(paths)) // create a reader for each provided path @@ -359,6 +393,43 @@ func NewCompositeReader( }, nil } +func resolvePathFilters(root string, paths []string) ([]string, error) { + pathFilters := make([]string, 0, len(paths)) + + for _, path := range paths { + resolvedPath, err := resolvePath(path) + if err != nil { + return nil, fmt.Errorf("error resolving path %s: %w", path, err) + } + + relativePath, err := filepath.Rel(root, resolvedPath) + if err != nil { + return nil, fmt.Errorf("error computing relative path from %s to %s: %w", root, resolvedPath, err) + } + + isInsideTreeRoot, err := isDescendant(path, root) + if err != nil { + return nil, fmt.Errorf("error checking if %s is inside the tree root %s", path, root) + } + + if !isInsideTreeRoot { + return nil, fmt.Errorf("path %s not inside the tree root %s (relative path: %s)", path, root, relativePath) + } + + if _, err = os.Lstat(resolvedPath); err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("path %s not found: %w", resolvedPath, err) + } + + return nil, fmt.Errorf("failed to stat %s: %w", resolvedPath, err) + } + + pathFilters = append(pathFilters, relativePath) + } + + return pathFilters, nil +} + // Resolve a path to an absolute path. // Furthermore, if the path exists, any symlinks in its components are resolved. func resolvePath(path string) (string, error) { From f4b79f9ae0c48a9032bb45eb5b82435985a4e568 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 11:01:51 +0200 Subject: [PATCH 05/12] Use path stream reader for git walker Run git ls-files from the tree root and parse its NUL-delimited output with the shared path stream reader. Pass requested paths to git as path filters after -- so subtree formatting can prune the git output inside git. --- walk/git.go | 163 ++--------------------------------------------- walk/git_test.go | 4 +- walk/walk.go | 21 +++++- 3 files changed, 29 insertions(+), 159 deletions(-) diff --git a/walk/git.go b/walk/git.go index 4874e2ca..4f59d419 100644 --- a/walk/git.go +++ b/walk/git.go @@ -1,143 +1,18 @@ package walk import ( - "bufio" - "context" "fmt" - "io" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" "charm.land/log/v2" "github.com/numtide/treefmt/v2/git" "github.com/numtide/treefmt/v2/stats" - "golang.org/x/sync/errgroup" ) -type GitReader struct { - root string - path string - - log *log.Logger - stats *stats.Stats - - eg *errgroup.Group - scanner *bufio.Scanner -} - -func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) { - // ensure we record how many files we traversed - defer func() { - g.stats.Add(stats.Traversed, n) - }() - - nextFile := func() (string, error) { - for line := g.scanner.Text(); len(line) > 0; line = g.scanner.Text() { - lineSplit := strings.Split(line, "\t") - - var stage, file string - // Untracked files just show as ``, while tracked files show as ` ` - if len(lineSplit) == 1 { - stage, file = "", lineSplit[0] - } else { - stage, file = lineSplit[0], lineSplit[1] - } - - // 160000 is the mode for submodules, skip them because they are separate projects that may have their own - // formatting rules - if strings.HasPrefix(stage, "160000") { - g.scanner.Scan() - - continue - } - - if file[0] != '"' { - return file, nil - } - - unquoted, err := strconv.Unquote(file) - if err != nil { - return "", fmt.Errorf("failed to unquote file %s: %w", file, err) - } - - return unquoted, nil - } - - return "", io.EOF - } - -LOOP: - - for n < len(files) { - select { - // exit early if the context was cancelled - case <-ctx.Done(): - return n, ctx.Err() //nolint:wrapcheck - - default: - // read the next file - if g.scanner.Scan() { - entry, err := nextFile() - if err != nil { - return n, err - } - - path := filepath.Join(g.root, g.path, entry) - - g.log.Debugf("processing file: %s", path) - - info, err := os.Lstat(path) - - switch { - case os.IsNotExist(err): - // the underlying file might have been removed - g.log.Warnf( - "Path %s is in the worktree but appears to have been removed from the filesystem", path, - ) - - continue - case err != nil: - return n, fmt.Errorf("failed to stat %s: %w", path, err) - case info.Mode()&os.ModeSymlink == os.ModeSymlink: - // we skip reporting symlinks stored in Git, they should - // point to local files which we would list anyway. - continue - } - - files[n] = &File{ - Path: path, - RelPath: filepath.Join(g.path, entry), - Info: info, - } - - n++ - } else { - // nothing more to read - err = io.EOF - - break LOOP - } - } - } - - return n, err -} - -func (g *GitReader) Close() error { - err := g.eg.Wait() - if err != nil { - return fmt.Errorf("failed to wait for git command to complete: %w", err) - } - - return nil -} +type GitReader = PathStreamReader func NewGitReader( root string, - path string, + pathFilters []string, statz *stats.Stats, ) (*GitReader, error) { // check if the root is a git repository @@ -150,34 +25,10 @@ func NewGitReader( return nil, fmt.Errorf("%s is not a git repository", root) } - // create an errgroup for executing in the background - eg := &errgroup.Group{} - - // create a pipe to capture the command output - r, w := io.Pipe() - - // create a command which will execute from the specified sub path within root - cmd := exec.CommandContext( - context.Background(), - "git", "ls-files", "--cached", "--others", "--exclude-standard", "--stage", - ) - cmd.Dir = filepath.Join(root, path) - cmd.Stdout = w - - // execute the command in the background - eg.Go(func() error { - return w.CloseWithError(cmd.Run()) + return NewPathStreamReader(root, statz, PathStreamConfig{ + Name: "git", + Command: "git", + Options: []string{"ls-files", "-z", "--cached", "--others", "--exclude-standard", "--full-name", "--"}, + PathFilters: pathFilters, }) - - // create a new scanner for reading the output - scanner := bufio.NewScanner(r) - - return &GitReader{ - eg: eg, - root: root, - path: path, - stats: statz, - scanner: scanner, - log: log.WithPrefix("walk | git"), - }, nil } diff --git a/walk/git_test.go b/walk/git_test.go index ce4e3d29..00b4bbe0 100644 --- a/walk/git_test.go +++ b/walk/git_test.go @@ -35,7 +35,7 @@ func TestGitReader(t *testing.T) { // read empty worktree statz := stats.New() - reader, err := walk.NewGitReader(tempDir, "", &statz) + reader, err := walk.NewGitReader(tempDir, nil, &statz) as.NoError(err) files := make([]*walk.File, 34) @@ -84,7 +84,7 @@ func TestGitReader(t *testing.T) { as.NoError(cmd.Run(), "failed to add everything to the index") statz = stats.New() - reader, err = walk.NewGitReader(tempDir, "", &statz) + reader, err = walk.NewGitReader(tempDir, nil, &statz) as.NoError(err) count := 0 diff --git a/walk/walk.go b/walk/walk.go index 697c87ce..bbc085f7 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -264,7 +264,12 @@ func newUncachedReader( reader = NewFilesystemReader(root, paths, statz, BatchSize) case Git: - reader, err = NewGitReader(root, path, statz) + paths := []string(nil) + if path != "" { + paths = []string{path} + } + + reader, err = NewGitReader(root, paths, statz) case Jujutsu: reader, err = NewJujutsuReader(root, path, statz) @@ -329,6 +334,20 @@ func NewCompositeReader( return withCache(db, NewFilesystemReader(root, pathFilters, statz, BatchSize)) } + if walkType == Git { + pathFilters, err := resolvePathFilters(root, paths) + if err != nil { + return nil, err + } + + reader, err := NewGitReader(root, pathFilters, statz) + if err != nil { + return nil, err + } + + return withCache(db, reader) + } + readers := make([]Reader, len(paths)) // create a reader for each provided path From 57fe0e56c6c1241d92d3f173e3b853819beb2020 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 11:03:11 +0200 Subject: [PATCH 06/12] Use path stream reader for jujutsu walker Run jj file list from the tree root and parse its output with the shared path stream reader. Pass requested paths to jj after -- so paths that start with dashes are handled as path filters. Remove the generic CompositeReader now that all built-in walkers consume the same validated path-filter list. --- cmd/format/format.go | 2 +- walk/git.go | 1 - walk/jujutsu.go | 135 ++--------------------- walk/jujutsu_test.go | 4 +- walk/walk.go | 248 ++++++++++++------------------------------- 5 files changed, 75 insertions(+), 315 deletions(-) diff --git a/cmd/format/format.go b/cmd/format/format.go index 2b5cd9bd..b6231949 100644 --- a/cmd/format/format.go +++ b/cmd/format/format.go @@ -125,7 +125,7 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) } // create a new walker for traversing the paths - walker, err := walk.NewCompositeReader(walkType, cfg.TreeRoot, paths, db, statz) + walker, err := walk.NewReader(walkType, cfg.TreeRoot, paths, db, statz) if err != nil { return fmt.Errorf("failed to create walker: %w", err) } diff --git a/walk/git.go b/walk/git.go index 4f59d419..d39fcc5f 100644 --- a/walk/git.go +++ b/walk/git.go @@ -3,7 +3,6 @@ package walk import ( "fmt" - "charm.land/log/v2" "github.com/numtide/treefmt/v2/git" "github.com/numtide/treefmt/v2/stats" ) diff --git a/walk/jujutsu.go b/walk/jujutsu.go index 81385864..b2566be1 100644 --- a/walk/jujutsu.go +++ b/walk/jujutsu.go @@ -1,109 +1,17 @@ package walk import ( - "bufio" - "context" "fmt" - "io" - "os" - "os/exec" - "path/filepath" - "charm.land/log/v2" "github.com/numtide/treefmt/v2/jujutsu" "github.com/numtide/treefmt/v2/stats" - "golang.org/x/sync/errgroup" ) -type JujutsuReader struct { - root string - path string - - log *log.Logger - stats *stats.Stats - - eg *errgroup.Group - scanner *bufio.Scanner -} - -func (j *JujutsuReader) Read(ctx context.Context, files []*File) (n int, err error) { - // ensure we record how many files we traversed - defer func() { - j.stats.Add(stats.Traversed, n) - }() - - nextLine := func() string { - line := j.scanner.Text() - - return line - } - -LOOP: - - for n < len(files) { - select { - // exit early if the context was cancelled - case <-ctx.Done(): - return n, ctx.Err() //nolint:wrapcheck - - default: - // read the next file - if j.scanner.Scan() { - entry := nextLine() - - path := filepath.Join(j.root, entry) - - j.log.Debugf("processing file: %s", path) - - info, err := os.Lstat(path) - - switch { - case os.IsNotExist(err): - // the underlying file might have been removed - j.log.Warnf( - "Path %s is in the worktree but appears to have been removed from the filesystem", path, - ) - - continue - case err != nil: - return n, fmt.Errorf("failed to stat %s: %w", path, err) - case info.Mode()&os.ModeSymlink == os.ModeSymlink: - // we skip reporting symlinks stored in Jujutsu, they should - // point to local files which we would list anyway. - continue - } - - files[n] = &File{ - Path: path, - RelPath: entry, - Info: info, - } - - n++ - } else { - // nothing more to read - err = io.EOF - - break LOOP - } - } - } - - return n, err -} - -func (j *JujutsuReader) Close() error { - err := j.eg.Wait() - if err != nil { - return fmt.Errorf("failed to wait for jujutsu command to complete: %w", err) - } - - return nil -} +type JujutsuReader = PathStreamReader func NewJujutsuReader( root string, - path string, + pathFilters []string, statz *stats.Stats, ) (*JujutsuReader, error) { // check if the root is a jujutsu repository @@ -116,42 +24,13 @@ func NewJujutsuReader( return nil, fmt.Errorf("%s is not a jujutsu repository", root) } - // create an errgroup for async list task - eg := &errgroup.Group{} - - // create a pipe to capture the command output - r, w := io.Pipe() - - // create a command which will execute from root // --ignore-working-copy: Don't snapshot the working copy, and don't update it. This prevents that the user has to // enter a password for signing the commit. New files also won't be added to the index and not displayed in the // output. - // Add the subpath as a fileset displaying only files matching this prefix. If - // the subpath is empty ignore it since it interferes with the command - args := []string{"file", "list", "--ignore-working-copy"} - if path != "" { - args = append(args, path) - } - - // create the jj command - cmd := exec.CommandContext(context.Background(), "jj", args...) - cmd.Dir = root - cmd.Stdout = w - - // execute the command in the background - eg.Go(func() error { - return w.CloseWithError(cmd.Run()) + return NewPathStreamReader(root, statz, PathStreamConfig{ + Name: "jujutsu", + Command: "jj", + Options: []string{"file", "list", "--ignore-working-copy", "--"}, + PathFilters: pathFilters, }) - - // create a new scanner for reading the output - scanner := bufio.NewScanner(r) - - return &JujutsuReader{ - eg: eg, - root: root, - path: path, - stats: statz, - scanner: scanner, - log: log.WithPrefix("walk | jujutsu"), - }, nil } diff --git a/walk/jujutsu_test.go b/walk/jujutsu_test.go index c50b6f11..2d829f03 100644 --- a/walk/jujutsu_test.go +++ b/walk/jujutsu_test.go @@ -27,7 +27,7 @@ func TestJujutsuReader(t *testing.T) { // read empty worktree statz := stats.New() - reader, err := walk.NewJujutsuReader(tempDir, "", &statz) + reader, err := walk.NewJujutsuReader(tempDir, nil, &statz) as.NoError(err) files := make([]*walk.File, 33) // The number of files in `test/examples` used for testing @@ -47,7 +47,7 @@ func TestJujutsuReader(t *testing.T) { as.NoError(cmd.Run(), "failed to update the index") statz = stats.New() - reader, err = walk.NewJujutsuReader(tempDir, "", &statz) + reader, err = walk.NewJujutsuReader(tempDir, nil, &statz) as.NoError(err) count := 0 diff --git a/walk/walk.go b/walk/walk.go index bbc085f7..48e7387e 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -5,7 +5,6 @@ import ( "crypto/md5" //nolint:gosec "errors" "fmt" - "io" "io/fs" "os" "path/filepath" @@ -156,62 +155,83 @@ type Reader interface { Close() error } -// CompositeReader combines multiple Readers into one. -// It iterates over the given readers, reading each until completion. -type CompositeReader struct { - idx int - current Reader - readers []Reader -} +//nolint:ireturn +func NewReader( + walkType Type, + root string, + paths []string, + db *bolt.DB, + statz *stats.Stats, +) (Reader, error) { + // Note: `root` may itself be or contain a symlink (e.g. it is in + // `$TMPDIR` on macOS or a user has set a symlink to shorten the repository + // path for path length restrictions), so we resolve it here first. + // + // See: https://github.com/numtide/treefmt/issues/578 + root, err := resolvePath(root) + if err != nil { + return nil, fmt.Errorf("error resolving path %s: %w", root, err) + } + + // if no paths are provided we default to processing the tree root + if len(paths) == 0 { + return newReaderFromPathFilters(walkType, root, nil, db, statz) + } -func (c *CompositeReader) Read(ctx context.Context, files []*File) (n int, err error) { - if c.current == nil { - // check if we have exhausted all the readers - if c.idx >= len(c.readers) { - return 0, io.EOF + // check we have received 1 path for the stdin walk type + if walkType == Stdin { + path, err := stdinPath(root, paths) + if err != nil { + return nil, err } - // if not, select the next reader - c.current = c.readers[c.idx] - c.idx++ + return NewStdinReader(root, path, statz), nil } - // attempt a read - n, err = c.current.Read(ctx, files) - - // check if the current reader has been exhausted - if errors.Is(err, io.EOF) { - // reset the error if it's EOF - err = nil - // set the current reader to nil so we try to read from the next reader on the next call - c.current = nil - } else if err != nil { - err = fmt.Errorf("failed to read from current reader: %w", err) + pathFilters, err := resolvePathFilters(root, paths) + if err != nil { + return nil, err } - // return the number of files read in this call and any error - return n, err + return newReaderFromPathFilters(walkType, root, pathFilters, db, statz) } -func (c *CompositeReader) Close() error { - for _, reader := range c.readers { - if err := reader.Close(); err != nil { - return fmt.Errorf("failed to close reader: %w", err) - } +func stdinPath(root string, paths []string) (string, error) { + if len(paths) != 1 { + return "", errors.New("stdin walk requires exactly one path") } - return nil + resolvedPath, err := resolvePath(paths[0]) + if err != nil { + return "", fmt.Errorf("error resolving path %s: %w", paths[0], err) + } + + path, err := filepath.Rel(root, resolvedPath) + if err != nil { + return "", fmt.Errorf("error computing relative path from %s to %s: %w", root, resolvedPath, err) + } + + isInsideTreeRoot, err := isDescendant(paths[0], root) + if err != nil { + return "", fmt.Errorf("error checking if %s is inside the tree root %s", paths[0], root) + } + + if !isInsideTreeRoot { + return "", fmt.Errorf("path %s not inside the tree root %s", paths[0], root) + } + + return path, nil } //nolint:ireturn -func NewReader( +func newReaderFromPathFilters( walkType Type, root string, - path string, + pathFilters []string, db *bolt.DB, statz *stats.Stats, ) (Reader, error) { - reader, err := newUncachedReader(walkType, root, path, statz) + reader, err := newUncachedReader(walkType, root, pathFilters, statz) if err != nil { return nil, err } @@ -234,7 +254,7 @@ func withCache(db *bolt.DB, reader Reader) (Reader, error) { func newUncachedReader( walkType Type, root string, - path string, + pathFilters []string, statz *stats.Stats, ) (Reader, error) { var ( @@ -245,11 +265,11 @@ func newUncachedReader( switch walkType { case Auto: // for now, we keep it simple and try git first, jujutsu second, and filesystem last - reader, err = newUncachedReader(Git, root, path, statz) + reader, err = newUncachedReader(Git, root, pathFilters, statz) if err != nil { - reader, err = newUncachedReader(Jujutsu, root, path, statz) + reader, err = newUncachedReader(Jujutsu, root, pathFilters, statz) if err != nil { - reader, err = newUncachedReader(Filesystem, root, path, statz) + reader, err = newUncachedReader(Filesystem, root, pathFilters, statz) } } @@ -257,21 +277,11 @@ func newUncachedReader( case Stdin: return nil, errors.New("stdin walk type is not supported") case Filesystem: - paths := []string(nil) - if path != "" { - paths = []string{path} - } - - reader = NewFilesystemReader(root, paths, statz, BatchSize) + reader = NewFilesystemReader(root, pathFilters, statz, BatchSize) case Git: - paths := []string(nil) - if path != "" { - paths = []string{path} - } - - reader, err = NewGitReader(root, paths, statz) + reader, err = NewGitReader(root, pathFilters, statz) case Jujutsu: - reader, err = NewJujutsuReader(root, path, statz) + reader, err = NewJujutsuReader(root, pathFilters, statz) default: return nil, fmt.Errorf("unknown walk type: %v", walkType) @@ -284,134 +294,6 @@ func newUncachedReader( return reader, err } -// NewCompositeReader returns a composite reader for the `root` and all `paths`. It -// never follows symlinks. -// -//nolint:ireturn -func NewCompositeReader( - walkType Type, - root string, - paths []string, - db *bolt.DB, - statz *stats.Stats, -) (Reader, error) { - // Note: `root` may itself be or contain a symlink (e.g. it is in - // `$TMPDIR` on macOS or a user has set a symlink to shorten the repository - // path for path length restrictions), so we resolve it here first. - // - // See: https://github.com/numtide/treefmt/issues/578 - root, err := resolvePath(root) - if err != nil { - return nil, fmt.Errorf("error resolving path %s: %w", root, err) - } - - // if no paths are provided we default to processing the tree root - if len(paths) == 0 { - return NewReader(walkType, root, "", db, statz) - } - - // check we have received 1 path for the stdin walk type - if walkType == Stdin { - if len(paths) != 1 { - return nil, errors.New("stdin walk requires exactly one path") - } - - path := paths[0] - - if strings.HasPrefix(path, "..") { - return nil, fmt.Errorf("path %s not inside the tree root %s", path, root) - } - - return NewStdinReader(root, path, statz), nil - } - - if walkType == Filesystem { - pathFilters, err := resolvePathFilters(root, paths) - if err != nil { - return nil, err - } - - return withCache(db, NewFilesystemReader(root, pathFilters, statz, BatchSize)) - } - - if walkType == Git { - pathFilters, err := resolvePathFilters(root, paths) - if err != nil { - return nil, err - } - - reader, err := NewGitReader(root, pathFilters, statz) - if err != nil { - return nil, err - } - - return withCache(db, reader) - } - - readers := make([]Reader, len(paths)) - - // create a reader for each provided path - for idx, path := range paths { - var ( - err error - info os.FileInfo - ) - - resolvedPath, err := resolvePath(path) - if err != nil { - return nil, fmt.Errorf("error resolving path %s: %w", path, err) - } - - relativePath, err := filepath.Rel(root, resolvedPath) - if err != nil { - return nil, fmt.Errorf("error computing relative path from %s to %s: %w", root, resolvedPath, err) - } - - isInsideTreeRoot, err := isDescendant(path, root) - if err != nil { - return nil, fmt.Errorf("error checking if %s is inside the tree root %s", path, root) - } - - if !isInsideTreeRoot { - return nil, fmt.Errorf("path %s not inside the tree root %s (relative path: %s)", path, root, relativePath) - } - - if walkType == Stdin { - if len(paths) != 1 { - return nil, errors.New("stdin walk requires exactly one path") - } - - return NewStdinReader(root, relativePath, statz), nil - } - - // check the path exists - info, err = os.Lstat(resolvedPath) - if err != nil { - if os.IsNotExist(err) { - return nil, fmt.Errorf("path %s not found: %w", resolvedPath, err) - } - - return nil, fmt.Errorf("failed to stat %s: %w", resolvedPath, err) - } - - if info.IsDir() { - // for directories, we honour the walk type as we traverse them - readers[idx], err = NewReader(walkType, root, relativePath, db, statz) - } else { - // for files, we enforce a simple filesystem read - readers[idx], err = NewReader(Filesystem, root, relativePath, db, statz) - } - - if err != nil { - return nil, fmt.Errorf("failed to create reader for %s: %w", relativePath, err) - } - } - - return &CompositeReader{ - readers: readers, - }, nil -} - func resolvePathFilters(root string, paths []string) ([]string, error) { pathFilters := make([]string, 0, len(paths)) From e697c129c22e2270b1645575c28fa21d95e40039 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 17:39:40 +0200 Subject: [PATCH 07/12] Constrain jujutsu walker to tree root When no explicit path filters are passed, run `jj file list` with `.` as the positional filter. In non-colocated jj workspaces, running from a subdirectory can otherwise emit paths outside the treefmt tree root. Add a regression test for the jj-only workspace shape from #704. --- walk/jujutsu.go | 4 ++++ walk/jujutsu_test.go | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/walk/jujutsu.go b/walk/jujutsu.go index b2566be1..8167700d 100644 --- a/walk/jujutsu.go +++ b/walk/jujutsu.go @@ -24,6 +24,10 @@ func NewJujutsuReader( return nil, fmt.Errorf("%s is not a jujutsu repository", root) } + if len(pathFilters) == 0 { + pathFilters = []string{"."} + } + // --ignore-working-copy: Don't snapshot the working copy, and don't update it. This prevents that the user has to // enter a password for signing the commit. New files also won't be added to the index and not displayed in the // output. diff --git a/walk/jujutsu_test.go b/walk/jujutsu_test.go index 2d829f03..dbf30854 100644 --- a/walk/jujutsu_test.go +++ b/walk/jujutsu_test.go @@ -4,7 +4,9 @@ import ( "context" "errors" "io" + "os" "os/exec" + "path/filepath" "testing" "time" @@ -73,3 +75,44 @@ func TestJujutsuReader(t *testing.T) { as.Equal(0, statz.Value(stats.Formatted)) as.Equal(0, statz.Value(stats.Changed)) } + +func TestJujutsuReaderUsesTreeRoot(t *testing.T) { + as := require.New(t) + + test.SetenvXdgConfigDir(t) + tempDir := t.TempDir() + projectDir := filepath.Join(tempDir, "my-project") + otherDir := filepath.Join(tempDir, "other-project") + + as.NoError(os.MkdirAll(projectDir, 0o755)) + as.NoError(os.MkdirAll(otherDir, 0o755)) + as.NoError(os.WriteFile(filepath.Join(projectDir, "default.nix"), []byte("{}\n"), 0o600)) + as.NoError(os.WriteFile(filepath.Join(otherDir, "shell.nix"), []byte("{}\n"), 0o600)) + + cmd := exec.CommandContext(t.Context(), "jj", "git", "init", "--no-colocate") + cmd.Dir = tempDir + as.NoError(cmd.Run(), "failed to init jujutsu repository") + + cmd = exec.CommandContext(t.Context(), "jj", "file", "track", "my-project/default.nix", "other-project/shell.nix") + cmd.Dir = tempDir + as.NoError(cmd.Run(), "failed to track files") + + statz := stats.New() + reader, err := walk.NewJujutsuReader(projectDir, nil, &statz) + as.NoError(err) + + defer func() { + as.NoError(reader.Close()) + }() + + files := make([]*walk.File, 8) + ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second) + n, err := reader.Read(ctx, files) + + cancel() + + as.Equal(1, n) + as.ErrorIs(err, io.EOF) + as.Equal("default.nix", files[0].RelPath) + as.Equal(1, statz.Value(stats.Traversed)) +} From f85344fc78a944f5d7ff6de435381db4e3a900f9 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 11:06:13 +0200 Subject: [PATCH 08/12] Document custom walker configuration Document selecting a custom walker with the global walk option and defining its command and options under [walker.]. Document that custom walker commands run from the tree root, receive requested path filters as positional arguments, and emit newline- or NUL-delimited path records. --- cmd/init/init.toml | 10 +++++- docs/site/getting-started/configure.md | 47 +++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/cmd/init/init.toml b/cmd/init/init.toml index 375686b3..73182b0f 100644 --- a/cmd/init/init.toml +++ b/cmd/init/init.toml @@ -48,9 +48,17 @@ # verbose = 2 # The method used to traverse the files within the tree root -# Currently, we support 'auto', 'git', 'jujutsu', or 'filesystem' +# Built-in values are 'auto', 'git', 'jujutsu', and 'filesystem' +# You can also set this to the name of a configured custom walker # Env $TREEFMT_WALK # walk = "filesystem" +# walk = "mywalker" + +# Custom walkers are configured with [walker.] +# They receive requested file and directory paths as positional args +# [walker.mywalker] +# command = "command-to-run" +# options = [] [formatter.mylanguage] # Command to execute diff --git a/docs/site/getting-started/configure.md b/docs/site/getting-started/configure.md index c7e7045e..a548dfb8 100644 --- a/docs/site/getting-started/configure.md +++ b/docs/site/getting-started/configure.md @@ -396,7 +396,8 @@ Set the verbosity level of logs: ### `walk` The method used to traverse the files within the tree root. -Currently, we support 'auto', 'git', 'jujutsu' or 'filesystem' +Built-in values are `auto`, `git`, `jujutsu`, and `filesystem`. +You can also set this to the name of a configured [custom walker](#walker-options). Paths containing newlines are unsupported. @@ -418,6 +419,14 @@ Paths containing newlines are unsupported. walk = "filesystem" ``` + ```toml + walk = "mywalker" + + [walker.mywalker] + command = "command-to-run" + options = [] + ``` + ### `working-dir` Run as if `treefmt` was started in the specified working directory instead of the current working directory. @@ -435,6 +444,42 @@ Run as if `treefmt` was started in the specified working directory instead of th TREEFMT_WORKING_DIR=/tmp/foo treefmt ``` +## Walker Options + +Custom walkers are configured using a [table](https://toml.io/en/v1.0.0#table) entry in `treefmt.toml` of the form +`[walker.]`. +To use a custom walker, set the global [`walk`](#walk) option to the same name: + +```toml +walk = "mywalker" + +[walker.mywalker] +command = "command-to-run" +options = [] +``` + +### `command` + +The command to invoke when walking the tree. +`treefmt` runs the command from the tree root. + +When you pass file or directory paths to `treefmt`, `treefmt` passes those paths to the walker command as positional +arguments relative to the tree root. +A custom walker should use those arguments to reduce the paths it emits. +This improves performance when you format a subtree in a large repository. + +`treefmt` still filters the command output to the requested paths. + +The command must write path records to `stdout`. +Records may be separated by newlines or NUL bytes. +Each path must be relative to the tree root or an absolute path inside the tree root. +Paths containing newlines are unsupported. + +### `options` + +An optional list of args to be passed to `command`. +`treefmt` passes these args before any positional path filters. + ## Formatter Options Formatters are configured using a [table](https://toml.io/en/v1.0.0#table) entry in `treefmt.toml` of the form From 0db4f01ec343d3cd80e77ac2f6b3f58604d59d3b Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 11:07:00 +0200 Subject: [PATCH 09/12] Add custom walker configuration Parse [walker.] tables into the configuration model and validate walker names, commands, and walk values. Treat custom walker names as exact configuration keys and keep the default walk value in NewViper. --- config/config.go | 39 ++++++++++++++ config/config_test.go | 115 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 153 insertions(+), 1 deletion(-) diff --git a/config/config.go b/config/config.go index 715d81a5..926ae48a 100644 --- a/config/config.go +++ b/config/config.go @@ -45,6 +45,7 @@ type Config struct { Stdin bool `mapstructure:"stdin" toml:"-"` // not allowed in config FormatterConfigs map[string]*Formatter `mapstructure:"formatter" toml:"formatter,omitempty"` + WalkerConfigs map[string]*Walker `mapstructure:"walker" toml:"walker,omitempty"` Global struct { // Deprecated: Use Excludes @@ -68,6 +69,13 @@ type Formatter struct { NoPositionalArgSupport *bool `mapstructure:"no-positional-arg-support" toml:"no-positional-arg-support"` } +type Walker struct { + // Command is the command to invoke when walking the tree. + Command string `mapstructure:"command" toml:"command"` + // Options are an optional list of args to be passed to Command. + Options []string `mapstructure:"options,omitempty" toml:"options,omitempty"` +} + // SetFlags appends our flags to the provided flag set. // We have a flag matching most entries in Config, taking care to ensure the name matches the field name defined in the // mapstructure tag. @@ -165,6 +173,7 @@ func NewViper() (*viper.Viper, error) { v.SetEnvPrefix("treefmt") v.AutomaticEnv() v.SetEnvKeyReplacer(strings.NewReplacer("-", "_", ".", "_")) + v.SetDefault("walk", walk.Auto.String()) // unset some env variables that we don't want automatically applied if err := os.Unsetenv("TREEFMT_STDIN"); err != nil { @@ -233,6 +242,36 @@ func FromViper(v *viper.Viper) (*Config, error) { } } + for name, walkerCfg := range cfg.WalkerConfigs { + if !nameRegex.MatchString(name) { + return nil, fmt.Errorf( + "walker name %q is invalid, must be of the form %s", + name, nameRegex.String(), + ) + } + + if _, err := walk.TypeString(name); err == nil { + return nil, fmt.Errorf("walker name %q is reserved for a built-in walk type", name) + } + + if walkerCfg.Command == "" { + return nil, fmt.Errorf("walker %v has no command", name) + } + } + + if _, err := walk.TypeString(cfg.Walk); err != nil { + if !nameRegex.MatchString(cfg.Walk) { + return nil, fmt.Errorf( + "walk value %q is invalid, must be a built-in walk type or a walker name of the form %s", + cfg.Walk, nameRegex.String(), + ) + } + + if _, ok := cfg.WalkerConfigs[cfg.Walk]; !ok { + return nil, fmt.Errorf("walker %v not found in config", cfg.Walk) + } + } + // filter formatters based on provided names if len(cfg.Formatters) > 0 { filtered := make(map[string]*Formatter) diff --git a/config/config_test.go b/config/config_test.go index 218b87e7..6e527379 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -591,6 +591,117 @@ func TestWalk(t *testing.T) { checkValue("auto") } +func TestWalkers(t *testing.T) { + t.Run("configured walker", func(t *testing.T) { + as := require.New(t) + + cfg := &config.Config{ + Walk: "mywalker", + WalkerConfigs: map[string]*config.Walker{ + "mywalker": { + Command: "command-to-run", + Options: []string{ + "--foo", + "bar", + }, + }, + }, + } + + v, _ := newViper(t) + + readValue(t, v, cfg, func(cfg *config.Config) { + walker, ok := cfg.WalkerConfigs["mywalker"] + as.True(ok, "walker not found") + as.Equal("mywalker", cfg.Walk) + as.Equal("command-to-run", walker.Command) + as.Equal([]string{"--foo", "bar"}, walker.Options) + }) + }) + + t.Run("missing walker", func(t *testing.T) { + as := require.New(t) + + cfg := &config.Config{ + Walk: "mywalker", + } + + v, _ := newViper(t) + + readError(t, v, cfg, func(err error) { + as.ErrorContains(err, "walker mywalker not found in config") + }) + }) + + t.Run("empty command", func(t *testing.T) { + as := require.New(t) + + cfg := &config.Config{ + Walk: "mywalker", + WalkerConfigs: map[string]*config.Walker{ + "mywalker": {}, + }, + } + + v, _ := newViper(t) + + readError(t, v, cfg, func(err error) { + as.ErrorContains(err, "walker mywalker has no command") + }) + }) + + t.Run("invalid walker name", func(t *testing.T) { + as := require.New(t) + + cfg := &config.Config{ + Walk: "mywalker", + WalkerConfigs: map[string]*config.Walker{ + "my/walker": { + Command: "command-to-run", + }, + }, + } + + v, _ := newViper(t) + + readError(t, v, cfg, func(err error) { + as.ErrorContains(err, "walker name \"my/walker\" is invalid") + }) + }) + + t.Run("reserved walker name", func(t *testing.T) { + as := require.New(t) + + cfg := &config.Config{ + WalkerConfigs: map[string]*config.Walker{ + "git": { + Command: "command-to-run", + }, + }, + } + + v, _ := newViper(t) + + readError(t, v, cfg, func(err error) { + as.ErrorContains(err, "walker name \"git\" is reserved for a built-in walk type") + }) + }) + + t.Run("invalid walk value", func(t *testing.T) { + as := require.New(t) + + cfg := &config.Config{ + Walk: "my.walker", + } + + v, _ := newViper(t) + + readError(t, v, cfg, func(err error) { + as.ErrorContains(err, "walk value \"my.walker\" is invalid") + }) + }) +} + func TestWorkingDirectory(t *testing.T) { as := require.New(t) @@ -662,7 +773,9 @@ func TestStdin(t *testing.T) { func TestSampleConfigFile(t *testing.T) { as := require.New(t) - v := viper.New() + v, err := config.NewViper() + as.NoError(err, "failed to create viper") + v.SetConfigFile("../test/examples/treefmt.toml") as.NoError(v.ReadInConfig(), "failed to read config file") From 5ebf6939f32ba9a37373a7a3470f75441623918f Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 11:07:28 +0200 Subject: [PATCH 10/12] Use explicit walker selectors Represent walker selection as either a built-in walk type or a custom walker configuration instead of passing only the built-in enum through the format command. Keep selector fields private and expose IsBuiltin for callers that need to test for stdin. --- cmd/format/format.go | 26 ++++++++++-- walk/walk.go | 96 ++++++++++++++++++++++++++++++++------------ 2 files changed, 92 insertions(+), 30 deletions(-) diff --git a/cmd/format/format.go b/cmd/format/format.go index b6231949..b437447a 100644 --- a/cmd/format/format.go +++ b/cmd/format/format.go @@ -107,13 +107,13 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) cancel() }() - // parse the walk type - walkType, err := walk.TypeString(cfg.Walk) + // parse the walk selector + walkSelector, err := newWalkSelector(cfg) if err != nil { return fmt.Errorf("invalid walk type: %w", err) } - if walkType == walk.Stdin && len(paths) != 1 { + if walkSelector.IsBuiltin(walk.Stdin) && len(paths) != 1 { // check we have only received one path arg which we use for the file extension / matching to formatters return errors.New("exactly one path should be specified when using the --stdin flag") } @@ -125,7 +125,7 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) } // create a new walker for traversing the paths - walker, err := walk.NewReader(walkType, cfg.TreeRoot, paths, db, statz) + walker, err := walk.NewReader(walkSelector, cfg.TreeRoot, paths, db, statz) if err != nil { return fmt.Errorf("failed to create walker: %w", err) } @@ -205,3 +205,21 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) return nil } + +func newWalkSelector(cfg *config.Config) (walk.Selector, error) { + walkType, err := walk.TypeString(cfg.Walk) + if err == nil { + return walk.BuiltinSelector(walkType), nil + } + + walkerCfg, ok := cfg.WalkerConfigs[cfg.Walk] + if !ok { + return walk.Selector{}, fmt.Errorf("walker %v not found in config", cfg.Walk) + } + + return walk.CustomSelector(walk.CustomConfig{ + Name: cfg.Walk, + Command: walkerCfg.Command, + Options: walkerCfg.Options, + }), nil +} diff --git a/walk/walk.go b/walk/walk.go index 48e7387e..c9a37cfd 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -29,6 +29,43 @@ const ( const BatchSize = 1024 +type CustomConfig struct { + Name string + Command string + Options []string +} + +type selectorKind int + +const ( + builtinSelector selectorKind = iota + customSelector +) + +type Selector struct { + kind selectorKind + builtin Type + custom CustomConfig +} + +func BuiltinSelector(walkType Type) Selector { + return Selector{ + kind: builtinSelector, + builtin: walkType, + } +} + +func CustomSelector(cfg CustomConfig) Selector { + return Selector{ + kind: customSelector, + custom: cfg, + } +} + +func (s Selector) IsBuiltin(walkType Type) bool { + return s.kind == builtinSelector && s.builtin == walkType +} + type ReleaseFunc func(ctx context.Context) error // File represents a file object with its path, relative path, file info, and potential cache entry. @@ -157,7 +194,7 @@ type Reader interface { //nolint:ireturn func NewReader( - walkType Type, + selector Selector, root string, paths []string, db *bolt.DB, @@ -175,11 +212,11 @@ func NewReader( // if no paths are provided we default to processing the tree root if len(paths) == 0 { - return newReaderFromPathFilters(walkType, root, nil, db, statz) + return newReaderFromPathFilters(selector, root, nil, db, statz) } // check we have received 1 path for the stdin walk type - if walkType == Stdin { + if selector.IsBuiltin(Stdin) { path, err := stdinPath(root, paths) if err != nil { return nil, err @@ -193,7 +230,7 @@ func NewReader( return nil, err } - return newReaderFromPathFilters(walkType, root, pathFilters, db, statz) + return newReaderFromPathFilters(selector, root, pathFilters, db, statz) } func stdinPath(root string, paths []string) (string, error) { @@ -225,13 +262,13 @@ func stdinPath(root string, paths []string) (string, error) { //nolint:ireturn func newReaderFromPathFilters( - walkType Type, + selector Selector, root string, pathFilters []string, db *bolt.DB, statz *stats.Stats, ) (Reader, error) { - reader, err := newUncachedReader(walkType, root, pathFilters, statz) + reader, err := newUncachedReader(selector, root, pathFilters, statz) if err != nil { return nil, err } @@ -252,7 +289,7 @@ func withCache(db *bolt.DB, reader Reader) (Reader, error) { //nolint:ireturn func newUncachedReader( - walkType Type, + selector Selector, root string, pathFilters []string, statz *stats.Stats, @@ -262,29 +299,36 @@ func newUncachedReader( reader Reader ) - switch walkType { - case Auto: - // for now, we keep it simple and try git first, jujutsu second, and filesystem last - reader, err = newUncachedReader(Git, root, pathFilters, statz) - if err != nil { - reader, err = newUncachedReader(Jujutsu, root, pathFilters, statz) + switch selector.kind { + case customSelector: + return nil, errors.New("custom walk type is not supported") + case builtinSelector: + switch selector.builtin { + case Auto: + // for now, we keep it simple and try git first, jujutsu second, and filesystem last + reader, err = newUncachedReader(BuiltinSelector(Git), root, pathFilters, statz) if err != nil { - reader, err = newUncachedReader(Filesystem, root, pathFilters, statz) + reader, err = newUncachedReader(BuiltinSelector(Jujutsu), root, pathFilters, statz) + if err != nil { + reader, err = newUncachedReader(BuiltinSelector(Filesystem), root, pathFilters, statz) + } } - } - - return reader, err - case Stdin: - return nil, errors.New("stdin walk type is not supported") - case Filesystem: - reader = NewFilesystemReader(root, pathFilters, statz, BatchSize) - case Git: - reader, err = NewGitReader(root, pathFilters, statz) - case Jujutsu: - reader, err = NewJujutsuReader(root, pathFilters, statz) + return reader, err + case Stdin: + return nil, errors.New("stdin walk type is not supported") + case Filesystem: + reader = NewFilesystemReader(root, pathFilters, statz, BatchSize) + case Git: + reader, err = NewGitReader(root, pathFilters, statz) + case Jujutsu: + reader, err = NewJujutsuReader(root, pathFilters, statz) + + default: + return nil, fmt.Errorf("unknown walk type: %v", selector.builtin) + } default: - return nil, fmt.Errorf("unknown walk type: %v", walkType) + return nil, fmt.Errorf("unknown selector type: %v", selector.kind) } if err != nil { From 07570750aca0b299582938fdc6984260e524a6d5 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 11:09:41 +0200 Subject: [PATCH 11/12] Add custom walker reader Implement custom walkers as a small adapter around the shared path stream reader. Run the configured command from the tree root and pass validated path filters as positional arguments after configured options. --- walk/custom.go | 19 +++++++++++++++++++ walk/walk.go | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 walk/custom.go diff --git a/walk/custom.go b/walk/custom.go new file mode 100644 index 00000000..c20a1fff --- /dev/null +++ b/walk/custom.go @@ -0,0 +1,19 @@ +package walk + +import "github.com/numtide/treefmt/v2/stats" + +type CustomReader = PathStreamReader + +func NewCustomReader( + root string, + pathFilters []string, + statz *stats.Stats, + cfg CustomConfig, +) (*CustomReader, error) { + return NewPathStreamReader(root, statz, PathStreamConfig{ + Name: "custom walker " + cfg.Name, + Command: cfg.Command, + Options: cfg.Options, + PathFilters: pathFilters, + }) +} diff --git a/walk/walk.go b/walk/walk.go index c9a37cfd..a0275fa8 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -301,7 +301,7 @@ func newUncachedReader( switch selector.kind { case customSelector: - return nil, errors.New("custom walk type is not supported") + reader, err = NewCustomReader(root, pathFilters, statz, selector.custom) case builtinSelector: switch selector.builtin { case Auto: From 4e1f4b8c4a9a7e31b493966cc0b0c351207b6629 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Sun, 7 Jun 2026 11:10:58 +0200 Subject: [PATCH 12/12] Test custom walker integration Cover selecting a custom walker from treefmt.toml, passing configured walker options, and forwarding requested path filters to the walker command. Use sandbox-compatible bash commands, exact lowercase walker names, and the long-form --clear-cache option in CLI coverage. --- cmd/root_test.go | 59 ++++++++++++++++++ walk/custom_test.go | 146 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 walk/custom_test.go diff --git a/cmd/root_test.go b/cmd/root_test.go index 148dd3b0..59ca4406 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -1814,6 +1814,65 @@ func TestJujutsu(t *testing.T) { ) } +func TestCustomWalker(t *testing.T) { + tempDir := test.TempExamples(t) + configPath := filepath.Join(tempDir, "/treefmt.toml") + + test.ChangeWorkDir(t, tempDir) + + cfg := &config.Config{ + Walk: "mywalker", + WalkerConfigs: map[string]*config.Walker{ + "mywalker": { + Command: "bash", + Options: []string{ + "-c", + `if [ "$#" -eq 0 ]; then set -- go haskell; fi +for path in "$@"; do + case "$path" in + go) printf '%s\n' go/main.go go/go.mod ;; + haskell) printf '%s\n' haskell/Foo.hs ;; + *) printf '%s\n' "$path" ;; + esac +done`, + "custom-walker", + }, + }, + }, + FormatterConfigs: map[string]*config.Formatter{ + "echo": { + Command: "echo", // will not generate any underlying changes in the file + Includes: []string{"*"}, + }, + }, + } + + test.WriteConfig(t, configPath, cfg) + + treefmt(t, + withConfig(configPath, cfg), + withNoError(t), + withStats(t, map[stats.Type]int{ + stats.Traversed: 3, + stats.Matched: 3, + stats.Formatted: 3, + stats.Changed: 0, + }), + ) + + treefmt(t, + withArgs("--clear-cache", "go"), + withConfig(configPath, cfg), + withNoError(t), + withStats(t, map[stats.Type]int{ + stats.Traversed: 2, + stats.Matched: 2, + stats.Formatted: 2, + stats.Changed: 0, + }), + ) +} + func TestTreeRootCmd(t *testing.T) { as := require.New(t) diff --git a/walk/custom_test.go b/walk/custom_test.go new file mode 100644 index 00000000..c1fbbdc8 --- /dev/null +++ b/walk/custom_test.go @@ -0,0 +1,146 @@ +package walk_test + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" + "time" + + "github.com/numtide/treefmt/v2/stats" + "github.com/numtide/treefmt/v2/test" + "github.com/numtide/treefmt/v2/walk" + "github.com/stretchr/testify/require" +) + +func TestCustomReader(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + as.NoError(os.Symlink(filepath.Join(tempDir, "go", "main.go"), filepath.Join(tempDir, "link-to-main"))) + + statz := stats.New() + reader, err := walk.NewCustomReader(tempDir, nil, &statz, walk.CustomConfig{ + Name: "mywalker", + Command: "bash", + Options: []string{ + "-c", + `for path in "$@"; do printf '%s\n' "$path"; done`, + "walker", + "go/main.go", + "go/go.mod", + "go", + "missing.txt", + "link-to-main", + }, + }) + as.NoError(err) + + files := make([]*walk.File, 8) + ctx, cancel := context.WithTimeout(t.Context(), time.Second) + n, err := reader.Read(ctx, files) + + cancel() + + as.Equal(2, n) + as.ErrorIs(err, io.EOF) + as.Equal("go/main.go", files[0].RelPath) + as.Equal("go/go.mod", files[1].RelPath) + as.Equal(2, statz.Value(stats.Traversed)) + as.NoError(reader.Close()) +} + +func TestCustomReaderSubpath(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewCustomReader(tempDir, []string{"go"}, &statz, walk.CustomConfig{ + Name: "mywalker", + Command: "bash", + Options: []string{ + "-c", + "printf '%s\n' go/main.go haskell/Foo.hs go/go.mod", + }, + }) + as.NoError(err) + + files := make([]*walk.File, 8) + ctx, cancel := context.WithTimeout(t.Context(), time.Second) + n, err := reader.Read(ctx, files) + + cancel() + + as.Equal(2, n) + as.ErrorIs(err, io.EOF) + as.Equal("go/main.go", files[0].RelPath) + as.Equal("go/go.mod", files[1].RelPath) + as.Equal(2, statz.Value(stats.Traversed)) + as.NoError(reader.Close()) +} + +func TestCustomReaderPassesPathFilters(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewCustomReader(tempDir, []string{"go"}, &statz, walk.CustomConfig{ + Name: "mywalker", + Command: "bash", + Options: []string{ + "-c", + `for path in "$@"; do printf '%s\n' "$path/main.go" "$path/go.mod"; done`, + "walker", + }, + }) + as.NoError(err) + + files := make([]*walk.File, 8) + ctx, cancel := context.WithTimeout(t.Context(), time.Second) + n, err := reader.Read(ctx, files) + + cancel() + + as.Equal(2, n) + as.ErrorIs(err, io.EOF) + as.Equal("go/main.go", files[0].RelPath) + as.Equal("go/go.mod", files[1].RelPath) + as.Equal(2, statz.Value(stats.Traversed)) + as.NoError(reader.Close()) +} + +func TestCustomReaderCommandFailure(t *testing.T) { + as := require.New(t) + + tempDir := test.TempExamples(t) + + statz := stats.New() + reader, err := walk.NewCustomReader(tempDir, nil, &statz, walk.CustomConfig{ + Name: "mywalker", + Command: "bash", + Options: []string{ + "-c", + "printf '%s\n' go/main.go; exit 7", + }, + }) + as.NoError(err) + + files := make([]*walk.File, 8) + ctx, cancel := context.WithTimeout(t.Context(), time.Second) + n, err := reader.Read(ctx, files) + + cancel() + + as.Equal(1, n) + as.ErrorIs(err, io.EOF) + as.Equal("go/main.go", files[0].RelPath) + + err = reader.Close() + as.Error(err) + as.ErrorContains(err, "failed to wait for custom walker mywalker command to complete") + as.ErrorContains(err, "exit status 7") +}