Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/format/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string)
}

// create a new walker for traversing the paths
walker, err := walk.NewCompositeReader(walkType, cfg.TreeRoot, paths, db, statz)
walker, err := walk.NewReader(walkType, cfg.TreeRoot, paths, db, statz)
if err != nil {
return fmt.Errorf("failed to create walker: %w", err)
}
Expand Down
2 changes: 2 additions & 0 deletions docs/site/getting-started/configure.md
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,8 @@ Set the verbosity level of logs:
The method used to traverse the files within the tree root.
Currently, we support 'auto', 'git', 'jujutsu' or 'filesystem'

Paths containing newlines are unsupported.

=== "Flag"

```console
Expand Down
25 changes: 20 additions & 5 deletions walk/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
type FilesystemReader struct {
log *log.Logger
root string
path string
paths []string
batchSize int

eg *errgroup.Group
Expand All @@ -35,9 +35,24 @@ func (f *FilesystemReader) process() error {
close(f.filesCh)
}()

// f.path is relative to the root, so we create a fully qualified version
paths := f.paths
if len(paths) == 0 {
paths = []string{""}
}

for _, path := range paths {
if err := f.processPath(path); err != nil {
return err
}
}

return nil
}

func (f *FilesystemReader) processPath(pathFilter string) error {
// pathFilter is relative to the root, so we create a fully qualified version
// we also clean the path up in case there are any ../../ components etc.
path := filepath.Clean(filepath.Join(f.root, f.path))
path := filepath.Clean(filepath.Join(f.root, pathFilter))

// ensure the path is within the root
if !strings.HasPrefix(path, f.root) {
Expand Down Expand Up @@ -130,7 +145,7 @@ func (f *FilesystemReader) Close() error {
// and root.
func NewFilesystemReader(
root string,
path string,
paths []string,
statz *stats.Stats,
batchSize int,
) *FilesystemReader {
Expand All @@ -140,7 +155,7 @@ func NewFilesystemReader(
r := FilesystemReader{
log: log.WithPrefix("walk | filesystem"),
root: root,
path: path,
paths: paths,
batchSize: batchSize,

eg: &eg,
Expand Down
26 changes: 24 additions & 2 deletions walk/filesystem_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func TestFilesystemReaderCancellation(t *testing.T) {
tempDir := test.TempExamples(t)
statz := stats.New()

r := walk.NewFilesystemReader(tempDir, "", &statz, 1024)
r := walk.NewFilesystemReader(tempDir, nil, &statz, 1024)

ctx, cancel := context.WithCancel(t.Context())
cancel()
Expand All @@ -72,7 +72,7 @@ func TestFilesystemReader(t *testing.T) {
tempDir := test.TempExamples(t)
statz := stats.New()

r := walk.NewFilesystemReader(tempDir, "", &statz, 1024)
r := walk.NewFilesystemReader(tempDir, nil, &statz, 1024)

count := 0

Expand Down Expand Up @@ -101,3 +101,25 @@ func TestFilesystemReader(t *testing.T) {
as.Equal(0, statz.Value(stats.Formatted))
as.Equal(0, statz.Value(stats.Changed))
}

func TestFilesystemReaderSubpaths(t *testing.T) {
as := require.New(t)

tempDir := test.TempExamples(t)
statz := stats.New()

r := walk.NewFilesystemReader(tempDir, []string{"go", "haskell/Foo.hs"}, &statz, 1024)

ctx, cancel := context.WithTimeout(t.Context(), 100*time.Millisecond)
defer cancel()

files := make([]*walk.File, 8)
n, err := r.Read(ctx, files)

as.ErrorIs(err, io.EOF)
as.Equal(3, n)
as.Equal("go/go.mod", files[0].RelPath)
as.Equal("go/main.go", files[1].RelPath)
as.Equal("haskell/Foo.hs", files[2].RelPath)
as.Equal(3, statz.Value(stats.Traversed))
}
164 changes: 7 additions & 157 deletions walk/git.go
Original file line number Diff line number Diff line change
@@ -1,143 +1,17 @@
package walk

import (
"bufio"
"context"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"

"charm.land/log/v2"
"github.com/numtide/treefmt/v2/git"
"github.com/numtide/treefmt/v2/stats"
"golang.org/x/sync/errgroup"
)

type GitReader struct {
root string
path string

log *log.Logger
stats *stats.Stats

eg *errgroup.Group
scanner *bufio.Scanner
}

func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) {
// ensure we record how many files we traversed
defer func() {
g.stats.Add(stats.Traversed, n)
}()

nextFile := func() (string, error) {
for line := g.scanner.Text(); len(line) > 0; line = g.scanner.Text() {
lineSplit := strings.Split(line, "\t")

var stage, file string
// Untracked files just show as `<filename>`, while tracked files show as `<mode> <object> <stage><TAB><file>`
if len(lineSplit) == 1 {
stage, file = "", lineSplit[0]
} else {
stage, file = lineSplit[0], lineSplit[1]
}

// 160000 is the mode for submodules, skip them because they are separate projects that may have their own
// formatting rules
if strings.HasPrefix(stage, "160000") {
g.scanner.Scan()

continue
}

if file[0] != '"' {
return file, nil
}

unquoted, err := strconv.Unquote(file)
if err != nil {
return "", fmt.Errorf("failed to unquote file %s: %w", file, err)
}

return unquoted, nil
}

return "", io.EOF
}

LOOP:

for n < len(files) {
select {
// exit early if the context was cancelled
case <-ctx.Done():
return n, ctx.Err() //nolint:wrapcheck

default:
// read the next file
if g.scanner.Scan() {
entry, err := nextFile()
if err != nil {
return n, err
}

path := filepath.Join(g.root, g.path, entry)

g.log.Debugf("processing file: %s", path)

info, err := os.Lstat(path)

switch {
case os.IsNotExist(err):
// the underlying file might have been removed
g.log.Warnf(
"Path %s is in the worktree but appears to have been removed from the filesystem", path,
)

continue
case err != nil:
return n, fmt.Errorf("failed to stat %s: %w", path, err)
case info.Mode()&os.ModeSymlink == os.ModeSymlink:
// we skip reporting symlinks stored in Git, they should
// point to local files which we would list anyway.
continue
}

files[n] = &File{
Path: path,
RelPath: filepath.Join(g.path, entry),
Info: info,
}

n++
} else {
// nothing more to read
err = io.EOF

break LOOP
}
}
}

return n, err
}

func (g *GitReader) Close() error {
err := g.eg.Wait()
if err != nil {
return fmt.Errorf("failed to wait for git command to complete: %w", err)
}

return nil
}
type GitReader = PathStreamReader

func NewGitReader(
root string,
path string,
pathFilters []string,
statz *stats.Stats,
) (*GitReader, error) {
// check if the root is a git repository
Expand All @@ -150,34 +24,10 @@ func NewGitReader(
return nil, fmt.Errorf("%s is not a git repository", root)
}

// create an errgroup for executing in the background
eg := &errgroup.Group{}

// create a pipe to capture the command output
r, w := io.Pipe()

// create a command which will execute from the specified sub path within root
cmd := exec.CommandContext(
context.Background(),
"git", "ls-files", "--cached", "--others", "--exclude-standard", "--stage",
)
cmd.Dir = filepath.Join(root, path)
cmd.Stdout = w

// execute the command in the background
eg.Go(func() error {
return w.CloseWithError(cmd.Run())
return NewPathStreamReader(root, statz, PathStreamConfig{
Name: "git",
Command: "git",
Options: []string{"ls-files", "-z", "--cached", "--others", "--exclude-standard", "--full-name", "--"},

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing --stage here should not pose a problem.
The original reason to pass that option was so submodule paths are omitted, but the PathStreamReader filters out directories by itself. This also speeds up the execution time of git ls-files quite a bit.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would also like to remove --others, because those files can't be recovered when they get formatted but the user didn't want that. Imho the "git" walker should not list untracked files.
I didn't do it tho because that would make this PR a behavioral change.

PathFilters: pathFilters,
})

// create a new scanner for reading the output
scanner := bufio.NewScanner(r)

return &GitReader{
eg: eg,
root: root,
path: path,
stats: statz,
scanner: scanner,
log: log.WithPrefix("walk | git"),
}, nil
}
4 changes: 2 additions & 2 deletions walk/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestGitReader(t *testing.T) {

// read empty worktree
statz := stats.New()
reader, err := walk.NewGitReader(tempDir, "", &statz)
reader, err := walk.NewGitReader(tempDir, nil, &statz)
as.NoError(err)

files := make([]*walk.File, 34)
Expand Down Expand Up @@ -84,7 +84,7 @@ func TestGitReader(t *testing.T) {
as.NoError(cmd.Run(), "failed to add everything to the index")

statz = stats.New()
reader, err = walk.NewGitReader(tempDir, "", &statz)
reader, err = walk.NewGitReader(tempDir, nil, &statz)
as.NoError(err)

count := 0
Expand Down
Loading