From c744b15032f09d2e2908d09dd10e377b44b2e52a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Scha=CC=88fer?= <101886095+PeterSchafer@users.noreply.github.com> Date: Mon, 23 Mar 2026 12:28:53 +0100 Subject: [PATCH 1/2] feat: Do not filter tracked files even if they match gitignore ... --- pkg/utils/file_filter.go | 58 ++++++++++++++++ pkg/utils/file_filter_test.go | 124 ++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+) diff --git a/pkg/utils/file_filter.go b/pkg/utils/file_filter.go index 96930b650..42fc4748e 100644 --- a/pkg/utils/file_filter.go +++ b/pkg/utils/file_filter.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/go-git/go-git/v5" "github.com/rs/zerolog" gitignore "github.com/sabhiram/go-gitignore" "golang.org/x/sync/semaphore" @@ -112,6 +113,10 @@ func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan // create pattern matcher used to match filesToFilter to glob patterns globPatternMatcher := gitignore.CompileIgnoreLines(globs...) + + // get git-tracked files to avoid filtering them out + gitTrackedFiles := fw.getGitTrackedFiles() + go func() { ctx := context.Background() availableThreads := semaphore.NewWeighted(fw.max_threads) @@ -126,6 +131,16 @@ func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan } go func(f string) { defer availableThreads.Release(1) + // normalize path to absolute for consistent comparison + absPath, err := filepath.Abs(f) + if err != nil { + absPath = filepath.Clean(f) + } + // files tracked in git should not be filtered, even if they match gitignore patterns + if gitTrackedFiles[absPath] { + filteredFilesCh <- f + return + } // filesToFilter that do not match the glob pattern are filtered if !globPatternMatcher.MatchesPath(f) { filteredFilesCh <- f @@ -143,6 +158,49 @@ func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan return filteredFilesCh } +// getGitTrackedFiles returns a map of absolute file paths that are tracked in git (in the index/staging area) +func (fw *FileFilter) getGitTrackedFiles() map[string]bool { + trackedFiles := make(map[string]bool) + + // open the git repository + repo, err := git.PlainOpenWithOptions(fw.path, &git.PlainOpenOptions{ + DetectDotGit: true, + }) + if err != nil { + fw.logger.Debug().Msgf("failed to open git repository: %v", err) + return trackedFiles + } + + // get the worktree to find the root path and access the index + worktree, err := repo.Worktree() + if err != nil { + fw.logger.Debug().Msgf("failed to get worktree: %v", err) + return trackedFiles + } + repoRoot := worktree.Filesystem.Root() + + // get the index (staging area) - this contains all tracked files + // A file is tracked in git once it's added to the index, even before commit + idx, err := repo.Storer.Index() + if err != nil { + fw.logger.Debug().Msgf("failed to get git index: %v", err) + return trackedFiles + } + + // iterate through all entries in the index + for _, entry := range idx.Entries { + absolutePath := filepath.Join(repoRoot, entry.Name) + // ensure the path is absolute and cleaned for consistent comparison + absolutePath, err = filepath.Abs(absolutePath) + if err != nil { + absolutePath = filepath.Clean(filepath.Join(repoRoot, entry.Name)) + } + trackedFiles[absolutePath] = true + } + + return trackedFiles +} + // buildGlobs iterates a list of ignore filesToFilter and returns a list of glob patterns that can be used to test for ignored filesToFilter func (fw *FileFilter) buildGlobs(ignoreFiles []string) ([]string, error) { var globs = make([]string, 0) diff --git a/pkg/utils/file_filter_test.go b/pkg/utils/file_filter_test.go index 3764f8642..57a3899ef 100644 --- a/pkg/utils/file_filter_test.go +++ b/pkg/utils/file_filter_test.go @@ -7,7 +7,10 @@ import ( "runtime" "strings" "testing" + "time" + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/object" "github.com/rs/zerolog/log" "github.com/stretchr/testify/assert" ) @@ -707,3 +710,124 @@ func TestDotSnykExclude_isExpired(t *testing.T) { }) } } + +func TestFileFilter_GitTrackedFilesNotFiltered(t *testing.T) { + t.Run("committed files are not filtered even if they match gitignore", func(t *testing.T) { + tempDir := t.TempDir() + + // Initialize a git repository + repo, err := git.PlainInit(tempDir, false) + assert.NoError(t, err) + + // Create a file that will be tracked and matches gitignore pattern + trackedIgnoredFile := filepath.Join(tempDir, "ignored.log") + createFileInPath(t, trackedIgnoredFile, []byte("tracked but ignored")) + + // Create another file that will NOT be tracked and matches gitignore pattern + untrackedIgnoredFile := filepath.Join(tempDir, "untracked.log") + createFileInPath(t, untrackedIgnoredFile, []byte("untracked and ignored")) + + // Create a regular file that doesn't match gitignore + regularFile := filepath.Join(tempDir, "regular.txt") + createFileInPath(t, regularFile, []byte("regular file")) + + // Add and commit the tracked file BEFORE creating gitignore + worktree, err := repo.Worktree() + assert.NoError(t, err) + + _, err = worktree.Add("ignored.log") + assert.NoError(t, err) + _, err = worktree.Add("regular.txt") + assert.NoError(t, err) + + _, err = worktree.Commit("initial commit", &git.CommitOptions{ + Author: &object.Signature{ + Name: "Test", + Email: "test@test.com", + When: time.Now(), + }, + }) + assert.NoError(t, err) + + // Now create .gitignore that ignores *.log files + gitignorePath := filepath.Join(tempDir, ".gitignore") + createFileInPath(t, gitignorePath, []byte("*.log\n")) + + // Create file filter and get filtered files + fileFilter := NewFileFilter(tempDir, &log.Logger) + rules, err := fileFilter.GetRules([]string{".gitignore"}) + assert.NoError(t, err) + + allFiles := fileFilter.GetAllFiles() + filteredFiles := fileFilter.GetFilteredFiles(allFiles, rules) + + // Collect filtered files + var filteredFilesList []string + for file := range filteredFiles { + filteredFilesList = append(filteredFilesList, file) + } + + // The tracked file (ignored.log) should NOT be filtered out + assert.Contains(t, filteredFilesList, trackedIgnoredFile, "git tracked file should not be filtered even if it matches gitignore") + + // The untracked file (untracked.log) SHOULD be filtered out + assert.NotContains(t, filteredFilesList, untrackedIgnoredFile, "untracked file matching gitignore should be filtered") + + // The regular file should be present + assert.Contains(t, filteredFilesList, regularFile, "regular file should not be filtered") + + // The gitignore file should be present + assert.Contains(t, filteredFilesList, gitignorePath, ".gitignore should not be filtered") + }) + + t.Run("staged but uncommitted files are not filtered even if they match gitignore", func(t *testing.T) { + tempDir := t.TempDir() + + // Initialize a git repository + repo, err := git.PlainInit(tempDir, false) + assert.NoError(t, err) + + // Create .gitignore first + gitignorePath := filepath.Join(tempDir, ".gitignore") + createFileInPath(t, gitignorePath, []byte("*.log\n")) + + // Create a file that matches gitignore pattern but will be staged (git add) + stagedIgnoredFile := filepath.Join(tempDir, "staged.log") + createFileInPath(t, stagedIgnoredFile, []byte("staged but ignored")) + + // Create another file that will NOT be staged and matches gitignore pattern + unstagedIgnoredFile := filepath.Join(tempDir, "unstaged.log") + createFileInPath(t, unstagedIgnoredFile, []byte("unstaged and ignored")) + + // Stage the file (git add) but do NOT commit + worktree, err := repo.Worktree() + assert.NoError(t, err) + + _, err = worktree.Add("staged.log") + assert.NoError(t, err) + // Note: we do NOT commit here - the file is only staged + + // Create file filter and get filtered files + fileFilter := NewFileFilter(tempDir, &log.Logger) + rules, err := fileFilter.GetRules([]string{".gitignore"}) + assert.NoError(t, err) + + allFiles := fileFilter.GetAllFiles() + filteredFiles := fileFilter.GetFilteredFiles(allFiles, rules) + + // Collect filtered files + var filteredFilesList []string + for file := range filteredFiles { + filteredFilesList = append(filteredFilesList, file) + } + + // The staged file (staged.log) should NOT be filtered out - it's in the index + assert.Contains(t, filteredFilesList, stagedIgnoredFile, "staged file should not be filtered even if it matches gitignore") + + // The unstaged file (unstaged.log) SHOULD be filtered out + assert.NotContains(t, filteredFilesList, unstagedIgnoredFile, "unstaged file matching gitignore should be filtered") + + // The gitignore file should be present + assert.Contains(t, filteredFilesList, gitignorePath, ".gitignore should not be filtered") + }) +} From 45b2515185916e4c3c4ddaa815c94c0f305fb224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Scha=CC=88fer?= <101886095+PeterSchafer@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:56:46 +0100 Subject: [PATCH 2/2] chore: iterate filter implementation --- pkg/utils/file_filter.go | 60 +++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/pkg/utils/file_filter.go b/pkg/utils/file_filter.go index 42fc4748e..c1a935811 100644 --- a/pkg/utils/file_filter.go +++ b/pkg/utils/file_filter.go @@ -114,8 +114,8 @@ func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan // create pattern matcher used to match filesToFilter to glob patterns globPatternMatcher := gitignore.CompileIgnoreLines(globs...) - // get git-tracked files to avoid filtering them out - gitTrackedFiles := fw.getGitTrackedFiles() + // get git-tracked files (relative paths) and repo root to avoid filtering them out + gitTrackedFiles, repoRoot := fw.getGitTrackedFiles() go func() { ctx := context.Background() @@ -131,13 +131,8 @@ func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan } go func(f string) { defer availableThreads.Release(1) - // normalize path to absolute for consistent comparison - absPath, err := filepath.Abs(f) - if err != nil { - absPath = filepath.Clean(f) - } // files tracked in git should not be filtered, even if they match gitignore patterns - if gitTrackedFiles[absPath] { + if fw.isGitTracked(f, gitTrackedFiles, repoRoot) { filteredFilesCh <- f return } @@ -158,8 +153,34 @@ func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan return filteredFilesCh } -// getGitTrackedFiles returns a map of absolute file paths that are tracked in git (in the index/staging area) -func (fw *FileFilter) getGitTrackedFiles() map[string]bool { +// isGitTracked checks if a file path is tracked in git by computing its path relative to the repo root +func (fw *FileFilter) isGitTracked(filePath string, trackedFiles map[string]bool, repoRoot string) bool { + if len(trackedFiles) == 0 || repoRoot == "" { + return false + } + + // convert file path to absolute to ensure consistent comparison + absPath, err := filepath.Abs(filePath) + if err != nil { + return false + } + + // compute path relative to repo root + relPath, err := filepath.Rel(repoRoot, absPath) + if err != nil { + return false + } + + // normalize to forward slashes (git index uses forward slashes) + relPath = filepath.ToSlash(relPath) + + // check if this relative path is in the tracked files + return trackedFiles[relPath] +} + +// getGitTrackedFiles returns a map of relative file paths (using forward slashes) that are tracked in git, +// along with the repository root path. Using relative paths reduces memory usage in large repos. +func (fw *FileFilter) getGitTrackedFiles() (map[string]bool, string) { trackedFiles := make(map[string]bool) // open the git repository @@ -168,14 +189,14 @@ func (fw *FileFilter) getGitTrackedFiles() map[string]bool { }) if err != nil { fw.logger.Debug().Msgf("failed to open git repository: %v", err) - return trackedFiles + return trackedFiles, "" } // get the worktree to find the root path and access the index worktree, err := repo.Worktree() if err != nil { fw.logger.Debug().Msgf("failed to get worktree: %v", err) - return trackedFiles + return trackedFiles, "" } repoRoot := worktree.Filesystem.Root() @@ -184,21 +205,16 @@ func (fw *FileFilter) getGitTrackedFiles() map[string]bool { idx, err := repo.Storer.Index() if err != nil { fw.logger.Debug().Msgf("failed to get git index: %v", err) - return trackedFiles + return trackedFiles, "" } - // iterate through all entries in the index + // store relative paths (as they appear in git index) - uses forward slashes + // this reduces memory usage compared to storing full absolute paths for _, entry := range idx.Entries { - absolutePath := filepath.Join(repoRoot, entry.Name) - // ensure the path is absolute and cleaned for consistent comparison - absolutePath, err = filepath.Abs(absolutePath) - if err != nil { - absolutePath = filepath.Clean(filepath.Join(repoRoot, entry.Name)) - } - trackedFiles[absolutePath] = true + trackedFiles[entry.Name] = true } - return trackedFiles + return trackedFiles, repoRoot } // buildGlobs iterates a list of ignore filesToFilter and returns a list of glob patterns that can be used to test for ignored filesToFilter