From 9029b353a7cb399bf9cda8a97a4ff9a479015411 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 13:54:46 +0300 Subject: [PATCH 01/15] feat(cli): Add reader/writer lock API with downgrade --- cli/internal/utils/lock.go | 113 +++++++++++--- cli/internal/utils/lock_test.go | 258 +++++++++++++++++++++++++++----- 2 files changed, 309 insertions(+), 62 deletions(-) diff --git a/cli/internal/utils/lock.go b/cli/internal/utils/lock.go index 779a111c..3ad26e68 100644 --- a/cli/internal/utils/lock.go +++ b/cli/internal/utils/lock.go @@ -11,32 +11,30 @@ import ( "github.com/gofrs/flock" ) -// ErrLocked is returned when a lock file is already held by another process. +// ErrLocked is returned when a lock file is already held by another process +// in an incompatible mode. var ErrLocked = errors.New("lock is held by another process") -// LockMeta holds diagnostic information written into lock files. +// LockMeta holds diagnostic information written into lock files by exclusive +// (writer) holders. It is zero for shared (reader) holders. type LockMeta struct { PID int Command string Project string } -// FileLock wraps a flock.Flock with its path for cleanup. +// FileLock wraps a flock.Flock with its path and the mode it was acquired in. type FileLock struct { - flock *flock.Flock - path string + flock *flock.Flock + path string + exclusive bool } -// Unlock releases the advisory lock and removes the lock file. -func (l *FileLock) Unlock() { - _ = l.flock.Unlock() - _ = os.Remove(l.path) -} - -// TryLock attempts a non-blocking exclusive lock on the given path. -// On success it writes meta into the file and returns a FileLock. -// On failure because the lock is held, it returns ErrLocked. -func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { +// TryLockExclusive attempts a non-blocking LOCK_EX on the given path. On +// success it stamps meta into the file and returns a FileLock whose Unlock +// will clear the file content before releasing the kernel lock. On failure +// because another exclusive or shared holder exists, returns ErrLocked. +func TryLockExclusive(lockPath string, meta LockMeta) (*FileLock, error) { if err := os.MkdirAll(filepath.Dir(lockPath), 0o755); err != nil { return nil, fmt.Errorf("failed to create lock directory: %w", err) } @@ -44,7 +42,7 @@ func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { fl := flock.New(lockPath) locked, err := fl.TryLock() if err != nil { - return nil, fmt.Errorf("failed to acquire lock: %w", err) + return nil, fmt.Errorf("failed to acquire exclusive lock: %w", err) } if !locked { return nil, ErrLocked @@ -56,10 +54,64 @@ func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { } _ = os.WriteFile(lockPath, []byte(content), 0o644) - return &FileLock{flock: fl, path: lockPath}, nil + return &FileLock{flock: fl, path: lockPath, exclusive: true}, nil } -// ReadLockMeta reads diagnostic metadata from a lock file. +// TryLockShared attempts a non-blocking LOCK_SH on the given path. Multiple +// shared holders can coexist. Returns ErrLocked if an exclusive holder is +// present. The file content is not modified. +func TryLockShared(lockPath string) (*FileLock, error) { + if err := os.MkdirAll(filepath.Dir(lockPath), 0o755); err != nil { + return nil, fmt.Errorf("failed to create lock directory: %w", err) + } + + fl := flock.New(lockPath) + locked, err := fl.TryRLock() + if err != nil { + return nil, fmt.Errorf("failed to acquire shared lock: %w", err) + } + if !locked { + return nil, ErrLocked + } + + return &FileLock{flock: fl, path: lockPath, exclusive: false}, nil +} + +// Downgrade atomically converts a held LOCK_EX to LOCK_SH on the same fd and +// truncates the metadata file. After Downgrade the handle may still be +// released by Unlock. Calling Downgrade on a shared handle returns an error. +func (l *FileLock) Downgrade() error { + if !l.exclusive { + return errors.New("Downgrade called on non-exclusive lock") + } + // gofrs/flock.RLock on a held exclusive fd issues unix.Flock(fd, LOCK_SH) + // on the same descriptor, which is an atomic downgrade on POSIX. + if err := l.flock.RLock(); err != nil { + return fmt.Errorf("failed to downgrade lock: %w", err) + } + l.exclusive = false + if err := os.Truncate(l.path, 0); err != nil { + return fmt.Errorf("failed to clear lock metadata on downgrade: %w", err) + } + return nil +} + +// Unlock releases the advisory lock. For exclusive holders it first truncates +// the metadata so prune's diagnostic output does not show a stale writer PID. +// The lock file itself is not removed — removing it while other shared +// holders still have it open would let a subsequent acquirer create a new +// inode and silently bypass mutual exclusion. +func (l *FileLock) Unlock() { + if l.exclusive { + _ = os.Truncate(l.path, 0) + l.exclusive = false + } + _ = l.flock.Unlock() +} + +// ReadLockMeta reads diagnostic metadata from a lock file. Returns an empty +// LockMeta when the file exists but has no content (shared holder, or after +// an exclusive holder's Unlock/Downgrade). func ReadLockMeta(lockPath string) (LockMeta, error) { data, err := os.ReadFile(lockPath) if err != nil { @@ -83,7 +135,7 @@ func ReadLockMeta(lockPath string) (LockMeta, error) { return meta, nil } -// PruneLockPath returns the path to the global prune lock: ~/.opentaint/.prune.lock +// PruneLockPath returns ~/.opentaint/.prune.lock. func PruneLockPath() (string, error) { home, err := GetOpenTaintHomePath() if err != nil { @@ -92,8 +144,25 @@ func PruneLockPath() (string, error) { return filepath.Join(home, ".prune.lock"), nil } -// CompileLockPath returns the path to a per-project compile lock: -// /.compile.lock +// CacheLockPath returns /.cache.lock — the reader/writer +// lock gating access to the compiled project model. +func CacheLockPath(projectCachePath string) string { + return filepath.Join(projectCachePath, ".cache.lock") +} + +// TryLock is a deprecated alias for TryLockExclusive, retained during the +// call-site migration in task 2 of the reader/writer lock rollout. Remove +// once no callers remain. +// +// Deprecated: use TryLockExclusive. +func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { + return TryLockExclusive(lockPath, meta) +} + +// CompileLockPath is a deprecated alias for CacheLockPath, retained during +// the call-site migration in task 2. +// +// Deprecated: use CacheLockPath. func CompileLockPath(projectCachePath string) string { - return filepath.Join(projectCachePath, ".compile.lock") + return CacheLockPath(projectCachePath) } diff --git a/cli/internal/utils/lock_test.go b/cli/internal/utils/lock_test.go index eadcae47..77fb7266 100644 --- a/cli/internal/utils/lock_test.go +++ b/cli/internal/utils/lock_test.go @@ -6,68 +6,246 @@ import ( "testing" ) -func TestTryLock(t *testing.T) { - t.Run("acquires lock on new file", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - lock, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "test"}) +func TestTryLockExclusive(t *testing.T) { + t.Run("acquires on new file", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + lock, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "test"}) if err != nil { - t.Fatalf("TryLock() error = %v", err) + t.Fatalf("TryLockExclusive() error = %v", err) } defer lock.Unlock() }) - t.Run("second lock on same file returns ErrLocked", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - lock1, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "first"}) + t.Run("second exclusive returns ErrLocked", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l1, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "first"}) if err != nil { - t.Fatalf("first TryLock() error = %v", err) + t.Fatalf("first TryLockExclusive() error = %v", err) } - defer lock1.Unlock() + defer l1.Unlock() - _, err = TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "second"}) + _, err = TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "second"}) if err != ErrLocked { t.Fatalf("expected ErrLocked, got %v", err) } }) - t.Run("lock released after Unlock allows re-acquisition", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - lock1, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "first"}) + t.Run("exclusive held blocks shared", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l1, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "w"}) if err != nil { - t.Fatalf("first TryLock() error = %v", err) + t.Fatalf("TryLockExclusive() error = %v", err) } - lock1.Unlock() + defer l1.Unlock() - lock2, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "second"}) + if _, err := TryLockShared(path); err != ErrLocked { + t.Fatalf("expected ErrLocked from TryLockShared, got %v", err) + } + }) + + t.Run("unlock clears stamped meta", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l, err := TryLockExclusive(path, LockMeta{PID: 42, Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + l.Unlock() + + data, err := os.ReadFile(path) if err != nil { - t.Fatalf("second TryLock() error = %v", err) + t.Fatalf("lock file should still exist after Unlock: %v", err) + } + if len(data) != 0 { + t.Errorf("expected empty file after writer Unlock, got %q", string(data)) + } + }) + + t.Run("unlock does not remove the file", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "test"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + l.Unlock() + + if _, err := os.Stat(path); err != nil { + t.Errorf("expected lock file to persist after Unlock, got %v", err) + } + }) +} + +func TestTryLockShared(t *testing.T) { + t.Run("acquires on new file", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + l, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer l.Unlock() + }) + + t.Run("two concurrent shared holders succeed", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + l1, err := TryLockShared(path) + if err != nil { + t.Fatalf("first TryLockShared() error = %v", err) + } + defer l1.Unlock() + + l2, err := TryLockShared(path) + if err != nil { + t.Fatalf("second TryLockShared() error = %v", err) + } + defer l2.Unlock() + }) + + t.Run("shared blocks exclusive", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + l, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer l.Unlock() + + _, err = TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "compile"}) + if err != ErrLocked { + t.Fatalf("expected ErrLocked, got %v", err) + } + }) + + t.Run("shared does not touch file contents", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + if err := os.WriteFile(path, []byte("pre-existing"), 0o644); err != nil { + t.Fatal(err) + } + + l, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer l.Unlock() + + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if string(data) != "pre-existing" { + t.Errorf("shared acquire should not modify file, got %q", string(data)) + } + }) +} + +func TestDowngrade(t *testing.T) { + t.Run("downgrade allows new shared acquire", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + w, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + defer w.Unlock() + + // Before downgrade, shared is blocked. + if _, err := TryLockShared(path); err != ErrLocked { + t.Fatalf("expected ErrLocked before downgrade, got %v", err) + } + + if err := w.Downgrade(); err != nil { + t.Fatalf("Downgrade() error = %v", err) + } + + r, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() after downgrade error = %v", err) + } + defer r.Unlock() + }) + + t.Run("downgrade blocks new exclusive", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + w, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + defer w.Unlock() + + if err := w.Downgrade(); err != nil { + t.Fatalf("Downgrade() error = %v", err) + } + + _, err = TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "another"}) + if err != ErrLocked { + t.Fatalf("expected ErrLocked from exclusive-after-downgrade, got %v", err) + } + }) + + t.Run("downgrade clears stamped meta", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + w, err := TryLockExclusive(path, LockMeta{PID: 77, Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + defer w.Unlock() + + if err := w.Downgrade(); err != nil { + t.Fatalf("Downgrade() error = %v", err) + } + + meta, err := ReadLockMeta(path) + if err != nil { + t.Fatalf("ReadLockMeta() error = %v", err) + } + if meta.PID != 0 || meta.Command != "" { + t.Errorf("expected empty meta after Downgrade, got %+v", meta) + } + }) + + t.Run("downgrade on shared lock errors", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + r, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer r.Unlock() + + if err := r.Downgrade(); err == nil { + t.Fatal("expected error from Downgrade on shared lock") } - defer lock2.Unlock() }) } func TestReadLockMeta(t *testing.T) { - t.Run("reads PID and command from lock file", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - meta := LockMeta{PID: 12345, Command: "compile", Project: "/tmp/my-project"} - lock, err := TryLock(lockPath, meta) + t.Run("reads stamped meta while writer holds", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "m.lock") + meta := LockMeta{PID: 12345, Command: "compile", Project: "/tmp/p"} + l, err := TryLockExclusive(path, meta) if err != nil { - t.Fatalf("TryLock() error = %v", err) + t.Fatalf("TryLockExclusive() error = %v", err) } - defer lock.Unlock() + defer l.Unlock() - got, err := ReadLockMeta(lockPath) + got, err := ReadLockMeta(path) if err != nil { t.Fatalf("ReadLockMeta() error = %v", err) } - if got.PID != 12345 { - t.Errorf("PID = %d, want 12345", got.PID) + if got.PID != 12345 || got.Command != "compile" || got.Project != "/tmp/p" { + t.Errorf("unexpected meta: %+v", got) } - if got.Command != "compile" { - t.Errorf("Command = %q, want %q", got.Command, "compile") + }) + + t.Run("returns empty LockMeta when only readers hold", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "m.lock") + r, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer r.Unlock() + + got, err := ReadLockMeta(path) + if err != nil { + t.Fatalf("ReadLockMeta() error = %v", err) } - if got.Project != "/tmp/my-project" { - t.Errorf("Project = %q, want %q", got.Project, "/tmp/my-project") + if got.PID != 0 || got.Command != "" { + t.Errorf("expected empty meta, got %+v", got) } }) @@ -83,20 +261,20 @@ func TestPruneLockPath(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) - lockPath, err := PruneLockPath() + got, err := PruneLockPath() if err != nil { t.Fatalf("PruneLockPath() error = %v", err) } expected := filepath.Join(home, ".opentaint", ".prune.lock") - if lockPath != expected { - t.Errorf("got %q, want %q", lockPath, expected) + if got != expected { + t.Errorf("got %q, want %q", got, expected) } } -func TestCompileLockPath(t *testing.T) { - result := CompileLockPath("/home/user/.opentaint/cache/my-project-abc12345") - expected := "/home/user/.opentaint/cache/my-project-abc12345/.compile.lock" - if result != expected { - t.Errorf("got %q, want %q", result, expected) +func TestCacheLockPath(t *testing.T) { + got := CacheLockPath("/home/user/.opentaint/cache/my-project-abc12345") + expected := "/home/user/.opentaint/cache/my-project-abc12345/.cache.lock" + if got != expected { + t.Errorf("got %q, want %q", got, expected) } } From 9f7ec45254cb9782cca1e644e2a4b76ec5e8a620 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:04:33 +0300 Subject: [PATCH 02/15] fix(cli): Truncate lock meta before downgrade to avoid stale PID Move os.Truncate before flock.RLock in Downgrade so that a concurrent prune reading ReadLockMeta in the narrow window after the kernel mode transitions to LOCK_SH never sees a stale writer PID. If the truncate fails the kernel mode is not touched, preserving the exclusive hold. Expand the comment to document gofrs dual-flag (f.l + f.r) state that results from calling RLock on an already-exclusive fd. --- cli/internal/utils/lock.go | 17 +++++++++++++---- cli/internal/utils/lock_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/cli/internal/utils/lock.go b/cli/internal/utils/lock.go index 3ad26e68..ffa3f98f 100644 --- a/cli/internal/utils/lock.go +++ b/cli/internal/utils/lock.go @@ -80,19 +80,28 @@ func TryLockShared(lockPath string) (*FileLock, error) { // Downgrade atomically converts a held LOCK_EX to LOCK_SH on the same fd and // truncates the metadata file. After Downgrade the handle may still be // released by Unlock. Calling Downgrade on a shared handle returns an error. +// +// Metadata is truncated *before* the kernel mode transition so that any +// concurrent prune that fails to acquire exclusive can never see stale +// writer PID under a shared lock. If the truncate fails, the kernel mode +// is not touched and the handle remains exclusively held — the caller can +// continue as if Downgrade were never called. func (l *FileLock) Downgrade() error { if !l.exclusive { return errors.New("Downgrade called on non-exclusive lock") } + if err := os.Truncate(l.path, 0); err != nil { + return fmt.Errorf("failed to clear lock metadata on downgrade: %w", err) + } // gofrs/flock.RLock on a held exclusive fd issues unix.Flock(fd, LOCK_SH) - // on the same descriptor, which is an atomic downgrade on POSIX. + // on the same descriptor, which is an atomic downgrade on POSIX. After + // this call, gofrs internally has both its f.l and f.r fields set to + // true; that is harmless because its Unlock path issues LOCK_UN whenever + // either flag is set. if err := l.flock.RLock(); err != nil { return fmt.Errorf("failed to downgrade lock: %w", err) } l.exclusive = false - if err := os.Truncate(l.path, 0); err != nil { - return fmt.Errorf("failed to clear lock metadata on downgrade: %w", err) - } return nil } diff --git a/cli/internal/utils/lock_test.go b/cli/internal/utils/lock_test.go index 77fb7266..2c99c981 100644 --- a/cli/internal/utils/lock_test.go +++ b/cli/internal/utils/lock_test.go @@ -211,6 +211,33 @@ func TestDowngrade(t *testing.T) { t.Fatal("expected error from Downgrade on shared lock") } }) + + t.Run("downgrade truncates meta even if RLock fails (no-op test)", func(t *testing.T) { + // We can't easily force flock.RLock to fail in a unit test, but we + // can assert the error contract: on any Downgrade error, meta must + // still be either fully cleared or fully intact — never partially. + // The actual ordering is tested implicitly by the "downgrade clears + // stamped meta" case. This test documents the intent. + path := filepath.Join(t.TempDir(), "d.lock") + w, err := TryLockExclusive(path, LockMeta{PID: 99, Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + defer w.Unlock() + + if err := w.Downgrade(); err != nil { + t.Fatalf("Downgrade() error = %v", err) + } + + // File must be empty (truncation happened). + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if len(data) != 0 { + t.Errorf("expected empty meta after Downgrade, got %q", string(data)) + } + }) } func TestReadLockMeta(t *testing.T) { From f4d282c31e619a157ca2467a92bca957cf0d1dc3 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:06:18 +0300 Subject: [PATCH 03/15] refactor(cli): Migrate lock callers to exclusive/shared API --- cli/cmd/prune.go | 2 +- cli/cmd/scan.go | 4 ++-- cli/internal/utils/lock.go | 17 ----------------- cli/internal/utils/prune.go | 4 ++-- cli/internal/utils/prune_test.go | 6 +++--- 5 files changed, 8 insertions(+), 25 deletions(-) diff --git a/cli/cmd/prune.go b/cli/cmd/prune.go index 4c5e2b37..820b1a49 100644 --- a/cli/cmd/prune.go +++ b/cli/cmd/prune.go @@ -86,7 +86,7 @@ With --all: prunes everything including logs and install-tier.`, if err != nil { out.Fatalf("Failed to resolve prune lock path: %s", err) } - pruneLock, err := utils.TryLock(pruneLockPath, utils.LockMeta{ + pruneLock, err := utils.TryLockExclusive(pruneLockPath, utils.LockMeta{ PID: os.Getpid(), Command: "prune", }) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 4311b012..ee62f3a3 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -424,8 +424,8 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { } } - compileLock, lockErr := utils.TryLock( - utils.CompileLockPath(projectCachePath), + compileLock, lockErr := utils.TryLockExclusive( + utils.CacheLockPath(projectCachePath), utils.LockMeta{PID: os.Getpid(), Command: "compile", Project: absUserProjectRoot}, ) if lockErr == utils.ErrLocked { diff --git a/cli/internal/utils/lock.go b/cli/internal/utils/lock.go index ffa3f98f..4de3374f 100644 --- a/cli/internal/utils/lock.go +++ b/cli/internal/utils/lock.go @@ -158,20 +158,3 @@ func PruneLockPath() (string, error) { func CacheLockPath(projectCachePath string) string { return filepath.Join(projectCachePath, ".cache.lock") } - -// TryLock is a deprecated alias for TryLockExclusive, retained during the -// call-site migration in task 2 of the reader/writer lock rollout. Remove -// once no callers remain. -// -// Deprecated: use TryLockExclusive. -func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { - return TryLockExclusive(lockPath, meta) -} - -// CompileLockPath is a deprecated alias for CacheLockPath, retained during -// the call-site migration in task 2. -// -// Deprecated: use CacheLockPath. -func CompileLockPath(projectCachePath string) string { - return CacheLockPath(projectCachePath) -} diff --git a/cli/internal/utils/prune.go b/cli/internal/utils/prune.go index d5b6ec85..12a0f695 100644 --- a/cli/internal/utils/prune.go +++ b/cli/internal/utils/prune.go @@ -211,8 +211,8 @@ func ScanForStaleArtifacts(categories PruneCategory) (*PruneResult, error) { continue } projectCachePath := filepath.Join(modelsDir, modelEntry.Name()) - lockPath := CompileLockPath(projectCachePath) - lock, lockErr := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "prune"}) + lockPath := CacheLockPath(projectCachePath) + lock, lockErr := TryLockExclusive(lockPath, LockMeta{PID: os.Getpid(), Command: "prune"}) if lockErr == ErrLocked { meta, _ := ReadLockMeta(lockPath) result.AddSkipped(SkippedProject{Path: projectCachePath, Meta: meta}) diff --git a/cli/internal/utils/prune_test.go b/cli/internal/utils/prune_test.go index 87e6c52f..65eb9778 100644 --- a/cli/internal/utils/prune_test.go +++ b/cli/internal/utils/prune_test.go @@ -624,9 +624,9 @@ func TestScanForStaleArtifacts_LockedModelSkipped(t *testing.T) { projectDir := filepath.Join(home, ".opentaint", "cache", "my-project-a1b2c3d4") createTestFile(t, filepath.Join(projectDir, "project-model", "project.yaml"), 50) - // Hold the compile lock - lockPath := CompileLockPath(projectDir) - lock, err := TryLock(lockPath, LockMeta{PID: 99999, Command: "compile"}) + // Hold the cache lock exclusively (simulate in-progress compile). + lockPath := CacheLockPath(projectDir) + lock, err := TryLockExclusive(lockPath, LockMeta{PID: 99999, Command: "compile"}) if err != nil { t.Fatalf("failed to acquire compile lock: %v", err) } From af98ca2731880ab0a02dc0dcb004480cfdfc23c9 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:08:56 +0300 Subject: [PATCH 04/15] fix(cli): Update stale error string to match cache-lock naming --- cli/internal/utils/prune_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/internal/utils/prune_test.go b/cli/internal/utils/prune_test.go index 65eb9778..39c67414 100644 --- a/cli/internal/utils/prune_test.go +++ b/cli/internal/utils/prune_test.go @@ -628,7 +628,7 @@ func TestScanForStaleArtifacts_LockedModelSkipped(t *testing.T) { lockPath := CacheLockPath(projectDir) lock, err := TryLockExclusive(lockPath, LockMeta{PID: 99999, Command: "compile"}) if err != nil { - t.Fatalf("failed to acquire compile lock: %v", err) + t.Fatalf("failed to acquire cache lock: %v", err) } defer lock.Unlock() From 7a4cb5e7cc854c04576398c5c94e8cf38ddb71ea Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:10:39 +0300 Subject: [PATCH 05/15] feat(cli): Add compile-complete marker helpers for cache --- cli/internal/utils/model_cache.go | 35 ++++++++++++++ cli/internal/utils/model_cache_test.go | 65 ++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/cli/internal/utils/model_cache.go b/cli/internal/utils/model_cache.go index d89e827d..5925f91c 100644 --- a/cli/internal/utils/model_cache.go +++ b/cli/internal/utils/model_cache.go @@ -111,6 +111,41 @@ func CachedProjectModelPath(cacheDir string) string { return filepath.Join(cacheDir, projectModelDir) } +// compileCompleteMarker is the sentinel file written as the final step of a +// successful compile. Its presence proves the whole model is on disk. +const compileCompleteMarker = ".compile-complete" + +// CompileCompleteMarkerPath returns the absolute path to the compile-complete +// marker inside a cache directory. +func CompileCompleteMarkerPath(cacheDir string) string { + return filepath.Join(cacheDir, projectModelDir, compileCompleteMarker) +} + +// IsCachedModelComplete reports whether cacheDir holds a fully-written +// project model. Both project.yaml and the compile-complete marker must +// exist. A crashed mid-compile leaves the marker absent and returns false. +func IsCachedModelComplete(cacheDir string) bool { + pm := filepath.Join(cacheDir, projectModelDir) + if _, err := os.Stat(filepath.Join(pm, "project.yaml")); err != nil { + return false + } + if _, err := os.Stat(filepath.Join(pm, compileCompleteMarker)); err != nil { + return false + } + return true +} + +// MarkCompileComplete writes the compile-complete marker as the very last +// step of a successful compile. The caller must have just populated +// /project-model/ and must hold the cache exclusive lock. +func MarkCompileComplete(cacheDir string) error { + markerPath := CompileCompleteMarkerPath(cacheDir) + if err := os.WriteFile(markerPath, nil, 0o644); err != nil { + return fmt.Errorf("failed to write compile-complete marker: %w", err) + } + return nil +} + // CreateStagingDir creates a staging directory inside cacheDir for isolated compilation. // Returns the path to the staging directory (e.g. /.staging-XXXX/). func CreateStagingDir(cacheDir string) (string, error) { diff --git a/cli/internal/utils/model_cache_test.go b/cli/internal/utils/model_cache_test.go index a97b3479..42a0f9b2 100644 --- a/cli/internal/utils/model_cache_test.go +++ b/cli/internal/utils/model_cache_test.go @@ -325,3 +325,68 @@ func TestGetProjectLogPath(t *testing.T) { t.Errorf("got %q, want %q", logPath, expected) } } + +func TestCompileCompleteMarkerPath(t *testing.T) { + cacheDir := "/home/user/.opentaint/cache/my-project-abc12345" + got := CompileCompleteMarkerPath(cacheDir) + expected := "/home/user/.opentaint/cache/my-project-abc12345/project-model/.compile-complete" + if got != expected { + t.Errorf("got %q, want %q", got, expected) + } +} + +func TestIsCachedModelComplete(t *testing.T) { + t.Run("false for non-existent cache", func(t *testing.T) { + cacheDir := t.TempDir() + if IsCachedModelComplete(cacheDir) { + t.Error("empty cache dir should not be complete") + } + }) + + t.Run("false when project.yaml missing", func(t *testing.T) { + cacheDir := t.TempDir() + // Only the marker, no project.yaml. + createTestFile(t, filepath.Join(cacheDir, "project-model", ".compile-complete"), 0) + if IsCachedModelComplete(cacheDir) { + t.Error("missing project.yaml should make cache incomplete") + } + }) + + t.Run("false when marker missing", func(t *testing.T) { + cacheDir := t.TempDir() + // Only project.yaml, no marker. + createTestFile(t, filepath.Join(cacheDir, "project-model", "project.yaml"), 10) + if IsCachedModelComplete(cacheDir) { + t.Error("missing marker should make cache incomplete") + } + }) + + t.Run("true when both present", func(t *testing.T) { + cacheDir := t.TempDir() + createTestFile(t, filepath.Join(cacheDir, "project-model", "project.yaml"), 10) + createTestFile(t, filepath.Join(cacheDir, "project-model", ".compile-complete"), 0) + if !IsCachedModelComplete(cacheDir) { + t.Error("both files present should mean complete") + } + }) +} + +func TestMarkCompileComplete(t *testing.T) { + cacheDir := t.TempDir() + // The caller always creates project-model/ before calling. + if err := os.MkdirAll(filepath.Join(cacheDir, "project-model"), 0o755); err != nil { + t.Fatal(err) + } + + if err := MarkCompileComplete(cacheDir); err != nil { + t.Fatalf("MarkCompileComplete() error = %v", err) + } + + info, err := os.Stat(CompileCompleteMarkerPath(cacheDir)) + if err != nil { + t.Fatalf("marker should exist: %v", err) + } + if info.Size() != 0 { + t.Errorf("marker should be empty, got size %d", info.Size()) + } +} From 9270670e9e26db5c4f22a14afd3cc86e05d6177e Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:13:21 +0300 Subject: [PATCH 06/15] refactor(cli): Reuse CachedProjectModelPath in marker check --- cli/internal/utils/model_cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/internal/utils/model_cache.go b/cli/internal/utils/model_cache.go index 5925f91c..76be69a2 100644 --- a/cli/internal/utils/model_cache.go +++ b/cli/internal/utils/model_cache.go @@ -125,7 +125,7 @@ func CompileCompleteMarkerPath(cacheDir string) string { // project model. Both project.yaml and the compile-complete marker must // exist. A crashed mid-compile leaves the marker absent and returns false. func IsCachedModelComplete(cacheDir string) bool { - pm := filepath.Join(cacheDir, projectModelDir) + pm := CachedProjectModelPath(cacheDir) if _, err := os.Stat(filepath.Join(pm, "project.yaml")); err != nil { return false } From 6f3f2d00435415a2b7719fb88a938184fdaa5082 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:17:03 +0300 Subject: [PATCH 07/15] feat(cli): Use reader/writer cache lock and compile-complete marker --- cli/cmd/scan.go | 96 +++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index ee62f3a3..fb61a098 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -65,11 +65,10 @@ func (m ScanMode) String() string { // scanConfig holds the resolved paths and flags for a scan invocation. type scanConfig struct { mode ScanMode - absProjectModel string // absolute path to the project model (cached or staging) + absProjectModel string // absolute path to the project model (always the cache dir when projectCachePath is set) projectCachePath string // cache dir for this project (empty for explicit model / dry-run) - stagingDir string // staging dir path (empty when not compiling or dry-run) needsCompilation bool // true when compilation is needed before scanning - compileLock *utils.FileLock + cacheLock *utils.FileLock } // scanCmd represents the scan command @@ -161,8 +160,8 @@ func scan(cmd *cobra.Command) { cfg := resolveScanConfig(absUserProjectRoot) defer func() { - if cfg.compileLock != nil { - cfg.compileLock.Unlock() + if cfg.cacheLock != nil { + cfg.cacheLock.Unlock() } }() @@ -173,12 +172,6 @@ func scan(cmd *cobra.Command) { absProjectModelPath := cfg.absProjectModel - cleanupStaging := func() { - if cfg.stagingDir != "" { - utils.CleanupStagingDir(cfg.stagingDir) - } - } - var absRuleSetPaths []RulesetType var userRuleSetPath = Ruleset @@ -278,32 +271,40 @@ func scan(cmd *cobra.Command) { out.Fatalf("Failed to resolve Java for compilation: %s", err) } + // Wipe any residue from a prior crashed compile before writing new output. + if cfg.projectCachePath != "" { + if err := os.RemoveAll(cfg.absProjectModel); err != nil { + out.Fatalf("Failed to prepare cache directory: %s", err) + } + } + if err := out.RunWithSpinner("Compiling project model", func() error { return compile(absUserProjectRoot, cfg.absProjectModel, autobuilderJarPath, compileJavaRunner, Internal) }); err != nil { - cleanupStaging() + if cfg.projectCachePath != "" { + _ = os.RemoveAll(cfg.absProjectModel) + } out.Error("Native compile has failed: " + err.Error()) suggest("If native compilation fails due to missing required Java, set JAVA_HOME according to the project's requirements or try Docker-based scan:", utils.BuildScanCommandWithDocker(currentScanBuilder(""), absUserProjectRoot, absSarifReportPath, Ruleset)) os.Exit(1) } out.Blank() - // Promote staging to cache + // Mark the cache as valid, then downgrade to a reader so other scans + // can run the analyzer against the freshly-compiled model in parallel. if cfg.projectCachePath != "" { - if err := utils.PromoteStagingToCache(cfg.projectCachePath, cfg.stagingDir); err != nil { - output.LogInfof("Failed to promote staging to cache: %v", err) - } else { - cfg.stagingDir = "" // staging dir no longer exists - absProjectModelPath = utils.CachedProjectModelPath(cfg.projectCachePath) - output.LogDebugf("Model cached at: %s", absProjectModelPath) + if err := utils.MarkCompileComplete(cfg.projectCachePath); err != nil { + out.Fatalf("Failed to mark model complete: %s", err) + } + if err := cfg.cacheLock.Downgrade(); err != nil { + output.LogDebugf("Cache lock downgrade failed, continuing under exclusive: %v", err) } } - // Recompute SARIF path against the promoted model path - if SarifReportPath == "" { - absSarifReportPath = utils.DefaultSarifReportPath(absProjectModelPath) - sarifReportName = filepath.Base(absSarifReportPath) - } + // absProjectModelPath is already cfg.absProjectModel (= cached model + // path in the non-dry-run flow), so there is nothing to recompute — + // the pre-compile absSarifReportPath already points at the final + // cache location. printCompileSummary(absProjectModelPath) } @@ -358,7 +359,6 @@ func scan(cmd *cobra.Command) { if err := out.RunWithSpinner("Analyzing project", func() error { return scanProject(nativeBuilder, analyzerJavaRunner) }); err != nil { - cleanupStaging() out.Fatalf("Native scan has failed: %s", err) } @@ -413,19 +413,35 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { } cachedModelPath := utils.CachedProjectModelPath(projectCachePath) - if !Recompile { - if _, serr := os.Stat(filepath.Join(cachedModelPath, "project.yaml")); serr == nil { - output.LogDebugf("Reusing cached model at: %s", cachedModelPath) - return scanConfig{ - mode: Scan, - absProjectModel: cachedModelPath, - projectCachePath: projectCachePath, + cacheLockPath := utils.CacheLockPath(projectCachePath) + + // Fast path: if we're not forced to recompile and the cache looks + // complete on disk, take a shared lock and re-check under the lock. + if !Recompile && utils.IsCachedModelComplete(projectCachePath) { + sharedLock, sharedErr := utils.TryLockShared(cacheLockPath) + if sharedErr == nil { + if utils.IsCachedModelComplete(projectCachePath) { + output.LogDebugf("Reusing cached model at: %s", cachedModelPath) + return scanConfig{ + mode: Scan, + absProjectModel: cachedModelPath, + projectCachePath: projectCachePath, + cacheLock: sharedLock, + } } + // Marker vanished between the outer check and the lock + // (writer raced ahead of us). Fall through to compile path. + sharedLock.Unlock() + } else if sharedErr != utils.ErrLocked { + out.Fatalf("Failed to acquire cache read lock: %s", sharedErr) } + // sharedErr == ErrLocked means a writer holds the cache; we're about + // to ask for exclusive below, which will also fail with ErrLocked and + // produce the same "compilation already in progress" message. } - compileLock, lockErr := utils.TryLockExclusive( - utils.CacheLockPath(projectCachePath), + cacheLock, lockErr := utils.TryLockExclusive( + cacheLockPath, utils.LockMeta{PID: os.Getpid(), Command: "compile", Project: absUserProjectRoot}, ) if lockErr == utils.ErrLocked { @@ -434,21 +450,15 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { os.Exit(1) } if lockErr != nil { - out.Fatalf("Failed to acquire compile lock: %s", lockErr) - } - - stagingDir, serr := utils.CreateStagingDir(projectCachePath) - if serr != nil { - out.Fatalf("Failed to create staging directory: %s", serr) + out.Fatalf("Failed to acquire cache lock: %s", lockErr) } return scanConfig{ mode: CompileAndScan, - absProjectModel: filepath.Join(stagingDir, "project-model"), + absProjectModel: cachedModelPath, projectCachePath: projectCachePath, - stagingDir: stagingDir, needsCompilation: true, - compileLock: compileLock, + cacheLock: cacheLock, } } From b2a365f97296cc23a554a64d386516168941360a Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:22:16 +0300 Subject: [PATCH 08/15] fix(cli): Clean partial model on mark failure and surface downgrade errors --- cli/cmd/scan.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index fb61a098..5108bc6f 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -294,18 +294,14 @@ func scan(cmd *cobra.Command) { // can run the analyzer against the freshly-compiled model in parallel. if cfg.projectCachePath != "" { if err := utils.MarkCompileComplete(cfg.projectCachePath); err != nil { + _ = os.RemoveAll(cfg.absProjectModel) out.Fatalf("Failed to mark model complete: %s", err) } if err := cfg.cacheLock.Downgrade(); err != nil { - output.LogDebugf("Cache lock downgrade failed, continuing under exclusive: %v", err) + output.LogInfof("Cache lock downgrade failed, continuing under exclusive: %v", err) } } - // absProjectModelPath is already cfg.absProjectModel (= cached model - // path in the non-dry-run flow), so there is nothing to recompute — - // the pre-compile absSarifReportPath already points at the final - // cache location. - printCompileSummary(absProjectModelPath) } @@ -472,7 +468,7 @@ func printScanInfo(cmd *cobra.Command, cfg scanConfig, absSemgrepRuleLoadTracePa if cfg.needsCompilation { sb.Field("Project", absUserProjectRoot) if cfg.projectCachePath != "" { - sb.Field("Project model", utils.CachedProjectModelPath(cfg.projectCachePath)) + sb.Field("Project model", cfg.absProjectModel) } } else { sb.Field("Project model", cfg.absProjectModel) From 818670d25727fa589ddc87b791e1fcc82b09f7ad Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:24:31 +0300 Subject: [PATCH 09/15] refactor(cli): Drop staging-dir abstraction from cache layer --- cli/internal/utils/model_cache.go | 39 ------- cli/internal/utils/model_cache_test.go | 143 ------------------------- cli/internal/utils/prune.go | 24 ++--- cli/internal/utils/prune_test.go | 22 ---- 4 files changed, 8 insertions(+), 220 deletions(-) diff --git a/cli/internal/utils/model_cache.go b/cli/internal/utils/model_cache.go index 76be69a2..c4d2bf8b 100644 --- a/cli/internal/utils/model_cache.go +++ b/cli/internal/utils/model_cache.go @@ -146,45 +146,6 @@ func MarkCompileComplete(cacheDir string) error { return nil } -// CreateStagingDir creates a staging directory inside cacheDir for isolated compilation. -// Returns the path to the staging directory (e.g. /.staging-XXXX/). -func CreateStagingDir(cacheDir string) (string, error) { - stagingPath, err := os.MkdirTemp(cacheDir, ".staging-") - if err != nil { - return "", fmt.Errorf("failed to create staging directory: %w", err) - } - return stagingPath, nil -} - -// PromoteStagingToCache moves the compiled project-model/ from the staging -// directory into the cache, replacing any existing cached model. -// The caller must hold the compile lock to prevent concurrent promotions. -func PromoteStagingToCache(cacheDir, stagingPath string) error { - srcPM := filepath.Join(stagingPath, projectModelDir) - destPM := filepath.Join(cacheDir, projectModelDir) - - // Remove existing cached model if present - if err := os.RemoveAll(destPM); err != nil { - return fmt.Errorf("failed to remove old cached model: %w", err) - } - - // Move staging project-model/ to cache - if err := os.Rename(srcPM, destPM); err != nil { - return fmt.Errorf("failed to move staging model to cache: %w", err) - } - - // Remove the staging dir and any leftover files (e.g. build logs) - _ = os.RemoveAll(stagingPath) - - return nil -} - -// CleanupStagingDir removes a staging directory and all its contents. -// Used when compilation fails and the staging output is not needed. -func CleanupStagingDir(stagingPath string) { - _ = os.RemoveAll(stagingPath) -} - // ProjectPathSlugHash returns a deterministic directory name for a project path. // Format: last-path-segment-8hexchars (e.g. "my-project-a1b2c3d4"). // Uses only the last segment of the path for readability; the hash ensures uniqueness. diff --git a/cli/internal/utils/model_cache_test.go b/cli/internal/utils/model_cache_test.go index 42a0f9b2..79882a85 100644 --- a/cli/internal/utils/model_cache_test.go +++ b/cli/internal/utils/model_cache_test.go @@ -142,149 +142,6 @@ func TestGetProjectCachePath_SymlinksResolved(t *testing.T) { } } -func TestCreateStagingDir(t *testing.T) { - cacheDir := t.TempDir() - - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Should be under cacheDir - if !strings.HasPrefix(stagingPath, cacheDir) { - t.Errorf("staging path %q should be under %q", stagingPath, cacheDir) - } - - // Directory name should start with .staging- - dirName := filepath.Base(stagingPath) - if !strings.HasPrefix(dirName, ".staging-") { - t.Errorf("expected .staging- prefix, got %q", dirName) - } - - // Staging dir itself should exist - info, err := os.Stat(stagingPath) - if err != nil { - t.Fatalf("staging dir not created: %v", err) - } - if !info.IsDir() { - t.Error("expected directory") - } -} - -func TestPromoteStagingToCache(t *testing.T) { - cacheDir := t.TempDir() - - // Create a staging dir with content - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - // Add a file to project-model to simulate compilation output - createTestFile(t, filepath.Join(stagingPath, "project-model", "project.yaml"), 10) - - // Promote - err = PromoteStagingToCache(cacheDir, stagingPath) - if err != nil { - t.Fatalf("PromoteStagingToCache() error = %v", err) - } - - // project-model directory should exist (not a symlink) - pmPath := filepath.Join(cacheDir, "project-model") - info, err := os.Lstat(pmPath) - if err != nil { - t.Fatalf("expected project-model at %s: %v", pmPath, err) - } - if !info.IsDir() { - t.Error("expected directory, not symlink or file") - } - - // The file should be accessible - data, err := os.ReadFile(filepath.Join(pmPath, "project.yaml")) - if err != nil { - t.Fatalf("failed to read project.yaml: %v", err) - } - if len(data) != 10 { - t.Errorf("expected 10 bytes, got %d", len(data)) - } - - // Staging dir should be cleaned up - if _, err := os.Stat(stagingPath); !os.IsNotExist(err) { - t.Error("staging dir should be removed after promotion") - } -} - -func TestPromoteStagingToCache_ReplacesExisting(t *testing.T) { - cacheDir := t.TempDir() - - // First promotion - staging1, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - createTestFile(t, filepath.Join(staging1, "project-model", "project.yaml"), 10) - if err := PromoteStagingToCache(cacheDir, staging1); err != nil { - t.Fatal(err) - } - - // Second promotion replaces the first - staging2, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - createTestFile(t, filepath.Join(staging2, "project-model", "project.yaml"), 20) - if err := PromoteStagingToCache(cacheDir, staging2); err != nil { - t.Fatal(err) - } - - // New content should be accessible (20 bytes from second promotion) - data, err := os.ReadFile(filepath.Join(cacheDir, "project-model", "project.yaml")) - if err != nil { - t.Fatalf("failed to read project.yaml: %v", err) - } - if len(data) != 20 { - t.Errorf("expected 20 bytes from second promotion, got %d", len(data)) - } -} - -func TestPromoteStagingToCache_CleansLeftoverFiles(t *testing.T) { - cacheDir := t.TempDir() - - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - // Compilation output - createTestFile(t, filepath.Join(stagingPath, "project-model", "project.yaml"), 10) - // Leftover file outside project-model/ (e.g. build log) - createTestFile(t, filepath.Join(stagingPath, "build.log"), 100) - - err = PromoteStagingToCache(cacheDir, stagingPath) - if err != nil { - t.Fatalf("PromoteStagingToCache() error = %v", err) - } - - // Staging dir should be fully removed despite leftover files - if _, err := os.Stat(stagingPath); !os.IsNotExist(err) { - t.Errorf("staging dir should be removed after promotion, but still exists") - } -} - -func TestCleanupStagingDir(t *testing.T) { - cacheDir := t.TempDir() - - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - createTestFile(t, filepath.Join(stagingPath, "project-model", "file.txt"), 5) - - CleanupStagingDir(stagingPath) - - if _, err := os.Stat(stagingPath); !os.IsNotExist(err) { - t.Error("staging dir should be removed after cleanup") - } -} - func TestGetLogCacheDirPath(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) diff --git a/cli/internal/utils/prune.go b/cli/internal/utils/prune.go index 12a0f695..60a22270 100644 --- a/cli/internal/utils/prune.go +++ b/cli/internal/utils/prune.go @@ -252,25 +252,17 @@ func ScanForStaleArtifacts(categories PruneCategory) (*PruneResult, error) { return result, nil } -// scanProjectCacheSubdirs adds only project-model/ and .staging-* subdirs -// from a project cache directory, preserving logs and other data. +// scanProjectCacheSubdirs adds only the project-model/ subdir of a project +// cache directory so that adjacent files (lock, logs) are preserved. func scanProjectCacheSubdirs(projectCachePath string, result *PruneResult) { - entries, err := os.ReadDir(projectCachePath) - if err != nil { + pmPath := filepath.Join(projectCachePath, projectModelDir) + info, err := os.Stat(pmPath) + if err != nil || !info.IsDir() { return } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - name := entry.Name() - if name == projectModelDir || strings.HasPrefix(name, ".staging-") { - subPath := filepath.Join(projectCachePath, name) - size, _ := dirSize(subPath) - if size > 0 { - result.Add(StaleArtifact{Path: subPath, Size: size, Kind: StaleKindModel}) - } - } + size, _ := dirSize(pmPath) + if size > 0 { + result.Add(StaleArtifact{Path: pmPath, Size: size, Kind: StaleKindModel}) } } diff --git a/cli/internal/utils/prune_test.go b/cli/internal/utils/prune_test.go index 39c67414..0dee6103 100644 --- a/cli/internal/utils/prune_test.go +++ b/cli/internal/utils/prune_test.go @@ -475,28 +475,6 @@ func TestScanForStaleArtifacts_CachedModels(t *testing.T) { } }) - t.Run("stale staging dir is prunable", func(t *testing.T) { - home := t.TempDir() - t.Setenv("HOME", home) - projectDir := filepath.Join(home, ".opentaint", "cache", "my-project-a1b2c3d4") - stagingDir := filepath.Join(projectDir, ".staging-12345-9999") - createTestFile(t, filepath.Join(stagingDir, "project-model", "project.yaml"), 50) - - result, err := ScanForStaleArtifacts(PruneCategoriesDefault) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - assertHasKind(t, result, StaleKindModel) - // Should target the staging dir specifically - for _, s := range result.Stale { - if s.Kind == StaleKindModel { - if s.Path != stagingDir { - t.Errorf("expected path %q, got %q", stagingDir, s.Path) - } - } - } - }) - t.Run("empty models dir produces no stale", func(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) From 296ab26831f1aca8582ff124e6301fae83b3a95c Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:27:26 +0300 Subject: [PATCH 10/15] docs(cli): Drop staging-directory reference from prune help text --- cli/cmd/prune.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/cmd/prune.go b/cli/cmd/prune.go index 820b1a49..383656ec 100644 --- a/cli/cmd/prune.go +++ b/cli/cmd/prune.go @@ -63,13 +63,13 @@ var pruneCmd = &cobra.Command{ Identifies artifacts that are no longer needed: - Old versions of analyzer JARs, autobuilder JARs, and rules - Downloaded JDK/JRE versions that don't match the current version -- Cached project models and staging directories +- Cached project models Use category flags to prune selectively: --artifacts Stale analyzer and autobuilder JARs --rules Stale rules directories --jdk Old JDK/JRE versions - --models Cached project models and staging directories + --models Cached project models --logs Project log files --install Install-tier lib and JRE artifacts (requires re-download) From 9cb89c07e1efe7a68c232a1ec772655031f824fd Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:28:32 +0300 Subject: [PATCH 11/15] test(cli): Add prune coverage for reader-held project cache --- cli/internal/utils/prune_test.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/cli/internal/utils/prune_test.go b/cli/internal/utils/prune_test.go index 0dee6103..b7dce985 100644 --- a/cli/internal/utils/prune_test.go +++ b/cli/internal/utils/prune_test.go @@ -622,6 +622,36 @@ func TestScanForStaleArtifacts_LockedModelSkipped(t *testing.T) { t.Errorf("expected PID 99999, got %d", result.Skipped[0].Meta.PID) } }) + + t.Run("shared-locked project is skipped with empty meta", func(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + projectDir := filepath.Join(home, ".opentaint", "cache", "my-project-a1b2c3d4") + createTestFile(t, filepath.Join(projectDir, "project-model", "project.yaml"), 50) + + // Hold the cache lock as a reader (simulates an in-progress analyze). + lockPath := CacheLockPath(projectDir) + sharedLock, err := TryLockShared(lockPath) + if err != nil { + t.Fatalf("failed to acquire shared cache lock: %v", err) + } + defer sharedLock.Unlock() + + result, err := ScanForStaleArtifacts(PruneCategoryModels) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + assertNoKind(t, result, StaleKindModel) + if len(result.Skipped) != 1 { + t.Fatalf("expected 1 skipped, got %d", len(result.Skipped)) + } + if result.Skipped[0].Meta.PID != 0 { + t.Errorf("expected empty meta for reader-held lock, got PID %d", result.Skipped[0].Meta.PID) + } + if result.Skipped[0].Meta.Command != "" { + t.Errorf("expected empty command for reader-held lock, got %q", result.Skipped[0].Meta.Command) + } + }) } func TestPruneResult_AddSkipped(t *testing.T) { From b87d72c6375f0944751b4a65e2a16348482d59cc Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 14:35:34 +0300 Subject: [PATCH 12/15] docs(cli): Drop residual staging and compile-lock wording --- cli/cmd/prune.go | 2 +- cli/internal/utils/prune.go | 6 +++--- docs/troubleshooting.md | 6 ------ docs/usage.md | 2 +- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/cli/cmd/prune.go b/cli/cmd/prune.go index 383656ec..a4da4e6b 100644 --- a/cli/cmd/prune.go +++ b/cli/cmd/prune.go @@ -158,7 +158,7 @@ func init() { pruneCmd.Flags().BoolVar(&pruneArtifacts, "artifacts", false, "Prune stale analyzer and autobuilder JARs") pruneCmd.Flags().BoolVar(&pruneRules, "rules", false, "Prune stale rules directories") pruneCmd.Flags().BoolVar(&pruneJDK, "jdk", false, "Prune old JDK/JRE versions") - pruneCmd.Flags().BoolVar(&pruneModels, "models", false, "Prune cached project models and staging directories") + pruneCmd.Flags().BoolVar(&pruneModels, "models", false, "Prune cached project models") pruneCmd.Flags().BoolVar(&pruneLogs, "logs", false, "Prune project log files") pruneCmd.Flags().BoolVar(&pruneInstall, "install", false, "Prune install-tier lib and JRE artifacts (requires re-download on next run)") } diff --git a/cli/internal/utils/prune.go b/cli/internal/utils/prune.go index 60a22270..85dd1333 100644 --- a/cli/internal/utils/prune.go +++ b/cli/internal/utils/prune.go @@ -30,7 +30,7 @@ type StaleArtifact struct { Kind string } -// SkippedProject represents a project cache that was skipped because a compile lock was held. +// SkippedProject represents a project cache that was skipped because its cache lock was held. type SkippedProject struct { Path string Meta LockMeta @@ -51,7 +51,7 @@ func (r *PruneResult) Add(a StaleArtifact) { r.TotalCount++ } -// AddSkipped records a project that was skipped due to an active compile lock. +// AddSkipped records a project that was skipped due to an active cache lock holder. func (r *PruneResult) AddSkipped(s SkippedProject) { r.Skipped = append(r.Skipped, s) } @@ -219,7 +219,7 @@ func ScanForStaleArtifacts(categories PruneCategory) (*PruneResult, error) { continue } if lockErr != nil { - output.LogDebugf("Failed to probe compile lock for %s, skipping: %v", projectCachePath, lockErr) + output.LogDebugf("Failed to probe cache lock for %s, skipping: %v", projectCachePath, lockErr) continue } scanProjectCacheSubdirs(projectCachePath, result) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 9ec39ce7..f02f9347 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -101,12 +101,6 @@ This means another `opentaint scan` is currently compiling the same project. Wai opentaint scan --project-model /path/to/model ``` -If the previous process crashed and left a stale staging directory, prune cached models: - -```bash -opentaint prune -``` - ### Clearing the Model Cache To remove all cached project models (stored in `~/.opentaint/cache/`): diff --git a/docs/usage.md b/docs/usage.md index 316392dd..91934747 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -149,7 +149,7 @@ opentaint scan /path/to/project opentaint scan --recompile /path/to/project ``` -If a compilation is already in progress for the same project (detected via a staging directory), the scan aborts with an error instead of compiling concurrently. +If another scan is actively compiling the same project, the scan aborts with an error instead of compiling concurrently. Multiple read-only scans against the same cached model can run in parallel. To remove all cached models: From d22d72bb728057f81e005ac486e7da45ce6dccc9 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 15:41:13 +0300 Subject: [PATCH 13/15] fix(cli): Distinguish compile vs analyze holder in lock-contention error --- cli/cmd/scan.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 5108bc6f..f6e8b6f4 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -432,8 +432,8 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { out.Fatalf("Failed to acquire cache read lock: %s", sharedErr) } // sharedErr == ErrLocked means a writer holds the cache; we're about - // to ask for exclusive below, which will also fail with ErrLocked and - // produce the same "compilation already in progress" message. + // to ask for exclusive below, which will also fail with ErrLocked — + // ReadLockMeta below will surface which command is holding it. } cacheLock, lockErr := utils.TryLockExclusive( @@ -441,7 +441,13 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { utils.LockMeta{PID: os.Getpid(), Command: "compile", Project: absUserProjectRoot}, ) if lockErr == utils.ErrLocked { - out.Error("Compilation already in progress for this project") + // Readers don't stamp metadata (empty LockMeta); writers do. Use that + // to distinguish an in-progress compile from an in-progress analyze. + if meta, _ := utils.ReadLockMeta(cacheLockPath); meta.PID != 0 { + out.Error("Compilation already in progress for this project") + } else { + out.Error("Another scan is currently analyzing this project") + } suggest("To scan an existing model instead", utils.NewScanCommand("").WithProjectModel("").Build()) os.Exit(1) } From ac5685600e2904dc7b34967756ee7270e5996db2 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 16:33:53 +0300 Subject: [PATCH 14/15] refactor(cli): Apply review polish for cache lock and marker helpers Lowercase the Downgrade non-exclusive error to follow Go convention (output.Humanize capitalizes at the display boundary), drop the redundant Downgrade no-op test that duplicated the happy path, and route cached-marker path through CompileCompleteMarkerPath so callers and tests share one source of truth for the marker location. --- cli/internal/utils/lock.go | 4 ++-- cli/internal/utils/lock_test.go | 27 -------------------------- cli/internal/utils/model_cache.go | 2 +- cli/internal/utils/model_cache_test.go | 4 ++-- 4 files changed, 5 insertions(+), 32 deletions(-) diff --git a/cli/internal/utils/lock.go b/cli/internal/utils/lock.go index 4de3374f..f47c7f36 100644 --- a/cli/internal/utils/lock.go +++ b/cli/internal/utils/lock.go @@ -83,12 +83,12 @@ func TryLockShared(lockPath string) (*FileLock, error) { // // Metadata is truncated *before* the kernel mode transition so that any // concurrent prune that fails to acquire exclusive can never see stale -// writer PID under a shared lock. If the truncate fails, the kernel mode +// writer PID under a shared lock. If the truncate fails the kernel mode // is not touched and the handle remains exclusively held — the caller can // continue as if Downgrade were never called. func (l *FileLock) Downgrade() error { if !l.exclusive { - return errors.New("Downgrade called on non-exclusive lock") + return errors.New("downgrade called on non-exclusive lock") } if err := os.Truncate(l.path, 0); err != nil { return fmt.Errorf("failed to clear lock metadata on downgrade: %w", err) diff --git a/cli/internal/utils/lock_test.go b/cli/internal/utils/lock_test.go index 2c99c981..77fb7266 100644 --- a/cli/internal/utils/lock_test.go +++ b/cli/internal/utils/lock_test.go @@ -211,33 +211,6 @@ func TestDowngrade(t *testing.T) { t.Fatal("expected error from Downgrade on shared lock") } }) - - t.Run("downgrade truncates meta even if RLock fails (no-op test)", func(t *testing.T) { - // We can't easily force flock.RLock to fail in a unit test, but we - // can assert the error contract: on any Downgrade error, meta must - // still be either fully cleared or fully intact — never partially. - // The actual ordering is tested implicitly by the "downgrade clears - // stamped meta" case. This test documents the intent. - path := filepath.Join(t.TempDir(), "d.lock") - w, err := TryLockExclusive(path, LockMeta{PID: 99, Command: "compile"}) - if err != nil { - t.Fatalf("TryLockExclusive() error = %v", err) - } - defer w.Unlock() - - if err := w.Downgrade(); err != nil { - t.Fatalf("Downgrade() error = %v", err) - } - - // File must be empty (truncation happened). - data, err := os.ReadFile(path) - if err != nil { - t.Fatal(err) - } - if len(data) != 0 { - t.Errorf("expected empty meta after Downgrade, got %q", string(data)) - } - }) } func TestReadLockMeta(t *testing.T) { diff --git a/cli/internal/utils/model_cache.go b/cli/internal/utils/model_cache.go index c4d2bf8b..d6ba3a97 100644 --- a/cli/internal/utils/model_cache.go +++ b/cli/internal/utils/model_cache.go @@ -129,7 +129,7 @@ func IsCachedModelComplete(cacheDir string) bool { if _, err := os.Stat(filepath.Join(pm, "project.yaml")); err != nil { return false } - if _, err := os.Stat(filepath.Join(pm, compileCompleteMarker)); err != nil { + if _, err := os.Stat(CompileCompleteMarkerPath(cacheDir)); err != nil { return false } return true diff --git a/cli/internal/utils/model_cache_test.go b/cli/internal/utils/model_cache_test.go index 79882a85..2ad6540c 100644 --- a/cli/internal/utils/model_cache_test.go +++ b/cli/internal/utils/model_cache_test.go @@ -203,7 +203,7 @@ func TestIsCachedModelComplete(t *testing.T) { t.Run("false when project.yaml missing", func(t *testing.T) { cacheDir := t.TempDir() // Only the marker, no project.yaml. - createTestFile(t, filepath.Join(cacheDir, "project-model", ".compile-complete"), 0) + createTestFile(t, CompileCompleteMarkerPath(cacheDir), 0) if IsCachedModelComplete(cacheDir) { t.Error("missing project.yaml should make cache incomplete") } @@ -221,7 +221,7 @@ func TestIsCachedModelComplete(t *testing.T) { t.Run("true when both present", func(t *testing.T) { cacheDir := t.TempDir() createTestFile(t, filepath.Join(cacheDir, "project-model", "project.yaml"), 10) - createTestFile(t, filepath.Join(cacheDir, "project-model", ".compile-complete"), 0) + createTestFile(t, CompileCompleteMarkerPath(cacheDir), 0) if !IsCachedModelComplete(cacheDir) { t.Error("both files present should mean complete") } From 7a084aae4b549374ba0484acdb9b2f95fee248b1 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Fri, 17 Apr 2026 16:48:08 +0300 Subject: [PATCH 15/15] fix(cli): Remove project cache dir after pruning its model The .cache.lock file created during the prune scan remained in the project cache directory after DeleteArtifacts removed the model, so removeEmptyParents could never clear the now-orphaned project dir. Re-acquire the cache lock exclusively before deletion and remove the whole project cache dir (model + lock file) in one shot; fall back to removing only project-model/ if another holder grabbed the lock between scan and delete. --- cli/internal/utils/prune.go | 41 +++++++++++++++++++++++++- cli/internal/utils/prune_test.go | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/cli/internal/utils/prune.go b/cli/internal/utils/prune.go index 85dd1333..e0d3db7d 100644 --- a/cli/internal/utils/prune.go +++ b/cli/internal/utils/prune.go @@ -272,10 +272,20 @@ func DeleteArtifacts(artifacts []StaleArtifact) error { emptyDirCandidates := map[string]struct{}{} for _, artifact := range artifacts { + if artifact.Kind == StaleKindModel { + modelsParent, err := deleteModelArtifact(artifact.Path) + if err != nil { + return err + } + if modelsParent != "" { + emptyDirCandidates[modelsParent] = struct{}{} + } + continue + } if err := os.RemoveAll(artifact.Path); err != nil { return fmt.Errorf("failed to remove %s: %w", artifact.Path, err) } - if artifact.Kind == StaleKindModel || artifact.Kind == StaleKindLog { + if artifact.Kind == StaleKindLog { emptyDirCandidates[filepath.Dir(artifact.Path)] = struct{}{} } } @@ -288,6 +298,35 @@ func DeleteArtifacts(artifacts []StaleArtifact) error { return nil } +// deleteModelArtifact removes a cached project-model and, when the cache lock +// is still ours to take, the entire project cache directory (including the +// residual .cache.lock left over from the prior scan). Returns the models/ +// parent directory as a candidate for empty-parent cleanup, or "" if the +// project directory must remain intact (another holder acquired the lock). +func deleteModelArtifact(modelPath string) (string, error) { + projectDir := filepath.Dir(modelPath) + lock, err := TryLockExclusive(CacheLockPath(projectDir), LockMeta{ + PID: os.Getpid(), + Command: "prune", + }) + if err != nil { + // Another process grabbed the cache between scan and delete. + // Fall back to removing only the model; the holder keeps its lock. + if rmErr := os.RemoveAll(modelPath); rmErr != nil { + return "", fmt.Errorf("failed to remove %s: %w", modelPath, rmErr) + } + return "", nil + } + // Remove the whole project cache dir while holding exclusive. The lock + // file is unlinked here; our fd stays valid until Unlock closes it. + if err := os.RemoveAll(projectDir); err != nil { + lock.Unlock() + return "", fmt.Errorf("failed to remove %s: %w", projectDir, err) + } + lock.Unlock() + return filepath.Dir(projectDir), nil +} + // removeEmptyParents attempts to remove dir and its parent if they are empty. // Stops at the first non-empty or non-removable directory. func removeEmptyParents(dir string) { diff --git a/cli/internal/utils/prune_test.go b/cli/internal/utils/prune_test.go index b7dce985..d09eca08 100644 --- a/cli/internal/utils/prune_test.go +++ b/cli/internal/utils/prune_test.go @@ -93,6 +93,55 @@ func TestDeleteArtifacts(t *testing.T) { } }) + t.Run("cleans up project dir with residual cache lock", func(t *testing.T) { + tmpDir := t.TempDir() + cacheDir := filepath.Join(tmpDir, "cache") + projectDir := filepath.Join(cacheDir, "my-project-a1b2c3d4") + pmDir := filepath.Join(projectDir, "project-model") + createTestFile(t, filepath.Join(pmDir, "project.yaml"), 50) + // Simulate the .cache.lock file a prior scan left behind. + createTestFile(t, CacheLockPath(projectDir), 0) + + artifacts := []StaleArtifact{{Path: pmDir, Size: 50, Kind: StaleKindModel}} + if err := DeleteArtifacts(artifacts); err != nil { + t.Fatalf("DeleteArtifacts() error = %v", err) + } + + if _, err := os.Stat(projectDir); !os.IsNotExist(err) { + t.Errorf("expected project cache dir to be removed, got err=%v", err) + } + if _, err := os.Stat(cacheDir); !os.IsNotExist(err) { + t.Errorf("expected empty cache dir to be removed, got err=%v", err) + } + }) + + t.Run("preserves project dir when cache lock is held elsewhere", func(t *testing.T) { + tmpDir := t.TempDir() + cacheDir := filepath.Join(tmpDir, "cache") + projectDir := filepath.Join(cacheDir, "my-project-a1b2c3d4") + pmDir := filepath.Join(projectDir, "project-model") + createTestFile(t, filepath.Join(pmDir, "project.yaml"), 50) + + // Another holder grabbed the lock between scan and delete. + sharedLock, err := TryLockShared(CacheLockPath(projectDir)) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer sharedLock.Unlock() + + artifacts := []StaleArtifact{{Path: pmDir, Size: 50, Kind: StaleKindModel}} + if err := DeleteArtifacts(artifacts); err != nil { + t.Fatalf("DeleteArtifacts() error = %v", err) + } + + if _, err := os.Stat(pmDir); !os.IsNotExist(err) { + t.Errorf("expected project-model to be removed, got err=%v", err) + } + if _, err := os.Stat(projectDir); err != nil { + t.Errorf("expected project cache dir to remain (lock held), got err=%v", err) + } + }) + t.Run("empty slice is no-op", func(t *testing.T) { if err := DeleteArtifacts(nil); err != nil { t.Fatalf("DeleteArtifacts(nil) error = %v", err)