diff --git a/cli/cmd/prune.go b/cli/cmd/prune.go index 4c5e2b37..a4da4e6b 100644 --- a/cli/cmd/prune.go +++ b/cli/cmd/prune.go @@ -63,13 +63,13 @@ var pruneCmd = &cobra.Command{ Identifies artifacts that are no longer needed: - Old versions of analyzer JARs, autobuilder JARs, and rules - Downloaded JDK/JRE versions that don't match the current version -- Cached project models and staging directories +- Cached project models Use category flags to prune selectively: --artifacts Stale analyzer and autobuilder JARs --rules Stale rules directories --jdk Old JDK/JRE versions - --models Cached project models and staging directories + --models Cached project models --logs Project log files --install Install-tier lib and JRE artifacts (requires re-download) @@ -86,7 +86,7 @@ With --all: prunes everything including logs and install-tier.`, if err != nil { out.Fatalf("Failed to resolve prune lock path: %s", err) } - pruneLock, err := utils.TryLock(pruneLockPath, utils.LockMeta{ + pruneLock, err := utils.TryLockExclusive(pruneLockPath, utils.LockMeta{ PID: os.Getpid(), Command: "prune", }) @@ -158,7 +158,7 @@ func init() { pruneCmd.Flags().BoolVar(&pruneArtifacts, "artifacts", false, "Prune stale analyzer and autobuilder JARs") pruneCmd.Flags().BoolVar(&pruneRules, "rules", false, "Prune stale rules directories") pruneCmd.Flags().BoolVar(&pruneJDK, "jdk", false, "Prune old JDK/JRE versions") - pruneCmd.Flags().BoolVar(&pruneModels, "models", false, "Prune cached project models and staging directories") + pruneCmd.Flags().BoolVar(&pruneModels, "models", false, "Prune cached project models") pruneCmd.Flags().BoolVar(&pruneLogs, "logs", false, "Prune project log files") pruneCmd.Flags().BoolVar(&pruneInstall, "install", false, "Prune install-tier lib and JRE artifacts (requires re-download on next run)") } diff --git a/cli/cmd/scan.go b/cli/cmd/scan.go index 4311b012..f6e8b6f4 100644 --- a/cli/cmd/scan.go +++ b/cli/cmd/scan.go @@ -65,11 +65,10 @@ func (m ScanMode) String() string { // scanConfig holds the resolved paths and flags for a scan invocation. type scanConfig struct { mode ScanMode - absProjectModel string // absolute path to the project model (cached or staging) + absProjectModel string // absolute path to the project model (always the cache dir when projectCachePath is set) projectCachePath string // cache dir for this project (empty for explicit model / dry-run) - stagingDir string // staging dir path (empty when not compiling or dry-run) needsCompilation bool // true when compilation is needed before scanning - compileLock *utils.FileLock + cacheLock *utils.FileLock } // scanCmd represents the scan command @@ -161,8 +160,8 @@ func scan(cmd *cobra.Command) { cfg := resolveScanConfig(absUserProjectRoot) defer func() { - if cfg.compileLock != nil { - cfg.compileLock.Unlock() + if cfg.cacheLock != nil { + cfg.cacheLock.Unlock() } }() @@ -173,12 +172,6 @@ func scan(cmd *cobra.Command) { absProjectModelPath := cfg.absProjectModel - cleanupStaging := func() { - if cfg.stagingDir != "" { - utils.CleanupStagingDir(cfg.stagingDir) - } - } - var absRuleSetPaths []RulesetType var userRuleSetPath = Ruleset @@ -278,31 +271,35 @@ func scan(cmd *cobra.Command) { out.Fatalf("Failed to resolve Java for compilation: %s", err) } + // Wipe any residue from a prior crashed compile before writing new output. + if cfg.projectCachePath != "" { + if err := os.RemoveAll(cfg.absProjectModel); err != nil { + out.Fatalf("Failed to prepare cache directory: %s", err) + } + } + if err := out.RunWithSpinner("Compiling project model", func() error { return compile(absUserProjectRoot, cfg.absProjectModel, autobuilderJarPath, compileJavaRunner, Internal) }); err != nil { - cleanupStaging() + if cfg.projectCachePath != "" { + _ = os.RemoveAll(cfg.absProjectModel) + } out.Error("Native compile has failed: " + err.Error()) suggest("If native compilation fails due to missing required Java, set JAVA_HOME according to the project's requirements or try Docker-based scan:", utils.BuildScanCommandWithDocker(currentScanBuilder(""), absUserProjectRoot, absSarifReportPath, Ruleset)) os.Exit(1) } out.Blank() - // Promote staging to cache + // Mark the cache as valid, then downgrade to a reader so other scans + // can run the analyzer against the freshly-compiled model in parallel. if cfg.projectCachePath != "" { - if err := utils.PromoteStagingToCache(cfg.projectCachePath, cfg.stagingDir); err != nil { - output.LogInfof("Failed to promote staging to cache: %v", err) - } else { - cfg.stagingDir = "" // staging dir no longer exists - absProjectModelPath = utils.CachedProjectModelPath(cfg.projectCachePath) - output.LogDebugf("Model cached at: %s", absProjectModelPath) + if err := utils.MarkCompileComplete(cfg.projectCachePath); err != nil { + _ = os.RemoveAll(cfg.absProjectModel) + out.Fatalf("Failed to mark model complete: %s", err) + } + if err := cfg.cacheLock.Downgrade(); err != nil { + output.LogInfof("Cache lock downgrade failed, continuing under exclusive: %v", err) } - } - - // Recompute SARIF path against the promoted model path - if SarifReportPath == "" { - absSarifReportPath = utils.DefaultSarifReportPath(absProjectModelPath) - sarifReportName = filepath.Base(absSarifReportPath) } printCompileSummary(absProjectModelPath) @@ -358,7 +355,6 @@ func scan(cmd *cobra.Command) { if err := out.RunWithSpinner("Analyzing project", func() error { return scanProject(nativeBuilder, analyzerJavaRunner) }); err != nil { - cleanupStaging() out.Fatalf("Native scan has failed: %s", err) } @@ -413,42 +409,58 @@ func resolveScanConfig(absUserProjectRoot string) scanConfig { } cachedModelPath := utils.CachedProjectModelPath(projectCachePath) - if !Recompile { - if _, serr := os.Stat(filepath.Join(cachedModelPath, "project.yaml")); serr == nil { - output.LogDebugf("Reusing cached model at: %s", cachedModelPath) - return scanConfig{ - mode: Scan, - absProjectModel: cachedModelPath, - projectCachePath: projectCachePath, + cacheLockPath := utils.CacheLockPath(projectCachePath) + + // Fast path: if we're not forced to recompile and the cache looks + // complete on disk, take a shared lock and re-check under the lock. + if !Recompile && utils.IsCachedModelComplete(projectCachePath) { + sharedLock, sharedErr := utils.TryLockShared(cacheLockPath) + if sharedErr == nil { + if utils.IsCachedModelComplete(projectCachePath) { + output.LogDebugf("Reusing cached model at: %s", cachedModelPath) + return scanConfig{ + mode: Scan, + absProjectModel: cachedModelPath, + projectCachePath: projectCachePath, + cacheLock: sharedLock, + } } + // Marker vanished between the outer check and the lock + // (writer raced ahead of us). Fall through to compile path. + sharedLock.Unlock() + } else if sharedErr != utils.ErrLocked { + out.Fatalf("Failed to acquire cache read lock: %s", sharedErr) } + // sharedErr == ErrLocked means a writer holds the cache; we're about + // to ask for exclusive below, which will also fail with ErrLocked — + // ReadLockMeta below will surface which command is holding it. } - compileLock, lockErr := utils.TryLock( - utils.CompileLockPath(projectCachePath), + cacheLock, lockErr := utils.TryLockExclusive( + cacheLockPath, utils.LockMeta{PID: os.Getpid(), Command: "compile", Project: absUserProjectRoot}, ) if lockErr == utils.ErrLocked { - out.Error("Compilation already in progress for this project") + // Readers don't stamp metadata (empty LockMeta); writers do. Use that + // to distinguish an in-progress compile from an in-progress analyze. + if meta, _ := utils.ReadLockMeta(cacheLockPath); meta.PID != 0 { + out.Error("Compilation already in progress for this project") + } else { + out.Error("Another scan is currently analyzing this project") + } suggest("To scan an existing model instead", utils.NewScanCommand("").WithProjectModel("").Build()) os.Exit(1) } if lockErr != nil { - out.Fatalf("Failed to acquire compile lock: %s", lockErr) - } - - stagingDir, serr := utils.CreateStagingDir(projectCachePath) - if serr != nil { - out.Fatalf("Failed to create staging directory: %s", serr) + out.Fatalf("Failed to acquire cache lock: %s", lockErr) } return scanConfig{ mode: CompileAndScan, - absProjectModel: filepath.Join(stagingDir, "project-model"), + absProjectModel: cachedModelPath, projectCachePath: projectCachePath, - stagingDir: stagingDir, needsCompilation: true, - compileLock: compileLock, + cacheLock: cacheLock, } } @@ -462,7 +474,7 @@ func printScanInfo(cmd *cobra.Command, cfg scanConfig, absSemgrepRuleLoadTracePa if cfg.needsCompilation { sb.Field("Project", absUserProjectRoot) if cfg.projectCachePath != "" { - sb.Field("Project model", utils.CachedProjectModelPath(cfg.projectCachePath)) + sb.Field("Project model", cfg.absProjectModel) } } else { sb.Field("Project model", cfg.absProjectModel) diff --git a/cli/internal/utils/lock.go b/cli/internal/utils/lock.go index 779a111c..f47c7f36 100644 --- a/cli/internal/utils/lock.go +++ b/cli/internal/utils/lock.go @@ -11,32 +11,30 @@ import ( "github.com/gofrs/flock" ) -// ErrLocked is returned when a lock file is already held by another process. +// ErrLocked is returned when a lock file is already held by another process +// in an incompatible mode. var ErrLocked = errors.New("lock is held by another process") -// LockMeta holds diagnostic information written into lock files. +// LockMeta holds diagnostic information written into lock files by exclusive +// (writer) holders. It is zero for shared (reader) holders. type LockMeta struct { PID int Command string Project string } -// FileLock wraps a flock.Flock with its path for cleanup. +// FileLock wraps a flock.Flock with its path and the mode it was acquired in. type FileLock struct { - flock *flock.Flock - path string + flock *flock.Flock + path string + exclusive bool } -// Unlock releases the advisory lock and removes the lock file. -func (l *FileLock) Unlock() { - _ = l.flock.Unlock() - _ = os.Remove(l.path) -} - -// TryLock attempts a non-blocking exclusive lock on the given path. -// On success it writes meta into the file and returns a FileLock. -// On failure because the lock is held, it returns ErrLocked. -func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { +// TryLockExclusive attempts a non-blocking LOCK_EX on the given path. On +// success it stamps meta into the file and returns a FileLock whose Unlock +// will clear the file content before releasing the kernel lock. On failure +// because another exclusive or shared holder exists, returns ErrLocked. +func TryLockExclusive(lockPath string, meta LockMeta) (*FileLock, error) { if err := os.MkdirAll(filepath.Dir(lockPath), 0o755); err != nil { return nil, fmt.Errorf("failed to create lock directory: %w", err) } @@ -44,7 +42,7 @@ func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { fl := flock.New(lockPath) locked, err := fl.TryLock() if err != nil { - return nil, fmt.Errorf("failed to acquire lock: %w", err) + return nil, fmt.Errorf("failed to acquire exclusive lock: %w", err) } if !locked { return nil, ErrLocked @@ -56,10 +54,73 @@ func TryLock(lockPath string, meta LockMeta) (*FileLock, error) { } _ = os.WriteFile(lockPath, []byte(content), 0o644) - return &FileLock{flock: fl, path: lockPath}, nil + return &FileLock{flock: fl, path: lockPath, exclusive: true}, nil +} + +// TryLockShared attempts a non-blocking LOCK_SH on the given path. Multiple +// shared holders can coexist. Returns ErrLocked if an exclusive holder is +// present. The file content is not modified. +func TryLockShared(lockPath string) (*FileLock, error) { + if err := os.MkdirAll(filepath.Dir(lockPath), 0o755); err != nil { + return nil, fmt.Errorf("failed to create lock directory: %w", err) + } + + fl := flock.New(lockPath) + locked, err := fl.TryRLock() + if err != nil { + return nil, fmt.Errorf("failed to acquire shared lock: %w", err) + } + if !locked { + return nil, ErrLocked + } + + return &FileLock{flock: fl, path: lockPath, exclusive: false}, nil +} + +// Downgrade atomically converts a held LOCK_EX to LOCK_SH on the same fd and +// truncates the metadata file. After Downgrade the handle may still be +// released by Unlock. Calling Downgrade on a shared handle returns an error. +// +// Metadata is truncated *before* the kernel mode transition so that any +// concurrent prune that fails to acquire exclusive can never see stale +// writer PID under a shared lock. If the truncate fails the kernel mode +// is not touched and the handle remains exclusively held — the caller can +// continue as if Downgrade were never called. +func (l *FileLock) Downgrade() error { + if !l.exclusive { + return errors.New("downgrade called on non-exclusive lock") + } + if err := os.Truncate(l.path, 0); err != nil { + return fmt.Errorf("failed to clear lock metadata on downgrade: %w", err) + } + // gofrs/flock.RLock on a held exclusive fd issues unix.Flock(fd, LOCK_SH) + // on the same descriptor, which is an atomic downgrade on POSIX. After + // this call, gofrs internally has both its f.l and f.r fields set to + // true; that is harmless because its Unlock path issues LOCK_UN whenever + // either flag is set. + if err := l.flock.RLock(); err != nil { + return fmt.Errorf("failed to downgrade lock: %w", err) + } + l.exclusive = false + return nil +} + +// Unlock releases the advisory lock. For exclusive holders it first truncates +// the metadata so prune's diagnostic output does not show a stale writer PID. +// The lock file itself is not removed — removing it while other shared +// holders still have it open would let a subsequent acquirer create a new +// inode and silently bypass mutual exclusion. +func (l *FileLock) Unlock() { + if l.exclusive { + _ = os.Truncate(l.path, 0) + l.exclusive = false + } + _ = l.flock.Unlock() } -// ReadLockMeta reads diagnostic metadata from a lock file. +// ReadLockMeta reads diagnostic metadata from a lock file. Returns an empty +// LockMeta when the file exists but has no content (shared holder, or after +// an exclusive holder's Unlock/Downgrade). func ReadLockMeta(lockPath string) (LockMeta, error) { data, err := os.ReadFile(lockPath) if err != nil { @@ -83,7 +144,7 @@ func ReadLockMeta(lockPath string) (LockMeta, error) { return meta, nil } -// PruneLockPath returns the path to the global prune lock: ~/.opentaint/.prune.lock +// PruneLockPath returns ~/.opentaint/.prune.lock. func PruneLockPath() (string, error) { home, err := GetOpenTaintHomePath() if err != nil { @@ -92,8 +153,8 @@ func PruneLockPath() (string, error) { return filepath.Join(home, ".prune.lock"), nil } -// CompileLockPath returns the path to a per-project compile lock: -// /.compile.lock -func CompileLockPath(projectCachePath string) string { - return filepath.Join(projectCachePath, ".compile.lock") +// CacheLockPath returns /.cache.lock — the reader/writer +// lock gating access to the compiled project model. +func CacheLockPath(projectCachePath string) string { + return filepath.Join(projectCachePath, ".cache.lock") } diff --git a/cli/internal/utils/lock_test.go b/cli/internal/utils/lock_test.go index eadcae47..77fb7266 100644 --- a/cli/internal/utils/lock_test.go +++ b/cli/internal/utils/lock_test.go @@ -6,68 +6,246 @@ import ( "testing" ) -func TestTryLock(t *testing.T) { - t.Run("acquires lock on new file", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - lock, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "test"}) +func TestTryLockExclusive(t *testing.T) { + t.Run("acquires on new file", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + lock, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "test"}) if err != nil { - t.Fatalf("TryLock() error = %v", err) + t.Fatalf("TryLockExclusive() error = %v", err) } defer lock.Unlock() }) - t.Run("second lock on same file returns ErrLocked", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - lock1, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "first"}) + t.Run("second exclusive returns ErrLocked", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l1, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "first"}) if err != nil { - t.Fatalf("first TryLock() error = %v", err) + t.Fatalf("first TryLockExclusive() error = %v", err) } - defer lock1.Unlock() + defer l1.Unlock() - _, err = TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "second"}) + _, err = TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "second"}) if err != ErrLocked { t.Fatalf("expected ErrLocked, got %v", err) } }) - t.Run("lock released after Unlock allows re-acquisition", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - lock1, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "first"}) + t.Run("exclusive held blocks shared", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l1, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "w"}) if err != nil { - t.Fatalf("first TryLock() error = %v", err) + t.Fatalf("TryLockExclusive() error = %v", err) } - lock1.Unlock() + defer l1.Unlock() - lock2, err := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "second"}) + if _, err := TryLockShared(path); err != ErrLocked { + t.Fatalf("expected ErrLocked from TryLockShared, got %v", err) + } + }) + + t.Run("unlock clears stamped meta", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l, err := TryLockExclusive(path, LockMeta{PID: 42, Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + l.Unlock() + + data, err := os.ReadFile(path) if err != nil { - t.Fatalf("second TryLock() error = %v", err) + t.Fatalf("lock file should still exist after Unlock: %v", err) + } + if len(data) != 0 { + t.Errorf("expected empty file after writer Unlock, got %q", string(data)) + } + }) + + t.Run("unlock does not remove the file", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "x.lock") + l, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "test"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + l.Unlock() + + if _, err := os.Stat(path); err != nil { + t.Errorf("expected lock file to persist after Unlock, got %v", err) + } + }) +} + +func TestTryLockShared(t *testing.T) { + t.Run("acquires on new file", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + l, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer l.Unlock() + }) + + t.Run("two concurrent shared holders succeed", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + l1, err := TryLockShared(path) + if err != nil { + t.Fatalf("first TryLockShared() error = %v", err) + } + defer l1.Unlock() + + l2, err := TryLockShared(path) + if err != nil { + t.Fatalf("second TryLockShared() error = %v", err) + } + defer l2.Unlock() + }) + + t.Run("shared blocks exclusive", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + l, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer l.Unlock() + + _, err = TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "compile"}) + if err != ErrLocked { + t.Fatalf("expected ErrLocked, got %v", err) + } + }) + + t.Run("shared does not touch file contents", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.lock") + if err := os.WriteFile(path, []byte("pre-existing"), 0o644); err != nil { + t.Fatal(err) + } + + l, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer l.Unlock() + + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if string(data) != "pre-existing" { + t.Errorf("shared acquire should not modify file, got %q", string(data)) + } + }) +} + +func TestDowngrade(t *testing.T) { + t.Run("downgrade allows new shared acquire", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + w, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + defer w.Unlock() + + // Before downgrade, shared is blocked. + if _, err := TryLockShared(path); err != ErrLocked { + t.Fatalf("expected ErrLocked before downgrade, got %v", err) + } + + if err := w.Downgrade(); err != nil { + t.Fatalf("Downgrade() error = %v", err) + } + + r, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() after downgrade error = %v", err) + } + defer r.Unlock() + }) + + t.Run("downgrade blocks new exclusive", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + w, err := TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + defer w.Unlock() + + if err := w.Downgrade(); err != nil { + t.Fatalf("Downgrade() error = %v", err) + } + + _, err = TryLockExclusive(path, LockMeta{PID: os.Getpid(), Command: "another"}) + if err != ErrLocked { + t.Fatalf("expected ErrLocked from exclusive-after-downgrade, got %v", err) + } + }) + + t.Run("downgrade clears stamped meta", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + w, err := TryLockExclusive(path, LockMeta{PID: 77, Command: "compile"}) + if err != nil { + t.Fatalf("TryLockExclusive() error = %v", err) + } + defer w.Unlock() + + if err := w.Downgrade(); err != nil { + t.Fatalf("Downgrade() error = %v", err) + } + + meta, err := ReadLockMeta(path) + if err != nil { + t.Fatalf("ReadLockMeta() error = %v", err) + } + if meta.PID != 0 || meta.Command != "" { + t.Errorf("expected empty meta after Downgrade, got %+v", meta) + } + }) + + t.Run("downgrade on shared lock errors", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "d.lock") + r, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer r.Unlock() + + if err := r.Downgrade(); err == nil { + t.Fatal("expected error from Downgrade on shared lock") } - defer lock2.Unlock() }) } func TestReadLockMeta(t *testing.T) { - t.Run("reads PID and command from lock file", func(t *testing.T) { - lockPath := filepath.Join(t.TempDir(), "test.lock") - meta := LockMeta{PID: 12345, Command: "compile", Project: "/tmp/my-project"} - lock, err := TryLock(lockPath, meta) + t.Run("reads stamped meta while writer holds", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "m.lock") + meta := LockMeta{PID: 12345, Command: "compile", Project: "/tmp/p"} + l, err := TryLockExclusive(path, meta) if err != nil { - t.Fatalf("TryLock() error = %v", err) + t.Fatalf("TryLockExclusive() error = %v", err) } - defer lock.Unlock() + defer l.Unlock() - got, err := ReadLockMeta(lockPath) + got, err := ReadLockMeta(path) if err != nil { t.Fatalf("ReadLockMeta() error = %v", err) } - if got.PID != 12345 { - t.Errorf("PID = %d, want 12345", got.PID) + if got.PID != 12345 || got.Command != "compile" || got.Project != "/tmp/p" { + t.Errorf("unexpected meta: %+v", got) } - if got.Command != "compile" { - t.Errorf("Command = %q, want %q", got.Command, "compile") + }) + + t.Run("returns empty LockMeta when only readers hold", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "m.lock") + r, err := TryLockShared(path) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer r.Unlock() + + got, err := ReadLockMeta(path) + if err != nil { + t.Fatalf("ReadLockMeta() error = %v", err) } - if got.Project != "/tmp/my-project" { - t.Errorf("Project = %q, want %q", got.Project, "/tmp/my-project") + if got.PID != 0 || got.Command != "" { + t.Errorf("expected empty meta, got %+v", got) } }) @@ -83,20 +261,20 @@ func TestPruneLockPath(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) - lockPath, err := PruneLockPath() + got, err := PruneLockPath() if err != nil { t.Fatalf("PruneLockPath() error = %v", err) } expected := filepath.Join(home, ".opentaint", ".prune.lock") - if lockPath != expected { - t.Errorf("got %q, want %q", lockPath, expected) + if got != expected { + t.Errorf("got %q, want %q", got, expected) } } -func TestCompileLockPath(t *testing.T) { - result := CompileLockPath("/home/user/.opentaint/cache/my-project-abc12345") - expected := "/home/user/.opentaint/cache/my-project-abc12345/.compile.lock" - if result != expected { - t.Errorf("got %q, want %q", result, expected) +func TestCacheLockPath(t *testing.T) { + got := CacheLockPath("/home/user/.opentaint/cache/my-project-abc12345") + expected := "/home/user/.opentaint/cache/my-project-abc12345/.cache.lock" + if got != expected { + t.Errorf("got %q, want %q", got, expected) } } diff --git a/cli/internal/utils/model_cache.go b/cli/internal/utils/model_cache.go index d89e827d..d6ba3a97 100644 --- a/cli/internal/utils/model_cache.go +++ b/cli/internal/utils/model_cache.go @@ -111,43 +111,39 @@ func CachedProjectModelPath(cacheDir string) string { return filepath.Join(cacheDir, projectModelDir) } -// CreateStagingDir creates a staging directory inside cacheDir for isolated compilation. -// Returns the path to the staging directory (e.g. /.staging-XXXX/). -func CreateStagingDir(cacheDir string) (string, error) { - stagingPath, err := os.MkdirTemp(cacheDir, ".staging-") - if err != nil { - return "", fmt.Errorf("failed to create staging directory: %w", err) - } - return stagingPath, nil +// compileCompleteMarker is the sentinel file written as the final step of a +// successful compile. Its presence proves the whole model is on disk. +const compileCompleteMarker = ".compile-complete" + +// CompileCompleteMarkerPath returns the absolute path to the compile-complete +// marker inside a cache directory. +func CompileCompleteMarkerPath(cacheDir string) string { + return filepath.Join(cacheDir, projectModelDir, compileCompleteMarker) } -// PromoteStagingToCache moves the compiled project-model/ from the staging -// directory into the cache, replacing any existing cached model. -// The caller must hold the compile lock to prevent concurrent promotions. -func PromoteStagingToCache(cacheDir, stagingPath string) error { - srcPM := filepath.Join(stagingPath, projectModelDir) - destPM := filepath.Join(cacheDir, projectModelDir) - - // Remove existing cached model if present - if err := os.RemoveAll(destPM); err != nil { - return fmt.Errorf("failed to remove old cached model: %w", err) +// IsCachedModelComplete reports whether cacheDir holds a fully-written +// project model. Both project.yaml and the compile-complete marker must +// exist. A crashed mid-compile leaves the marker absent and returns false. +func IsCachedModelComplete(cacheDir string) bool { + pm := CachedProjectModelPath(cacheDir) + if _, err := os.Stat(filepath.Join(pm, "project.yaml")); err != nil { + return false } - - // Move staging project-model/ to cache - if err := os.Rename(srcPM, destPM); err != nil { - return fmt.Errorf("failed to move staging model to cache: %w", err) + if _, err := os.Stat(CompileCompleteMarkerPath(cacheDir)); err != nil { + return false } - - // Remove the staging dir and any leftover files (e.g. build logs) - _ = os.RemoveAll(stagingPath) - - return nil + return true } -// CleanupStagingDir removes a staging directory and all its contents. -// Used when compilation fails and the staging output is not needed. -func CleanupStagingDir(stagingPath string) { - _ = os.RemoveAll(stagingPath) +// MarkCompileComplete writes the compile-complete marker as the very last +// step of a successful compile. The caller must have just populated +// /project-model/ and must hold the cache exclusive lock. +func MarkCompileComplete(cacheDir string) error { + markerPath := CompileCompleteMarkerPath(cacheDir) + if err := os.WriteFile(markerPath, nil, 0o644); err != nil { + return fmt.Errorf("failed to write compile-complete marker: %w", err) + } + return nil } // ProjectPathSlugHash returns a deterministic directory name for a project path. diff --git a/cli/internal/utils/model_cache_test.go b/cli/internal/utils/model_cache_test.go index a97b3479..2ad6540c 100644 --- a/cli/internal/utils/model_cache_test.go +++ b/cli/internal/utils/model_cache_test.go @@ -142,149 +142,6 @@ func TestGetProjectCachePath_SymlinksResolved(t *testing.T) { } } -func TestCreateStagingDir(t *testing.T) { - cacheDir := t.TempDir() - - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Should be under cacheDir - if !strings.HasPrefix(stagingPath, cacheDir) { - t.Errorf("staging path %q should be under %q", stagingPath, cacheDir) - } - - // Directory name should start with .staging- - dirName := filepath.Base(stagingPath) - if !strings.HasPrefix(dirName, ".staging-") { - t.Errorf("expected .staging- prefix, got %q", dirName) - } - - // Staging dir itself should exist - info, err := os.Stat(stagingPath) - if err != nil { - t.Fatalf("staging dir not created: %v", err) - } - if !info.IsDir() { - t.Error("expected directory") - } -} - -func TestPromoteStagingToCache(t *testing.T) { - cacheDir := t.TempDir() - - // Create a staging dir with content - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - // Add a file to project-model to simulate compilation output - createTestFile(t, filepath.Join(stagingPath, "project-model", "project.yaml"), 10) - - // Promote - err = PromoteStagingToCache(cacheDir, stagingPath) - if err != nil { - t.Fatalf("PromoteStagingToCache() error = %v", err) - } - - // project-model directory should exist (not a symlink) - pmPath := filepath.Join(cacheDir, "project-model") - info, err := os.Lstat(pmPath) - if err != nil { - t.Fatalf("expected project-model at %s: %v", pmPath, err) - } - if !info.IsDir() { - t.Error("expected directory, not symlink or file") - } - - // The file should be accessible - data, err := os.ReadFile(filepath.Join(pmPath, "project.yaml")) - if err != nil { - t.Fatalf("failed to read project.yaml: %v", err) - } - if len(data) != 10 { - t.Errorf("expected 10 bytes, got %d", len(data)) - } - - // Staging dir should be cleaned up - if _, err := os.Stat(stagingPath); !os.IsNotExist(err) { - t.Error("staging dir should be removed after promotion") - } -} - -func TestPromoteStagingToCache_ReplacesExisting(t *testing.T) { - cacheDir := t.TempDir() - - // First promotion - staging1, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - createTestFile(t, filepath.Join(staging1, "project-model", "project.yaml"), 10) - if err := PromoteStagingToCache(cacheDir, staging1); err != nil { - t.Fatal(err) - } - - // Second promotion replaces the first - staging2, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - createTestFile(t, filepath.Join(staging2, "project-model", "project.yaml"), 20) - if err := PromoteStagingToCache(cacheDir, staging2); err != nil { - t.Fatal(err) - } - - // New content should be accessible (20 bytes from second promotion) - data, err := os.ReadFile(filepath.Join(cacheDir, "project-model", "project.yaml")) - if err != nil { - t.Fatalf("failed to read project.yaml: %v", err) - } - if len(data) != 20 { - t.Errorf("expected 20 bytes from second promotion, got %d", len(data)) - } -} - -func TestPromoteStagingToCache_CleansLeftoverFiles(t *testing.T) { - cacheDir := t.TempDir() - - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - // Compilation output - createTestFile(t, filepath.Join(stagingPath, "project-model", "project.yaml"), 10) - // Leftover file outside project-model/ (e.g. build log) - createTestFile(t, filepath.Join(stagingPath, "build.log"), 100) - - err = PromoteStagingToCache(cacheDir, stagingPath) - if err != nil { - t.Fatalf("PromoteStagingToCache() error = %v", err) - } - - // Staging dir should be fully removed despite leftover files - if _, err := os.Stat(stagingPath); !os.IsNotExist(err) { - t.Errorf("staging dir should be removed after promotion, but still exists") - } -} - -func TestCleanupStagingDir(t *testing.T) { - cacheDir := t.TempDir() - - stagingPath, err := CreateStagingDir(cacheDir) - if err != nil { - t.Fatal(err) - } - createTestFile(t, filepath.Join(stagingPath, "project-model", "file.txt"), 5) - - CleanupStagingDir(stagingPath) - - if _, err := os.Stat(stagingPath); !os.IsNotExist(err) { - t.Error("staging dir should be removed after cleanup") - } -} - func TestGetLogCacheDirPath(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) @@ -325,3 +182,68 @@ func TestGetProjectLogPath(t *testing.T) { t.Errorf("got %q, want %q", logPath, expected) } } + +func TestCompileCompleteMarkerPath(t *testing.T) { + cacheDir := "/home/user/.opentaint/cache/my-project-abc12345" + got := CompileCompleteMarkerPath(cacheDir) + expected := "/home/user/.opentaint/cache/my-project-abc12345/project-model/.compile-complete" + if got != expected { + t.Errorf("got %q, want %q", got, expected) + } +} + +func TestIsCachedModelComplete(t *testing.T) { + t.Run("false for non-existent cache", func(t *testing.T) { + cacheDir := t.TempDir() + if IsCachedModelComplete(cacheDir) { + t.Error("empty cache dir should not be complete") + } + }) + + t.Run("false when project.yaml missing", func(t *testing.T) { + cacheDir := t.TempDir() + // Only the marker, no project.yaml. + createTestFile(t, CompileCompleteMarkerPath(cacheDir), 0) + if IsCachedModelComplete(cacheDir) { + t.Error("missing project.yaml should make cache incomplete") + } + }) + + t.Run("false when marker missing", func(t *testing.T) { + cacheDir := t.TempDir() + // Only project.yaml, no marker. + createTestFile(t, filepath.Join(cacheDir, "project-model", "project.yaml"), 10) + if IsCachedModelComplete(cacheDir) { + t.Error("missing marker should make cache incomplete") + } + }) + + t.Run("true when both present", func(t *testing.T) { + cacheDir := t.TempDir() + createTestFile(t, filepath.Join(cacheDir, "project-model", "project.yaml"), 10) + createTestFile(t, CompileCompleteMarkerPath(cacheDir), 0) + if !IsCachedModelComplete(cacheDir) { + t.Error("both files present should mean complete") + } + }) +} + +func TestMarkCompileComplete(t *testing.T) { + cacheDir := t.TempDir() + // The caller always creates project-model/ before calling. + if err := os.MkdirAll(filepath.Join(cacheDir, "project-model"), 0o755); err != nil { + t.Fatal(err) + } + + if err := MarkCompileComplete(cacheDir); err != nil { + t.Fatalf("MarkCompileComplete() error = %v", err) + } + + info, err := os.Stat(CompileCompleteMarkerPath(cacheDir)) + if err != nil { + t.Fatalf("marker should exist: %v", err) + } + if info.Size() != 0 { + t.Errorf("marker should be empty, got size %d", info.Size()) + } +} diff --git a/cli/internal/utils/prune.go b/cli/internal/utils/prune.go index d5b6ec85..e0d3db7d 100644 --- a/cli/internal/utils/prune.go +++ b/cli/internal/utils/prune.go @@ -30,7 +30,7 @@ type StaleArtifact struct { Kind string } -// SkippedProject represents a project cache that was skipped because a compile lock was held. +// SkippedProject represents a project cache that was skipped because its cache lock was held. type SkippedProject struct { Path string Meta LockMeta @@ -51,7 +51,7 @@ func (r *PruneResult) Add(a StaleArtifact) { r.TotalCount++ } -// AddSkipped records a project that was skipped due to an active compile lock. +// AddSkipped records a project that was skipped due to an active cache lock holder. func (r *PruneResult) AddSkipped(s SkippedProject) { r.Skipped = append(r.Skipped, s) } @@ -211,15 +211,15 @@ func ScanForStaleArtifacts(categories PruneCategory) (*PruneResult, error) { continue } projectCachePath := filepath.Join(modelsDir, modelEntry.Name()) - lockPath := CompileLockPath(projectCachePath) - lock, lockErr := TryLock(lockPath, LockMeta{PID: os.Getpid(), Command: "prune"}) + lockPath := CacheLockPath(projectCachePath) + lock, lockErr := TryLockExclusive(lockPath, LockMeta{PID: os.Getpid(), Command: "prune"}) if lockErr == ErrLocked { meta, _ := ReadLockMeta(lockPath) result.AddSkipped(SkippedProject{Path: projectCachePath, Meta: meta}) continue } if lockErr != nil { - output.LogDebugf("Failed to probe compile lock for %s, skipping: %v", projectCachePath, lockErr) + output.LogDebugf("Failed to probe cache lock for %s, skipping: %v", projectCachePath, lockErr) continue } scanProjectCacheSubdirs(projectCachePath, result) @@ -252,25 +252,17 @@ func ScanForStaleArtifacts(categories PruneCategory) (*PruneResult, error) { return result, nil } -// scanProjectCacheSubdirs adds only project-model/ and .staging-* subdirs -// from a project cache directory, preserving logs and other data. +// scanProjectCacheSubdirs adds only the project-model/ subdir of a project +// cache directory so that adjacent files (lock, logs) are preserved. func scanProjectCacheSubdirs(projectCachePath string, result *PruneResult) { - entries, err := os.ReadDir(projectCachePath) - if err != nil { + pmPath := filepath.Join(projectCachePath, projectModelDir) + info, err := os.Stat(pmPath) + if err != nil || !info.IsDir() { return } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - name := entry.Name() - if name == projectModelDir || strings.HasPrefix(name, ".staging-") { - subPath := filepath.Join(projectCachePath, name) - size, _ := dirSize(subPath) - if size > 0 { - result.Add(StaleArtifact{Path: subPath, Size: size, Kind: StaleKindModel}) - } - } + size, _ := dirSize(pmPath) + if size > 0 { + result.Add(StaleArtifact{Path: pmPath, Size: size, Kind: StaleKindModel}) } } @@ -280,10 +272,20 @@ func DeleteArtifacts(artifacts []StaleArtifact) error { emptyDirCandidates := map[string]struct{}{} for _, artifact := range artifacts { + if artifact.Kind == StaleKindModel { + modelsParent, err := deleteModelArtifact(artifact.Path) + if err != nil { + return err + } + if modelsParent != "" { + emptyDirCandidates[modelsParent] = struct{}{} + } + continue + } if err := os.RemoveAll(artifact.Path); err != nil { return fmt.Errorf("failed to remove %s: %w", artifact.Path, err) } - if artifact.Kind == StaleKindModel || artifact.Kind == StaleKindLog { + if artifact.Kind == StaleKindLog { emptyDirCandidates[filepath.Dir(artifact.Path)] = struct{}{} } } @@ -296,6 +298,35 @@ func DeleteArtifacts(artifacts []StaleArtifact) error { return nil } +// deleteModelArtifact removes a cached project-model and, when the cache lock +// is still ours to take, the entire project cache directory (including the +// residual .cache.lock left over from the prior scan). Returns the models/ +// parent directory as a candidate for empty-parent cleanup, or "" if the +// project directory must remain intact (another holder acquired the lock). +func deleteModelArtifact(modelPath string) (string, error) { + projectDir := filepath.Dir(modelPath) + lock, err := TryLockExclusive(CacheLockPath(projectDir), LockMeta{ + PID: os.Getpid(), + Command: "prune", + }) + if err != nil { + // Another process grabbed the cache between scan and delete. + // Fall back to removing only the model; the holder keeps its lock. + if rmErr := os.RemoveAll(modelPath); rmErr != nil { + return "", fmt.Errorf("failed to remove %s: %w", modelPath, rmErr) + } + return "", nil + } + // Remove the whole project cache dir while holding exclusive. The lock + // file is unlinked here; our fd stays valid until Unlock closes it. + if err := os.RemoveAll(projectDir); err != nil { + lock.Unlock() + return "", fmt.Errorf("failed to remove %s: %w", projectDir, err) + } + lock.Unlock() + return filepath.Dir(projectDir), nil +} + // removeEmptyParents attempts to remove dir and its parent if they are empty. // Stops at the first non-empty or non-removable directory. func removeEmptyParents(dir string) { diff --git a/cli/internal/utils/prune_test.go b/cli/internal/utils/prune_test.go index 87e6c52f..d09eca08 100644 --- a/cli/internal/utils/prune_test.go +++ b/cli/internal/utils/prune_test.go @@ -93,6 +93,55 @@ func TestDeleteArtifacts(t *testing.T) { } }) + t.Run("cleans up project dir with residual cache lock", func(t *testing.T) { + tmpDir := t.TempDir() + cacheDir := filepath.Join(tmpDir, "cache") + projectDir := filepath.Join(cacheDir, "my-project-a1b2c3d4") + pmDir := filepath.Join(projectDir, "project-model") + createTestFile(t, filepath.Join(pmDir, "project.yaml"), 50) + // Simulate the .cache.lock file a prior scan left behind. + createTestFile(t, CacheLockPath(projectDir), 0) + + artifacts := []StaleArtifact{{Path: pmDir, Size: 50, Kind: StaleKindModel}} + if err := DeleteArtifacts(artifacts); err != nil { + t.Fatalf("DeleteArtifacts() error = %v", err) + } + + if _, err := os.Stat(projectDir); !os.IsNotExist(err) { + t.Errorf("expected project cache dir to be removed, got err=%v", err) + } + if _, err := os.Stat(cacheDir); !os.IsNotExist(err) { + t.Errorf("expected empty cache dir to be removed, got err=%v", err) + } + }) + + t.Run("preserves project dir when cache lock is held elsewhere", func(t *testing.T) { + tmpDir := t.TempDir() + cacheDir := filepath.Join(tmpDir, "cache") + projectDir := filepath.Join(cacheDir, "my-project-a1b2c3d4") + pmDir := filepath.Join(projectDir, "project-model") + createTestFile(t, filepath.Join(pmDir, "project.yaml"), 50) + + // Another holder grabbed the lock between scan and delete. + sharedLock, err := TryLockShared(CacheLockPath(projectDir)) + if err != nil { + t.Fatalf("TryLockShared() error = %v", err) + } + defer sharedLock.Unlock() + + artifacts := []StaleArtifact{{Path: pmDir, Size: 50, Kind: StaleKindModel}} + if err := DeleteArtifacts(artifacts); err != nil { + t.Fatalf("DeleteArtifacts() error = %v", err) + } + + if _, err := os.Stat(pmDir); !os.IsNotExist(err) { + t.Errorf("expected project-model to be removed, got err=%v", err) + } + if _, err := os.Stat(projectDir); err != nil { + t.Errorf("expected project cache dir to remain (lock held), got err=%v", err) + } + }) + t.Run("empty slice is no-op", func(t *testing.T) { if err := DeleteArtifacts(nil); err != nil { t.Fatalf("DeleteArtifacts(nil) error = %v", err) @@ -475,28 +524,6 @@ func TestScanForStaleArtifacts_CachedModels(t *testing.T) { } }) - t.Run("stale staging dir is prunable", func(t *testing.T) { - home := t.TempDir() - t.Setenv("HOME", home) - projectDir := filepath.Join(home, ".opentaint", "cache", "my-project-a1b2c3d4") - stagingDir := filepath.Join(projectDir, ".staging-12345-9999") - createTestFile(t, filepath.Join(stagingDir, "project-model", "project.yaml"), 50) - - result, err := ScanForStaleArtifacts(PruneCategoriesDefault) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - assertHasKind(t, result, StaleKindModel) - // Should target the staging dir specifically - for _, s := range result.Stale { - if s.Kind == StaleKindModel { - if s.Path != stagingDir { - t.Errorf("expected path %q, got %q", stagingDir, s.Path) - } - } - } - }) - t.Run("empty models dir produces no stale", func(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) @@ -624,11 +651,11 @@ func TestScanForStaleArtifacts_LockedModelSkipped(t *testing.T) { projectDir := filepath.Join(home, ".opentaint", "cache", "my-project-a1b2c3d4") createTestFile(t, filepath.Join(projectDir, "project-model", "project.yaml"), 50) - // Hold the compile lock - lockPath := CompileLockPath(projectDir) - lock, err := TryLock(lockPath, LockMeta{PID: 99999, Command: "compile"}) + // Hold the cache lock exclusively (simulate in-progress compile). + lockPath := CacheLockPath(projectDir) + lock, err := TryLockExclusive(lockPath, LockMeta{PID: 99999, Command: "compile"}) if err != nil { - t.Fatalf("failed to acquire compile lock: %v", err) + t.Fatalf("failed to acquire cache lock: %v", err) } defer lock.Unlock() @@ -644,6 +671,36 @@ func TestScanForStaleArtifacts_LockedModelSkipped(t *testing.T) { t.Errorf("expected PID 99999, got %d", result.Skipped[0].Meta.PID) } }) + + t.Run("shared-locked project is skipped with empty meta", func(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + projectDir := filepath.Join(home, ".opentaint", "cache", "my-project-a1b2c3d4") + createTestFile(t, filepath.Join(projectDir, "project-model", "project.yaml"), 50) + + // Hold the cache lock as a reader (simulates an in-progress analyze). + lockPath := CacheLockPath(projectDir) + sharedLock, err := TryLockShared(lockPath) + if err != nil { + t.Fatalf("failed to acquire shared cache lock: %v", err) + } + defer sharedLock.Unlock() + + result, err := ScanForStaleArtifacts(PruneCategoryModels) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + assertNoKind(t, result, StaleKindModel) + if len(result.Skipped) != 1 { + t.Fatalf("expected 1 skipped, got %d", len(result.Skipped)) + } + if result.Skipped[0].Meta.PID != 0 { + t.Errorf("expected empty meta for reader-held lock, got PID %d", result.Skipped[0].Meta.PID) + } + if result.Skipped[0].Meta.Command != "" { + t.Errorf("expected empty command for reader-held lock, got %q", result.Skipped[0].Meta.Command) + } + }) } func TestPruneResult_AddSkipped(t *testing.T) { diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 9ec39ce7..f02f9347 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -101,12 +101,6 @@ This means another `opentaint scan` is currently compiling the same project. Wai opentaint scan --project-model /path/to/model ``` -If the previous process crashed and left a stale staging directory, prune cached models: - -```bash -opentaint prune -``` - ### Clearing the Model Cache To remove all cached project models (stored in `~/.opentaint/cache/`): diff --git a/docs/usage.md b/docs/usage.md index 316392dd..91934747 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -149,7 +149,7 @@ opentaint scan /path/to/project opentaint scan --recompile /path/to/project ``` -If a compilation is already in progress for the same project (detected via a staging directory), the scan aborts with an error instead of compiling concurrently. +If another scan is actively compiling the same project, the scan aborts with an error instead of compiling concurrently. Multiple read-only scans against the same cached model can run in parallel. To remove all cached models: