diff --git a/Sources/VistaCore/Indexer.swift b/Sources/VistaCore/Indexer.swift index d8b766c..86cfb23 100644 --- a/Sources/VistaCore/Indexer.swift +++ b/Sources/VistaCore/Indexer.swift @@ -213,8 +213,7 @@ public actor Indexer { let attrs = try? fm.attributesOfItem(atPath: url.path) let mtime = (attrs?[.modificationDate] as? Date) ?? Date() let size = (attrs?[.size] as? NSNumber)?.int64Value ?? 0 - if let existing = try? store.fingerprint(for: url), - existing.mtime == mtime, existing.size == size { + if (try? store.isAlreadyIndexed(at: url, mtime: mtime, size: size)) == true { continue } toIndex.append(url) @@ -313,8 +312,7 @@ public actor Indexer { // Skip unchanged files — the fingerprint check saves us an expensive // OCR pass for every file on every relaunch. - if let existing = try store.fingerprint(for: url), - existing.mtime == mtime, existing.size == size { + if try store.isAlreadyIndexed(at: url, mtime: mtime, size: size) { return } diff --git a/Sources/VistaCore/ScreenshotStore.swift b/Sources/VistaCore/ScreenshotStore.swift index 066d241..c99e031 100644 --- a/Sources/VistaCore/ScreenshotStore.swift +++ b/Sources/VistaCore/ScreenshotStore.swift @@ -148,6 +148,30 @@ public final class ScreenshotStore: @unchecked Sendable { // MARK: - Lookups used during incremental scan + /// Tolerance, in seconds, applied when comparing a file's current mtime against + /// the value we stored the last time we indexed it. Public so tests + /// can pin behaviour to the same constant instead of re-declaring + /// the magic number. + public static let mtimeTolerance: TimeInterval = 0.001 + + /// Whether the DB already holds an entry for `url` matching the given + /// `(mtime, size)`. 
Mtime is compared with `mtimeTolerance` (1 ms) to + /// survive the `Double` round-trip through SQLite's REAL column: at + /// year-2026 timestamps (~1.78e9 s) one Double ULP is ~240 ns (2^-22 s), which + /// is coarser than APFS's ns-resolution mtime, so a Double can't + /// represent every distinct APFS timestamp and exact equality + /// sporadically fails. Before the tolerance, the indexer re-OCR'd a + /// random ~37 % subset of rows on every relaunch. `size` is still + /// compared exactly — a real edit almost always changes size, so the + /// slack on mtime doesn't mask genuine changes. Returns `false` when `url` has no stored fingerprint; errors thrown by `fingerprint(for:)` propagate to the caller. + public func isAlreadyIndexed(at url: URL, mtime: Date, size: Int64) throws -> Bool { + guard let existing = try fingerprint(for: url) else { return false } + guard existing.size == size else { return false } + let drift = abs(existing.mtime.timeIntervalSinceReferenceDate + - mtime.timeIntervalSinceReferenceDate) + return drift <= Self.mtimeTolerance + } + /// Returns the (mtime, size) we last indexed for this path, or nil if /// unknown. Indexer uses this to skip unchanged files without reading /// pixels. diff --git a/Tests/VistaCoreTests/ScreenshotStoreTests.swift b/Tests/VistaCoreTests/ScreenshotStoreTests.swift index b08be5c..d4f54d1 100644 --- a/Tests/VistaCoreTests/ScreenshotStoreTests.swift +++ b/Tests/VistaCoreTests/ScreenshotStoreTests.swift @@ -134,6 +134,35 @@ final class ScreenshotStoreTests: XCTestCase { XCTAssertNil(try store.fingerprint(for: Self.sampleURL(name: "never-seen.png"))) } + // Regression: exact `Date == Date` after a round-trip through the REAL + // `mtime` column used to fail sporadically because a Double ULP at + // 2026 timestamps (~240 ns, i.e. 2^-22 s) is coarser than APFS's 1 ns mtime + // resolution — so a `Double` can't represent every distinct APFS + // timestamp. `isAlreadyIndexed` applies `ScreenshotStore.mtimeTolerance` + // (1 ms) so high-precision fractional mtimes still match after the + // round-trip. 
+ func testIsAlreadyIndexedSurvivesDoublePrecisionRoundTrip() throws { + let path = Self.sampleURL(name: "Drift.png") + // Fractional seconds chosen so `.timeIntervalSince1970` can't be + // represented exactly as a Double — that's the shape filesystem + // mtimes actually have on APFS. + let mtime = Date(timeIntervalSince1970: 1_776_864_366.2835145) + let size: Int64 = 42_630 + try store.upsert(Self.sampleRecord(path: path, mtime: mtime, size: size, text: "")) + + XCTAssertTrue(try store.isAlreadyIndexed(at: path, mtime: mtime, size: size)) + XCTAssertFalse(try store.isAlreadyIndexed(at: path, mtime: mtime, size: size + 1)) + XCTAssertFalse(try store.isAlreadyIndexed(at: Self.sampleURL(name: "nope.png"), + mtime: mtime, size: size)) + + // Drift outside the tolerance window is treated as "changed". The + // epsilon (10 × tolerance) is deliberately well beyond the ~240 ns (2^-22 s) + // Double-round-trip slop so the assertion is deterministic across + // fractional mtime values. + let beyondTol = mtime.addingTimeInterval(ScreenshotStore.mtimeTolerance * 10) + XCTAssertFalse(try store.isAlreadyIndexed(at: path, mtime: beyondTol, size: size)) + } + // MARK: - Helpers private static func sampleURL(name: String) -> URL {