From 7a4588192e0e51f30ebfc40c3281f76f81618e04 Mon Sep 17 00:00:00 2001 From: Gordon Beeming Date: Thu, 23 Apr 2026 00:16:36 +1000 Subject: [PATCH 1/2] Fix relaunch re-OCR: 1 ms tolerance on mtime fingerprint check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified in production against a 4,756-row index: ~1,785 files were being re-OCR'd on every relaunch because the fingerprint guard used exact `Date == Date`. After the round-trip `Date → timeIntervalSince1970 → SQLite REAL → Date(timeIntervalSince1970:)` a Double ULP at 2026 timestamps (~400 ns at 1.78e9 s) drifts the value below APFS's ns-resolution mtime, so a random subset of rows stopped matching themselves. The existing unit test for `fingerprint(for:)` already compared with `accuracy: 0.001`, which is the tolerance production needed all along. Route both Indexer call sites through a new `ScreenshotStore.isAlreadyIndexed(at:mtime:size:)` that applies a 1 ms mtime tolerance and an exact size match. Size still catches real edits (which almost always change byte count), so the slack on mtime doesn't mask genuine changes. No schema change; existing rows benefit immediately on next launch. Added a regression test that upserts a high-precision fractional mtime (`Date(timeIntervalSince1970: 1_776_864_366.2835145)`) and asserts `isAlreadyIndexed` returns `true` for the same `(mtime, size)` and `false` when size differs. Co-authored-by: Claude Co-authored-by: GitButler --- Sources/VistaCore/Indexer.swift | 6 ++---- Sources/VistaCore/ScreenshotStore.swift | 17 ++++++++++++++++ .../VistaCoreTests/ScreenshotStoreTests.swift | 20 +++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/Sources/VistaCore/Indexer.swift b/Sources/VistaCore/Indexer.swift index d8b766c..86cfb23 100644 --- a/Sources/VistaCore/Indexer.swift +++ b/Sources/VistaCore/Indexer.swift @@ -213,8 +213,7 @@ public actor Indexer { let attrs = try? fm.attributesOfItem(atPath: url.path) let mtime = (attrs?[.modificationDate] as? Date) ?? Date() let size = (attrs?[.size] as? NSNumber)?.int64Value ?? 0 - if let existing = try? store.fingerprint(for: url), - existing.mtime == mtime, existing.size == size { + if (try? store.isAlreadyIndexed(at: url, mtime: mtime, size: size)) == true { continue } toIndex.append(url) @@ -313,8 +312,7 @@ public actor Indexer { // Skip unchanged files — the fingerprint check saves us an expensive // OCR pass for every file on every relaunch. - if let existing = try store.fingerprint(for: url), - existing.mtime == mtime, existing.size == size { + if try store.isAlreadyIndexed(at: url, mtime: mtime, size: size) { return } diff --git a/Sources/VistaCore/ScreenshotStore.swift b/Sources/VistaCore/ScreenshotStore.swift index 066d241..a49e0aa 100644 --- a/Sources/VistaCore/ScreenshotStore.swift +++ b/Sources/VistaCore/ScreenshotStore.swift @@ -148,6 +148,23 @@ public final class ScreenshotStore: @unchecked Sendable { // MARK: - Lookups used during incremental scan + /// Whether the DB already holds an entry for `url` matching the given + /// `(mtime, size)`. Mtime is compared with a 1 ms tolerance to survive + /// the `Double` round-trip through SQLite's REAL column: at year-2026 + /// timestamps (~1.78e9 s) one Double ULP is ~400 ns, which is below + /// APFS's ns-resolution mtime, so exact equality sporadically fails + /// and the indexer used to re-OCR a random ~37% subset of rows on + /// every relaunch. `size` is still compared exactly — a real edit + /// almost always changes size, so the tolerance on mtime doesn't + /// mask genuine changes. + public func isAlreadyIndexed(at url: URL, mtime: Date, size: Int64) throws -> Bool { + guard let existing = try fingerprint(for: url) else { return false } + guard existing.size == size else { return false } + let drift = abs(existing.mtime.timeIntervalSinceReferenceDate + - mtime.timeIntervalSinceReferenceDate) + return drift < 0.001 + } + /// Returns the (mtime, size) we last indexed for this path, or nil if /// unknown. Indexer uses this to skip unchanged files without reading /// pixels. diff --git a/Tests/VistaCoreTests/ScreenshotStoreTests.swift b/Tests/VistaCoreTests/ScreenshotStoreTests.swift index b08be5c..a576239 100644 --- a/Tests/VistaCoreTests/ScreenshotStoreTests.swift +++ b/Tests/VistaCoreTests/ScreenshotStoreTests.swift @@ -134,6 +134,26 @@ final class ScreenshotStoreTests: XCTestCase { XCTAssertNil(try store.fingerprint(for: Self.sampleURL(name: "never-seen.png"))) } + // Regression: exact `Date == Date` after a round-trip through the REAL + // `mtime` column used to fail sporadically because a Double ULP at + // 2026 timestamps (~400 ns) falls below APFS's ns-resolution mtime. + // The store now applies a 1 ms tolerance via `isAlreadyIndexed`, so + // high-precision fractional mtimes survive the round-trip. + func testIsAlreadyIndexedSurvivesDoublePrecisionRoundTrip() throws { + let path = Self.sampleURL(name: "Drift.png") + // Fractional seconds chosen so `.timeIntervalSince1970` can't be + // represented exactly as a Double — that's the shape filesystem + // mtimes actually have on APFS. + let mtime = Date(timeIntervalSince1970: 1_776_864_366.2835145) + let size: Int64 = 42_630 + try store.upsert(Self.sampleRecord(path: path, mtime: mtime, size: size, text: "")) + + XCTAssertTrue(try store.isAlreadyIndexed(at: path, mtime: mtime, size: size)) + XCTAssertFalse(try store.isAlreadyIndexed(at: path, mtime: mtime, size: size + 1)) + XCTAssertFalse(try store.isAlreadyIndexed(at: Self.sampleURL(name: "nope.png"), + mtime: mtime, size: size)) + } + // MARK: - Helpers private static func sampleURL(name: String) -> URL { From 93ced646d3250dcab17b699edd5d204dcef7d464 Mon Sep 17 00:00:00 2001 From: Gordon Beeming Date: Thu, 23 Apr 2026 00:24:55 +1000 Subject: [PATCH 2/2] Address Copilot review on #5: named tolerance + doc wording MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four bot-review follow-ups on the mtime tolerance fix: - Extract the 0.001 s literal into `ScreenshotStore.mtimeTolerance` so the value lives in one place and tests can reference it instead of re-declaring a magic number. - Switch the comparison from `drift < 0.001` to `drift <= Self.mtimeTolerance` so the doc's "1 ms tolerance" and the code agree on inclusive equality at exactly 1 ms. - Rewrite the "below APFS's ns-resolution mtime" phrase in both the `ScreenshotStore` doc comment and the regression test comment: a ~400 ns Double ULP is *coarser* than APFS's 1 ns resolution, which is what makes some timestamps unrepresentable as Double — the previous wording read backwards. - Added a drift-outside-tolerance assertion to the regression test using `mtimeTolerance * 10` as the epsilon (well beyond the ~400 ns round-trip slop, so deterministic). Co-authored-by: Claude Co-authored-by: GitButler --- Sources/VistaCore/ScreenshotStore.swift | 25 ++++++++++++------- .../VistaCoreTests/ScreenshotStoreTests.swift | 15 ++++++++--- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/Sources/VistaCore/ScreenshotStore.swift b/Sources/VistaCore/ScreenshotStore.swift index a49e0aa..c99e031 100644 --- a/Sources/VistaCore/ScreenshotStore.swift +++ b/Sources/VistaCore/ScreenshotStore.swift @@ -148,21 +148,28 @@ public final class ScreenshotStore: @unchecked Sendable { // MARK: - Lookups used during incremental scan + /// Tolerance applied when comparing a file's current mtime against + /// the value we stored the last time we indexed it. Public so tests + /// can pin behaviour to the same constant instead of re-declaring + /// the magic number. + public static let mtimeTolerance: TimeInterval = 0.001 + /// Whether the DB already holds an entry for `url` matching the given - /// `(mtime, size)`. Mtime is compared with a 1 ms tolerance to survive - /// the `Double` round-trip through SQLite's REAL column: at year-2026 - /// timestamps (~1.78e9 s) one Double ULP is ~400 ns, which is below - /// APFS's ns-resolution mtime, so exact equality sporadically fails - /// and the indexer used to re-OCR a random ~37% subset of rows on - /// every relaunch. `size` is still compared exactly — a real edit - /// almost always changes size, so the tolerance on mtime doesn't - /// mask genuine changes. + /// `(mtime, size)`. Mtime is compared with `mtimeTolerance` (1 ms) to + /// survive the `Double` round-trip through SQLite's REAL column: at + /// year-2026 timestamps (~1.78e9 s) one Double ULP is ~400 ns, which + /// is coarser than APFS's ns-resolution mtime, so a Double can't + /// represent every distinct APFS timestamp and exact equality + /// sporadically fails. Before the tolerance, the indexer re-OCR'd a + /// random ~37 % subset of rows on every relaunch. `size` is still + /// compared exactly — a real edit almost always changes size, so the + /// slack on mtime doesn't mask genuine changes. public func isAlreadyIndexed(at url: URL, mtime: Date, size: Int64) throws -> Bool { guard let existing = try fingerprint(for: url) else { return false } guard existing.size == size else { return false } let drift = abs(existing.mtime.timeIntervalSinceReferenceDate - mtime.timeIntervalSinceReferenceDate) - return drift < 0.001 + return drift <= Self.mtimeTolerance } /// Returns the (mtime, size) we last indexed for this path, or nil if diff --git a/Tests/VistaCoreTests/ScreenshotStoreTests.swift b/Tests/VistaCoreTests/ScreenshotStoreTests.swift index a576239..d4f54d1 100644 --- a/Tests/VistaCoreTests/ScreenshotStoreTests.swift +++ b/Tests/VistaCoreTests/ScreenshotStoreTests.swift @@ -136,9 +136,11 @@ final class ScreenshotStoreTests: XCTestCase { // Regression: exact `Date == Date` after a round-trip through the REAL // `mtime` column used to fail sporadically because a Double ULP at - // 2026 timestamps (~400 ns) falls below APFS's ns-resolution mtime. - // The store now applies a 1 ms tolerance via `isAlreadyIndexed`, so - // high-precision fractional mtimes survive the round-trip. + // 2026 timestamps (~400 ns) is coarser than APFS's 1 ns mtime + // resolution — so a `Double` can't represent every distinct APFS + // timestamp. `isAlreadyIndexed` applies `ScreenshotStore.mtimeTolerance` + // (1 ms) so high-precision fractional mtimes still match after the + // round-trip. func testIsAlreadyIndexedSurvivesDoublePrecisionRoundTrip() throws { let path = Self.sampleURL(name: "Drift.png") // Fractional seconds chosen so `.timeIntervalSince1970` can't be @@ -152,6 +154,13 @@ final class ScreenshotStoreTests: XCTestCase { XCTAssertFalse(try store.isAlreadyIndexed(at: path, mtime: mtime, size: size + 1)) XCTAssertFalse(try store.isAlreadyIndexed(at: Self.sampleURL(name: "nope.png"), mtime: mtime, size: size)) + + // Drift outside the tolerance window is treated as "changed". The + // epsilon (10 × tolerance) is deliberately well beyond the ~400 ns + // Double-round-trip slop so the assertion is deterministic across + // fractional mtime values. + let beyondTol = mtime.addingTimeInterval(ScreenshotStore.mtimeTolerance * 10) + XCTAssertFalse(try store.isAlreadyIndexed(at: path, mtime: beyondTol, size: size)) } // MARK: - Helpers