Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions Sources/VistaCore/Indexer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,7 @@ public actor Indexer {
let attrs = try? fm.attributesOfItem(atPath: url.path)
let mtime = (attrs?[.modificationDate] as? Date) ?? Date()
let size = (attrs?[.size] as? NSNumber)?.int64Value ?? 0
if let existing = try? store.fingerprint(for: url),
existing.mtime == mtime, existing.size == size {
if (try? store.isAlreadyIndexed(at: url, mtime: mtime, size: size)) == true {
continue
}
toIndex.append(url)
Expand Down Expand Up @@ -313,8 +312,7 @@ public actor Indexer {

// Skip unchanged files — the fingerprint check saves us an expensive
// OCR pass for every file on every relaunch.
if let existing = try store.fingerprint(for: url),
existing.mtime == mtime, existing.size == size {
if try store.isAlreadyIndexed(at: url, mtime: mtime, size: size) {
return
}

Expand Down
24 changes: 24 additions & 0 deletions Sources/VistaCore/ScreenshotStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,30 @@ public final class ScreenshotStore: @unchecked Sendable {

// MARK: - Lookups used during incremental scan

/// Tolerance, in seconds (0.001 s = 1 ms), applied when comparing a
/// file's current mtime against the value we stored the last time we
/// indexed it. `isAlreadyIndexed(at:mtime:size:)` treats two mtimes as
/// equal when they differ by no more than this amount.
///
/// Public so tests can pin behaviour to the same constant instead of
/// re-declaring the magic number.
public static let mtimeTolerance: TimeInterval = 0.001

/// Reports whether the store already holds a fingerprint for `url` that
/// matches the supplied `(mtime, size)` pair.
///
/// `size` must match exactly; `mtime` matches when it lies within
/// `mtimeTolerance` (1 ms) of the stored value. The slack exists because
/// the mtime makes a `Double` round-trip through SQLite's REAL column:
/// at year-2026 timestamps (~1.78e9 s) adjacent Doubles are 2^-22 s
/// (~240 ns) apart, which is coarser than APFS's ns-resolution mtime, so
/// a Double can't represent every distinct APFS timestamp and exact
/// equality sporadically fails. Before the tolerance, the indexer
/// re-OCR'd a random ~37 % subset of rows on every relaunch. Keeping the
/// `size` comparison exact means the mtime slack doesn't mask genuine
/// changes — a real edit almost always changes size.
///
/// - Parameters:
///   - url: File whose stored fingerprint is looked up.
///   - mtime: The file's current modification date.
///   - size: The file's current size.
/// - Returns: `true` when a stored fingerprint exists, its size equals
///   `size`, and its mtime is within `mtimeTolerance` of `mtime`;
///   `false` otherwise (including when no fingerprint is stored).
/// - Throws: Any error thrown by `fingerprint(for:)`.
public func isAlreadyIndexed(at url: URL, mtime: Date, size: Int64) throws -> Bool {
    // No stored row, or a size mismatch, means the file needs (re)indexing.
    guard let stored = try fingerprint(for: url), stored.size == size else {
        return false
    }
    let storedSeconds = stored.mtime.timeIntervalSinceReferenceDate
    let currentSeconds = mtime.timeIntervalSinceReferenceDate
    // Absolute difference: drift in either direction counts the same.
    return abs(storedSeconds - currentSeconds) <= Self.mtimeTolerance
}

/// Returns the (mtime, size) we last indexed for this path, or nil if
/// unknown. Indexer uses this to skip unchanged files without reading
/// pixels.
Expand Down
29 changes: 29 additions & 0 deletions Tests/VistaCoreTests/ScreenshotStoreTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,35 @@ final class ScreenshotStoreTests: XCTestCase {
XCTAssertNil(try store.fingerprint(for: Self.sampleURL(name: "never-seen.png")))
}

// Regression: exact `Date == Date` after a round-trip through the REAL
// `mtime` column used to fail sporadically because the spacing between
// adjacent Doubles at 2026 timestamps (2^-22 s, ~240 ns) is coarser than
// APFS's 1 ns mtime resolution — so a `Double` can't represent every
// distinct APFS timestamp. `isAlreadyIndexed` applies
// `ScreenshotStore.mtimeTolerance` (1 ms) so high-precision fractional
// mtimes still match after the round-trip.
func testIsAlreadyIndexedSurvivesDoublePrecisionRoundTrip() throws {
    let path = Self.sampleURL(name: "Drift.png")
    // Fractional seconds chosen so `.timeIntervalSince1970` can't be
    // represented exactly as a Double — that's the shape filesystem
    // mtimes actually have on APFS.
    let mtime = Date(timeIntervalSince1970: 1_776_864_366.2835145)
    let size: Int64 = 42_630
    try store.upsert(Self.sampleRecord(path: path, mtime: mtime, size: size, text: ""))

    XCTAssertTrue(try store.isAlreadyIndexed(at: path, mtime: mtime, size: size))
    XCTAssertFalse(try store.isAlreadyIndexed(at: path, mtime: mtime, size: size + 1))
    XCTAssertFalse(try store.isAlreadyIndexed(at: Self.sampleURL(name: "nope.png"),
                                              mtime: mtime, size: size))

    // A non-zero drift well inside the window must still match, in either
    // direction. Without these assertions the test would also pass against
    // an exact-equality implementation whenever the round-trip happens to
    // be lossless, so the tolerance itself would be untested.
    let withinTol = ScreenshotStore.mtimeTolerance / 10
    XCTAssertTrue(try store.isAlreadyIndexed(at: path,
                                             mtime: mtime.addingTimeInterval(withinTol),
                                             size: size))
    XCTAssertTrue(try store.isAlreadyIndexed(at: path,
                                             mtime: mtime.addingTimeInterval(-withinTol),
                                             size: size))

    // Drift outside the tolerance window is treated as "changed". The
    // epsilon (10 × tolerance) is deliberately well beyond the
    // sub-microsecond Double-round-trip slop so the assertion is
    // deterministic across fractional mtime values. Checked in both
    // directions because the comparison is on absolute drift.
    let beyondTol = ScreenshotStore.mtimeTolerance * 10
    XCTAssertFalse(try store.isAlreadyIndexed(at: path,
                                              mtime: mtime.addingTimeInterval(beyondTol),
                                              size: size))
    XCTAssertFalse(try store.isAlreadyIndexed(at: path,
                                              mtime: mtime.addingTimeInterval(-beyondTol),
                                              size: size))
}

// MARK: - Helpers

private static func sampleURL(name: String) -> URL {
Expand Down
Loading