diff --git a/Package.swift b/Package.swift
index 94be1bc..8457d3c 100644
--- a/Package.swift
+++ b/Package.swift
@@ -66,6 +66,10 @@ let package = Package(
             dependencies: ["FetchCore"],
             path: "Tests/SwiftlyFetchTestFixtures"
         ),
+        .executableTarget(
+            name: "SwiftlyFetchCorpusAudit",
+            dependencies: ["FetchCore", "FetchKit"]
+        ),
         .testTarget(
             name: "RAGCoreTests",
             dependencies: ["RAGCore"]
diff --git a/ROADMAP.md b/ROADMAP.md
index 917a1af..bc05588 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -191,6 +191,7 @@ In Progress
 - [x] Refine the default in-memory all-term ranker so tighter evidence beats scattered term mentions instead of falling through to document ID tie-breaking.
 - [x] Add a second checked-in text source for corpus-based tests so fixture coverage is not only Gutenberg-derived.
 - [x] Add a Hugging Face-derived audit micro-corpus that combines short stories, markdown reference records, and line-oriented literary text across the default in-memory and macOS SearchKit-backed paths.
+- [x] Add an opt-in Hugging Face corpus audit lane that downloads bounded Dataset Viewer slices, indexes a larger temporary corpus locally, and reports ranking/snippet checks without making default CI network-dependent.
 - [ ] Audit larger app-like corpus result quality now that field-aware ranking, compact all-term evidence, phrase weighting, truncation cues, multi-term snippets, and field-evidence metadata are in place.
 - [ ] Keep the persistent `FetchKitLibrary` construction and search API surface under review as real callers exercise the current design.
 - [ ] Explore an opt-in extended snippet surface that can use idle time to precompute short document summaries for larger records, with Apple's [`FoundationModels`](https://developer.apple.com/documentation/foundationmodels) or another local summarization path as the first candidate instead of making foreground full-text search wait on summarization.
diff --git a/Sources/SwiftlyFetchCorpusAudit/main.swift b/Sources/SwiftlyFetchCorpusAudit/main.swift
new file mode 100644
index 0000000..4f11102
--- /dev/null
+++ b/Sources/SwiftlyFetchCorpusAudit/main.swift
@@ -0,0 +1,434 @@
+import FetchCore
+import FetchKit
+import Foundation
+
+@main
+struct SwiftlyFetchCorpusAudit {
+    static func main() async {
+        do {
+            let configuration = AuditConfiguration(environment: ProcessInfo.processInfo.environment)
+            let auditor = CorpusAuditor(configuration: configuration)
+            try await auditor.run()
+        } catch {
+            fputs("ERROR: \(error.localizedDescription)\n", stderr)
+            exit(1)
+        }
+    }
+}
+
+private struct AuditConfiguration {
+    var tinyStoriesLength: Int
+    var simpleWikipediaLength: Int
+    var gutenbergPoetryLength: Int
+    var requestTimeout: TimeInterval
+    var token: String?
+
+    init(environment: [String: String]) {
+        tinyStoriesLength = Self.boundedLength(
+            environment["HF_CORPUS_AUDIT_TINYSTORIES_LENGTH"],
+            defaultValue: 60
+        )
+        simpleWikipediaLength = Self.boundedLength(
+            environment["HF_CORPUS_AUDIT_SIMPLEWIKI_LENGTH"],
+            defaultValue: 30
+        )
+        gutenbergPoetryLength = Self.boundedLength(
+            environment["HF_CORPUS_AUDIT_POETRY_LENGTH"],
+            defaultValue: 80
+        )
+        requestTimeout = TimeInterval(
+            Self.positiveInteger(environment["HF_CORPUS_AUDIT_TIMEOUT_SECONDS"], defaultValue: 30)
+        )
+        token = environment["HF_TOKEN"].flatMap { $0.isEmpty ? nil : $0 }
+    }
+
+    private static func boundedLength(_ value: String?, defaultValue: Int) -> Int {
+        min(100, positiveInteger(value, defaultValue: defaultValue))
+    }
+
+    private static func positiveInteger(_ value: String?, defaultValue: Int) -> Int {
+        guard let value, let intValue = Int(value), intValue > 0 else {
+            return defaultValue
+        }
+
+        return intValue
+    }
+}
+
+private struct CorpusAuditor {
+    var configuration: AuditConfiguration
+
+    func run() async throws {
+        print("Running Hugging Face corpus audit with bounded Dataset Viewer slices.")
+        let client = DatasetViewerClient(
+            token: configuration.token,
+            requestTimeout: configuration.requestTimeout
+        )
+
+        async let tinyStoriesRows = client.rows(
+            dataset: "roneneldan/TinyStories",
+            config: "default",
+            split: "train",
+            offset: 0,
+            length: configuration.tinyStoriesLength
+        )
+        async let simpleWikipediaRows = client.rows(
+            dataset: "juno-labs/simple_wikipedia",
+            config: "default",
+            split: "train",
+            offset: 0,
+            length: configuration.simpleWikipediaLength
+        )
+        async let poetryRows = client.rows(
+            dataset: "biglam/gutenberg-poetry-corpus",
+            config: "default",
+            split: "train",
+            offset: 0,
+            length: configuration.gutenbergPoetryLength
+        )
+
+        let records = try await CorpusMapper.records(
+            tinyStoriesRows: tinyStoriesRows,
+            simpleWikipediaRows: simpleWikipediaRows,
+            poetryRows: poetryRows
+        )
+
+        guard !records.isEmpty else {
+            throw AuditError.noRecords
+        }
+
+        let library = FetchKitLibrary()
+        try await library.addDocuments(records)
+
+        let checks = AuditCheck.defaults
+        var failures: [String] = []
+
+        print("Indexed \(records.count) documents from \(uniqueDatasetCount(in: records)) datasets.")
+
+        for check in checks {
+            do {
+                try await run(check, library: library)
+            } catch {
+                failures.append(error.localizedDescription)
+            }
+        }
+
+        if !failures.isEmpty {
+            throw AuditError.failedChecks(failures)
+        }
+
+        print("Hugging Face corpus audit passed \(checks.count) checks.")
+    }
+
+    private func run(_ check: AuditCheck, library: FetchKitLibrary) async throws {
+        let results = try await library.search(check.query)
+
+        guard let firstResult = results.first else {
+            throw AuditError.checkFailed(
+                "\(check.name) returned no results for query '\(check.query.text)'."
+            )
+        }
+
+        let dataset = firstResult.document.id.rawValue.components(separatedBy: "-").prefix(2).joined(separator: "-")
+        guard firstResult.document.id.rawValue.hasPrefix(check.expectedIDPrefix) else {
+            throw AuditError.checkFailed(
+                "\(check.name) expected top document prefix '\(check.expectedIDPrefix)' but got '\(firstResult.document.id.rawValue)'."
+            )
+        }
+
+        guard let snippet = firstResult.snippet, !snippet.text.isEmpty else {
+            throw AuditError.checkFailed(
+                "\(check.name) top result '\(firstResult.document.id.rawValue)' did not include a snippet."
+            )
+        }
+
+        for expectedText in check.expectedSnippetTerms {
+            guard snippet.text.localizedCaseInsensitiveContains(expectedText) else {
+                throw AuditError.checkFailed(
+                    "\(check.name) top result '\(firstResult.document.id.rawValue)' snippet did not include '\(expectedText)'."
+                )
+            }
+        }
+
+        let snippetPreview = snippet.text.replacingOccurrences(of: "\n", with: " ")
+        print(
+            "[pass] \(check.name): \(dataset) \(firstResult.document.id.rawValue) score=\(String(format: "%.3f", firstResult.score)) field=\(firstResult.snippetField?.rawValue ?? "none") snippet=\"\(snippetPreview.prefix(120))\""
+        )
+    }
+
+    private func uniqueDatasetCount(in records: [FetchDocumentRecord]) -> Int {
+        Set(records.compactMap { $0.metadata["hf.dataset"] }).count
+    }
+}
+
+private struct AuditCheck {
+    var name: String
+    var query: FetchSearchQuery
+    var expectedIDPrefix: String
+    var expectedSnippetTerms: [String]
+
+    static let defaults: [AuditCheck] = [
+        AuditCheck(
+            name: "TinyStories sewing retrieval",
+            query: FetchSearchQuery("needle sew shirt", kind: .allTerms, fields: [.title, .body], limit: 5),
+            expectedIDPrefix: "hf-tinystories-0",
+            expectedSnippetTerms: ["needle"]
+        ),
+        AuditCheck(
+            name: "TinyStories toy retrieval",
+            query: FetchSearchQuery("triangle puddle toy", kind: .allTerms, fields: [.title, .body], limit: 5),
+            expectedIDPrefix: "hf-tinystories-6",
+            expectedSnippetTerms: ["triangle"]
+        ),
+        AuditCheck(
+            name: "Simple Wikipedia calendar retrieval",
+            query: FetchSearchQuery("april leap year flowers", kind: .allTerms, fields: [.title, .body], limit: 5),
+            expectedIDPrefix: "hf-simplewiki-0",
+            expectedSnippetTerms: ["April"]
+        ),
+        AuditCheck(
+            name: "Simple Wikipedia rhetoric retrieval",
+            query: FetchSearchQuery("against person weak argument", kind: .allTerms, fields: [.title, .body], limit: 5),
+            expectedIDPrefix: "hf-simplewiki-18",
+            expectedSnippetTerms: []
+        ),
+        AuditCheck(
+            name: "Gutenberg poetry northland retrieval",
+            query: FetchSearchQuery("great lakes northland ojibways", kind: .allTerms, fields: [.title, .body], limit: 5),
+            expectedIDPrefix: "hf-poetry-19-lines",
+            expectedSnippetTerms: ["Northland"]
+        ),
+    ]
+}
+
+private struct DatasetViewerClient {
+    var token: String?
+    var requestTimeout: TimeInterval
+
+    func rows(
+        dataset: String,
+        config: String,
+        split: String,
+        offset: Int,
+        length: Int
+    ) async throws -> [DatasetRow] {
+        var components = URLComponents(string: "https://datasets-server.huggingface.co/rows")
+        components?.queryItems = [
+            URLQueryItem(name: "dataset", value: dataset),
+            URLQueryItem(name: "config", value: config),
+            URLQueryItem(name: "split", value: split),
+            URLQueryItem(name: "offset", value: String(offset)),
+            URLQueryItem(name: "length", value: String(length)),
+        ]
+
+        guard let url = components?.url else {
+            throw AuditError.invalidURL("Could not build a Dataset Viewer URL for \(dataset).")
+        }
+
+        var request = URLRequest(url: url, timeoutInterval: requestTimeout)
+        request.httpMethod = "GET"
+
+        if let token {
+            request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
+        }
+
+        let (data, response) = try await URLSession.shared.data(for: request)
+
+        guard let httpResponse = response as? HTTPURLResponse else {
+            throw AuditError.network("Dataset Viewer did not return an HTTP response for \(dataset).")
+        }
+
+        guard 200..<300 ~= httpResponse.statusCode else {
+            let body = String(data: data, encoding: .utf8) ?? ""
+            throw AuditError.network(
+                "Dataset Viewer returned HTTP \(httpResponse.statusCode) for \(dataset). \(body.prefix(240))"
+            )
+        }
+
+        return try JSONDecoder().decode(DatasetRowsResponse.self, from: data).rows
+    }
+}
+
+private struct DatasetRowsResponse: Decodable {
+    var rows: [DatasetRow]
+}
+
+private struct DatasetRow: Decodable {
+    var rowIdx: Int
+    var row: [String: JSONValue]
+
+    enum CodingKeys: String, CodingKey {
+        case rowIdx = "row_idx"
+        case row
+    }
+}
+
+private enum JSONValue: Decodable {
+    case string(String)
+    case int(Int)
+    case double(Double)
+    case bool(Bool)
+    case null
+
+    init(from decoder: Decoder) throws {
+        let container = try decoder.singleValueContainer()
+
+        if container.decodeNil() {
+            self = .null
+        } else if let value = try? container.decode(String.self) {
+            self = .string(value)
+        } else if let value = try? container.decode(Int.self) {
+            self = .int(value)
+        } else if let value = try? container.decode(Double.self) {
+            self = .double(value)
+        } else if let value = try? container.decode(Bool.self) {
+            self = .bool(value)
+        } else {
+            self = .null
+        }
+    }
+
+    var stringValue: String? {
+        switch self {
+            case let .string(value):
+                value
+            case let .int(value):
+                String(value)
+            case let .double(value):
+                String(value)
+            case let .bool(value):
+                String(value)
+            case .null:
+                nil
+        }
+    }
+}
+
+private enum CorpusMapper {
+    static func records(
+        tinyStoriesRows: [DatasetRow],
+        simpleWikipediaRows: [DatasetRow],
+        poetryRows: [DatasetRow]
+    ) -> [FetchDocumentRecord] {
+        tinyStoriesRecords(from: tinyStoriesRows)
+            + simpleWikipediaRecords(from: simpleWikipediaRows)
+            + poetryRecords(from: poetryRows)
+    }
+
+    private static func tinyStoriesRecords(from rows: [DatasetRow]) -> [FetchDocumentRecord] {
+        rows.compactMap { row in
+            guard let text = row.row["text"]?.stringValue, !text.isEmpty else {
+                return nil
+            }
+
+            return FetchDocumentRecord(
+                id: FetchDocumentID("hf-tinystories-\(row.rowIdx)"),
+                title: title(from: text, fallback: "TinyStories row \(row.rowIdx)"),
+                body: text,
+                kind: .article,
+                language: "en",
+                sourceURI: "https://huggingface.co/datasets/roneneldan/TinyStories",
+                metadata: metadata(dataset: "roneneldan/TinyStories", row: row.rowIdx)
+            )
+        }
+    }
+
+    private static func simpleWikipediaRecords(from rows: [DatasetRow]) -> [FetchDocumentRecord] {
+        rows.compactMap { row in
+            guard
+                let title = row.row["title"]?.stringValue,
+                let content = row.row["content"]?.stringValue,
+                !content.isEmpty
+            else {
+                return nil
+            }
+
+            return FetchDocumentRecord(
+                id: FetchDocumentID("hf-simplewiki-\(row.rowIdx)"),
+                title: title,
+                body: limited(content, maxCharacters: 12_000),
+                contentType: .markdown,
+                kind: .reference,
+                language: "en",
+                sourceURI: "https://huggingface.co/datasets/juno-labs/simple_wikipedia",
+                metadata: metadata(dataset: "juno-labs/simple_wikipedia", row: row.rowIdx)
+            )
+        }
+    }
+
+    private static func poetryRecords(from rows: [DatasetRow]) -> [FetchDocumentRecord] {
+        let groupedRows = Dictionary(grouping: rows) { row in
+            row.row["gutenberg_id"]?.stringValue ?? "unknown"
+        }
+
+        return groupedRows.keys.sorted().flatMap { gutenbergID in
+            let rows = (groupedRows[gutenbergID] ?? []).sorted { $0.rowIdx < $1.rowIdx }
+            return rows.chunked(into: 12).compactMap { chunk -> FetchDocumentRecord? in
+                let lines = chunk.compactMap { $0.row["line"]?.stringValue }.filter { !$0.isEmpty }
+                guard !lines.isEmpty, let firstRow = chunk.first?.rowIdx, let lastRow = chunk.last?.rowIdx else {
+                    return nil
+                }
+
+                return FetchDocumentRecord(
+                    id: FetchDocumentID("hf-poetry-\(gutenbergID)-lines-\(firstRow)-\(lastRow)"),
+                    title: "Gutenberg Poetry \(gutenbergID): lines \(firstRow)-\(lastRow)",
+                    body: lines.joined(separator: "\n"),
+                    kind: .article,
+                    language: "en",
+                    sourceURI: "https://huggingface.co/datasets/biglam/gutenberg-poetry-corpus",
+                    metadata: metadata(dataset: "biglam/gutenberg-poetry-corpus", row: "\(firstRow)-\(lastRow)")
+                )
+            }
+        }
+    }
+
+    private static func title(from text: String, fallback: String) -> String {
+        let firstLine = text
+            .split(separator: "\n", omittingEmptySubsequences: true)
+            .first
+            .map(String.init) ?? fallback
+        return String(firstLine.prefix(72))
+    }
+
+    private static func limited(_ text: String, maxCharacters: Int) -> String {
+        guard text.count > maxCharacters else {
+            return text
+        }
+
+        return String(text.prefix(maxCharacters))
+    }
+
+    private static func metadata(dataset: String, row: some CustomStringConvertible) -> [String: String] {
+        [
+            "hf.dataset": dataset,
+            "hf.row": row.description,
+        ]
+    }
+}
+
+private enum AuditError: LocalizedError {
+    case invalidURL(String)
+    case network(String)
+    case noRecords
+    case checkFailed(String)
+    case failedChecks([String])
+
+    var errorDescription: String? {
+        switch self {
+            case let .invalidURL(message), let .network(message), let .checkFailed(message):
+                message
+            case .noRecords:
+                "The Hugging Face corpus audit could not build any FetchKit records from the downloaded dataset slices."
+            case let .failedChecks(messages):
+                "The Hugging Face corpus audit failed \(messages.count) check(s):\n- \(messages.joined(separator: "\n- "))"
+        }
+    }
+}
+
+private extension Array {
+    func chunked(into size: Int) -> [[Element]] {
+        stride(from: 0, to: count, by: size).map { start in
+            Array(self[start ..< Swift.min(start + size, count)])
+        }
+    }
+}
diff --git a/docs/maintainers/fixture-corpus.md b/docs/maintainers/fixture-corpus.md
index 1d76780..8320be8 100644
--- a/docs/maintainers/fixture-corpus.md
+++ b/docs/maintainers/fixture-corpus.md
@@ -50,6 +50,25 @@ The fourth fixture pass added a small Hugging Face-derived audit corpus that com
 
 This is still a checked-in micro-corpus rather than a live dataset lane. Its job is to make the current in-memory and SearchKit-backed result-quality tests cover more document shapes before deciding whether a larger local benchmark or opt-in dataset download is worth maintaining.
 
+The first live larger-corpus lane now exists as an opt-in maintainer command:
+
+```bash
+scripts/repo-maintenance/run-huggingface-corpus-audit.sh
+```
+
+That lane runs the `SwiftlyFetchCorpusAudit` executable target. It downloads bounded Dataset Viewer `/rows` slices from the same three Hugging Face families, maps them into temporary `FetchDocumentRecord` values in memory, indexes them through `FetchKitLibrary`, and reports pass/fail checks plus top-hit snippets. It intentionally stays out of default `swift test`, default GitHub CI, and checked-in fixture data because it depends on live network access and Hub dataset availability.
+
+The default slice sizes can be tuned without editing source:
+
+```bash
+HF_CORPUS_AUDIT_TINYSTORIES_LENGTH=80 \
+HF_CORPUS_AUDIT_SIMPLEWIKI_LENGTH=40 \
+HF_CORPUS_AUDIT_POETRY_LENGTH=100 \
+scripts/repo-maintenance/run-huggingface-corpus-audit.sh
+```
+
+The Dataset Viewer `/rows` endpoint caps `length` at 100, so the audit tool also caps each configured slice length at 100. If a private or rate-limited dataset is added later, the lane will use `HF_TOKEN` when present.
+
 ## Hugging Face Dependency Boundary
 
 Do not add a Hugging Face Swift dependency for the default fixture lane yet. The current checked-in fixture keeps CI deterministic and avoids adding a network, token, cache, or package-resolution requirement to ordinary tests.
@@ -72,6 +91,14 @@ curl -s 'https://datasets-server.huggingface.co/first-rows?dataset=juno-labs/sim
 curl -s 'https://datasets-server.huggingface.co/first-rows?dataset=biglam/gutenberg-poetry-corpus&config=default&split=train'
 ```
 
+The opt-in audit lane uses the same endpoint family with bounded `/rows` slices:
+
+```bash
+curl -s 'https://datasets-server.huggingface.co/rows?dataset=roneneldan/TinyStories&config=default&split=train&offset=0&length=60'
+curl -s 'https://datasets-server.huggingface.co/rows?dataset=juno-labs/simple_wikipedia&config=default&split=train&offset=0&length=30'
+curl -s 'https://datasets-server.huggingface.co/rows?dataset=biglam/gutenberg-poetry-corpus&config=default&split=train&offset=0&length=80'
+```
+
 Hugging Face documents dataset parquet discovery through the Dataset Viewer service in the [`huggingface_hub` CLI guide](https://huggingface.co/docs/huggingface_hub/guides/cli) and the Dataset Viewer [Parquet conversion guide](https://huggingface.co/docs/dataset-viewer/parquet).
 
 ## Next Use
diff --git a/scripts/repo-maintenance/run-huggingface-corpus-audit.sh b/scripts/repo-maintenance/run-huggingface-corpus-audit.sh
new file mode 100755
index 0000000..7d9396f
--- /dev/null
+++ b/scripts/repo-maintenance/run-huggingface-corpus-audit.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env sh
+set -eu
+
+SELF_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
+export REPO_MAINTENANCE_COMMON_DIR="$SELF_DIR/lib"
+. "$SELF_DIR/lib/common.sh"
+
+load_profile_env
+ensure_git_repo
+
+log "Running the opt-in Hugging Face corpus audit lane from $REPO_ROOT."
+log "Set HF_CORPUS_AUDIT_* environment variables to tune bounded Dataset Viewer slice sizes."
+
+cd "$REPO_ROOT"
+swift run SwiftlyFetchCorpusAudit