From 093f3448a94c60cf74d4fd255367fa904aaabacd Mon Sep 17 00:00:00 2001 From: Jin Choi Date: Mon, 25 May 2026 14:55:47 -0700 Subject: [PATCH 1/3] feat(gate): add read-only insight quality audit (Phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds scripts/insight-gate.ts + scripts/lib/insight-gate.ts: a deterministic, recomputable semantic-quality audit over the insight corpus. Sibling to health.ts (structural validation) and verify-contracts.ts (doc/code surface) — this layer asks the quality questions those don't: is the stance directional? is it attributed to a real source? does its topic match its path? is it a near-duplicate? Checks (all deterministic, no LLM in the gate): - stance present / stance directional (heuristic floor) - attribution resolves to a known source (by normalized title or url) - topic matches file path - nearest-INSIGHT-neighbor cosine novelty: exact cosine on the stored float32 vectors (the vec0 table returns L2 distance, not cosine), INS-only via k=64 overfetch, with a block-threshold simulation and deduped triage pairs Phase 1 is audit-only: it reports, never blocks (exit 1 only on operational failure). brain.db is opened read-only. Outputs land in the gitignored knowledge-base/meta/. Forward enforcement (Phase 2) will reuse these checks. Tests: 47 (unit + in-memory sqlite-vec integration covering blob round-trip, PRI-/MM- filtering, and exact cosine). 
Run via: npm run gate -- --audit --all Co-Authored-By: Claude Opus 4.7 (1M context) --- package.json | 1 + scripts/insight-gate.ts | 242 ++++++++++++++ scripts/lib/insight-gate.test.ts | 337 +++++++++++++++++++ scripts/lib/insight-gate.ts | 558 +++++++++++++++++++++++++++++++ 4 files changed, 1138 insertions(+) create mode 100644 scripts/insight-gate.ts create mode 100644 scripts/lib/insight-gate.test.ts create mode 100644 scripts/lib/insight-gate.ts diff --git a/package.json b/package.json index 8821e4dae2..78e48f0d9c 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "create-principles": "tsx scripts/create-principles.ts", "sleep": "tsx scripts/sleep.ts", "quality-score": "tsx scripts/quality-score.ts", + "gate": "tsx scripts/insight-gate.ts", "ask": "tsx scripts/ask.ts", "autoknowledge": "tsx scripts/autoknowledge.ts", "wake": "tsx scripts/wake.ts", diff --git a/scripts/insight-gate.ts b/scripts/insight-gate.ts new file mode 100644 index 0000000000..f588ba334f --- /dev/null +++ b/scripts/insight-gate.ts @@ -0,0 +1,242 @@ +#!/usr/bin/env npx tsx +/** + * insight-gate.ts — Semantic quality gate for insights (CLI). + * + * PHASE 1 (this file): AUDIT ONLY. Read-only X-ray of insight quality across + * the corpus. Never exits 1 on quality findings — it reports so the pass-bar can be calibrated + * against the real distribution before forward enforcement is switched on. 
+ * + * Usage: + * npx tsx scripts/insight-gate.ts --audit --all # full corpus X-ray + * npx tsx scripts/insight-gate.ts --audit --since 2026-05-01 + * npx tsx scripts/insight-gate.ts --audit --all --json # machine-readable + * npx tsx scripts/insight-gate.ts --audit --examples 30 # more failing samples + * + * Outputs (in addition to stdout): + * knowledge-base/meta/gate-report.json latest full report (overwritten) + * knowledge-base/meta/gate-log.jsonl one summary line per run (appended) + * + * Phase 2 (not yet wired) will add forward enforcement: scope to a batch and + * exit 1 on failures, reusing the exact check functions in lib/insight-gate.ts. + */ + +import { appendFileSync, mkdirSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import Database from "better-sqlite3"; +import * as sqliteVec from "sqlite-vec"; + +import { + auditInsights, + buildNoveltyComputer, + buildSourceIndex, + loadGateInsights, + type AuditReport, + type CheckId, + type NearestFn, +} from "./lib/insight-gate"; +import { KB_ROOT } from "./lib/kb-root"; + +const REPORT_PATH = join(KB_ROOT, "meta", "gate-report.json"); +const LOG_PATH = join(KB_ROOT, "meta", "gate-log.jsonl"); +const DB_PATH = join(KB_ROOT, "db", "brain.db"); + +const CHECK_LABELS: Record = { + stance_present: "stance present", + stance_directional: "stance directional", + attribution_resolves: "attribution resolves", + topic_matches_path: "topic matches path", +}; + +// ─── Args ───────────────────────────────────────────────────────────── + +interface Args { + since: string | null; + json: boolean; + exampleLimit: number; +} + +function parseArgs(argv: string[]): Args { + const sinceIdx = argv.indexOf("--since"); + const examplesIdx = argv.indexOf("--examples"); + return { + since: sinceIdx !== -1 ? argv[sinceIdx + 1] ?? null : null, + json: argv.includes("--json"), + exampleLimit: examplesIdx !== -1 ? 
parseInt(argv[examplesIdx + 1], 10) || 15 : 15, + }; +} + +// ─── Novelty wiring (graceful: structural audit survives a vec failure) ── + +function tryBuildNovelty(): { nearest: NearestFn | undefined; close: () => void } { + try { + // Read-only: an audit must never migrate schema or create tables on brain.db. + // We load sqlite-vec (no DDL) and query the existing embeddings table directly, + // deliberately bypassing initDb()/initVectorTable() which run CREATE/ALTER. + const db = new Database(DB_PATH, { readonly: true, fileMustExist: true }); + sqliteVec.load(db); + return { nearest: buildNoveltyComputer(db), close: () => db.close() }; + } catch (err) { + console.warn( + `WARN: novelty unavailable (${(err as Error).message}). ` + + `Reporting structural checks only.` + ); + return { nearest: undefined, close: () => {} }; + } +} + +// ─── Main ───────────────────────────────────────────────────────────── + +async function main(): Promise { + const args = parseArgs(process.argv.slice(2)); + + let insights = await loadGateInsights(); + const scope = args.since ? 
`since ${args.since}` : "all"; + if (args.since) { + insights = insights.filter((i) => i.dateExtracted >= args.since!); + } + + const sourceIndex = await buildSourceIndex(); + const { nearest, close } = tryBuildNovelty(); + + let report: AuditReport; + try { + report = auditInsights(insights, sourceIndex, nearest, { + scope, + exampleLimit: args.exampleLimit, + }).report; + } finally { + close(); + } + + persist(report); + + if (args.json) { + console.log(JSON.stringify(report, null, 2)); + } else { + printReport(report); + } +} + +// ─── Persistence ────────────────────────────────────────────────────── + +function persist(report: AuditReport): void { + mkdirSync(dirname(REPORT_PATH), { recursive: true }); + writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2) + "\n", "utf-8"); + + const logLine = { + generated_at: report.generated_at, + scope: report.scope, + total: report.total, + coverage: report.coverage, + novelty_available: report.novelty.available, + block_simulation: report.novelty.block_simulation, + singleton_topics: report.fragmentation.singletons, + }; + appendFileSync(LOG_PATH, JSON.stringify(logLine) + "\n", "utf-8"); +} + +// ─── Pretty printer ─────────────────────────────────────────────────── + +function printReport(report: AuditReport): void { + const total = report.total; + + console.log("╔════════════════════════════════════════════════════════╗"); + console.log("║ Zuhn — Insight Gate (AUDIT · read-only) ║"); + console.log("╚════════════════════════════════════════════════════════╝"); + console.log(`Scope: ${report.scope} Insights: ${total.toLocaleString()}`); + + if (total === 0) { + console.log("\nNo insights in scope."); + return; + } + + console.log("\nQuality coverage (insights passing each check):"); + for (const checkId of Object.keys(report.coverage) as CheckId[]) { + const n = report.coverage[checkId]; + console.log( + ` ${CHECK_LABELS[checkId].padEnd(22)} ${fmt(n).padStart(7)} / ${fmt(total)} ${pct(n, total)}` + ); + } + + 
const { available, embedded, unembedded, no_neighbor, histogram, block_simulation, mean_similarity, top_pairs } = + report.novelty; + if (!available) { + console.log("\nNovelty — not computed (embeddings DB unavailable; structural checks only)."); + } else { + const withNeighbor = embedded - no_neighbor; + const noNeighborNote = no_neighbor > 0 ? `, ${fmt(no_neighbor)} no-neighbor` : ""; + console.log( + `\nNovelty — nearest INSIGHT neighbor, exact cosine (${fmt(embedded)} embedded, ${fmt(unembedded)} unembedded${noNeighborNote}):` + ); + if (withNeighbor === 0) { + console.log(" (no insight-to-insight neighbors available)"); + } else { + for (const [label, count] of Object.entries(histogram)) { + console.log(` ${label.padEnd(10)} ${fmt(count).padStart(7)} ${bar(count, withNeighbor)}`); + } + console.log(` mean similarity: ${mean_similarity === null ? "n/a" : mean_similarity.toFixed(3)}`); + + console.log("\nBlock simulation (insights that WOULD be rejected as near-dupes):"); + for (const [threshold, count] of Object.entries(block_simulation)) { + console.log(` >= ${threshold} ${fmt(count).padStart(7)} (${pct(count, withNeighbor)} of measured)`); + } + + if (top_pairs.length > 0) { + console.log("\nClosest insight pairs (dedup triage — highest similarity first):"); + for (const p of top_pairs.slice(0, 10)) { + console.log(` ${p.similarity.toFixed(3)} ${p.id} ~ ${p.nearestId}`); + } + } + } + } + + const frag = report.fragmentation; + console.log( + `\nTopic fragmentation: ${fmt(frag.singletons)} singleton topics / ${fmt(frag.total_topics)} total` + ); + if (frag.singletons > 0) { + const preview = frag.singleton_topics.slice(0, 12).join(", "); + const more = frag.singletons > 12 ? 
`, … (+${frag.singletons - 12} more)` : ""; + console.log(` ${preview}${more}`); + } + + console.log("\nSample failures:"); + let anyFailures = false; + for (const checkId of Object.keys(report.failing_examples) as CheckId[]) { + const examples = report.failing_examples[checkId]; + if (examples.length === 0) continue; + anyFailures = true; + console.log(` ${CHECK_LABELS[checkId]} (${fmt(total - report.coverage[checkId])} failing):`); + for (const ex of examples) { + const detail = ex.detail ? ` — ${ex.detail}` : ""; + console.log(` ${ex.id} ${ex.relPath}${detail}`); + } + } + if (!anyFailures) console.log(" none — all checks pass."); + + console.log(`\nFull report: ${REPORT_PATH}`); +} + +// ─── Format helpers ─────────────────────────────────────────────────── + +function fmt(n: number): string { + return n.toLocaleString(); +} + +function pct(n: number, total: number): string { + return total === 0 ? "0.0%" : `${((100 * n) / total).toFixed(1)}%`; +} + +function bar(count: number, total: number): string { + if (total === 0) return ""; + const width = Math.round((count / total) * 30); + return "█".repeat(width); +} + +main().catch((err) => { + // Contract: AUDIT never exits non-zero on QUALITY findings (it only reports). + // A non-zero exit here means an OPERATIONAL failure — corpus unreadable, report + // unwritable, etc. — which should fail loud rather than pretend success. 
+ console.error("insight-gate.ts failed:", err); + process.exit(1); +}); diff --git a/scripts/lib/insight-gate.test.ts b/scripts/lib/insight-gate.test.ts new file mode 100644 index 0000000000..695b94aed0 --- /dev/null +++ b/scripts/lib/insight-gate.test.ts @@ -0,0 +1,337 @@ +import { describe, expect, it } from "vitest"; +import Database from "better-sqlite3"; + +import { + auditInsights, + buildNoveltyComputer, + checkAttributionResolves, + checkStanceDirectional, + checkStancePresent, + checkTopicMatchesPath, + cosineSimilarity, + isDirectionalStance, + normalizeTitle, + normalizeUrl, + type GateInsight, + type NearestFn, + type SourceIndex, +} from "./insight-gate"; +import { initVectorTable, upsertEmbedding } from "./vector-search"; + +// ─── Fixtures ───────────────────────────────────────────────────────── + +function mk(over: Partial = {}): GateInsight { + return { + id: "INS-260525-AAAA", + domain: "ai-development", + topic: "agents", + title: "A title", + stance: "Agents fail because harness context is incomplete", + sources: [{ title: "Known Source", url: "https://example.com/a" }], + dateExtracted: "2026-05-25", + filePath: "/abs/domains/ai-development/agents/x.md", + relPath: "domains/ai-development/agents/x.md", + ...over, + }; +} + +const INDEX: SourceIndex = { + titles: new Set(["known source"]), + urls: new Set(["example.com/a"]), +}; + +// ─── isDirectionalStance ────────────────────────────────────────────── + +describe("isDirectionalStance", () => { + const directional = [ + "AI will displace most white-collar workers", + "Context engineering beats model choice for agent reliability", + "Teams should adopt typed schemas because runtime checks rot", + "Verification is the bottleneck, so agents need a quality gate", + "Founders who ship early outperform those who plan", + "Memory capture is commoditizing rather than a durable moat", + // Regression: real corpus stances the heuristic wrongly flagged before the + // vacuous-list narrowing + 
directional-verb additions (incidental "good"/"bad"). + "Building intuitive feel for what an AI is good at and when it hallucinates is a valid form of knowledge, not a substitute for 'real' understanding.", + "The booster-doomer debate creates a false spectrum where the only question is whether powerful AI is good or bad", + "Distributing systems across multiple geographic regions provides fault tolerance against regional failures but introduces data consistency challenges", + ]; + for (const s of directional) { + it(`passes directional claim: "${s.slice(0, 40)}"`, () => { + expect(isDirectionalStance(s)).toBe(true); + }); + } + + const weak = [ + "This discusses AI's impact on jobs", + "An overview of agent patterns", + "It covers prompt engineering basics", + "AI is important", + "Embeddings are interesting", + "The talk explains how transformers work", + "AI matters", // too short + "", // empty + ]; + for (const s of weak) { + it(`rejects weak/descriptive claim: "${s.slice(0, 40)}"`, () => { + expect(isDirectionalStance(s)).toBe(false); + }); + } + + it("lets a directional marker override a vacuous value word", () => { + expect(isDirectionalStance("Latency is important because it predicts churn")).toBe(true); + }); +}); + +// ─── Stance checks ──────────────────────────────────────────────────── + +describe("checkStancePresent", () => { + it("fails on empty / whitespace-only stance", () => { + expect(checkStancePresent(mk({ stance: "" })).passed).toBe(false); + expect(checkStancePresent(mk({ stance: " " })).passed).toBe(false); + }); + it("passes when a stance exists", () => { + expect(checkStancePresent(mk({ stance: "X beats Y" })).passed).toBe(true); + }); +}); + +describe("checkStanceDirectional", () => { + it("reports 'no stance' when absent (does not crash)", () => { + const r = checkStanceDirectional(mk({ stance: "" })); + expect(r.passed).toBe(false); + expect(r.detail).toBe("no stance"); + }); + it("fails a vacuous stance with a helpful detail", () => { 
+ const r = checkStanceDirectional(mk({ stance: "AI is important" })); + expect(r.passed).toBe(false); + expect(r.detail).toContain("not directional"); + }); + it("passes a directional stance", () => { + expect(checkStanceDirectional(mk({ stance: "X reduces cost because Y" })).passed).toBe(true); + }); +}); + +// ─── Normalizers ────────────────────────────────────────────────────── + +describe("normalizeTitle", () => { + it("trims, lowercases, and collapses whitespace", () => { + expect(normalizeTitle(" Some Source ")).toBe("some source"); + }); +}); + +describe("normalizeUrl", () => { + it("strips protocol, www, and trailing slashes; lowercases", () => { + expect(normalizeUrl("https://www.Example.com/x/")).toBe("example.com/x"); + expect(normalizeUrl("http://example.com/a")).toBe("example.com/a"); + }); +}); + +// ─── Attribution ────────────────────────────────────────────────────── + +describe("checkAttributionResolves", () => { + it("fails when sources[] is empty (orphan)", () => { + const r = checkAttributionResolves(mk({ sources: [] }), INDEX); + expect(r.passed).toBe(false); + expect(r.detail).toBe("no sources[]"); + }); + it("resolves by normalized url", () => { + const ins = mk({ sources: [{ url: "https://www.example.com/a/" }] }); + expect(checkAttributionResolves(ins, INDEX).passed).toBe(true); + }); + it("resolves by normalized title", () => { + const ins = mk({ sources: [{ title: " known source " }] }); + expect(checkAttributionResolves(ins, INDEX).passed).toBe(true); + }); + it("passes if any one of several refs resolves", () => { + const ins = mk({ sources: [{ title: "Unknown" }, { url: "https://example.com/a" }] }); + expect(checkAttributionResolves(ins, INDEX).passed).toBe(true); + }); + it("fails when nothing resolves", () => { + const ins = mk({ sources: [{ title: "Ghost", url: "https://nope.com/z" }] }); + expect(checkAttributionResolves(ins, INDEX).passed).toBe(false); + }); +}); + +// ─── Topic / path 
───────────────────────────────────────────────────── + +describe("checkTopicMatchesPath", () => { + it("passes when frontmatter domain/topic match the path", () => { + expect(checkTopicMatchesPath(mk()).passed).toBe(true); + }); + it("fails on a domain/topic mismatch and explains it", () => { + const r = checkTopicMatchesPath(mk({ domain: "startups" })); + expect(r.passed).toBe(false); + expect(r.detail).toContain("!= path"); + }); +}); + +// ─── Cosine similarity ──────────────────────────────────────────────── + +describe("cosineSimilarity", () => { + it("is 1 for identical vectors", () => { + const v = new Float32Array([1, 2, 3]); + expect(cosineSimilarity(v, v)).toBeCloseTo(1, 6); + }); + it("is 0 for orthogonal vectors", () => { + expect(cosineSimilarity(new Float32Array([1, 0]), new Float32Array([0, 1]))).toBeCloseTo(0, 6); + }); + it("is -1 for opposite vectors", () => { + expect(cosineSimilarity(new Float32Array([1, 1]), new Float32Array([-1, -1]))).toBeCloseTo(-1, 6); + }); + it("is invariant to magnitude (direction only)", () => { + expect(cosineSimilarity(new Float32Array([2, 0]), new Float32Array([5, 0]))).toBeCloseTo(1, 6); + }); + it("returns 0 for a zero vector", () => { + expect(cosineSimilarity(new Float32Array([0, 0]), new Float32Array([1, 1]))).toBe(0); + }); +}); + +// ─── Audit aggregation ──────────────────────────────────────────────── + +describe("auditInsights", () => { + const good = mk({ + id: "INS-1", + stance: "X reduces cost because Y", + sources: [{ title: "Known Source" }], + relPath: "domains/ai-development/agents/a.md", + }); + const weak = mk({ + id: "INS-2", + stance: "AI is important", + sources: [{ url: "https://example.com/a" }], + relPath: "domains/ai-development/agents/b.md", + }); + const orphan = mk({ + id: "INS-3", + stance: "", + sources: [], + domain: "startups", + topic: "lonely", + relPath: "domains/startups/lonely/c.md", + }); + + it("aggregates coverage counts per check", () => { + const { report } = 
auditInsights([good, weak, orphan], INDEX); + expect(report.total).toBe(3); + expect(report.coverage.stance_present).toBe(2); // good + weak + expect(report.coverage.stance_directional).toBe(1); // good only + expect(report.coverage.attribution_resolves).toBe(2); // good (title) + weak (url) + expect(report.coverage.topic_matches_path).toBe(3); // all filed correctly + }); + + it("marks novelty unavailable (not 'all unembedded') when no novelty fn is supplied", () => { + const { report } = auditInsights([good, weak, orphan], INDEX); + expect(report.novelty.available).toBe(false); + expect(report.novelty.embedded).toBe(0); + expect(report.novelty.unembedded).toBe(0); + expect(report.novelty.no_neighbor).toBe(0); + expect(report.novelty.mean_similarity).toBeNull(); + expect(report.novelty.top_pairs).toEqual([]); + }); + + it("separates 'embedded but no neighbor' from 'unembedded' when novelty is available", () => { + const nearest: NearestFn = (id) => + id === "INS-1" + ? { nearestId: null, similarity: null, selfEmbedded: true } // embedded, but no INS neighbor + : { nearestId: null, similarity: null, selfEmbedded: false }; // not embedded at all + const { report } = auditInsights([good, weak], INDEX, nearest); + expect(report.novelty.available).toBe(true); + expect(report.novelty.embedded).toBe(1); + expect(report.novelty.no_neighbor).toBe(1); + expect(report.novelty.unembedded).toBe(1); + }); + + it("detects singleton topics from the file path", () => { + const { report } = auditInsights([good, weak, orphan], INDEX); + expect(report.fragmentation.total_topics).toBe(2); + expect(report.fragmentation.singletons).toBe(1); + expect(report.fragmentation.singleton_topics).toEqual(["startups/lonely"]); + }); + + it("collects failing examples with details", () => { + const { report } = auditInsights([good, weak, orphan], INDEX); + expect(report.failing_examples.stance_present.map((e) => e.id)).toEqual(["INS-3"]); + expect(report.failing_examples.stance_directional.map((e) 
=> e.id)).toEqual(["INS-2", "INS-3"]); + expect(report.failing_examples.attribution_resolves.map((e) => e.id)).toEqual(["INS-3"]); + }); + + it("respects exampleLimit", () => { + const orphans = Array.from({ length: 5 }, (_, i) => + mk({ id: `INS-X${i}`, sources: [] }) + ); + const { report } = auditInsights(orphans, INDEX, undefined, { exampleLimit: 2 }); + expect(report.failing_examples.attribution_resolves).toHaveLength(2); + }); + + it("builds the cosine histogram and block simulation from a novelty fn", () => { + const nearest: NearestFn = (id) => + id === "INS-1" + ? { nearestId: "INS-9", similarity: 0.97, selfEmbedded: true } + : { nearestId: null, similarity: null, selfEmbedded: false }; + const { report } = auditInsights([good], INDEX, nearest); + expect(report.novelty.available).toBe(true); + expect(report.novelty.embedded).toBe(1); + expect(report.novelty.histogram[">=0.95"]).toBe(1); + expect(report.novelty.block_simulation["0.85"]).toBe(1); + expect(report.novelty.block_simulation["0.95"]).toBe(1); + expect(report.novelty.mean_similarity).toBe(0.97); + expect(report.novelty.top_pairs).toEqual([ + { id: "INS-1", nearestId: "INS-9", similarity: 0.97 }, + ]); + }); +}); + +// ─── buildNoveltyComputer (integration: real in-memory sqlite-vec) ───── +// Exercises the DB path codex flagged as untested: float32 blob round-trip, +// PRI-/MM- filtering, and exact-cosine computation. + +describe("buildNoveltyComputer (in-memory sqlite-vec)", () => { + /** Build a 768-dim vector from [index, value] pairs (rest zero). 
*/ + function vec768(pairs: Array<[number, number]>): number[] { + const v = new Array(768).fill(0); + for (const [i, x] of pairs) v[i] = x; + return v; + } + + it("returns the nearest INSIGHT neighbor with exact cosine, skipping PRI-/MM-", () => { + const db = new Database(":memory:"); + initVectorTable(db); + upsertEmbedding(db, "INS-A", vec768([[0, 1]])); + upsertEmbedding(db, "PRI-C", vec768([[0, 1], [1, 0.001]])); // ~identical to A, but a principle + upsertEmbedding(db, "MM-D", vec768([[0, 1], [2, 0.001]])); // ~identical to A, but a mental model + upsertEmbedding(db, "INS-B", vec768([[0, 1], [1, 0.3]])); // cosine(A,B) = 1/sqrt(1.09) ≈ 0.958 + + const nearest = buildNoveltyComputer(db); + const r = nearest("INS-A"); + + expect(r.selfEmbedded).toBe(true); + expect(r.nearestId).toBe("INS-B"); // PRI-C and MM-D are nearer but filtered out + expect(r.similarity).toBeCloseTo(0.958, 2); + db.close(); + }); + + it("reports selfEmbedded=false when the insight has no embedding row", () => { + const db = new Database(":memory:"); + initVectorTable(db); + upsertEmbedding(db, "INS-B", vec768([[0, 1]])); + + const r = buildNoveltyComputer(db)("INS-MISSING"); + expect(r.selfEmbedded).toBe(false); + expect(r.nearestId).toBeNull(); + expect(r.similarity).toBeNull(); + db.close(); + }); + + it("reports selfEmbedded=true with no neighbor when only non-insight rows exist", () => { + const db = new Database(":memory:"); + initVectorTable(db); + upsertEmbedding(db, "INS-A", vec768([[0, 1]])); + upsertEmbedding(db, "PRI-C", vec768([[0, 1], [1, 0.1]])); + + const r = buildNoveltyComputer(db)("INS-A"); + expect(r.selfEmbedded).toBe(true); + expect(r.nearestId).toBeNull(); + expect(r.similarity).toBeNull(); + db.close(); + }); +}); diff --git a/scripts/lib/insight-gate.ts b/scripts/lib/insight-gate.ts new file mode 100644 index 0000000000..921ba63c46 --- /dev/null +++ b/scripts/lib/insight-gate.ts @@ -0,0 +1,558 @@ +/** + * insight-gate.ts (lib) — Semantic quality gate for insights. 
+ * + * Sibling to lib/verify-contracts.ts: a set of pure, deterministic, + * RECOMPUTABLE checks plus an audit aggregator. Unlike health.ts + * (structural: does the YAML parse?) this layer asks the semantic-quality + * questions: is the stance directional? is it attributed to a real source? + * is it filed under a topic that matches its path? is it a near-duplicate? + * + * Architecture boundary (CLAUDE.md): NO LLM here. Every check is a pure + * function of current state, so the verdict can be recomputed at any time + * without storing it. The LLM rubric stays in quality-score.ts (the + * advisory/audit layer) — never in the gate. + * + * Phase 1 is AUDIT ONLY: these functions report; the CLI never exits 1 on quality findings. + * Forward enforcement (Phase 2) reuses the exact same check functions. + */ + +import { readFile } from "node:fs/promises"; +import { relative } from "node:path"; +import fg from "fast-glob"; +import matter from "gray-matter"; +import type Database from "better-sqlite3"; + +import { KB_ROOT } from "./kb-root"; + +// ─── Types ──────────────────────────────────────────────────────────── + +export type CheckId = + | "stance_present" + | "stance_directional" + | "attribution_resolves" + | "topic_matches_path"; + +export const CHECK_IDS: CheckId[] = [ + "stance_present", + "stance_directional", + "attribution_resolves", + "topic_matches_path", +]; + +/** A source reference as stored on an insight (sources[] in frontmatter). */ +export interface SourceRef { + title?: string; + url?: string; +} + +/** Defensively-parsed insight — fields may be missing on older insights. */ +export interface GateInsight { + id: string; + domain: string; + topic: string; + title: string; + stance: string; // "" when absent + sources: SourceRef[]; + dateExtracted: string; // "" when absent + filePath: string; // absolute + relPath: string; // relative to KB_ROOT, e.g. 
domains/<domain>/<topic>/<file>.md +} + +export interface CheckResult { + checkId: CheckId; + passed: boolean; + detail?: string; +} + +export interface NoveltyResult { + /** Nearest INSIGHT neighbor id, or null when none was found. */ + nearestId: string | null; + /** Exact cosine similarity to the nearest insight neighbor, or null. */ + similarity: number | null; + /** Whether THIS insight has an embedding — distinguishes "unembedded" from "no neighbor found". */ + selfEmbedded: boolean; +} + +export interface InsightAudit { + id: string; + relPath: string; + checks: CheckResult[]; + novelty: NoveltyResult; +} + +/** Normalized lookup sets built from the sources/ directory. */ +export interface SourceIndex { + titles: Set; + urls: Set; +} + +export interface AuditReport { + generated_at: string; + scope: string; + total: number; + /** Number of insights PASSING each check. */ + coverage: Record; + novelty: { + /** Whether novelty was computed at all (false when the embeddings DB was unavailable). */ + available: boolean; + /** Insights that have their own embedding. */ + embedded: number; + /** Insights with no embedding row. */ + unembedded: number; + /** Embedded insights for which no INS- neighbor was found (e.g. outliers). */ + no_neighbor: number; + /** Bucketed counts of nearest-neighbor similarity (insights with a neighbor only). */ + histogram: Record; + /** For each candidate block threshold, how many insights have a neighbor >= it. */ + block_simulation: Record; + mean_similarity: number | null; + /** Closest insight pairs by similarity, for dedup triage (highest first). */ + top_pairs: Array<{ id: string; nearestId: string; similarity: number }>; + }; + fragmentation: { + total_topics: number; + singletons: number; + singleton_topics: string[]; // "domain/topic" + }; + /** Up to `exampleLimit` failing insights per check, for triage. 
*/ + failing_examples: Record>; +} + +// ─── Stance directionality heuristic ────────────────────────────────── +// +// A real stance is an ASSERTION that could be true or false — a directional +// claim — not a description of the source ("this discusses X") or a vacuous +// value statement ("X is important"). This is a deterministic FLOOR: it will +// have false positives/negatives. Audit mode reports the rate so the bar can +// be calibrated against the corpus rather than guessed. Patterns are exported +// so they are testable and tweakable. + +/** Verbs/phrases that describe the source instead of asserting a claim. */ +export const STANCE_DESCRIPTIVE = + /\b(discusses|covers|explores|explains|describes|examines|outlines|highlights|mentions|summari[sz]es|talks about|is about|provides? an overview|gives? an overview|an overview of|introduction to|a look at)\b/i; + +// Bare value claims with no direction ("X is important/interesting/..."). +// Deliberately narrow: only "puffery" adjectives that almost never appear +// except as the vacuous predicate of a non-claim. Generic words (good, bad, +// complex, common, ...) were removed because they fire incidentally inside +// genuinely directional stances ("AI is good at X", "good enough to ..."). +export const STANCE_VACUOUS_VALUE = + /\b(is|are|was|were|can be|remains?|stays?|seems?)\s+(?:very\s+|increasingly\s+|quite\s+|really\s+|highly\s+|often\s+|generally\s+)?(important|interesting|useful|valuable|key|crucial|essential|powerful|significant|notable|relevant)\b/i; + +/** Causal / comparative / predictive / consequential markers => directional. */ +// NOTE: keep this set disjoint from STANCE_DESCRIPTIVE — a verb in both (e.g. +// "introduces", which can mean "describes" or "brings about") would be checked +// directional-first and silently pass source-descriptive stances. +export const STANCE_DIRECTIONAL = + /\b(because|since|so that|therefore|thus|hence|leads? 
to|causes?|drives?|predicts?|will|won't|should|shouldn't|must|cannot|can't|outperforms?|beats?|exceeds?|fails?|unless|instead of|rather than|more than|less than|faster than|slower than|better than|worse than|enables?|prevents?|requires?|eliminates?|replaces?|reduces?|increases?|decreases?|creates?|trades?|transforms?|determines?|undermines?|forces?|favou?rs?|breaks?|shifts?|results? in|means that|implies|trumps?|wins?|loses?|matters? more)\b/i; + +const STANCE_MIN_LENGTH = 15; + +/** + * Heuristic: is this stance a directional, falsifiable claim (vs. descriptive + * or vacuous)? Order matters — an explicit directional marker passes even when + * a weak word is also present ("X is important BECAUSE it predicts Y"). + */ +export function isDirectionalStance(stance: string): boolean { + const s = stance.trim(); + if (s.length < STANCE_MIN_LENGTH) return false; + if (STANCE_DIRECTIONAL.test(s)) return true; + if (STANCE_DESCRIPTIVE.test(s)) return false; + if (STANCE_VACUOUS_VALUE.test(s)) return false; + return true; +} + +// ─── Normalizers ────────────────────────────────────────────────────── + +export function normalizeTitle(title: string): string { + return title.trim().toLowerCase().replace(/\s+/g, " "); +} + +export function normalizeUrl(url: string): string { + return url + .trim() + .toLowerCase() + .replace(/^https?:\/\//, "") + .replace(/^www\./, "") + .replace(/\/+$/, ""); +} + +// ─── Pure checks ────────────────────────────────────────────────────── + +function result(checkId: CheckId, passed: boolean, detail?: string): CheckResult { + return detail === undefined ? { checkId, passed } : { checkId, passed, detail }; +} + +export function checkStancePresent(ins: GateInsight): CheckResult { + return ins.stance.trim().length > 0 + ? 
result("stance_present", true) + : result("stance_present", false, "no stance"); +} + +export function checkStanceDirectional(ins: GateInsight): CheckResult { + const s = ins.stance.trim(); + if (s.length === 0) { + // Presence is a separate check; directionality is vacuously failed. + return result("stance_directional", false, "no stance"); + } + return isDirectionalStance(s) + ? result("stance_directional", true) + : result("stance_directional", false, `not directional: "${truncate(s, 80)}"`); +} + +/** + * Passes if AT LEAST ONE of the insight's source references resolves to a + * known source file (by normalized url or title). An insight with no + * sources[] is an orphan and fails. + */ +export function checkAttributionResolves( + ins: GateInsight, + index: SourceIndex +): CheckResult { + if (ins.sources.length === 0) { + return result("attribution_resolves", false, "no sources[]"); + } + for (const src of ins.sources) { + if (src.url && index.urls.has(normalizeUrl(src.url))) { + return result("attribution_resolves", true); + } + if (src.title && index.titles.has(normalizeTitle(src.title))) { + return result("attribution_resolves", true); + } + } + const labels = ins.sources + .map((s) => s.title || s.url || "(empty)") + .join(", "); + return result( + "attribution_resolves", + false, + `no source ref resolves (${truncate(labels, 80)})` + ); +} + +/** + * Passes if the insight's frontmatter domain/topic match its file location + * (domains/<domain>/<topic>/...). Catches misfiled insights and frontmatter + * that drifted from the path. + */ +export function checkTopicMatchesPath(ins: GateInsight): CheckResult { + const parts = splitPath(ins.relPath); + const pathDomain = parts[1] ?? ""; + const pathTopic = parts[2] ?? 
""; + if (pathDomain === ins.domain && pathTopic === ins.topic) { + return result("topic_matches_path", true); + } + return result( + "topic_matches_path", + false, + `frontmatter ${ins.domain}/${ins.topic} != path ${pathDomain}/${pathTopic}` + ); +} + +export function runChecks(ins: GateInsight, index: SourceIndex): CheckResult[] { + return [ + checkStancePresent(ins), + checkStanceDirectional(ins), + checkAttributionResolves(ins, index), + checkTopicMatchesPath(ins), + ]; +} + +// ─── Loaders ────────────────────────────────────────────────────────── + +const INSIGHT_GLOB = "domains/**/*.md"; +const INSIGHT_IGNORE = ["**/_overview.md", "**/_summary.md", "**/_index.md"]; + +export async function loadGateInsights(kbRoot: string = KB_ROOT): Promise { + const files = await fg(INSIGHT_GLOB, { + cwd: kbRoot, + absolute: true, + ignore: INSIGHT_IGNORE, + }); + + const insights: GateInsight[] = []; + for (const filePath of files) { + let data: Record; + try { + const raw = await readFile(filePath, "utf-8"); + data = matter(raw).data as Record; + } catch { + continue; // unreadable/garbled file — health.ts reports these separately + } + if (!data || typeof data.id !== "string") continue; + + insights.push({ + id: data.id, + domain: asString(data.domain), + topic: asString(data.topic), + title: asString(data.title), + stance: asString(data.stance), + dateExtracted: asString(data.date_extracted), + sources: parseSources(data.sources), + filePath, + relPath: relative(kbRoot, filePath), + }); + } + return insights; +} + +export async function buildSourceIndex(kbRoot: string = KB_ROOT): Promise { + const files = await fg("sources/**/*.md", { + cwd: kbRoot, + absolute: true, + ignore: ["**/raw/**", "**/_index.md"], + }); + + const titles = new Set(); + const urls = new Set(); + for (const filePath of files) { + let data: Record; + try { + data = matter(await readFile(filePath, "utf-8")).data as Record; + } catch { + continue; + } + if (typeof data?.title === "string") 
titles.add(normalizeTitle(data.title)); + if (typeof data?.url === "string") urls.add(normalizeUrl(data.url)); + } + return { titles, urls }; +} + +// ─── Novelty (cosine nearest-neighbor) ──────────────────────────────── +// +// Two-step, deliberately metric-agnostic: +// 1. vec0 KNN (`embedding MATCH ? AND k = N`) to find candidate neighbors. +// The live embeddings table returns L2 distance (it was created before +// distance_metric=cosine was added; CREATE…IF NOT EXISTS no-ops on the +// existing table). For the L2-normalized vectors Zuhn stores, L2 order +// and cosine order coincide, so the KNN ranking is correct. +// 2. Compute EXACT cosine on the actual stored vectors — so the reported +// value never depends on which distance metric vec0 happens to use. +// +// Neighbors are filtered to INS- ids: the dedup signal is insight-to-insight, +// and the table also holds principles/mental models (PRI-/MM-). We overfetch +// (large k) and filter in JS so a real insight neighbor isn't missed behind a +// run of PRI-/MM- rows in a tight compression cluster. + +const KNN_K = 64; + +/** Reinterpret a vec0 float32 blob as a Float32Array (copy → 4-byte aligned). 
*/ +function toFloat32(buf: Buffer): Float32Array { + const copy = buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.length); + return new Float32Array(copy); +} + +export function cosineSimilarity(a: Float32Array, b: Float32Array): number { + const n = Math.min(a.length, b.length); + let dot = 0; + let na = 0; + let nb = 0; + for (let i = 0; i < n; i++) { + dot += a[i] * b[i]; + na += a[i] * a[i]; + nb += b[i] * b[i]; + } + if (na === 0 || nb === 0) return 0; + return dot / (Math.sqrt(na) * Math.sqrt(nb)); +} + +export type NearestFn = (id: string) => NoveltyResult; + +export function buildNoveltyComputer(db: Database.Database): NearestFn { + const embStmt = db.prepare("SELECT embedding FROM embeddings WHERE id = ?"); + const knnStmt = db.prepare( + `SELECT id, distance FROM embeddings WHERE embedding MATCH ? AND k = ${KNN_K}` + ); + + return function nearest(id: string): NoveltyResult { + const self = embStmt.get(id) as { embedding: Buffer } | undefined; + if (!self) return { nearestId: null, similarity: null, selfEmbedded: false }; + + const rows = knnStmt.all(self.embedding) as Array<{ id: string; distance: number }>; + // Nearest *insight* neighbor (KNN is distance-ordered; exclude self + non-insights). + const neighbor = rows.find((r) => r.id !== id && r.id.startsWith("INS-")); + if (!neighbor) return { nearestId: null, similarity: null, selfEmbedded: true }; + + const other = embStmt.get(neighbor.id) as { embedding: Buffer } | undefined; + if (!other) return { nearestId: neighbor.id, similarity: null, selfEmbedded: true }; + + // Exact cosine on the stored vectors (full precision — round only at display). + const sim = cosineSimilarity(toFloat32(self.embedding), toFloat32(other.embedding)); + return { nearestId: neighbor.id, similarity: sim, selfEmbedded: true }; + }; +} + +// ─── Audit aggregation ──────────────────────────────────────────────── + +/** Histogram buckets for nearest-neighbor similarity (high → low). 
*/ +const SIM_BUCKETS: Array<{ label: string; min: number }> = [ + { label: ">=0.95", min: 0.95 }, + { label: "0.90-0.95", min: 0.9 }, + { label: "0.85-0.90", min: 0.85 }, + { label: "0.80-0.85", min: 0.8 }, + { label: "0.70-0.80", min: 0.7 }, + { label: "<0.70", min: -Infinity }, +]; + +/** Candidate "too duplicate to admit" thresholds, for block simulation. */ +export const BLOCK_THRESHOLDS = [0.85, 0.9, 0.95]; + +export function auditInsights( + insights: GateInsight[], + index: SourceIndex, + nearest?: NearestFn, + options: { scope?: string; exampleLimit?: number } = {} +): { audits: InsightAudit[]; report: AuditReport } { + const exampleLimit = options.exampleLimit ?? 15; + // Distinguish "novelty not computed" (no DB) from "computed, found nothing". + const noveltyAvailable = nearest !== undefined; + + const coverage = emptyCheckRecord(); + const failingExamples = emptyExampleRecord(); + const histogram: Record = Object.fromEntries( + SIM_BUCKETS.map((b) => [b.label, 0]) + ); + const blockSimulation: Record = Object.fromEntries( + BLOCK_THRESHOLDS.map((t) => [t.toFixed(2), 0]) + ); + const topicCounts = new Map(); + + let embedded = 0; + let unembedded = 0; + let noNeighbor = 0; + let simSum = 0; + let simCount = 0; + const pairs: Array<{ id: string; nearestId: string; similarity: number }> = []; + + const audits: InsightAudit[] = []; + + for (const ins of insights) { + const checks = runChecks(ins, index); + for (const check of checks) { + if (check.passed) { + coverage[check.checkId] += 1; + } else if (failingExamples[check.checkId].length < exampleLimit) { + failingExamples[check.checkId].push({ + id: ins.id, + relPath: ins.relPath, + detail: check.detail, + }); + } + } + + const novelty: NoveltyResult = nearest + ? nearest(ins.id) + : { nearestId: null, similarity: null, selfEmbedded: false }; + + // Only account for novelty when it was actually computed; otherwise leave + // all counts at zero (report.novelty.available signals "not measured"). 
+ if (noveltyAvailable) { + if (!novelty.selfEmbedded) { + unembedded += 1; + } else if (novelty.similarity === null || novelty.nearestId === null) { + embedded += 1; + noNeighbor += 1; + } else { + embedded += 1; + simSum += novelty.similarity; + simCount += 1; + histogram[bucketLabel(novelty.similarity)] += 1; + for (const t of BLOCK_THRESHOLDS) { + if (novelty.similarity >= t) blockSimulation[t.toFixed(2)] += 1; + } + pairs.push({ id: ins.id, nearestId: novelty.nearestId, similarity: novelty.similarity }); + } + } + + // Topic counts keyed by actual file location (domain/topic from path). + const parts = splitPath(ins.relPath); + const topicKey = `${parts[1] ?? "?"}/${parts[2] ?? "?"}`; + topicCounts.set(topicKey, (topicCounts.get(topicKey) ?? 0) + 1); + + audits.push({ id: ins.id, relPath: ins.relPath, checks, novelty }); + } + + const singletonTopics = [...topicCounts.entries()] + .filter(([, n]) => n === 1) + .map(([key]) => key) + .sort(); + + // Dedup reciprocal pairs (A→B and B→A are the same near-dupe) by unordered key, + // keeping the highest-similarity instance (list is already sorted desc). + const seenPairs = new Set(); + const topPairs = pairs + .sort((a, b) => b.similarity - a.similarity) + .filter((p) => { + const key = p.id < p.nearestId ? `${p.id}|${p.nearestId}` : `${p.nearestId}|${p.id}`; + if (seenPairs.has(key)) return false; + seenPairs.add(key); + return true; + }) + .slice(0, 50) + // Full precision in the report/JSON — rounding happens only at display. + .map((p) => ({ id: p.id, nearestId: p.nearestId, similarity: p.similarity })); + + const report: AuditReport = { + generated_at: new Date().toISOString(), + scope: options.scope ?? "all", + total: insights.length, + coverage, + novelty: { + available: noveltyAvailable, + embedded, + unembedded, + no_neighbor: noNeighbor, + histogram, + block_simulation: blockSimulation, + mean_similarity: simCount > 0 ? 
simSum / simCount : null, + top_pairs: topPairs, + }, + fragmentation: { + total_topics: topicCounts.size, + singletons: singletonTopics.length, + singleton_topics: singletonTopics, + }, + failing_examples: failingExamples, + }; + + return { audits, report }; +} + +// ─── Internal helpers ───────────────────────────────────────────────── + +function asString(value: unknown): string { + if (typeof value === "string") return value; + if (value === null || value === undefined) return ""; + return String(value); +} + +function parseSources(value: unknown): SourceRef[] { + if (!Array.isArray(value)) return []; + return value + .filter((s): s is Record => !!s && typeof s === "object") + .map((s) => ({ + title: typeof s.title === "string" ? s.title : undefined, + url: typeof s.url === "string" ? s.url : undefined, + })); +} + +function splitPath(relPath: string): string[] { + return relPath.split(/[\\/]/); +} + +function truncate(text: string, max: number): string { + return text.length <= max ? text : text.slice(0, max - 1) + "…"; +} + +function emptyCheckRecord(): Record { + return { stance_present: 0, stance_directional: 0, attribution_resolves: 0, topic_matches_path: 0 }; +} + +function emptyExampleRecord(): Record> { + return { stance_present: [], stance_directional: [], attribution_resolves: [], topic_matches_path: [] }; +} + +function bucketLabel(similarity: number): string { + const bucket = SIM_BUCKETS.find((b) => similarity >= b.min) ?? SIM_BUCKETS[SIM_BUCKETS.length - 1]; + return bucket.label; +} From 67ffd2dd189a28caba5edc914134a709c18c1f93 Mon Sep 17 00:00:00 2001 From: Jin Choi Date: Mon, 25 May 2026 15:20:09 -0700 Subject: [PATCH 2/3] feat(gate): add forward enforcement (Phase 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the audit checks into a forward-only enforcement gate. - enforceGate() (lib): partitions blocking failures from non-blocking warnings. 
Near-duplicate (>= cosine threshold, default 0.95) blocks when measurable; missing stance blocks by default. stance_directional (a heuristic) and attribution_resolves (would reject legitimate synthetic insights) are warnings by default, promotable via blockingChecks. An unembedded new insight WARNS (dup check unmeasurable) rather than silently skipping — fail-closing there would halt ingestion whenever Ollama is down. - insight-gate.ts --enforce: scopes via --changed (uncommitted insight files; fails CLOSED on git error and on changed files that don't load), --since, or all. Exits 1 on blocking failures. - post-ingest gate step: fatal, after embed (needs vectors) and before learn/auto-git (a blocked batch must not commit; placement preserves forward-only scoping since learn mutates existing insights). Mirrors the existing fatal reindex step. Conservative by design: blocks only on missing stance + near-identical duplicate, so it won't false-block legitimate or synthetic insights. Ratchet --max-similarity or promote warning-checks once trusted. Tests: 8 enforceGate cases. Verified e2e: --changed exit 0; blocking scope exit 1. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/insight-gate.ts | 161 ++++++++++++++++++++++++++++--- scripts/lib/insight-gate.test.ts | 68 +++++++++++++ scripts/lib/insight-gate.ts | 85 ++++++++++++++++ scripts/post-ingest.ts | 37 ++++++- 4 files changed, 338 insertions(+), 13 deletions(-) diff --git a/scripts/insight-gate.ts b/scripts/insight-gate.ts index f588ba334f..fdef23f757 100644 --- a/scripts/insight-gate.ts +++ b/scripts/insight-gate.ts @@ -2,24 +2,27 @@ /** * insight-gate.ts — Semantic quality gate for insights (CLI). * - * PHASE 1 (this file): AUDIT ONLY. Read-only X-ray of insight quality across - * the corpus. Never exits 1 — it reports so the pass-bar can be calibrated - * against the real distribution before forward enforcement is switched on. 
+ * Two modes: + * --audit READ-ONLY X-ray of insight quality across the corpus. Never + * exits 1 on quality findings — it reports so the pass-bar can be + * calibrated against the real distribution. + * --enforce Forward gate over a batch of NEW insights. Exits 1 if any + * BLOCKING issue is found (missing stance, or a near-duplicate). + * Wired as a fatal pre-commit step in post-ingest. * * Usage: - * npx tsx scripts/insight-gate.ts --audit --all # full corpus X-ray - * npx tsx scripts/insight-gate.ts --audit --since 2026-05-01 - * npx tsx scripts/insight-gate.ts --audit --all --json # machine-readable - * npx tsx scripts/insight-gate.ts --audit --examples 30 # more failing samples + * npx tsx scripts/insight-gate.ts --audit --all # full corpus X-ray + * npx tsx scripts/insight-gate.ts --audit --since 2026-05-01 [--json] + * npx tsx scripts/insight-gate.ts --enforce --changed # gate uncommitted insights + * npx tsx scripts/insight-gate.ts --enforce --since 2026-05-01 # gate by date + * npx tsx scripts/insight-gate.ts --enforce --changed --max-similarity 0.93 * - * Outputs (in addition to stdout): + * Audit outputs (in addition to stdout): * knowledge-base/meta/gate-report.json latest full report (overwritten) * knowledge-base/meta/gate-log.jsonl one summary line per run (appended) - * - * Phase 2 (not yet wired) will add forward enforcement: scope to a batch and - * exit 1 on failures, reusing the exact check functions in lib/insight-gate.ts. 
*/ +import { execFileSync } from "node:child_process"; import { appendFileSync, mkdirSync, writeFileSync } from "node:fs"; import { dirname, join } from "node:path"; import Database from "better-sqlite3"; @@ -29,9 +32,12 @@ import { auditInsights, buildNoveltyComputer, buildSourceIndex, + enforceGate, loadGateInsights, + DEFAULT_MAX_SIMILARITY, type AuditReport, type CheckId, + type EnforceResult, type NearestFn, } from "./lib/insight-gate"; import { KB_ROOT } from "./lib/kb-root"; @@ -87,8 +93,15 @@ function tryBuildNovelty(): { nearest: NearestFn | undefined; close: () => void // ─── Main ───────────────────────────────────────────────────────────── async function main(): Promise { - const args = parseArgs(process.argv.slice(2)); + const argv = process.argv.slice(2); + if (argv.includes("--enforce")) { + await runEnforce(argv); + } else { + await runAudit(parseArgs(argv)); + } +} +async function runAudit(args: Args): Promise { let insights = await loadGateInsights(); const scope = args.since ? `since ${args.since}` : "all"; if (args.since) { @@ -117,6 +130,130 @@ async function main(): Promise { } } +// ─── Enforce (Phase 2) ──────────────────────────────────────────────── + +async function runEnforce(argv: string[]): Promise { + const simIdx = argv.indexOf("--max-similarity"); + const maxSimilarity = + simIdx !== -1 ? parseFloat(argv[simIdx + 1]) || DEFAULT_MAX_SIMILARITY : DEFAULT_MAX_SIMILARITY; + const sinceIdx = argv.indexOf("--since"); + const since = sinceIdx !== -1 ? argv[sinceIdx + 1] ?? null : null; + + let insights = await loadGateInsights(); + let scope: string; + if (argv.includes("--changed")) { + // Fail CLOSED: if git can't tell us what changed, we cannot safely enforce. + // Aborting (exit 1) is correct for a gate — never treat "git broke" as + // "nothing changed" (which would let a batch sail through in post-ingest). 
+ let changed: Set; + try { + changed = getChangedInsightRelPaths(); + } catch (err) { + console.error( + `ENFORCE aborted: cannot determine changed insights via git — ${(err as Error).message}` + ); + process.exit(1); + } + // Fail CLOSED on unloadable insights: a changed insight file that + // loadGateInsights() dropped (unreadable, invalid frontmatter, or missing + // id) would otherwise silently vanish from the batch and let the gate pass. + const loadedPaths = new Set(insights.map((i) => i.relPath)); + const unaccounted = [...changed].filter((p) => !loadedPaths.has(p)); + if (unaccounted.length > 0) { + console.error( + `ENFORCE aborted: ${unaccounted.length} changed insight file(s) could not be loaded ` + + "(unreadable, invalid frontmatter, or missing id):" + ); + for (const p of unaccounted) console.error(` ✗ ${p}`); + console.error("Fix these (run: npm run health) and re-run."); + process.exit(1); + } + insights = insights.filter((i) => changed.has(i.relPath)); + scope = "changed (uncommitted)"; + } else if (since) { + insights = insights.filter((i) => i.dateExtracted >= since); + scope = `since ${since}`; + } else { + scope = "all"; + } + + console.log( + `Insight Gate (ENFORCE) — scope: ${scope} · ${insights.length} insight(s) · block ≥ ${maxSimilarity} cosine` + ); + + if (insights.length === 0) { + console.log("No insights in scope — nothing to gate."); + return; // exit 0 + } + + const sourceIndex = await buildSourceIndex(); + const { nearest, close } = tryBuildNovelty(); + let result: EnforceResult; + try { + result = enforceGate(insights, sourceIndex, nearest, { maxSimilarity }); + } finally { + close(); + } + if (!nearest) { + console.warn("WARN: embeddings unavailable — near-duplicate check skipped this run."); + } + + if (result.warnings.length > 0) { + console.log(`\n${result.warnings.length} warning(s) (non-blocking):`); + for (const w of result.warnings) { + console.log(` ⚠ ${w.id} [${w.checkId}] ${w.reason}`); + } + } + + if 
(result.failures.length > 0) { + console.error(`\n✗ GATE FAILED — ${result.failures.length} blocking issue(s):`); + for (const f of result.failures) { + console.error(` ✗ ${f.id} [${f.checkId}] ${f.reason}`); + console.error(` ${f.relPath}`); + } + console.error("\nThis batch is not admissible. Fix the above and re-run."); + process.exit(1); + } + + console.log(`\n✓ GATE PASSED — ${insights.length} insight(s) admitted.`); +} + +/** + * Insight files (KB-relative paths) that are untracked or modified vs HEAD. + * + * FAILS CLOSED: git errors propagate to the caller. A gate that can't determine + * its batch must not silently report "nothing changed" — the caller treats a + * throw as fatal (exit 1). An empty set means git succeeded and genuinely found + * no changed insights (a legitimate no-op). + */ +function getChangedInsightRelPaths(): Set { + const repoRoot = execFileSync("git", ["rev-parse", "--show-toplevel"], { + encoding: "utf-8", + cwd: KB_ROOT, + }).trim(); + const git = (args: string[]): string => + execFileSync("git", args, { encoding: "utf-8", cwd: repoRoot }).trim(); + + const out = new Set(); + const blocks = [ + git(["ls-files", "--others", "--exclude-standard", "--", "knowledge-base/domains/"]), + git(["diff", "--name-only", "HEAD", "--", "knowledge-base/domains/"]), + ]; + for (const block of blocks) { + if (!block) continue; + for (const line of block.split("\n")) { + if (!line.endsWith(".md")) continue; + const rel = line.replace(/^knowledge-base\//, ""); + // Skip regenerated _overview/_summary/_index files — they aren't insights + // (mirrors loadGateInsights' ignore set), so they must not be treated as + // "unaccounted" insight files in the reconciliation in runEnforce. + if ((rel.split("/").pop() ?? 
"").startsWith("_")) continue; + out.add(rel); + } + } + return out; +} + // ─── Persistence ────────────────────────────────────────────────────── function persist(report: AuditReport): void { diff --git a/scripts/lib/insight-gate.test.ts b/scripts/lib/insight-gate.test.ts index 695b94aed0..b94c9565a5 100644 --- a/scripts/lib/insight-gate.test.ts +++ b/scripts/lib/insight-gate.test.ts @@ -4,6 +4,7 @@ import Database from "better-sqlite3"; import { auditInsights, buildNoveltyComputer, + enforceGate, checkAttributionResolves, checkStanceDirectional, checkStancePresent, @@ -335,3 +336,70 @@ describe("buildNoveltyComputer (in-memory sqlite-vec)", () => { db.close(); }); }); + +// ─── enforceGate (Phase 2 forward enforcement) ──────────────────────── + +describe("enforceGate", () => { + const clean = mk({ + id: "INS-1", + stance: "X reduces cost because Y", + sources: [{ title: "Known Source" }], + relPath: "domains/ai-development/agents/a.md", + }); + + it("admits a clean insight: no failures, no warnings", () => { + const { failures, warnings } = enforceGate([clean], INDEX, undefined); + expect(failures).toEqual([]); + expect(warnings).toEqual([]); + }); + + it("blocks a missing stance; directional + attribution failures are warnings (default policy)", () => { + const bad = mk({ id: "INS-2", stance: "", sources: [], relPath: "domains/ai-development/agents/b.md" }); + const { failures, warnings } = enforceGate([bad], INDEX, undefined); + expect(failures.map((f) => f.checkId)).toEqual(["stance_present"]); + const warned = warnings.map((w) => w.checkId); + expect(warned).toContain("stance_directional"); + expect(warned).toContain("attribution_resolves"); + }); + + it("blocks a near-duplicate at/above the threshold", () => { + const nearest: NearestFn = () => ({ nearestId: "INS-9", similarity: 0.96, selfEmbedded: true }); + const { failures } = enforceGate([clean], INDEX, nearest); + expect(failures.map((f) => f.checkId)).toEqual(["novelty"]); + 
expect(failures[0].reason).toContain("INS-9"); + }); + + it("does not block a distinct insight below the threshold", () => { + const nearest: NearestFn = () => ({ nearestId: "INS-9", similarity: 0.8, selfEmbedded: true }); + expect(enforceGate([clean], INDEX, nearest).failures).toEqual([]); + }); + + it("respects a custom maxSimilarity", () => { + const nearest: NearestFn = () => ({ nearestId: "INS-9", similarity: 0.91, selfEmbedded: true }); + expect(enforceGate([clean], INDEX, nearest, { maxSimilarity: 0.95 }).failures).toEqual([]); + expect( + enforceGate([clean], INDEX, nearest, { maxSimilarity: 0.9 }).failures.map((f) => f.checkId) + ).toEqual(["novelty"]); + }); + + it("can promote a heuristic check to blocking via blockingChecks", () => { + const weak = mk({ id: "INS-3", stance: "AI is important", sources: [{ title: "Known Source" }] }); + expect(enforceGate([weak], INDEX, undefined).failures).toEqual([]); // directional warns by default + const { failures } = enforceGate([weak], INDEX, undefined, { + blockingChecks: ["stance_present", "stance_directional"], + }); + expect(failures.map((f) => f.checkId)).toEqual(["stance_directional"]); + }); + + it("skips the near-duplicate check when novelty is unavailable", () => { + expect(enforceGate([clean], INDEX, undefined).failures).toEqual([]); + }); + + it("warns (not silently skips) when a new insight has no embedding", () => { + const noEmbedding: NearestFn = () => ({ nearestId: null, similarity: null, selfEmbedded: false }); + const { failures, warnings } = enforceGate([clean], INDEX, noEmbedding); + expect(failures).toEqual([]); + expect(warnings.map((w) => w.checkId)).toContain("novelty"); + expect(warnings.find((w) => w.checkId === "novelty")?.reason).toContain("no embedding"); + }); +}); diff --git a/scripts/lib/insight-gate.ts b/scripts/lib/insight-gate.ts index 921ba63c46..f4f67da30d 100644 --- a/scripts/lib/insight-gate.ts +++ b/scripts/lib/insight-gate.ts @@ -382,6 +382,91 @@ export function 
buildNoveltyComputer(db: Database.Database): NearestFn { }; } +// ─── Enforcement (Phase 2) ──────────────────────────────────────────── +// +// Forward-only gate: given a batch of NEW insights, return the blocking +// failures and non-blocking warnings. Reuses the exact same deterministic +// checks as the audit — the only difference is that here some of them have +// teeth (a non-empty failures list means the batch is refused / exit 1). + +export interface GateFailure { + id: string; + relPath: string; + checkId: CheckId | "novelty"; + reason: string; +} + +export interface EnforceResult { + failures: GateFailure[]; // blocking — caller should refuse the batch + warnings: GateFailure[]; // non-blocking — reported for the human +} + +export interface EnforceOptions { + /** Block a new insight whose nearest INSIGHT neighbor is >= this cosine. */ + maxSimilarity?: number; + /** Which boolean checks block (the rest become warnings). */ + blockingChecks?: CheckId[]; +} + +export const DEFAULT_MAX_SIMILARITY = 0.95; +// Conservative default: only the objective, near-zero-false-positive checks +// block. stance_directional is a heuristic (false positives); attribution +// would reject legitimate synthetic/cross-domain insights — both start as +// warnings and can be promoted once the corpus behavior is trusted. +export const DEFAULT_BLOCKING_CHECKS: CheckId[] = ["stance_present"]; + +export function enforceGate( + insights: GateInsight[], + index: SourceIndex, + nearest: NearestFn | undefined, + options: EnforceOptions = {} +): EnforceResult { + const maxSimilarity = options.maxSimilarity ?? DEFAULT_MAX_SIMILARITY; + const blocking = new Set(options.blockingChecks ?? 
DEFAULT_BLOCKING_CHECKS); + + const failures: GateFailure[] = []; + const warnings: GateFailure[] = []; + + for (const ins of insights) { + for (const check of runChecks(ins, index)) { + if (check.passed) continue; + const entry: GateFailure = { + id: ins.id, + relPath: ins.relPath, + checkId: check.checkId, + reason: check.detail ?? check.checkId, + }; + (blocking.has(check.checkId) ? failures : warnings).push(entry); + } + + // Near-duplicate blocks when measurable. If the new insight has no + // embedding (e.g. Embed was skipped with Ollama offline), the dup check + // cannot run — surface that as a WARNING rather than skip it silently. + // (Fail-closing here would halt all ingestion whenever embeddings are + // unavailable, which is too brittle; the warning keeps the gap visible.) + if (nearest) { + const nv = nearest(ins.id); + if (!nv.selfEmbedded) { + warnings.push({ + id: ins.id, + relPath: ins.relPath, + checkId: "novelty", + reason: "not checked — no embedding (run embed first to gate duplicates)", + }); + } else if (nv.similarity !== null && nv.nearestId !== null && nv.similarity >= maxSimilarity) { + failures.push({ + id: ins.id, + relPath: ins.relPath, + checkId: "novelty", + reason: `${nv.similarity.toFixed(3)} cosine near-duplicate of ${nv.nearestId}`, + }); + } + } + } + + return { failures, warnings }; +} + // ─── Audit aggregation ──────────────────────────────────────────────── /** Histogram buckets for nearest-neighbor similarity (high → low). 
*/ diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts index 5c94f32bfc..505d284041 100644 --- a/scripts/post-ingest.ts +++ b/scripts/post-ingest.ts @@ -5,7 +5,7 @@ import { execFileSync } from "node:child_process"; import { readFile, writeFile, mkdir } from "node:fs/promises"; const PROJECT_ROOT = join(__dirname, ".."); -const KB_ROOT = join(PROJECT_ROOT, "knowledge-base"); +import { KB_ROOT } from "./lib/kb-root"; const ACTIVITY_LOG = join(KB_ROOT, "meta", "activity.md"); // ─── Helpers ────────────────────────────────────────────────────────── @@ -179,6 +179,41 @@ async function main(): Promise { console.warn("\nEmbed step failed (non-fatal) — FTS5-only mode active."); } + // Step 3.5: Gate (forward enforcement — FATAL, like reindex) + // + // Refuse to commit the batch if any NEW (uncommitted) insight fails a + // blocking check: missing stance, or a near-duplicate (>= cosine threshold). + // Runs AFTER embed so the cosine check has vectors, and BEFORE auto-git so a + // failing batch never lands. Scoped to uncommitted insight files (--changed), + // so a clean run (no new insights) passes trivially. Fully recoverable: fix + // the flagged insight(s) and re-run post-ingest (idempotent). + // + // Placement is deliberately BEFORE learn/views and must stay there: learn + // rewrites related[]/confidence on many EXISTING insights, which would + // balloon the --changed set to include grandfathered insights and break + // forward-only scoping (the gate would then block on pre-existing dupes / + // stanceless insights). The gate validates INTRINSIC quality — stance, + // attribution, novelty — fields fixed at extract time that learn never + // touches, so gating before learn loses no coverage. 
+ const gateResult = runStep("Gate", [ + "npx", "tsx", join(PROJECT_ROOT, "scripts", "insight-gate.ts"), + "--enforce", "--changed", + ]); + + if (gateResult.ok) { + results.push({ step: "gate", status: "PASSED" }); + } else { + results.push({ step: "gate", status: "BLOCKED" }); + await appendToActivityLog( + `post-ingest gate BLOCKED the batch — aborting before auto-git: ${gateResult.error}` + ); + console.error( + "\nPipeline ABORTED: gate blocked the batch (NOT committed). Fix the flagged insights and re-run." + ); + printSummary(results); + process.exit(1); + } + // Step 4: Learn (auto-connections, emergence detection, confidence propagation) const learnResult = runStep("Learn", [ "npx", "tsx", join(PROJECT_ROOT, "scripts", "learn.ts"), From fbeedbc0293bfc8470856a517b34acaf182404bd Mon Sep 17 00:00:00 2001 From: Jin Choi Date: Mon, 25 May 2026 15:32:04 -0700 Subject: [PATCH 3/3] chore(gate): track scripts/lib/kb-root.ts (KB_ROOT resolver) The Phase 1/2 gate imports KB_ROOT from ./lib/kb-root, but the module was untracked (created by an in-progress refactor that was never committed), so the gate would not build on a fresh checkout. Track it here as the first committed code to depend on it. Self-contained (only imports node:path); resolves ZUHN_KB_ROOT or defaults to the repo-root knowledge-base/ directory. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/lib/kb-root.ts | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 scripts/lib/kb-root.ts diff --git a/scripts/lib/kb-root.ts b/scripts/lib/kb-root.ts new file mode 100644 index 0000000000..7d45ed6f5b --- /dev/null +++ b/scripts/lib/kb-root.ts @@ -0,0 +1,24 @@ +// ─── KB_ROOT — single source of truth ───────────────────────────────── +// +// All scripts and library modules import KB_ROOT from here rather than +// re-deriving it from `__dirname`. This is the foundation for multi-tenant +// configuration: pointing Zuhn at a different knowledge base (e.g. 
a +// customer corpus, a demo corpus, an isolated test KB) is a single env-var +// flip — `ZUHN_KB_ROOT=/path/to/other/kb`. +// +// Resolution rules: +// 1. If process.env.ZUHN_KB_ROOT is set, resolve it (relative paths +// resolve against process.cwd(), absolute paths pass through). +// 2. Otherwise, default to the knowledge-base/ directory at the repo root. +// +// Naming follows the existing ZUHN_* convention used in inbox-server.ts +// (ZUHN_INBOX_PORT, ZUHN_INBOX_HOST) and daemon.ts (ZUHN_MAX_AGENTS). + +import { join, resolve } from "node:path"; + +// scripts/lib/kb-root.ts → ../../ → repo root → knowledge-base/ +const REPO_DEFAULT_KB_ROOT = join(__dirname, "../../knowledge-base"); + +export const KB_ROOT = process.env.ZUHN_KB_ROOT + ? resolve(process.env.ZUHN_KB_ROOT) + : REPO_DEFAULT_KB_ROOT;