diff --git a/scripts/insight-gate.ts b/scripts/insight-gate.ts index fdef23f757..eec2de58de 100644 --- a/scripts/insight-gate.ts +++ b/scripts/insight-gate.ts @@ -17,6 +17,11 @@ * npx tsx scripts/insight-gate.ts --enforce --since 2026-05-01 # gate by date * npx tsx scripts/insight-gate.ts --enforce --changed --max-similarity 0.93 * + * Enforce tuning (no code edits — set in the environment / post-ingest's shell): + * ZUHN_GATE_MAX_SIMILARITY=0.90 # ratchet the near-dup block threshold + * ZUHN_GATE_BLOCKING_CHECKS=stance_present,stance_directional # promote checks to blocking + * (precedence: --max-similarity flag > env > default 0.95; near-duplicate always blocks) + * * Audit outputs (in addition to stdout): * knowledge-base/meta/gate-report.json latest full report (overwritten) * knowledge-base/meta/gate-log.jsonl one summary line per run (appended) @@ -34,7 +39,10 @@ import { buildSourceIndex, enforceGate, loadGateInsights, - DEFAULT_MAX_SIMILARITY, + resolveBlockingChecks, + resolveMaxSimilarity, + CHECK_IDS, + DEFAULT_BLOCKING_CHECKS, type AuditReport, type CheckId, type EnforceResult, @@ -133,9 +141,25 @@ async function runAudit(args: Args): Promise { // ─── Enforce (Phase 2) ──────────────────────────────────────────────── async function runEnforce(argv: string[]): Promise { + // Threshold precedence: --max-similarity flag > ZUHN_GATE_MAX_SIMILARITY env > default. const simIdx = argv.indexOf("--max-similarity"); - const maxSimilarity = - simIdx !== -1 ? parseFloat(argv[simIdx + 1]) || DEFAULT_MAX_SIMILARITY : DEFAULT_MAX_SIMILARITY; + const flagSim = simIdx !== -1 ? argv[simIdx + 1] : undefined; + const maxSimilarity = resolveMaxSimilarity(flagSim, process.env.ZUHN_GATE_MAX_SIMILARITY); + + // Blocking-check set: ZUHN_GATE_BLOCKING_CHECKS env overrides the default + // (comma-separated check ids). Unknown tokens warn; all-invalid → default. + let blockingChecks: CheckId[] | undefined; + const bc = resolveBlockingChecks(process.env.ZUHN_GATE_BLOCKING_CHECKS); + if (bc) { + if (bc.invalid.length > 0) { + console.warn( + `WARN: ignoring unknown gate check(s): ${bc.invalid.join(", ")} (valid: ${CHECK_IDS.join(", ")})` + ); + } + if (bc.checks.length > 0) blockingChecks = bc.checks; + else console.warn("WARN: ZUHN_GATE_BLOCKING_CHECKS had no valid checks — using default."); + } + const sinceIdx = argv.indexOf("--since"); const since = sinceIdx !== -1 ? argv[sinceIdx + 1] ?? null : null; @@ -177,8 +201,10 @@ async function runEnforce(argv: string[]): Promise { scope = "all"; } + const effectiveBlocking = blockingChecks ?? DEFAULT_BLOCKING_CHECKS; console.log( - `Insight Gate (ENFORCE) — scope: ${scope} · ${insights.length} insight(s) · block ≥ ${maxSimilarity} cosine` + `Insight Gate (ENFORCE) — scope: ${scope} · ${insights.length} insight(s) · ` + + `block ≥ ${maxSimilarity} cosine · blocking: ${effectiveBlocking.join("+")}+novelty` ); if (insights.length === 0) { @@ -190,7 +216,7 @@ async function runEnforce(argv: string[]): Promise { const { nearest, close } = tryBuildNovelty(); let result: EnforceResult; try { - result = enforceGate(insights, sourceIndex, nearest, { maxSimilarity }); + result = enforceGate(insights, sourceIndex, nearest, { maxSimilarity, blockingChecks }); } finally { close(); } diff --git a/scripts/lib/insight-gate.test.ts b/scripts/lib/insight-gate.test.ts index b94c9565a5..3d81e4b95e 100644 --- a/scripts/lib/insight-gate.test.ts +++ b/scripts/lib/insight-gate.test.ts @@ -13,6 +13,9 @@ import { isDirectionalStance, normalizeTitle, normalizeUrl, + resolveBlockingChecks, + resolveMaxSimilarity, + DEFAULT_MAX_SIMILARITY, type GateInsight, type NearestFn, type SourceIndex, @@ -403,3 +406,50 @@ describe("enforceGate", () => { expect(warnings.find((w) => w.checkId === "novelty")?.reason).toContain("no embedding"); }); }); + +// ─── Env-config resolvers (Step 3 tuning) ───────────────────────────── + +describe("resolveMaxSimilarity", () => { + it("prefers the flag, then env, then default", () => { + expect(resolveMaxSimilarity("0.9", "0.8")).toBe(0.9); + expect(resolveMaxSimilarity(undefined, "0.8")).toBe(0.8); + expect(resolveMaxSimilarity(undefined, undefined)).toBe(DEFAULT_MAX_SIMILARITY); + }); + it("falls through invalid or out-of-range candidates instead of accepting them", () => { + expect(resolveMaxSimilarity("abc", "0.8")).toBe(0.8); // bad flag → env + expect(resolveMaxSimilarity("1.5", undefined)).toBe(DEFAULT_MAX_SIMILARITY); // > 1 → default + expect(resolveMaxSimilarity("-0.1", "0.7")).toBe(0.7); // < 0 → env + }); + it("rejects partial-numeric strings (full-string validation, not parseFloat)", () => { + expect(resolveMaxSimilarity("1abc", undefined)).toBe(DEFAULT_MAX_SIMILARITY); // would parseFloat→1 + expect(resolveMaxSimilarity("0.95#", "0.8")).toBe(0.8); + expect(resolveMaxSimilarity(" ", "0.8")).toBe(0.8); // blank → fall through + }); + it("accepts the 0 and 1 boundaries", () => { + expect(resolveMaxSimilarity("0", undefined)).toBe(0); + expect(resolveMaxSimilarity("1", undefined)).toBe(1); + }); +}); + +describe("resolveBlockingChecks", () => { + it("returns null when unset or blank (caller uses default)", () => { + expect(resolveBlockingChecks(undefined)).toBeNull(); + expect(resolveBlockingChecks("")).toBeNull(); + expect(resolveBlockingChecks(" ")).toBeNull(); + }); + it("parses a valid comma-separated list, trimming whitespace", () => { + expect(resolveBlockingChecks(" stance_present , topic_matches_path ")).toEqual({ + checks: ["stance_present", "topic_matches_path"], + invalid: [], + }); + }); + it("separates valid checks from unknown tokens", () => { + expect(resolveBlockingChecks("stance_present,bogus")).toEqual({ + checks: ["stance_present"], + invalid: ["bogus"], + }); + }); + it("reports all-invalid input with empty checks", () => { + expect(resolveBlockingChecks("nope,bogus")).toEqual({ checks: [], invalid: ["nope", "bogus"] }); + }); +}); diff --git a/scripts/lib/insight-gate.ts b/scripts/lib/insight-gate.ts index f4f67da30d..adb413396d 100644 --- a/scripts/lib/insight-gate.ts +++ b/scripts/lib/insight-gate.ts @@ -415,6 +415,42 @@ export const DEFAULT_MAX_SIMILARITY = 0.95; // warnings and can be promoted once the corpus behavior is trusted. export const DEFAULT_BLOCKING_CHECKS: CheckId[] = ["stance_present"]; +/** + * Resolve the block threshold with precedence flag > env > default. Invalid or + * out-of-range candidates fall through to the next source (never silently + * accepted) so a typo can't disable dedup blocking by setting it to garbage. + */ +export function resolveMaxSimilarity(flagValue?: string, envValue?: string): number { + for (const candidate of [flagValue, envValue]) { + if (candidate === undefined) continue; + const trimmed = candidate.trim(); + if (trimmed === "") continue; + // Number() (NOT parseFloat) requires the WHOLE string to be numeric, so a + // typo like "1abc" → NaN and falls through rather than parsing to 1 and + // silently weakening the dedup block to exact-duplicates-only. + const n = Number(trimmed); + if (Number.isFinite(n) && n >= 0 && n <= 1) return n; + } + return DEFAULT_MAX_SIMILARITY; +} + +/** + * Parse a comma-separated blocking-checks env value into valid CheckIds plus any + * unrecognized tokens. Returns null when unset/blank (caller uses the default). + */ +export function resolveBlockingChecks( + envValue?: string +): { checks: CheckId[]; invalid: string[] } | null { + if (envValue === undefined || envValue.trim() === "") return null; + const checks: CheckId[] = []; + const invalid: string[] = []; + for (const token of envValue.split(",").map((t) => t.trim()).filter(Boolean)) { + if ((CHECK_IDS as readonly string[]).includes(token)) checks.push(token as CheckId); + else invalid.push(token); + } + return { checks, invalid }; +} + export function enforceGate( insights: GateInsight[], index: SourceIndex,