Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions scripts/insight-gate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
* npx tsx scripts/insight-gate.ts --enforce --since 2026-05-01 # gate by date
* npx tsx scripts/insight-gate.ts --enforce --changed --max-similarity 0.93
*
* Enforce tuning (no code edits — set in the environment / post-ingest's shell):
* ZUHN_GATE_MAX_SIMILARITY=0.90 # ratchet the near-dup block threshold
* ZUHN_GATE_BLOCKING_CHECKS=stance_present,stance_directional # promote checks to blocking
* (precedence: --max-similarity flag > env > default 0.95; near-duplicate always blocks)
*
* Audit outputs (in addition to stdout):
* knowledge-base/meta/gate-report.json latest full report (overwritten)
* knowledge-base/meta/gate-log.jsonl one summary line per run (appended)
Expand All @@ -34,7 +39,10 @@ import {
buildSourceIndex,
enforceGate,
loadGateInsights,
DEFAULT_MAX_SIMILARITY,
resolveBlockingChecks,
resolveMaxSimilarity,
CHECK_IDS,
DEFAULT_BLOCKING_CHECKS,
type AuditReport,
type CheckId,
type EnforceResult,
Expand Down Expand Up @@ -133,9 +141,25 @@ async function runAudit(args: Args): Promise<void> {
// ─── Enforce (Phase 2) ────────────────────────────────────────────────

async function runEnforce(argv: string[]): Promise<void> {
// Threshold precedence: --max-similarity flag > ZUHN_GATE_MAX_SIMILARITY env > default.
const simIdx = argv.indexOf("--max-similarity");
const maxSimilarity =
simIdx !== -1 ? parseFloat(argv[simIdx + 1]) || DEFAULT_MAX_SIMILARITY : DEFAULT_MAX_SIMILARITY;
const flagSim = simIdx !== -1 ? argv[simIdx + 1] : undefined;
const maxSimilarity = resolveMaxSimilarity(flagSim, process.env.ZUHN_GATE_MAX_SIMILARITY);

// Blocking-check set: ZUHN_GATE_BLOCKING_CHECKS env overrides the default
// (comma-separated check ids). Unknown tokens warn; all-invalid → default.
let blockingChecks: CheckId[] | undefined;
const bc = resolveBlockingChecks(process.env.ZUHN_GATE_BLOCKING_CHECKS);
if (bc) {
if (bc.invalid.length > 0) {
console.warn(
`WARN: ignoring unknown gate check(s): ${bc.invalid.join(", ")} (valid: ${CHECK_IDS.join(", ")})`
);
}
if (bc.checks.length > 0) blockingChecks = bc.checks;
else console.warn("WARN: ZUHN_GATE_BLOCKING_CHECKS had no valid checks — using default.");
}

const sinceIdx = argv.indexOf("--since");
const since = sinceIdx !== -1 ? argv[sinceIdx + 1] ?? null : null;

Expand Down Expand Up @@ -177,8 +201,10 @@ async function runEnforce(argv: string[]): Promise<void> {
scope = "all";
}

const effectiveBlocking = blockingChecks ?? DEFAULT_BLOCKING_CHECKS;
console.log(
`Insight Gate (ENFORCE) — scope: ${scope} · ${insights.length} insight(s) · block ≥ ${maxSimilarity} cosine`
`Insight Gate (ENFORCE) — scope: ${scope} · ${insights.length} insight(s) · ` +
`block ≥ ${maxSimilarity} cosine · blocking: ${effectiveBlocking.join("+")}+novelty`
);

if (insights.length === 0) {
Expand All @@ -190,7 +216,7 @@ async function runEnforce(argv: string[]): Promise<void> {
const { nearest, close } = tryBuildNovelty();
let result: EnforceResult;
try {
result = enforceGate(insights, sourceIndex, nearest, { maxSimilarity });
result = enforceGate(insights, sourceIndex, nearest, { maxSimilarity, blockingChecks });
} finally {
close();
}
Expand Down
50 changes: 50 additions & 0 deletions scripts/lib/insight-gate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ import {
isDirectionalStance,
normalizeTitle,
normalizeUrl,
resolveBlockingChecks,
resolveMaxSimilarity,
DEFAULT_MAX_SIMILARITY,
type GateInsight,
type NearestFn,
type SourceIndex,
Expand Down Expand Up @@ -403,3 +406,50 @@ describe("enforceGate", () => {
expect(warnings.find((w) => w.checkId === "novelty")?.reason).toContain("no embedding");
});
});

// ─── Env-config resolvers (Step 3 tuning) ─────────────────────────────

describe("resolveMaxSimilarity", () => {
  // One row per call: [flag value, env value, expected threshold].
  type Row = [string | undefined, string | undefined, number];

  // Runs every row in order; the first mismatch fails the enclosing test.
  const expectRows = (rows: Row[]): void => {
    for (const [flag, env, expected] of rows) {
      expect(resolveMaxSimilarity(flag, env)).toBe(expected);
    }
  };

  it("prefers the flag, then env, then default", () => {
    expectRows([
      ["0.9", "0.8", 0.9],
      [undefined, "0.8", 0.8],
      [undefined, undefined, DEFAULT_MAX_SIMILARITY],
    ]);
  });

  it("falls through invalid or out-of-range candidates instead of accepting them", () => {
    expectRows([
      ["abc", "0.8", 0.8], // non-numeric flag → env wins
      ["1.5", undefined, DEFAULT_MAX_SIMILARITY], // above 1 → default
      ["-0.1", "0.7", 0.7], // below 0 → env wins
    ]);
  });

  it("rejects partial-numeric strings (full-string validation, not parseFloat)", () => {
    expectRows([
      ["1abc", undefined, DEFAULT_MAX_SIMILARITY], // parseFloat would have yielded 1
      ["0.95#", "0.8", 0.8],
      [" ", "0.8", 0.8], // blank flag → falls through to env
    ]);
  });

  it("accepts the 0 and 1 boundaries", () => {
    expectRows([
      ["0", undefined, 0],
      ["1", undefined, 1],
    ]);
  });
});

describe("resolveBlockingChecks", () => {
  it("returns null when unset or blank (caller uses default)", () => {
    // Unset, empty, and whitespace-only values are all treated as "not configured".
    for (const notConfigured of [undefined, "", " "]) {
      expect(resolveBlockingChecks(notConfigured)).toBeNull();
    }
  });

  it("parses a valid comma-separated list, trimming whitespace", () => {
    const parsed = resolveBlockingChecks(" stance_present , topic_matches_path ");
    expect(parsed).toEqual({
      checks: ["stance_present", "topic_matches_path"],
      invalid: [],
    });
  });

  it("separates valid checks from unknown tokens", () => {
    const parsed = resolveBlockingChecks("stance_present,bogus");
    expect(parsed).toEqual({
      checks: ["stance_present"],
      invalid: ["bogus"],
    });
  });

  it("reports all-invalid input with empty checks", () => {
    const parsed = resolveBlockingChecks("nope,bogus");
    expect(parsed).toEqual({ checks: [], invalid: ["nope", "bogus"] });
  });
});
36 changes: 36 additions & 0 deletions scripts/lib/insight-gate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,42 @@ export const DEFAULT_MAX_SIMILARITY = 0.95;
// warnings and can be promoted once the corpus behavior is trusted.
export const DEFAULT_BLOCKING_CHECKS: CheckId[] = ["stance_present"];

/**
 * Pick the near-duplicate block threshold from the first usable source, in the
 * order: CLI flag, environment variable, built-in default.
 *
 * A source is usable only when, after trimming, it is a non-empty string that
 * is numeric as a whole and lies in the closed range [0, 1]. Anything else is
 * skipped in favour of the next source, so a mistyped value never becomes the
 * threshold.
 *
 * @param flagValue Raw value given on the command line, if any.
 * @param envValue Raw value taken from the environment, if any.
 * @returns The first usable candidate, or DEFAULT_MAX_SIMILARITY when none is.
 */
export function resolveMaxSimilarity(flagValue?: string, envValue?: string): number {
  const parseCandidate = (raw: string | undefined): number | undefined => {
    if (raw === undefined) return undefined;
    const text = raw.trim();
    // Number("") is 0, so a blank value must be rejected before conversion.
    if (text === "") return undefined;
    // Number() converts the entire string ("1abc" → NaN), whereas parseFloat
    // would stop at the first bad character and hand back 1.
    const value = Number(text);
    const inRange = Number.isFinite(value) && value >= 0 && value <= 1;
    return inRange ? value : undefined;
  };

  return parseCandidate(flagValue) ?? parseCandidate(envValue) ?? DEFAULT_MAX_SIMILARITY;
}

/**
 * Parse a comma-separated blocking-checks env value into valid CheckIds plus any
 * unrecognized tokens.
 *
 * Tokens are trimmed and empty tokens (e.g. from a trailing comma) are dropped.
 * A token that appears more than once is kept only at its first position, so a
 * repeated id is not listed twice in either returned array.
 *
 * @param envValue Raw environment value; may be undefined when the variable is unset.
 * @returns `null` when the value is unset or blank (caller uses the default).
 *   Otherwise `checks` holds the tokens found in CHECK_IDS and `invalid` holds
 *   every other token, each in first-seen order. `checks` can be empty when no
 *   token is recognized.
 */
export function resolveBlockingChecks(
  envValue?: string
): { checks: CheckId[]; invalid: string[] } | null {
  if (envValue === undefined || envValue.trim() === "") return null;

  const tokens = envValue
    .split(",")
    .map((t) => t.trim())
    .filter(Boolean);

  const checks: CheckId[] = [];
  const invalid: string[] = [];
  // A Set iterates in insertion order, so this removes repeats while keeping
  // the order in which the operator listed the checks.
  for (const token of new Set(tokens)) {
    if ((CHECK_IDS as readonly string[]).includes(token)) checks.push(token as CheckId);
    else invalid.push(token);
  }
  return { checks, invalid };
}

export function enforceGate(
insights: GateInsight[],
index: SourceIndex,
Expand Down
Loading