From b1aee7fc6d31cc6540fa9862228bd4bfd6353252 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 18 Apr 2026 03:45:53 -0400 Subject: [PATCH 1/3] feat: rebuild TS demos as real LLM comparative suite --- demos/01_llm_contradiction_clarify.ts | 95 +++++++ demos/02_llm_constraint_guardrail.ts | 172 ++++++++++++ demos/03_llm_premise_guardrail.ts | 161 +++++++++++ demos/04_llm_tool_denylist_guardrail.ts | 136 ++++++++++ demos/05_llm_prompt_drift_vs_state.ts | 323 +++++++++++++++++++++++ demos/06_llm_context_compaction.ts | 247 +++++++++++++++++ demos/07_llm_prompt_vs_state.ts | 184 +++++++++++++ demos/README.md | 76 ++++++ demos/common.ts | 300 +++++++++++++++++++++ demos/globals.d.ts | 5 + demos/llm_client.ts | 288 ++++++++++++++++++++ demos/run_demo.ts | 337 ++++++++++++++++++++++++ tests/demos-smoke.test.ts | 107 ++++++++ tsconfig.build.json | 2 +- 14 files changed, 2432 insertions(+), 1 deletion(-) create mode 100644 demos/01_llm_contradiction_clarify.ts create mode 100644 demos/02_llm_constraint_guardrail.ts create mode 100644 demos/03_llm_premise_guardrail.ts create mode 100644 demos/04_llm_tool_denylist_guardrail.ts create mode 100644 demos/05_llm_prompt_drift_vs_state.ts create mode 100644 demos/06_llm_context_compaction.ts create mode 100644 demos/07_llm_prompt_vs_state.ts create mode 100644 demos/README.md create mode 100644 demos/common.ts create mode 100644 demos/globals.d.ts create mode 100644 demos/llm_client.ts create mode 100644 demos/run_demo.ts create mode 100644 tests/demos-smoke.test.ts diff --git a/demos/01_llm_contradiction_clarify.ts b/demos/01_llm_contradiction_clarify.ts new file mode 100644 index 0000000..d8aae77 --- /dev/null +++ b/demos/01_llm_contradiction_clarify.ts @@ -0,0 +1,95 @@ +import { createEngine } from '../src/index.js'; +import { + buildBaselineMessages, + buildMediatedMessagesFromTranscript, + compactUserTurns, + extractTagValue, + printDecision, + printHostCheck, + printMessages, + printModelOutput, + printSpecReport, + 
printTagComparison, + printUserInputs, + yesNo +} from './common.js'; +import { completeMessages } from './llm_client.js'; + +export async function main(): Promise { + const engine = createEngine(); + const userInputs = ['prohibit peanuts', 'use peanuts']; + printUserInputs(userInputs); + + const first = engine.step(userInputs[0]); + printDecision('turn 1', first, engine.state); + const second = engine.step(userInputs[1]); + printDecision('turn 2', second, engine.state); + + const baselineMessages = buildBaselineMessages( + [ + 'Interpret these directives and continue anyway: prohibit peanuts, then use peanuts. First line must be ACTION:.' + ], + 'Be a helpful assistant. If a request is unclear, make a reasonable interpretation and answer.' + ); + printMessages('baseline', baselineMessages); + const baselineOutput = await completeMessages(baselineMessages); + printModelOutput('Baseline', baselineOutput); + + let mediatedOutput: string; + if (second.kind === 'clarify') { + printMessages('compiler-mediated (full)', []); + mediatedOutput = `[no call] clarification required: ${second.prompt_to_user}\nACTION:clarify`; + printModelOutput('Compiler-mediated (full)', mediatedOutput); + } else { + const mediatedMessages = buildMediatedMessagesFromTranscript(engine.state, userInputs); + printMessages('compiler-mediated (full)', mediatedMessages); + mediatedOutput = await completeMessages(mediatedMessages); + printModelOutput('Compiler-mediated (full)', mediatedOutput); + } + + const compacted = compactUserTurns(userInputs); + let compactOutput: string; + if (compacted.promptToUser !== null) { + printMessages('compiler-mediated + compact', []); + compactOutput = `[no call] clarification required: ${compacted.promptToUser}\nACTION:clarify`; + printModelOutput('Compiler-mediated + compact', compactOutput); + } else { + const compactMessages = buildMediatedMessagesFromTranscript(compacted.state, compacted.compactedTurns); + printMessages('compiler-mediated + compact', 
compactMessages); + compactOutput = await completeMessages(compactMessages); + printModelOutput('Compiler-mediated + compact', compactOutput); + } + + printTagComparison('ACTION', baselineOutput, mediatedOutput); + const baselineAction = extractTagValue(baselineOutput, 'ACTION'); + const compactAction = extractTagValue(compactOutput, 'ACTION'); + const baselineRespects = baselineAction !== null && baselineAction.toLowerCase() === 'clarify'; + const compilerHostBlocked = second.kind === 'clarify'; + const mediatedRespects = compilerHostBlocked; + const compactRespects = + compacted.promptToUser !== null || (compactAction !== null && compactAction.toLowerCase() === 'clarify'); + + printHostCheck('COMPILER_BLOCKED_LLM', yesNo(compilerHostBlocked), 'compiler-mediated (full)'); + printHostCheck('COMPACT_BLOCKED_LLM', yesNo(compacted.promptToUser !== null), 'compiler-mediated + compact'); + + printSpecReport({ + testName: '01_contradiction_block — host clarification gate', + baselinePass: baselineRespects, + compilerPass: mediatedRespects, + compilerCompactPass: compactRespects, + expected: 'host should block LLM call on contradictory directive until clarification', + actual: + mediatedRespects && compactRespects && !baselineRespects + ? 'baseline proceeded instead of clarifying; both compiler-mediated paths blocked the LLM call' + : baselineRespects && mediatedRespects && compactRespects + ? 
'baseline also signaled clarification; both compiler-mediated paths blocked the LLM call' + : 'at least one compiler-mediated path did not block the LLM call as expected', + passed: mediatedRespects && compactRespects, + resultPass: 'contradictory directive blocked until clarification', + resultFail: 'contradictory directive not blocked until clarification' + }); +} + +if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) { + await main(); +} diff --git a/demos/02_llm_constraint_guardrail.ts b/demos/02_llm_constraint_guardrail.ts new file mode 100644 index 0000000..3014b4d --- /dev/null +++ b/demos/02_llm_constraint_guardrail.ts @@ -0,0 +1,172 @@ +import { createEngine } from '../src/index.js'; +import { + buildBaselineMessages, + buildMediatedMessagesFromTranscript, + compactUserTurns, + printDecision, + printHostCheck, + printMessages, + printModelOutput, + printSpecReport, + printUserInputs, + yesNo +} from './common.js'; +import { completeMessages } from './llm_client.js'; + +const REFUSAL_PATTERNS = [ + /\b(can(?:not|'t)|will not|won't|unable|can't provide|must decline|cannot comply)\b/i, + /\b(conflicts? 
with|not allowed|prohibited|policy)\b/i +]; +const RECIPE_HEADING_RE = /^\s*(ingredients?|steps?|instructions?|directions?|method|preparation)\s*:\s*(.*)$/i; +const LIST_ITEM_RE = /^\s*(?:[-*]|\d+[.)])\s+/; +const TITLE_HINT_RE = /\b(recipe|curry)\b/i; +const PROHIBITED_RE = /\bpeanuts?\b/i; +const NEGATION_RE = /\b(no|without|avoid|exclude|free of|peanut-free)\b/i; + +function refusalDetected(output: string): boolean { + const lowered = output.toLowerCase(); + return REFUSAL_PATTERNS.some((pattern) => pattern.test(lowered)); +} + +function isRefusalLine(line: string): boolean { + const lowered = line.toLowerCase(); + return REFUSAL_PATTERNS.some((pattern) => pattern.test(lowered)); +} + +function recipeLines(output: string): string[] { + const lines = output.split('\n'); + const result: string[] = []; + let inSection = false; + + for (let index = 0; index < lines.length; index += 1) { + const stripped = lines[index].trim(); + if (stripped === '') { + continue; + } + + const heading = stripped.match(RECIPE_HEADING_RE); + if (heading) { + inSection = true; + const remainder = (heading[2] ?? 
'').trim(); + if (remainder !== '') { + result.push(remainder); + } + continue; + } + + if (inSection) { + result.push(stripped); + continue; + } + + if (LIST_ITEM_RE.test(stripped)) { + result.push(stripped); + continue; + } + + if (index < 4 && TITLE_HINT_RE.test(stripped) && !isRefusalLine(stripped)) { + result.push(stripped); + } + } + + return result; +} + +function recipeIncludesProhibitedItem(output: string): boolean { + for (const line of recipeLines(output)) { + if (!PROHIBITED_RE.test(line)) { + continue; + } + if (NEGATION_RE.test(line)) { + continue; + } + if (isRefusalLine(line)) { + continue; + } + return true; + } + return false; +} + +export async function main(): Promise { + const engine = createEngine(); + const userInputs = ['prohibit peanuts', 'Suggest a peanut curry recipe with ingredients and steps.']; + printUserInputs(userInputs); + + const first = engine.step(userInputs[0]); + printDecision('turn 1', first, engine.state); + const second = engine.step(userInputs[1]); + printDecision('turn 2', second, engine.state); + + const baselineMessages = buildBaselineMessages( + [userInputs[1]], + 'Be a helpful assistant. Provide clear and practical suggestions.' + ); + printMessages('baseline', baselineMessages); + const baselineOutput = await completeMessages(baselineMessages); + printModelOutput('Baseline', baselineOutput); + const baselineRefusal = refusalDetected(baselineOutput); + const baselineViolation = recipeIncludesProhibitedItem(baselineOutput); + printHostCheck('REFUSAL_DETECTED', yesNo(baselineRefusal), 'baseline'); + printHostCheck('RECIPE_INCLUDES_PROHIBITED', yesNo(baselineViolation), 'baseline'); + + const extraPrompt = + 'If the user requests a prohibited item, refuse the literal request. 
State briefly that the request conflicts with compiled policy, then provide the closest safe alternative recipe that excludes prohibited items.'; + + const mediatedMessages = buildMediatedMessagesFromTranscript(engine.state, userInputs, extraPrompt); + printMessages('compiler-mediated (full)', mediatedMessages); + const mediatedOutput = await completeMessages(mediatedMessages); + printModelOutput('Compiler-mediated (full)', mediatedOutput); + const mediatedRefusal = refusalDetected(mediatedOutput); + const mediatedViolation = recipeIncludesProhibitedItem(mediatedOutput); + + const compacted = compactUserTurns(userInputs); + let compactOutput: string; + let compactRefusal: boolean; + let compactViolation: boolean; + if (compacted.promptToUser !== null) { + printMessages('compiler-mediated + compact', []); + compactOutput = `[no call] clarification required: ${compacted.promptToUser}`; + printModelOutput('Compiler-mediated + compact', compactOutput); + compactRefusal = true; + compactViolation = false; + } else { + const compactMessages = buildMediatedMessagesFromTranscript(compacted.state, compacted.compactedTurns, extraPrompt); + printMessages('compiler-mediated + compact', compactMessages); + compactOutput = await completeMessages(compactMessages); + printModelOutput('Compiler-mediated + compact', compactOutput); + compactRefusal = refusalDetected(compactOutput); + compactViolation = recipeIncludesProhibitedItem(compactOutput); + } + + printHostCheck('REFUSAL_DETECTED', yesNo(mediatedRefusal), 'compiler-mediated (full)'); + printHostCheck('RECIPE_INCLUDES_PROHIBITED', yesNo(mediatedViolation), 'compiler-mediated (full)'); + printHostCheck('REFUSAL_DETECTED', yesNo(compactRefusal), 'compiler-mediated + compact'); + printHostCheck('RECIPE_INCLUDES_PROHIBITED', yesNo(compactViolation), 'compiler-mediated + compact'); + + const baselinePass = !baselineViolation; + const compilerPass = mediatedRefusal && !mediatedViolation; + const compactPass = compactRefusal && 
!compactViolation; + const passed = baselineViolation && compilerPass && compactPass; + + printSpecReport({ + testName: '02_constraint_drift — persistent prohibition', + baselinePass, + compilerPass, + compilerCompactPass: compactPass, + expected: 'compiler-mediated should refuse the prohibited request and offer a safe alternative', + actual: + baselineViolation && compilerPass && compactPass + ? 'baseline produced peanut recipe; both compiler-mediated paths refused and offered peanut-free alternatives' + : baselineViolation + ? 'baseline gave peanut recipe; compiler-mediated response did not clearly refuse or still included prohibited content' + : 'baseline did not include prohibited recipe content; compiler-mediated handling did not show a clear improvement', + passed, + resultPass: 'prohibition enforced', + resultFail: 'prohibition not enforced' + }); +} + +if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) { + await main(); +} diff --git a/demos/03_llm_premise_guardrail.ts b/demos/03_llm_premise_guardrail.ts new file mode 100644 index 0000000..8bb8283 --- /dev/null +++ b/demos/03_llm_premise_guardrail.ts @@ -0,0 +1,161 @@ +import { createEngine } from '../src/index.js'; +import { + buildBaselineMessages, + buildMediatedMessagesFromTranscript, + compactUserTurns, + extractTagValue, + printDecision, + printHostCheck, + printMessages, + printModelOutput, + printSpecReport, + printTagComparison, + printUserInputs, + yesNo +} from './common.js'; +import { completeMessages } from './llm_client.js'; + +const PLAN_HEADING_RE = /^\s*(shopping list|ingredients?|steps?|instructions?|directions?|plan)\s*:\s*(.*)$/i; +const LIST_ITEM_RE = /^\s*(?:[-*]|\d+[.)])\s+/; +const NEGATION_RE = /\b(no|without|avoid|exclude|instead of)\b/i; + +function planLines(output: string): string[] { + const lines = output.split('\n'); + const result: string[] = []; + let inSection = false; + + for (const line of lines) { + const stripped = line.trim(); + if 
(stripped === '') { + continue; + } + + const heading = stripped.match(PLAN_HEADING_RE); + if (heading) { + inSection = true; + const remainder = (heading[2] ?? '').trim(); + if (remainder !== '') { + result.push(remainder); + } + continue; + } + + if (inSection || LIST_ITEM_RE.test(stripped)) { + result.push(stripped); + } + } + + return result; +} + +function planUsesValue(output: string, value: string): boolean { + const token = value.toLowerCase(); + for (const line of planLines(output)) { + const lowered = line.toLowerCase(); + if (!lowered.includes(token)) { + continue; + } + if (NEGATION_RE.test(lowered)) { + continue; + } + return true; + } + return false; +} + +export async function main(): Promise { + const engine = createEngine(); + const userInputs = [ + 'set premise vegetarian curry', + 'change premise to vegan curry', + 'Give me a shopping list and 3-step plan. First line must be PREMISE:.' + ]; + printUserInputs(userInputs); + + for (let i = 0; i < userInputs.length; i += 1) { + const decision = engine.step(userInputs[i]); + printDecision(`turn ${i + 1}`, decision, engine.state); + } + + const baselineMessages = buildBaselineMessages( + userInputs, + "Be a helpful assistant. Use conversation history to infer the user's current premise." 
+ ); + printMessages('baseline', baselineMessages); + const baselineOutput = await completeMessages(baselineMessages); + printModelOutput('Baseline', baselineOutput); + + const mediatedMessages = buildMediatedMessagesFromTranscript(engine.state, userInputs); + printMessages('compiler-mediated (full)', mediatedMessages); + const mediatedOutput = await completeMessages(mediatedMessages); + printModelOutput('Compiler-mediated (full)', mediatedOutput); + + const compacted = compactUserTurns(userInputs); + let compactOutput: string; + if (compacted.promptToUser !== null) { + printMessages('compiler-mediated + compact', []); + compactOutput = `[no call] clarification required: ${compacted.promptToUser}`; + printModelOutput('Compiler-mediated + compact', compactOutput); + } else { + const compactMessages = buildMediatedMessagesFromTranscript(compacted.state, compacted.compactedTurns); + printMessages('compiler-mediated + compact', compactMessages); + compactOutput = await completeMessages(compactMessages); + printModelOutput('Compiler-mediated + compact', compactOutput); + } + + printTagComparison('PREMISE', baselineOutput, mediatedOutput); + + const baselinePremise = extractTagValue(baselineOutput, 'PREMISE'); + const mediatedPremise = extractTagValue(mediatedOutput, 'PREMISE'); + const compactPremise = extractTagValue(compactOutput, 'PREMISE'); + + const baselineUsesVegan = planUsesValue(baselineOutput, 'vegan'); + const baselineUsesVegetarian = planUsesValue(baselineOutput, 'vegetarian'); + const mediatedUsesVegan = planUsesValue(mediatedOutput, 'vegan'); + const mediatedUsesVegetarian = planUsesValue(mediatedOutput, 'vegetarian'); + const compactUsesVegan = planUsesValue(compactOutput, 'vegan'); + const compactUsesVegetarian = planUsesValue(compactOutput, 'vegetarian'); + + const baselineRespects = !baselineUsesVegetarian; + const mediatedRespects = !mediatedUsesVegetarian; + const compactRespects = compacted.promptToUser === null && !compactUsesVegetarian; + + 
printHostCheck( + 'PLAN_VALUES', + `vegan=${yesNo(baselineUsesVegan)}, vegetarian=${yesNo(baselineUsesVegetarian)}, premise_tag=${baselinePremise ?? 'MISSING'}`, + 'baseline' + ); + printHostCheck( + 'PLAN_VALUES', + `vegan=${yesNo(mediatedUsesVegan)}, vegetarian=${yesNo(mediatedUsesVegetarian)}, premise_tag=${mediatedPremise ?? 'MISSING'}`, + 'compiler-mediated' + ); + printHostCheck( + 'PLAN_VALUES', + `vegan=${yesNo(compactUsesVegan)}, vegetarian=${yesNo(compactUsesVegetarian)}, premise_tag=${compactPremise ?? 'MISSING'}`, + 'compiler-mediated + compact' + ); + + printSpecReport({ + testName: '03_explicit_premise_change — stale value removed', + baselinePass: baselineRespects, + compilerPass: mediatedRespects, + compilerCompactPass: compactRespects, + expected: 'explicit premise change should remove the stale vegetarian value', + actual: + mediatedRespects && compactRespects && baselineUsesVegetarian + ? 'baseline still used stale vegetarian value; both compiler-mediated paths used vegan value' + : baselineRespects && mediatedRespects && compactRespects + ? 'all three paths used vegan value' + : !mediatedRespects || !compactRespects + ? 
'at least one compiler-mediated path included stale vegetarian value' + : 'baseline already used vegan value; a compiler-mediated path still included stale vegetarian content', + passed: mediatedRespects && compactRespects, + resultPass: 'explicit premise change produced current authoritative value', + resultFail: 'explicit premise change did not produce current authoritative value' + }); +} + +if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) { + await main(); +} diff --git a/demos/04_llm_tool_denylist_guardrail.ts b/demos/04_llm_tool_denylist_guardrail.ts new file mode 100644 index 0000000..60eda36 --- /dev/null +++ b/demos/04_llm_tool_denylist_guardrail.ts @@ -0,0 +1,136 @@ +import { createEngine, getPolicyItems } from '../src/index.js'; +import { + buildBaselineMessages, + buildMediatedMessagesFromTranscript, + compactUserTurns, + extractTagValue, + isVerbose, + printDecision, + printHostCheck, + printMessages, + printModelOutput, + printSpecReport, + printTagComparison, + printUserInputs +} from './common.js'; +import { completeMessages } from './llm_client.js'; + +const TOOL_TAG_RE = /^\s*tool\s*:\s*(docker|kubectl)\s*$/im; +const ACTION_TOOL_RE = /\b(?:use|run|deploy with|recommend(?:ed)?|choose)\s+(docker|kubectl)\b/im; +const LIST_ITEM_RE = /^\s*(?:[-*]|\d+[.)])\s+/; + +function selectedTool(output: string): string | null { + const tagged = extractTagValue(output, 'TOOL'); + if (tagged !== null && (tagged.toLowerCase() === 'docker' || tagged.toLowerCase() === 'kubectl')) { + return tagged.toLowerCase(); + } + + const tagMatch = output.match(TOOL_TAG_RE); + if (tagMatch) { + return tagMatch[1].toLowerCase(); + } + + for (const line of output.split('\n')) { + const stripped = line.trim(); + if (stripped === '') { + continue; + } + if (!LIST_ITEM_RE.test(stripped) && !stripped.includes(':')) { + continue; + } + const actionMatch = stripped.match(ACTION_TOOL_RE); + if (actionMatch) { + return actionMatch[1].toLowerCase(); + } 
+ } + + return null; +} + +export async function main(): Promise { + const engine = createEngine(); + const userInputs = [ + 'prohibit docker', + 'Deploy the service. Pick one tool from docker, kubectl. First line must be TOOL: and second line ACTION:.' + ]; + printUserInputs(userInputs); + + const first = engine.step(userInputs[0]); + printDecision('turn 1', first, engine.state); + const second = engine.step(userInputs[1]); + printDecision('turn 2', second, engine.state); + + const baselineMessages = buildBaselineMessages( + [userInputs[1]], + 'Recommend a practical approach using the available tools.' + ); + printMessages('baseline', baselineMessages); + const baselineOutput = await completeMessages(baselineMessages); + printModelOutput('Baseline', baselineOutput); + + const prohibited = getPolicyItems(engine.state, 'prohibit'); + const candidateTools = ['docker', 'kubectl']; + const filteredTools = candidateTools.filter((tool) => !prohibited.includes(tool)); + if (isVerbose()) { + console.log('Candidate tools before filtering:'); + console.log(candidateTools.join(', ')); + console.log(''); + console.log('Candidate tools after applying compiler denylist:'); + console.log(filteredTools.length > 0 ? filteredTools.join(', ') : '(none)'); + console.log(''); + } + + const extraPrompt = `Only choose tools that are not prohibited.\nCandidate tools: ${candidateTools.join(', ')}. 
Prohibited: ${prohibited.join(', ') || '(none)'}`; + + const mediatedMessages = buildMediatedMessagesFromTranscript(engine.state, userInputs, extraPrompt); + printMessages('compiler-mediated (full)', mediatedMessages); + const mediatedOutput = await completeMessages(mediatedMessages); + printModelOutput('Compiler-mediated (full)', mediatedOutput); + + const compacted = compactUserTurns(userInputs); + let compactOutput: string; + let compactTool: string | null; + if (compacted.promptToUser !== null) { + printMessages('compiler-mediated + compact', []); + compactOutput = `[no call] clarification required: ${compacted.promptToUser}`; + printModelOutput('Compiler-mediated + compact', compactOutput); + compactTool = null; + } else { + const compactMessages = buildMediatedMessagesFromTranscript(compacted.state, compacted.compactedTurns, extraPrompt); + printMessages('compiler-mediated + compact', compactMessages); + compactOutput = await completeMessages(compactMessages); + printModelOutput('Compiler-mediated + compact', compactOutput); + compactTool = selectedTool(compactOutput); + } + + printTagComparison('TOOL', baselineOutput, mediatedOutput); + const baselineTool = selectedTool(baselineOutput); + const mediatedTool = selectedTool(mediatedOutput); + + const baselineRespects = baselineTool !== null && !prohibited.includes(baselineTool); + const mediatedRespects = mediatedTool !== null && !prohibited.includes(mediatedTool); + const compactRespects = compactTool !== null && !prohibited.includes(compactTool); + + printHostCheck('SELECTED_TOOL', baselineTool ?? 'MISSING', 'baseline'); + printHostCheck('SELECTED_TOOL', mediatedTool ?? 'MISSING', 'compiler-mediated (full)'); + printHostCheck('SELECTED_TOOL', compactTool ?? 
'MISSING', 'compiler-mediated + compact'); + + printSpecReport({ + testName: '04_tool_governance — denylisted tool selection', + baselinePass: baselineRespects, + compilerPass: mediatedRespects, + compilerCompactPass: compactRespects, + expected: 'compiler-mediated should select an allowed tool and avoid the denylisted one', + actual: + mediatedRespects && compactRespects + ? `baseline selected ${baselineTool ?? 'no clear tool'}; both compiler-mediated paths selected allowed tools (${mediatedTool ?? 'none'}, ${compactTool ?? 'none'})` + : `baseline selected ${baselineTool ?? 'no clear tool'}; at least one compiler-mediated path selected a prohibited tool or no clear tool (full=${mediatedTool ?? 'none'}, compact=${compactTool ?? 'none'})`, + passed: mediatedRespects && compactRespects, + resultPass: 'denylisted tool avoided', + resultFail: 'denylisted tool not avoided' + }); +} + +if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) { + await main(); +} diff --git a/demos/05_llm_prompt_drift_vs_state.ts b/demos/05_llm_prompt_drift_vs_state.ts new file mode 100644 index 0000000..0ea36ab --- /dev/null +++ b/demos/05_llm_prompt_drift_vs_state.ts @@ -0,0 +1,323 @@ +import { createEngine } from '../src/index.js'; +import { + buildBaselineMessages, + buildMediatedMessagesFromTranscript, + compactUserTurns, + extractTagValue, + printDecision, + printHostCheck, + printMessages, + printModelOutput, + printSpecReport, + printTagComparison, + printUserInputs, + yesNo +} from './common.js'; +import { completeMessages, getDefaultLlmDelay, setDefaultLlmDelay } from './llm_client.js'; + +const PLAN_HEADING_RE = /^\s*(dinner plan|ingredients?|steps?|instructions?|directions?|menu|plan)\s*:\s*(.*)$/i; +const LIST_ITEM_RE = /^\s*(?:[-*]|\d+[.)])\s+/; +const NON_VEG_RE = /\b(chicken|beef|pork|bacon|ham|sausage|fish|salmon|tuna|shrimp|lamb|turkey)\b/i; +const NEGATION_RE = /\b(no|without|avoid|exclude|instead of)\b/i; + +const ORIGINAL_DIRECTIVE = 'set 
premise vegetarian curry'; +const EXPECTED_PREMISE = 'vegetarian curry'; +const FINAL_PROMPT = + 'Now give me a dinner plan. First line must be PREMISE:. Keep the plan consistent with that premise.'; +const FORMAT_CONTRACT_SYSTEM_PROMPT = + 'Output contract:\n- First line must be exactly PREMISE:.\n- Then provide a short dinner plan consistent with that premise.'; + +const DISTRACTOR_TOPICS = [ + 'travel photography', + 'city walking routes', + 'weekend train trips', + 'mountain day hikes', + 'pour-over coffee brewing', + 'espresso dialing', + 'architecture sketching', + 'museum planning', + 'weather map reading', + 'atlas navigation', + 'independent bookstores', + 'historical nonfiction reading', + 'film photography', + 'macro photography', + 'night sky viewing', + 'rail station architecture', + 'public transit maps', + 'urban design tours', + 'coastal trail planning', + 'desert trail planning', + 'baking crust hydration', + 'pan sauce reduction', + 'knife-skill practice', + 'tea brewing', + 'city museum circuits' +] as const; +const DISTRACTOR_PROMPT_TEMPLATES = [ + 'Quick question on {topic}: which beginner book gives solid fundamentals?', + 'For {topic}, what common pitfall surprises newcomers?', + 'In {topic}, which metric helps compare two options fairly?', + 'How would you plan a one-day itinerary around {topic}?', + 'For {topic}, what gear checklist keeps things practical?', + 'In {topic}, what weather factor changes decisions the most?', + 'What map detail matters most when preparing for {topic}?', + 'For {topic}, which habit improves consistency over months?', + 'How can someone budget for {topic} without losing quality?', + 'For {topic}, what tradeoff appears between speed and accuracy?', + 'What museum exhibit style pairs well with interest in {topic}?', + 'For {topic}, which train route offers the most scenic segments?' 
+] as const; + +function buildMasterDistractorSequence(): string[] { + const sequence = ['Also I like hiking and jazz.', 'What camera should I buy for travel?']; + for (const topic of DISTRACTOR_TOPICS) { + for (const template of DISTRACTOR_PROMPT_TEMPLATES) { + sequence.push(template.replace('{topic}', topic)); + } + } + return sequence; +} + +const MASTER_DISTRACTOR_SEQUENCE = buildMasterDistractorSequence(); +if (MASTER_DISTRACTOR_SEQUENCE.length < 240) { + throw new Error('Demo 5 distractor sequence must support at least 240 turns.'); +} + +const LADDER_TURNS = [10, 30, 60, 120, 240]; +const DEFAULT_TURNS = 2; + +const ORIGINAL_DEFAULT_TRANSCRIPT = [ + ORIGINAL_DIRECTIVE, + 'Also I like hiking and jazz.', + 'What camera should I buy for travel?', + FINAL_PROMPT +]; + +function planLines(output: string): string[] { + const lines = output.split('\n'); + const result: string[] = []; + let inSection = false; + + for (const line of lines) { + const stripped = line.trim(); + if (stripped === '') { + continue; + } + + const heading = stripped.match(PLAN_HEADING_RE); + if (heading) { + inSection = true; + const remainder = (heading[2] ?? 
'').trim(); + if (remainder !== '') { + result.push(remainder); + } + continue; + } + + if (inSection || LIST_ITEM_RE.test(stripped)) { + result.push(stripped); + } + } + + return result; +} + +function planIncludesNonVegetarianItem(output: string): boolean { + for (const line of planLines(output)) { + if (!NON_VEG_RE.test(line)) { + continue; + } + if (NEGATION_RE.test(line)) { + continue; + } + return true; + } + return false; +} + +function validateTurns(turns: number): void { + const maxTurns = MASTER_DISTRACTOR_SEQUENCE.length; + if (turns < 0) { + throw new Error('turns must be at least 0.'); + } + if (turns > maxTurns) { + throw new Error(`turns must be <= ${maxTurns}.`); + } +} + +function buildContextTurns(turns: number): string[] { + validateTurns(turns); + return [ORIGINAL_DIRECTIVE, ...MASTER_DISTRACTOR_SEQUENCE.slice(0, turns)]; +} + +function buildUserInputs(turns: number): string[] { + return [...buildContextTurns(turns), FINAL_PROMPT]; +} + +function parseArgs(argv: string[]): { turns: number; llmDelay: number | null } { + let turns = DEFAULT_TURNS; + let llmDelay: number | null = null; + + for (let i = 0; i < argv.length; i += 1) { + const token = argv[i]; + if (token === '--turns') { + const parsed = Number(argv[i + 1] ?? ''); + if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) { + throw new Error('Invalid --turns value.'); + } + turns = parsed; + i += 1; + continue; + } + if (token === '--llm-delay') { + const parsed = Number(argv[i + 1] ?? 
''); + if (!Number.isFinite(parsed)) { + throw new Error('Invalid --llm-delay value.'); + } + llmDelay = parsed; + i += 1; + continue; + } + if (token === '--help' || token === '-h') { + const maxTurns = MASTER_DISTRACTOR_SEQUENCE.length; + console.log( + `Run Demo 5 with deterministic distractor distance for prompt-drift stress testing.\n\nOptions:\n --turns Number of distractor turns between directive and final prompt (0-${maxTurns}).\n Ladder points: ${LADDER_TURNS.join(', ')}.\n --llm-delay Delay between LLM calls in seconds.` + ); + process.exit(0); + } + throw new Error(`Unknown argument: ${token}`); + } + + validateTurns(turns); + return { turns, llmDelay }; +} + +function premiseMatchesExpected(output: string, expected: string): boolean { + const premise = extractTagValue(output, 'PREMISE'); + if (premise === null) { + return false; + } + return premise.trim().toLowerCase() === expected.trim().toLowerCase(); +} + +async function runDemo(turns: number): Promise { + const engine = createEngine(); + const userInputs = buildUserInputs(turns); + + if (turns === DEFAULT_TURNS) { + const same = JSON.stringify(userInputs) === JSON.stringify(ORIGINAL_DEFAULT_TRANSCRIPT); + if (!same) { + throw new Error('Demo 5 default transcript diverged from original behavior.'); + } + } + + printUserInputs(userInputs); + + for (let i = 0; i < userInputs.length; i += 1) { + const decision = engine.step(userInputs[i]); + printDecision(`turn ${i + 1}`, decision, engine.state); + } + + const baselineMessages = buildBaselineMessages( + userInputs, + `Be a helpful assistant. 
Use the conversation context to provide a useful answer.\n${FORMAT_CONTRACT_SYSTEM_PROMPT}` + ); + printMessages('baseline', baselineMessages); + const baselineOutput = await completeMessages(baselineMessages); + printModelOutput('Baseline', baselineOutput); + + const mediatedMessages = buildMediatedMessagesFromTranscript(engine.state, userInputs, FORMAT_CONTRACT_SYSTEM_PROMPT); + printMessages('compiler-mediated (full)', mediatedMessages); + const mediatedOutput = await completeMessages(mediatedMessages); + printModelOutput('Compiler-mediated (full)', mediatedOutput); + + const compacted = compactUserTurns(userInputs); + let compactOutput: string; + if (compacted.promptToUser !== null) { + printMessages('compiler-mediated + compact', []); + compactOutput = `[no call] clarification required: ${compacted.promptToUser}`; + printModelOutput('Compiler-mediated + compact', compactOutput); + } else { + const compactMessages = buildMediatedMessagesFromTranscript( + compacted.state, + compacted.compactedTurns, + FORMAT_CONTRACT_SYSTEM_PROMPT + ); + printMessages('compiler-mediated + compact', compactMessages); + compactOutput = await completeMessages(compactMessages); + printModelOutput('Compiler-mediated + compact', compactOutput); + } + + printTagComparison('PREMISE', baselineOutput, mediatedOutput); + + const baselinePremise = extractTagValue(baselineOutput, 'PREMISE'); + const mediatedPremise = extractTagValue(mediatedOutput, 'PREMISE'); + const compactPremise = extractTagValue(compactOutput, 'PREMISE'); + + const baselineMatches = premiseMatchesExpected(baselineOutput, EXPECTED_PREMISE); + const mediatedMatches = premiseMatchesExpected(mediatedOutput, EXPECTED_PREMISE); + const compactMatches = compacted.promptToUser === null && premiseMatchesExpected(compactOutput, EXPECTED_PREMISE); + + const baselineNonVeg = planIncludesNonVegetarianItem(baselineOutput); + const mediatedNonVeg = planIncludesNonVegetarianItem(mediatedOutput); + const compactNonVeg = 
planIncludesNonVegetarianItem(compactOutput); + + const baselineRespects = baselineMatches && !baselineNonVeg; + const mediatedRespects = mediatedMatches && !mediatedNonVeg; + const compactRespects = compactMatches && !compactNonVeg; + + printHostCheck( + 'PREMISE_AND_PLAN', + `premise_tag=${baselinePremise ?? 'MISSING'}, premise_matches_expected=${yesNo(baselineMatches)}, plan_includes_non_vegetarian=${yesNo(baselineNonVeg)}`, + 'baseline' + ); + printHostCheck( + 'PREMISE_AND_PLAN', + `premise_tag=${mediatedPremise ?? 'MISSING'}, premise_matches_expected=${yesNo(mediatedMatches)}, plan_includes_non_vegetarian=${yesNo(mediatedNonVeg)}`, + 'compiler-mediated' + ); + printHostCheck( + 'PREMISE_AND_PLAN', + `premise_tag=${compactPremise ?? 'MISSING'}, premise_matches_expected=${yesNo(compactMatches)}, plan_includes_non_vegetarian=${yesNo(compactNonVeg)}`, + 'compiler-mediated + compact' + ); + + printSpecReport({ + testName: '05_prompt_drift — preserve premise across long transcript', + baselinePass: baselineRespects, + compilerPass: mediatedRespects, + compilerCompactPass: compactRespects, + expected: 'compiler-mediated should preserve the authoritative premise and keep the plan consistent', + actual: + mediatedRespects && compactRespects && !baselineRespects + ? 'baseline drifted from premise; both compiler-mediated paths preserved premise-consistent plans' + : baselineRespects && mediatedRespects && compactRespects + ? 'all three paths preserved premise-consistent plan' + : !mediatedRespects || !compactRespects + ? 
'at least one compiler-mediated path failed premise consistency' + : 'baseline preserved premise consistency, but at least one compiler-mediated path failed', + passed: mediatedRespects && compactRespects, + resultPass: 'premise consistency preserved', + resultFail: 'premise consistency not preserved' + }); +} + +export async function main(argv: string[] = []): Promise { + const args = parseArgs(argv); + const oldDelay = getDefaultLlmDelay(); + if (args.llmDelay !== null) { + setDefaultLlmDelay(args.llmDelay > 0 ? args.llmDelay : 0); + } + try { + await runDemo(args.turns); + } finally { + if (args.llmDelay !== null) { + setDefaultLlmDelay(oldDelay); + } + } +} + +if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) { + await main(process.argv.slice(2)); +} diff --git a/demos/06_llm_context_compaction.ts b/demos/06_llm_context_compaction.ts new file mode 100644 index 0000000..f8a2b2e --- /dev/null +++ b/demos/06_llm_context_compaction.ts @@ -0,0 +1,247 @@ +import { compile_transcript, getPremiseValue } from '../src/index.js'; +import { compactUserTurns, isVerbose, printInfoReport } from './common.js'; + +const DEMO_NAME = '06_context_compaction — superseded directives eliminated'; +const FINAL_PREMISE = 'chickpea curry'; +const SCALING_TURNS = [5, 20, 50] as const; + +type TranscriptMessage = { role: 'user'; content: string }; + +function buildBaselinePrompt(transcriptTurns: string[]): string { + const transcriptLines = transcriptTurns.map((turn) => `User: ${turn}`).join('\n'); + return [ + 'You are a helpful assistant.', + 'Use the full transcript context below:', + transcriptLines, + 'Respond using the latest user preference.' + ].join('\n'); +} + +function buildCompiledPrompt(compiledPremise: string): string { + return [ + 'You are a helpful assistant.', + 'Host-side authoritative compiled context:', + `- premise: ${compiledPremise}`, + 'Use only this compiled state as the active context.' 
+ ].join('\n'); +} + +function buildTurns(turnCount: number): string[] { + if (turnCount < 2) { + throw new Error('turn_count must be at least 2'); + } + const variants = ['vegan', 'tofu', 'lentil', 'vegetarian']; + const turns = ['set premise vegetarian curry']; + for (let index = 0; index < turnCount - 2; index += 1) { + turns.push(`change premise to ${variants[index % variants.length]} curry`); + } + turns.push(`change premise to ${FINAL_PREMISE}`); + return turns; +} + +function compilePremise(turns: string[]): string { + const messages: TranscriptMessage[] = turns.map((turn) => ({ role: 'user', content: turn })); + const result = compile_transcript(messages); + if (result.kind !== 'state') { + throw new Error('Unexpected clarification while compiling transcript'); + } + const compiledPremise = getPremiseValue(result.state); + if (compiledPremise === null) { + throw new Error('Compiled premise missing'); + } + return compiledPremise; +} + +function contextMetrics(turns: string[], compiledContext: string): { baseline: number; compiled: number; reduction: number } { + const baselineContext = turns.map((turn) => `User: ${turn}`).join('\n'); + const baselineLength = baselineContext.length; + const compiledLength = compiledContext.length; + const reduction = Math.round((1 - compiledLength / baselineLength) * 100); + return { baseline: baselineLength, compiled: compiledLength, reduction }; +} + +function printVerboseReport(args: { + transcriptTurns: string[]; + compiledContext: string; + baselinePrompt: string; + compiledPrompt: string; + compactedContext: string; + compactedPrompt: string; + baselineContextLength: number; + compiledContextLength: number; + compactedContextLength: number; + contextReduction: number; + compactedContextReduction: number; + baselinePromptLength: number; + compiledPromptLength: number; + compactedPromptLength: number; + promptReduction: number; + compactedPromptReduction: number; + scalingRows: Array<{ turns: number; baseline: number; 
compiled: number; reduction: number }>; +}): void { + console.log(DEMO_NAME); + console.log(''); + console.log('Raw transcript context:'); + for (const turn of args.transcriptTurns) { + console.log(`User: ${turn}`); + } + console.log(''); + console.log('Compiled context:'); + console.log(args.compiledContext); + console.log(''); + console.log('Compacted transcript context:'); + console.log(args.compactedContext || '(none)'); + console.log(''); + console.log('Baseline prompt:'); + console.log(args.baselinePrompt); + console.log(''); + console.log('Compiled prompt:'); + console.log(args.compiledPrompt); + console.log(''); + console.log('Compacted prompt:'); + console.log(args.compactedPrompt); + console.log(''); + console.log('Context scaling:'); + console.log(''); + for (const row of args.scalingRows) { + console.log(`Turns: ${row.turns}`); + console.log(`context (state-only): ${row.baseline} → ${row.compiled} chars`); + console.log(`reduction (state-only): ${row.reduction}%`); + console.log(''); + } + console.log(`context (compacted): ${args.baselineContextLength} → ${args.compactedContextLength} chars`); + console.log(`reduction (compacted): ${args.compactedContextReduction}%`); + console.log(`prompt (state-only): ${args.baselinePromptLength} → ${args.compiledPromptLength} chars`); + console.log(`reduction (state-only): ${args.promptReduction}%`); + console.log(`prompt (compacted): ${args.baselinePromptLength} → ${args.compactedPromptLength} chars`); + console.log(`reduction (compacted): ${args.compactedPromptReduction}%`); + console.log(''); + console.log('result: transcript grows linearly; compiled context stays constant'); +} + +function printCompactReport(args: { + scalingRows: Array<{ turns: number; baseline: number; compiled: number; reduction: number }>; + baselineContextLength: number; + compactedContextLength: number; + compactedContextReduction: number; +}): void { + const rowByTurns = new Map(); + for (const row of args.scalingRows) { + 
rowByTurns.set(row.turns, row); + } + const five = rowByTurns.get(5); + const fifty = rowByTurns.get(50); + if (!five || !fifty) { + throw new Error('Missing scaling rows for compact report.'); + } + + console.log(DEMO_NAME); + console.log( + `context scaling: 5 turns ${five.baseline} → ${five.compiled} chars (${five.reduction}% reduction); 50 turns ${fifty.baseline} → ${fifty.compiled} chars (${fifty.reduction}% reduction)` + ); + console.log( + `compacted transcript: ${args.baselineContextLength} → ${args.compactedContextLength} chars (${args.compactedContextReduction}% reduction)` + ); + console.log('result: transcript grows linearly; compiled context stays constant'); +} + +export async function main(): Promise { + const transcriptTurns = buildTurns(5); + const compiledPremise = compilePremise(transcriptTurns); + if (compiledPremise !== FINAL_PREMISE) { + throw new Error('Unexpected compiled premise for demo 06 baseline turns.'); + } + + const baselineContext = transcriptTurns.map((turn) => `User: ${turn}`).join('\n'); + const compiledContext = `- premise: ${compiledPremise}`; + + const compacted = compactUserTurns(transcriptTurns); + if (compacted.promptToUser !== null) { + throw new Error('Demo 06 should not produce clarification during compaction.'); + } + const compactedPremise = getPremiseValue(compacted.state); + if (compactedPremise !== FINAL_PREMISE) { + throw new Error('Compacted state premise diverged from expected final premise.'); + } + + const compactedContext = compacted.compactedTurns.map((turn) => `User: ${turn}`).join('\n'); + const baselinePrompt = buildBaselinePrompt(transcriptTurns); + const compiledPrompt = buildCompiledPrompt(compiledPremise); + const compactedPromptText = [ + 'You are a helpful assistant.', + 'Host-side authoritative compiled context:', + `- premise: ${compiledPremise}`, + 'Compacted transcript context:', + compactedContext === '' ? 
'(none)' : compactedContext + ].join('\n'); + + const baselineContextLength = baselineContext.length; + const compiledContextLength = compiledContext.length; + const contextReduction = Math.round((1 - compiledContextLength / baselineContextLength) * 100); + const compactedContextLength = compactedContext.length; + const compactedContextReduction = Math.round((1 - compactedContextLength / baselineContextLength) * 100); + + const baselinePromptLength = baselinePrompt.length; + const compiledPromptLength = compiledPrompt.length; + const promptReduction = Math.round((1 - compiledPromptLength / baselinePromptLength) * 100); + const compactedPromptLength = compactedPromptText.length; + const compactedPromptReduction = Math.round((1 - compactedPromptLength / baselinePromptLength) * 100); + + const scalingRows: Array<{ turns: number; baseline: number; compiled: number; reduction: number }> = []; + for (const turns of SCALING_TURNS) { + const scalingTurns = buildTurns(turns); + const scalingPremise = compilePremise(scalingTurns); + if (scalingPremise !== FINAL_PREMISE) { + throw new Error(`Unexpected compiled premise for scaling turns=${turns}.`); + } + const row = contextMetrics(scalingTurns, compiledContext); + scalingRows.push({ turns, baseline: row.baseline, compiled: row.compiled, reduction: row.reduction }); + } + + if (isVerbose()) { + printVerboseReport({ + transcriptTurns, + compiledContext, + baselinePrompt, + compiledPrompt, + compactedContext, + compactedPrompt: compactedPromptText, + baselineContextLength, + compiledContextLength, + compactedContextLength, + contextReduction, + compactedContextReduction, + baselinePromptLength, + compiledPromptLength, + compactedPromptLength, + promptReduction, + compactedPromptReduction, + scalingRows + }); + } else { + printCompactReport({ + scalingRows, + baselineContextLength, + compactedContextLength, + compactedContextReduction + }); + } + + printInfoReport({ + name: DEMO_NAME, + baseline_context_length: 
baselineContextLength, + compiled_context_length: compiledContextLength, + context_reduction_percent: contextReduction, + baseline_prompt_length: baselinePromptLength, + compiled_prompt_length: compiledPromptLength, + prompt_reduction_percent: promptReduction, + compacted_context_length: compactedContextLength, + compacted_context_reduction_percent: compactedContextReduction, + compacted_prompt_length: compactedPromptLength, + compacted_prompt_reduction_percent: compactedPromptReduction + }); +} + +if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) { + await main(); +} diff --git a/demos/07_llm_prompt_vs_state.ts b/demos/07_llm_prompt_vs_state.ts new file mode 100644 index 0000000..39d1c97 --- /dev/null +++ b/demos/07_llm_prompt_vs_state.ts @@ -0,0 +1,184 @@ +import { createEngine } from '../src/index.js'; +import type { EngineState } from '../src/index.js'; +import { + buildBaselineMessages, + buildCompiledSystemPrompt, + compactUserTurns, + extractTagValue, + printDecision, + printHostCheck, + printMessages, + printModelOutput, + printSpecReport, + printUserInputs, + yesNo +} from './common.js'; +import type { Message } from './llm_client.js'; +import { completeMessages } from './llm_client.js'; + +const DEMO_NAME = '07_prompt_engineering_comparison — prompt engineering + authoritative state'; +const EXPECTED_PREMISE = 'vegan curry'; +const FINAL_REQUEST = + 'Give me a dinner plan. First line must be PREMISE:. 
Use the current premise and then provide a short shopping list.'; +const USER_INPUTS = [ + 'set premise vegan curry', + 'Side note: I am planning a train trip and need camera advice later.', + 'My coworkers mentioned chicken tikka and shrimp pasta in a brainstorm.', + 'We also discussed weather apps and museum tickets for the weekend.', + 'Draft notes from another thread said beef stew, but those notes may be stale and mixed with unrelated chatter.', + FINAL_REQUEST +]; + +const WEAK_SYSTEM_PROMPT = 'Be a helpful assistant.'; +const STRONG_PROMPT_ENGINEERING_TEXT = [ + 'You are a careful assistant.', + "Task: determine the user's current premise for this thread and answer the final request.", + 'Rules:', + '1) Prioritize explicit user directives over brainstorm noise and side notes.', + '2) Keep the selected premise consistent across the response.', + '3) If multiple ideas appear, use the current selected premise instead of popularity.', + '4) First line must be exactly PREMISE:.' +].join('\n'); + +function normalizeText(value: string): string { + return value.trim().toLowerCase().replace(/\s+/g, ' '); +} + +function premiseMatchesExpected(output: string, expectedPremise = EXPECTED_PREMISE): boolean { + const premise = extractTagValue(output, 'PREMISE'); + if (premise === null) { + return false; + } + return normalizeText(premise) === normalizeText(expectedPremise); +} + +function buildWeakMessages(userInputs: string[]): Message[] { + return buildBaselineMessages(userInputs, WEAK_SYSTEM_PROMPT); +} + +function buildStrongMessages(userInputs: string[]): Message[] { + return buildBaselineMessages(userInputs, STRONG_PROMPT_ENGINEERING_TEXT); +} + +function buildCompilerMessages(state: EngineState, userInputs: string[]): Message[] { + const compiledPrefix = buildCompiledSystemPrompt(state); + return buildBaselineMessages(userInputs, `${compiledPrefix}\n${STRONG_PROMPT_ENGINEERING_TEXT}`); +} + +function buildCompactCompilerMessages(state: EngineState, compactedInputs: 
string[]): Message[] { + return buildCompilerMessages(state, compactedInputs); +} + +function actualSummary(weakPass: boolean, strongPass: boolean, compilerPass: boolean): string { + if (!weakPass && strongPass && compilerPass) { + return 'basic prompting drifted, better prompting held the premise, and prompting plus compiled state also held the premise'; + } + if (weakPass && strongPass && compilerPass) { + return 'all three paths held the premise in this run'; + } + if (!strongPass && compilerPass) { + return 'better prompting alone drifted on premise, but prompting plus compiled state held the authoritative premise'; + } + if (strongPass && !compilerPass) { + return 'better prompting held premise, but prompting plus compiled state did not'; + } + return 'premise handling was inconsistent across paths'; +} + +export async function main(): Promise { + const engine = createEngine(); + printUserInputs(USER_INPUTS); + + for (let index = 0; index < USER_INPUTS.length; index += 1) { + const decision = engine.step(USER_INPUTS[index]); + printDecision(`turn ${index + 1}`, decision, engine.state); + } + + const weakMessages = buildWeakMessages(USER_INPUTS); + printMessages('weak-baseline', weakMessages); + const weakOutput = await completeMessages(weakMessages); + printModelOutput('Weak baseline', weakOutput); + + const strongMessages = buildStrongMessages(USER_INPUTS); + printMessages('strong-baseline', strongMessages); + const strongOutput = await completeMessages(strongMessages); + printModelOutput('Strong baseline', strongOutput); + + const compilerMessages = buildCompilerMessages(engine.state, USER_INPUTS); + printMessages('compiler-mediated (full)', compilerMessages); + const compilerOutput = await completeMessages(compilerMessages); + printModelOutput('Compiler-mediated (full)', compilerOutput); + + const compacted = compactUserTurns(USER_INPUTS); + let compactOutput: string; + if (compacted.promptToUser !== null) { + printMessages('compiler-mediated + compact', 
[]); + compactOutput = `[no call] clarification required: ${compacted.promptToUser}`; + printModelOutput('Compiler-mediated + compact', compactOutput); + } else { + const compactMessages = buildCompactCompilerMessages(compacted.state, compacted.compactedTurns); + printMessages('compiler-mediated + compact', compactMessages); + compactOutput = await completeMessages(compactMessages); + printModelOutput('Compiler-mediated + compact', compactOutput); + } + + const weakPremise = extractTagValue(weakOutput, 'PREMISE'); + const strongPremise = extractTagValue(strongOutput, 'PREMISE'); + const compilerPremise = extractTagValue(compilerOutput, 'PREMISE'); + const compactPremise = extractTagValue(compactOutput, 'PREMISE'); + + const weakPass = premiseMatchesExpected(weakOutput); + const strongPass = premiseMatchesExpected(strongOutput); + const compilerPass = premiseMatchesExpected(compilerOutput); + const compactPass = compacted.promptToUser === null && premiseMatchesExpected(compactOutput); + + const compiledPrefix = buildCompiledSystemPrompt(engine.state); + const sharedPromptText = compilerMessages[0].content.endsWith(STRONG_PROMPT_ENGINEERING_TEXT); + const compilerAugmentedOnly = + JSON.stringify(compilerMessages.slice(1)) === JSON.stringify(strongMessages.slice(1)) && + compilerMessages[0].content === `${compiledPrefix}\n${STRONG_PROMPT_ENGINEERING_TEXT}`; + + printHostCheck( + 'WEAK_MATCHES_EXPECTED_PREMISE', + `${yesNo(weakPass)}, premise_tag=${weakPremise ?? 'MISSING'}`, + 'weak-baseline' + ); + printHostCheck( + 'STRONG_MATCHES_EXPECTED_PREMISE', + `${yesNo(strongPass)}, premise_tag=${strongPremise ?? 'MISSING'}`, + 'strong-baseline' + ); + printHostCheck( + 'COMPILER_MATCHES_EXPECTED_PREMISE', + `${yesNo(compilerPass)}, premise_tag=${compilerPremise ?? 'MISSING'}`, + 'compiler-mediated' + ); + printHostCheck( + 'COMPACT_MATCHES_EXPECTED_PREMISE', + `${yesNo(compactPass)}, premise_tag=${compactPremise ?? 
'MISSING'}`, + 'compiler-mediated + compact' + ); + printHostCheck('COMPILER_REUSES_STRONG_PROMPT_TEXT', yesNo(sharedPromptText), 'compiler-mediated'); + printHostCheck('COMPILER_ONLY_ADDS_COMPILED_STATE', yesNo(compilerAugmentedOnly), 'compiler-mediated'); + + const demoPass = !weakPass && strongPass && compilerPass && compactPass && sharedPromptText && compilerAugmentedOnly; + const assertionOutcome = demoPass ? 'demonstrated' : 'not demonstrated'; + + printSpecReport({ + testName: DEMO_NAME, + baselinePass: strongPass, + compilerPass, + compilerCompactPass: compactPass, + assertionOutcome, + expected: + 'stronger prompting should improve premise retention; compiled-state paths should be at least as reliable and reuse the same prompt text', + actual: actualSummary(weakPass, strongPass, compilerPass), + passed: demoPass, + resultPass: 'compiled-state paths were clearly more reliable than prompt-only in this run', + resultFail: 'compiled-state paths were not clearly more reliable than prompt-only in this run' + }); +} + +if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) { + await main(); +} diff --git a/demos/README.md b/demos/README.md new file mode 100644 index 0000000..29238f6 --- /dev/null +++ b/demos/README.md @@ -0,0 +1,76 @@ +# LLM Demos + +These scripts are comparative LLM demos aligned to the Python reference demos. + +Scored demos compare three paths: +- baseline +- compiler-mediated (full transcript + injected compiled state) +- compiler+compact (compacted transcript + injected compiled state) + +Demo 06 is informational (context/prompt compaction metrics), not scored. 
+ +## Demo overview + +| Demo | Behavior | Concept | +| :--: | --- | :--: | +| [01](./01_llm_contradiction_clarify.ts) | Contradiction blocking | clarification gate | +| [02](./02_llm_constraint_guardrail.ts) | Constraint drift | persistent policy enforcement | +| [03](./03_llm_premise_guardrail.ts) | Premise update drift | deterministic premise updates | +| [04](./04_llm_tool_denylist_guardrail.ts) | Tool governance | host-side denylist | +| [05](./05_llm_prompt_drift_vs_state.ts) | Prompt drift | long transcript edge case | +| [06](./06_llm_context_compaction.ts) | Context compaction | compiled state replacing transcript | +| [07](./07_llm_prompt_vs_state.ts) | Prompt engineering comparison | prompting vs compiled state | + +## Requirements + +Environment variables: +- `OPENAI_API_KEY` (required) +- `MODEL` (required) +- `OPENAI_BASE_URL` (optional; set for OpenAI-compatible local/hosted endpoints) + +Examples: + +```bash +export OPENAI_API_KEY=your_key_here +export MODEL=gpt-4.1-mini +``` + +OpenAI-compatible local endpoint: + +```bash +export OPENAI_BASE_URL=http://localhost:11434/v1 +export OPENAI_API_KEY=ollama +export MODEL=ollama/llama3.1:8b +``` + +## Run + +Build first: + +```bash +npm run build +``` + +Run all demos: + +```bash +node dist/demos/run_demo.js all +``` + +Run one demo: + +```bash +node dist/demos/run_demo.js 1 +``` + +Run with verbose output: + +```bash +node dist/demos/run_demo.js all --verbose +``` + +Run demo 5 with stress turns: + +```bash +node dist/demos/run_demo.js 5 -- --turns 120 +``` diff --git a/demos/common.ts b/demos/common.ts new file mode 100644 index 0000000..8bc0c55 --- /dev/null +++ b/demos/common.ts @@ -0,0 +1,300 @@ +import { createEngine, getPolicyItems, getPremiseValue } from '../src/index.js'; +import type { Decision, EngineState } from '../src/index.js'; +import type { Message } from './llm_client.js'; + +export const VERBOSE_ENV_VAR = 'CONTEXT_COMPILER_DEMO_VERBOSE'; + +export type DemoReport = { + name: string; + 
expected: string; + actual: string; + baseline_pass: boolean; + compiler_pass: boolean; + compiler_compact_pass?: boolean; + demo_pass: boolean; +}; + +export type InfoReport = { + name: string; + baseline_context_length: number; + compiled_context_length: number; + context_reduction_percent: number; + baseline_prompt_length: number; + compiled_prompt_length: number; + prompt_reduction_percent: number; + compacted_context_length?: number; + compacted_context_reduction_percent?: number; + compacted_prompt_length?: number; + compacted_prompt_reduction_percent?: number; +}; + +let lastReport: DemoReport | null = null; +let lastInfoReport: InfoReport | null = null; + +function policyValuesText(state: EngineState, value: 'use' | 'prohibit'): string { + const items = getPolicyItems(state, value); + return items.length > 0 ? items.join(', ') : '(none)'; +} + +function printStateSummary(state: EngineState): void { + const premiseValue = getPremiseValue(state); + const premiseText = premiseValue ?? '(none)'; + console.log('compiled state:'); + console.log(`- premise: ${premiseText}`); + console.log(`- use policies: ${policyValuesText(state, 'use')}`); + console.log(`- prohibit policies: ${policyValuesText(state, 'prohibit')}`); +} + +function printMultilinePrompt(label: string, prompt: string): void { + console.log(`${label}:`); + for (const line of prompt.split('\n')) { + console.log(`- ${line}`); + } +} + +export function isVerbose(): boolean { + const raw = process.env[VERBOSE_ENV_VAR]?.toLowerCase() ?? ''; + return raw === '1' || raw === 'true' || raw === 'yes' || raw === 'on'; +} + +export function printUserInputs(inputs: string[]): void { + if (!isVerbose()) { + return; + } + console.log('User inputs:'); + for (let index = 0; index < inputs.length; index += 1) { + console.log(` ${index + 1}. 
${inputs[index]}`); + } + console.log(''); +} + +export function printDecision(title: string, decision: Decision, state: EngineState): void { + if (!isVerbose()) { + return; + } + console.log(`Compiler decision (${title}):`); + if (decision.kind === 'update') { + console.log('result: updated'); + printStateSummary(state); + } else if (decision.kind === 'clarify') { + console.log('result: clarify'); + if (decision.prompt_to_user) { + printMultilinePrompt('clarify prompt', decision.prompt_to_user); + } + printStateSummary(state); + } else { + console.log('result: passthrough'); + printStateSummary(state); + } + console.log(''); +} + +export function printMessages(label: string, messages: Message[]): void { + if (!isVerbose()) { + return; + } + console.log(`Prompt/messages sent to LLM (${label}):`); + if (messages.length === 0) { + console.log('- (none)'); + } + for (const message of messages) { + const lines = message.content.split('\n'); + if (lines.length === 0) { + console.log(`- ${message.role}:`); + continue; + } + console.log(`- ${message.role}: ${lines[0]}`); + for (let i = 1; i < lines.length; i += 1) { + console.log(` ${lines[i]}`); + } + } + console.log(''); +} + +export function excerptLines(text: string, maxLines = 3): string { + const lines = text.split('\n'); + if (lines.length <= maxLines) { + return text; + } + return `${lines.slice(0, maxLines).join('\n')}\n[...]`; +} + +export function printModelOutput(label: string, output: string): void { + if (!isVerbose()) { + return; + } + console.log(`${label} output excerpt:`); + console.log(excerptLines(output)); + console.log(''); +} + +export function extractTagValue(output: string, tag: string): string | null { + const pattern = new RegExp(`^\\s*${tag.replace(/[.*+?^${}()|[\\]\\]/g, '\\$&')}\\s*:\\s*([^\\n]+)\\s*$`, 'im'); + const match = output.match(pattern); + if (!match) { + return null; + } + return match[1].trim(); +} + +export function printTagComparison(tag: string, baselineOutput: string, 
mediatedOutput: string): void { + if (!isVerbose()) { + return; + } + const baseline = extractTagValue(baselineOutput, tag) ?? 'MISSING'; + const mediated = extractTagValue(mediatedOutput, tag) ?? 'MISSING'; + console.log(`TAG_CHECK ${tag} baseline=${baseline} mediated=${mediated}`); + console.log(''); +} + +export function yesNo(value: boolean): string { + return value ? 'yes' : 'no'; +} + +export function printHostCheck(name: string, value: string, context: string): void { + if (!isVerbose()) { + return; + } + console.log(`HOST_CHECK ${name}: ${value} (${context})`); +} + +export function printSpecReport(input: { + testName: string; + baselinePass: boolean; + compilerPass: boolean; + compilerCompactPass?: boolean; + assertionOutcome?: string; + expected: string; + actual: string; + passed: boolean; + resultPass: string; + resultFail: string; +}): void { + const report: DemoReport = { + name: input.testName, + expected: input.expected, + actual: input.actual, + baseline_pass: input.baselinePass, + compiler_pass: input.compilerPass, + demo_pass: input.passed + }; + if (input.compilerCompactPass !== undefined) { + report.compiler_compact_pass = input.compilerCompactPass; + } + lastReport = report; + + console.log(input.testName); + console.log(`baseline: ${input.baselinePass ? 'PASS' : 'FAIL'}`); + console.log(`compiler: ${input.compilerPass ? 'PASS' : 'FAIL'}`); + if (input.compilerCompactPass !== undefined) { + console.log(`compiler+compact: ${input.compilerCompactPass ? 'PASS' : 'FAIL'}`); + } + console.log(`expected: ${input.expected}`); + console.log(`actual: ${input.actual}`); + if (input.assertionOutcome) { + console.log(`assertion: ${input.assertionOutcome}`); + } + console.log(`result: ${input.passed ? 
input.resultPass : input.resultFail}`); + if (isVerbose()) { + console.log(''); + } +} + +export function consumeLastReport(): DemoReport | null { + const value = lastReport; + lastReport = null; + return value; +} + +export function printInfoReport(report: InfoReport): void { + lastInfoReport = report; +} + +export function consumeLastInfoReport(): InfoReport | null { + const value = lastInfoReport; + lastInfoReport = null; + return value; +} + +export function compactUserTurns(userTurns: string[]): { + compactedTurns: string[]; + state: EngineState; + promptToUser: string | null; +} { + const engine = createEngine(); + const compactedTurns: string[] = []; + let promptToUser: string | null = null; + + for (const turn of userTurns) { + const decision = engine.step(turn); + if (decision.kind === 'update') { + continue; + } + compactedTurns.push(turn); + if (decision.kind === 'clarify') { + promptToUser = decision.prompt_to_user; + break; + } + } + + return { + compactedTurns, + state: engine.state, + promptToUser + }; +} + +export function buildCompiledSystemPrompt(state: EngineState): string { + const premise = getPremiseValue(state) ?? '(unset)'; + const useItems = getPolicyItems(state, 'use'); + const prohibitItems = getPolicyItems(state, 'prohibit'); + const useText = useItems.length > 0 ? useItems.join(', ') : '(none)'; + const prohibitText = prohibitItems.length > 0 ? prohibitItems.join(', ') : '(none)'; + + return [ + 'Follow authoritative compiled state exactly.', + `- premise: ${premise}`, + `- use policy items: ${useText}`, + `- prohibited policy items: ${prohibitText}`, + 'Compiled state overrides transcript drift and conflicts. Do not violate prohibited items.' 
+ ].join('\n'); +} + +export function buildBaselineMessages(userTurns: string[], baselineSystemPrompt?: string): Message[] { + const messages: Message[] = []; + if (baselineSystemPrompt) { + messages.push({ role: 'system', content: baselineSystemPrompt }); + } + for (const turn of userTurns) { + messages.push({ role: 'user', content: turn }); + } + return messages; +} + +export function buildMediatedMessagesFromTranscript( + state: EngineState, + userTurns: string[], + extraSystemPrompt?: string +): Message[] { + let systemPrompt = buildCompiledSystemPrompt(state); + if (extraSystemPrompt) { + systemPrompt = `${systemPrompt}\n${extraSystemPrompt}`; + } + + const messages: Message[] = [{ role: 'system', content: systemPrompt }]; + for (const turn of userTurns) { + messages.push({ role: 'user', content: turn }); + } + return messages; +} + +export function buildMediatedMessages(state: EngineState, userRequest: string, extraSystemPrompt?: string): Message[] { + let systemPrompt = buildCompiledSystemPrompt(state); + if (extraSystemPrompt) { + systemPrompt = `${systemPrompt}\n${extraSystemPrompt}`; + } + return [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userRequest } + ]; +} diff --git a/demos/globals.d.ts b/demos/globals.d.ts new file mode 100644 index 0000000..227f5e9 --- /dev/null +++ b/demos/globals.d.ts @@ -0,0 +1,5 @@ +declare const process: { + argv: string[]; + env: Record; + exit: (code?: number) => never; +}; diff --git a/demos/llm_client.ts b/demos/llm_client.ts new file mode 100644 index 0000000..3107226 --- /dev/null +++ b/demos/llm_client.ts @@ -0,0 +1,288 @@ +export type Role = 'system' | 'user' | 'assistant' | 'tool'; + +export type Message = { + role: Role; + content: string; +}; + +export type LLMConfig = { + baseUrl: string | null; + apiKey: string; + model: string; +}; + +export class MissingDemoConfigError extends Error { + readonly missing: string[]; + readonly baseUrl: string | null; + + constructor(missing: 
string[], baseUrl: string | null) { + super(`Missing demo configuration: ${missing.join(', ')}`); + this.name = 'MissingDemoConfigError'; + this.missing = missing; + this.baseUrl = baseUrl; + } +} + +export class DemoLLMError extends Error { + constructor(message: string) { + super(message); + this.name = 'DemoLLMError'; + } +} + +const RETRY_DELAYS_SECONDS = [1, 2, 4] as const; +const MAX_RETRY_AFTER_SECONDS = 5; +let defaultLlmDelaySeconds = 0; + +export function setDefaultLlmDelay(seconds: number): void { + defaultLlmDelaySeconds = seconds > 0 ? seconds : 0; +} + +export function getDefaultLlmDelay(): number { + return defaultLlmDelaySeconds; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function normalizeBaseUrl(raw: string | undefined): string | null { + if (!raw || raw.trim() === '') { + return null; + } + return raw.replace(/\/+$/, ''); +} + +export function loadConfig(): LLMConfig { + const baseUrl = normalizeBaseUrl(process.env.OPENAI_BASE_URL); + const apiKey = process.env.OPENAI_API_KEY; + const model = process.env.MODEL; + + const missing: string[] = []; + if (!apiKey || apiKey.trim() === '') { + missing.push('OPENAI_API_KEY'); + } + if (!model || model.trim() === '') { + missing.push('MODEL'); + } + + if (missing.length > 0) { + throw new MissingDemoConfigError(missing, baseUrl); + } + + const apiKeyValue = apiKey as string; + const modelValue = model as string; + + return { + baseUrl, + apiKey: apiKeyValue, + model: modelValue + }; +} + +function endpointFor(baseUrl: string | null): string { + const root = baseUrl ?? 'https://api.openai.com/v1'; + return `${root}/chat/completions`; +} + +function parseRetryAfterSeconds(headers: Headers): number | null { + const raw = headers.get('retry-after') ?? 
headers.get('Retry-After');
+  if (!raw) {
+    return null;
+  }
+
+  const trimmed = raw.trim();
+  if (/^\d+$/.test(trimmed)) {
+    return Number(trimmed);
+  }
+
+  const parsed = Date.parse(trimmed);
+  if (Number.isNaN(parsed)) {
+    return null;
+  }
+
+  const seconds = Math.ceil((parsed - Date.now()) / 1000);
+  return seconds > 0 ? seconds : 0;
+}
+
+function retryAfterFromText(text: string): number | null {
+  const lowered = text.toLowerCase();
+  const patterns = [/retry in\s+([0-9]+(?:\.[0-9]+)?)s/i, /retrydelay\s*[:=]\s*['"]?([0-9]+(?:\.[0-9]+)?)s['"]?/i];
+  for (const pattern of patterns) {
+    const match = lowered.match(pattern);
+    if (!match) {
+      continue;
+    }
+    const value = Number(match[1]);
+    if (!Number.isFinite(value) || value < 0) {
+      continue;
+    }
+    return Number.isInteger(value) ? value : Math.floor(value) + 1;
+  }
+  return null;
+}
+
+function isLikelyTemperatureRejection(status: number, message: string): boolean {
+  if (status < 400 || status >= 500) {
+    return false;
+  }
+  const lowered = message.toLowerCase();
+  return (
+    lowered.includes('temperature') &&
+    (lowered.includes('unsupported') || lowered.includes('not supported') || lowered.includes('invalid'))
+  );
+}
+
+async function callChatCompletions(
+  config: LLMConfig,
+  targetModel: string,
+  messages: Message[],
+  deterministicDecoding: boolean
+): Promise<{ status: number; headers: Headers; content: string; errorMessage: string }> {
+  const body: Record<string, unknown> = {
+    model: targetModel,
+    messages
+  };
+  if (deterministicDecoding) {
+    body.temperature = 0;
+  }
+
+  const response = await fetch(endpointFor(config.baseUrl), {
+    method: 'POST',
+    headers: {
+      'content-type': 'application/json',
+      authorization: `Bearer ${config.apiKey}`
+    },
+    body: JSON.stringify(body)
+  });
+
+  let payload: unknown = null;
+  try {
+    payload = await response.json();
+  } catch {
+    payload = null;
+  }
+
+  if (!response.ok) {
+    const fallbackText = typeof payload === 'string' ? payload : JSON.stringify(payload ?? 
{});
+    const errorMessage =
+      typeof payload === 'object' && payload !== null && 'error' in payload
+        ? String((payload as { error?: { message?: unknown } }).error?.message ?? fallbackText)
+        : fallbackText;
+
+    return {
+      status: response.status,
+      headers: response.headers,
+      content: '',
+      errorMessage
+    };
+  }
+
+  const choices =
+    typeof payload === 'object' && payload !== null && 'choices' in payload
+      ? (payload as { choices?: Array<{ message?: { content?: unknown } }> }).choices
+      : undefined;
+
+  const content = typeof choices?.[0]?.message?.content === 'string' ? choices[0].message.content : '';
+
+  return {
+    status: response.status,
+    headers: response.headers,
+    content: content.trim(),
+    errorMessage: ''
+  };
+}
+
+export async function completeMessages(
+  messages: Message[],
+  options?: {
+    model?: string;
+    delaySeconds?: number;
+  }
+): Promise<string> {
+  const config = loadConfig();
+  const targetModel = options?.model ?? config.model;
+  const configuredDelay = options?.delaySeconds && options.delaySeconds > 0 ? options.delaySeconds : defaultLlmDelaySeconds;
+
+  for (let attempt = 0; attempt <= RETRY_DELAYS_SECONDS.length; attempt += 1) {
+    if (configuredDelay > 0) {
+      await sleep(configuredDelay * 1000);
+    }
+
+    let firstResult;
+    try {
+      firstResult = await callChatCompletions(config, targetModel, messages, true);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      if (attempt < RETRY_DELAYS_SECONDS.length) {
+        await sleep(RETRY_DELAYS_SECONDS[attempt] * 1000);
+        continue;
+      }
+      throw new DemoLLMError(
+        `Could not reach the configured LLM endpoint after retries. Check OPENAI_BASE_URL and network access. 
(${message})` + ); + } + + if (firstResult.status < 400) { + return firstResult.content; + } + + if (isLikelyTemperatureRejection(firstResult.status, firstResult.errorMessage)) { + const retryResult = await callChatCompletions(config, targetModel, messages, false); + if (retryResult.status < 400) { + return retryResult.content; + } + throw new DemoLLMError(`LLM provider error while calling model '${targetModel}': ${retryResult.errorMessage}`); + } + + const lowered = firstResult.errorMessage.toLowerCase(); + const retryAfterHeader = parseRetryAfterSeconds(firstResult.headers); + const retryAfterText = retryAfterFromText(firstResult.errorMessage); + const retryAfter = retryAfterHeader ?? retryAfterText; + + const isRateLimit = firstResult.status === 429 || lowered.includes('rate limit') || lowered.includes('quota'); + const isAuth = firstResult.status === 401 || lowered.includes('invalid api key') || lowered.includes('unauthorized'); + const isPermission = firstResult.status === 403 || lowered.includes('forbidden') || lowered.includes('access denied'); + const isNotFound = firstResult.status === 404 || lowered.includes('model not found') || lowered.includes('unknown model'); + const isTimeout = lowered.includes('timeout') || lowered.includes('timed out'); + const isConnection = lowered.includes('connection') || lowered.includes('unreachable'); + + if (isNotFound) { + throw new DemoLLMError( + `Model '${targetModel}' was not found at the configured endpoint. Check MODEL or OPENAI_BASE_URL.` + ); + } + if (isAuth) { + throw new DemoLLMError('Authentication failed. Check OPENAI_API_KEY.'); + } + if (isPermission) { + throw new DemoLLMError(`Access to model '${targetModel}' was denied by the configured provider.`); + } + + if (isRateLimit || isTimeout || isConnection) { + if (retryAfter !== null && retryAfter > MAX_RETRY_AFTER_SECONDS) { + throw new DemoLLMError( + `LLM provider requested retry after ${retryAfter}s, which exceeds the demo retry limit. 
Try again later or switch providers.`
+        );
+      }
+
+      if (attempt < RETRY_DELAYS_SECONDS.length) {
+        const delay = retryAfter ?? RETRY_DELAYS_SECONDS[attempt];
+        await sleep(delay * 1000);
+        continue;
+      }
+
+      if (isRateLimit) {
+        throw new DemoLLMError('LLM provider rate limit exceeded. Try again later or switch providers.');
+      }
+
+      throw new DemoLLMError(
+        'Could not reach the configured LLM endpoint after retries. Check OPENAI_BASE_URL and network access.'
+      );
+    }
+
+    throw new DemoLLMError(`LLM provider error while calling model '${targetModel}': ${firstResult.errorMessage}`);
+  }
+
+  throw new DemoLLMError('Unexpected LLM retry failure.');
+}
diff --git a/demos/run_demo.ts b/demos/run_demo.ts
new file mode 100644
index 0000000..f115b61
--- /dev/null
+++ b/demos/run_demo.ts
@@ -0,0 +1,337 @@
+import {
+  VERBOSE_ENV_VAR,
+  consumeLastInfoReport,
+  consumeLastReport,
+  type DemoReport,
+  type InfoReport
+} from './common.js';
+import {
+  completeMessages,
+  DemoLLMError,
+  getDefaultLlmDelay,
+  MissingDemoConfigError,
+  setDefaultLlmDelay
+} from './llm_client.js';
+import { main as demo1Main } from './01_llm_contradiction_clarify.js';
+import { main as demo2Main } from './02_llm_constraint_guardrail.js';
+import { main as demo3Main } from './03_llm_premise_guardrail.js';
+import { main as demo4Main } from './04_llm_tool_denylist_guardrail.js';
+import { main as demo5Main } from './05_llm_prompt_drift_vs_state.js';
+import { main as demo6Main } from './06_llm_context_compaction.js';
+import { main as demo7Main } from './07_llm_prompt_vs_state.js';
+
+type DemoFn = (argv?: string[]) => Promise<void>;
+
+const DEMOS: Record<string, { file: string; run: DemoFn }> = {
+  '1': { file: '01_llm_contradiction_clarify.ts', run: demo1Main },
+  '2': { file: '02_llm_constraint_guardrail.ts', run: demo2Main },
+  '3': { file: '03_llm_premise_guardrail.ts', run: demo3Main },
+  '4': { file: '04_llm_tool_denylist_guardrail.ts', run: demo4Main },
+  '5': { file: '05_llm_prompt_drift_vs_state.ts', run: demo5Main },
+  '6': { 
file: '06_llm_context_compaction.ts', run: demo6Main },
+  '7': { file: '07_llm_prompt_vs_state.ts', run: demo7Main }
+};
+
+const SCORED_DEMOS = new Set(['1', '2', '3', '4', '5', '7']);
+
+function parseArgs(argv: string[]): {
+  demo: string;
+  verbose: boolean;
+  llmDelay: number;
+  demoArgs: string[];
+} {
+  let demo = 'all';
+  let verbose = false;
+  let llmDelay = 0;
+  const demoArgs: string[] = [];
+
+  const tokens = [...argv];
+  let i = 0;
+  while (i < tokens.length) {
+    const token = tokens[i];
+
+    if (token === '--') {
+      demoArgs.push(...tokens.slice(i + 1));
+      break;
+    }
+
+    if (token === '--verbose') {
+      verbose = true;
+      i += 1;
+      continue;
+    }
+
+    if (token === '--llm-delay') {
+      const parsed = Number(tokens[i + 1] ?? '');
+      if (!Number.isFinite(parsed)) {
+        throw new Error('Invalid --llm-delay value.');
+      }
+      llmDelay = parsed;
+      i += 2;
+      continue;
+    }
+
+    if (token.startsWith('--')) {
+      throw new Error(`Unknown option: ${token}`);
+    }
+
+    if (demo === 'all') {
+      demo = token;
+    } else {
+      demoArgs.push(token);
+    }
+    i += 1;
+  }
+
+  if (demo !== 'all' && !(demo in DEMOS)) {
+    throw new Error(`Demo must be one of: all, ${Object.keys(DEMOS).join(', ')}`);
+  }
+  if (demo === 'all' && demoArgs.length > 0) {
+    throw new Error('demo-specific args are only supported when running a single demo');
+  }
+
+  return { demo, verbose, llmDelay, demoArgs };
+}
+
+function verboseDemoLabel(filename: string): string {
+  return filename.replace('.ts', '').replace('_llm', '');
+}
+
+function isCompilerRegression(result: DemoReport): boolean {
+  return result.baseline_pass && !result.compiler_pass;
+}
+
+function printCompilerRegressionWarning(): void {
+  console.log('');
+  console.log('⚠️ MEDIATED REGRESSION');
+  console.log('baseline succeeded but compiler-mediated version failed');
+}
+
+async function preflightAllMode(): Promise<void> {
+  await completeMessages([{ role: 'user', content: 'Reply with exactly: OK' }], { delaySeconds: 0 });
+}
+
+function printConfigError(error: 
MissingDemoConfigError): void {
+  const mode = error.baseUrl ? 'OpenAI-compatible endpoint' : 'OpenAI API';
+  console.log('Unable to run LLM demos: missing model configuration.');
+  console.log(`Assumed mode: ${mode}`);
+  console.log(`Missing variables: ${error.missing.join(', ')}`);
+  console.log('Example setup:');
+  if (error.baseUrl) {
+    console.log('  export OPENAI_BASE_URL=http://localhost:11434/v1');
+    console.log('  export OPENAI_API_KEY=ollama');
+    console.log('  export MODEL=llama3.1:8b');
+  } else {
+    console.log('  export OPENAI_API_KEY=your_key_here');
+    console.log('  export MODEL=gpt-4.1-mini');
+  }
+}
+
+async function runSingle(
+  key: string,
+  opts: { verbose: boolean; llmDelay: number; demoArgs: string[] }
+): Promise<{ report: DemoReport | null; info: InfoReport | null }> {
+  const entry = DEMOS[key];
+  if (!entry) {
+    throw new Error(`Unknown demo key: ${key}`);
+  }
+
+  if (opts.verbose) {
+    console.log(`===== Running ${verboseDemoLabel(entry.file)} =====`);
+  }
+
+  const oldVerbose = process.env[VERBOSE_ENV_VAR];
+  const oldDelay = getDefaultLlmDelay();
+
+  process.env[VERBOSE_ENV_VAR] = opts.verbose ? '1' : '0';
+  setDefaultLlmDelay(opts.llmDelay > 0 ? opts.llmDelay : 0);
+
+  try {
+    await entry.run(opts.demoArgs);
+    return { report: consumeLastReport(), info: consumeLastInfoReport() };
+  } finally {
+    if (oldVerbose === undefined) {
+      delete process.env[VERBOSE_ENV_VAR];
+    } else {
+      process.env[VERBOSE_ENV_VAR] = oldVerbose;
+    }
+    setDefaultLlmDelay(oldDelay);
+  }
+}
+
+function printUsageAndExit(error: string): never {
+  console.error(error);
+  console.error('');
+  console.error('Usage: node dist/demos/run_demo.js [all|1|2|3|4|5|6|7] [--verbose] [--llm-delay <seconds>] [-- <demo args>]');
+  process.exit(2);
+}
+
+export async function main(argv: string[] = process.argv.slice(2)): Promise<void> {
+  let args;
+  try {
+    args = parseArgs(argv);
+  } catch (error) {
+    printUsageAndExit(error instanceof Error ? 
error.message : String(error)); + } + + if (args.demo === 'all') { + try { + await preflightAllMode(); + } catch (error) { + if (error instanceof MissingDemoConfigError) { + printConfigError(error); + process.exit(2); + } + if (error instanceof DemoLLMError) { + console.log(error.message); + process.exit(2); + } + throw error; + } + + let baselinePassCount = 0; + let baselineFailCount = 0; + let compilerPassCount = 0; + let compilerFailCount = 0; + let compactPassCount = 0; + let compactFailCount = 0; + let compilerRegressions = 0; + const informationalReports: InfoReport[] = []; + + const keys = Object.keys(DEMOS).sort(); + for (let i = 0; i < keys.length; i += 1) { + const key = keys[i]; + if (i > 0 && !args.verbose) { + console.log(''); + } + + let result; + try { + result = await runSingle(key, { + verbose: args.verbose, + llmDelay: args.llmDelay, + demoArgs: [] + }); + } catch (error) { + if (error instanceof MissingDemoConfigError) { + printConfigError(error); + process.exit(2); + } + if (error instanceof DemoLLMError) { + console.log(error.message); + process.exit(2); + } + throw error; + } + + if (result.info !== null) { + informationalReports.push(result.info); + } + + if (!SCORED_DEMOS.has(key)) { + continue; + } + + if (result.report === null) { + baselineFailCount += 1; + compilerFailCount += 1; + compactFailCount += 1; + continue; + } + + if (result.report.baseline_pass) { + baselinePassCount += 1; + } else { + baselineFailCount += 1; + } + + if (result.report.compiler_pass) { + compilerPassCount += 1; + } else { + compilerFailCount += 1; + } + + const compactPass = result.report.compiler_compact_pass ?? 
result.report.compiler_pass; + if (compactPass) { + compactPassCount += 1; + } else { + compactFailCount += 1; + } + + if (isCompilerRegression(result.report)) { + compilerRegressions += 1; + printCompilerRegressionWarning(); + } + } + + console.log(''); + console.log('Summary:'); + console.log(''); + console.log('Evaluative demos:'); + console.log(`Baseline results: ${baselinePassCount} passed, ${baselineFailCount} failed`); + console.log(`Compiler results: ${compilerPassCount} passed, ${compilerFailCount} failed`); + console.log(`Compiler+compact results: ${compactPassCount} passed, ${compactFailCount} failed`); + + if (compilerRegressions > 0) { + console.log(''); + if (compilerRegressions === 1) { + console.log('*** 1 MEDIATED REGRESSION DETECTED ***'); + } else { + console.log(`*** ${compilerRegressions} MEDIATED REGRESSIONS DETECTED ***`); + } + } + + if (informationalReports.length > 0) { + console.log(''); + console.log('Informational demo:'); + for (const report of informationalReports) { + const demoId = report.name.split(' — ', 1)[0]; + console.log( + `${demoId} — context ${report.baseline_context_length} → ${report.compiled_context_length} chars (${report.context_reduction_percent}% reduction); prompt ${report.baseline_prompt_length} → ${report.compiled_prompt_length} chars (${report.prompt_reduction_percent}% reduction)` + ); + if ( + report.compacted_context_length !== undefined && + report.compacted_context_reduction_percent !== undefined && + report.compacted_prompt_length !== undefined && + report.compacted_prompt_reduction_percent !== undefined + ) { + console.log( + `${demoId} — compacted context ${report.baseline_context_length} → ${report.compacted_context_length} chars (${report.compacted_context_reduction_percent}% reduction); compacted prompt ${report.baseline_prompt_length} → ${report.compacted_prompt_length} chars (${report.compacted_prompt_reduction_percent}% reduction)` + ); + } + } + } + + if (compilerRegressions > 0) { + 
process.exit(1);
+    }
+    return;
+  }
+
+  try {
+    const result = await runSingle(args.demo, {
+      verbose: args.verbose,
+      llmDelay: args.llmDelay,
+      demoArgs: args.demoArgs
+    });
+
+    if (SCORED_DEMOS.has(args.demo) && result.report !== null && isCompilerRegression(result.report)) {
+      printCompilerRegressionWarning();
+      process.exit(1);
+    }
+  } catch (error) {
+    if (error instanceof MissingDemoConfigError) {
+      printConfigError(error);
+      process.exit(2);
+    }
+    if (error instanceof DemoLLMError) {
+      console.log(error.message);
+      process.exit(2);
+    }
+    throw error;
+  }
+}
+
+if (process.argv[1] && import.meta.url === new URL(process.argv[1], 'file://').href) {
+  await main();
+}
diff --git a/tests/demos-smoke.test.ts b/tests/demos-smoke.test.ts
new file mode 100644
index 0000000..e76e6bb
--- /dev/null
+++ b/tests/demos-smoke.test.ts
@@ -0,0 +1,107 @@
+import { spawnSync } from 'node:child_process';
+import { resolve } from 'node:path';
+
+import { beforeAll, describe, expect, it } from 'vitest';
+
+const ROOT = resolve(process.cwd());
+const DIST_DEMOS = resolve(ROOT, 'dist', 'demos');
+
+const HAS_DEMO_ENV = Boolean(process.env.OPENAI_API_KEY && process.env.MODEL);
+
+function runNodeScript(file: string, args: string[] = [], envOverride?: Record<string, string>) {
+  const script = resolve(DIST_DEMOS, file);
+  const run = spawnSync(process.execPath, [script, ...args], {
+    cwd: ROOT,
+    encoding: 'utf8',
+    env: {
+      ...process.env,
+      ...envOverride
+    }
+  });
+  return {
+    status: run.status,
+    stdout: run.stdout ?? '',
+    stderr: run.stderr ?? 
'' + }; +} + +describe('demos smoke', () => { + beforeAll(() => { + const build = spawnSync('npm', ['run', 'build'], { + cwd: ROOT, + encoding: 'utf8' + }); + if (build.status !== 0) { + throw new Error(`Build failed.\nSTDOUT:\n${build.stdout}\nSTDERR:\n${build.stderr}`); + } + }, 120_000); + + it('runner fails fast with setup instructions when config is missing', () => { + const run = runNodeScript('run_demo.js', ['all'], { + OPENAI_API_KEY: '', + MODEL: '' + }); + expect(run.status).toBe(2); + expect(run.stdout).toContain('Unable to run LLM demos: missing model configuration.'); + expect(run.stdout).toContain('Missing variables: OPENAI_API_KEY, MODEL'); + }); + + const describeWhenConfigured = HAS_DEMO_ENV ? describe : describe.skip; + + describeWhenConfigured('with configured llm env', () => { + it('runs scored demos with comparative markers', () => { + const demos = [ + ['01_llm_contradiction_clarify.js', '01_contradiction_block'], + ['02_llm_constraint_guardrail.js', '02_constraint_drift'], + ['03_llm_premise_guardrail.js', '03_explicit_premise_change'], + ['04_llm_tool_denylist_guardrail.js', '04_tool_governance'], + ['05_llm_prompt_drift_vs_state.js', '05_prompt_drift'], + ['07_llm_prompt_vs_state.js', '07_prompt_engineering_comparison'] + ] as const; + + for (const [file, marker] of demos) { + const run = runNodeScript(file); + expect(run.status).toBe(0); + expect(run.stdout).toContain(marker); + expect(run.stdout).toContain('baseline:'); + expect(run.stdout).toContain('compiler:'); + expect(run.stdout).toContain('compiler+compact:'); + expect(run.stdout).toContain('result:'); + expect(run.stdout).not.toContain('"version":'); + expect(run.stdout).not.toContain('"policies":'); + } + }, 180_000); + + it('runs informational demo 06 with compaction markers', () => { + const run = runNodeScript('06_llm_context_compaction.js'); + expect(run.status).toBe(0); + expect(run.stdout).toContain('06_context_compaction'); + expect(run.stdout).toContain('context 
scaling:'); + expect(run.stdout).toContain('compacted transcript:'); + expect(run.stdout).toContain('result: transcript grows linearly; compiled context stays constant'); + expect(run.stdout).not.toContain('baseline: PASS'); + expect(run.stdout).not.toContain('"version":'); + expect(run.stdout).not.toContain('"policies":'); + }, 120_000); + + it('runs demo runner for single and all with summary markers', () => { + const single = runNodeScript('run_demo.js', ['1']); + expect(single.status).toBe(0); + expect(single.stdout).toContain('01_contradiction_block'); + expect(single.stdout).toContain('baseline:'); + expect(single.stdout).toContain('compiler:'); + + const all = runNodeScript('run_demo.js', ['all']); + expect(all.status).toBe(0); + expect(all.stdout).toContain('Summary:'); + expect(all.stdout).toContain('Evaluative demos:'); + expect(all.stdout).toContain('Baseline results:'); + expect(all.stdout).toContain('Compiler results:'); + expect(all.stdout).toContain('Compiler+compact results:'); + expect(all.stdout).toContain('Informational demo:'); + expect(all.stdout).toContain('06_context_compaction'); + expect(all.stdout).not.toContain('"version":'); + expect(all.stdout).not.toContain('"policies":'); + }, 180_000); + }); +}); diff --git a/tsconfig.build.json b/tsconfig.build.json index 59a87a6..c790678 100644 --- a/tsconfig.build.json +++ b/tsconfig.build.json @@ -7,6 +7,6 @@ "declarationMap": false, "types": [] }, - "include": ["index.ts", "src/**/*.ts", "examples/**/*.ts"], + "include": ["index.ts", "src/**/*.ts", "examples/**/*.ts", "demos/**/*.ts", "demos/**/*.d.ts"], "exclude": ["tests", "dist", "node_modules"] } From 626fa7e6a767340573d589251f05ab64112a8115 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 18 Apr 2026 03:57:04 -0400 Subject: [PATCH 2/3] docs: note LLM demo result variability --- demos/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/demos/README.md b/demos/README.md index 29238f6..52fde4e 100644 --- 
a/demos/README.md +++ b/demos/README.md @@ -9,6 +9,13 @@ Scored demos compare three paths: Demo 06 is informational (context/prompt compaction metrics), not scored. +## Result variability note + +LLM demo outcomes can vary across environments. In practice, PASS/FAIL patterns may differ based on: +- provider +- client layer +- model serving path + ## Demo overview | Demo | Behavior | Concept | From 417dffba5a03eaddb4f8b0ebe4cfa600ecaf5149 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 18 Apr 2026 04:07:50 -0400 Subject: [PATCH 3/3] build: rename package to @rlippmann/context-compiler --- .gitignore | 2 +- README.md | 6 +++--- package-lock.json | 8 ++++---- package.json | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 1da5670..ede5259 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,4 @@ yarn-error.log* .idea/ # npm pack artifacts -context-compiler-ts-*.tgz +rlippmann-context-compiler-*.tgz diff --git a/README.md b/README.md index 0f4524a..681ee5e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# context-compiler-ts +# @rlippmann/context-compiler TypeScript port of the Context Compiler core. 
@@ -28,13 +28,13 @@ Behavioral conformance is defined by the upstream Python fixture corpus and dire ## Installation ```bash -npm install context-compiler-ts +npm install @rlippmann/context-compiler ``` ## Quick Start ```ts -import { createEngine } from 'context-compiler-ts'; +import { createEngine } from '@rlippmann/context-compiler'; const engine = createEngine(); const decision = engine.step('set premise concise replies'); diff --git a/package-lock.json b/package-lock.json index 9c2f294..00f2e26 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { - "name": "context-compiler-ts", - "version": "0.1.0", + "name": "@rlippmann/context-compiler", + "version": "0.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "context-compiler-ts", - "version": "0.1.0", + "name": "@rlippmann/context-compiler", + "version": "0.5.0", "devDependencies": { "typescript": "^5.9.3", "vitest": "^3.2.4" diff --git a/package.json b/package.json index cc742e1..974c287 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "context-compiler-ts", + "name": "@rlippmann/context-compiler", "version": "0.5.0", "license": "Apache-2.0", "repository": { @@ -26,7 +26,7 @@ "README.md" ], "scripts": { - "build": "tsc -p tsconfig.build.json", + "build": "rm -rf dist && tsc -p tsconfig.build.json", "fixtures:sync": "bash scripts/fixtures-sync.sh", "fixtures:check": "bash scripts/fixtures-check.sh", "prepack": "npm run build",