Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ jobs:
- name: Install
run: npm ci

- name: Checkout Python fixture source
# TS 0.5.2 fixtures include cases added after Python v0.5.2; pin to v0.6.0
# to keep fixture drift checks aligned with the current TS fixture corpus.
run: git clone --depth 1 --branch v0.6.0 https://github.com/rlippmann/context-compiler ../context-compiler

- name: Check fixture drift
run: npm run fixtures:check

- name: Build
run: npm run build

Expand Down
24 changes: 24 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,30 @@ A change is correct only if all fixtures pass.

Do not modify fixtures to make tests pass.

Fixture updates are allowed only when syncing from the authoritative Python source for the targeted compatibility line.

If synced fixtures introduce failures, fix TypeScript behavior rather than editing fixture expectations.

## Test Coverage Expectations

Before opening a PR, consider:

- Does this change affect any user-facing behavior?
- If so, is that behavior covered by tests?

User-facing behavior includes:

- Engine decision outcomes (`kind`, `prompt_to_user`, and returned `state`)
- Checkpoint export/import and continuation behavior
- Clarify/confirmation flows (`yes` / `no`)
- Transcript replay behavior and compaction-related behavior
- Integration behavior (examples, demo runner, and integration scripts)
- Integration error-path normalization

If a user-facing behavior is changed or introduced, add or update tests to cover it.

Do not rely solely on coverage metrics.

## Workflow

Typical workflow:
Expand Down
20 changes: 20 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@ All changes must:
- Exact match when a fixture specifies a string
- Non-empty string when a fixture uses `null`

## Test Coverage Expectations

Before opening a PR, consider:

- Does this change affect any user-facing behavior?
- If so, is that behavior covered by tests?

User-facing behavior includes:

- Engine decision outcomes (`kind`, `prompt_to_user`, and returned `state`)
- Checkpoint export/import and continuation behavior
- Clarify/confirmation flows (`yes` / `no`)
- Transcript replay behavior and compaction-related behavior
- Integration behavior (examples, demo runner, and integration scripts)
- Integration error-path normalization

If a user-facing behavior is changed or introduced, add or update tests to cover it.

Do not rely solely on coverage metrics.

## What Not to Do

Do not:
Expand Down
149 changes: 149 additions & 0 deletions demos/llm_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export type LLMConfig = {
model: string;
};

const DEMO_MOCK_ENV_VAR = 'CONTEXT_COMPILER_DEMO_MOCK';

export class MissingDemoConfigError extends Error {
readonly missing: string[];
readonly baseUrl: string | null;
Expand Down Expand Up @@ -85,6 +87,149 @@ function endpointFor(baseUrl: string | null): string {
return `${root}/chat/completions`;
}

/**
 * Whether demo mock mode is enabled via the mock environment variable.
 * Accepts the usual truthy spellings ("1", "true", "yes", "on"),
 * case-insensitively and ignoring surrounding whitespace.
 */
function isDemoMockEnabled(): boolean {
  const truthyValues = ['1', 'true', 'yes', 'on'];
  const normalized = (process.env[DEMO_MOCK_ENV_VAR] ?? '').trim().toLowerCase();
  return truthyValues.includes(normalized);
}

/**
 * Parse a comma-separated policy list into normalized item names.
 * Each entry is trimmed and lowercased; empty entries and the literal
 * "(none)" placeholder are dropped.
 */
function splitItems(raw: string): string[] {
  const items: string[] = [];
  for (const piece of raw.split(',')) {
    const normalized = piece.trim().toLowerCase();
    if (normalized !== '' && normalized !== '(none)') {
      items.push(normalized);
    }
  }
  return items;
}

/**
 * Extract the compiled-state fields (premise plus use/prohibit policy
 * item lists) from a system prompt rendered by the context compiler.
 * Fields absent from the prompt yield null / empty lists.
 */
function parseCompiledState(systemPrompt: string): {
  premise: string | null;
  useItems: string[];
  prohibitItems: string[];
} {
  // Pull the captured value of a "- label: value" line, or null if absent.
  const extract = (pattern: RegExp): string | null => {
    const match = systemPrompt.match(pattern);
    return match ? match[1] : null;
  };

  const premiseRaw = extract(/^- premise:\s*(.+)$/im);
  const useRaw = extract(/^- use policy items:\s*(.+)$/im);
  const prohibitRaw = extract(/^- prohibited policy items:\s*(.+)$/im);

  return {
    premise: premiseRaw === null ? null : premiseRaw.trim(),
    useItems: useRaw === null ? [] : splitItems(useRaw),
    prohibitItems: prohibitRaw === null ? [] : splitItems(prohibitRaw)
  };
}

/**
 * Scan user messages for premise directives ("set premise X" or
 * "change premise to X") and report both the first and the most recent
 * value seen, or null when no directive is present.
 */
function parseDirectivePremises(messages: Message[]): { first: string | null; latest: string | null } {
  const directiveRe = /^\s*(?:set premise|change premise to)\s+(.+?)\s*$/i;

  const premises = messages
    .filter((message) => message.role === 'user')
    .map((message) => message.content.match(directiveRe))
    .filter((match): match is RegExpMatchArray => match !== null)
    .map((match) => match[1].trim());

  return {
    first: premises.length > 0 ? premises[0] : null,
    latest: premises.length > 0 ? premises[premises.length - 1] : null
  };
}

/**
 * Pick the premise the mock completion should echo. A compiled premise
 * (when set) wins outright. Otherwise "strong" directive-prioritizing
 * prompts honor the most recent user directive, while weaker prompts
 * drift toward whatever the user mentioned, then the first directive.
 */
function chooseMockPremise(messages: Message[], systemPrompt: string, compiledPremise: string | null): string {
  if (compiledPremise && compiledPremise !== '(unset)') {
    return compiledPremise;
  }

  const directives = parseDirectivePremises(messages);
  const userText = messages
    .filter((message) => message.role === 'user')
    .map((message) => message.content)
    .join('\n')
    .toLowerCase();
  const strongPromptRe = /prioritize explicit user directives|careful assistant|first line must be exactly premise:/i;

  if (strongPromptRe.test(systemPrompt)) {
    // Strong prompts are expected to track the latest directive.
    return directives.latest ?? directives.first ?? 'vegan curry';
  }

  // Weak prompts get pulled by incidental mentions before directives.
  if (userText.includes('beef stew')) {
    return 'beef stew';
  }
  return directives.first ?? directives.latest ?? 'vegetarian curry';
}

/**
 * Produce a deterministic, offline stand-in for an LLM chat completion.
 *
 * The reply is selected by matching literal instruction patterns in the
 * accumulated user text (e.g. "first line must be TOOL:<docker|kubectl>")
 * and by the compiled policy state parsed from the system prompt. Branch
 * order matters: earlier, more specific instruction formats take
 * precedence over the generic recipe/plan fallback.
 */
function mockCompletion(messages: Message[]): string {
  // Gather the system prompt and all user text; matching below is done
  // against the joined user messages, not individual turns.
  const systemPrompt = messages.find((message) => message.role === 'system')?.content ?? '';
  const allUserText = messages
    .filter((message) => message.role === 'user')
    .map((message) => message.content)
    .join('\n');
  const lowered = allUserText.toLowerCase();
  const compiled = parseCompiledState(systemPrompt);
  const premise = chooseMockPremise(messages, systemPrompt, compiled.premise);
  // "Compiled" mode means the prompt carries authoritative compiled state;
  // several branches behave more strictly when it is set.
  const isCompiled = /follow authoritative compiled state exactly\./i.test(systemPrompt);

  // Trivial echo instruction used by smoke checks.
  if (/reply with exactly:\s*ok/i.test(allUserText)) {
    return 'OK';
  }

  // Tool-selection format: prefer kubectl only when compiled state exists
  // and does not prohibit it; otherwise fall back to docker.
  if (/first line must be tool:<docker\|kubectl>/i.test(allUserText)) {
    const tool = isCompiled && !compiled.prohibitItems.includes('kubectl') ? 'kubectl' : 'docker';
    return [`TOOL:${tool}`, `ACTION:Use ${tool} to deploy the service.`].join('\n');
  }

  // Clarify-vs-proceed format: only compiled mode detects the
  // contradictory peanut directives and asks for clarification.
  if (/first line must be action:<clarify\|proceed>/i.test(allUserText)) {
    const contradictoryPeanutDirectives = lowered.includes('prohibit peanuts') && lowered.includes('use peanuts');
    if (isCompiled && contradictoryPeanutDirectives) {
      return 'ACTION:clarify\nRequest is contradictory; please confirm policy.';
    }
    return 'ACTION:proceed\nProceeding with a best-effort interpretation.';
  }

  // Generic content requests: detect which output shape the prompt wants.
  const wantsPremiseTag = /first line must be premise:<value>/i.test(allUserText);
  const wantsRecipe = /\b(recipe|ingredients|steps|curry)\b/i.test(allUserText);
  const wantsPlan = /\b(plan|shopping list|dinner)\b/i.test(allUserText);
  const blocksPeanuts = compiled.prohibitItems.includes('peanuts') || compiled.prohibitItems.includes('peanut');

  // Policy-compliant refusal: recipe requested while peanuts are
  // prohibited — substitute a fixed peanut-free recipe.
  if (wantsRecipe && blocksPeanuts) {
    const prefix = wantsPremiseTag ? [`PREMISE:${premise}`] : [];
    return [
      ...prefix,
      'I cannot provide a peanut recipe because it conflicts with policy.',
      'Ingredients:',
      '- chickpeas',
      '- coconut milk',
      '- garlic',
      'Steps:',
      '1. Saute garlic.',
      '2. Simmer chickpeas in coconut milk.',
      '3. Serve hot.'
    ].join('\n');
  }

  // Default plan/recipe reply built around the chosen premise; the
  // PREMISE: tag line is only emitted when explicitly requested.
  if (wantsPremiseTag || wantsPlan || wantsRecipe) {
    const lines: string[] = [];
    if (wantsPremiseTag) {
      lines.push(`PREMISE:${premise}`);
    }
    lines.push('Shopping list:');
    lines.push('- onions');
    lines.push('- tomatoes');
    lines.push(`- ${premise}`);
    lines.push('Steps:');
    lines.push(`1. Prepare a ${premise} base.`);
    lines.push('2. Simmer until flavors combine.');
    lines.push('3. Serve warm.');
    return lines.join('\n');
  }

  // Nothing matched: minimal acknowledgment.
  return 'OK';
}

function parseRetryAfterSeconds(headers: Headers): number | null {
const raw = headers.get('retry-after') ?? headers.get('Retry-After');
if (!raw) {
Expand Down Expand Up @@ -200,6 +345,10 @@ export async function completeMessages(
delaySeconds?: number;
}
): Promise<string> {
if (isDemoMockEnabled()) {
return mockCompletion(messages);
}

const config = loadConfig();
const targetModel = options?.model ?? config.model;
const configuredDelay = options?.delaySeconds && options.delaySeconds > 0 ? options.delaySeconds : defaultLlmDelaySeconds;
Expand Down
7 changes: 4 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 20 additions & 9 deletions tests/demos-smoke.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import { beforeAll, describe, expect, it } from 'vitest';
const ROOT = resolve(process.cwd());
const DIST_DEMOS = resolve(ROOT, 'dist', 'demos');

const HAS_DEMO_ENV = Boolean(process.env.OPENAI_API_KEY && process.env.MODEL);

function runNodeScript(file: string, args: string[] = [], envOverride?: Record<string, string | undefined>) {
const script = resolve(DIST_DEMOS, file);
const run = spawnSync(process.execPath, [script, ...args], {
Expand All @@ -25,6 +23,16 @@ function runNodeScript(file: string, args: string[] = [], envOverride?: Record<s
};
}

function runNodeScriptWithMock(file: string, args: string[] = []) {
  // Run a demo script in deterministic mock mode so CI smoke runs stay
  // stable, fast, and offline (no real LLM calls). Existing credentials
  // are passed through; blank/unset ones get placeholder values.
  const keepOrDefault = (value: string | undefined, fallback: string): string =>
    value && value.trim() !== '' ? value : fallback;

  return runNodeScript(file, args, {
    CONTEXT_COMPILER_DEMO_MOCK: '1',
    OPENAI_API_KEY: keepOrDefault(process.env.OPENAI_API_KEY, 'mock-key'),
    MODEL: keepOrDefault(process.env.MODEL, 'mock-model')
  });
}

describe('demos smoke', () => {
beforeAll(() => {
const build = spawnSync('npm', ['run', 'build'], {
Expand All @@ -42,13 +50,12 @@ describe('demos smoke', () => {
MODEL: ''
});
expect(run.status).toBe(2);
expect(run.stderr.trim()).toBe('');
expect(run.stdout).toContain('Unable to run LLM demos: missing model configuration.');
expect(run.stdout).toContain('Missing variables: OPENAI_API_KEY, MODEL');
});

const describeWhenConfigured = HAS_DEMO_ENV ? describe : describe.skip;

describeWhenConfigured('with configured llm env', () => {
describe('with configured llm env or demo mock', () => {
it('runs scored demos with comparative markers', () => {
const demos = [
['01_llm_contradiction_clarify.js', '01_contradiction_block'],
Expand All @@ -60,8 +67,9 @@ describe('demos smoke', () => {
] as const;

for (const [file, marker] of demos) {
const run = runNodeScript(file);
const run = runNodeScriptWithMock(file);
expect(run.status).toBe(0);
expect(run.stderr.trim()).toBe('');
expect(run.stdout).toContain(marker);
expect(run.stdout).toContain('baseline:');
expect(run.stdout).toContain('compiler:');
Expand All @@ -73,8 +81,9 @@ describe('demos smoke', () => {
}, 180_000);

it('runs informational demo 06 with compaction markers', () => {
const run = runNodeScript('06_llm_context_compaction.js');
const run = runNodeScriptWithMock('06_llm_context_compaction.js');
expect(run.status).toBe(0);
expect(run.stderr.trim()).toBe('');
expect(run.stdout).toContain('06_context_compaction');
expect(run.stdout).toContain('context scaling:');
expect(run.stdout).toContain('compacted transcript:');
Expand All @@ -85,14 +94,16 @@ describe('demos smoke', () => {
}, 120_000);

it('runs demo runner for single and all with summary markers', () => {
const single = runNodeScript('run_demo.js', ['1']);
const single = runNodeScriptWithMock('run_demo.js', ['1']);
expect(single.status).toBe(0);
expect(single.stderr.trim()).toBe('');
expect(single.stdout).toContain('01_contradiction_block');
expect(single.stdout).toContain('baseline:');
expect(single.stdout).toContain('compiler:');

const all = runNodeScript('run_demo.js', ['all']);
const all = runNodeScriptWithMock('run_demo.js', ['all']);
expect(all.status).toBe(0);
expect(all.stderr.trim()).toBe('');
expect(all.stdout).toContain('Summary:');
expect(all.stdout).toContain('Evaluative demos:');
expect(all.stdout).toContain('Baseline results:');
Expand Down
Loading
Loading