From a15b5a03eb8aefa5668bbb5b94cfc4bc894a60d3 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 25 Apr 2026 00:51:34 -0400 Subject: [PATCH 1/5] feat: add opt-in repl precompiler flag --- src/context_compiler/repl.py | 37 ++++++++++--- tests/test_repl.py | 102 +++++++++++++++++++++++++++++++++-- 2 files changed, 128 insertions(+), 11 deletions(-) diff --git a/src/context_compiler/repl.py b/src/context_compiler/repl.py index ad480db..79bdb71 100644 --- a/src/context_compiler/repl.py +++ b/src/context_compiler/repl.py @@ -1,18 +1,21 @@ import sys from typing import TextIO +from experimental.preprocessor.output_validation import parse_precompiler_output + from . import __version__, create_engine, get_policy_items, get_premise_value -from .engine import Decision, DecisionKind, State +from .engine import Decision, DecisionKind, Engine, State _EXIT_TOKENS = {"exit", "quit"} _HELP_TOKENS = {"help", "?"} _MULTI_COMMAND_PROMPT = "Multiple commands detected.\nEnter one command per line." _CLI_HELP_TEXT = """Usage: - context-compiler [--help] [--version] + context-compiler [--help] [--version] [--with-precompiler] Options: - --help Show this help message and exit. - --version Show the installed context-compiler version and exit. + --help Show this help message and exit. + --version Show the installed context-compiler version and exit. + --with-precompiler Enable heuristic precompiler validation in the REPL loop. """ @@ -94,7 +97,21 @@ def _print_decision_lines(decision: Decision, out_stream: TextIO, *, leading_bla print(line, file=out_stream) -def run_repl(in_stream: TextIO, out_stream: TextIO) -> None: +def _has_pending_clarification(engine: Engine) -> bool: + checkpoint = engine.export_checkpoint() + return checkpoint["pending"] is not None + + +def _compile_input(raw_input: str, engine: Engine, *, use_precompiler: bool) -> str: + if not use_precompiler: + return raw_input + if _has_pending_clarification(engine): + return raw_input + parsed = parse_precompiler_output(raw_input, source_input=raw_input) + return parsed if parsed is not None else raw_input + + +def run_repl(in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = False) -> None: engine = create_engine() if _is_interactive(in_stream, out_stream): @@ -118,7 +135,8 @@ def run_repl(in_stream: TextIO, out_stream: TextIO) -> None: _print_interactive_help(out_stream) continue - decision = engine.step(user_input) + compile_input = _compile_input(user_input, engine, use_precompiler=use_precompiler) + decision = engine.step(compile_input) _print_decision_lines(decision, out_stream, leading_blank=True) return @@ -129,7 +147,8 @@ def run_repl(in_stream: TextIO, out_stream: TextIO) -> None: user_input = line.rstrip("\n") if user_input.strip().lower() in _EXIT_TOKENS: return - decision = engine.step(user_input) + compile_input = _compile_input(user_input, engine, use_precompiler=use_precompiler) + decision = engine.step(compile_input) _print_decision_lines(decision, out_stream, leading_blank=False) @@ -147,6 +166,10 @@ def main() -> int: # pragma: no cover print(__version__, file=sys.stdout) return 0 + if args == ["--with-precompiler"]: + run_repl(sys.stdin, sys.stdout, use_precompiler=True) + return 0 + bad_arg = args[0] print(f"error: unknown option '{bad_arg}'", file=sys.stderr) print("Try 'context-compiler --help' for usage.", file=sys.stderr) diff --git a/tests/test_repl.py b/tests/test_repl.py index f640907..4df99c9 100644 --- a/tests/test_repl.py +++ b/tests/test_repl.py @@ -75,11 +75,12 @@ def test_main_help_flag_prints_usage_and_exits_zero( assert result == 0 assert captured.out == ( "Usage:\n" - " context-compiler [--help] [--version]\n" + " context-compiler [--help] [--version] [--with-precompiler]\n" "\n" "Options:\n" - " --help Show this help message and exit.\n" - " --version Show the installed context-compiler version and exit.\n" + " --help Show this help message and exit.\n" + " --version Show the installed context-compiler version and exit.\n" + " --with-precompiler Enable heuristic precompiler validation in the REPL loop.\n" ) assert captured.err == "" @@ -100,9 +101,12 @@ def test_main_version_flag_prints_installed_version_and_exits_zero( def test_main_without_args_runs_repl_as_before(monkeypatch: pytest.MonkeyPatch) -> None: called: dict[str, object] = {} - def _fake_run_repl(in_stream: TextIO, out_stream: TextIO) -> None: + def _fake_run_repl( + in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = False + ) -> None: called["in_stream"] = in_stream called["out_stream"] = out_stream + called["use_precompiler"] = use_precompiler monkeypatch.setattr(repl_module, "run_repl", _fake_run_repl) monkeypatch.setattr(sys, "argv", ["context-compiler"]) @@ -112,6 +116,28 @@ def _fake_run_repl(in_stream: TextIO, out_stream: TextIO) -> None: assert result == 0 assert called["in_stream"] is sys.stdin assert called["out_stream"] is sys.stdout + assert called["use_precompiler"] is False + + +def test_main_with_precompiler_flag_runs_repl_with_flag(monkeypatch: pytest.MonkeyPatch) -> None: + called: dict[str, object] = {} + + def _fake_run_repl( + in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = False + ) -> None: + called["in_stream"] = in_stream + called["out_stream"] = out_stream + called["use_precompiler"] = use_precompiler + + monkeypatch.setattr(repl_module, "run_repl", _fake_run_repl) + monkeypatch.setattr(sys, "argv", ["context-compiler", "--with-precompiler"]) + + result = repl_module.main() + + assert result == 0 + assert called["in_stream"] is sys.stdin + assert called["out_stream"] is sys.stdout + assert called["use_precompiler"] is True def test_main_unknown_flag_prints_error_hint_and_exits_nonzero( @@ -187,6 +213,74 @@ def test_repl_non_interactive_uses_human_readable_output() -> None: assert lines == ["passthrough"] +def test_repl_with_precompiler_parses_directive_before_engine_step() -> None: + out = StringIO() + run_repl( + StringIO('{"classification":"directive","output":"prohibit peanuts"}\nquit\n'), + out, + use_precompiler=True, + ) + + lines = out.getvalue().splitlines() + assert lines == ["updated", "premise: (none)", "policies:", "- prohibit peanuts"] + + +def test_repl_with_precompiler_near_miss_passes_through_and_clarifies() -> None: + out = StringIO() + run_repl(StringIO("set premise to concise replies\nquit\n"), out, use_precompiler=True) + + lines = out.getvalue().splitlines() + assert lines == ["confirm: Did you mean 'set premise concise replies'?"] + + +def test_repl_with_precompiler_non_directive_passthrough() -> None: + out = StringIO() + run_repl(StringIO("what is a simple curry recipe?\nquit\n"), out, use_precompiler=True) + + lines = out.getvalue().splitlines() + assert lines == ["passthrough"] + + +def test_repl_with_precompiler_bypasses_parsing_while_pending( + monkeypatch: pytest.MonkeyPatch, +) -> None: + seen: list[tuple[object, str | None]] = [] + + def _parse(raw_output: object, *, source_input: str | None = None) -> str | None: + seen.append((raw_output, source_input)) + if raw_output == "use podman instead of docker": + return "use podman instead of docker" + raise AssertionError("parse_precompiler_output should be bypassed while pending") + + monkeypatch.setattr(repl_module, "parse_precompiler_output", _parse) + + out = StringIO() + run_repl( + StringIO("use podman instead of docker\nyes\nquit\n"), + out, + use_precompiler=True, + ) + + assert seen == [("use podman instead of docker", "use podman instead of docker")] + lines = out.getvalue().splitlines() + assert _contains_subsequence(lines, ['confirm: Did you mean to use "podman" instead?']) + assert _contains_subsequence(lines, ["updated", "premise: (none)", "policies:", "- use podman"]) + + +def test_repl_without_precompiler_does_not_parse_inputs(monkeypatch: pytest.MonkeyPatch) -> None: + def _fail_parse(_raw: object, *, source_input: str | None = None) -> str | None: + del source_input + raise AssertionError("parse_precompiler_output should not be called") + + monkeypatch.setattr(repl_module, "parse_precompiler_output", _fail_parse) + + out = StringIO() + run_repl(StringIO('{"classification":"directive","output":"prohibit peanuts"}\nquit\n'), out) + + lines = out.getvalue().splitlines() + assert lines == ["passthrough"] + + def test_repl_interactive_rejects_multi_command_chunk() -> None: out = _TTYStringIO() run_repl( From f5056ee9de7be581c5049519f60833b13b492d24 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 25 Apr 2026 00:59:29 -0400 Subject: [PATCH 2/5] test: add repl cli argument and pipe smoke coverage --- tests/test_repl.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tests/test_repl.py b/tests/test_repl.py index 4df99c9..d07a79e 100644 --- a/tests/test_repl.py +++ b/tests/test_repl.py @@ -1,3 +1,4 @@ +import subprocess import sys from io import StringIO from typing import TextIO @@ -64,6 +65,16 @@ def _contains_subsequence(lines: list[str], expected: list[str]) -> bool: return any(lines[i : i + window] == expected for i in range(len(lines) - window + 1)) +def _run_repl_cli(*args: str, input_text: str = "") -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, "-m", "context_compiler.repl", *args], + input=input_text, + text=True, + capture_output=True, + check=False, + ) + + def test_main_help_flag_prints_usage_and_exits_zero( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] ) -> None: @@ -80,7 +91,8 @@ def test_main_help_flag_prints_usage_and_exits_zero( "Options:\n" " --help Show this help message and exit.\n" " --version Show the installed context-compiler version and exit.\n" - " --with-precompiler Enable heuristic precompiler validation in the REPL loop.\n" + " --with-precompiler Enable precompiler before each REPL turn " + "(heuristic + validation only)\n" ) assert captured.err == "" @@ -155,6 +167,24 @@ def test_main_unknown_flag_prints_error_hint_and_exits_nonzero( ) +@pytest.mark.parametrize( + "args, expected_bad_arg", + [ + (["--with-precompiler", "foo"], "--with-precompiler"), + (["--help", "--version"], "--help"), + (["--version", "--with-precompiler"], "--version"), + ], +) +def test_cli_rejects_non_single_flag_argument_forms(args: list[str], expected_bad_arg: str) -> None: + result = _run_repl_cli(*args) + + assert result.returncode != 0 + assert result.stdout == "" + assert result.stderr == ( + f"error: unknown option '{expected_bad_arg}'\nTry 'context-compiler --help' for usage.\n" + ) + + def test_repl_update_flow() -> None: lines = _run_non_interactive_lines("set premise concise\nquit\n") assert lines == ["updated", "premise: concise", "policies: (none)"] @@ -241,6 +271,15 @@ def test_repl_with_precompiler_non_directive_passthrough() -> None: assert lines == ["passthrough"] +def test_cli_with_precompiler_pipe_smoke_emits_clarify_without_update() -> None: + result = _run_repl_cli("--with-precompiler", input_text="set premise to concise replies\n") + + assert result.returncode == 0 + assert "Did you mean 'set premise concise replies'?" in result.stdout + assert "updated" not in result.stdout + assert result.stderr == "" + + def test_repl_with_precompiler_bypasses_parsing_while_pending( monkeypatch: pytest.MonkeyPatch, ) -> None: From 41bd2c5cb153de66d55d9d0d4012513a313ec207 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 25 Apr 2026 01:04:46 -0400 Subject: [PATCH 3/5] docs: update repl help text for precompiler flag --- src/context_compiler/repl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/context_compiler/repl.py b/src/context_compiler/repl.py index 79bdb71..d51991c 100644 --- a/src/context_compiler/repl.py +++ b/src/context_compiler/repl.py @@ -15,7 +15,7 @@ Options: --help Show this help message and exit. --version Show the installed context-compiler version and exit. - --with-precompiler Enable heuristic precompiler validation in the REPL loop. + --with-precompiler Enable precompiler before each REPL turn (heuristic + validation only) """ From 471b620d7a104e01dc009ef6095052a6f4eef4a9 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 25 Apr 2026 01:08:55 -0400 Subject: [PATCH 4/5] docs: update 0.6.9 user-facing precompiler docs --- README.md | 11 ++++++++++- docs/DescriptionAndMilestones.md | 15 +++++++++++++++ examples/integrations/litellm/README.md | 13 +++++++------ examples/integrations/openwebui/README.md | 14 +++++++------- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index b32eef8..e3bea6c 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,16 @@ The model performs reasoning and generation while the compiler manages premise a ```bash pip install context-compiler context-compiler +context-compiler --with-precompiler ``` +`context-compiler` launches the interactive REPL. + +`--with-precompiler` enables the experimental precompiler before each REPL turn +(heuristic + validation only). Near-miss inputs are not rewritten and are +passed through to the engine, which continues to return clarify behavior for +those forms. + Or in code: ```python from context_compiler import create_engine @@ -375,8 +383,9 @@ into canonical directives before compilation. It is designed to be conservative and must be used with validation: -- heuristic-first, with LLM fallback when needed +- reject-first; directive-adjacent unsafe forms abstain instead of rewriting - all outputs must be validated with `parse_precompiler_output(...)` +- no directive grammar expansion - raw outputs must not be passed directly to the compiler See [LLM preprocessor](docs/llm-preprocessor.md) and diff --git a/docs/DescriptionAndMilestones.md b/docs/DescriptionAndMilestones.md index ca24713..af029c2 100644 --- a/docs/DescriptionAndMilestones.md +++ b/docs/DescriptionAndMilestones.md @@ -63,6 +63,21 @@ The current authoritative state shape and directive semantics are defined in `Di After correcting or constraining the assistant once, the behavior remains consistent for the rest of the conversation. +### 0.6.9 — Precompiler Hardening + REPL Opt-In (implemented) + +**Goal** +Harden experimental precompiler behavior while preserving core engine semantics. + +**Deliverables:** + +- Reject-first precompiler classification behavior (`directive` / `no_directive` / `unknown`) +- Portable precompiler conformance fixtures for cross-language runners (TS-ready shape) +- REPL opt-in flag: `context-compiler --with-precompiler` +- No directive grammar expansion and no engine semantic changes + +**User-visible outcome:** +Safer precompiler behavior and explicit REPL opt-in without changing deterministic engine outcomes. + ### M3 — Cross-Session Recall **Goal** diff --git a/examples/integrations/litellm/README.md b/examples/integrations/litellm/README.md index afc2d64..a170ea2 100644 --- a/examples/integrations/litellm/README.md +++ b/examples/integrations/litellm/README.md @@ -129,19 +129,20 @@ These files are importable integration references for host applications. - Basic: passes raw user input to `engine.step(...)`. - With preprocessor: runs heuristic precompiler first. - If heuristic returns a directive, that directive is passed to `engine.step(...)`. - - If heuristic does not resolve to a directive (`no_directive`), LLM fallback prompt conversion runs. + - If heuristic does not produce a directive (`no_directive` or `unknown`), LLM fallback prompt conversion runs. - If fallback yields nothing usable or errors, behavior safely remains equivalent to basic. + - Behavior is reject-first and does not expand directive grammar. ## Example checks -- Near-miss canonicalization (`with_preprocessor.py`): - - `set premise to concise replies` -> precompiler can canonicalize to `set premise concise replies`. +- Near-miss passthrough (`with_preprocessor.py`): + - `set premise to concise replies` is not rewritten by the precompiler and is passed through unchanged. + - Engine returns clarify (`Did you mean 'set premise concise replies'?`). - Lifecycle enforcement (both): - `change premise to formal tone` with no premise -> clarify (`set premise ...` first). - Conflict semantics (both): - `use docker` then `prohibit docker` -> conflict clarify. - Replacement precondition (both): - `use podman instead of docker` without prior `use docker` -> replacement clarify. -- NL upgrade / abstain (`with_preprocessor.py`): - - `please use docker` may upgrade to `use docker`. - - `I usually use docker` should abstain (`no directive`). +- Directive-adjacent abstain (`with_preprocessor.py`): + - `change premise concise replies` is classified as `unknown`, not rewritten, and handled by engine clarify. diff --git a/examples/integrations/openwebui/README.md b/examples/integrations/openwebui/README.md index 80e2bfc..d330388 100644 --- a/examples/integrations/openwebui/README.md +++ b/examples/integrations/openwebui/README.md @@ -99,16 +99,16 @@ Validate clarify short-circuit, passthrough forwarding, update injection with on **Case 4** -- prompt(s): `clear state` → `set premise to concise replies` → `set premise formal tone` -- base model: accepts both as conversational style requests -- basic pipe: `Did you mean 'set premise concise replies'?` then conversational formal-tone rewrite -- preprocessor pipe: `Premise set: Concise replies.` then `Premise already exists...` -- why this is a real win: preprocessor canonicalizes near-miss form and preserves premise-slot semantics end-to-end. +- prompt(s): `clear state` → `set premise to concise replies` +- base model: accepts conversational style phrasing +- basic pipe: `Did you mean 'set premise concise replies'?` +- preprocessor pipe: same clarify (near-miss is not rewritten) +- why this is a real win: precompiler stays reject-first and preserves engine-owned clarify behavior. **Case 5** - prompt(s): `clear state` → `change premise concise replies` - base model: generic “please clarify changes” response - basic pipe: `Did you mean 'change premise to concise replies'?` -- preprocessor pipe: `No premise exists yet. Use 'set premise ...' first.` -- why this is a real win: preprocessor upgrades near-miss form and reaches the correct lifecycle clarify state instead of stopping at syntax clarify. +- preprocessor pipe: same clarify (near-miss is passed through unchanged) +- why this is a real win: near-miss inputs are not canonicalized, so directive semantics stay engine-owned. From d85568e113bb0ded89e1f31566e88f7a39f6d206 Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Sat, 25 Apr 2026 01:08:57 -0400 Subject: [PATCH 5/5] docs: tighten preprocessor contract wording --- experimental/preprocessor/README.md | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/experimental/preprocessor/README.md b/experimental/preprocessor/README.md index e701151..dad8a29 100644 --- a/experimental/preprocessor/README.md +++ b/experimental/preprocessor/README.md @@ -30,14 +30,31 @@ Public validator entry point: All preprocessor outputs (heuristic or LLM) must be validated with `parse_precompiler_output(...)` before being applied. +Classification contract: + +- `directive`: safe, validated canonical directive (`output` is a directive string) +- `no_directive`: confident ordinary content (`output` is `null`) +- `unknown`: unsafe to rewrite (`output` is `null`) + +`unknown` is reject/abstain behavior. Malformed, ambiguous, mixed-intent, +quoted/reported, unsupported, or unsafe outputs must not be rewritten. + +Only validated `directive` output may be used as rewritten compiler input. +`no_directive` and `unknown` must fall back to original user input. + `source_input` is optional at the API level for backward compatibility. -For integration behavior, it is required for LLM fallback validation calls: +For integration behavior, it is REQUIRED for LLM fallback validation calls: pass `source_input=` so source-aware reject rules can -block unsafe rewrites (for example, engine-owned premise near-miss -canonicalization). +block unsafe rewrites. + +Engine-owned near-misses are reject cases (for example `set premise to X`, +`change premise X`) and must remain `unknown` (not rewritten). Raw preprocessor/LLM outputs must not be passed directly to the compiler. +The precompiler does not expand directive grammar. It may emit only validated +canonical directives accepted by the compiler. + ## Safe usage pattern 1. Run `precompile_heuristic(message)`.