diff --git a/.data/snapshots/state_AgentA.json.meta b/.data/snapshots/state_AgentA.json.meta index e4bf186..dfdbfd7 100644 --- a/.data/snapshots/state_AgentA.json.meta +++ b/.data/snapshots/state_AgentA.json.meta @@ -1 +1 @@ -{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} +{"created_at": "2025-10-10T08:23:58Z", "schema_version": "v1"} diff --git a/.data/snapshots/state_Ambrose.json.meta b/.data/snapshots/state_Ambrose.json.meta index e4bf186..4c28666 100644 --- a/.data/snapshots/state_Ambrose.json.meta +++ b/.data/snapshots/state_Ambrose.json.meta @@ -1 +1 @@ -{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} +{"created_at": "2025-10-10T08:23:57Z", "schema_version": "v1"} diff --git a/.data/snapshots/state_agent.json.meta b/.data/snapshots/state_agent.json.meta index e4bf186..56f8530 100644 --- a/.data/snapshots/state_agent.json.meta +++ b/.data/snapshots/state_agent.json.meta @@ -1 +1 @@ -{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} +{"created_at": "2025-10-10T08:23:55Z", "schema_version": "v1"} diff --git a/.data/snapshots/state_smoke.json.meta b/.data/snapshots/state_smoke.json.meta index e4bf186..dfdbfd7 100644 --- a/.data/snapshots/state_smoke.json.meta +++ b/.data/snapshots/state_smoke.json.meta @@ -1 +1 @@ -{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} +{"created_at": "2025-10-10T08:23:58Z", "schema_version": "v1"} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aaef4af..f8af3d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,6 +12,7 @@ env: PYTHONUTF8: "1" PYTHONHASHSEED: "0" LC_ALL: C.UTF-8 + LANG: C.UTF-8 SOURCE_DATE_EPOCH: "315532800" jobs: @@ -98,6 +99,8 @@ jobs: pip install -e .[dev,test] # Ensure PyYAML is present (not included in extras) pip install pyyaml + # Ensure deterministic completions output + python -m pip install "shtab==1.7.1" - name: Sanity imports continue-on-error: true @@ -157,6 +160,8 @@ jobs: CI: "true" PYTHONHASHSEED: 
"0" COLUMNS: "80" + LINES: "25" + LANG: "C.UTF-8" run: | set -euo pipefail pytest -q -m "not manual" @@ -199,6 +204,8 @@ jobs: pip install -e ".[test,dev]" # Fallback in case extras not defined on forks pip install pytest || true + # Ensure deterministic completions output + python -m pip install "shtab==1.7.1" - name: Run CLI help stability tests env: @@ -206,6 +213,8 @@ jobs: CI: "true" PYTHONHASHSEED: "0" COLUMNS: "80" + LINES: "25" + LANG: "C.UTF-8" run: | pytest -q tests/cli/test_help_stability.py diff --git a/.github/workflows/cli_smoke.yml b/.github/workflows/cli_smoke.yml index 91d24e1..b4bd44b 100644 --- a/.github/workflows/cli_smoke.yml +++ b/.github/workflows/cli_smoke.yml @@ -26,6 +26,7 @@ jobs: env: CI: "true" TZ: "UTC" + LANG: "C.UTF-8" PYTHONUTF8: "1" PYTHONHASHSEED: "0" LC_ALL: "C.UTF-8" @@ -33,6 +34,7 @@ jobs: CLEMATIS_NETWORK_BAN: "1" PYTHONDONTWRITEBYTECODE: "1" COLUMNS: "80" + LINES: "25" steps: - name: Checkout @@ -67,6 +69,8 @@ jobs: else python -m pip install pytest hypothesis fi + # Ensure deterministic completions output + python -m pip install "shtab==1.7.1" - name: Pre-check (version/help) shell: bash diff --git a/.gitignore b/.gitignore index dee4e95..6d77a76 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,8 @@ site/ # Caches & scratch .cache/ .ipynb_checkpoints/ +.lancedb/ +lancedb/ # Logs & artifacts (non-identity; keep tests' goldens intact) logs/ diff --git a/.logs/apply.jsonl b/.logs/apply.jsonl index bad3480..be144ed 100644 --- a/.logs/apply.jsonl +++ b/.logs/apply.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "applied": 0, "clamps": 0, "version_etag": "46", "snapshot": "./.data/snapshots/state_AgentA.json", "cache_invalidations": 0, "ms": 0.785} +{"turn": "demo-1", "agent": "AgentA", "applied": 0, "clamps": 0, "version_etag": "46", "snapshot": "./.data/snapshots/state_AgentA.json", "cache_invalidations": 0, "ms": 0.777} diff --git a/.logs/t1.jsonl b/.logs/t1.jsonl index f863190..103f387 100644 --- a/.logs/t1.jsonl 
+++ b/.logs/t1.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "pops": 5, "iters": 1, "propagations": 3, "radius_cap_hits": 0, "layer_cap_hits": 0, "node_budget_hits": 0, "max_delta": 1.0, "graphs_touched": 1, "cache_hits": 0, "cache_misses": 1, "cache_used": false, "cache_enabled": true, "ms": 0.312, "now": "2025-10-10T02:07:37.100667+00:00"} +{"turn": "demo-1", "agent": "AgentA", "pops": 5, "iters": 1, "propagations": 3, "radius_cap_hits": 0, "layer_cap_hits": 0, "node_budget_hits": 0, "max_delta": 1.0, "graphs_touched": 1, "cache_hits": 0, "cache_misses": 1, "cache_used": false, "cache_enabled": true, "ms": 0.278, "now": "2025-10-10T08:23:58.151083+00:00"} diff --git a/.logs/t2.jsonl b/.logs/t2.jsonl index 17bacbb..b156d97 100644 --- a/.logs/t2.jsonl +++ b/.logs/t2.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "tier_sequence": ["exact_semantic", "cluster_semantic", "archive"], "k_returned": 0, "k_used": 0, "k_residual": 0, "sim_stats": {"mean": 0.0, "max": 0.0}, "score_stats": {"mean": 0.0, "max": 0.0}, "owner_scope": "any", "caps": {"residual_cap": 32}, "cache_enabled": true, "cache_used": true, "cache_hits": 0, "cache_misses": 2, "backend": "inmemory", "backend_fallback": false, "hybrid_used": false, "cache_hit": false, "cache_size": 1, "ms": 0.185, "now": "2025-10-10T02:07:37.100667+00:00"} +{"turn": "demo-1", "agent": "AgentA", "tier_sequence": ["exact_semantic", "cluster_semantic", "archive"], "k_returned": 0, "k_used": 0, "k_residual": 0, "sim_stats": {"mean": 0.0, "max": 0.0}, "score_stats": {"mean": 0.0, "max": 0.0}, "owner_scope": "any", "caps": {"residual_cap": 32}, "cache_enabled": true, "cache_used": true, "cache_hits": 0, "cache_misses": 2, "backend": "inmemory", "backend_fallback": false, "hybrid_used": false, "cache_hit": false, "cache_size": 1, "ms": 0.145, "now": "2025-10-10T08:23:58.151083+00:00"} diff --git a/.logs/t3.jsonl b/.logs/t3.jsonl index d21695d..a40f842 100644 --- a/.logs/t3.jsonl +++ b/.logs/t3.jsonl @@ -1 +1 @@ 
-{"turn": "demo-1", "agent": "AgentA", "backend": "rulebased", "ops_counts": {"Speak": 1, "RequestRetrieve": 1}, "requested_retrieve": true, "rag_used": true, "ms_plan": 0.042, "ms_rag": 0.066, "ms_speak": 0.034, "now": "2025-10-10T02:07:37.100667+00:00"} +{"turn": "demo-1", "agent": "AgentA", "backend": "rulebased", "ops_counts": {"Speak": 1, "RequestRetrieve": 1}, "requested_retrieve": true, "rag_used": true, "ms_plan": 0.032, "ms_rag": 0.055, "ms_speak": 0.036, "now": "2025-10-10T08:23:58.151083+00:00"} diff --git a/.logs/t3_dialogue.jsonl b/.logs/t3_dialogue.jsonl index bebd9dc..a17e739 100644 --- a/.logs/t3_dialogue.jsonl +++ b/.logs/t3_dialogue.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "tokens": 6, "truncated": false, "style_prefix_used": false, "snippet_count": 0, "ms": 0.034, "backend": "rulebased", "now": "2025-10-10T02:07:37.100667+00:00"} +{"turn": "demo-1", "agent": "AgentA", "tokens": 7, "truncated": false, "style_prefix_used": false, "snippet_count": 0, "ms": 0.036, "backend": "rulebased", "now": "2025-10-10T08:23:58.151083+00:00"} diff --git a/.logs/t3_plan.jsonl b/.logs/t3_plan.jsonl index 2000f4d..48e7b80 100644 --- a/.logs/t3_plan.jsonl +++ b/.logs/t3_plan.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "policy_backend": "rulebased", "backend": "rulebased", "ops_counts": {"Speak": 1, "RequestRetrieve": 1}, "requested_retrieve": true, "rag_used": true, "reflection": false, "ms_deliberate": 0.042, "ms_rag": 0.066, "now": "2025-10-10T02:07:37.100667+00:00"} +{"turn": "demo-1", "agent": "AgentA", "policy_backend": "rulebased", "backend": "rulebased", "ops_counts": {"Speak": 1, "RequestRetrieve": 1}, "requested_retrieve": true, "rag_used": true, "reflection": false, "ms_deliberate": 0.032, "ms_rag": 0.055, "now": "2025-10-10T08:23:58.151083+00:00"} diff --git a/.logs/t4.jsonl b/.logs/t4.jsonl index abe3592..cba737c 100644 --- a/.logs/t4.jsonl +++ b/.logs/t4.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "counts": 
{"input": 0, "after_cooldown": 0, "after_novelty": 0, "after_l2": 0, "approved": 0, "dropped_tail": 0}, "clamps": {"novelty_clamped": 0, "l2_scale": 1.0}, "cooldowns": {"blocked_ops": 0}, "caps": {"delta_norm_cap_l2": 1.5, "novelty_cap_per_node": 0.3, "churn_cap_edges": 64}, "approved": 0, "rejected": 0, "reasons": [], "ms": 0.009, "now": "2025-10-10T02:07:37.100667+00:00"} +{"turn": "demo-1", "agent": "AgentA", "counts": {"input": 0, "after_cooldown": 0, "after_novelty": 0, "after_l2": 0, "approved": 0, "dropped_tail": 0}, "clamps": {"novelty_clamped": 0, "l2_scale": 1.0}, "cooldowns": {"blocked_ops": 0}, "caps": {"delta_norm_cap_l2": 1.5, "novelty_cap_per_node": 0.3, "churn_cap_edges": 64}, "approved": 0, "rejected": 0, "reasons": [], "ms": 0.006, "now": "2025-10-10T08:23:58.151083+00:00"} diff --git a/.logs/turn.jsonl b/.logs/turn.jsonl index d71e545..da59683 100644 --- a/.logs/turn.jsonl +++ b/.logs/turn.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "durations_ms": {"t1": 0.312, "t2": 0.185, "t4": 0.009, "apply": 0.785, "total": 3.189}, "t1": {"pops": 5, "iters": 1, "graphs_touched": 1}, "t2": {"k_returned": 0, "k_used": 0, "cache_hit": false}, "t4": {"approved": 0, "rejected": 0}, "now": "2025-10-10T02:07:37.100667+00:00"} +{"turn": "demo-1", "agent": "AgentA", "durations_ms": {"t1": 0.278, "t2": 0.145, "t4": 0.006, "apply": 0.777, "total": 2.955}, "t1": {"pops": 5, "iters": 1, "graphs_touched": 1}, "t2": {"k_returned": 0, "k_used": 0, "cache_hit": false}, "t4": {"approved": 0, "rejected": 0}, "now": "2025-10-10T08:23:58.151083+00:00"} diff --git a/CHANGELOG.MD b/CHANGELOG.MD index bd79213..1039714 100644 --- a/CHANGELOG.MD +++ b/CHANGELOG.MD @@ -14,6 +14,12 @@ All notable changes to this project will be documented in this file. 
## [Unreleased] +### Fixed +- **T2 / LanceDB:** exact-semantic recency filters now honour the orchestrator-provided `hints["now"]` timestamp before falling back to wall-clock UTC, keeping Lance replay results aligned with the in-memory backend during deterministic replays. + +### Docs +- Documented the LanceDB recency behaviour and the console `--now-ms` requirement for identity runs (README, operator guide, `docs/m3/lance.md`). + ## [0.10.3] - 2025-10-09 ### M14 — Examples & fixtures (viewer/console) diff --git a/MANIFEST.in b/MANIFEST.in index 0b04fad..989b320 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -27,6 +27,9 @@ recursive-include docs *.md # Frontend viewer: ship prebuilt static assets in sdists recursive-include clematis/frontend/dist * +# Prompt templates for demos/LLM scaffolding +recursive-include configs/prompts *.txt + # Do not ship repo-level frontend sources (TS/Node dev tree) prune frontend diff --git a/README.md b/README.md index 644bfa4..09050a4 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,11 @@ Clematis is a deterministic, turn‑based scaffold for agential AI. It models agents with concept graphs and tiered reasoning (T1→T4), uses small LLMs where needed, and keeps runtime behavior reproducible (no hidden network calls in tests/CI). -> **Status:** **v0.10.0** (2025‑10‑08) — **M13 Hardening & Freeze (frozen)**. See **[docs/m13/overview.md](docs/m13/overview.md)**. **M12 skipped** for v3. **M11 complete** ✅ (HS1/GEL substrate). Defaults unchanged; all GEL paths are **gated and OFF by default**; identity path preserved. M10 remains complete; M9 deterministic parallelism remains flag‑gated and OFF by default. +> **Status:** **v0.10.3** (2025‑10‑09) — v3 remains frozen after **M13 Hardening & Freeze**; recent 0.10.x updates are docs/examples only. See **[docs/m13/overview.md](docs/m13/overview.md)** for the locked surface. **M12 skipped** for v3. **M11 complete** ✅ (HS1/GEL substrate). 
Defaults unchanged; all GEL paths are **gated and OFF by default**; identity path preserved. M10 remains complete; M9 deterministic parallelism remains flag‑gated and OFF by default. > > **License:** Apache‑2.0 — see [LICENSE](./LICENSE) & [NOTICE](./NOTICE). > **Support matrix:** Python **3.11–3.13**; Ubuntu, macOS, Windows. Cross‑OS identity and reproducible builds (SBOM/SLSA) enforced in CI. -> **Changelog:** see [CHANGELOG.MD](CHANGELOG.MD) for **v0.10.1**. +> **Changelog:** see [CHANGELOG.MD](CHANGELOG.MD) for **v0.10.3**. > > **M13 — Hardening & Freeze (v3):** See **[docs/m13/overview.md](docs/m13/overview.md)**. > **M14 — Viewer & Console (docs):** See **[docs/m14/frontend.md](docs/m14/frontend.md)**. @@ -82,6 +82,7 @@ TZ=UTC PYTHONHASHSEED=0 SOURCE_DATE_EPOCH=315532800 CLEMATIS_NETWORK_BAN=1 \ python -m clematis console -- step --now-ms 315532800000 --out /tmp/run.json python -m clematis console -- compare --a /tmp/run.json --b /tmp/run.json ``` +> ⚖️ Identity tip: Passing `--now-ms` (or exporting `SOURCE_DATE_EPOCH`) keeps T2’s `exact_recent_days` window aligned across the in-memory and LanceDB backends when replaying bundles or comparing logs. 
Local reproducibility + offline checks for the viewer: diff --git a/clematis/adapters/embeddings.py b/clematis/adapters/embeddings.py index a5e9ce9..354a1ba 100644 --- a/clematis/adapters/embeddings.py +++ b/clematis/adapters/embeddings.py @@ -1,8 +1,12 @@ from __future__ import annotations -from typing import List +from typing import List, Optional import numpy as np from numpy.typing import NDArray import hashlib +import os +import logging + +logger = logging.getLogger(__name__) class _DevDummyEmbeddingAdapter: @@ -48,7 +52,67 @@ def encode(self, texts: List[str]) -> List[NDArray[np.float32]]: return vecs -# Alias for clarity with planned BGE usage in T2 -BGEAdapter = DeterministicEmbeddingAdapter +# Alias for clarity with planned BGE usage in T2 (deterministic fallback) +class BGEAdapter: + """ + Wrapper that attempts to load the real BGE v1.5 encoder when available. + + By default we keep the deterministic adapter for reproducibility. Set the + environment variable ``CLEMATIS_USE_REAL_BGE=1`` (or pass ``use_real=True``) + to enable the SentenceTransformer-backed encoder. 
+ """ + + _ENV_FLAG = "CLEMATIS_USE_REAL_BGE" + + def __init__( + self, + dim: int = 32, + normalize: bool = True, + *, + use_real: Optional[bool] = None, + model_name: str = "BAAI/bge-base-en-v1.5", + device: Optional[str] = None, + ) -> None: + flag = use_real + if flag is None: + flag = os.getenv(self._ENV_FLAG, "").strip().lower() in {"1", "true", "yes", "on"} + + self.normalize = bool(normalize) + self._stub = DeterministicEmbeddingAdapter(dim=dim, normalize=normalize) + self._model = None + self._use_real = bool(flag) + self.dim = self._stub.dim + + if self._use_real: + try: + from sentence_transformers import SentenceTransformer # type: ignore + + logger.info("Loading real BGE encoder '%s' (device=%s)", model_name, device or "auto") + self._model = SentenceTransformer(model_name, device=device) + try: + self.dim = int(self._model.get_sentence_embedding_dimension()) # type: ignore[attr-defined] + except Exception: + # Fallback: infer dim from a dummy encode + sample = self._model.encode(["probe"], convert_to_numpy=True) + self.dim = int(np.asarray(sample[0]).shape[-1]) + except Exception as exc: + logger.warning( + "Falling back to deterministic BGE adapter (failed to load '%s': %s)", + model_name, + exc, + ) + self._model = None + self._use_real = False + + def encode(self, texts: List[str]) -> List[NDArray[np.float32]]: + if self._model is not None: + vectors = self._model.encode( + texts, + convert_to_numpy=True, + normalize_embeddings=self.normalize, + ) + return [np.asarray(vec, dtype=np.float32) for vec in vectors] + return self._stub.encode(texts) + __all__ = ["DeterministicEmbeddingAdapter", "BGEAdapter"] diff --git a/clematis/adapters/ollama_transport.py b/clematis/adapters/ollama_transport.py new file mode 100644 index 0000000..02fff53 --- /dev/null +++ b/clematis/adapters/ollama_transport.py @@ -0,0 +1,26 @@ +# clematis/adapters/ollama_transport.py +from __future__ import annotations +import json +import urllib.request + +def 
generate_with_ollama(prompt: str, *, model: str, max_tokens: int, temperature: float, timeout_s: float) -> str: + body = { + "model": model, + "prompt": prompt, + "stream": False, + "options": { + "temperature": temperature, + "num_predict": max(0, int(max_tokens)), + }, + } + data = json.dumps(body).encode("utf-8") + req = urllib.request.Request( + "http://localhost:11434/api/generate", + data=data, + headers={"content-type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + payload = json.loads(resp.read().decode("utf-8")) + # /api/generate returns {"response": "...", ...} + return (payload.get("response") or "").strip() diff --git a/clematis/cli/chat.py b/clematis/cli/chat.py new file mode 100644 index 0000000..e40b486 --- /dev/null +++ b/clematis/cli/chat.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Optional + +# Reuse the implementation that lives in scripts/chat.py +try: # prefer packaged location + from clematis.scripts.chat import main as _chat_main +except Exception: # dev fallback when running from repo root + try: + from scripts.chat import main as _chat_main + except Exception as e: # helpful error if neither path is available + raise ModuleNotFoundError( + "Unable to import chat implementation. Expected 'clematis.scripts.chat' " + "when installed, or 'scripts.chat' in a source checkout. If you're in 'dist/', " + "run from the repo root or install the package." + ) from e + + +def register(subparsers) -> None: + """ + Register 'chat' as a pass-through subcommand so `python -m clematis chat` works. 
+ """ + p = subparsers.add_parser( + "chat", + help="Interactive chat loop with optional LLM backend", + ) + + def _run(ns) -> int: + argv = list(getattr(ns, "args", [])) + if argv and argv[0] == "--": + argv = argv[1:] + return _chat_main(argv) + + p.set_defaults(func=_run) + + +def main(argv: Optional[list[str]] = None) -> int: + """Direct entrypoint allowing `python -m clematis.chat` style execution.""" + return _chat_main(argv or []) diff --git a/clematis/cli/export_logs_for_frontend.py b/clematis/cli/export_logs_for_frontend.py index 3d17013..c70f593 100644 --- a/clematis/cli/export_logs_for_frontend.py +++ b/clematis/cli/export_logs_for_frontend.py @@ -67,7 +67,7 @@ def _run(ns: argparse.Namespace) -> int: def register(subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: help_text = "Export logs + latest snapshot into a JSON bundle (delegates to scripts/)" epilog = ( - "Arguments after an optional '--' are forwarded verbatim to " + "Arguments after an optional '--' are forwarded verbatim to\n" "scripts/export_logs_for_frontend.py" ) p = subparsers.add_parser( diff --git a/clematis/cli/main.py b/clematis/cli/main.py index 5b942db..3df98f7 100644 --- a/clematis/cli/main.py +++ b/clematis/cli/main.py @@ -20,6 +20,7 @@ validate, export_logs_for_frontend, console, + chat, ) from ._config import discover_config_path, maybe_log_selected @@ -67,6 +68,7 @@ def build_parser() -> argparse.ArgumentParser: demo.register(subparsers) export_logs_for_frontend.register(subparsers) console.register(subparsers) + chat.register(subparsers) reorder_subparsers_alphabetically(parser) return parser diff --git a/clematis/cli/validate.py b/clematis/cli/validate.py index cfd04a0..5a7d2eb 100644 --- a/clematis/cli/validate.py +++ b/clematis/cli/validate.py @@ -10,6 +10,9 @@ from __future__ import annotations import argparse +import sys +import json +import subprocess from ._exit import OK, USER_ERR from ._io import eprint_once, set_verbosity @@ -50,10 +53,83 @@ def 
_run(ns: argparse.Namespace) -> int: return USER_ERR rest = opts.argv - if opts.wants_json and "--json" not in rest: - rest = ["--json", *rest] - - # Delegate via packaged shim (which falls back to repo-layout if needed) + if opts.wants_json: + # Run the delegate as a real subprocess so we capture FD-level stdout/stderr + if "--json" not in rest: + rest = ["--json", *rest] + cmd = [sys.executable, "-m", "clematis.scripts.validate", *rest] + pr = subprocess.run(cmd, capture_output=True, text=True) + out, err = pr.stdout, pr.stderr + + def extract_json_block(s: str) -> tuple[str | None, tuple[int, int] | None]: + i, j = s.find("{"), s.rfind("}") + if i != -1 and j != -1 and j >= i: + candidate = s[i : j + 1] + json.loads(candidate) # validate + return candidate, (i, j + 1) + return None, None + + json_text, span, src = None, None, None + # Prefer JSON on stdout; fall back to stderr + cand, sp = extract_json_block(out) + if cand is not None: + json_text, span, src = cand, sp, "stdout" + else: + cand, sp = extract_json_block(err) + if cand is not None: + json_text, span, src = cand, sp, "stderr" + + # Emit only JSON to stdout; prefer unwrapped config if present + if json_text is not None: + chosen = json_text + try: + parsed = json.loads(json_text) + + def pick_config(o): + if isinstance(o, dict): + # If this dict already looks like a config (has t1/t2/t3), return it + if any(k in o for k in ("t1", "t2", "t3")): + return o + # Otherwise, try common wrapper keys + for key in ("config", "effective_config", "cfg", "data", "result"): + v = o.get(key) + if isinstance(v, dict) and any(k in v for k in ("t1", "t2", "t3")): + return v + return o + + cfg_obj = pick_config(parsed) + chosen = json.dumps(cfg_obj, ensure_ascii=False) + except Exception: + pass + sys.stdout.write(chosen) + sys.stdout.flush() + + # Forward any non-JSON output to stderr, avoiding duplication of the JSON slice + def forward_noise(s: str, sp: tuple[int, int] | None) -> None: + if not s: + return + if sp 
is None: + sys.stderr.write(s) + return + a, b = sp + if a > 0: + sys.stderr.write(s[:a]) + if b < len(s): + sys.stderr.write(s[b:]) + + if src == "stdout": + forward_noise(out, span) + forward_noise(err, None) + elif src == "stderr": + forward_noise(out, None) + forward_noise(err, span) + else: + forward_noise(out, None) + forward_noise(err, None) + + return pr.returncode + + # Non-JSON path: delegate directly from clematis.scripts.validate import main as _main return _main(rest) diff --git a/clematis/engine/orchestrator/core.py b/clematis/engine/orchestrator/core.py index 59b98fd..a42627e 100644 --- a/clematis/engine/orchestrator/core.py +++ b/clematis/engine/orchestrator/core.py @@ -1,11 +1,12 @@ from __future__ import annotations -from typing import Any, Dict +from typing import Any, Dict, List import time from datetime import datetime, timezone import os as _os import sys as _sys import importlib from dataclasses import asdict, is_dataclass +import re def _iso_from_ms(ms: int) -> str: @@ -26,6 +27,7 @@ def _iso_from_ms(ms: int) -> str: speak, llm_speak, build_llm_prompt, + build_llm_adapter, emit_trace, reflect, ReflectionBundle, @@ -70,6 +72,56 @@ def _truthy(value: Any) -> bool: return bool(value) +_UTTER_SANITIZE_RULES: List[tuple[str, re.Pattern[str], str]] = [ + ( + "identity:qwen", + re.compile(r"\bi\s*(?:am|'m)\s+qwen\b", re.IGNORECASE), + "[FILTERED]", + ), + ( + "vendor:alibaba_cloud", + re.compile(r"large language model developed by alibaba cloud", re.IGNORECASE), + "[FILTERED]", + ), + ( + "memory:seeded_disclaimer", + re.compile(r"\bi\s+do\s+not\s+store\s+or\s+retain\s+seeded\s+memories\b", re.IGNORECASE), + "[FILTERED]", + ), + ( + "identity:repeat", + re.compile(r"(?:\bI am Clematis\b[\.,!?\s]*){3,}", re.IGNORECASE), + "I am Clematis.", + ), +] + + +def _sanitize_utterance(ctx: Any, agent_id: str, backend_used: str, utter: str) -> tuple[str, Dict[str, Any] | None]: + if not utter: + return utter, None + sanitized = utter + triggered: List[str] = 
[] + for tag, pattern, repl in _UTTER_SANITIZE_RULES: + if pattern.search(sanitized): + sanitized = pattern.sub(repl, sanitized) + triggered.append(tag) + sanitized = sanitized.strip() + if sanitized == utter: + return sanitized, None + meta = { + "turn": getattr(ctx, "turn_id", "-"), + "agent": agent_id, + "backend": backend_used, + "patterns": triggered, + "original": utter, + "sanitized": sanitized, + } + now = getattr(ctx, "now", None) + if now: + meta["now"] = now + return sanitized, meta + + # --- T3 enablement helper (default ON unless explicitly denied) --- def _t3_is_enabled(cfg: Dict[str, Any]) -> bool: """Decide if T3 is enabled. @@ -773,6 +825,7 @@ def run_turn(self, ctx: TurnCtx, state: Dict[str, Any], input_text: str) -> Turn "k_retrieved": 0, "owner": None, "tier_pref": None, + "retrieved_ids": [], } if t3_enabled and not _dry_run: @@ -895,6 +948,15 @@ def _retrieve_fn(payload: Dict[str, Any]) -> Dict[str, Any]: t0_rag = time.perf_counter() plan, rag_metrics = rag_once(bundle, plan, _retrieve_fn, already_used=False) rag_ms = round((time.perf_counter() - t0_rag) * 1000.0, 3) + retrieved_ids = rag_metrics.get("retrieved_ids", []) + if retrieved_ids: + if isinstance(state, dict): + state["_chat_last_retrieved"] = list(retrieved_ids) + else: + try: + setattr(state, "_chat_last_retrieved", list(retrieved_ids)) + except Exception: + pass # Dialogue synthesis (rule-based vs optional LLM backend) t0 = time.perf_counter() @@ -961,17 +1023,62 @@ def _retrieve_fn(payload: Dict[str, Any]) -> Dict[str, Any]: speak_metrics = {} backend_used = "patched" else: - if backend_cfg == "llm" and adapter is not None: - utter, speak_metrics = llm_speak(dialog_bundle, plan, adapter) - backend_used = "llm" + adapter_error: Exception | None = None + if backend_cfg == "llm": + if adapter is None: + try: + adapter = build_llm_adapter(cfg) + if adapter is not None: + if isinstance(state, dict): + state["llm_adapter"] = adapter + else: + setattr(state, "llm_adapter", adapter) + 
try: + setattr(ctx, "llm_adapter", adapter) + except Exception: + pass + except Exception as e: + adapter_error = e + try: + logs = getattr(state, "logs", None) + if logs is None and isinstance(state, dict): + logs = state.get("logs") + if isinstance(logs, list): + prov = str((llm_cfg or {}).get("provider", "unknown")) + ci = str(_os.environ.get("CI", "")) + logs.append( + { + "llm_adapter_error": str(e), + "provider": prov, + "ci": ci, + } + ) + except Exception: + pass + if adapter is not None: + utter, speak_metrics = llm_speak(dialog_bundle, plan, adapter) + backend_used = "llm" + else: + utter, speak_metrics = speak(dialog_bundle, plan) + backend_used = "rulebased" + backend_fallback = "rulebased" + if fallback_reason is None: + fallback_reason = ( + f"adapter_error:{type(adapter_error).__name__}" + if adapter_error is not None + else "no_adapter" + ) else: utter, speak_metrics = speak(dialog_bundle, plan) backend_used = "rulebased" - if backend_cfg == "llm" and adapter is None: - backend_fallback, fallback_reason = "rulebased", "no_adapter" - elif backend_cfg not in ("rulebased", "llm"): + if backend_cfg not in ("rulebased", "llm"): backend_fallback, fallback_reason = "rulebased", "invalid_backend" + utter, filter_meta = _sanitize_utterance(ctx, agent_id, backend_used, utter) + if filter_meta: + speak_metrics["filtered"] = True + _append_jsonl("t3_filter.jsonl", filter_meta) + speak_ms = round((time.perf_counter() - t0) * 1000.0, 3) # Plan logging diff --git a/clematis/engine/stages/t1.py b/clematis/engine/stages/t1.py index 3295053..c968ef3 100644 --- a/clematis/engine/stages/t1.py +++ b/clematis/engine/stages/t1.py @@ -202,7 +202,17 @@ def t1_propagate(ctx, state, text: str) -> T1Result: def _t1_one_graph(gid: str): # Returns (deltas_for_gid, per_graph_metrics) g = store.get_graph(gid) - labels = [(n.id, n.label) for n in g.nodes.values()] + labels: List[Tuple[str, str]] = [] + for n in g.nodes.values(): + if getattr(n, "label", None): + 
labels.append((n.id, n.label)) + try: + tags = list(getattr(n, "attrs", {}).get("tags", [])) + except Exception: + tags = [] + for kw in tags: + if isinstance(kw, str) and kw: + labels.append((n.id, kw)) seeds = _match_keywords(text, labels) if not seeds: return [], { diff --git a/clematis/engine/stages/t2/state.py b/clematis/engine/stages/t2/state.py index 42568f4..92b6935 100644 --- a/clematis/engine/stages/t2/state.py +++ b/clematis/engine/stages/t2/state.py @@ -38,7 +38,11 @@ def _init_index_from_cfg(state: dict, cfg_t2: dict): from clematis.memory.lance_index import LanceIndex # type: ignore lcfg = cfg_t2.get("lancedb", {}) or {} - idx = LanceIndex(lcfg) + uri = str(lcfg.get("uri", "./.data/lancedb")) + table = str(lcfg.get("table", "episodes")) + meta_table = str(lcfg.get("meta_table", "meta")) + create_ok = bool(lcfg.get("create_ok", True)) + idx = LanceIndex(uri=uri, table=table, meta_table=meta_table, create_ok=create_ok) except Exception as e: # noqa: BLE001 — we intentionally swallow here and fall back idx = InMemoryIndex() fallback_reason = f"lancedb_unavailable: {type(e).__name__}" diff --git a/clematis/engine/stages/t3/__init__.py b/clematis/engine/stages/t3/__init__.py index ba95c6a..d346233 100644 --- a/clematis/engine/stages/t3/__init__.py +++ b/clematis/engine/stages/t3/__init__.py @@ -5,7 +5,7 @@ from .dialogue import build_llm_prompt, llm_speak, speak from .metrics import finalize as finalize_metrics finalize = finalize_metrics -from .policy import run_policy, select_policy +from .policy import run_policy, select_policy, build_llm_adapter from .trace import emit_trace from .reflect import ReflectionBundle, ReflectionResult, reflect, FixtureMissingError from .legacy import ( @@ -29,6 +29,7 @@ "make_planner_prompt", "plan_with_llm", "rag_once", + "build_llm_adapter", "run_policy", "select_policy", "speak", diff --git a/clematis/engine/stages/t3/bundle.py b/clematis/engine/stages/t3/bundle.py index a127d30..c733d2c 100644 --- 
a/clematis/engine/stages/t3/bundle.py +++ b/clematis/engine/stages/t3/bundle.py @@ -129,19 +129,42 @@ def extract_t2_retrieved(t2: Any, k_retrieval: int) -> List[Dict[str, Any]]: out: List[Dict[str, Any]] = [] retrieved = getattr(t2, "retrieved", []) for record in retrieved or []: + speaker = None if isinstance(record, dict): rid = str(record.get("id")) score = float(record.get("_score", record.get("score", 0.0)) or 0.0) owner = str(record.get("owner", "any")) quarter = str(record.get("quarter", "")) + text = record.get("text") + tags = record.get("tags") + speaker = record.get("speaker") else: rid = str(getattr(record, "id", "")) score = float(getattr(record, "score", 0.0) or 0.0) owner = str(getattr(record, "owner", "any")) quarter = str(getattr(record, "quarter", "")) + text = getattr(record, "text", None) + tags = getattr(record, "tags", None) + speaker = getattr(record, "speaker", None) if not rid: continue - out.append({"id": rid, "score": score, "owner": owner, "quarter": quarter}) + tags_list = list(tags) if isinstance(tags, list) else None + if speaker is None and tags_list: + for t in tags_list: + if isinstance(t, str) and t.startswith("speaker:"): + speaker = t.split(":", 1)[1] or None + break + out.append( + { + "id": rid, + "score": score, + "owner": owner, + "quarter": quarter, + **({"text": str(text)} if text else {}), + **({"tags": tags_list} if tags_list else {}), + **({"speaker": str(speaker)} if speaker else {}), + } + ) out.sort(key=lambda e: (-float(e.get("score", 0.0)), str(e.get("id", "")))) return out[: max(int(k_retrieval), 0)] diff --git a/clematis/engine/stages/t3/dialogue.py b/clematis/engine/stages/t3/dialogue.py index 76c4953..4f668e4 100644 --- a/clematis/engine/stages/t3/dialogue.py +++ b/clematis/engine/stages/t3/dialogue.py @@ -3,6 +3,10 @@ from ...types import Plan, SpeakOp +_DEFAULT_IDENTITY = ( + "You are roleplaying as Clematis. Maintain this identity, only speak as it, and never claim to be Qwen. Clematis is a distant person." 
+) + def _dedupe_sort_list(xs: List[str]) -> List[str]: return sorted({str(x) for x in (xs or [])}) @@ -19,6 +23,27 @@ def _top_snippet_ids(dialog_bundle: Dict[str, Any]) -> List[str]: return ids[: max(n, 0)] +def _top_snippets(dialog_bundle: Dict[str, Any]) -> List[Dict[str, Any]]: + n = int(dialog_bundle.get("dialogue", {}).get("include_top_k_snippets", 2) or 2) + hits = [] + for entry in dialog_bundle.get("retrieved", []) or []: + if not isinstance(entry, dict): + continue + rid = entry.get("id") + if not rid: + continue + hits.append( + { + "id": str(rid), + "owner": str(entry.get("owner", "any")), + "score": float(entry.get("score", 0.0) or 0.0), + "text": str(entry.get("text", "")), + "speaker": str(entry.get("speaker", "")) if entry.get("speaker") else None, + } + ) + return hits[: max(n, 0)] + + def _tokenize(s: str) -> List[str]: return (s or "").split() @@ -52,15 +77,22 @@ def speak(dialog_bundle: Dict[str, Any], plan: Plan) -> Tuple[str, Dict[str, Any template = str( dialog_bundle.get("dialogue", {}).get("template", "summary: {labels}. 
next: {intent}") ) + identity = str(dialog_bundle.get("dialogue", {}).get("identity", _DEFAULT_IDENTITY)) snippet_ids = _top_snippet_ids(dialog_bundle) + snippets_rich = _top_snippets(dialog_bundle) snippets_str = ", ".join(snippet_ids) + snippets_text = "; ".join( + [f"{s['id']}: {s['text']}" for s in snippets_rich if s.get("text")] + ) fmt_vars = { "labels": _format_labels(labels_sorted), "intent": intent, "snippets": snippets_str, + "snippets_text": snippets_text, "style_prefix": style_prefix, + "identity": identity, } try: @@ -109,16 +141,46 @@ def build_llm_prompt(dialog_bundle: Dict[str, Any], plan: Plan) -> str: style_prefix = str(dialog_bundle.get("agent", {}).get("style_prefix", "")) input_text = str(dialog_bundle.get("text", {}).get("input", "")) snippet_ids = _top_snippet_ids(dialog_bundle) - + identity = str(dialog_bundle.get("dialogue", {}).get("identity", _DEFAULT_IDENTITY)) + safety = ( + "SYSTEM: You are speaking as Clematis. Stay in the given identity, cite retrieved memories when relevant, " + "and never claim to be Qwen or reference Alibaba Cloud." + ) + guardrails = ( + "POLICY: Answer the user's question first, reference known user facts (such as names) accurately, " + "mention your own name at most once, keep replies concise (<=3 sentences), remain respectful, and use retrieval snippets only when they help. " + "If uncertain, acknowledge limits or ask for clarification." + ) lines = [ - f"now: {dialog_bundle.get('now', '')}", - f"style_prefix: {style_prefix}", - f"intent: {intent}", - f"labels: {', '.join(labels)}", - f"snippets: {', '.join(snippet_ids)}", - "instruction: Compose a concise utterance that reflects the intent and labels. 
Do not exceed the token budget.", - f"input: {input_text}", + safety, + guardrails, + f"IDENTITY: {identity}", + f"NOW: {dialog_bundle.get('now', '')}", + f"STYLE_PREFIX: {style_prefix}", + f"INTENT: {intent}", + f"LABELS: {', '.join(labels)}", + f"SNIPPET_IDS: {', '.join(snippet_ids)}", ] + history = dialog_bundle.get("dialogue", {}).get("history", []) + snippet_records = _top_snippets(dialog_bundle) + if history: + lines.append("HISTORY:") + for entry in history: + role = str(entry.get("role", "")) + text = str(entry.get("text", "")) + lines.append(f"{role}: {text}") + if snippet_records: + lines.append("SNIPPETS:") + for idx, rec in enumerate(snippet_records, start=1): + snippet_text = rec.get("text") or "(no text)" + owner = rec.get("owner", "any") + speaker = rec.get("speaker") or owner + score = rec.get("score", 0.0) + lines.append( + f"{idx}. [{rec['id']}] speaker={speaker} owner={owner} score={score:.3f}: {snippet_text}" + ) + lines.append("INSTRUCTION: Compose a short, concise utterance that reflects the intent, identity, labels, and relevant history without exceeding the token budget.") + lines.append(f"INPUT: {input_text}") return "\n".join(lines).strip() @@ -154,7 +216,7 @@ def llm_speak(dialog_bundle: Dict[str, Any], plan: Plan, adapter: Any) -> Tuple[ if truncated_llm is None and isinstance(result, dict): truncated_llm = bool(result.get("truncated", False)) except Exception: - text, tokens, truncated_llm = "[llm:error]", 0, True + text, tokens, truncated_llm = "Brainfart, sorry, please repeat.", 0, True if style_prefix and not (text or "").startswith(f"{style_prefix}|"): text = f"{style_prefix}| {text}".strip() diff --git a/clematis/engine/stages/t3/legacy.py b/clematis/engine/stages/t3/legacy.py index d6de5a4..69fdff0 100644 --- a/clematis/engine/stages/t3/legacy.py +++ b/clematis/engine/stages/t3/legacy.py @@ -2,6 +2,8 @@ from typing import Dict, Any, List, Tuple, Callable, Optional import os +import json +from pathlib import Path from 
...policy.json_schemas import PLANNER_V1 from ...policy.sanitize import parse_and_validate @@ -83,6 +85,53 @@ def make_plan_bundle(ctx, state, t1, t2) -> Dict[str, Any]: # --- PR7: Dialogue bundle (deterministic, pure) DIALOG_BUNDLE_VERSION = "t3-dialog-bundle-v1" +_DEFAULT_TEMPLATE = "{style_prefix}| summary: {labels}. next: {intent}" +_DEFAULT_IDENTITY = ( + "You are Clematis, the knowledge gardener. Maintain this persona, remember user-provided facts accurately, and never claim to be Qwen." +) +_HISTORY_WINDOW = 6 + + +def _load_template_from_file(path_str: str) -> Optional[str]: + try: + path = Path(path_str) + if not path.exists(): + return None + suffix = path.suffix.lower() + if suffix in {".json", ".jsonl"}: + with path.open("r", encoding="utf-8") as fh: + if suffix == ".jsonl": + for line in fh: + line = line.strip() + if not line: + continue + rec = json.loads(line) + break + else: + return None + else: + rec = json.load(fh) + template = rec.get("template") + if not template: + return None + return str(template) + return path.read_text(encoding="utf-8").strip() + except Exception: + return None + + +def _resolve_dialogue_template(dialogue_cfg: Dict[str, Any]) -> Tuple[str, Optional[str]]: + template = str(dialogue_cfg.get("template", _DEFAULT_TEMPLATE)) + template_file_cfg = dialogue_cfg.get("template_file") + template_file_path: Optional[str] = None + if template_file_cfg: + template_file_path = str(template_file_cfg) + loaded = _load_template_from_file(template_file_path) + if loaded: + template = loaded + else: + template_file_path = None + return template, template_file_path def make_dialog_bundle(ctx, state, t1, t2, plan=None) -> Dict[str, Any]: @@ -99,8 +148,14 @@ def make_dialog_bundle(ctx, state, t1, t2, plan=None) -> Dict[str, Any]: dialogue_cfg = {} if isinstance(cfg_t3, dict): dialogue_cfg = cfg_t3.get("dialogue", {}) or {} - template = str(dialogue_cfg.get("template", "summary: {labels}. 
next: {intent}")) + template, template_file = _resolve_dialogue_template(dialogue_cfg if isinstance(dialogue_cfg, dict) else {}) include_top_k = int(dialogue_cfg.get("include_top_k_snippets", 2) or 2) + identity = str(dialogue_cfg.get("identity", _DEFAULT_IDENTITY)) + if isinstance(state, dict): + history_raw = list(state.get("_chat_history", []) or []) + else: + history_raw = list(getattr(state, "_chat_history", []) or []) + history = history_raw[-_HISTORY_WINDOW:] # Retrieved already sorted/capped by make_plan_bundle retrieved = list(base.get("t2", {}).get("retrieved", []) or []) @@ -111,7 +166,13 @@ def make_dialog_bundle(ctx, state, t1, t2, plan=None) -> Dict[str, Any]: "agent": base["agent"], "text": base["text"], "retrieved": retrieved, - "dialogue": {"template": template, "include_top_k_snippets": include_top_k}, + "dialogue": { + "template": template, + "include_top_k_snippets": include_top_k, + "template_file": template_file, + "identity": identity, + "history": history, + }, } @@ -239,6 +300,7 @@ def rag_once( "k_retrieved": 0, "owner": rr.get("owner") if rr else None, "tier_pref": rr.get("tier_pref") if rr else None, + "retrieved_ids": [], } if rr is None: @@ -250,6 +312,7 @@ def rag_once( "k_retrieved": 0, "owner": None, "tier_pref": None, + "retrieved_ids": [], } payload = _normalize_rr_payload(bundle, rr) @@ -322,6 +385,7 @@ def rag_once( "k_retrieved": int(k_retrieved), "owner": payload.get("owner"), "tier_pref": payload.get("tier_pref"), + "retrieved_ids": [h.get("id") for h in hits], } return refined, metrics diff --git a/clematis/engine/stages/t3/policy.py b/clematis/engine/stages/t3/policy.py index 3ee6869..6264c9c 100644 --- a/clematis/engine/stages/t3/policy.py +++ b/clematis/engine/stages/t3/policy.py @@ -29,7 +29,9 @@ class LLMAdapterError(Exception): # type: ignore _DEFAULT_TAU_HIGH = 0.8 _DEFAULT_TAU_LOW = 0.4 _DEFAULT_EPS_EDIT = 0.10 - +_DEFAULT_IDENTITY = ( + "You are roleplaying as Clematis. 
Maintain this identity, only speak as it, and never claim to be Qwen. Clematis is a distant person." +) def select_policy(cfg_root: Dict[str, Any], ctx: Any) -> PolicyHandle: t3_cfg = (cfg_root.get("t3") if isinstance(cfg_root, dict) else None) or {} @@ -139,13 +141,27 @@ def deliberate(bundle: Dict[str, Any]) -> Plan: def make_planner_prompt(ctx) -> str: + def _dialogue_cfg() -> Dict[str, Any]: + cfg = getattr(ctx, "cfg", {}) + try: + t3 = cfg.get("t3", {}) if isinstance(cfg, dict) else getattr(cfg, "t3", {}) + except Exception: + t3 = {} + try: + return t3.get("dialogue", {}) if isinstance(t3, dict) else getattr(t3, "dialogue", {}) + except Exception: + return {} + + dialogue_cfg = _dialogue_cfg() + identity = str(dialogue_cfg.get("identity", _DEFAULT_IDENTITY)) summary = { "turn": getattr(ctx, "turn_id", 0), "agent": getattr(ctx, "agent_id", "agent"), } return ( - "SYSTEM: Return ONLY valid JSON with keys {plan: list[str], rationale: str, reflection: boolean}. " - "No prose. No markdown. No trailing commas.\n" + "SYSTEM: You are Clematis, the knowledge gardener. Stay in character, never claim to be Qwen or mention Alibaba Cloud. " + "Return ONLY valid JSON with keys {plan: list[str], rationale: str, reflection: boolean}. No prose. No markdown. No trailing commas.\n" + f"IDENTITY: {identity}\n" f"STATE: {_json.dumps(summary, separators=(',', ':'))}\n" "USER: Propose up to 4 next steps as short strings; include a brief rationale and set reflection to true only if a reflection pass is recommended." 
) @@ -156,10 +172,23 @@ def _get_llm_adapter_from_cfg(cfg: Dict[str, Any]): t3 = cfg.get("t3", {}) if isinstance(cfg, dict) else {} except Exception: t3 = {} - if str(t3.get("backend", "rulebased")) != "llm": + if str(t3.get("backend", "rulebased")).lower() != "llm": return None llm = t3.get("llm", {}) if isinstance(t3, dict) else {} - provider = str(llm.get("provider", "fixture")) + provider_raw = llm.get("provider") + provider = str(provider_raw if provider_raw is not None else "fixture").lower() + if provider_raw is None and "mode" in llm: + mode = str(llm.get("mode", "")).lower() + if mode in ("live", "ollama"): + provider = "ollama" + elif mode in ("mock", "replay", "fixture"): + provider = "fixture" + elif mode in ("rulebased", "off"): + provider = "rulebased" + elif mode: + provider = mode + if provider in ("rulebased", "off", ""): + return None try: ci = str(os.environ.get("CI", "")).lower() == "true" except Exception: @@ -192,6 +221,7 @@ def _ollama_call( ) -> str: import json as _json_local import urllib.request as _ur + import urllib.error as _ue body = _json_local.dumps( { @@ -199,12 +229,14 @@ def _ollama_call( "prompt": prompt, "options": {"temperature": float(temperature), "num_predict": int(max_tokens)}, "stream": False, - "format": "json", } ).encode("utf-8") req = _ur.Request(endpoint, data=body, headers={"Content-Type": "application/json"}) - with _ur.urlopen(req, timeout=timeout_s) as resp: - payload = _json_local.loads(resp.read().decode("utf-8")) + try: + with _ur.urlopen(req, timeout=timeout_s) as resp: + payload = _json_local.loads(resp.read().decode("utf-8")) + except _ue.URLError as e: + raise LLMAdapterError(f"Ollama request failed: {e}") from e txt = payload.get("response") if not isinstance(txt, str): raise LLMAdapterError("Ollama returned no text response") @@ -216,6 +248,14 @@ def _ollama_call( return None +def build_llm_adapter(cfg: Dict[str, Any]): + """ + Public façade for constructing the LLM adapter based on configuration. 
+ Returns an adapter instance or None when the backend is not active. + """ + return _get_llm_adapter_from_cfg(cfg) + + def plan_with_llm(ctx, state: Any, cfg: Dict[str, Any]) -> Dict[str, Any]: try: adapter = _get_llm_adapter_from_cfg(cfg) @@ -310,6 +350,7 @@ def run_policy( __all__ = [ + "build_llm_adapter", "deliberate", "make_planner_prompt", "plan_with_llm", diff --git a/clematis/memory/lance_index.py b/clematis/memory/lance_index.py index 3305239..cfe01c6 100644 --- a/clematis/memory/lance_index.py +++ b/clematis/memory/lance_index.py @@ -261,7 +261,16 @@ def search_tiered( if tier == "exact_semantic": recent_days = hints.get("recent_days") if isinstance(recent_days, (int, float)) and recent_days > 0: - cutoff = datetime.now(timezone.utc) - timedelta(days=float(recent_days)) + now_hint = hints.get("now") + base_now: Optional[datetime] = None + if isinstance(now_hint, str) and now_hint.strip(): + try: + base_now = _parse_iso8601(now_hint.strip()) + except Exception: + base_now = None + if base_now is None: + base_now = datetime.now(timezone.utc) + cutoff = base_now - timedelta(days=float(recent_days)) eps = [e for e in eps if e["_dt"] >= cutoff] elif tier == "archive": qset = hints.get("archive_quarters") diff --git a/clematis/scripts/chat.py b/clematis/scripts/chat.py new file mode 100644 index 0000000..f69409d --- /dev/null +++ b/clematis/scripts/chat.py @@ -0,0 +1,31 @@ +"""Thin adapter for the chat CLI. + +This lives inside the packaged namespace so that `python -m clematis --help` +can import the chat wrapper without requiring the full repo checkout. The heavy +implementation still resides under `scripts/chat.py`; we load it lazily when +the command is actually invoked so the module import remains fast and keeps +packaged builds lightweight. 
+""" + +from __future__ import annotations + +from typing import Optional, Sequence + + +def _resolve_impl(): + try: + from scripts.chat import main as _impl # type: ignore[import] + except ModuleNotFoundError as exc: + raise ModuleNotFoundError( + "Chat CLI implementation not bundled; run from a source checkout where " + "`scripts/chat.py` is available." + ) from exc + return _impl + + +def main(argv: Optional[Sequence[str]] = None) -> int: + impl = _resolve_impl() + return impl(list(argv) if argv is not None else None) + + +__all__ = ["main"] diff --git a/clematis/scripts/export_logs_for_frontend.py b/clematis/scripts/export_logs_for_frontend.py index 6bc971d..4fe97e1 100644 --- a/clematis/scripts/export_logs_for_frontend.py +++ b/clematis/scripts/export_logs_for_frontend.py @@ -42,9 +42,16 @@ STAGE_FILES = [ - "t1.jsonl", "t2.jsonl", - "t3.jsonl", "t3_plan.jsonl", "t3_dialogue.jsonl", "t3_reflection.jsonl", - "t4.jsonl", "apply.jsonl", "turn.jsonl", + "t1.jsonl", + "t2.jsonl", + "t3.jsonl", + "t3_plan.jsonl", + "t3_dialogue.jsonl", + "t3_reflection.jsonl", + "t3_filter.jsonl", + "t4.jsonl", + "apply.jsonl", + "turn.jsonl", ] diff --git a/configs/config.yaml b/configs/config.yaml index efb2e6c..802d7e6 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -15,7 +15,7 @@ t1: max_entries: 512 ttl_s: 300 t2: - backend: inmemory + backend: lancedb k_retrieval: 64 sim_threshold: 0.3 tiers: [exact_semantic, cluster_semantic, archive] @@ -46,12 +46,12 @@ t3: apply_ops: false max_rag_loops: 1 tokens: 256 - temp: 0.7 + temp: 0.2 max_ops_per_turn: 3 allow_reflection: true - backend: rulebased + backend: llm reflection: - backend: rulebased # deterministic summariser; "llm" requires fixtures (see PR84) + backend: llm # deterministic summariser; "llm" requires fixtures (see PR84) summary_tokens: 128 # whitespace-tokenised cap embed: true # embed the reflection summary deterministically log: true # write t3_reflection.jsonl (not part of identity logs) @@ -65,15 +65,15 
@@ t3: epsilon_edit: 0.10 llm: # M3-07 LLM adapter scaffolding — defaults OFF and fixture-driven in CI - provider: fixture # "fixture" | "ollama" - model: qwen3:4b-instruct-q4_K_M + provider: ollama # "fixture" | "ollama" + model: qwen3:4b-instruct endpoint: http://localhost:11434/api/generate max_tokens: 256 temp: 0.2 timeout_ms: 10000 fixtures: - enabled: false # fixtures-only mode for deterministic LLM reflection - path: null # set to fixture file path when enabled + enabled: true # fixtures-only mode for deterministic LLM reflection + path: tests/fixtures/llm_cassettes/reflection.jsonl t4: enabled: true delta_norm_cap_l2: 1.5 diff --git a/configs/prompts/clematis_dialogue_template.txt b/configs/prompts/clematis_dialogue_template.txt new file mode 100644 index 0000000..8bdda19 --- /dev/null +++ b/configs/prompts/clematis_dialogue_template.txt @@ -0,0 +1 @@ +{style_prefix}| persona: {identity}. guidelines: answer the user's question or request first; reference known user facts accurately; mention your own name at most once; keep replies under three sentences; weave in relevant memories when helpful: {snippets_text}. intent: {intent}. summary: {labels}. 
diff --git a/dist.1/clematis-0.10.2-py3-none-any.whl b/dist.1/clematis-0.10.2-py3-none-any.whl new file mode 100644 index 0000000..a2fd7e0 Binary files /dev/null and b/dist.1/clematis-0.10.2-py3-none-any.whl differ diff --git a/dist.1/clematis-0.10.2.tar.gz b/dist.1/clematis-0.10.2.tar.gz new file mode 100644 index 0000000..03d89bf Binary files /dev/null and b/dist.1/clematis-0.10.2.tar.gz differ diff --git a/dist.2/clematis-0.10.2-py3-none-any.whl b/dist.2/clematis-0.10.2-py3-none-any.whl new file mode 100644 index 0000000..a2fd7e0 Binary files /dev/null and b/dist.2/clematis-0.10.2-py3-none-any.whl differ diff --git a/dist.2/clematis-0.10.2.tar.gz b/dist.2/clematis-0.10.2.tar.gz new file mode 100644 index 0000000..f6cb4a9 Binary files /dev/null and b/dist.2/clematis-0.10.2.tar.gz differ diff --git a/docs/m3/lance.md b/docs/m3/lance.md index 3685b72..2fdc54d 100644 --- a/docs/m3/lance.md +++ b/docs/m3/lance.md @@ -1,12 +1,14 @@ ## Appendix — LanceDB optional integration (T2 reader) -**Status:** Shipped (optional, defaults OFF). The in‑memory reader remains the default; LanceDB is an opt‑in backend for T2 retrieval. +**Status:** Shipped (optional, defaults OFF). The in-memory reader remains the default; LanceDB is an opt-in backend for T2 retrieval. ### Why -Use LanceDB to persist and query the memory index with vector search while keeping CI deterministic (CI still uses the in‑memory path). + +Use LanceDB to persist and query the memory index with vector search while keeping CI deterministic (CI still uses the in-memory path). 
### Enable locally -Minimal config sketch (adjust keys/paths to your environment; validation will enforce exact names and bounds): + +Minimal config sketch (adjust keys/paths to your environment; validation enforces exact names and bounds): ```yaml t2: @@ -19,19 +21,40 @@ lance: partitions: owner_quarter: true # optional partitioning (validated in tests) precompute_norms: true # required when using fp16 indexes -> Tip: With `reader: auto`, the system will try Lance first and **fall back** to the in‑memory reader when Lance is unavailable or misconfigured (emits a single warning). +``` + +> Tip: With `reader: "auto"`, the system tries Lance first and **falls back** to the in-memory reader when Lance is unavailable or misconfigured (emits a single warning; behavior remains deterministic). + +### Deterministic recency filters + +- The exact-semantic tier applies a rolling `recent_days` cutoff (default 30 days). When LanceDB is in use, the index now honors the orchestrator-supplied `hints["now"]` timestamp before falling back to `datetime.now()`. +- Identity paths already pass `now` (e.g., the console sets `--now-ms`); keep doing so in bespoke scripts to avoid drift when replaying logs or comparing bundles. +- When `hints["now"]` is absent, LanceDB and the in-memory index both fall back to wall-clock UTC, which is acceptable for ad-hoc runs but not identity comparisons. 
+ ### Quick verification + Run the existing reader and Lance optional tests: + +```bash # Reader parity / integration pytest -q tests/integration/test_t2_reader_parity.py -# Lance optional index behaviors +# Lance optional index behaviors (requires lancedb extras) pytest -q tests/test_lance_index_optional.py +``` + ### Optional: seed a tiny demo table -If you add a helper script like `scripts/seed_lance_demo.py`, you can populate a tiny Lance table and verify parity end‑to‑end: + +If you add a helper script like `scripts/seed_lance_demo.py`, you can populate a tiny Lance table and verify parity end-to-end: + +```bash python scripts/seed_lance_demo.py # creates a local .lancedb with a small table pytest -q tests/integration/test_t2_reader_parity.py +``` + ### Failure modes & fallbacks -- Missing Lance URI/table → falls back to in‑memory; warns once. + +- Missing Lance URI/table → falls back to in-memory; warns once. - Invalid partition spec → config validator rejects with a clear message. -- FP16 without precomputed norms → test warns (performance) or validator flags depending on config. -CI remains unaffected: workflows continue to run **offline** and **in‑memory** to guarantee deterministic results. +- FP16 without precomputed norms → tests warn (performance) or the validator flags the config, depending on whether the fast path is requested. + +CI remains unaffected: workflows continue to run **offline** and **in-memory** to guarantee deterministic results. diff --git a/docs/operator-guide.md b/docs/operator-guide.md index 3f4243e..217d1ac 100644 --- a/docs/operator-guide.md +++ b/docs/operator-guide.md @@ -204,6 +204,7 @@ A minimal, deterministic console for operators to drive the orchestrator locally - Set the env in §2 (`TZ=UTC`, `PYTHONHASHSEED=0`, `SOURCE_DATE_EPOCH=315532800`, `CLEMATIS_NETWORK_BAN=1`). - The console warns when these do not match. 
- Logs are written to `CLEMATIS_LOG_DIR` if set; otherwise the console uses a temporary directory and cleans it up. +- T2’s exact-semantic recency window (controlled by `t2.exact_recent_days`) uses the orchestrator’s notion of “now”. Supplying `--now-ms` (or exporting `SOURCE_DATE_EPOCH`) keeps the in-memory and LanceDB backends aligned for log replays and golden comparisons. **Examples** ```bash diff --git a/man/clematis-chat.1 b/man/clematis-chat.1 new file mode 100644 index 0000000..617430f --- /dev/null +++ b/man/clematis-chat.1 @@ -0,0 +1,14 @@ +.TH clematis-chat 1 "2024-01-01" "Clematis 0.10.2" "User Commands" +.SH NAME +clematis\-chat \\\- Delegates to scripts/ for 'chat' +.SH SYNOPSIS +usage: clematis chat chat [\-h] +.SH DESCRIPTION +Delegates to scripts/ for 'chat' +.SH OPTIONS +.nf +usage: clematis chat chat [\-h] + +options: + \-h, \-\-help show this help message and exit +.fi diff --git a/man/clematis.1 b/man/clematis.1 index 4ccc062..b95067b 100644 --- a/man/clematis.1 +++ b/man/clematis.1 @@ -18,6 +18,7 @@ options: subcommands: bench\-t4 Delegates to scripts + chat Interactive chat loop with optional LLM backend console Deterministic local console (step demo Delegates to scripts/ export\-logs Export logs + latest snapshot into a JSON bundle diff --git a/pyproject.toml b/pyproject.toml index 56f23d7..e14aa1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,9 @@ cli-demo = [ 'numpy>=2.1; python_version >= "3.13"', "pyyaml>=6.0" ] +embeddings = [ + "sentence-transformers>=2.6,<3", +] frontend = [ "nodeenv>=1.8.0" ] diff --git a/scripts/bench_t4.py b/scripts/bench_t4.py index 8920464..ff941de 100644 --- a/scripts/bench_t4.py +++ b/scripts/bench_t4.py @@ -10,7 +10,7 @@ try: # pragma: no cover — allow shim fallback when package missing from clematis.scripts.bench_t4 import main as _impl_main _IMPORT_ERROR = None -except ModuleNotFoundError as exc: # pragma: no cover +except ModuleNotFoundError as exc: # pragma: no cover _impl_main = None
# type: ignore[assignment] _IMPORT_ERROR = exc diff --git a/scripts/chat.py b/scripts/chat.py new file mode 100644 index 0000000..be459b0 --- /dev/null +++ b/scripts/chat.py @@ -0,0 +1,859 @@ +#!/usr/bin/env python3 +"""Interactive Clematis chat loop.""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import sys +from datetime import datetime, timezone +from pathlib import Path +from types import SimpleNamespace +from typing import Any, Dict, Optional, List + +try: + import yaml +except ImportError: + yaml = None + +REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if REPO_ROOT not in sys.path: + sys.path.insert(0, REPO_ROOT) + +from configs.validate import validate_config_api +from scripts.console import adapter_reset, warn_nondeterminism +from clematis.cli._config import discover_config_path +from clematis.engine.stages.t2.state import _init_index_from_cfg +from clematis.graph.store import InMemoryGraphStore +from clematis.engine.types import Node, Edge +from clematis.memory.index import InMemoryIndex + +DEFAULT_LOG_DIR = Path(".logs") / "chat" +DEFAULT_FIXTURE = Path("fixtures") / "llm" / "qwen_small.jsonl" +DEFAULT_TEMPLATE_PATH = (Path(REPO_ROOT) / "configs" / "prompts" / "clematis_dialogue_template.txt").resolve() +DEFAULT_IDENTITY = ( + "You are Clematis, the knowledge gardener. Maintain this persona, remember user-provided facts accurately, " + "respond concisely, and never claim to be Qwen or mention Alibaba Cloud." 
+) +DEFAULT_GRAPH_ID = "g:surface" +MAX_HISTORY = 12 +_STOPWORDS = { + "the", + "and", + "that", + "have", + "with", + "this", + "your", + "from", + "into", + "about", + "just", + "like", + "been", + "will", + "what", + "when", + "where", + "there", + "them", + "then", + "over", + "only", + "into", + "upon", + "onto", + "such", + "some", + "more", + "many", +} +DEFAULT_LANCEDB_URI = Path(os.getenv("CLEMATIS_LANCEDB_URI", ".data/lancedb_chat")) +DEFAULT_LANCEDB_TABLE = "episodes" +DEFAULT_LANCEDB_META = "meta" + +SEED_MEMORIES = [ + { + "id": "seed_coach", + "owner": "demo", + "text": "Clematis coached a user about maintaining a concept graph with strong labels.", + "importance": 0.75, + "tags": ["seed", "demo"], + }, + { + "id": "seed_story", + "owner": "bot", + "text": "An earlier conversation explored stories about botanical gardens and their symbolism.", + "importance": 0.6, + "tags": ["seed", "story"], + }, + { + "id": "seed_task", + "owner": "demo", + "text": "We evaluated retrieval quality by asking for summaries of recent tasks and reflections.", + "importance": 0.8, + "tags": ["seed", "task"], + }, + { + "id": "seed_graph", + "owner": "bot", + "text": "The assistant mapped related concepts like lattice, vine, and bloom inside the surface graph.", + "importance": 0.7, + "tags": ["seed", "graph"], + }, + { + "id": "seed_journal", + "owner": "demo", + "text": "Clematis guided a traveler through dreams and memory, awakening long-forgotten ideas.", + "importance": 0.65, + "tags": ["seed", "story", "forest", "memories"], + }, + { + "id": "seed_lattice", + "owner": "bot", + "text": "A lattice of thoughts once converged into a bloom called Ambrose—an echo of an older age.", + "importance": 0.7, + "tags": ["seed", "bot", "lattice", "ambrose"], + }, + { + "id": "seed_well", + "owner": "demo", + "text": "Beside a moonlit well, Clematis listened to Vecipher describe a manor hidden by ivy.", + "importance": 0.75, + "tags": ["seed", "manor", "vecipher", "moonlight"], + 
}, + { + "id": "seed_dell", + "owner": "bot", + "text": "The garden once became a moonlit dell; footsteps and laughter still linger there.", + "importance": 0.6, + "tags": ["seed", "garden", "dell", "moonlight"], + }, + { + "id": "seed_wine", + "owner": "demo", + "text": "Clematis never brewed wine but remembers the scent of cracked casks along the manor road.", + "importance": 0.7, + "tags": ["seed", "wine", "manor", "memories"], + }, + { + "id": "seed_solstice", + "owner": "bot", + "text": "During the solstice, the breeze carried petals and riddles across the canopy.", + "importance": 0.65, + "tags": ["seed", "solstice", "forest", "breeze"], + }, + { + "id": "seed_library", + "owner": "demo", + "text": "Clematis cataloged memories into a living library of roots and vines.", + "importance": 0.72, + "tags": ["seed", "library", "roots", "vines"], + }, + { + "id": "seed_watch", + "owner": "bot", + "text": "Water watches quietly as stars fall into the reflection pool.", + "importance": 0.6, + "tags": ["seed", "water", "stars", "reflection"], + }, +] + + +def _load_config(path: Optional[Path]) -> Dict[str, Any]: + if path is None or yaml is None: + raw: Dict[str, Any] = {} + else: + try: + with path.open("r", encoding="utf-8") as fh: + raw = yaml.safe_load(fh) or {} + except FileNotFoundError: + raw = {} + ok, errs, cfg = validate_config_api(dict(raw)) + if not ok or cfg is None: + msg = "\n".join(errs or ["invalid configuration"]) + print(msg, file=sys.stderr) + raise SystemExit(2) + return cfg + + +class _AttrDict(dict): + """Dict that also supports attribute access recursively.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError as exc: + raise AttributeError(name) from exc + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + try: + del self[name] + except KeyError as exc: + raise AttributeError(name) from exc + + +def _to_attrdict(obj: Any) -> Any: + 
if isinstance(obj, dict): + return _AttrDict({k: _to_attrdict(v) for k, v in obj.items()}) + if isinstance(obj, list): + return [_to_attrdict(v) for v in obj] + if isinstance(obj, tuple): + return tuple(_to_attrdict(v) for v in obj) + return obj + + +def _ensure_store(state: Dict[str, Any]) -> InMemoryGraphStore: + store = state.get("store") + if not isinstance(store, InMemoryGraphStore): + store = InMemoryGraphStore() + state["store"] = store + graph = store.ensure(DEFAULT_GRAPH_ID) + state.setdefault("active_graphs", [DEFAULT_GRAPH_ID]) + if not state.get("_chat_seeded_graph"): + root = Node(id="n:root", label="memory-root", attrs={"notes": "chat demo root"}) + store.upsert_nodes(DEFAULT_GRAPH_ID, [root]) + state["_chat_seeded_graph"] = True + return store + + +def _empty_state() -> Dict[str, Any]: + empty_meta = { + "schema": "v1.1", + "merges": [], + "splits": [], + "promotions": [], + "concept_nodes_count": 0, + "edges_count": 0, + } + graph = {"nodes": {}, "edges": {}, "meta": dict(empty_meta)} + state = {"graph": dict(graph), "gel": dict(graph), "version_etag": "0", "logs": [], "store": {}} + return state + + +def _load_state(snapshot_path: Optional[str]) -> Dict[str, Any]: + try: + state = adapter_reset(snapshot_path) + except SystemExit as exc: + code = getattr(exc, "code", None) + if snapshot_path or code not in (None, 0, 2): + raise + print("[chat] WARNING: no snapshot available; starting from empty state", file=sys.stderr) + state = _empty_state() + if isinstance(state, dict): + state.setdefault("logs", []) + store = state.get("store") + if not isinstance(store, InMemoryGraphStore): + state["store"] = {} + state.setdefault("_chat_history", []) + state.setdefault("_chat_memory_usage", {}) + state.setdefault("_chat_mem_node_map", {}) + state.setdefault("_chat_memories", []) + return state + + +def _get_embedding_adapter(dim: int): + try: + from clematis.adapters.embeddings import BGEAdapter # local import for optional dep + except ModuleNotFoundError as 
exc: + missing = exc.name or "numpy" + print(f"Missing dependency for embeddings: {missing}. Install project requirements.", file=sys.stderr) + return None + return BGEAdapter(dim=dim) + + +def _tune_t2(cfg_root: Dict[str, Any]) -> None: + t2 = cfg_root.setdefault("t2", {}) + t2.setdefault("backend", "lancedb") + t2.setdefault("sim_threshold", 0.05) + t2.setdefault("k_retrieval", 6) + t2.setdefault("exact_recent_days", 365) + ranking = t2.setdefault("ranking", {}) + ranking.setdefault("alpha_sim", 0.6) + ranking.setdefault("beta_recency", 0.35) + ranking.setdefault("gamma_importance", 0.05) + ldb = t2.setdefault("lancedb", {}) + ldb.setdefault("uri", str(DEFAULT_LANCEDB_URI)) + ldb.setdefault("table", DEFAULT_LANCEDB_TABLE) + ldb.setdefault("meta_table", DEFAULT_LANCEDB_META) + + +def _now_iso() -> str: + return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + + +def _owner_quarter(owner: str, iso_ts: str) -> str: + try: + dt = datetime.fromisoformat(iso_ts.replace("Z", "+00:00")) + quarter = (dt.month - 1) // 3 + 1 + return f"{owner}_{dt.year}Q{quarter}" + except Exception: + return f"{owner}_unknown" + + +def _record_memory_id(state: Dict[str, Any], mem_id: str) -> None: + ids = state.setdefault("_chat_memory_ids", []) + ids.append(mem_id) + + +def _memory_exists(state: Dict[str, Any], mem_id: str) -> bool: + ids = state.get("_chat_memory_ids", []) + return mem_id in ids + + +def _extract_keywords(text: str, limit: int = 6) -> List[str]: + tokens = re.findall(r"[A-Za-z0-9']+", text.lower()) + keywords: List[str] = [] + for tok in tokens: + if len(tok) <= 3: + continue + if tok in _STOPWORDS: + continue + if tok not in keywords: + keywords.append(tok) + if len(keywords) >= limit: + break + return keywords + + +def _speaker_label(owner: str, kind: str) -> str: + own = (owner or "").lower() + if kind == "dialogue": + if own in {"agent", "user"}: + return "user" + if own in {"assistant", "bot"}: + return "assistant" + if own in {"demo", "seed"}: + 
return "seed" + return owner + + +def _append_history(state: Dict[str, Any], role: str, text: str, turn: int) -> None: + if not text: + return + history = state.setdefault("_chat_history", []) + history.append({"role": role, "text": text, "turn": turn}) + if len(history) > MAX_HISTORY: + del history[:-MAX_HISTORY] + + +def _mark_memory_usage(state: Dict[str, Any], mem_ids: List[str]) -> None: + if not mem_ids: + return + usage = state.setdefault("_chat_memory_usage", {}) + idx = state.get("mem_index") + store = state.get("store") + node_map = state.setdefault("_chat_mem_node_map", {}) + mem_records = state.setdefault("_chat_memories", []) + for mem_id in dict.fromkeys(mem_ids): + if not mem_id: + continue + usage[mem_id] = usage.get(mem_id, 0) + 1 + usage_tag = f"usage:{usage[mem_id]}" + if hasattr(idx, "_eps"): + try: + for ep in getattr(idx, "_eps", []): + if ep.get("id") == mem_id: + tags = list(ep.get("tags") or []) + tags = [t for t in tags if not str(t).startswith("usage:")] + tags.append(usage_tag) + ep["tags"] = tags + ep["usage_count"] = usage[mem_id] + break + except Exception: + pass + if isinstance(store, InMemoryGraphStore): + try: + node_id = node_map.get(mem_id) + if node_id: + graph = store.get_graph(DEFAULT_GRAPH_ID) + node = graph.nodes.get(node_id) + if node: + tags = list(node.attrs.get("tags", [])) + tags = [t for t in tags if not str(t).startswith("usage:")] + tags.append(usage_tag) + node.attrs["tags"] = tags + node.attrs["usage_count"] = usage[mem_id] + except Exception: + pass + for rec in mem_records: + if rec.get("id") == mem_id: + rec["usage_count"] = usage[mem_id] + tags = [t for t in rec.get("tags", []) if not str(t).startswith("usage:")] + tags.append(usage_tag) + rec["tags"] = tags + break + + +def _print_memories(state: Dict[str, Any], limit: int = 10, *, verbose: bool = False) -> None: + mem_records = list(state.get("_chat_memories", [])) + if not mem_records: + print("[mem] no memories stored yet.") + return + records = 
list(mem_records)[-limit:][::-1] + if verbose: + print(f"[mem-v] showing up to {limit} memories (most recent first):") + for rec in records: + ident = rec.get("id") + owner = rec.get("owner") + usage = rec.get("usage_count", 0) + turn = rec.get("turn") + text = rec.get("text", "") + kind = rec.get("kind") or ("seed" if (turn is None or turn < 0) else "dialogue") + speaker = rec.get("speaker") or _speaker_label(owner, kind) + tags = ", ".join(str(t) for t in rec.get("tags", [])) + print( + f" {ident} (kind={kind}, speaker={speaker}, owner={owner}, usage={usage}, turn={turn}) -> {text}" + ) + if tags: + print(f" tags: {tags}") + else: + print(f"[mem] showing up to {limit} memories (most recent first):") + for rec in records: + ident = rec.get("id") + owner = rec.get("owner") + text = rec.get("text", "") + used = rec.get("usage_count", 0) + turn = rec.get("turn") + kind = rec.get("kind") or ("seed" if (turn is None or turn < 0) else "dialogue") + speaker = rec.get("speaker") or _speaker_label(owner, kind) + if kind == "dialogue": + label = speaker + else: + label = "seed" + prefix = f"{ident} [{label}]" + if used: + prefix += f" (used x{used})" + print(f" {prefix}: {text}") + + +def _ensure_index(state: Dict[str, Any], cfg_t2: Dict[str, Any]): + idx, backend_selected, fallback_reason = _init_index_from_cfg(state, cfg_t2) + if fallback_reason and not state.get("_chat_index_warning"): + print( + f"[chat] WARNING: using fallback in-memory index (reason: {fallback_reason}). 
" + "Install LanceDB support with `pip install \"clematis[lancedb]\"` to enable the configured backend.", + file=sys.stderr, + ) + state["_chat_index_warning"] = True + state["_chat_index_backend"] = backend_selected + return idx + + +def _add_memory_entry( + state: Dict[str, Any], + cfg_t2: Dict[str, Any], + adapter, + *, + text: str, + owner: str, + tags: Optional[list[str]] = None, + importance: float = 0.5, + mem_id: Optional[str] = None, + turn: Optional[int] = None, +) -> None: + if adapter is None or not text: + return + idx = _ensure_index(state, cfg_t2) + seq = state.get("_chat_memory_seq", 0) + if mem_id is None: + mem_id = f"chat_{seq:04d}" + if _memory_exists(state, mem_id): + return + vec = adapter.encode([text])[0] + ts = _now_iso() + role_tag = f"role:{owner}" + base_tags = list(tags or []) if tags else [] + if role_tag not in base_tags: + base_tags.append(role_tag) + base_tags.append("source:chat") + for kw in _extract_keywords(text): + if kw not in base_tags: + base_tags.append(kw) + tag_kw = f"kw:{kw}" + if tag_kw not in base_tags: + base_tags.append(tag_kw) + base_tags = list(dict.fromkeys(base_tags)) + rec_kind = "seed" if (turn is None or turn < 0) else "dialogue" + speaker = _speaker_label(owner, rec_kind) + speaker_tag = f"speaker:{speaker}" + if speaker_tag not in base_tags: + base_tags.append(speaker_tag) + base_tags = list(dict.fromkeys(base_tags)) + episode = { + "id": mem_id, + "owner": owner, + "role": owner, + "text": text, + "ts": ts, + "tags": base_tags, + "importance": float(importance), + "quarter": _owner_quarter(owner, ts), + "turn": turn, + "usage_count": 0, + "vec_full": vec.astype("float32").tolist(), + } + idx.add(episode) + _record_memory_id(state, mem_id) + state["_chat_memory_seq"] = seq + 1 + + # Also mirror into the concept graph for T1/T4 visibility + store = _ensure_store(state) + node_seq = state.get("_chat_node_seq", 0) + node_id = f"n:mem_{node_seq:04d}" + label = text.split(".")[0][:48] or f"memory-{node_seq}" + 
store.upsert_nodes( + DEFAULT_GRAPH_ID, + [ + Node( + id=node_id, + label=label, + attrs={ + "text": text, + "owner": owner, + "role": owner, + "speaker": speaker, + "tags": base_tags, + "mem_id": mem_id, + "turn": turn, + "usage_count": 0, + }, + ) + ], + ) + # Connect memory node to root (both directions) for propagation. + root_id = "n:root" + edge_fw = Edge( + id=f"e:{root_id}->{node_id}", + src=root_id, + dst=node_id, + weight=0.6, + rel="associates", + ) + edge_bw = Edge( + id=f"e:{node_id}->{root_id}", + src=node_id, + dst=root_id, + weight=0.4, + rel="associates", + ) + store.upsert_edges(DEFAULT_GRAPH_ID, [edge_fw, edge_bw]) + state.setdefault("_chat_mem_node_map", {})[mem_id] = node_id + state["_chat_node_seq"] = node_seq + 1 + mem_records = state.setdefault("_chat_memories", []) + for rec in mem_records: + if rec.get("id") == mem_id: + break + else: + mem_records.append( + { + "id": mem_id, + "owner": owner, + "speaker": speaker, + "text": text, + "tags": list(base_tags), + "turn": turn, + "usage_count": 0, + "kind": rec_kind, + } + ) + + +def _seed_memories( + state: Dict[str, Any], + cfg_t2: Dict[str, Any], + adapter, + *, + force: bool = False, +) -> None: + if adapter is None: + return + _ensure_index(state, cfg_t2) + seeded = bool(state.get("_chat_seeded_memories")) + if seeded and not force: + return + for row in SEED_MEMORIES: + _add_memory_entry( + state, + cfg_t2, + adapter, + text=row["text"], + owner=row["owner"], + tags=row.get("tags"), + importance=row.get("importance", 0.5), + mem_id=row["id"], + turn=-1, + ) + state["_chat_seeded_memories"] = True + + +def _wipe_memories(state: Dict[str, Any]) -> None: + state["mem_index"] = InMemoryIndex() + state["mem_backend"] = "inmemory" + state.pop("mem_backend_fallback_reason", None) + state["_chat_seeded_memories"] = False + state["_chat_memory_ids"] = [] + state["_chat_memory_seq"] = 0 + state["_chat_node_seq"] = 0 + state["_chat_history"] = [] + state["_chat_memory_usage"] = {} + 
state["_chat_mem_node_map"] = {} + state["_chat_memories"] = [] + state["_chat_index_warning"] = False + state["_chat_index_backend"] = None + # Reset store/graph snapshots + state["store"] = InMemoryGraphStore() + state["graph"] = {"nodes": {}, "edges": {}, "meta": {"schema": "v1.1"}} + state["gel"] = {"nodes": {}, "edges": {}, "meta": {"schema": "v1.1"}} + state["active_graphs"] = [DEFAULT_GRAPH_ID] + state["_chat_seeded_graph"] = False + _ensure_store(state) + + +def _apply_llm_mode(cfg: Dict[str, Any], args: argparse.Namespace) -> None: + t3 = cfg.setdefault("t3", {}) + t3.setdefault("enabled", True) + t3["apply_ops"] = not bool(getattr(args, "no_apply_ops", False)) + llm_cfg = t3.setdefault("llm", {}) + if args.llm_mode == "rulebased": + t3["backend"] = "rulebased" + return + t3["backend"] = "llm" + if args.llm_mode == "fixture": + llm_cfg["provider"] = "fixture" + fx = llm_cfg.setdefault("fixtures", {}) + fx["enabled"] = True + fx["path"] = str(args.fixture_path or DEFAULT_FIXTURE) + else: # live + llm_cfg["provider"] = "ollama" + llm_cfg["endpoint"] = args.endpoint + llm_cfg["model"] = args.model + llm_cfg["temp"] = float(args.temp) + llm_cfg["timeout_ms"] = int(args.timeout_ms) + fx = llm_cfg.setdefault("fixtures", {}) + fx["enabled"] = False + + +def _tail_jsonl(path: Path) -> Optional[Dict[str, Any]]: + if not path.exists(): + return None + try: + with path.open("r", encoding="utf-8") as fh: + lines = fh.readlines() + if not lines: + return None + return json.loads(lines[-1]) + except Exception: + return None + + +def _iso_from_ms(ms: int) -> str: + dt = datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc) + return dt.isoformat() + + +def _prepare_ctx(cfg: Any, agent_id: str, turn: int, now_ms: int, text: str): + ctx = SimpleNamespace() + ctx.turn_id = str(turn) + ctx.agent_id = agent_id + ctx.now_ms = now_ms + ctx.now = _iso_from_ms(now_ms) + ctx.cfg = cfg + ctx.config = cfg + ctx.input_text = text + try: + dialogue_cfg = cfg.t3.dialogue # type: 
ignore[attr-defined] + ctx.style_prefix = getattr(dialogue_cfg, "style_prefix", "") + ctx.identity = getattr(dialogue_cfg, "identity", DEFAULT_IDENTITY) + except Exception: + ctx.style_prefix = "" + return ctx + + +def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: + p = argparse.ArgumentParser(description="Interactive Clematis chat demo.") + p.add_argument("--config", type=str, default=None, help="Path to config.yaml (auto-discovered if omitted).") + p.add_argument("--snapshot", type=str, default=None, help="Seed state from snapshot JSON.") + p.add_argument("--log-dir", type=str, default=None, help="Directory for CLEMATIS_LOG_DIR (default: ./.logs/chat).") + p.add_argument("--agent", type=str, default="clematis", help="Agent id for the turn context.") + p.add_argument("--llm-mode", choices=["rulebased", "fixture", "live"], default="live", help="Planner/dialogue backend.") + p.add_argument("--model", type=str, default="qwen3:4b-instruct", help="Model name for --llm-mode=live.") + p.add_argument("--endpoint", type=str, default="http://localhost:11434/api/generate", help="Endpoint for --llm-mode=live.") + p.add_argument("--temp", type=float, default=0.2, help="Temperature passed to the LLM.") + p.add_argument("--timeout-ms", type=int, default=10000, help="Timeout for the LLM call in milliseconds.") + p.add_argument("--fixture-path", type=str, default=str(DEFAULT_FIXTURE), help="Fixture JSONL for --llm-mode=fixture.") + p.add_argument("--no-apply-ops", action="store_true", help="Disable T3 apply_ops (graph edits).") + p.add_argument("--now-ms", type=int, default=None, help="Initial logical timestamp (ms since epoch).") + p.add_argument("--step-ms", type=int, default=1000, help="Delta applied to now_ms each turn.") + p.add_argument("--show-plan", action="store_true", help="Print a compact plan/metrics summary each turn.") + p.add_argument("--no-network-ban", action="store_true", help="Unset CLEMATIS_NETWORK_BAN for live LLM smoke.") + 
p.add_argument("--no-seed", action="store_true", help="Skip seeding baseline retrieval memories.") + p.add_argument("--no-auto-memories", action="store_true", help="Skip writing chat turns back into memory index.") + return p.parse_args(argv) + + +def main(argv: Optional[list[str]] = None) -> int: + args = parse_args(argv) + + cfg_path, _ = discover_config_path(args.config) + if cfg_path is not None and not cfg_path.exists(): + print(f"Config not found: {cfg_path}", file=sys.stderr) + return 2 + cfg_dict = _load_config(cfg_path) + t3_cfg = cfg_dict.setdefault("t3", {}) + dialogue_cfg = t3_cfg.setdefault("dialogue", {}) + dialogue_cfg.setdefault("identity", DEFAULT_IDENTITY) + dialogue_cfg.setdefault("style_prefix", "clematis") + if DEFAULT_TEMPLATE_PATH.exists(): + dialogue_cfg.setdefault("template_file", str(DEFAULT_TEMPLATE_PATH)) + _apply_llm_mode(cfg_dict, args) + _tune_t2(cfg_dict) + try: + DEFAULT_LANCEDB_URI.parent.mkdir(parents=True, exist_ok=True) + except Exception: + pass + cfg = _to_attrdict(cfg_dict) + + log_dir = Path(args.log_dir or DEFAULT_LOG_DIR) + log_dir.mkdir(parents=True, exist_ok=True) + os.environ["CLEMATIS_LOG_DIR"] = str(log_dir) + os.environ["CLEMATIS_T3_ALLOW"] = "1" + os.environ["CLEMATIS_T3_APPLY_OPS"] = "0" if args.no_apply_ops else "1" + os.environ["CLEMATIS_LLM_MODE"] = args.llm_mode + if args.no_network_ban: + os.environ["CLEMATIS_NETWORK_BAN"] = "0" + else: + os.environ.setdefault("CLEMATIS_NETWORK_BAN", "1") + + warn_nondeterminism() + + snapshot = args.snapshot + state = _load_state(snapshot) + store = _ensure_store(state) + + adapter = _get_embedding_adapter(int(cfg_dict.get("k_surface", 32))) + if adapter is None: + return 2 + cfg_t2 = cfg_dict.setdefault("t2", {}) + if not args.no_seed: + _seed_memories(state, cfg_t2, adapter, force=False) + + try: + from clematis.engine.orchestrator.core import run_turn + except ModuleNotFoundError as exc: + mod = exc.name or "dependency" + print(f"Missing dependency: {mod}. 
Install project requirements to run chat.", file=sys.stderr) + return 2 + + now_ms = args.now_ms or int(datetime.now(timezone.utc).timestamp() * 1000) + turn = 1 + + print("Interactive Clematis chat (type /exit to quit, /reset to reload snapshot).") + print(f"Config: {cfg_path if cfg_path else 'defaults'}") + print(f"Logs: {log_dir}") + if args.llm_mode != "rulebased": + print("LLM backend active. Ensure Ollama is running for --llm-mode=live.") + print("Commands: [wipe] clears memories, [seed] restores demo memories.") + + while True: + try: + text = input("you> ").strip() + except EOFError: + print() + break + except KeyboardInterrupt: + print() + break + + if not text: + continue + if text.lower() in {"/exit", ":exit", "/quit", ":quit"}: + break + lowered = text.lower() + if lowered in {"/reset", ":reset"}: + state = _load_state(snapshot) + _ensure_store(state) + if not args.no_seed: + _seed_memories(state, cfg_t2, adapter, force=False) + turn = 1 + now_ms = args.now_ms or int(datetime.now(timezone.utc).timestamp() * 1000) + print("[reset] state reloaded.") + continue + if lowered in {"[wipe]", "/wipe"}: + _wipe_memories(state) + print("[wipe] memories cleared.") + if not args.no_seed: + print(" (use [seed] to restore demo memories.)") + continue + if lowered in {"[seed]", "/seed"}: + _seed_memories(state, cfg_t2, adapter, force=True) + print("[seed] baseline memories restored.") + continue + if lowered in {"[mem]", "/mem", ":mem"}: + _print_memories(state, verbose=False) + continue + if lowered in {"[mem-v]", "/mem-v", ":mem-v"}: + _print_memories(state, verbose=True) + continue + + _append_history(state, "user", text, turn) + ctx = _prepare_ctx(cfg, args.agent, turn, now_ms, text) + result = run_turn(ctx, state, text) + print(result.line) + if not args.no_auto_memories: + _add_memory_entry( + state, + cfg_t2, + adapter, + text=text, + owner="agent", + tags=["chat", "user"], + turn=turn, + ) + if result.line: + _add_memory_entry( + state, + cfg_t2, + adapter, + 
text=result.line, + owner="assistant", + tags=["chat", "assistant"], + turn=turn, + ) + _append_history(state, "assistant", result.line, turn) + retrieved_ids = list(state.pop("_chat_last_retrieved", [])) + if retrieved_ids: + _mark_memory_usage(state, retrieved_ids) + + if args.show_plan: + plan_entry = _tail_jsonl(log_dir / "t3_plan.jsonl") + dialogue_entry = _tail_jsonl(log_dir / "t3_dialogue.jsonl") + if plan_entry: + ops = plan_entry.get("ops_counts", {}) + backend = plan_entry.get("backend") + print(f" plan: backend={backend} ops={ops} rag={plan_entry.get('rag_used')}") + if plan_entry.get("retrieved_ids"): + print(f" retrieved_ids={plan_entry.get('retrieved_ids')}") + if dialogue_entry: + print( + f" speak: tokens={dialogue_entry.get('tokens')} truncated={dialogue_entry.get('truncated')} backend={dialogue_entry.get('backend')}" + ) + usage = state.get("_chat_memory_usage", {}) + if usage: + recent_usage = {k: usage[k] for k in list(usage.keys())[-4:]} + print(f" memory_usage={recent_usage}") + + turn += 1 + now_ms += int(args.step_ms) + + print("bye.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/cli/goldens/completions/bash.txt b/tests/cli/goldens/completions/bash.txt index 041463a..d195e26 100644 --- a/tests/cli/goldens/completions/bash.txt +++ b/tests/cli/goldens/completions/bash.txt @@ -1,9 +1,10 @@ # AUTOMATICALLY GENERATED by `shtab` -_shtab_clematis_subparsers=('bench-t4' 'console' 'demo' 'export-logs' 'inspect-snapshot' 'rotate-logs' 'seed-lance-demo' 'validate') +_shtab_clematis_subparsers=('bench-t4' 'chat' 'console' 'demo' 'export-logs' 'inspect-snapshot' 'rotate-logs' 'seed-lance-demo' 'validate') _shtab_clematis_option_strings=('-h' '--help' '--version') _shtab_clematis_bench_t4_option_strings=('-h' '--help' '--json' '--table' '--quiet' '--verbose') +_shtab_clematis_chat_option_strings=('-h' '--help') _shtab_clematis_console_option_strings=('-h' '--help') _shtab_clematis_demo_option_strings=('--json' 
'--table' '--quiet' '--verbose' '-h' '--help') _shtab_clematis_export_logs_option_strings=('-h' '--help') @@ -14,7 +15,7 @@ _shtab_clematis_validate_option_strings=('--json' '--table' '--quiet' '--verbose -_shtab_clematis_pos_0_choices=('bench-t4' 'console' 'demo' 'export-logs' 'inspect-snapshot' 'rotate-logs' 'seed-lance-demo' 'validate') +_shtab_clematis_pos_0_choices=('bench-t4' 'chat' 'console' 'demo' 'export-logs' 'inspect-snapshot' 'rotate-logs' 'seed-lance-demo' 'validate') _shtab_clematis_pos_0_nargs=A... _shtab_clematis__h_nargs=0 @@ -28,6 +29,8 @@ _shtab_clematis_bench_t4___json_nargs=0 _shtab_clematis_bench_t4___table_nargs=0 _shtab_clematis_bench_t4___quiet_nargs=0 _shtab_clematis_bench_t4___verbose_nargs=0 +_shtab_clematis_chat__h_nargs=0 +_shtab_clematis_chat___help_nargs=0 _shtab_clematis_console__h_nargs=0 _shtab_clematis_console___help_nargs=0 _shtab_clematis_demo___json_nargs=0 diff --git a/tests/cli/goldens/completions/zsh.txt b/tests/cli/goldens/completions/zsh.txt index 7c70aed..2cde2b1 100644 --- a/tests/cli/goldens/completions/zsh.txt +++ b/tests/cli/goldens/completions/zsh.txt @@ -6,6 +6,7 @@ _shtab_clematis_commands() { local _commands=( "bench-t4:Delegates to scripts\/bench_t4.py" + "chat:" "console:" "demo:Delegates to scripts\/run_demo.py" "export-logs:Export logs \+ latest snapshot into a JSON bundle (delegates to scripts\/)" @@ -31,6 +32,10 @@ _shtab_clematis_bench_t4_options=( "(-)*:Pass-through arguments for scripts\/bench_t4.py.:" ) +_shtab_clematis_chat_options=( + "(- : *)"{-h,--help}"[show this help message and exit]" +) + _shtab_clematis_console_options=( "(- : *)"{-h,--help}"[show this help message and exit]" ) @@ -98,6 +103,7 @@ _shtab_clematis() { curcontext="${curcontext%:*:*}:_shtab_clematis-$line[1]:" case $line[1] in bench-t4) _arguments -C -s $_shtab_clematis_bench_t4_options ;; + chat) _arguments -C -s $_shtab_clematis_chat_options ;; console) _arguments -C -s $_shtab_clematis_console_options ;; demo) _arguments -C -s 
$_shtab_clematis_demo_options ;; export-logs) _arguments -C -s $_shtab_clematis_export_logs_options ;; diff --git a/tests/cli/goldens/help/chat.txt b/tests/cli/goldens/help/chat.txt new file mode 100644 index 0000000..fc7d206 --- /dev/null +++ b/tests/cli/goldens/help/chat.txt @@ -0,0 +1,4 @@ +usage: clematis [-h] [--version] + +options: + -h, --help show this help message and exit diff --git a/tests/cli/goldens/help/top.txt b/tests/cli/goldens/help/top.txt index 0555296..05be092 100644 --- a/tests/cli/goldens/help/top.txt +++ b/tests/cli/goldens/help/top.txt @@ -9,6 +9,7 @@ options: subcommands: bench-t4 Delegates to scripts/bench_t4.py + chat Interactive chat loop with optional LLM backend console Deterministic local console (step/reset/status/compare) demo Delegates to scripts/ export-logs Export logs + latest snapshot into a JSON bundle diff --git a/tests/cli/test_cli_completions_golden.py b/tests/cli/test_cli_completions_golden.py index 18075d4..72d612e 100644 --- a/tests/cli/test_cli_completions_golden.py +++ b/tests/cli/test_cli_completions_golden.py @@ -6,7 +6,7 @@ try: import shtab # type: ignore -except Exception: # pragma: no cover +except Exception: # pragma: no cover shtab = None diff --git a/tests/fixtures/llm_cassettes/reflection.jsonl b/tests/fixtures/llm_cassettes/reflection.jsonl new file mode 100644 index 0000000..f97e575 --- /dev/null +++ b/tests/fixtures/llm_cassettes/reflection.jsonl @@ -0,0 +1 @@ +{"prompt":"REFLECT:", "completion":"summary: user greeted; intents: greet; labels: neutral"} diff --git a/tests/test_t3_llm_adapter.py b/tests/test_t3_llm_adapter.py index 1ed3de4..9ffb193 100644 --- a/tests/test_t3_llm_adapter.py +++ b/tests/test_t3_llm_adapter.py @@ -74,12 +74,15 @@ def test_build_llm_prompt_contains_sorted_fields(): db = make_dialog_bundle(ctx, {}, t1, t2, plan) prompt = build_llm_prompt(db, plan) - # style, intent, labels (sorted), snippets top-2 (e1, e2), and input present - assert "style_prefix: calm" in prompt - assert 
"intent: summary" in prompt - assert "labels: alpha, zeta" in prompt - assert "snippets: e1, e2" in prompt - assert "input: hello world" in prompt + # style, intent, labels (sorted), snippets top-2 (e1, e2), identity, and input present + assert "STYLE_PREFIX: calm" in prompt + assert "INTENT: summary" in prompt + assert "LABELS: alpha, zeta" in prompt + assert "SNIPPET_IDS: e1, e2" in prompt + assert "SNIPPETS:" in prompt + assert "1. [e1]" in prompt + assert "IDENTITY: " in prompt + assert "INPUT: hello world" in prompt def test_llm_speak_with_deterministic_adapter_is_stable_and_capped(): diff --git a/tests/test_t3_rag.py b/tests/test_t3_rag.py index ed9e7e2..7d1c1c2 100644 --- a/tests/test_t3_rag.py +++ b/tests/test_t3_rag.py @@ -74,6 +74,7 @@ def fake_retrieve(payload): assert asdict(p2) == asdict(p), "Plan must be unchanged when already_used=True" assert m["rag_used"] is False and m["rag_blocked"] is True assert m["pre_s_max"] == m["post_s_max"] == 0.2 + assert m["retrieved_ids"] == [] def test_refinement_improves_intent_and_keeps_ops_capped(): @@ -110,6 +111,7 @@ def fake_retrieve(payload): assert m["rag_used"] is True and m["rag_blocked"] is False assert m["pre_s_max"] == 0.2 and m["post_s_max"] == 0.85 assert m["k_retrieved"] == 2 + assert m["retrieved_ids"] == ["x2", "x1"] # Payload sanity assert seen.get("query") == "hello world" assert seen.get("owner") in ("agent", "world", "any") @@ -131,6 +133,7 @@ def fake_retrieve(_): ] assert speak_intents[0] == "question" assert m["post_s_max"] == pytest.approx(0.25) + assert m["retrieved_ids"] == ["x"] def test_optional_editgraph_added_when_evidence_sufficient_and_absent(): @@ -175,3 +178,4 @@ def fake_retrieve(_): p2, m = rag_once(b, plan, fake_retrieve) assert asdict(p2) == asdict(plan) assert m["rag_used"] is False and m["rag_blocked"] is False and m["k_retrieved"] == 0 + assert m["retrieved_ids"] == []