diff --git a/.data/snapshots/state_AgentA.json.meta b/.data/snapshots/state_AgentA.json.meta index 83fec52..e4bf186 100644 --- a/.data/snapshots/state_AgentA.json.meta +++ b/.data/snapshots/state_AgentA.json.meta @@ -1 +1 @@ -{"created_at": "2025-10-09T08:56:21Z", "schema_version": "v1"} +{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} diff --git a/.data/snapshots/state_Ambrose.json.meta b/.data/snapshots/state_Ambrose.json.meta index 83fec52..e4bf186 100644 --- a/.data/snapshots/state_Ambrose.json.meta +++ b/.data/snapshots/state_Ambrose.json.meta @@ -1 +1 @@ -{"created_at": "2025-10-09T08:56:21Z", "schema_version": "v1"} +{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} diff --git a/.data/snapshots/state_agent.json.meta b/.data/snapshots/state_agent.json.meta index 2dbedca..e4bf186 100644 --- a/.data/snapshots/state_agent.json.meta +++ b/.data/snapshots/state_agent.json.meta @@ -1 +1 @@ -{"created_at": "2025-10-09T08:56:18Z", "schema_version": "v1"} +{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} diff --git a/.data/snapshots/state_smoke.json.meta b/.data/snapshots/state_smoke.json.meta index 83fec52..e4bf186 100644 --- a/.data/snapshots/state_smoke.json.meta +++ b/.data/snapshots/state_smoke.json.meta @@ -1 +1 @@ -{"created_at": "2025-10-09T08:56:21Z", "schema_version": "v1"} +{"created_at": "1980-01-01T00:00:00Z", "schema_version": "v1"} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3064e30..aaef4af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -420,6 +420,15 @@ jobs: # wheel paths use the data-files target path unzip -l dist/*.whl | grep -E 'share/examples/clematis/gel/(enabled|disabled)\.yaml' + - name: Verify demo run bundle included in artifacts + run: | + set -euo pipefail + echo "Verify demo bundle is present in sdist and wheel" + # sdist paths include a top-level project dir (e.g., name-version/) + tar -tzf dist/*.tar.gz | grep -E 
'(^|/)clematis/examples/run_bundles/run_demo_bundle\.json$' + # wheel paths are under the package directory + unzip -l dist/*.whl | grep -F 'clematis/examples/run_bundles/run_demo_bundle.json' + - name: Assert viewer included in wheel run: | set -euo pipefail diff --git a/.logs/apply.jsonl b/.logs/apply.jsonl index 3fe4b0a..bad3480 100644 --- a/.logs/apply.jsonl +++ b/.logs/apply.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "applied": 0, "clamps": 0, "version_etag": "46", "snapshot": "./.data/snapshots/state_AgentA.json", "cache_invalidations": 0, "ms": 0.746} +{"turn": "demo-1", "agent": "AgentA", "applied": 0, "clamps": 0, "version_etag": "46", "snapshot": "./.data/snapshots/state_AgentA.json", "cache_invalidations": 0, "ms": 0.785} diff --git a/.logs/t1.jsonl b/.logs/t1.jsonl index 1cfa8b2..f863190 100644 --- a/.logs/t1.jsonl +++ b/.logs/t1.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "pops": 5, "iters": 1, "propagations": 3, "radius_cap_hits": 0, "layer_cap_hits": 0, "node_budget_hits": 0, "max_delta": 1.0, "graphs_touched": 1, "cache_hits": 0, "cache_misses": 1, "cache_used": false, "cache_enabled": true, "ms": 0.093, "now": "2025-10-09T08:56:21.581179+00:00"} +{"turn": "demo-1", "agent": "AgentA", "pops": 5, "iters": 1, "propagations": 3, "radius_cap_hits": 0, "layer_cap_hits": 0, "node_budget_hits": 0, "max_delta": 1.0, "graphs_touched": 1, "cache_hits": 0, "cache_misses": 1, "cache_used": false, "cache_enabled": true, "ms": 0.312, "now": "2025-10-10T02:07:37.100667+00:00"} diff --git a/.logs/t2.jsonl b/.logs/t2.jsonl index b437fa4..17bacbb 100644 --- a/.logs/t2.jsonl +++ b/.logs/t2.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "tier_sequence": ["exact_semantic", "cluster_semantic", "archive"], "k_returned": 0, "k_used": 0, "k_residual": 0, "sim_stats": {"mean": 0.0, "max": 0.0}, "score_stats": {"mean": 0.0, "max": 0.0}, "owner_scope": "any", "caps": {"residual_cap": 32}, "cache_enabled": true, "cache_used": true, 
"cache_hits": 0, "cache_misses": 2, "backend": "inmemory", "backend_fallback": false, "hybrid_used": false, "cache_hit": false, "cache_size": 1, "ms": 0.118, "now": "2025-10-09T08:56:21.581179+00:00"} +{"turn": "demo-1", "agent": "AgentA", "tier_sequence": ["exact_semantic", "cluster_semantic", "archive"], "k_returned": 0, "k_used": 0, "k_residual": 0, "sim_stats": {"mean": 0.0, "max": 0.0}, "score_stats": {"mean": 0.0, "max": 0.0}, "owner_scope": "any", "caps": {"residual_cap": 32}, "cache_enabled": true, "cache_used": true, "cache_hits": 0, "cache_misses": 2, "backend": "inmemory", "backend_fallback": false, "hybrid_used": false, "cache_hit": false, "cache_size": 1, "ms": 0.185, "now": "2025-10-10T02:07:37.100667+00:00"} diff --git a/.logs/t3.jsonl b/.logs/t3.jsonl new file mode 100644 index 0000000..d21695d --- /dev/null +++ b/.logs/t3.jsonl @@ -0,0 +1 @@ +{"turn": "demo-1", "agent": "AgentA", "backend": "rulebased", "ops_counts": {"Speak": 1, "RequestRetrieve": 1}, "requested_retrieve": true, "rag_used": true, "ms_plan": 0.042, "ms_rag": 0.066, "ms_speak": 0.034, "now": "2025-10-10T02:07:37.100667+00:00"} diff --git a/.logs/t3_dialogue.jsonl b/.logs/t3_dialogue.jsonl index 3a384f0..bebd9dc 100644 --- a/.logs/t3_dialogue.jsonl +++ b/.logs/t3_dialogue.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "tokens": 6, "truncated": false, "style_prefix_used": false, "snippet_count": 0, "ms": 0.026, "backend": "rulebased", "now": "2025-10-09T08:56:21.581179+00:00"} +{"turn": "demo-1", "agent": "AgentA", "tokens": 6, "truncated": false, "style_prefix_used": false, "snippet_count": 0, "ms": 0.034, "backend": "rulebased", "now": "2025-10-10T02:07:37.100667+00:00"} diff --git a/.logs/t3_plan.jsonl b/.logs/t3_plan.jsonl index d1328cc..2000f4d 100644 --- a/.logs/t3_plan.jsonl +++ b/.logs/t3_plan.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "policy_backend": "rulebased", "backend": "rulebased", "ops_counts": {"Speak": 1, "RequestRetrieve": 1}, 
"requested_retrieve": true, "rag_used": true, "reflection": false, "ms_deliberate": 0.031, "ms_rag": 0.256, "now": "2025-10-09T08:56:21.581179+00:00"} +{"turn": "demo-1", "agent": "AgentA", "policy_backend": "rulebased", "backend": "rulebased", "ops_counts": {"Speak": 1, "RequestRetrieve": 1}, "requested_retrieve": true, "rag_used": true, "reflection": false, "ms_deliberate": 0.042, "ms_rag": 0.066, "now": "2025-10-10T02:07:37.100667+00:00"} diff --git a/.logs/t4.jsonl b/.logs/t4.jsonl index 849b44f..abe3592 100644 --- a/.logs/t4.jsonl +++ b/.logs/t4.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "counts": {"input": 0, "after_cooldown": 0, "after_novelty": 0, "after_l2": 0, "approved": 0, "dropped_tail": 0}, "clamps": {"novelty_clamped": 0, "l2_scale": 1.0}, "cooldowns": {"blocked_ops": 0}, "caps": {"delta_norm_cap_l2": 1.5, "novelty_cap_per_node": 0.3, "churn_cap_edges": 64}, "approved": 0, "rejected": 0, "reasons": [], "ms": 0.006, "now": "2025-10-09T08:56:21.581179+00:00"} +{"turn": "demo-1", "agent": "AgentA", "counts": {"input": 0, "after_cooldown": 0, "after_novelty": 0, "after_l2": 0, "approved": 0, "dropped_tail": 0}, "clamps": {"novelty_clamped": 0, "l2_scale": 1.0}, "cooldowns": {"blocked_ops": 0}, "caps": {"delta_norm_cap_l2": 1.5, "novelty_cap_per_node": 0.3, "churn_cap_edges": 64}, "approved": 0, "rejected": 0, "reasons": [], "ms": 0.009, "now": "2025-10-10T02:07:37.100667+00:00"} diff --git a/.logs/turn.jsonl b/.logs/turn.jsonl index 9531c33..d71e545 100644 --- a/.logs/turn.jsonl +++ b/.logs/turn.jsonl @@ -1 +1 @@ -{"turn": "demo-1", "agent": "AgentA", "durations_ms": {"t1": 0.093, "t2": 0.118, "t4": 0.006, "apply": 0.746, "total": 2.629}, "t1": {"pops": 5, "iters": 1, "graphs_touched": 1}, "t2": {"k_returned": 0, "k_used": 0, "cache_hit": false}, "t4": {"approved": 0, "rejected": 0}, "now": "2025-10-09T08:56:21.581179+00:00"} +{"turn": "demo-1", "agent": "AgentA", "durations_ms": {"t1": 0.312, "t2": 0.185, "t4": 0.009, "apply": 0.785, 
"total": 3.189}, "t1": {"pops": 5, "iters": 1, "graphs_touched": 1}, "t2": {"k_returned": 0, "k_used": 0, "cache_hit": false}, "t4": {"approved": 0, "rejected": 0}, "now": "2025-10-10T02:07:37.100667+00:00"} diff --git a/CHANGELOG.MD b/CHANGELOG.MD index f8e8a66..bd79213 100644 --- a/CHANGELOG.MD +++ b/CHANGELOG.MD @@ -11,8 +11,23 @@ All notable changes to this project will be documented in this file. + ## [Unreleased] +## [0.10.3] - 2025-10-09 + +### M14 — Examples & fixtures (viewer/console) +- **Examples:** Added a tiny, deterministic demo bundle at `clematis/examples/run_bundles/run_demo_bundle.json` and a HOWTO at `clematis/examples/README_frontend.md`. +- **Makefile:** `demo-bundle` target regenerates the demo deterministically (fixed env + clock). +- **Viewer smoke:** `tests/frontend/test_example_bundle.py` opens the viewer over `file://` and loads the committed demo bundle via the file input; asserts snapshot panel renders; **no network**. +- **Docs:** `docs/m14/frontend.md` now includes a *Quick path* to open the packaged viewer and demo bundle using `importlib.resources.files(...)`. +- **CI:** Reproducible build workflow asserts the demo bundle ships in both artifacts (wheel & sdist). + +### Notes +- No runtime behavior changes; identity path unchanged. All additions are docs/examples/tests only. 
+ + + ## [0.10.1] - 2025-10-09 ### M14 — Viewer & Console (docs-only; identity unchanged) diff --git a/MANIFEST.in b/MANIFEST.in index f4e36c5..0b04fad 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,11 +5,14 @@ include LICENSE include NOTICE include clematis/VERSION +recursive-include clematis/scripts *.py + # Package sources and data (only real file types present) recursive-include clematis *.py *.json *.jsonl -# Packaged examples for post-install defaults -recursive-include clematis/examples * +# Examples: ship only docs and deterministic demo bundle (PR135) +recursive-include clematis/examples *.json *.md +include clematis/examples/run_bundles/run_demo_bundle.json # Dotfiles are not guaranteed by generic globs; include explicitly include clematis/examples/logs/.placeholder diff --git a/Makefile b/Makefile index 71b7575..4afa60d 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,36 @@ # Deterministic frontend scaffolding (M14 / PR127) -.PHONY: frontend-build frontend-clean frontend-checksum frontend-offline-check +.PHONY: frontend-build frontend-clean frontend-checksum frontend-offline-check frontend-stage frontend-repro demo-bundle +.ONESHELL: FRONTEND_DIST := frontend/dist +PKG_FRONTEND_DIST := clematis/frontend/dist frontend-build: @python scripts/build_frontend.py + @$(MAKE) frontend-stage frontend-clean: - @rm -rf $(FRONTEND_DIST) + @rm -rf $(FRONTEND_DIST) $(PKG_FRONTEND_DIST) frontend-checksum: @python scripts/hashdir.py $(FRONTEND_DIST) # Fail if any external URLs appear in built assets frontend-offline-check: - @rg -n "(https?://|//cdn)" $(FRONTEND_DIST) && (echo "External URL found"; exit 2) || echo "OK: no external URLs" + @sh -c 'set -e; if rg -n "(https?://|//cdn)" $(FRONTEND_DIST) | grep -v "www\\.w3\\.org/2000/svg" | grep -q .; then echo "External URL found"; exit 2; else echo "OK: no external URLs"; fi' + +# Mirror built assets into the Python package path for packaging/tests +frontend-stage: + @rm -rf $(PKG_FRONTEND_DIST) + @mkdir -p 
$(PKG_FRONTEND_DIST) + @cp -R $(FRONTEND_DIST)/* $(PKG_FRONTEND_DIST)/ + +# Wrap local reproducibility check for the viewer +frontend-repro: + @bash scripts/repro_check_local.sh --frontend + +# Regenerate the deterministic demo bundle used by the viewer example (PR135) +demo-bundle: + @mkdir -p clematis/examples/run_bundles + @TZ=UTC PYTHONUTF8=1 PYTHONHASHSEED=0 LC_ALL=C.UTF-8 SOURCE_DATE_EPOCH=315532800 CLEMATIS_NETWORK_BAN=1 \ + python -m clematis console -- step --now-ms 315532800000 --out clematis/examples/run_bundles/run_demo_bundle.json diff --git a/clematis/cli/console.py b/clematis/cli/console.py index 8d1d5a4..e1eb610 100644 --- a/clematis/cli/console.py +++ b/clematis/cli/console.py @@ -3,7 +3,17 @@ from typing import Optional # Reuse the single implementation that lives in scripts/console.py -from scripts.console import main as _console_main +try: # prefer packaged location + from clematis.scripts.console import main as _console_main +except Exception: # dev fallback when running from repo root + try: + from scripts.console import main as _console_main + except Exception as e: # helpful error if neither path is available + raise ModuleNotFoundError( + "Unable to import console implementation. Expected 'clematis.scripts.console' " + "when installed, or 'scripts.console' in a source checkout. If you're in 'dist/', " + "run from the repo root or install the package." + ) from e def register(subparsers) -> None: diff --git a/clematis/engine/orchestrator/core.py b/clematis/engine/orchestrator/core.py index 8bfee3e..59b98fd 100644 --- a/clematis/engine/orchestrator/core.py +++ b/clematis/engine/orchestrator/core.py @@ -70,6 +70,36 @@ def _truthy(value: Any) -> bool: return bool(value) +# --- T3 enablement helper (default ON unless explicitly denied) --- +def _t3_is_enabled(cfg: Dict[str, Any]) -> bool: + """Decide if T3 is enabled. + Precedence: env deny > env allow > config (enabled/allow) > default-ON for v3 identity. 
+ """ + # Hard overrides from environment + if _truthy(_os.environ.get("CLEMATIS_T3_DENY", "0")): + return False + if _truthy(_os.environ.get("CLEMATIS_T3_ALLOW", "0")): + return True + # Config path + try: + t3 = (cfg.get("t3") if isinstance(cfg, dict) else {}) or {} + except Exception: + t3 = {} + if isinstance(t3, dict): + if "enabled" in t3: + try: + return bool(t3.get("enabled")) + except Exception: + return True + if "allow" in t3: + try: + return bool(t3.get("allow")) + except Exception: + return True + # Default ON to preserve v3 identity semantics + return True + + from typing import TypedDict _sched_next_turn = None @@ -720,121 +750,21 @@ def run_turn(self, ctx: TurnCtx, state: Dict[str, Any], input_text: str) -> Turn }, ) - # --- T3 (deliberation → optional one-shot RAG → dialogue) --- - # All pure stage functions; only logging here does I/O. - t0 = time.perf_counter() - bundle = make_plan_bundle(ctx, state, t1, t2) - # Allow tests to monkeypatch via clematis.engine.orchestrator.t3_deliberate - orch_module = _sys.modules.get("clematis.engine.orchestrator") - delib_fn = None - if orch_module is not None: - delib_fn = getattr(orch_module, "t3_deliberate", None) - if delib_fn is None: - delib_fn = globals().get("t3_deliberate") - if callable(delib_fn): - plan = delib_fn(ctx, state, bundle) - else: - plan = deliberate(bundle) - plan_ms = round((time.perf_counter() - t0) * 1000.0, 3) - # --- M5 boundary check after T3 (plan) --- - if slice_ctx is not None: - consumed = { - "ms": int(round((time.perf_counter() - total_t0) * 1000.0)), - } - try: - ops_count = sum(1 for _ in (getattr(plan, "ops", []) or [])) - consumed["t3_ops"] = int(ops_count) - except Exception: - pass - reason = _should_yield(slice_ctx, consumed) - if reason: - event = { - "turn": turn_id, - "slice": slice_ctx["slice_idx"], - "agent": agent_id, - "policy": (_get_cfg(ctx).get("scheduler") or {}).get("policy", "round_robin"), - **({"pick_reason": _get_pick_reason(ctx)} if _get_pick_reason(ctx) 
else {}), - "reason": reason, - "enforced": True, - "stage_end": "T3", - "quantum_ms": slice_ctx["budgets"].get("quantum_ms"), - "wall_ms": slice_ctx["budgets"].get("wall_ms"), - "budgets": {k: v for k, v in slice_ctx["budgets"].items() if k != "quantum_ms"}, - "consumed": consumed, - "queued": [], - "ms": 0, - } - _write_or_capture_scheduler_event(ctx, event) - total_ms_now = round((time.perf_counter() - total_t0) * 1000.0, 3) - _append_jsonl( - "turn.jsonl", - { - "turn": turn_id, - "agent": agent_id, - "durations_ms": { - "t1": t1_ms, - "t2": t2_ms, - "t4": 0.0, - "apply": 0.0, - "total": total_ms_now, - }, - "t1": { - "pops": t1.metrics.get("pops"), - "iters": t1.metrics.get("iters"), - "graphs_touched": t1.metrics.get("graphs_touched"), - }, - "t2": { - "k_returned": t2.metrics.get("k_returned"), - "k_used": t2.metrics.get("k_used"), - "cache_hit": bool(cache_hit), - }, - "t4": {}, - "slice_idx": slice_ctx["slice_idx"], - "yielded": True, - "yield_reason": reason, - **({"now": now} if now else {}), - }, - ) - return TurnResult(line="", events=[]) - - # RAG: allow at most one refinement if both requested and enabled by config + # --- T3 (deliberation → optional one-shot RAG → dialogue) --- (GATED) --- + # Ensure plan/utter exist even when T3 is disabled (T4 expects them). cfg = _get_cfg(ctx) - t3cfg = cfg.get("t3", {}) if isinstance(cfg, dict) else {} - max_rag_loops = int(t3cfg.get("max_rag_loops", 1)) if isinstance(t3cfg, dict) else 1 - - def _retrieve_fn(payload: Dict[str, Any]) -> Dict[str, Any]: - # Deterministic wrapper around T2: re-run with the provided query; map to the shape rag_once expects. 
- q = str(payload.get("query") or input_text) - t2_alt = _get_stage_callable("t2_semantic", t2_semantic)(ctx, state, q, t1) - # Normalize retrieved to list[dict] - hits = [] - for r in getattr(t2_alt, "retrieved", []) or []: - if isinstance(r, dict): - hits.append( - { - "id": str(r.get("id")), - "score": float(r.get("_score", r.get("score", 0.0)) or 0.0), - "owner": str(r.get("owner", "any")), - "quarter": str(r.get("quarter", "")), - } - ) - else: - rid = str(getattr(r, "id", "")) - if not rid: - continue - hits.append( - { - "id": rid, - "score": float(getattr(r, "score", 0.0) or 0.0), - "owner": str(getattr(r, "owner", "any")), - "quarter": str(getattr(r, "quarter", "")), - } - ) - return {"retrieved": hits, "metrics": getattr(t2_alt, "metrics", {})} - - requested_retrieve = any( - getattr(op, "kind", None) == "RequestRetrieve" for op in getattr(plan, "ops", []) or [] - ) + t3_enabled = _t3_is_enabled(cfg) + t3cfg = (cfg.get("t3") or {}) if isinstance(cfg, dict) else {} + + # Defaults / placeholders when T3 is disabled + plan = SimpleNamespace(ops=[], reflection=False) + utter = "" + plan_ms = 0.0 + rag_ms = 0.0 + requested_retrieve = False + backend_used = "disabled" + backend_fallback = None + fallback_reason = None rag_metrics = { "rag_used": False, "rag_blocked": False, @@ -844,170 +774,309 @@ def _retrieve_fn(payload: Dict[str, Any]) -> Dict[str, Any]: "owner": None, "tier_pref": None, } - if requested_retrieve and max_rag_loops >= 1: - t0_rag = time.perf_counter() - plan, rag_metrics = rag_once(bundle, plan, _retrieve_fn, already_used=False) - rag_ms = round((time.perf_counter() - t0_rag) * 1000.0, 3) - else: - rag_ms = 0.0 - # Dialogue synthesis (rule-based vs optional LLM backend) - t0 = time.perf_counter() - dialog_bundle = make_dialog_bundle(ctx, state, t1, t2, plan) - - prompt_text = build_llm_prompt(dialog_bundle, plan) + if t3_enabled and not _dry_run: + # All pure stage functions; only _append_jsonl performs I/O. 
+ t0 = time.perf_counter() + bundle = make_plan_bundle(ctx, state, t1, t2) + + # Allow tests to monkeypatch via clematis.engine.orchestrator.t3_deliberate + orch_module = _sys.modules.get("clematis.engine.orchestrator") + delib_fn = None + if orch_module is not None: + delib_fn = getattr(orch_module, "t3_deliberate", None) + if delib_fn is None: + delib_fn = globals().get("t3_deliberate") + + if callable(delib_fn): + plan = delib_fn(ctx, state, bundle) + else: + plan = deliberate(bundle) + plan_ms = round((time.perf_counter() - t0) * 1000.0, 3) - trace_meta: Dict[str, Any] = {} - state_logs = None - if isinstance(state, dict): - state_logs = state.get("logs") - if not isinstance(state_logs, list): - state_logs = [] - state["logs"] = state_logs - else: - state_logs = getattr(state, "logs", None) - if not isinstance(state_logs, list): + # --- M5 boundary check after T3 (plan) --- + if slice_ctx is not None: + consumed = { + "ms": int(round((time.perf_counter() - total_t0) * 1000.0)), + } try: - state_logs = [] - setattr(state, "logs", state_logs) + ops_count = sum(1 for _ in (getattr(plan, "ops", []) or [])) + consumed["t3_ops"] = int(ops_count) except Exception: - state_logs = None - if isinstance(state_logs, list): - trace_meta["state_logs"] = state_logs + pass + reason = _should_yield(slice_ctx, consumed) + if reason: + event = { + "turn": turn_id, + "slice": slice_ctx["slice_idx"], + "agent": agent_id, + "policy": (_get_cfg(ctx).get("scheduler") or {}).get("policy", "round_robin"), + **({"pick_reason": _get_pick_reason(ctx)} if _get_pick_reason(ctx) else {}), + "reason": reason, + "enforced": True, + "stage_end": "T3", + "quantum_ms": slice_ctx["budgets"].get("quantum_ms"), + "wall_ms": slice_ctx["budgets"].get("wall_ms"), + "budgets": {k: v for k, v in slice_ctx["budgets"].items() if k != "quantum_ms"}, + "consumed": consumed, + "queued": [], + "ms": 0, + } + _write_or_capture_scheduler_event(ctx, event) + total_ms_now = round((time.perf_counter() - total_t0) * 
1000.0, 3) + _append_jsonl( + "turn.jsonl", + { + "turn": turn_id, + "agent": agent_id, + "durations_ms": { + "t1": t1_ms, + "t2": t2_ms, + "t4": 0.0, + "apply": 0.0, + "total": total_ms_now, + }, + "t1": { + "pops": t1.metrics.get("pops"), + "iters": t1.metrics.get("iters"), + "graphs_touched": t1.metrics.get("graphs_touched"), + }, + "t2": { + "k_returned": t2.metrics.get("k_returned"), + "k_used": t2.metrics.get("k_used"), + "cache_hit": bool(cache_hit), + }, + "t4": {}, + "slice_idx": slice_ctx["slice_idx"], + "yielded": True, + "yield_reason": reason, + **({"now": now} if now else {}), + }, + ) + return TurnResult(line=utter, events=[]) + + # RAG: allow at most one refinement if both requested and enabled by config + max_rag_loops = int(t3cfg.get("max_rag_loops", 1)) if isinstance(t3cfg, dict) else 1 + + def _retrieve_fn(payload: Dict[str, Any]) -> Dict[str, Any]: + # Deterministic wrapper around T2: re-run with the provided query; map to the shape rag_once expects. + q = str(payload.get("query") or input_text) + t2_alt = _get_stage_callable("t2_semantic", t2_semantic)(ctx, state, q, t1) + # Normalize retrieved to list[dict] + hits = [] + for r in getattr(t2_alt, "retrieved", []) or []: + if isinstance(r, dict): + hits.append( + { + "id": str(r.get("id")), + "score": float(r.get("_score", r.get("score", 0.0)) or 0.0), + "owner": str(r.get("owner", "any")), + "quarter": str(r.get("quarter", "")), + } + ) + else: + rid = str(getattr(r, "id", "")) + if not rid: + continue + hits.append( + { + "id": rid, + "score": float(getattr(r, "score", 0.0) or 0.0), + "owner": str(getattr(r, "owner", "any")), + "quarter": str(getattr(r, "quarter", "")), + } + ) + return {"retrieved": hits, "metrics": getattr(t2_alt, "metrics", {})} - trace_reason = getattr(ctx, "trace_reason", None) - if trace_reason is None and isinstance(ctx, dict): - trace_reason = ctx.get("trace_reason") - if trace_reason is not None: - trace_meta["trace_reason"] = trace_reason + requested_retrieve = any( 
+ getattr(op, "kind", None) == "RequestRetrieve" for op in getattr(plan, "ops", []) or [] + ) + if requested_retrieve and max_rag_loops >= 1: + t0_rag = time.perf_counter() + plan, rag_metrics = rag_once(bundle, plan, _retrieve_fn, already_used=False) + rag_ms = round((time.perf_counter() - t0_rag) * 1000.0, 3) - emit_trace(dialog_bundle.get("cfg", {}), prompt_text, dialog_bundle, trace_meta) + # Dialogue synthesis (rule-based vs optional LLM backend) + t0 = time.perf_counter() + dialog_bundle = make_dialog_bundle(ctx, state, t1, t2, plan) + prompt_text = build_llm_prompt(dialog_bundle, plan) - # Backend selection - backend_cfg = ( - str(t3cfg.get("backend", "rulebased")) if isinstance(t3cfg, dict) else "rulebased" - ) - llm_cfg = t3cfg.get("llm", {}) if isinstance(t3cfg, dict) else {} - adapter = ( - state.get("llm_adapter", None) - if isinstance(state, dict) - else getattr(state, "llm_adapter", None) - ) or getattr(ctx, "llm_adapter", None) + trace_meta: Dict[str, Any] = {} + state_logs = None + if isinstance(state, dict): + state_logs = state.get("logs") + if not isinstance(state_logs, list): + state_logs = [] + state["logs"] = state_logs + else: + state_logs = getattr(state, "logs", None) + if not isinstance(state_logs, list): + try: + state_logs = [] + setattr(state, "logs", state_logs) + except Exception: + state_logs = None + if isinstance(state_logs, list): + trace_meta["state_logs"] = state_logs - backend_used = "rulebased" - backend_fallback = None - fallback_reason = None + trace_reason = getattr(ctx, "trace_reason", None) + if trace_reason is None and isinstance(ctx, dict): + trace_reason = ctx.get("trace_reason") + if trace_reason is not None: + trace_meta["trace_reason"] = trace_reason - # let tests monkeypatch a module-level t3_dialogue with flexible signatures for speed and laziness. 
- dlg_fn = None - if orch_module is not None: - dlg_fn = getattr(orch_module, "t3_dialogue", None) - if dlg_fn is None: - dlg_fn = globals().get("t3_dialogue") - if callable(dlg_fn): - try: - # pref sig: (dialog_bundle, plan) - res = dlg_fn(dialog_bundle, plan) - except TypeError: - # Fallback signature used by some tests: (ctx, state, dialog_bundle) - res = dlg_fn(ctx, state, dialog_bundle) - # normaliaztion: support returning just a string or (utter, metrics) - if isinstance(res, tuple): - utter = res[0] - speak_metrics = res[1] if len(res) > 1 and isinstance(res[1], dict) else {} - else: - utter = res - speak_metrics = {} - backend_used = "patched" - else: - if backend_cfg == "llm" and adapter is not None: - utter, speak_metrics = llm_speak(dialog_bundle, plan, adapter) - backend_used = "llm" + emit_trace(dialog_bundle.get("cfg", {}), prompt_text, dialog_bundle, trace_meta) + + # Backend selection + backend_cfg = ( + str(t3cfg.get("backend", "rulebased")) if isinstance(t3cfg, dict) else "rulebased" + ) + llm_cfg = t3cfg.get("llm", {}) if isinstance(t3cfg, dict) else {} + adapter = ( + state.get("llm_adapter", None) + if isinstance(state, dict) + else getattr(state, "llm_adapter", None) + ) or getattr(ctx, "llm_adapter", None) + + # let tests monkeypatch a module-level t3_dialogue with flexible signatures + dlg_fn = None + if orch_module is not None: + dlg_fn = getattr(orch_module, "t3_dialogue", None) + if dlg_fn is None: + dlg_fn = globals().get("t3_dialogue") + + if callable(dlg_fn): + try: + # preferred sig: (dialog_bundle, plan) + res = dlg_fn(dialog_bundle, plan) + except TypeError: + # fallback sig used by some tests: (ctx, state, dialog_bundle) + res = dlg_fn(ctx, state, dialog_bundle) + # normalization: support returning just a string or (utter, metrics) + if isinstance(res, tuple): + utter = res[0] + speak_metrics = res[1] if len(res) > 1 and isinstance(res[1], dict) else {} + else: + utter = res + speak_metrics = {} + backend_used = "patched" else: - 
utter, speak_metrics = speak(dialog_bundle, plan) - if backend_cfg == "llm" and adapter is None: - backend_fallback, fallback_reason = "rulebased", "no_adapter" - elif backend_cfg not in ("rulebased", "llm"): - backend_fallback, fallback_reason = "rulebased", "invalid_backend" - - speak_ms = round((time.perf_counter() - t0) * 1000.0, 3) - - # Plan logging - ops_counts: Dict[str, int] = {} - for op in getattr(plan, "ops", []) or []: - k = getattr(op, "kind", None) - ops_counts[k] = ops_counts.get(k, 0) + 1 - - policy_backend = ( - str(t3cfg.get("backend", "rulebased")) if isinstance(t3cfg, dict) else "rulebased" - ) - # Compute reflection flag for logging: Plan.reflection or stashed planner flag - _plan_reflection_flag = bool(getattr(plan, "reflection", False)) - if not _plan_reflection_flag: - try: - if isinstance(state, dict): - _plan_reflection_flag = bool(state.get("_planner_reflection_flag", False)) + if backend_cfg == "llm" and adapter is not None: + utter, speak_metrics = llm_speak(dialog_bundle, plan, adapter) + backend_used = "llm" else: - _plan_reflection_flag = bool(getattr(state, "_planner_reflection_flag", False)) - except Exception: - _plan_reflection_flag = False - _append_jsonl( - "t3_plan.jsonl", - { - "turn": turn_id, - "agent": agent_id, - "policy_backend": policy_backend, - "backend": backend_used, - **( - {"backend_fallback": backend_fallback, "fallback_reason": fallback_reason} - if backend_fallback - else {} - ), - "ops_counts": ops_counts, - "requested_retrieve": bool(requested_retrieve), - "rag_used": bool(rag_metrics.get("rag_used", False)), - "reflection": _plan_reflection_flag, - "ms_deliberate": plan_ms, - "ms_rag": rag_ms, - **({"now": now} if now else {}), - }, - ) + utter, speak_metrics = speak(dialog_bundle, plan) + backend_used = "rulebased" + if backend_cfg == "llm" and adapter is None: + backend_fallback, fallback_reason = "rulebased", "no_adapter" + elif backend_cfg not in ("rulebased", "llm"): + backend_fallback, fallback_reason 
= "rulebased", "invalid_backend" + + speak_ms = round((time.perf_counter() - t0) * 1000.0, 3) + + # Plan logging + ops_counts: Dict[str, int] = {} + for op in getattr(plan, "ops", []) or []: + k = getattr(op, "kind", None) + ops_counts[k] = ops_counts.get(k, 0) + 1 + + policy_backend = ( + str(t3cfg.get("backend", "rulebased")) if isinstance(t3cfg, dict) else "rulebased" + ) + # Compute reflection flag for logging: Plan.reflection or stashed planner flag + _plan_reflection_flag = bool(getattr(plan, "reflection", False)) + if not _plan_reflection_flag: + try: + if isinstance(state, dict): + _plan_reflection_flag = bool(state.get("_planner_reflection_flag", False)) + else: + _plan_reflection_flag = bool(getattr(state, "_planner_reflection_flag", False)) + except Exception: + _plan_reflection_flag = False - # Dialogue logging - dlg_extra = {} - if backend_used == "llm": - adapter_name = getattr( - adapter, - "name", - adapter.__class__.__name__ if hasattr(adapter, "__class__") else "Unknown", + # Summary row (t3.jsonl) + _append_jsonl( + "t3.jsonl", + { + "turn": turn_id, + "agent": agent_id, + "backend": backend_used, + **( + {"backend_fallback": backend_fallback, "fallback_reason": fallback_reason} + if backend_fallback + else {} + ), + "ops_counts": ops_counts, + "requested_retrieve": bool(requested_retrieve), + "rag_used": bool(rag_metrics.get("rag_used", False)), + "ms_plan": plan_ms, + "ms_rag": rag_ms, + "ms_speak": speak_ms, + **({"now": now} if now else {}), + }, ) - model = str(llm_cfg.get("model", "")) - temperature = float(llm_cfg.get("temperature", 0.2)) - dlg_extra.update( + + # Detailed plan log + _append_jsonl( + "t3_plan.jsonl", { - "backend": "llm", - "adapter": adapter_name, - "model": model, - "temperature": temperature, - } + "turn": turn_id, + "agent": agent_id, + "policy_backend": policy_backend, + "backend": backend_used, + **( + {"backend_fallback": backend_fallback, "fallback_reason": fallback_reason} + if backend_fallback + else {} + ), + 
"ops_counts": ops_counts, + "requested_retrieve": bool(requested_retrieve), + "rag_used": bool(rag_metrics.get("rag_used", False)), + "reflection": _plan_reflection_flag, + "ms_deliberate": plan_ms, + "ms_rag": rag_ms, + **({"now": now} if now else {}), + }, ) - else: - dlg_extra.update({"backend": "rulebased"}) - _append_jsonl( - "t3_dialogue.jsonl", - { - "turn": turn_id, - "agent": agent_id, - "tokens": int(speak_metrics.get("tokens", 0)), - "truncated": bool(speak_metrics.get("truncated", False)), - "style_prefix_used": bool(speak_metrics.get("style_prefix_used", False)), - "snippet_count": int(speak_metrics.get("snippet_count", 0)), - "ms": speak_ms, - **dlg_extra, - **({"now": now} if now else {}), - }, - ) + # Dialogue logging + dlg_extra = {} + if backend_used == "llm": + adapter_name = getattr( + adapter, + "name", + adapter.__class__.__name__ if hasattr(adapter, "__class__") else "Unknown", + ) + model = str(llm_cfg.get("model", "")) + temperature = float(llm_cfg.get("temperature", 0.2)) + dlg_extra.update( + { + "backend": "llm", + "adapter": adapter_name, + "model": model, + "temperature": temperature, + } + ) + elif backend_used == "patched": + dlg_extra.update({"backend": "patched"}) + else: + dlg_extra.update({"backend": "rulebased"}) + + _append_jsonl( + "t3_dialogue.jsonl", + { + "turn": turn_id, + "agent": agent_id, + "tokens": int(speak_metrics.get("tokens", 0)), + "truncated": bool(speak_metrics.get("truncated", False)), + "style_prefix_used": bool(speak_metrics.get("style_prefix_used", False)), + "snippet_count": int(speak_metrics.get("snippet_count", 0)), + "ms": speak_ms, + **dlg_extra, + **({"now": now} if now else {}), + }, + ) + # (end gated T3) # the kill switch (t4.enabled). Default True if unspecified. 
t4_cfg_full = (_get_cfg(ctx).get("t4") if isinstance(_get_cfg(ctx), dict) else {}) or {} @@ -1465,7 +1534,10 @@ def _retrieve_fn(payload: Dict[str, Any]) -> Dict[str, Any]: }, ) - return TurnResult(line=utter, events=[]) + _final_line = utter if isinstance(utter, str) else "" + if not _final_line: + _final_line = (str(input_text or "")).strip() or "…" + return TurnResult(line=_final_line, events=[]) def run_smoke_turn(cfg: Dict[str, Any] | None = None, log_dir: str | None = None, input_text: str = "") -> TurnResult: diff --git a/clematis/examples/README_frontend.md b/clematis/examples/README_frontend.md new file mode 100644 index 0000000..81f1f93 --- /dev/null +++ b/clematis/examples/README_frontend.md @@ -0,0 +1,91 @@ + + +# Frontend demo bundle (offline) + +This directory ships a tiny, deterministic **run bundle** for the offline viewer. + +The goal: give operators a known‑good bundle that always loads in the viewer without network, and a one‑command recipe to regenerate it deterministically. + +--- + +## Quick start + +You have two ways to open the viewer: + +**A) From a repo checkout (build once, then open)** +```bash +npm ci --prefix frontend +npm run --prefix frontend build +make frontend-build +# Then open in a browser: +# file://…/frontend/dist/index.html +``` +Now use **Load** to select the demo bundle below. + +**B) From an installed wheel (no Node required)** +Print the viewer and demo‑bundle paths from the installed package: +```bash +python - <<'PY' +from importlib.resources import files +print("viewer:", files("clematis").joinpath("frontend/dist/index.html")) +print("bundle:", files("clematis").joinpath("examples/run_bundles/run_demo_bundle.json")) +PY +``` +Open the printed `viewer` path in your browser and **Load** the printed `bundle`. + +> The viewer is fully static and offline; there are **no http(s)** requests on load (enforced by tests). + +--- + +## What’s in the demo bundle? 
+ +- A single, tiny turn recorded with fixed clocks and a minimal config. +- Canonical JSON (sorted keys, stable separators, LF newlines). +- Small enough to be committed and shipped in both sdist and wheel under: + ``` + clematis/examples/run_bundles/run_demo_bundle.json + ``` + +--- + +## Regenerate deterministically (maintainers) + +Use the console with fixed environment and a fixed clock. This will **overwrite** the committed demo: + +```bash +export TZ=UTC PYTHONUTF8=1 PYTHONHASHSEED=0 LC_ALL=C.UTF-8 +export SOURCE_DATE_EPOCH=315532800 CLEMATIS_NETWORK_BAN=1 + +# Produce a tiny one‑turn bundle at a fixed epoch time (1980‑01‑01) +python -m clematis console -- step \ + --now-ms 315532800000 \ + --out clematis/examples/run_bundles/run_demo_bundle.json +``` + +Recommended sanity checks: + +```bash +# Canonical JSON and LF newlines are expected +python -m json.tool clematis/examples/run_bundles/run_demo_bundle.json >/dev/null + +# Optional: validate with the viewer locally (offline) +# Open file:///…/frontend/dist/index.html in a browser, then Load the bundle +``` + +--- + +## Troubleshooting + +- **`frontend/dist/index.html` missing** (from repo): run the build steps in *Quick start (A)*. +- **Viewer opens but nothing loads after selecting the bundle**: + - Ensure you selected the committed file at `clematis/examples/run_bundles/run_demo_bundle.json` (not a stale path). + - Check browser console for syntax errors (should be none); the viewer is ESM‑only. +- **CI fails “viewer assets present”**: + - Packaging expects files under `clematis/frontend/dist/**`. Ensure you ran `make frontend-build` (or that compiled assets are committed) before building wheels. + +--- + +## See also + +- **docs/m14/frontend.md** — operator‑grade notes for the offline viewer and console. +- **tests/frontend/test_example_bundle.py** — smoke test that loads this demo bundle over `file://` with no network.
diff --git a/clematis/examples/run_bundles/run_demo_bundle.json b/clematis/examples/run_bundles/run_demo_bundle.json new file mode 100644 index 0000000..56f45e6 --- /dev/null +++ b/clematis/examples/run_bundles/run_demo_bundle.json @@ -0,0 +1,156 @@ +{ + "logs": { + "apply": [ + { + "agent": "console", + "applied": 0, + "cache_invalidations": 0, + "clamps": 0, + "ms": 0.867, + "snapshot": "/Users/vkfyka/Desktop/Clematis3/.data/snapshots/state_console.json", + "turn": "1", + "version_etag": "51" + } + ], + "t1": [ + { + "agent": "console", + "cache_enabled": true, + "cache_hits": 0, + "cache_misses": 0, + "cache_used": false, + "graphs_touched": 0, + "iters": 0, + "layer_cap_hits": 0, + "max_delta": 0.0, + "ms": 0.024, + "node_budget_hits": 0, + "pops": 0, + "propagations": 0, + "radius_cap_hits": 0, + "turn": "1" + } + ], + "t2": [ + { + "agent": "console", + "backend": "inmemory", + "backend_fallback": false, + "cache_enabled": true, + "cache_hit": false, + "cache_hits": 0, + "cache_misses": 2, + "cache_size": 1, + "cache_used": true, + "caps": { + "residual_cap": 32 + }, + "hybrid_used": false, + "k_residual": 0, + "k_returned": 0, + "k_used": 0, + "ms": 0.782, + "owner_scope": "any", + "score_stats": { + "max": 0.0, + "mean": 0.0 + }, + "sim_stats": { + "max": 0.0, + "mean": 0.0 + }, + "tier_sequence": [ + "exact_semantic", + "cluster_semantic", + "archive" + ], + "turn": "1" + } + ], + "t4": [ + { + "agent": "console", + "approved": 0, + "caps": { + "churn_cap_edges": 64, + "delta_norm_cap_l2": 1.5, + "novelty_cap_per_node": 0.3 + }, + "clamps": { + "l2_scale": 1.0, + "novelty_clamped": 0 + }, + "cooldowns": { + "blocked_ops": 0 + }, + "counts": { + "after_cooldown": 0, + "after_l2": 0, + "after_novelty": 0, + "approved": 0, + "dropped_tail": 0, + "input": 0 + }, + "ms": 0.008, + "reasons": [], + "rejected": 0, + "turn": "1" + } + ], + "turn": [ + { + "agent": "console", + "durations_ms": { + "apply": 0.867, + "t1": 0.024, + "t2": 0.782, + "t4": 0.008, + 
"total": 3.418 + }, + "t1": { + "graphs_touched": 0, + "iters": 0, + "pops": 0 + }, + "t2": { + "cache_hit": false, + "k_returned": 0, + "k_used": 0 + }, + "t4": { + "approved": 0, + "rejected": 0 + }, + "turn": "1" + } + ] + }, + "meta": { + "logs_dir": "/var/folders/tc/50qy9gr93pn90t3n8zbcf21c0000gn/T/clematis-logs-3nnvcfhb", + "schema": "v1", + "snapshots_dir": "/Users/vkfyka/Desktop/Clematis3/.data/snapshots", + "stages": [ + "t1.jsonl", + "t2.jsonl", + "t4.jsonl", + "apply.jsonl", + "turn.jsonl" + ], + "tool": "clematis-export-logs" + }, + "snapshot": { + "caps": { + "churn_cap_edges": null, + "delta_norm_cap_l2": null, + "novelty_cap_per_node": null, + "weight_max": null, + "weight_min": null + }, + "edges": 0, + "graph_schema_version": "v1.1", + "nodes": 0, + "path": "/Users/vkfyka/Desktop/Clematis3/.data/snapshots/state_console.json", + "schema_version": "v1", + "version_etag": "51" + } +} diff --git a/clematis/scripts/console.py b/clematis/scripts/console.py new file mode 100644 index 0000000..97f283a --- /dev/null +++ b/clematis/scripts/console.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +""" +Deterministic local console for Clematis. +Usage: + python -m clematis console -- step [--now-ms N] [--input "…"] [--out run.json] + python -m clematis console -- reset [--snapshot PATH] + python -m clematis console -- status + python -m clematis console -- compare --a A.json --b B.json +Exit codes: + 0 = OK/equal; 1 = compare:differs; 2 = adapter/misuse error. +""" +from __future__ import annotations +import argparse +import json +import os +import sys +import pathlib +import tempfile +import shutil +from typing import Any, Dict + +# Snapshot import deferred; see adapter_reset() for lazy import with fallbacks. + +# Exporter (PR128). Optional; console can fall back to a minimal bundle. 
+try: + from clematis.scripts.export_logs_for_frontend import export_state_to_bundle as _export_state_to_bundle # type: ignore[attr-defined] +except Exception: # pragma: no cover + try: + from clematis.cli.export_logs_for_frontend import export_state_to_bundle as _export_state_to_bundle # type: ignore[attr-defined] + except Exception: # pragma: no cover + _export_state_to_bundle = None # type: ignore[assignment] +try: + from clematis.scripts.export_logs_for_frontend import build_run_bundle as _build_run_bundle # type: ignore[attr-defined] +except Exception: # pragma: no cover + _build_run_bundle = None # type: ignore[assignment] + +DEFAULT_EPOCH = int(os.environ.get("SOURCE_DATE_EPOCH") or "315532800") # 1980-01-01 +DEFAULT_NOW_MS = DEFAULT_EPOCH * 1000 + +# ------------------------- +# Deterministic env helpers +# ------------------------- +REQ_ENV = { + "TZ": "UTC", + "PYTHONHASHSEED": "0", + "SOURCE_DATE_EPOCH": str(DEFAULT_EPOCH), + "CLEMATIS_NETWORK_BAN": "1", +} + +def warn_nondeterminism() -> None: + missing = [k for k,v in REQ_ENV.items() if os.environ.get(k) != v] + if missing: + print(f"[console] WARNING: non-deterministic env vars differ: {missing}", file=sys.stderr) + +# ------------------------- +# Orchestrator adapter +# ------------------------- + + +def _state_from_payload(payload: Dict[str, Any]) -> Dict[str, Any]: + """Construct a minimal engine state from a snapshot payload dict. + Keeps both `gel` and `graph` keys to satisfy downstream consumers. 
+ """ + gel = payload.get("gel") or payload.get("graph") or {} + if not isinstance(gel, dict): + gel = {} + state: Dict[str, Any] = {"gel": gel, "graph": gel} + ver = payload.get("version_etag") + if ver is not None: + state["version_etag"] = str(ver) + store = payload.get("store") + if isinstance(store, dict): + state["store"] = store + return state + + +def adapter_reset(snapshot_path: str | None): + """ + Deterministic reset using the snapshot module: explicit path -> read_snapshot; + otherwise use load_latest_snapshot (engine decides where "latest" is). + Falls back to scanning ./.data/snapshots when the helper is unavailable. + """ + try: + from clematis.engine.snapshot import read_snapshot, load_latest_snapshot # type: ignore[attr-defined] + except Exception as e: + print("[console] ERROR: snapshot module not available (clematis.engine.snapshot)", file=sys.stderr) + raise SystemExit(2) from e + + # Explicit path: read payload and synthesise a minimal state + if snapshot_path: + try: + payload = read_snapshot(path=snapshot_path) # type: ignore[call-arg] + except Exception as e: + print(f"[console] ERROR: failed to read snapshot: {snapshot_path}", file=sys.stderr) + raise SystemExit(2) from e + return _state_from_payload(payload) + + # No explicit path: try engine helper to load latest into a fresh state + try: + from types import SimpleNamespace + ctx = SimpleNamespace(cfg={}) + state: Dict[str, Any] = {} + load_latest_snapshot(ctx, state) # type: ignore[misc] + return state + except Exception: + # Fallback: pick most-recent .json manually and read it + base = os.environ.get("CLEMATIS_SNAPSHOTS_DIR") or "./.data/snapshots" + p = find_latest_snapshot(pathlib.Path(base)) + if not p: + print("[console] ERROR: no snapshots found and no default loader", file=sys.stderr) + raise SystemExit(2) + try: + payload = read_snapshot(path=p) # type: ignore[call-arg] + except Exception as e: + print(f"[console] ERROR: failed to read snapshot: {p}", file=sys.stderr) + raise 
SystemExit(2) from e + return _state_from_payload(payload) + +def _make_minimal_bundle(low_level_logs: Dict[str, Any]) -> Dict[str, Any]: + stages = ("t1", "t2", "t4", "apply", "turn") + logs = {k: list(low_level_logs.get(k, [])) for k in stages} + return { + "meta": {"tool": "clematis-console", "schema": "v1", "stages": list(stages)}, + "snapshots": [], + "logs": logs, + } + +def adapter_step(state: dict, now_ms: int, input_text: str): + """ + Execute exactly one deterministic turn. + - If the orchestrator returns (state, logs) or an object with `.state`/`.logs`, use that. + - Otherwise, assume logs were written to disk (CLEMATIS_LOG_DIR) and assemble a bundle from disk. + Falls back to a minimal in-memory bundle if no exporter is available. + """ + try: + from clematis.engine.orchestrator.core import run_turn # type: ignore[attr-defined] + except Exception as e: + print("[console] ERROR: orchestrator run_turn not available", file=sys.stderr) + raise SystemExit(2) from e + + # Prepare deterministic context + try: + from types import SimpleNamespace + # Build a small t3 config from environment flags so orchestrator can pick it up + _t3_allow = (os.environ.get("CLEMATIS_T3_ALLOW") == "1") + _t3_apply = (os.environ.get("CLEMATIS_T3_APPLY_OPS") == "1") + _llm_mode = os.environ.get("CLEMATIS_LLM_MODE", "mock") + _llm_cassette = os.environ.get("CLEMATIS_LLM_CASSETTE") + _backend = "rulebased" if _llm_mode == "rulebased" else "llm" + + cfg_ns = SimpleNamespace( + t1={}, + t2={}, + t3={ + "enabled": _t3_allow, + "allow": _t3_allow, # alias accepted by orchestrator + "apply_ops": _t3_apply, + "backend": _backend, + "llm": {"mode": _llm_mode, "cassette": _llm_cassette}, + "max_rag_loops": 1, + }, + scheduler={}, + ) + ctx = SimpleNamespace(now_ms=now_ms, turn_id="1", agent_id="console", cfg=cfg_ns) + + # Best-effort: attach an adapter if available for mock/replay/live; silently skip if not present + try: + if _backend == "llm": + if _llm_mode == "mock": + try: + from 
clematis.adapters.llm import FixtureLLMAdapter # type: ignore + ctx.llm_adapter = FixtureLLMAdapter() + except Exception: + pass + elif _llm_mode == "replay" and _llm_cassette: + try: + from clematis.adapters.llm import ReplayLLMAdapter # type: ignore + ctx.llm_adapter = ReplayLLMAdapter(_llm_cassette) + except Exception: + pass + elif _llm_mode == "live": + try: + from clematis.adapters.llm import LiveOpenAIAdapter # type: ignore + ctx.llm_adapter = LiveOpenAIAdapter.from_env() # may raise; ok to skip + except Exception: + pass + except Exception: + # never let adapter wiring break deterministic runs + pass + except Exception: + # Fallback (shouldn't happen): minimal mapping shape + ctx = {"now_ms": now_ms, "cfg": {"t1": {}, "t2": {}, "t3": {}, "scheduler": {}}} + + # Ensure we have a logs directory; if none configured, use a temp dir + cleanup_dir = None + restore_env = None + if not os.environ.get("CLEMATIS_LOG_DIR"): + cleanup_dir = tempfile.mkdtemp(prefix="clematis-logs-") + restore_env = os.environ.get("CLEMATIS_LOG_DIR") + os.environ["CLEMATIS_LOG_DIR"] = cleanup_dir + print(f"[console] using logs_dir={cleanup_dir}", file=sys.stderr) + else: + print(f"[console] using logs_dir={os.environ.get('CLEMATIS_LOG_DIR')}", file=sys.stderr) + + try: + res = run_turn(ctx, state, input_text or "") + + # Derive new_state and low_level_logs from various possible return shapes + new_state = state + low_level_logs: Dict[str, Any] | None = None + if isinstance(res, tuple) and len(res) >= 2: + new_state = res[0] if res[0] is not None else state + low_level_logs = res[1] + else: + # object with attributes? 
+ cand_state = getattr(res, "state", None) + cand_logs = getattr(res, "logs", None) + if cand_state is not None: + new_state = cand_state + if cand_logs is not None: + low_level_logs = cand_logs + + # Prefer exporter if we have in-memory logs + if _export_state_to_bundle and low_level_logs is not None: + try: + bundle = _export_state_to_bundle(new_state, logs=low_level_logs, include_perf=False) # type: ignore[misc] + return new_state, bundle + except TypeError: + # Signature mismatch; fall through to from-disk bundling + pass + except NotImplementedError as e: + print("[console] ERROR: exporter not available (export_state_to_bundle)", file=sys.stderr) + raise SystemExit(2) from e + + # Assemble from disk if possible + if _build_run_bundle: + logs_dir = os.environ.get("CLEMATIS_LOG_DIR") or cleanup_dir or "./.data/logs" + snaps_dir = os.environ.get("CLEMATIS_SNAPSHOTS_DIR") or "./.data/snapshots" + try: + bundle, warns, rc = _build_run_bundle( + logs_dir=logs_dir, + snapshots_dir=snaps_dir, + include_perf=False, + strict=False, + max_stage_entries=None, + ) + if rc == 0: + return new_state, bundle + except Exception: + # Fall through to minimal bundle + pass + + # Last resort: minimal in-memory bundle + return new_state, _make_minimal_bundle(low_level_logs or {}) + finally: + if cleanup_dir: + try: + shutil.rmtree(cleanup_dir) + except Exception: + pass + # Restore env + if restore_env is None: + os.environ.pop("CLEMATIS_LOG_DIR", None) + else: + os.environ["CLEMATIS_LOG_DIR"] = restore_env + +def adapter_status(state: Dict[str, Any]) -> Dict[str, Any]: + """ + Summarize scheduler configuration and budgets from state/config, + and (optionally) recent scheduler event counts from scheduler.jsonl. 
+ """ + cfg = state.get("cfg") or state.get("config") or {} + scfg = (cfg.get("scheduler") or {}) if isinstance(cfg, dict) else {} + policy = scfg.get("policy", "round_robin") + fairness = scfg.get("fairness") or {} + # Prefer budgets from cfg.scheduler; fall back to top-level + budgets_src = scfg.get("budgets") or state.get("budgets") or {} + + status: Dict[str, Any] = { + "scheduler": { + "policy": policy, + "fairness_keys": sorted(list(fairness.keys())) if isinstance(fairness, dict) else [], + }, + "budgets": {}, + } + + # Normalize canonical budget keys referenced in orchestrator.core + for k in ("t1_iters", "t1_pops", "t2_k", "t3_ops", "quantum_ms", "wall_ms"): + if isinstance(budgets_src, dict) and k in budgets_src: + status["budgets"][k] = budgets_src[k] + + # Optionally summarize recent scheduler events from logs (best-effort) + try: + logs_dir = os.environ.get("CLEMATIS_LOG_DIR") + if logs_dir: + sched_path = os.path.join(logs_dir, "scheduler.jsonl") + if os.path.exists(sched_path): + counts: Dict[str, int] = {} + with open(sched_path, "r", encoding="utf-8", errors="ignore") as fh: + tail = fh.readlines()[-100:] + for line in tail: + try: + ev = json.loads(line) + ev_type = str(ev.get("event", "unknown")) + counts[ev_type] = counts.get(ev_type, 0) + 1 + except Exception: + counts["parse_error"] = counts.get("parse_error", 0) + 1 + if counts: + status["scheduler"]["recent_event_counts"] = counts + except Exception: + # fail-soft: logging isn’t critical for status + pass + + return status + +# ------------------------- +# Utilities +# ------------------------- +def find_latest_snapshot(dir_path: pathlib.Path) -> str | None: + if not dir_path.exists(): + return None + files = [p for p in dir_path.glob("*.json") if p.is_file()] + if not files: + return None + files.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return str(files[0]) + +def write_json(path: str, obj: Any) -> None: + # Canonical ordering + LF newlines to match exporter conventions. 
+ def canonical(o: Any) -> Any: + if isinstance(o, dict): + return {k: canonical(o[k]) for k in sorted(o.keys())} + if isinstance(o, list): + return [canonical(x) for x in o] + return o + data = json.dumps(canonical(obj), indent=2, ensure_ascii=False) + p = pathlib.Path(path) + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(data + "\n", encoding="utf-8") + +def load_json(path: str) -> Dict[str, Any]: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + +def summarize_bundle(bundle: Dict[str, Any]) -> Dict[str, Any]: + logs = bundle.get("logs") or {} + stages = ("t1", "t2", "t3", "t3_reflection", "t4", "apply", "turn") + counts = {k: len(logs.get(k, [])) for k in stages} + snaps_list = bundle.get("snapshots") + if isinstance(snaps_list, list): + snaps_len = len(snaps_list) + else: + snaps_len = 1 if "snapshot" in bundle else 0 + meta = bundle.get("meta") or {} + return { + "counts": counts, + "snapshots_len": snaps_len, + "meta_keys": sorted(list(meta.keys())), + } + +def compare_bundles(a: Dict[str, Any], b: Dict[str, Any]) -> Dict[str, Any]: + sa, sb = summarize_bundle(a), summarize_bundle(b) + diff = {} + for k in ("counts", "snapshots_len", "meta_keys"): + if sa[k] != sb[k]: + diff[k] = {"a": sa[k], "b": sb[k]} + return diff + +# ------------------------- +# Commands +# ------------------------- +def cmd_reset(args: argparse.Namespace) -> int: + warn_nondeterminism() + snap = args.snapshot + if snap is None: + base = os.environ.get("CLEMATIS_SNAPSHOTS_DIR") or "./.data/snapshots" + snap = find_latest_snapshot(pathlib.Path(base)) # default convention + st = adapter_reset(snap) + out = {"ok": True, "snapshot": snap, "state_hint": list(st.keys())[:8]} + print(json.dumps(out, indent=2)) + return 0 + +def cmd_status(args: argparse.Namespace) -> int: + snap = args.snapshot + if snap is None: + base = os.environ.get("CLEMATIS_SNAPSHOTS_DIR") or "./.data/snapshots" + snap = find_latest_snapshot(pathlib.Path(base)) + st = adapter_reset(snap) 
if snap else adapter_reset(None) + info = adapter_status(st) + if snap: + info = {"snapshot": snap, **info} + print(json.dumps(info, indent=2)) + return 0 + +def cmd_step(args: argparse.Namespace) -> int: + warn_nondeterminism() + st = adapter_reset(args.snapshot) if args.snapshot else adapter_reset(None) + now_ms = args.now_ms if args.now_ms is not None else DEFAULT_NOW_MS + # Propagate T3/LLM flags via env so the orchestrator gate can see them + if getattr(args, "t3", False): + os.environ["CLEMATIS_T3_ALLOW"] = "1" + if getattr(args, "t3_apply_ops", False): + os.environ["CLEMATIS_T3_APPLY_OPS"] = "1" + if getattr(args, "llm_mode", None): + os.environ["CLEMATIS_LLM_MODE"] = args.llm_mode + if getattr(args, "llm_cassette", None): + os.environ["CLEMATIS_LLM_CASSETTE"] = args.llm_cassette + st2, logs = adapter_step(st, now_ms=now_ms, input_text=(args.input or "")) + if args.out: + write_json(args.out, logs) + else: + print(json.dumps(logs, indent=2)) + return 0 + +def cmd_compare(args: argparse.Namespace) -> int: + a = load_json(args.a) + b = load_json(args.b) + diff = compare_bundles(a, b) + if diff: + print(json.dumps(diff, indent=2)) + return 1 + print(json.dumps({"equal": True})) + return 0 + +# ------------------------- +# Entry +# ------------------------- +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(prog="console", add_help=True) + sub = p.add_subparsers(dest="cmd", required=True) + + p_step = sub.add_parser("step", aliases=["next"], help="run one turn deterministically") + p_step.add_argument("--snapshot", type=str, default=None, help="snapshot .json path (default: latest)") + p_step.add_argument("--now-ms", type=int, default=None, help=f"logical time (default: {DEFAULT_NOW_MS})") + p_step.add_argument("--input", type=str, default="", help="input text for the turn") + # T3 / LLM gating + p_step.add_argument("--t3", action="store_true", help="enable T3 (planner/dialogue/ops)") + p_step.add_argument("--t3-apply-ops", 
action="store_true", help="apply T3 ops to state (off by default)") + p_step.add_argument( + "--llm-mode", + choices=["mock", "replay", "live", "rulebased"], + default=os.environ.get("CLEMATIS_LLM_MODE", "mock"), + help="LLM backend mode (default: mock). 'rulebased' forces non-LLM speak", + ) + p_step.add_argument( + "--llm-cassette", + type=str, + default=os.environ.get("CLEMATIS_LLM_CASSETTE"), + help="Path to replay cassette when --llm-mode=replay", + ) + p_step.add_argument("--out", type=str, default=None, help="write logs to file (default: stdout)") + p_step.set_defaults(fn=cmd_step) + + p_reset = sub.add_parser("reset", help="load snapshot and reset state") + p_reset.add_argument("--snapshot", type=str, default=None) + p_reset.set_defaults(fn=cmd_reset) + + p_status = sub.add_parser("status", help="print scheduler/budgets summary") + p_status.add_argument("--snapshot", type=str, default=None) + p_status.set_defaults(fn=cmd_status) + + p_cmp = sub.add_parser("compare", help="diff two run_bundle.json files") + p_cmp.add_argument("--a", type=str, required=True) + p_cmp.add_argument("--b", type=str, required=True) + p_cmp.set_defaults(fn=cmd_compare) + + return p + +def main(argv: list[str] | None = None) -> int: + argv = argv if argv is not None else sys.argv[1:] + parser = build_parser() + ns = parser.parse_args(argv) + return ns.fn(ns) + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/clematis/scripts/export_logs_for_frontend.py b/clematis/scripts/export_logs_for_frontend.py index 881cf00..6bc971d 100644 --- a/clematis/scripts/export_logs_for_frontend.py +++ b/clematis/scripts/export_logs_for_frontend.py @@ -41,7 +41,11 @@ from clematis.io.paths import logs_dir as _default_logs_dir # type: ignore -STAGE_FILES = ("t1.jsonl", "t2.jsonl", "t4.jsonl", "apply.jsonl", "turn.jsonl") +STAGE_FILES = [ + "t1.jsonl", "t2.jsonl", + "t3.jsonl", "t3_plan.jsonl", "t3_dialogue.jsonl", "t3_reflection.jsonl", + "t4.jsonl", "apply.jsonl", "turn.jsonl", +] def 
_read_jsonl(path: str, max_entries: int | None = None): diff --git a/configs/config.yaml b/configs/config.yaml index 3ec02d8..efb2e6c 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -43,11 +43,12 @@ t2: trace_dir: "logs/quality" redact: true t3: + apply_ops: false max_rag_loops: 1 tokens: 256 temp: 0.7 max_ops_per_turn: 3 - allow_reflection: false + allow_reflection: true backend: rulebased reflection: backend: rulebased # deterministic summariser; "llm" requires fixtures (see PR84) diff --git a/configs/validate.py b/configs/validate.py index 389d960..5bc1cc2 100644 --- a/configs/validate.py +++ b/configs/validate.py @@ -273,6 +273,7 @@ def _deep_merge(dst: Dict[str, Any], src: Dict[str, Any]) -> Dict[str, Any]: "tokens", "temp", "allow_reflection", + "apply_ops", "dialogue", "policy", "llm", @@ -281,6 +282,8 @@ def _deep_merge(dst: Dict[str, Any], src: Dict[str, Any]) -> Dict[str, Any]: ALLOWED_T3_LLM = {"provider", "model", "endpoint", "max_tokens", "temp", "timeout_ms", "fixtures"} ALLOWED_T3_LLM_FIXTURES = {"enabled", "path"} ALLOWED_T3_REFLECTION = {"backend", "summary_tokens", "embed", "log", "topk_snippets"} +ALLOWED_T3_DIALOGUE = {"template", "include_top_k_snippets"} +ALLOWED_T3_POLICY = {"tau_high", "tau_low", "epsilon_edit"} ALLOWED_T4 = { "enabled", "delta_norm_cap_l2", @@ -461,6 +464,8 @@ def _validate_config_normalize_impl(cfg: Dict[str, Any]) -> Dict[str, Any]: raw_t3_llm = _ensure_dict(raw_t3.get("llm")) raw_t3_llm_fixtures = _ensure_dict(raw_t3_llm.get("fixtures")) raw_t3_reflection = _ensure_dict(raw_t3.get("reflection")) + raw_t3_dialogue = _ensure_dict(raw_t3.get("dialogue")) + raw_t3_policy = _ensure_dict(raw_t3.get("policy")) raw_t4 = _ensure_dict(cfg_in.get("t4")) raw_t1_cache = _ensure_dict(raw_t1.get("cache")) raw_t2_cache = _ensure_dict(raw_t2.get("cache")) @@ -543,6 +548,18 @@ def _validate_config_normalize_impl(cfg: Dict[str, Any]) -> Dict[str, Any]: hint = f" (did you mean '{sug}')" if sug else "" _err(errors, 
f"t3.reflection.{k}", f"unknown key{hint}") + for k in raw_t3_dialogue.keys(): + if k not in ALLOWED_T3_DIALOGUE: + sug = _suggest_key(k, ALLOWED_T3_DIALOGUE) + hint = f" (did you mean '{sug}')" if sug else "" + _err(errors, f"t3.dialogue.{k}", f"unknown key{hint}") + + for k in raw_t3_policy.keys(): + if k not in ALLOWED_T3_POLICY: + sug = _suggest_key(k, ALLOWED_T3_POLICY) + hint = f" (did you mean '{sug}')" if sug else "" + _err(errors, f"t3.policy.{k}", f"unknown key{hint}") + for k in raw_t4.keys(): if k not in ALLOWED_T4: sug = _suggest_key(k, ALLOWED_T4) @@ -920,6 +937,57 @@ def _validate_config_normalize_impl(cfg: Dict[str, Any]) -> Dict[str, Any]: t3["backend"] = t3_backend t3["allow_reflection"] = _coerce_bool(t3.get("allow_reflection", False)) + # optional gate: whether to apply ops from the T3 plan (defaults False for M14) + t3["apply_ops"] = _coerce_bool(t3.get("apply_ops", False)) + + # top-level dialogue sampling controls (separate from llm.* which configures the adapter) + t3["tokens"] = _coerce_int(t3.get("tokens", 256)) + if t3["tokens"] < 1: + _err(errors, "t3.tokens", "must be >= 1") + t3["temp"] = _coerce_float(t3.get("temp", 0.7)) + if not (0.0 <= t3["temp"] <= 1.0): + _err(errors, "t3.temp", "must be in [0,1]") + + # dialogue sub-block (template and snippet controls) + dlg = _ensure_subdict(t3, "dialogue") + if "template" in raw_t3_dialogue: + tpl = raw_t3_dialogue.get("template") + if not isinstance(tpl, str) or not tpl: + _err(errors, "t3.dialogue.template", "must be a non-empty string") + else: + dlg["template"] = tpl + if "include_top_k_snippets" in raw_t3_dialogue: + itks = _coerce_int(raw_t3_dialogue.get("include_top_k_snippets")) + if itks < 0: + _err(errors, "t3.dialogue.include_top_k_snippets", "must be >= 0") + else: + dlg["include_top_k_snippets"] = itks + t3["dialogue"] = dlg + + # policy sub-block (thresholds) + pol = _ensure_subdict(t3, "policy") + if "tau_high" in raw_t3_policy: + pol["tau_high"] = 
_coerce_float(raw_t3_policy.get("tau_high")) + if not (0.0 <= pol["tau_high"] <= 1.0): + _err(errors, "t3.policy.tau_high", "must be in [0,1]") + if "tau_low" in raw_t3_policy: + pol["tau_low"] = _coerce_float(raw_t3_policy.get("tau_low")) + if not (0.0 <= pol["tau_low"] <= 1.0): + _err(errors, "t3.policy.tau_low", "must be in [0,1]") + if "epsilon_edit" in raw_t3_policy: + pol["epsilon_edit"] = _coerce_float(raw_t3_policy.get("epsilon_edit")) + if not (0.0 <= pol["epsilon_edit"] <= 1.0): + _err(errors, "t3.policy.epsilon_edit", "must be in [0,1]") + # cross-field sanity: tau_high >= tau_low when both present + try: + th = pol.get("tau_high") + tl = pol.get("tau_low") + if th is not None and tl is not None and th < tl: + _err(errors, "t3.policy", "tau_high should be >= tau_low") + except Exception: + pass + t3["policy"] = pol + # t3.reflection normalization (PR77) rfl = _ensure_subdict(t3, "reflection") # backend choice diff --git a/docs/m14/frontend.md b/docs/m14/frontend.md index c3a7c5b..ffbf387 100644 --- a/docs/m14/frontend.md +++ b/docs/m14/frontend.md @@ -80,8 +80,25 @@ file://…/frontend/dist/index.html Then **Load** one or more `run_bundle.json` files via the **Runs** tab file picker (`#fileInput`). + **Where do bundles come from?** From the console (next section). +### Quick path: use the packaged demo bundle + +If you have **clematis** installed, the viewer and a tiny demo bundle are packaged with it. Print their paths and open them directly: + +```bash +python - <<'PY' +from importlib.resources import files +print("viewer:", files("clematis").joinpath("frontend/dist/index.html")) +print("bundle:", files("clematis").joinpath("examples/run_bundles/run_demo_bundle.json")) +PY +``` + +Open the printed **viewer** path in your browser and use **Load** to select the printed **bundle**. The demo is deterministic and loads fully offline. + +See also: `clematis/examples/README_frontend.md` for details and a deterministic regeneration recipe.
+ --- ## 5) Producing run bundles with the console @@ -158,5 +175,6 @@ python -m clematis console -- compare --a /tmp/run.json --b /tmp/run.json - Viewer sources: `frontend/index.html`, `frontend/src`, `frontend/tsdist/assets`, `frontend/styles/*`. - Build helper: `scripts/build_frontend.py`. - Repro: `scripts/repro_check_local.sh --frontend`. -- Tests: `tests/frontend/test_viewer_smoke.py`, `tests/frontend/test_offline_browser.py`, `tests/frontend/test_console_identity.py`. +- Examples: `clematis/examples/README_frontend.md` (demo bundle + how to regenerate). +- Tests: `tests/frontend/test_viewer_smoke.py`, `tests/frontend/test_offline_browser.py`, `tests/frontend/test_console_identity.py`, `tests/frontend/test_example_bundle.py`. - Related PRs: PR130 (TS viewer), PR131 (console), PR132 (offline & reproducibility). diff --git a/frontend/index.html b/frontend/index.html index 1c08154..4372860 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -7,12 +7,14 @@