diff --git a/docs/api/mcp-tools.md b/docs/api/mcp-tools.md index 12b7addd..f50a1afc 100755 --- a/docs/api/mcp-tools.md +++ b/docs/api/mcp-tools.md @@ -1,7 +1,7 @@ # MCP Tools Reference Complete reference for all Surreal-Memory MCP tools. -**53 tools** available via MCP stdio transport. +**56 tools** available via MCP stdio transport. !!! tip Tools are called as MCP tool calls, not CLI commands. In Claude Code, call `smem_recall` directly — do not run `smem recall` in terminal. @@ -71,6 +71,9 @@ Complete reference for all Surreal-Memory MCP tools. - [`smem_report_outcome`](#smem_report_outcome) - [`smem_budget`](#smem_budget) - [`smem_tier`](#smem_tier) + - [`smem_offload`](#smem_offload) + - [`smem_inflate`](#smem_inflate) + - [`smem_situation`](#smem_situation) --- @@ -94,6 +97,7 @@ Store a memory. Auto-detects type if not specified. Error resolution: when a new | `source_id` | string | No | — | Link this memory to a registered source. Creates a SOURCE_OF synapse for provenance tracking. | | `context` | object | No | — | Structured context dict merged into content server-side using type-specific templates. Keys like 'reason', 'alternati... | | `ephemeral` | boolean | No | — | Session-scoped memory: auto-expires after TTL (default 24h), never synced to cloud, excluded from consolidation. Use ... | +| `verbose_extraction` | boolean | No | — | Surface concept-extraction observability stats (dropped_short, dropped_noise, dropped_duplicate_entity) in the respon... | | `compact` | boolean | No | — | Return compact response (strip metadata hints, truncate lists). Saves 60-80% tokens. | | `token_budget` | integer | No | — | Max tokens for response. Progressively strips content to fit budget. | @@ -126,6 +130,7 @@ Query memories by semantic search with confidence ranking. | `mode` | string (`associative`, `exact`) | No | — | Recall mode: 'associative' (default) returns formatted context, 'exact' returns raw neuron contents verbatim without ... 
| | `include_citations` | boolean | No | default: true | Include citation and audit trail in exact recall results (default: true). | | `recall_token_budget` | integer | No | — | When set, activates budget-aware fiber selection: ranks fibers by value-per-token and selects the most efficient ones... | +| `prefer_recent` | boolean | No | — | Re-rank matched fibers newest-first (by time_end, fallback created_at). Use for queries about current state ('what's ... | | `permanent_only` | boolean | No | — | Exclude ephemeral (session-scoped) memories from results. Default: false (include all). | | `clean_for_prompt` | boolean | No | — | Return clean bullet-point text without section headers or neuron-type tags. Use when injecting recall output into pro... | | `tier` | string (`hot`, `warm`, `cold`) | No | — | Filter results by memory tier. Only return memories matching this tier. | @@ -803,6 +808,37 @@ Auto-tier management — promote/demote memories between HOT/WARM/COLD based on | `compact` | boolean | No | — | Return compact response (strip metadata hints, truncate lists). Saves 60-80% tokens. | | `token_budget` | integer | No | — | Max tokens for response. Progressively strips content to fit budget. | +### `smem_offload` + +Store a large tool result as an ephemeral neuron (24h TTL) and return a compact summary + ref_id. Use when tool output is large (>2KB) and you may need to inspect it again later without keeping it in context. Drill back into full content via smem_inflate(ref_id). + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `content` | string | Yes | — | Raw tool output to offload (≤100k chars) | +| `tool_name` | string | Yes | — | Name of the tool that produced this output (e.g. 'ls', 'grep') | +| `summary` | string | No | — | Caller-provided summary. If omitted, an auto-summary (first 200 chars + size hint) is generated. 
| +| `compact` | boolean | No | — | Return compact response (strip metadata hints, truncate lists). Saves 60-80% tokens. | +| `token_budget` | integer | No | — | Max tokens for response. Progressively strips content to fit budget. | + +### `smem_inflate` + +Retrieve full content of a previously offloaded tool result by its ref_id (returned from smem_offload). Returns the original raw content. Returns an error if the ref has expired or never existed. + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `ref_id` | string | Yes | — | ref_id returned by smem_offload | +| `compact` | boolean | No | — | Return compact response (strip metadata hints, truncate lists). Saves 60-80% tokens. | +| `token_budget` | integer | No | — | Max tokens for response. Progressively strips content to fit budget. | + +### `smem_situation` + +One-shot snapshot of the current working situation: active session task, top 3 recent decisions, open blockers, gap detection. Replaces smem_recap + multiple smem_recall calls when resuming a session. Pure read — never mutates state. + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `compact` | boolean | No | — | Return compact response (strip metadata hints, truncate lists). Saves 60-80% tokens. | +| `token_budget` | integer | No | — | Max tokens for response. Progressively strips content to fit budget. 
| + --- -*Auto-generated by `scripts/gen_mcp_docs.py` from `tool_schemas.py` — 53 tools.* +*Auto-generated by `scripts/gen_mcp_docs.py` from `tool_schemas.py` — 56 tools.* diff --git a/src/surreal_memory/cli/commands/config_cmd.py b/src/surreal_memory/cli/commands/config_cmd.py index 6cfd58c0..520a3fa8 100755 --- a/src/surreal_memory/cli/commands/config_cmd.py +++ b/src/surreal_memory/cli/commands/config_cmd.py @@ -13,7 +13,7 @@ def preset_cmd( name: Annotated[ str, - typer.Argument(help="Preset name: safe-cost, balanced, max-recall"), + typer.Argument(help="Preset name: safe-cost, balanced, max-recall, chat-heavy"), ] = "", list_available: Annotated[ bool, diff --git a/src/surreal_memory/config_presets.py b/src/surreal_memory/config_presets.py index 7fc1441a..08bc4649 100755 --- a/src/surreal_memory/config_presets.py +++ b/src/surreal_memory/config_presets.py @@ -1,6 +1,6 @@ """Static configuration presets for Surreal-Memory. -Three built-in profiles that configure brain behavior, maintenance, +Four built-in profiles that configure brain behavior, maintenance, and retrieval for different use cases. Presets are static dicts (not a plugin system) to keep the surface simple and predictable. 
# Preset aimed at conversational agents. Its registered description (see
# _DESCRIPTIONS below) summarises the intent as "fast decay, recent-biased,
# compact". Layout is {section: {setting: value}}, same as the other presets.
CHAT_HEAVY: dict[str, dict[str, Any]] = {
    "brain": {
        # Above the BrainSettings default of 0.1 (unified_config.py).
        "decay_rate": 0.15,
        "reinforcement_delta": 0.05,
        "activation_threshold": 0.25,
        # Below the BrainSettings default of 4.
        "max_spread_hops": 3,
        # Below the BrainSettings default of 1500 — the "compact" part.
        "max_context_tokens": 800,
        # BrainSettings defaults this to 0.0; non-zero here is the
        # "recent-biased" part.
        "freshness_weight": 0.25,
    },
    "maintenance": {
        "auto_consolidate": True,
        # NOTE(review): unit of check_interval is not visible here — confirm.
        "check_interval": 20,
        "auto_consolidate_strategies": ["prune", "merge"],
        "consolidate_cooldown_minutes": 20,
    },
    "eternal": {
        "max_context_tokens": 64_000,
    },
}

# Registry of preset name -> preset dict.
_PRESETS: dict[str, dict[str, dict[str, Any]]] = {
    "safe-cost": SAFE_COST,
    "balanced": BALANCED,
    "max-recall": MAX_RECALL,
    "chat-heavy": CHAT_HEAVY,
}

# One-line human-readable description per preset; keys mirror _PRESETS.
_DESCRIPTIONS: dict[str, str] = {
    "safe-cost": "Lower token usage, faster decay, aggressive pruning",
    "balanced": "Default settings — good all-around performance",
    "max-recall": "Maximum retention, deeper retrieval, conservative pruning",
    "chat-heavy": "Conversational agents (Telegram/Discord/Slack) — fast decay, recent-biased, compact",
}
""" fiber: Fiber @@ -71,6 +74,7 @@ class EncodingResult: neurons_linked: list[str] synapses_created: list[Synapse] conflicts_detected: int = 0 + extraction_stats: dict[str, int] | None = None def build_default_pipeline( @@ -432,6 +436,11 @@ async def encode( neurons_linked=ctx.neurons_linked, synapses_created=ctx.synapses_created, conflicts_detected=ctx.conflicts_detected, + extraction_stats={ + "dropped_short": ctx.dropped_short, + "dropped_noise": ctx.dropped_noise, + "dropped_duplicate_entity": ctx.dropped_duplicate_entity, + }, ) async def _post_encode_neuro(self, anchor: Neuron) -> None: diff --git a/src/surreal_memory/engine/pipeline.py b/src/surreal_memory/engine/pipeline.py index d144158d..b5c1ab1f 100755 --- a/src/surreal_memory/engine/pipeline.py +++ b/src/surreal_memory/engine/pipeline.py @@ -72,6 +72,12 @@ class PipelineContext: # Entities stored here are first-mentions — not yet promoted to neurons deferred_entity_refs: list[str] = field(default_factory=list) + # Concept extraction observability — incremented by ExtractConceptNeuronsStep. + # Surfaced through EncodingResult.extraction_stats only when callers opt in. 
+ dropped_short: int = 0 + dropped_noise: int = 0 + dropped_duplicate_entity: int = 0 + @runtime_checkable class PipelineStep(Protocol): diff --git a/src/surreal_memory/engine/pipeline_steps.py b/src/surreal_memory/engine/pipeline_steps.py index 59e1fd32..8edd7e9a 100755 --- a/src/surreal_memory/engine/pipeline_steps.py +++ b/src/surreal_memory/engine/pipeline_steps.py @@ -320,11 +320,14 @@ def _is_valid_concept(kw: str) -> bool: kw_lower = kw.lower() # Minimum 4 chars — 3-char words produce too many noise concepts ("ai", "os") if len(kw_lower) < 4: + ctx.dropped_short += 1 return False if kw_lower in _NOISE_CONCEPTS: + ctx.dropped_noise += 1 return False # Skip if already captured as an entity neuron if kw_lower in entity_content: + ctx.dropped_duplicate_entity += 1 return False return True diff --git a/src/surreal_memory/mcp/offload_handler.py b/src/surreal_memory/mcp/offload_handler.py new file mode 100644 index 00000000..fb7c50fd --- /dev/null +++ b/src/surreal_memory/mcp/offload_handler.py @@ -0,0 +1,202 @@ +"""MCP handler mixin for tool result offload tools. + +Phase 1 of agent-ergonomics: reduces context bloat by storing large tool +results as ephemeral neurons (24h TTL) and returning a compact ref + summary +that the agent can drill into via ``smem_inflate`` when needed. + +No LLM calls, no compression — pure store/lookup. 
def _estimate_tokens(content: str) -> int:
    """Approximate the token cost of *content*; the result is never below 1.

    Two heuristics are evaluated and the larger one wins:

    * word count scaled by ``TOKEN_RATIO``;
    * character count divided by four (a common rule of thumb for English).

    Taking the maximum protects against whitespace-free inputs (for example a
    long run of identical characters) where the word count is ~1 and would
    badly under-report the real cost.
    """
    by_chars = len(content) // 4
    by_words = int(len(content.split()) * TOKEN_RATIO)
    estimate = by_words if by_words > by_chars else by_chars
    # Floor at 1 so an empty payload still registers a non-zero cost.
    return estimate if estimate > 1 else 1
async def _offload(self, args: dict[str, Any]) -> dict[str, Any]:
    """Persist a bulky tool result as an ephemeral neuron and hand back a ref.

    Recognised keys in ``args``:
        content: Raw tool output (required, non-empty ``str``, at most
            ``_MAX_CONTENT_LEN`` characters). Before storage it is passed
            through ``sanitize_explicit_content`` and ``auto_redact_content``
            so secrets captured in tool output are scrubbed.
        tool_name: Producer of the output. Falls back to ``"unknown"`` and
            is truncated to ``_MAX_TOOL_NAME_LEN`` characters.
        summary: Optional caller-written summary, truncated to
            ``_MAX_EXPLICIT_SUMMARY_LEN`` characters. Non-string or empty
            values are ignored and an automatic summary is built instead.

    No TTL argument is accepted: the neuron is simply created with
    ``ephemeral=True`` and expiry is handled outside this method.

    Returns:
        ``{"ref_id", "summary", "token_saved", "redacted"}`` on success,
        where ``redacted`` tells whether the redactor changed the payload.
        ``{"error": ...}`` on validation failure or any storage error.
    """
    payload = args.get("content")
    tool_name = str(args.get("tool_name") or "unknown")[:_MAX_TOOL_NAME_LEN]

    caller_summary: str | None = None
    raw_summary = args.get("summary")
    if isinstance(raw_summary, str):
        caller_summary = str(raw_summary)[:_MAX_EXPLICIT_SUMMARY_LEN]

    # Validation happens before any storage access.
    if not payload or not isinstance(payload, str):
        return {"error": "content is required and must be a non-empty string"}
    if len(payload) > _MAX_CONTENT_LEN:
        return {"error": f"Content too long ({len(payload)} chars). Max: {_MAX_CONTENT_LEN}."}

    try:
        storage = await self.get_storage()
        _brain, brain_error = await _get_brain_or_error(storage)
        if brain_error:
            return brain_error

        # Tool output regularly carries secrets by accident (API keys in
        # grep hits, tokens in curl logs), so run the same safety pipeline
        # that remember uses. Imported lazily, inside the try, so an import
        # failure is reported as a normal offload failure.
        from surreal_memory.safety.input_firewall import sanitize_explicit_content
        from surreal_memory.safety.sensitive import auto_redact_content

        payload = sanitize_explicit_content(payload)

        try:
            min_severity = int(self.config.safety.auto_redact_min_severity)
        except (TypeError, ValueError, AttributeError):
            # Config missing or malformed — fall back to severity 3.
            min_severity = 3

        scrubbed, matches, _digest = auto_redact_content(payload, min_severity=min_severity)
        redacted = bool(matches)
        if redacted:
            payload = scrubbed
            logger.info(
                "smem_offload: auto-redacted %d sensitive matches for tool=%s",
                len(matches),
                tool_name,
            )

        # The automatic summary is built from the already-scrubbed payload.
        if caller_summary:
            summary = caller_summary
        else:
            summary = _build_summary(payload, tool_name)

        token_estimate = _estimate_tokens(payload)
        token_saved = max(0, token_estimate - _estimate_tokens(summary))

        offload_meta = {
            "_source": "tool_offload",
            "_tool_name": tool_name,
            "_summary": summary,
            "_offload_token_estimate": token_estimate,
            "_offload_redacted": redacted,
        }
        neuron = Neuron.create(
            type=NeuronType.CONCEPT,
            content=payload,
            metadata=offload_meta,
            ephemeral=True,
        )
        await storage.add_neuron(neuron)

        return {
            "ref_id": neuron.id,
            "summary": summary,
            "token_saved": token_saved,
            "redacted": redacted,
        }
    except Exception:
        # Tool-handler boundary: log the details, return a generic error.
        logger.error("Offload failed for tool=%s", tool_name, exc_info=True)
        return {"error": "Offload failed"}
async def _rerank_by_recency(fiber_ids: list[str], storage: Any) -> list[str]:
    """Return *fiber_ids* re-ordered newest-first.

    The sort key for each fiber is ``time_end`` when it is a ``datetime``,
    otherwise ``created_at``. Fibers that cannot be fetched, do not exist, or
    carry no usable timestamp are keyed with ``datetime.min`` and therefore
    sink to the end. Ties keep their incoming relative order (the sort is
    stable, also with ``reverse=True``).

    Timezone-aware timestamps are converted to naive UTC before comparison.
    Without that step a single aware value among naive ones makes the sort
    raise ``TypeError``, which would abort the whole re-rank.

    Args:
        fiber_ids: Fiber IDs in their current (activation) order.
        storage: Object exposing ``async get_fiber(fiber_id)``.

    Returns:
        A new list with the same IDs, newest first. The input is not mutated.
    """
    from datetime import datetime as _dt
    from datetime import timezone as _tz

    # Smallest representable naive datetime: sorts after every real timestamp
    # once the order is reversed.
    floor = _dt.min  # noqa: DTZ901 — naive sentinel matches project datetime contract

    def _as_naive_utc(value: Any) -> Any:
        """Return *value* as a naive-UTC datetime, or None if not a datetime."""
        if not isinstance(value, _dt):
            return None
        if value.utcoffset() is not None:
            return value.astimezone(_tz.utc).replace(tzinfo=None)
        return value

    async def _ts(fid: str) -> Any:
        try:
            fiber = await storage.get_fiber(fid)
        except Exception:
            # Best-effort: one unreadable fiber must not break the re-rank.
            return floor
        if fiber is None:
            return floor
        for attr in ("time_end", "created_at"):
            stamp = _as_naive_utc(getattr(fiber, attr, None))
            if stamp is not None:
                return stamp
        return floor

    # Fetch sequentially, as before, so storage sees one call at a time.
    keyed = [(fid, await _ts(fid)) for fid in fiber_ids]
    keyed.sort(key=lambda pair: pair[1], reverse=True)
    return [fid for fid, _ in keyed]
+ budget_will_rebuild = args.get("recall_token_budget") is not None + if not budget_will_rebuild and recall_mode != "exact": + fibers_ordered: list[Any] = [] + for fid in reranked: + f = await storage.get_fiber(fid) + if f: + fibers_ordered.append(f) + if fibers_ordered: + from surreal_memory.engine.activation import ActivationResult + from surreal_memory.engine.retrieval_context import format_context + + acts: dict[str, ActivationResult] = {} + for co in result.co_activations: + for nid in co.neuron_ids: + acts.setdefault( + nid, + ActivationResult( + neuron_id=nid, + activation_level=co.binding_strength, + hop_distance=0, + path=[nid], + source_anchor=nid, + ), + ) + new_ctx, _ = await format_context( + storage=storage, + activations=acts, + fibers=fibers_ordered, + max_tokens=max_tokens, + brain_id=brain_id, + clean_for_prompt=clean_for_prompt, + ) + if new_ctx and hasattr(result, "_replace"): + result = result._replace(context=new_ctx) + except Exception: + logger.debug("prefer_recent rerank failed, keeping default order", exc_info=True) + # Exact mode: return raw neuron contents without truncation if recall_mode == "exact" and result.fibers_matched: exact_items: list[dict[str, Any]] = [] diff --git a/src/surreal_memory/mcp/remember_handler.py b/src/surreal_memory/mcp/remember_handler.py index bbbb461c..b26ae448 100755 --- a/src/surreal_memory/mcp/remember_handler.py +++ b/src/surreal_memory/mcp/remember_handler.py @@ -722,6 +722,10 @@ async def _remember(self, args: dict[str, Any]) -> dict[str, Any]: if alert_info: response.update(alert_info) + # Opt-in concept-extraction observability (c088eaf) — drop counts from pipeline_steps + if bool(args.get("verbose_extraction", False)) and result.extraction_stats is not None: + response["extraction_stats"] = result.extraction_stats + return response async def _remember_batch(self, args: dict[str, Any]) -> dict[str, Any]: diff --git a/src/surreal_memory/mcp/server.py b/src/surreal_memory/mcp/server.py index 
def _running_under_plugin() -> bool:
    """Report whether a Claude Code plugin install of Surreal-Memory exists.

    A plugin ships its own ``hooks.json`` which Claude Code loads directly.
    If the MCP server also injected hooks into ``~/.claude/settings.json``,
    every hook would fire twice (issue #169), so callers use this check to
    skip hook injection.

    Heuristic: the plugin cache is laid out as
    ``~/.claude/plugins/cache/<marketplace>/surreal-memory/``. The function
    returns True as soon as any first-level directory under the cache root
    contains a ``surreal-memory`` directory. A missing cache root, or an
    ``OSError`` while scanning it, yields False.
    """
    plugin_cache = Path.home().joinpath(".claude", "plugins", "cache")
    if plugin_cache.is_dir():
        try:
            return any(
                entry.joinpath("surreal-memory").is_dir()
                for entry in plugin_cache.iterdir()
            )
        except OSError:
            # Unreadable cache directory: treat as "no plugin".
            return False
    return False
# ── SITUATION (agent-ergonomics) ──

async def _resolve_content(self, tm: TypedMemory, storage: NeuralStorage) -> str:
    """Pick the best available display text for a typed memory.

    Lookup order, cheapest first:

    1. ``tm.metadata["content"]`` when it is a non-empty string;
    2. the ``summary`` of the fiber referenced by ``tm.fiber_id``;
    3. the content of that fiber's anchor neuron.

    Returns an empty string when nothing is found or when a storage call
    raises (the failure is logged at debug level).
    """
    meta = tm.metadata
    if meta:
        inline = meta.get("content")
        if isinstance(inline, str) and inline:
            return inline

    resolved = ""
    try:
        fiber = await storage.get_fiber(tm.fiber_id)
        if fiber is not None:
            fiber_summary = getattr(fiber, "summary", None)
            if isinstance(fiber_summary, str) and fiber_summary:
                resolved = fiber_summary
            else:
                anchor = getattr(fiber, "anchor_neuron_id", None)
                neuron = await storage.get_neuron(anchor) if anchor else None
                if neuron and neuron.content:
                    resolved = neuron.content
    except Exception:
        logger.debug("Situation: fiber fetch failed for %s", tm.fiber_id, exc_info=True)
    return resolved

async def _situation(self, _args: dict[str, Any]) -> dict[str, Any]:
    """Build a one-shot snapshot of the current working situation.

    Bundles three lookups into a single response so an agent resuming work
    does not have to chain several tool calls:

    * the active session's metadata (task / feature / start time) or, when
      no session is active, the result of the session-gap check;
    * the three newest DECISION memories (out of at most 20 fetched);
    * TODO memories tagged ``blocker`` that are not also tagged ``resolved``.

    Each lookup is independently best-effort: a failure is logged at debug
    level and leaves that section at its default (items gathered before the
    failure are kept). This method issues only read calls against storage.

    Returns:
        Dict with keys ``active_task``, ``active_feature``,
        ``session_started_at``, ``recent_decisions``, ``open_blockers``,
        ``files_in_session`` (always ``[]`` for now), ``gap_detected`` and
        a human-readable ``suggestion``.
    """
    storage = await self.get_storage()

    # 1) Session: either pick up the active session or probe for a gap.
    active_meta: dict[str, Any] | None = None
    gap_detected = False
    try:
        session = await self._find_current_session(storage)
        session_is_active = bool(session and session.metadata.get("active", True))
        if session_is_active:
            active_meta = session.metadata
        else:
            gap_detected = await self._check_session_gap(storage)
    except Exception:
        logger.debug("Situation: session lookup failed", exc_info=True)

    # 2) Decisions: newest three by created_at.
    recent_decisions: list[dict[str, Any]] = []
    try:
        newest_first = list(
            await storage.find_typed_memories(
                memory_type=MemoryType.DECISION,
                limit=20,
            )
        )
        newest_first.sort(key=lambda mem: mem.created_at, reverse=True)
        for decision in newest_first[:3]:
            text = await self._resolve_content(decision, storage)
            recent_decisions.append(
                {"content": text, "created_at": decision.created_at.isoformat()}
            )
    except Exception:
        logger.debug("Situation: decision lookup failed", exc_info=True)

    # 3) Blockers: TODOs tagged "blocker" and not yet tagged "resolved".
    open_blockers: list[dict[str, Any]] = []
    try:
        candidates = await storage.find_typed_memories(
            memory_type=MemoryType.TODO,
            tags={"blocker"},
            limit=20,
        )
        for todo in candidates:
            if "resolved" not in todo.tags:
                text = await self._resolve_content(todo, storage)
                open_blockers.append({"content": text, "tag": "blocker"})
    except Exception:
        logger.debug("Situation: blocker lookup failed", exc_info=True)

    # Pick the single most relevant hint; first matching rule wins.
    if gap_detected:
        hint = "Gap detected — run smem_auto(action='flush') with recent conversation."
    elif active_meta is None and not recent_decisions:
        hint = (
            "Fresh brain — call smem_session(action='set', feature='...', task='...') "
            "to anchor your work."
        )
    elif open_blockers:
        hint = f"{len(open_blockers)} open blocker(s) — review before starting new work."
    else:
        hint = "Situation looks clean — continue working."

    session_info = active_meta or {}
    # files_in_session stays an empty list until session metadata tracks
    # edited files; the key is kept so consumers can rely on its presence.
    result: dict[str, Any] = {
        "active_task": session_info.get("task") or None,
        "active_feature": session_info.get("feature") or None,
        "session_started_at": session_info.get("started_at") or None,
        "recent_decisions": recent_decisions,
        "open_blockers": open_blockers,
        "files_in_session": [],
        "gap_detected": gap_detected,
    }
    result["suggestion"] = hint
    return result
+ delta = (summary_created - fp_dt).total_seconds() + return delta >= 60 except Exception: logger.debug("Session gap check failed", exc_info=True) diff --git a/src/surreal_memory/mcp/tool_handlers.py b/src/surreal_memory/mcp/tool_handlers.py index 5836c869..1d31a2b3 100755 --- a/src/surreal_memory/mcp/tool_handlers.py +++ b/src/surreal_memory/mcp/tool_handlers.py @@ -30,6 +30,7 @@ from surreal_memory.mcp.evolution_handler import EvolutionHandler from surreal_memory.mcp.instruction_handler import InstructionHandler from surreal_memory.mcp.lifecycle_handler import LifecycleHandler +from surreal_memory.mcp.offload_handler import OffloadHandler from surreal_memory.mcp.provenance_handler import ProvenanceHandler from surreal_memory.mcp.recall_handler import RecallHandler from surreal_memory.mcp.remember_handler import RememberHandler @@ -54,6 +55,7 @@ class ToolHandler( InstructionHandler, BudgetHandler, TierHandler, + OffloadHandler, RememberHandler, RecallHandler, ): diff --git a/src/surreal_memory/mcp/tool_schemas.py b/src/surreal_memory/mcp/tool_schemas.py index a5e32c17..863a4735 100755 --- a/src/surreal_memory/mcp/tool_schemas.py +++ b/src/surreal_memory/mcp/tool_schemas.py @@ -24,6 +24,7 @@ "smem_recap", "smem_todo", "smem_session", + "smem_situation", "smem_auto", "smem_eternal", } @@ -194,6 +195,12 @@ def get_tool_schemas_for_tier(tier: str) -> list[dict[str, Any]]: "never synced to cloud, excluded from consolidation. " "Use for scratch notes, debugging context, temporary reasoning.", }, + "verbose_extraction": { + "type": "boolean", + "description": "Surface concept-extraction observability stats " + "(dropped_short, dropped_noise, dropped_duplicate_entity) in the response. " + "Default: false. Useful for debugging the noise filter or measuring memory hygiene.", + }, }, "required": ["content"], }, @@ -348,6 +355,13 @@ def get_tool_schemas_for_tier(tier: str) -> list[dict[str, Any]]: "and selects the most efficient ones to fit within this budget. 
" "Adds budget_stats to the response. Default: not set (uses standard sequential truncation).", }, + "prefer_recent": { + "type": "boolean", + "description": "Re-rank matched fibers newest-first (by time_end, fallback created_at). " + "Use for queries about current state ('what's the current version', 'today's status'). " + "Do NOT use for historical questions ('how did we design X') — recency bias will mislead. " + "Default: false.", + }, "permanent_only": { "type": "boolean", "description": "Exclude ephemeral (session-scoped) memories from results. Default: false (include all).", @@ -1937,4 +1951,61 @@ def get_tool_schemas_for_tier(tier: str) -> list[dict[str, Any]]: "required": ["action"], }, }, + { + "name": "smem_offload", + "description": "Store a large tool result as an ephemeral neuron (24h TTL) " + "and return a compact summary + ref_id. Use when tool output is large " + "(>2KB) and you may need to inspect it again later without keeping it " + "in context. Drill back into full content via smem_inflate(ref_id).", + "inputSchema": { + "type": "object", + "properties": { + "content": { + "type": "string", + "maxLength": 100000, + "description": "Raw tool output to offload (≤100k chars)", + }, + "tool_name": { + "type": "string", + "maxLength": 100, + "description": "Name of the tool that produced this output (e.g. 'ls', 'grep')", + }, + "summary": { + "type": "string", + "maxLength": 500, + "description": "Caller-provided summary. If omitted, an auto-summary " + "(first 200 chars + size hint) is generated.", + }, + }, + "required": ["content", "tool_name"], + }, + }, + { + "name": "smem_inflate", + "description": "Retrieve full content of a previously offloaded tool result " + "by its ref_id (returned from smem_offload). Returns the original raw content. 
" + "Returns an error if the ref has expired or never existed.", + "inputSchema": { + "type": "object", + "properties": { + "ref_id": { + "type": "string", + "description": "ref_id returned by smem_offload", + }, + }, + "required": ["ref_id"], + }, + }, + { + "name": "smem_situation", + "description": "One-shot snapshot of the current working situation: " + "active session task, top 3 recent decisions, open blockers, gap detection. " + "Replaces smem_recap + multiple smem_recall calls when resuming a session. " + "Pure read — never mutates state.", + "inputSchema": { + "type": "object", + "properties": {}, + "required": [], + }, + }, ] diff --git a/src/surreal_memory/unified_config.py b/src/surreal_memory/unified_config.py index 9694e8e3..d75114ad 100755 --- a/src/surreal_memory/unified_config.py +++ b/src/surreal_memory/unified_config.py @@ -13,6 +13,7 @@ from __future__ import annotations import asyncio +import dataclasses import json import logging import os @@ -217,7 +218,16 @@ def _load_embedding_settings(data: dict[str, Any]) -> EmbeddingSettings: @dataclass class BrainSettings: - """Settings for brain behavior.""" + """Settings for brain behavior. + + The explicit fields are the historical keys exposed via ``[brain]`` in + ``config.toml``. ``extras`` captures any additional ``[brain]`` keys that map + onto ``core.brain.BrainConfig`` fields added after this class was first + defined (e.g. ``tag_match_boost``, ``rrf_k``, …), so new BrainConfig knobs + become config-toml-controllable without a parallel field for each one + (issue #168). Unknown keys are filtered against BrainConfig's field set, so + typos in ``config.toml`` do not crash brain creation. 
+ """ decay_rate: float = 0.1 reinforcement_delta: float = 0.05 @@ -225,6 +235,18 @@ class BrainSettings: max_spread_hops: int = 4 max_context_tokens: int = 1500 freshness_weight: float = 0.0 + extras: dict[str, Any] = field(default_factory=dict) + + _EXPLICIT_KEYS: ClassVar[frozenset[str]] = frozenset( + { + "decay_rate", + "reinforcement_delta", + "activation_threshold", + "max_spread_hops", + "max_context_tokens", + "freshness_weight", + } + ) def to_dict(self) -> dict[str, Any]: return { @@ -234,10 +256,12 @@ def to_dict(self) -> dict[str, Any]: "max_spread_hops": self.max_spread_hops, "max_context_tokens": self.max_context_tokens, "freshness_weight": self.freshness_weight, + **self.extras, } @classmethod def from_dict(cls, data: dict[str, Any]) -> BrainSettings: + extras = {k: v for k, v in data.items() if k not in cls._EXPLICIT_KEYS} return cls( decay_rate=data.get("decay_rate", 0.1), reinforcement_delta=data.get("reinforcement_delta", 0.05), @@ -245,8 +269,53 @@ def from_dict(cls, data: dict[str, Any]) -> BrainSettings: max_spread_hops=data.get("max_spread_hops", 4), max_context_tokens=data.get("max_context_tokens", 1500), freshness_weight=data.get("freshness_weight", 0.0), + extras=extras, ) + def to_brain_config_kwargs(self, embedding: EmbeddingSettings | None = None) -> dict[str, Any]: + """Build kwargs for ``core.brain.BrainConfig`` from this settings instance. + + Combines the explicit BrainSettings fields, embedding-derived fields, and + any ``extras`` keys that match a real BrainConfig field name. Unknown + extras are dropped so ``BrainConfig(**kwargs)`` is safe. 
+ """ + from surreal_memory.core.brain import BrainConfig + + valid_fields = {f.name for f in dataclasses.fields(BrainConfig)} + kwargs: dict[str, Any] = { + "decay_rate": self.decay_rate, + "reinforcement_delta": self.reinforcement_delta, + "activation_threshold": self.activation_threshold, + "max_spread_hops": self.max_spread_hops, + "max_context_tokens": self.max_context_tokens, + "freshness_weight": self.freshness_weight, + } + if embedding is not None: + kwargs.update( + { + "embedding_enabled": embedding.enabled, + "embedding_provider": embedding.provider, + "embedding_model": embedding.model, + "embedding_similarity_threshold": embedding.similarity_threshold, + } + ) + for key, value in self.extras.items(): + if key in valid_fields and key not in kwargs: + kwargs[key] = value + return kwargs + + def runtime_overrides(self) -> dict[str, Any]: + """Return only ``extras`` keys that match real BrainConfig fields. + + Used by storage init to layer ``config.toml [brain]`` over a + previously-stored brain config on upgrade (issue #168). The explicit + fields are excluded because legacy brains may have customized them. + """ + from surreal_memory.core.brain import BrainConfig + + valid_fields = {f.name for f in dataclasses.fields(BrainConfig)} + return {key: value for key, value in self.extras.items() if key in valid_fields} + @dataclass class EternalConfig: @@ -1920,6 +1989,50 @@ async def list_available_brains() -> list[str]: return config.list_brains() +async def _migrate_brain_runtime_config( + storage: NeuralStorage, + brain: Any, + config: UnifiedConfig, +) -> None: + """Layer ``config.toml [brain]`` extras over an already-stored brain.config. + + Brains created on older versions store BrainConfig fields that existed at + creation time; newer fields fall back to BrainConfig defaults on load, so a + ``[brain]`` knob set in ``config.toml`` is silently ignored (issue #168). 
+ This applies any ``extras`` keys from ``BrainSettings`` to the stored brain + config and persists the patched brain. Only ``extras`` keys are applied — the + explicit fields are left untouched because legacy brains may carry per-brain + customizations there. Failures are logged and swallowed — config migration + must never break recall. + """ + try: + overrides = config.brain.runtime_overrides() + if not overrides: + return + + from surreal_memory.utils.timeutils import utcnow + + current = {f.name: getattr(brain.config, f.name) for f in dataclasses.fields(brain.config)} + diff = {k: v for k, v in overrides.items() if current.get(k) != v} + if not diff: + return + + patched_config = dataclasses.replace(brain.config, **diff) + patched_brain = dataclasses.replace(brain, config=patched_config, updated_at=utcnow()) + await storage.save_brain(patched_brain) + logger.info( + "Brain %r config migrated from config.toml [brain] extras: %s", + brain.name, + sorted(diff.keys()), + ) + except Exception: + logger.debug( + "Brain runtime config migration failed for %r (non-fatal)", + getattr(brain, "name", "?"), + exc_info=True, + ) + + async def _get_sqlite_storage( config: UnifiedConfig, name: str, @@ -1965,20 +2078,11 @@ async def _get_sqlite_storage( if brain is None: from surreal_memory.core.brain import BrainConfig - brain_config = BrainConfig( - decay_rate=config.brain.decay_rate, - reinforcement_delta=config.brain.reinforcement_delta, - activation_threshold=config.brain.activation_threshold, - max_spread_hops=config.brain.max_spread_hops, - max_context_tokens=config.brain.max_context_tokens, - freshness_weight=config.brain.freshness_weight, - embedding_enabled=config.embedding.enabled, - embedding_provider=config.embedding.provider, - embedding_model=config.embedding.model, - embedding_similarity_threshold=config.embedding.similarity_threshold, - ) + brain_config = BrainConfig(**config.brain.to_brain_config_kwargs(config.embedding)) brain = Brain.create(name=name, 
config=brain_config, brain_id=name) await storage.save_brain(brain) + else: + await _migrate_brain_runtime_config(storage, brain, config) storage.set_brain(brain.id) _storage_cache[cache_key] = storage @@ -2035,6 +2139,8 @@ async def _get_surrealdb_storage(config: UnifiedConfig, name: str) -> NeuralStor if brain is None: brain = Brain.create(name, brain_id=name) await storage.save_brain(brain) + else: + await _migrate_brain_runtime_config(storage, brain, config) # Brains are addressed by name in this store (neurons carry brain_id as a # plain string), so the brain context stays the name — never brain.id, diff --git a/tests/unit/test_config_presets.py b/tests/unit/test_config_presets.py index 7736eaab..80c3521c 100755 --- a/tests/unit/test_config_presets.py +++ b/tests/unit/test_config_presets.py @@ -19,9 +19,9 @@ class TestListPresets: - def test_returns_three_presets(self) -> None: + def test_returns_all_presets(self) -> None: presets = list_presets() - assert len(presets) == 3 + assert len(presets) == 4 def test_each_has_name_and_description(self) -> None: for p in list_presets(): @@ -34,6 +34,7 @@ def test_names_are_expected(self) -> None: assert "safe-cost" in names assert "balanced" in names assert "max-recall" in names + assert "chat-heavy" in names class TestGetPreset: diff --git a/tests/unit/test_mcp.py b/tests/unit/test_mcp.py index 85016b43..7d2a0809 100755 --- a/tests/unit/test_mcp.py +++ b/tests/unit/test_mcp.py @@ -44,7 +44,7 @@ def test_get_tools(self, server: MCPServer) -> None: with patch("surreal_memory.plugins.get_plugin_tools", return_value=[]): tools = server.get_tools() - assert len(tools) == 53 + assert len(tools) == 56 tool_names = {tool["name"] for tool in tools} assert tool_names == { "smem_remember", @@ -89,6 +89,9 @@ def test_get_tools(self, server: MCPServer) -> None: "smem_show", "smem_source", "smem_provenance", + "smem_offload", + "smem_inflate", + "smem_situation", "smem_edit", "smem_forget", "smem_consolidate", @@ -1053,7 +1056,7 @@ 
async def test_tools_list_message(self, server: MCPServer) -> None: assert response["id"] == 2 assert "result" in response assert "tools" in response["result"] - assert len(response["result"]["tools"]) == 53 + assert len(response["result"]["tools"]) == 56 @pytest.mark.asyncio async def test_tools_call_message(self, server: MCPServer) -> None: diff --git a/tests/unit/test_tool_tiers.py b/tests/unit/test_tool_tiers.py index a78eacaf..a3b8642a 100755 --- a/tests/unit/test_tool_tiers.py +++ b/tests/unit/test_tool_tiers.py @@ -65,7 +65,7 @@ class TestToolTiers: def test_full_tier_returns_all(self) -> None: tools = get_tool_schemas_for_tier("full") - assert len(tools) == 53 + assert len(tools) == 56 def test_full_tier_matches_get_tool_schemas(self) -> None: full = get_tool_schemas_for_tier("full") @@ -75,7 +75,7 @@ def test_full_tier_matches_get_tool_schemas(self) -> None: def test_standard_tier_count(self) -> None: tools = get_tool_schemas_for_tier("standard") - assert len(tools) == 9 + assert len(tools) == 10 def test_standard_tier_correct_names(self) -> None: tools = get_tool_schemas_for_tier("standard") @@ -88,6 +88,7 @@ def test_standard_tier_correct_names(self) -> None: "smem_recap", "smem_todo", "smem_session", + "smem_situation", "smem_auto", "smem_eternal", } @@ -108,7 +109,7 @@ def test_minimal_tier_correct_names(self) -> None: def test_invalid_tier_defaults_to_full(self) -> None: tools = get_tool_schemas_for_tier("bogus") - assert len(tools) == 53 + assert len(tools) == 56 def test_tier_hierarchy_minimal_subset_of_standard(self) -> None: assert TOOL_TIERS["minimal"] < TOOL_TIERS["standard"] @@ -133,13 +134,13 @@ def test_get_tool_schemas_returns_copy(self) -> None: a = get_tool_schemas() b = get_tool_schemas() a.pop() - assert len(b) == 53 + assert len(b) == 56 def test_get_tool_schemas_for_tier_returns_copy(self) -> None: a = get_tool_schemas_for_tier("standard") b = get_tool_schemas_for_tier("standard") a.pop() - assert len(b) == 9 + assert len(b) == 10 
class TestServerTierIntegration: @@ -159,12 +160,12 @@ def _make_server(self, tier: str) -> MCPServer: # noqa: F821 def test_server_full_tier(self) -> None: server = self._make_server("full") with patch("surreal_memory.plugins.get_plugin_tools", return_value=[]): - assert len(server.get_tools()) == 53 + assert len(server.get_tools()) == 56 def test_server_standard_tier(self) -> None: server = self._make_server("standard") with patch("surreal_memory.plugins.get_plugin_tools", return_value=[]): - assert len(server.get_tools()) == 9 + assert len(server.get_tools()) == 10 def test_server_minimal_tier(self) -> None: server = self._make_server("minimal")