From 6c48d266a50f1a6cc9a94f98d053a677f4135ff4 Mon Sep 17 00:00:00 2001 From: Gaodan Fang Date: Fri, 8 May 2026 11:48:05 -0400 Subject: [PATCH 1/2] refactor(mcp): relocate user-facts logic out of EvolveClient Follow the save_trajectory / get_guidelines pattern so EvolveClient stays a pure CRUD layer and the LLM module stays pure LLM. After the refactor, all write-tools converge on client.update_entities and all read-tools on client.search_entities, which sets up the shared MCP-tool-wrapping helper from the original issue as a trivial follow-up. - Add categorize_facts() pure helper in altk_evolve/llm/fact_extraction - Remove store_user_facts / retrieve_user_facts from EvolveClient - Rewrite the matching MCP tools to orchestrate directly: store via extract_facts_from_messages + client.update_entities, retrieve via client.search_entities + the existing fallback chain + categorize_facts. JSON envelopes and tool signatures unchanged. - Move the fact tests from test_client.py into test_mcp_server.py, update mocks to target the CRUD methods, add parametrized coverage for None/""/whitespace messages and non-positive-limit early returns. Refs #241 --- altk_evolve/frontend/client/evolve_client.py | 95 ------------ altk_evolve/frontend/mcp/mcp_server.py | 117 +++++++++++++-- .../llm/fact_extraction/fact_extraction.py | 24 +++ tests/unit/test_client.py | 49 ------- tests/unit/test_mcp_server.py | 137 ++++++++++++++---- 5 files changed, 236 insertions(+), 186 deletions(-) diff --git a/altk_evolve/frontend/client/evolve_client.py b/altk_evolve/frontend/client/evolve_client.py index f6c659c0..12074511 100644 --- a/altk_evolve/frontend/client/evolve_client.py +++ b/altk_evolve/frontend/client/evolve_client.py @@ -1,9 +1,7 @@ import logging -from typing import Any from altk_evolve.backend.base import BaseEntityBackend from altk_evolve.config.evolve import EvolveConfig -from altk_evolve.llm.fact_extraction.fact_extraction import ExtractedFact, extract_facts_from_messages from altk_evolve.schema.conflict_resolution import EntityUpdate from altk_evolve.schema.core import Entity, Namespace, RecordedEntity from altk_evolve.schema.exceptions import NamespaceAlreadyExistsException, NamespaceNotFoundException @@ -245,96 +243,3 @@ def ensure_namespace(self, namespace_id: str) -> Namespace: return self.create_namespace(namespace_id) except NamespaceAlreadyExistsException: return self.get_namespace_details(namespace_id) - - def store_user_facts( - self, - namespace_id: str, - message: str, - user_id: str, - metadata: dict[str, Any] | None = None, - enable_conflict_resolution: bool = False, - ) -> list[EntityUpdate]: - """Extract facts from a user utterance and persist them as `fact` entities.""" - message = (message or "").strip() - if not message: - return [] - - self.ensure_namespace(namespace_id) - - base_metadata: dict[str, Any] = dict(metadata or {}) - base_metadata["user_id"] = user_id - - extracted = extract_facts_from_messages([{"role": "user", "content": message}]) - entities: list[Entity] = [] - for one in extracted: - if isinstance(one, ExtractedFact): - fact_metadata = dict(base_metadata) - fact_metadata["category"] = one.category - fact_metadata["key"] = one.key - fact_metadata["value"] = one.value - entities.append(Entity(type="fact", content=one.content, metadata=fact_metadata)) - else: - entities.append(Entity(type="fact", content=str(one), metadata=dict(base_metadata))) - - if not entities: - return [] - - return self.update_entities( - namespace_id=namespace_id, - entities=entities, - enable_conflict_resolution=enable_conflict_resolution, - ) - - def retrieve_user_facts( - self, - namespace_id: str, - user_id: str, - query: str | None = None, - limit: int = 5, - ) -> dict[str, list[dict[str, Any]]]: - """Retrieve categorized user facts for prompt/context usage.""" - if limit <= 0 or not self.namespace_exists(namespace_id): - return {} - - facts = self.search_entities( - namespace_id=namespace_id, - query=query, - filters={"type": "fact", "metadata.user_id": user_id}, - limit=limit, - ) - if query and not facts: - facts = self.search_entities( - namespace_id=namespace_id, - query=None, - filters={"type": "fact", "metadata.user_id": user_id}, - limit=limit, - ) - if not facts and user_id != "default": - facts = self.search_entities( - namespace_id=namespace_id, - query=query, - filters={"type": "fact", "metadata.user_id": "default"}, - limit=limit, - ) - if query and not facts: - facts = self.search_entities( - namespace_id=namespace_id, - query=None, - filters={"type": "fact", "metadata.user_id": "default"}, - limit=limit, - ) - - categorized_preferences: dict[str, list[dict[str, Any]]] = {} - for fact in facts: - metadata = fact.metadata or {} - category = str(metadata.get("category") or "misc") - categorized_preferences.setdefault(category, []).append( - { - "id": fact.id, - "content": str(fact.content), - "key": metadata.get("key"), - "value": metadata.get("value"), - } - ) - - return categorized_preferences diff --git a/altk_evolve/frontend/mcp/mcp_server.py b/altk_evolve/frontend/mcp/mcp_server.py index 34dfa6f6..68abd01b 100644 --- a/altk_evolve/frontend/mcp/mcp_server.py +++ b/altk_evolve/frontend/mcp/mcp_server.py @@ -20,6 +20,11 @@ from altk_evolve.config.evolve import evolve_config from altk_evolve.frontend.client.evolve_client import EvolveClient from altk_evolve.frontend.api.routes import router as api_router +from altk_evolve.llm.fact_extraction.fact_extraction import ( + ExtractedFact, + categorize_facts, + extract_facts_from_messages, +) from altk_evolve.llm.guidelines.guidelines import generate_guidelines from altk_evolve.schema.core import Entity, RecordedEntity from altk_evolve.schema.exceptions import EvolveException, NamespaceNotFoundException @@ -278,6 +283,10 @@ def get_guidelines( return get_entities_logic(task, "guideline", user_id=user_id, namespace_id=namespace_id, session_id=session_id) +def _empty_store_user_facts_response(user_id: str) -> str: + return json.dumps({"user_id": user_id, "stored_count": 0, "updates": []}) + + @mcp.tool() def store_user_facts( user_id: str, @@ -297,13 +306,44 @@ def store_user_facts( } ) - updates = get_client().store_user_facts( - namespace_id=evolve_config.namespace_id, - message=message, - user_id=user_id, - metadata=metadata_dict, - enable_conflict_resolution=enable_conflict_resolution, - ) + trimmed_message = (message or "").strip() + if not trimmed_message: + return _empty_store_user_facts_response(user_id) + + resolved_ns = _resolve_namespace(None) + + base_metadata: dict[str, Any] = dict(metadata_dict) + base_metadata["user_id"] = user_id + + extracted = extract_facts_from_messages([{"role": "user", "content": trimmed_message}]) + entities: list[Entity] = [] + for one in extracted: + if isinstance(one, ExtractedFact): + fact_metadata = dict(base_metadata) + fact_metadata["category"] = one.category + fact_metadata["key"] = one.key + fact_metadata["value"] = one.value + entities.append(Entity(type="fact", content=one.content, metadata=fact_metadata)) + else: + entities.append(Entity(type="fact", content=str(one), metadata=dict(base_metadata))) + + if not entities: + return _empty_store_user_facts_response(user_id) + + try: + updates = get_client().update_entities( + namespace_id=resolved_ns, + entities=entities, + enable_conflict_resolution=enable_conflict_resolution, + ) + except NamespaceNotFoundException: + _evict_namespace(resolved_ns) + resolved_ns = _resolve_namespace(None) + updates = get_client().update_entities( + namespace_id=resolved_ns, + entities=entities, + enable_conflict_resolution=enable_conflict_resolution, + ) serialized_updates = [ { @@ -325,15 +365,66 @@ def store_user_facts( ) -@mcp.tool() -def retrieve_user_facts(user_id: str, query: str | None = None, limit: int = 5) -> str: - """Retrieve categorized user facts/preferences for a durable user identity.""" - categories = get_client().retrieve_user_facts( - namespace_id=evolve_config.namespace_id, - user_id=user_id, +def _search_facts_with_fallback( + namespace_id: str, + user_id: str, + query: str | None, + limit: int, +) -> list[RecordedEntity]: + """Fetch fact entities for a user with the legacy fallback chain. + + Order: (1) user filter + query, (2) user filter without query, (3) default + user with query, (4) default user without query. The default-user fallback + is skipped when the caller is already ``"default"``. + """ + client = get_client() + facts = client.search_entities( + namespace_id=namespace_id, query=query, + filters={"type": "fact", "metadata.user_id": user_id}, limit=limit, ) + if query and not facts: + facts = client.search_entities( + namespace_id=namespace_id, + query=None, + filters={"type": "fact", "metadata.user_id": user_id}, + limit=limit, + ) + if not facts and user_id != "default": + facts = client.search_entities( + namespace_id=namespace_id, + query=query, + filters={"type": "fact", "metadata.user_id": "default"}, + limit=limit, + ) + if query and not facts: + facts = client.search_entities( + namespace_id=namespace_id, + query=None, + filters={"type": "fact", "metadata.user_id": "default"}, + limit=limit, + ) + return facts + + +@mcp.tool() +def retrieve_user_facts(user_id: str, query: str | None = None, limit: int = 5) -> str: + """Retrieve categorized user facts/preferences for a durable user identity.""" + namespace_id = evolve_config.namespace_id + + if limit <= 0 or not get_client().namespace_exists(namespace_id): + return json.dumps( + { + "user_id": user_id, + "query": query, + "matched_count": 0, + "categories": {}, + } + ) + + facts = _search_facts_with_fallback(namespace_id, user_id, query, limit) + categories = categorize_facts(facts) matched_count = sum(len(items) for items in categories.values()) return json.dumps( diff --git a/altk_evolve/llm/fact_extraction/fact_extraction.py b/altk_evolve/llm/fact_extraction/fact_extraction.py index 63d74a88..26f1dc83 100644 --- a/altk_evolve/llm/fact_extraction/fact_extraction.py +++ b/altk_evolve/llm/fact_extraction/fact_extraction.py @@ -9,6 +9,7 @@ from altk_evolve.config.llm import llm_settings from altk_evolve.llm.fact_extraction.categorization import CategoryManager +from altk_evolve.schema.core import RecordedEntity from altk_evolve.utils.utils import clean_llm_response @@ -77,3 +78,26 @@ def extract_facts_from_messages(messages: list[dict], use_categorization: bool | last_error = exc continue raise ValueError(f"Failed to parse extracted facts response: {last_error}") + + +def categorize_facts(facts: list[RecordedEntity]) -> dict[str, list[dict[str, Any]]]: + """Group fact entities by their metadata category. + + Pure helper with no client or CRUD coupling: takes a list of already-fetched + RecordedEntity facts and returns a dict keyed by ``metadata.category`` (or + ``"misc"`` when absent), with each entry exposing the id, content, key, and + value of the fact. + """ + categorized: dict[str, list[dict[str, Any]]] = {} + for fact in facts: + metadata = fact.metadata or {} + category = str(metadata.get("category") or "misc") + categorized.setdefault(category, []).append( + { + "id": fact.id, + "content": str(fact.content), + "key": metadata.get("key"), + "value": metadata.get("value"), + } + ) + return categorized diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 49338b59..cc43418b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -196,52 +196,3 @@ def delete_entity_by_id(self, namespace_id, entity_id): monkeypatch.setattr(evolve_client.backend, "delete_entity_by_id", delete_entity_by_id.__get__(evolve_client.backend, BaseEntityBackend)) evolve_client.delete_entity_by_id(namespace_id="foobar", entity_id="1") - - -@pytest.mark.unit -@pytest.mark.parametrize("message", [None, "", " \t\n"]) -def test_store_user_facts_skips_none_empty_or_whitespace(evolve_client: EvolveClient, monkeypatch, message): - def fail_ensure_namespace(namespace_id: str): - raise AssertionError("ensure_namespace should not be called for blank messages") - - def fail_update_entities(namespace_id, entities, enable_conflict_resolution=True): - raise AssertionError("update_entities should not be called for blank messages") - - def fail_extract(messages): - raise AssertionError("extract_facts_from_messages should not be called for blank messages") - - monkeypatch.setattr(evolve_client, "ensure_namespace", fail_ensure_namespace) - monkeypatch.setattr(evolve_client, "update_entities", fail_update_entities) - monkeypatch.setattr("altk_evolve.frontend.client.evolve_client.extract_facts_from_messages", fail_extract) - - result = evolve_client.store_user_facts(namespace_id="foobar", message=message, user_id="u1") - - assert result == [] - - -@pytest.mark.unit -def test_store_user_facts_uses_trimmed_message(evolve_client: EvolveClient, monkeypatch): - captured: dict = {"ensure_namespace_called": False} - - def ensure_namespace(namespace_id: str): - captured["ensure_namespace_called"] = True - return Namespace(id=namespace_id, created_at=datetime.datetime.now(datetime.UTC)) - - def extract(messages): - captured["message_content"] = messages[0]["content"] - return ["trimmed fact"] - - def update_entities(namespace_id, entities, enable_conflict_resolution=True): - captured["entity_content"] = entities[0].content if entities else None - return [EntityUpdate(id="1", type="fact", content="trimmed fact", event="ADD")] - - monkeypatch.setattr(evolve_client, "ensure_namespace", ensure_namespace) - monkeypatch.setattr(evolve_client, "update_entities", update_entities) - monkeypatch.setattr("altk_evolve.frontend.client.evolve_client.extract_facts_from_messages", extract) - - result = evolve_client.store_user_facts(namespace_id="foobar", message=" hello world \n", user_id="u1") - - assert captured["ensure_namespace_called"] is True - assert captured["message_content"] == "hello world" - assert captured["entity_content"] == "trimmed fact" - assert result[0].event == "ADD" diff --git a/tests/unit/test_mcp_server.py b/tests/unit/test_mcp_server.py index d9dfa8b6..84f14150 100644 --- a/tests/unit/test_mcp_server.py +++ b/tests/unit/test_mcp_server.py @@ -11,7 +11,8 @@ store_user_facts, retrieve_user_facts, ) -from altk_evolve.schema.core import Namespace +from altk_evolve.llm.fact_extraction.fact_extraction import ExtractedFact +from altk_evolve.schema.core import Namespace, RecordedEntity from altk_evolve.schema.conflict_resolution import EntityUpdate pytestmark = pytest.mark.unit @@ -277,58 +278,114 @@ def test_save_trajectory_backward_compat_no_extra_params(mock_get_client): def test_store_user_facts_returns_structured_payload(mock_get_client): - mock_update = EntityUpdate(id="fact-1", type="fact", content="Prefers concise answers", event="ADD", metadata={"category": "style"}) - mock_get_client.store_user_facts.return_value = [mock_update] - - result = json.loads( - store_user_facts( - user_id="user-123", - message="I prefer concise answers.", - metadata=json.dumps({"source": "cuga-lite"}), - ) + extracted = ExtractedFact( + category="style", + key="response_style", + value="concise", + content="Prefers concise answers", + ) + mock_update = EntityUpdate( + id="fact-1", + type="fact", + content="Prefers concise answers", + event="ADD", + metadata={"category": "style"}, ) + mock_get_client.update_entities.return_value = [mock_update] + + with patch( + "altk_evolve.frontend.mcp.mcp_server.extract_facts_from_messages", + return_value=[extracted], + ) as mock_extract: + result = json.loads( + store_user_facts( + user_id="user-123", + message="I prefer concise answers.", + metadata=json.dumps({"source": "cuga-lite"}), + ) + ) assert result["user_id"] == "user-123" assert result["stored_count"] == 1 assert result["updates"][0]["id"] == "fact-1" assert result["updates"][0]["metadata"]["category"] == "style" + # Verify the MCP tool orchestrated directly: extraction + update_entities. + assert mock_extract.called + mock_get_client.update_entities.assert_called_once() + call_kwargs = mock_get_client.update_entities.call_args.kwargs + entities = call_kwargs["entities"] + assert len(entities) == 1 + assert entities[0].type == "fact" + assert entities[0].metadata["user_id"] == "user-123" + assert entities[0].metadata["category"] == "style" + assert entities[0].metadata["source"] == "cuga-lite" + assert call_kwargs["enable_conflict_resolution"] is False + def test_store_user_facts_invalid_metadata_json(mock_get_client): - result = json.loads( - store_user_facts( - user_id="user-123", - message="I prefer concise answers.", - metadata="{bad json", + with patch("altk_evolve.frontend.mcp.mcp_server.extract_facts_from_messages") as mock_extract: + result = json.loads( + store_user_facts( + user_id="user-123", + message="I prefer concise answers.", + metadata="{bad json", + ) ) - ) assert result["error"] == "Invalid JSON" assert "invalid_metadata" in result - mock_get_client.store_user_facts.assert_not_called() - + mock_extract.assert_not_called() + mock_get_client.update_entities.assert_not_called() -def test_store_user_facts_empty_message_returns_zero_updates(mock_get_client): - mock_get_client.store_user_facts.return_value = [] - result = json.loads(store_user_facts(user_id="user-123", message="")) +@pytest.mark.parametrize("message", [None, "", " \t\n"]) +def test_store_user_facts_skips_blank_message(mock_get_client, message): + with patch("altk_evolve.frontend.mcp.mcp_server.extract_facts_from_messages") as mock_extract: + result = json.loads(store_user_facts(user_id="user-123", message=message)) assert result["user_id"] == "user-123" assert result["stored_count"] == 0 assert result["updates"] == [] + mock_extract.assert_not_called() + mock_get_client.update_entities.assert_not_called() + + +def test_store_user_facts_trims_whitespace(mock_get_client): + mock_get_client.update_entities.return_value = [EntityUpdate(id="fact-1", type="fact", content="hello world", event="ADD")] + + with patch( + "altk_evolve.frontend.mcp.mcp_server.extract_facts_from_messages", + return_value=["hello world"], + ) as mock_extract: + store_user_facts(user_id="u1", message=" hello world \n") + + # Extraction should have been called with the trimmed message. + passed_messages = mock_extract.call_args.args[0] + assert passed_messages[0]["content"] == "hello world" + + # The resulting entity should carry the string content (non-ExtractedFact branch). + entities = mock_get_client.update_entities.call_args.kwargs["entities"] + assert entities[0].content == "hello world" + assert entities[0].metadata == {"user_id": "u1"} def test_retrieve_user_facts_returns_structured_payload(mock_get_client): - mock_get_client.retrieve_user_facts.return_value = { - "style": [ - { - "id": "fact-1", - "content": "Prefers concise answers", + mock_get_client.namespace_exists.return_value = True + mock_get_client.search_entities.return_value = [ + RecordedEntity( + id="fact-1", + type="fact", + content="Prefers concise answers", + created_at=datetime.datetime.now(datetime.UTC), + metadata={ + "category": "style", "key": "response_style", "value": "concise", - } - ] - } + "user_id": "user-123", + }, + ) + ] result = json.loads(retrieve_user_facts(user_id="user-123", query="How should I answer?", limit=5)) @@ -336,3 +393,25 @@ def test_retrieve_user_facts_returns_structured_payload(mock_get_client): assert result["query"] == "How should I answer?" assert result["matched_count"] == 1 assert result["categories"]["style"][0]["value"] == "concise" + # First search call should filter by the caller's user_id with the query. + first_call = mock_get_client.search_entities.call_args_list[0] + assert first_call.kwargs["filters"] == {"type": "fact", "metadata.user_id": "user-123"} + assert first_call.kwargs["query"] == "How should I answer?" + + +def test_retrieve_user_facts_empty_when_namespace_missing(mock_get_client): + mock_get_client.namespace_exists.return_value = False + + result = json.loads(retrieve_user_facts(user_id="user-123", query="anything", limit=5)) + + assert result["matched_count"] == 0 + assert result["categories"] == {} + mock_get_client.search_entities.assert_not_called() + + +def test_retrieve_user_facts_empty_when_limit_not_positive(mock_get_client): + result = json.loads(retrieve_user_facts(user_id="user-123", query=None, limit=0)) + + assert result["matched_count"] == 0 + assert result["categories"] == {} + mock_get_client.search_entities.assert_not_called() From 12d378538273a6f08244070d058c159abe7994c2 Mon Sep 17 00:00:00 2001 From: Gaodan Fang Date: Fri, 8 May 2026 11:48:59 -0400 Subject: [PATCH 2/2] chore: ignore .omc workspace artifacts --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b2928336..5c925ec2 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ demo/workdir/.claude/ dist .coverage .evolve +.omc .omx .secrets event.json