From dfa2780ec7e619a03087cda4b851dfa0cc642426 Mon Sep 17 00:00:00 2001 From: "zhoujiahui.01" Date: Tue, 31 Mar 2026 11:53:47 +0800 Subject: [PATCH] Fix parent_uri compatibility with legacy records --- openviking/core/context.py | 12 ++- openviking/session/memory_archiver.py | 12 ++- .../storage/viking_vector_index_backend.py | 8 +- tests/storage/test_collection_schemas.py | 97 +++++++++++++++++++ tests/unit/session/test_memory_archiver.py | 21 +++- tests/unit/test_context.py | 11 +++ 6 files changed, 156 insertions(+), 5 deletions(-) diff --git a/openviking/core/context.py b/openviking/core/context.py index be1ea5fa..55bce1c4 100644 --- a/openviking/core/context.py +++ b/openviking/core/context.py @@ -9,6 +9,7 @@ from openviking.utils.time_utils import format_iso8601, parse_iso_datetime from openviking_cli.session.user_id import UserIdentifier +from openviking_cli.utils.uri import VikingURI class ResourceContentType(str, Enum): @@ -189,6 +190,15 @@ def to_dict(self) -> Dict[str, Any]: return data + @staticmethod + def _derive_parent_uri(uri: str) -> Optional[str]: + """Best-effort parent URI derivation for records persisted without parent_uri.""" + try: + parent = VikingURI(uri).parent + except Exception: + return None + return parent.uri if parent else None + @classmethod def from_dict(cls, data: Dict[str, Any]) -> "Context": """Create a context object from dictionary.""" @@ -196,7 +206,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Context": user_obj = UserIdentifier.from_dict(user_data) if isinstance(user_data, dict) else user_data obj = cls( uri=data["uri"], - parent_uri=data.get("parent_uri"), + parent_uri=data.get("parent_uri") or cls._derive_parent_uri(data["uri"]), temp_uri=data.get("temp_uri"), is_leaf=data.get("is_leaf", False), abstract=data.get("abstract", ""), diff --git a/openviking/session/memory_archiver.py b/openviking/session/memory_archiver.py index 9f610c2b..d4ac08fb 100644 --- a/openviking/session/memory_archiver.py +++ b/openviking/session/memory_archiver.py @@ -16,6 +16,7 @@ from openviking.storage.expr import And, Eq from openviking.utils.time_utils import parse_iso_datetime from openviking_cli.utils.logger import get_logger +from openviking_cli.utils.uri import VikingURI logger = get_logger(__name__) @@ -79,6 +80,14 @@ def __init__( self.threshold = threshold self.min_age_days = min_age_days + @staticmethod + def _derive_parent_uri(uri: str) -> str: + try: + parent = VikingURI(uri).parent + except Exception: + return "" + return parent.uri if parent else "" + async def scan( self, scope_uri: str, @@ -121,7 +130,6 @@ async def scan( "active_count", "updated_at", "context_type", - "parent_uri", ], ) @@ -165,7 +173,7 @@ async def scan( updated_at=updated_at, score=score, context_type=record.get("context_type", ""), - parent_uri=record.get("parent_uri", ""), + parent_uri=self._derive_parent_uri(uri), ) ) diff --git a/openviking/storage/viking_vector_index_backend.py b/openviking/storage/viking_vector_index_backend.py index 22e4604b..ce5dc8e7 100644 --- a/openviking/storage/viking_vector_index_backend.py +++ b/openviking/storage/viking_vector_index_backend.py @@ -136,6 +136,12 @@ def _filter_known_fields(self, data: Dict[str, Any]) -> Dict[str, Any]: except Exception: return data + def _prepare_upsert_payload(self, data: Dict[str, Any]) -> Dict[str, Any]: + """Drop runtime-only or stale legacy fields before writing back to the current schema.""" + payload = {k: v for k, v in data.items() if v is not None} + filtered = self._filter_known_fields(payload) + return {k: v for k, v in filtered.items() if v is not None} + # ========================================================================= # Collection Management # ========================================================================= @@ -224,7 +230,7 @@ async def upsert(self, data: Dict[str, Any]) -> str: if not payload.get("id"): payload["id"] = str(uuid.uuid4()) - payload = self._filter_known_fields(payload) + payload = self._prepare_upsert_payload(payload) ids = self._adapter.upsert(payload) return ids[0] if ids else "" diff --git a/tests/storage/test_collection_schemas.py b/tests/storage/test_collection_schemas.py index 4513c1e0..c1095821 100644 --- a/tests/storage/test_collection_schemas.py +++ b/tests/storage/test_collection_schemas.py @@ -14,6 +14,8 @@ init_context_collection, ) from openviking.storage.queuefs.embedding_msg import EmbeddingMsg +from openviking.storage.viking_vector_index_backend import _SingleAccountBackend +from openviking_cli.utils.config.vectordb_config import VectorDBBackendConfig class _DummyEmbedder: @@ -203,3 +205,98 @@ async def create_collection(self, name, schema): field_names = [field["FieldName"] for field in captured["schema"]["Fields"]] assert "parent_uri" not in field_names assert "parent_uri" not in captured["schema"]["ScalarIndex"] + + +def test_single_account_backend_filters_parent_uri_against_current_schema(): + class _Collection: + def get_meta_data(self): + return { + "Fields": [ + {"FieldName": "id"}, + {"FieldName": "uri"}, + {"FieldName": "abstract"}, + {"FieldName": "account_id"}, + ] + } + + class _Adapter: + mode = "local" + + def get_collection(self): + return _Collection() + + backend = _SingleAccountBackend( + config=VectorDBBackendConfig(backend="local", name="context", dimension=2), + bound_account_id="acc1", + shared_adapter=_Adapter(), + ) + + filtered = backend._filter_known_fields( + { + "id": "rec-1", + "uri": "viking://resources/sample", + "abstract": "sample", + "account_id": "acc1", + "parent_uri": "viking://resources", + } + ) + + assert filtered == { + "id": "rec-1", + "uri": "viking://resources/sample", + "abstract": "sample", + "account_id": "acc1", + } + + +@pytest.mark.asyncio +async def test_single_account_backend_upsert_drops_legacy_parent_uri_before_write(): + captured = {} + + class _Collection: + def get_meta_data(self): + return { + "Fields": [ + {"FieldName": "id"}, + {"FieldName": "uri"}, + {"FieldName": "abstract"}, + {"FieldName": "active_count"}, + {"FieldName": "account_id"}, + ] + } + + class _Adapter: + mode = "local" + + def get_collection(self): + return _Collection() + + def upsert(self, data): + captured["data"] = dict(data) + return ["rec-legacy"] + + backend = _SingleAccountBackend( + config=VectorDBBackendConfig(backend="local", name="context", dimension=2), + bound_account_id="acc1", + shared_adapter=_Adapter(), + ) + + record_id = await backend.upsert( + { + "id": "rec-legacy", + "uri": "viking://resources/sample", + "abstract": "sample", + "active_count": 2, + "account_id": "acc1", + "parent_uri": "viking://resources", + } + ) + + assert record_id == "rec-legacy" + assert captured["data"] == { + "id": "rec-legacy", + "uri": "viking://resources/sample", + "abstract": "sample", + "active_count": 2, + "account_id": "acc1", + } diff --git a/tests/unit/session/test_memory_archiver.py b/tests/unit/session/test_memory_archiver.py index f8089fca..b33d6b7d 100644 --- a/tests/unit/session/test_memory_archiver.py +++ b/tests/unit/session/test_memory_archiver.py @@ -121,6 +121,26 @@ def _make_viking_fs(): class TestScan: + @pytest.mark.asyncio + async def test_scan_requests_no_parent_uri_field(self): + storage = _make_storage([]) + archiver = MemoryArchiver( + viking_fs=_make_viking_fs(), + storage=storage, + threshold=0.5, + min_age_days=7, + ) + + await archiver.scan("viking://memories/", now=NOW) + + assert storage.scroll.await_count == 1 + assert storage.scroll.await_args.kwargs["output_fields"] == [ + "uri", + "active_count", + "updated_at", + "context_type", + ] + @pytest.mark.asyncio async def test_scan_finds_cold_memories(self): records = [ @@ -129,7 +149,6 @@ async def test_scan_finds_cold_memories(self): "active_count": 0, "updated_at": OLD_DATE, "context_type": "memory", - "parent_uri": "viking://memories/", }, ] archiver = MemoryArchiver( diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index f41f459b..9a34d14d 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -429,6 +429,17 @@ def test_from_dict_with_vector(self): assert ctx.vector == [0.1, 0.2, 0.3] + def test_from_dict_derives_parent_uri_when_missing(self): + """Test parent_uri is derived from uri for records written without the field.""" + d = { + "uri": "viking://user/test/memories/preferences/theme.md", + "context_type": "memory", + } + + ctx = Context.from_dict(d) + + assert ctx.parent_uri == "viking://user/test/memories/preferences" + def test_roundtrip(self): """Test to_dict -> from_dict roundtrip.""" original = Context(