diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py index 17d9508427..c7c1c8a2b0 100644 --- a/openhands-sdk/openhands/sdk/context/agent_context.py +++ b/openhands-sdk/openhands/sdk/context/agent_context.py @@ -8,7 +8,9 @@ from pydantic import ( BaseModel, Field, + PrivateAttr, SecretStr, + ValidationInfo, field_serializer, field_validator, model_validator, @@ -118,6 +120,102 @@ class AgentContext(BaseModel): json_schema_extra={"acp_compatible": True}, ) + # Snapshot of the skills that ``_load_auto_skills`` added on top of + # the caller-supplied list. The serializer drops only the ones still + # equal to this snapshot — if a downstream consumer replaces an + # auto-loaded skill via ``model_copy(update={'skills': merged})`` + # (OpenHands' ``_create_agent_with_skills`` does this when it merges + # in sandbox / repo skills with overlapping names), the replacement + # has different field values and survives serialization. The same + # auto-load will re-run on the receiving end and rebuild the + # auto-loaded subset that wasn't replaced. For a stock configuration + # that turns both flags on (~40 skills bundled under + # ``~/.openhands/skills``) the resolved list is ~260 KB per + # ``AgentContext`` — every ``GET`` on a stored conversation carried + # that. See software-agent-sdk#3301. + # + # Values are deep-copied so an in-place caller mutation + # (``ctx.skills[0].content = "custom"``) doesn't also mutate the + # snapshot — without that, the equality check would still succeed + # and the customised skill would silently disappear from the wire. + _auto_loaded_skills: dict[str, Skill] = PrivateAttr(default_factory=dict) + # The auto-load config that produced ``_auto_loaded_skills``. + # Tracked so the serializer can no-op the trim when the config + # changed after validation — e.g. ``model_copy(update={ + # "load_public_skills": False})``. Without this, a copy with the + # flag flipped off would drop the auto-loaded skills on the wire + # AND fail to re-load them on the next ``model_validate`` (the + # flag is now off), losing them entirely. ``None`` when + # ``_load_auto_skills`` has not run yet. + _auto_load_config: tuple[bool, bool, str | None] | None = PrivateAttr(default=None) + + @field_serializer("skills", when_used="always", mode="wrap") + def _serialize_skills(self, value: list[Skill], handler, info) -> Any: + """Drop unmodified auto-loaded skills from the serialized output. + + The runtime keeps the full resolved list on ``self.skills`` so + prompt rendering and downstream consumers behave exactly as + today. Only the wire payload changes: callers re-loading the + model will trigger ``_load_auto_skills`` again, which rebuilds + the auto-loaded subset deterministically from the same + ``load_user_skills`` / ``load_public_skills`` / + ``marketplace_path`` configuration. + + Equality (not just name match) is required because consumers + like OpenHands' ``_create_agent_with_skills`` replace + auto-loaded skills in-place with their own version under the + same name. A name-only filter would silently drop those + replacements and the receiver would auto-reload the stock + version on the next deserialization. + + Opt-out via ``context={"preserve_full_skills": True}``: paths + that need a stable snapshot of the resolved skill catalog (the + most important one being ``ConversationState._save_base_state`` + — persistence of the conversation to disk) pass this flag so + the serializer is a no-op. Without it, a paused conversation + resumed after the ``~/.openhands/skills`` directory or the + public marketplace updated would silently pick up the *new* + skill content via ``_load_auto_skills``. The API-response path + skips the flag and gets the byte-size win. + + Config-drift safety: if the auto-load config + (``load_user_skills`` / ``load_public_skills`` / + ``marketplace_path``) changed since the snapshot was taken — + typically via ``model_copy(update=...)`` flipping one of those + flags — the trim is skipped. Otherwise the receiver would + either re-load a *different* skill catalog (changed + ``marketplace_path``) or fail to re-load at all (flag turned + off), losing the auto-loaded skills entirely. + + ``mode="wrap"`` + ``handler`` delegation preserves caller + options like ``exclude_none``, ``exclude_defaults``, and + nested ``include`` / ``exclude`` for the surviving skills — + manual ``s.model_dump(...)`` would have ignored them. + """ + # ``round_trip=True`` is Pydantic's canonical signal for "this + # dump must rehydrate without semantic loss" (e.g. for caches, + # snapshots, or anything the caller will ``model_validate`` + # later). Honour it the same way we honour the explicit + # opt-out flag — trimming under round-trip would let the + # reload silently pick up a *different* skill catalog from + # the loader, which is exactly the loss the flag warns against. + if info.round_trip or ( + info.context and info.context.get("preserve_full_skills") + ): + return handler(value) + # Auto-load config drifted (or never ran) → can't trust the + # snapshot to round-trip. Serialize everything as explicit. + current_config = ( + self.load_user_skills, + self.load_public_skills, + self.marketplace_path, + ) + if self._auto_load_config != current_config: + return handler(value) + auto = self._auto_loaded_skills + kept = [s for s in value if auto.get(s.name) != s] + return handler(kept) + @field_serializer("secrets", when_used="always") def _serialize_secrets( self, value: Mapping[str, SecretValue] | None, info @@ -147,8 +245,47 @@ def _validate_skills(cls, v: list[Skill], _info): return v @model_validator(mode="after") - def _load_auto_skills(self): - """Load user and/or public skills if enabled.""" + def _load_auto_skills(self, info: ValidationInfo): + """Load user and/or public skills if enabled. + + Names of skills added here are tracked in + ``_auto_loaded_skills`` so the serializer can drop them from + the wire payload — this validator re-runs on every model load, + so the same skills repopulate without needing to be persisted. + + Two behaviour modes, switched by + ``context={"loading_from_snapshot": True}``: + + Fresh construction (default, no flag): + - Skills the loader returned that aren't already in + ``self.skills`` get appended and tracked as auto-loaded. + - Skills the loader returned that ARE already in + ``self.skills`` (caller-supplied explicit) are left + alone — even if they happen to equal the loader output. + The caller pinned that content deliberately; treating it + as auto-loaded would let a later marketplace update + silently swap it on the next round-trip. + + Loading from a persisted snapshot (``loading_from_snapshot``): + - The persisted ``skills`` list IS authoritative. Don't + append new auto-loaded skills — that would let a + marketplace update or a fresh skill file silently + pollute the snapshot on the next save. + - Skills already in ``self.skills`` that equal the + loader's current output get marked as auto-loaded so + the next serialize trims them (migration path: old + persisted conversations shrink on first reload through + the new SDK). + - Skills in ``self.skills`` that DON'T match the loader's + current output (user edited the file, marketplace + updated since the snapshot was taken) stay explicit so + the on-disk content wins. + + The persistence load path in + ``ConversationState`` factory sets the context flag. API- + response paths and direct caller construction don't, so they + get fresh-construction semantics. + """ if not self.load_user_skills and not self.load_public_skills: return self @@ -160,10 +297,44 @@ def _load_auto_skills(self): marketplace_path=self.marketplace_path, ) - existing_names = {skill.name for skill in self.skills} + # Record the config that produced this snapshot so the + # serializer can detect drift (e.g. ``model_copy(update={ + # "load_public_skills": False})``) and degrade to full + # serialization. + self._auto_load_config = ( + self.load_user_skills, + self.load_public_skills, + self.marketplace_path, + ) + + loading_from_snapshot = bool( + info.context and info.context.get("loading_from_snapshot") + ) + + existing_by_name = {skill.name: skill for skill in self.skills} for name, skill in auto_skills.items(): - if name not in existing_names: + existing = existing_by_name.get(name) + if existing is None: + if loading_from_snapshot: + # Snapshot is authoritative — don't add a skill + # the persisted state never had. This is what + # freezes the auto-loaded set against marketplace + # / user-skill churn between save and load. + continue self.skills.append(skill) + # Deep-copy so an in-place caller mutation + # (``ctx.skills[0].content = "custom"``) doesn't also + # mutate the snapshot — without it the equality check + # would still succeed and the customised skill would + # silently disappear from the wire. + self._auto_loaded_skills[name] = skill.model_copy(deep=True) + elif existing == skill and loading_from_snapshot: + # Migration path: existing matches loader → mark as + # auto-loaded so the next serialize can trim it. + # ONLY fires under loading_from_snapshot — for fresh + # construction this would conflate caller-pinned + # explicit skills with auto-loaded ones. + self._auto_loaded_skills[name] = skill.model_copy(deep=True) else: logger.debug( f"Skipping auto-loaded skill '{name}' (already in explicit skills)" diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py index 7e137929f2..c9faf54603 100644 --- a/openhands-sdk/openhands/sdk/conversation/state.py +++ b/openhands-sdk/openhands/sdk/conversation/state.py @@ -255,8 +255,18 @@ def _save_base_state(self, fs: FileStore) -> None: If a cipher is configured, secrets will be encrypted. Otherwise, they will be redacted (serialized as '**********'). + + ``preserve_full_skills`` is set so the persisted snapshot + includes auto-loaded skills inline — without this the conversation + would silently pick up *new* skill content from + ``~/.openhands/skills`` / the public marketplace on resume, since + ``AgentContext._load_auto_skills`` re-runs on deserialization. + Persistence needs the freeze-at-create-time snapshot; only the + API-response path opts into the trim. See software-agent-sdk#3301. """ - context = {"cipher": self._cipher} if self._cipher else None + context: dict[str, Any] = {"preserve_full_skills": True} + if self._cipher: + context["cipher"] = self._cipher # Warn if secrets exist but no cipher is configured if not self._cipher and self.secret_registry.secret_sources: logger.warning( @@ -339,8 +349,16 @@ def create( # ---- Resume path ---- if base_text: - # Use cipher context for decrypting secrets if provided - context = {"cipher": cipher} if cipher else None + # ``loading_from_snapshot`` tells ``AgentContext._load_auto_skills`` + # that the persisted ``skills`` list is authoritative and must + # NOT be augmented with newly published auto-loaded skills the + # loader picks up post-save. Without it, a snapshot frozen as + # {a} would silently grow to {a, b} after `b` lands in the + # public marketplace, breaking the freeze-at-create guarantee + # ``_save_base_state`` relied on. See software-agent-sdk#3301. + context: dict[str, Any] = {"loading_from_snapshot": True} + if cipher: + context["cipher"] = cipher state = cls.model_validate(json.loads(base_text), context=context) # Restore the conversation with the same id @@ -358,6 +376,28 @@ def create( # Verify compatibility (agent class + tools) agent.verify(state.agent, events=state._events) + # Preserve the loaded snapshot's skills onto the runtime + # agent's ``agent_context``. Without this, the runtime + # agent (constructed by the caller before resume) carries + # a freshly-resolved skill list from its own + # ``_load_auto_skills`` run — which may have drifted + # since the snapshot was taken (marketplace adds, user + # edits). Assigning ``state.agent = agent`` then loses + # the snapshot, and autosave rewrites ``base_state.json`` + # with the drifted set. The snapshot is authoritative on + # resume; runtime-only fields (LLM client, tool wiring, + # etc.) come from the runtime agent. See + # software-agent-sdk#3301. + loaded_agent_context = state.agent.agent_context if state.agent else None + if loaded_agent_context is not None and agent.agent_context is not None: + agent = agent.model_copy( + update={ + "agent_context": agent.agent_context.model_copy( + update={"skills": loaded_agent_context.skills} + ) + } + ) + # Commit runtime-provided values (may autosave) state._autosave_enabled = True state.agent = agent diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py new file mode 100644 index 0000000000..75a7816326 --- /dev/null +++ b/tests/sdk/context/test_agent_context_skills_serialization.py @@ -0,0 +1,611 @@ +"""Tests for the auto-loaded-skills serialization optimisation. + +When ``load_user_skills`` / ``load_public_skills`` are enabled, +``AgentContext._load_auto_skills`` resolves every matching skill and +appends it to ``self.skills`` at model-validation time. Persisting +those skills on the wire is pure duplication — the validator runs +again on every deserialization and rebuilds the same set deterministically. + +The serializer drops auto-loaded skills from ``model_dump`` output but +keeps the in-memory ``self.skills`` intact so runtime prompt rendering +behaves exactly as today. These tests pin both halves of that contract. +""" + +from __future__ import annotations + +from unittest.mock import patch + +from openhands.sdk.context import AgentContext +from openhands.sdk.skills import Skill + + +def _make_skill(name: str, content: str = "auto skill body") -> Skill: + return Skill( + name=name, + content=content, + source=f"/fake/{name}.md", + ) + + +class TestAutoLoadedSkillsSerialization: + def test_auto_loaded_skills_drop_from_serialized_output(self): + """``model_dump`` omits auto-loaded skills (the headline payload win).""" + auto = { + "auto-1": _make_skill("auto-1"), + "auto-2": _make_skill("auto-2"), + "auto-3": _make_skill("auto-3"), + } + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True) + + # In-memory: the validator populated all three. + assert {s.name for s in ctx.skills} == {"auto-1", "auto-2", "auto-3"} + # On the wire: dropped entirely (no caller passed explicit skills). + dumped = ctx.model_dump() + assert dumped["skills"] == [] + + def test_explicit_skills_survive_serialization(self): + """Caller-supplied skills are NOT dropped — only the auto-loaded ones.""" + explicit = _make_skill("user-explicit", "this one is mine") + auto = {"auto-only": _make_skill("auto-only")} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True, skills=[explicit]) + + # In-memory: both present. + assert {s.name for s in ctx.skills} == {"user-explicit", "auto-only"} + # On the wire: only the explicit one. + dumped = ctx.model_dump() + assert [s["name"] for s in dumped["skills"]] == ["user-explicit"] + + def test_explicit_skill_shadows_an_auto_one(self): + """When the explicit list collides with an auto-loaded name, the + explicit one wins in-memory AND on the wire (it was never marked + as auto-loaded — the auto-load step skipped it). + """ + explicit = _make_skill("shared-name", "user version") + auto = {"shared-name": _make_skill("shared-name", "auto version")} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True, skills=[explicit]) + + assert len(ctx.skills) == 1 + assert ctx.skills[0].content == "user version" + dumped = ctx.model_dump() + assert len(dumped["skills"]) == 1 + assert dumped["skills"][0]["name"] == "shared-name" + assert dumped["skills"][0]["content"] == "user version" + + def test_round_trip_via_serialized_output_re_resolves_auto_skills(self): + """Deserializing the trimmed payload + re-validating repopulates the + auto-loaded skills via the same auto-load path. This is what makes + dropping them on the wire safe: the receiver sees the same + in-memory shape as the sender. + """ + auto = { + "auto-a": _make_skill("auto-a"), + "auto-b": _make_skill("auto-b"), + } + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True) + dumped = ctx.model_dump() + # Re-validating the trimmed payload should re-fire the auto-load + # validator and rebuild the in-memory list. + roundtripped = AgentContext.model_validate(dumped) + + assert {s.name for s in roundtripped.skills} == {"auto-a", "auto-b"} + + def test_disabled_auto_load_leaves_explicit_skills_on_the_wire(self): + """With both flags off, ``_load_auto_skills`` is a no-op — every + skill is treated as explicit and serialized normally. + """ + explicit = [_make_skill("foo"), _make_skill("bar")] + ctx = AgentContext(skills=explicit) + assert {s.name for s in ctx.skills} == {"foo", "bar"} + dumped = ctx.model_dump() + assert {s["name"] for s in dumped["skills"]} == {"foo", "bar"} + + def test_migration_stored_skills_matching_loader_are_marked_auto_loaded(self): + """Migration path (loading_from_snapshot only): a conversation + stored before this PR carries the resolved auto-loaded skills + inlined on ``skills``. When ``ConversationState`` loads it with + ``loading_from_snapshot=True``, the validator recognises matching + skills as auto-loaded and drops them from the next serialization — + otherwise the bloat persists until the conversation is recreated. + """ + stored = [_make_skill("auto-x"), _make_skill("auto-y")] + loader_output = { + "auto-x": _make_skill("auto-x"), + "auto-y": _make_skill("auto-y"), + } + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=loader_output, + ): + ctx = AgentContext.model_validate( + {"load_public_skills": True, "skills": stored}, + context={"loading_from_snapshot": True}, + ) + + # No duplicates: skills stayed at 2 (migration recognised them). + assert {s.name for s in ctx.skills} == {"auto-x", "auto-y"} + # And the serialized payload drops them. + dumped = ctx.model_dump() + assert dumped["skills"] == [] + + def test_fresh_construction_does_not_conflate_explicit_with_auto(self): + """Comment-6 regression: fresh ``AgentContext(skills=[explicit])`` + construction where the explicit skill happens to equal what the + loader returns must NOT mark it as auto-loaded. Without this + guard, a later marketplace/user-skill edit would silently swap + the caller's pinned content on the next round-trip. + """ + explicit = _make_skill("shared-name", "caller pinned this") + loader_output = { + "shared-name": _make_skill("shared-name", "caller pinned this") + } + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=loader_output, + ): + ctx = AgentContext(load_public_skills=True, skills=[explicit]) + + # In-memory: only the explicit (validator's "already in + # existing" branch hit, no append). + assert len(ctx.skills) == 1 + # On the wire: stays as explicit — not snapshotted as auto. + dumped = ctx.model_dump() + assert len(dumped["skills"]) == 1 + assert dumped["skills"][0]["content"] == "caller pinned this" + + def test_migration_stored_skills_diverged_from_loader_stay_explicit(self): + """If the persisted skill content no longer matches what the + loader would produce (user edited the file, marketplace updated), + the on-disk version wins — treat it as explicit and keep it on + the wire. This is the safe default: we don't drop content + someone may have intentionally customised. Applies under + ``loading_from_snapshot`` (same as the matching-migration path). + """ + stored = [_make_skill("auto-x", "old persisted content")] + loader_output = {"auto-x": _make_skill("auto-x", "new loader content")} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=loader_output, + ): + ctx = AgentContext.model_validate( + {"load_public_skills": True, "skills": stored}, + context={"loading_from_snapshot": True}, + ) + + # The stored ("old") version wins in-memory. + assert len(ctx.skills) == 1 + assert ctx.skills[0].content == "old persisted content" + # And it stays on the wire — not treated as auto-loaded. + dumped = ctx.model_dump() + assert len(dumped["skills"]) == 1 + assert dumped["skills"][0]["content"] == "old persisted content" + + def test_loading_from_snapshot_does_not_pick_up_new_auto_loaded_skills(self): + """Comment-5 regression: a persisted snapshot frozen with only + ``{a}`` must not silently grow to ``{a, b}`` if the loader now + also returns a new skill ``b`` (marketplace added it, user + added a file). The ``loading_from_snapshot`` context flag + tells the validator the persisted ``skills`` list is + authoritative. + """ + stored = [_make_skill("a")] + # Loader sees a new ``b`` that wasn't in the persisted snapshot. + loader_output = { + "a": _make_skill("a"), + "b": _make_skill("b", "newly published content"), + } + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=loader_output, + ): + ctx = AgentContext.model_validate( + {"load_public_skills": True, "skills": stored}, + context={"loading_from_snapshot": True}, + ) + + # In-memory: snapshot is authoritative — ``b`` is NOT appended. + assert {s.name for s in ctx.skills} == {"a"} + # Next ``model_dump`` (default, no preserve flag) still trims + # ``a`` since it was matched as auto-loaded. Serialized list + # is empty — exactly what the original snapshot would have + # serialized. + dumped = ctx.model_dump() + assert dumped["skills"] == [] + # And the preserve-full path also gives back just ``a`` — no + # marketplace pollution. + dumped_full = ctx.model_dump(context={"preserve_full_skills": True}) + assert {s["name"] for s in dumped_full["skills"]} == {"a"} + + def test_fresh_construction_still_appends_new_auto_skills(self): + """Sanity: without the ``loading_from_snapshot`` flag, fresh + construction keeps appending new auto-loaded skills as today. + This is what makes the registry hot for first-launch users. + """ + loader_output = { + "auto-1": _make_skill("auto-1"), + "auto-2": _make_skill("auto-2"), + } + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=loader_output, + ): + ctx = AgentContext(load_public_skills=True) + assert {s.name for s in ctx.skills} == {"auto-1", "auto-2"} + + def test_replacing_auto_skill_via_model_copy_survives_serialization(self): + """Regression guard for OpenHands' ``_create_agent_with_skills`` flow. + + That helper does ``agent_context.model_copy(update={'skills': + merged})`` where ``merged`` is the result of deduping + auto-loaded + sandbox/repo-loaded skills by name, with the + later list winning. If two lists carry the same name, the + merged result is the *replacement* — different content from + what ``_load_auto_skills`` produced. + + A name-only serializer would drop the replacement (the name + matches an auto-loaded one) and the receiver would auto-load + the original stock version, silently dropping OpenHands' + customisations. The equality-based filter must keep the + replacement on the wire. + """ + # Stock auto-loaded skill ("public version"). + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value={"git-skill": _make_skill("git-skill", "public version")}, + ): + ctx = AgentContext(load_public_skills=True) + # OpenHands merges in a different "git-skill" (same name). + replacement = _make_skill("git-skill", "OpenHands custom version") + # Drop the auto-loaded skill, add the replacement. This is what + # ``_merge_skills`` produces — later list (replacement) wins. + merged = [s for s in ctx.skills if s.name != "git-skill"] + [replacement] + new_ctx = ctx.model_copy(update={"skills": merged}) + + # In-memory: only the replacement is present. + assert len(new_ctx.skills) == 1 + assert new_ctx.skills[0].content == "OpenHands custom version" + # On the wire: replacement survives (equality check, not name). + dumped = new_ctx.model_dump() + assert len(dumped["skills"]) == 1 + assert dumped["skills"][0]["content"] == "OpenHands custom version" + + def test_preserve_full_skills_context_flag_keeps_snapshot(self): + """``context={"preserve_full_skills": True}`` opts out of the trim. + + This is the persistence path: ``ConversationState._save_base_state`` + passes this flag so the snapshot of auto-loaded skills is frozen + at conversation-create time. Without it, a paused conversation + resumed after the skill source updates would silently pick up + the *new* content via ``_load_auto_skills``, breaking the + guarantee that the agent runs the same skills it started with. + """ + auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(5)} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True) + + # Default: trim. + dumped_default = ctx.model_dump() + assert dumped_default["skills"] == [] + + # Persistence opt-out: full snapshot. + dumped_persist = ctx.model_dump(context={"preserve_full_skills": True}) + assert len(dumped_persist["skills"]) == 5 + assert {s["name"] for s in dumped_persist["skills"]} == set(auto.keys()) + + def test_wrap_serializer_propagates_exclude_none(self): + """Caller-supplied ``exclude_none`` reaches nested ``Skill`` fields. + + ``Skill`` has several optional fields that default to ``None`` + (``trigger``, ``source``, ``mcp_tools``, …). A manual + ``s.model_dump(...)`` inside the serializer would have dropped + the caller's ``exclude_none`` setting. ``mode="wrap"`` + + ``handler`` delegation preserves it. + """ + explicit = Skill(name="x", content="hello") # trigger / mcp_tools / etc. → None + ctx = AgentContext(skills=[explicit]) + + # Without exclude_none, None fields appear. + dumped_keep_nones = ctx.model_dump() + skill_dict = dumped_keep_nones["skills"][0] + # ``trigger`` is one of the always-present None defaults; pick + # it as the canary. + assert "trigger" in skill_dict + assert skill_dict["trigger"] is None + + # With exclude_none, the field is omitted from the nested skill. + dumped_drop_nones = ctx.model_dump(exclude_none=True) + skill_dict_pruned = dumped_drop_nones["skills"][0] + assert "trigger" not in skill_dict_pruned + + def test_save_base_state_persists_full_skill_snapshot(self): + """End-to-end: ``ConversationState._save_base_state`` writes the + full skill list to disk, even though the in-memory ``model_dump`` + default trims them. + """ + import json + import uuid + + from openhands.sdk.conversation.state import ConversationState + from openhands.sdk.io import InMemoryFileStore + from openhands.sdk.workspace.local import LocalWorkspace + + auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(3)} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + agent_context = AgentContext(load_public_skills=True) + # Use a real lightweight agent to satisfy ConversationState. + from openhands.sdk.agent import Agent + from openhands.sdk.llm import LLM + + agent = Agent( + llm=LLM(model="test", usage_id="test"), + agent_context=agent_context, + ) + + import tempfile + + with tempfile.TemporaryDirectory() as tmp: + state = ConversationState.create( + id=uuid.uuid4(), + agent=agent, + workspace=LocalWorkspace(working_dir=tmp), + ) + fs = InMemoryFileStore() + state._save_base_state(fs) + + persisted = json.loads(fs.read("base_state.json")) + + # The persisted snapshot must include the auto-loaded skills so + # a resumed conversation gets the same content even if the + # skill source changed in the meantime. + persisted_skill_names = [ + s["name"] for s in persisted["agent"]["agent_context"]["skills"] + ] + assert set(persisted_skill_names) == set(auto.keys()), ( + "persistence path lost the auto-loaded skill snapshot — " + "without it, a resumed conversation could silently pick up " + "newer skill content from ~/.openhands/skills" + ) + + def test_in_place_skill_mutation_preserves_customisation_on_wire(self): + """Regression for the mutable-snapshot bug. + + ``_auto_loaded_skills`` used to store the same ``Skill`` + reference that landed in ``self.skills``. An in-place edit + like ``ctx.skills[0].content = "custom"`` mutated both copies, + the equality check still succeeded, and the customised skill + silently vanished from ``model_dump`` output. Deep-copying the + snapshot at auto-load time keeps the two references + independent so the equality check correctly reports the + in-memory skill as "modified" and keeps it on the wire. + """ + auto = {"editable": _make_skill("editable", "original content")} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True) + + # Caller mutates the skill in-place after the auto-load. + ctx.skills[0].content = "user customised content" + + dumped = ctx.model_dump() + assert len(dumped["skills"]) == 1 + assert dumped["skills"][0]["content"] == "user customised content" + + def test_model_copy_toggling_load_flag_off_keeps_skills_on_wire(self): + """Regression for the config-drift bug. + + ``ctx.model_copy(update={"load_public_skills": False})`` keeps + the runtime auto-loaded skills (the validator doesn't re-run + on model_copy) but used to serialize them as ``[]`` because + the snapshot still matched the in-memory names. On + ``model_validate(...)`` the receiver wouldn't auto-reload them + (flag is False), losing them entirely. + + With the config-drift check, the serializer detects that the + current config doesn't match the snapshot config and emits + the full skill list, preserving the round-trip. + """ + auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(3)} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True) + + # Flip the flag off via model_copy — does NOT re-run the + # validator, so ctx.skills still has the 3 auto-loaded skills. + flipped = ctx.model_copy(update={"load_public_skills": False}) + assert len(flipped.skills) == 3 + + # Serializer must NOT trim — the config drifted. Round-trip + # would otherwise lose them (flag is False on the receiver). + dumped = flipped.model_dump() + assert {s["name"] for s in dumped["skills"]} == set(auto.keys()) + + # And the round-trip preserves them in-memory. + roundtripped = AgentContext.model_validate(dumped) + assert {s.name for s in roundtripped.skills} == set(auto.keys()) + + def test_model_copy_changing_marketplace_path_keeps_skills_on_wire(self): + """Same config-drift concern as above but for ``marketplace_path``. + + If the snapshot was taken with ``marketplace_path="A"`` and a + copy bumps it to ``"B"``, the receiver would auto-load from + ``"B"`` and get a *different* catalog. Preserve the original + list on the wire so the receiver can't silently swap. + """ + auto = {"marketplace-skill": _make_skill("marketplace-skill")} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True, marketplace_path="path-A") + + bumped = ctx.model_copy(update={"marketplace_path": "path-B"}) + dumped = bumped.model_dump() + assert {s["name"] for s in dumped["skills"]} == {"marketplace-skill"} + + def test_payload_shrinks_to_explicit_only(self): + """Concrete byte-count assertion: a 40-skill auto-load with a single + explicit skill should serialize approximately the size of the + single explicit skill, not 40+1. + """ + import json + + auto = {f"auto-{i}": _make_skill(f"auto-{i}", "x" * 1000) for i in range(40)} + explicit = _make_skill("user", "y" * 100) + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True, skills=[explicit]) + + dumped = ctx.model_dump() + skills_bytes = len(json.dumps(dumped["skills"])) + # The 40 auto skills total ~40 KB; the single explicit one is + # ~150 B. The serialized ``skills`` list must clearly be the + # explicit-only size, not the full set. + assert skills_bytes < 1000, ( + f"serialized skills should be ~1 explicit skill (~150 B), " + f"got {skills_bytes} B — auto skills leaked into output" + ) + + def test_round_trip_dump_preserves_full_skill_list(self): + """Comment-7 regression: ``model_dump(round_trip=True)`` is + Pydantic's canonical "must reload without semantic loss" + signal. Honour it the same as ``preserve_full_skills`` — + otherwise a caller using it for an ``AgentContext`` snapshot + would reload whatever the current external skill source + returns instead of the in-memory catalog they dumped. + """ + auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(3)} + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value=auto, + ): + ctx = AgentContext(load_public_skills=True) + + # Default dump still trims (the API-wire win). + assert ctx.model_dump()["skills"] == [] + # ``round_trip=True`` keeps the full snapshot. + dumped = ctx.model_dump(round_trip=True) + assert {s["name"] for s in dumped["skills"]} == set(auto.keys()) + + def test_conversationstate_resume_preserves_snapshot_skills(self): + """Comment-8 regression: full create→save→resume cycle. + + The persistence load path used to call ``state.agent = agent`` + unconditionally, overwriting the loaded snapshot's + ``agent_context.skills`` with the runtime agent's freshly- + loaded list. If the loader had drifted since save (marketplace + added a skill), the next autosave rewrote ``base_state.json`` + with the drifted set — breaking the freeze-at-create guarantee. + ``ConversationState.create`` resume path now preserves the + snapshot's skills onto the runtime agent's ``agent_context`` + before assignment. + """ + import tempfile + import uuid + + from openhands.sdk.agent import Agent + from openhands.sdk.conversation.state import ConversationState + from openhands.sdk.io import LocalFileStore + from openhands.sdk.llm import LLM + from openhands.sdk.workspace.local import LocalWorkspace + + # ``a`` was around at save time. Snapshot freezes on it. + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value={"a": _make_skill("a", "original a content")}, + ): + agent_at_save = Agent( + llm=LLM(model="test", usage_id="test"), + agent_context=AgentContext(load_public_skills=True), + ) + + with tempfile.TemporaryDirectory() as tmp: + workspace = LocalWorkspace(working_dir=tmp) + conv_id = uuid.uuid4() + + # Save: create state, persist. + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value={"a": _make_skill("a", "original a content")}, + ): + state_saved = ConversationState.create( + id=conv_id, + agent=agent_at_save, + workspace=workspace, + persistence_dir=tmp, + ) + state_saved._save_base_state(LocalFileStore(tmp)) + + # Now the marketplace adds a new skill `b` and changes `a`. + with patch( + "openhands.sdk.context.agent_context.load_available_skills", + return_value={ + "a": _make_skill("a", "updated a content"), + "b": _make_skill("b", "newly published"), + }, + ): + # Runtime agent on resume — its agent_context auto-load + # picks up the drifted skill set. + runtime_agent = Agent( + llm=LLM(model="test", usage_id="test"), + agent_context=AgentContext(load_public_skills=True), + ) + assert {s.name for s in runtime_agent.agent_context.skills} == { + "a", + "b", + } + # Resume the conversation. The fix must preserve the + # snapshot's skills (just `a`, with original content) + # rather than the runtime agent's drifted set. + state_resumed = ConversationState.create( + id=conv_id, + agent=runtime_agent, + workspace=workspace, + persistence_dir=tmp, + ) + + assert state_resumed.agent.agent_context is not None + resumed_skills = {s.name for s in state_resumed.agent.agent_context.skills} + assert resumed_skills == {"a"}, ( + f"resume picked up drifted skills {resumed_skills} from the runtime " + f"agent's fresh auto-load; should have preserved the snapshot {{a}}" + ) + # And the persisted-to-disk content also matches the + # snapshot — autosave didn't drift it. + with open(f"{tmp}/base_state.json") as f: + import json as _json + + persisted = _json.load(f) + persisted_names = { + s["name"] for s in persisted["agent"]["agent_context"]["skills"] + } + assert persisted_names == {"a"}, ( + f"autosave rewrote base_state.json with drifted skills " + f"{persisted_names} instead of the original snapshot" + )