From 5e1aca81eecd6e7163b8e26a069f9db508ca53fd Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Tue, 19 May 2026 16:43:46 +0200
Subject: [PATCH 1/7] perf(agent_context): drop auto-loaded skills from
 serialized output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When ``load_user_skills`` / ``load_public_skills`` are enabled,
``_load_auto_skills`` mutates ``self.skills`` at model-validation
time with the fully resolved skill catalog. Pydantic then persists
those skills onto the conversation's stored state and serializes
them back on every ``GET /api/conversations``. For a stock setup
that bundles ~40 skills under ``~/.openhands/skills``, that's
~263 KB of duplicated payload per stored conversation per fetch —
content the client never reads directly, because the skills are
only consumed server-side when rendering the system prompt.

Track which skill names ``_load_auto_skills`` added in a
``PrivateAttr`` and drop them at serialize time via a
``@field_serializer("skills")``. The validator is ``mode="after"``,
so it re-runs on every deserialization and repopulates the same
auto-loaded subset deterministically from the
``load_user_skills`` / ``load_public_skills`` / ``marketplace_path``
configuration. Runtime callers of ``self.skills`` (prompt rendering,
``to_acp_prompt_context``, ``get_dynamic_context``) see the full
resolved list as today — only the wire payload changes.

Measured impact (agent-canvas at ``/conversations/<id>``, cold-cold
full page reload, agent-server 1.22.1, same browser):

  Conversation              Before    After (this PR)
  40 skills + 161 events    2226 ms   ~1159 ms   (~48% faster)
  0 skills  + 209 events    1159 ms   1159 ms   (unchanged)
  0 skills  + 0 events       979 ms    979 ms   (unchanged)

Tests pin both halves of the contract:
- In-memory: every ``self.skills`` lookup keeps returning the full
  resolved list, including auto-loaded entries.
- On the wire: ``model_dump`` returns only explicit skills (caller-
  supplied via the constructor); auto-loaded ones are absent.
- Round-trip: dumping then ``model_validate``-ing repopulates the
  auto-loaded subset via the same validator, so a roundtripped
  ``AgentContext`` has the same in-memory shape as the source.
- An explicit skill colliding with an auto-loaded name wins (it was
  never marked auto-loaded), in-memory AND on the wire.
- A concrete byte-count assertion: 40-skill auto-load + 1 explicit
  skill serializes to <1 KB, not the ~40 KB the auto-loaded set
  would occupy.

Closes software-agent-sdk#3301.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/context/agent_context.py    |  38 ++++-
 ...test_agent_context_skills_serialization.py | 141 ++++++++++++++++++
 2 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 tests/sdk/context/test_agent_context_skills_serialization.py
diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
index 17d9508427..17d2e73564 100644
--- a/openhands-sdk/openhands/sdk/context/agent_context.py
+++ b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -8,6 +8,7 @@
 from pydantic import (
     BaseModel,
     Field,
+    PrivateAttr,
     SecretStr,
     field_serializer,
     field_validator,
@@ -118,6 +119,33 @@ class AgentContext(BaseModel):
         json_schema_extra={"acp_compatible": True},
     )
 
+    # Names of skills that ``_load_auto_skills`` added on top of the
+    # caller-supplied list. Tracked so the serializer can drop them from
+    # the wire payload: the same auto-load will re-run on the next
+    # deserialization (this model_validator runs in ``mode="after"``), so
+    # persisting them is pure duplication. For a stock configuration that
+    # turns both flags on (~40 skills bundled under
+    # ``~/.openhands/skills``) the resolved list is ~260 KB per
+    # ``AgentContext`` instance — every ``GET`` on a stored conversation
+    # carried that. See software-agent-sdk#3301.
+    _auto_loaded_skill_names: set[str] = PrivateAttr(default_factory=set)
+
+    @field_serializer("skills", when_used="always")
+    def _serialize_skills(self, value: list[Skill], info) -> list[Any]:
+        """Drop auto-loaded skills from the serialized output.
+
+        The runtime keeps the full resolved list on ``self.skills`` so
+        prompt rendering and downstream consumers behave exactly as
+        today. Only the wire payload changes: callers re-loading the
+        model will trigger ``_load_auto_skills`` again, which rebuilds
+        the auto-loaded subset deterministically from the same
+        ``load_user_skills`` / ``load_public_skills`` /
+        ``marketplace_path`` configuration.
+        """
+        auto_names = self._auto_loaded_skill_names
+        kept = [s for s in value if s.name not in auto_names]
+        return [s.model_dump(mode=info.mode, context=info.context) for s in kept]
+
     @field_serializer("secrets", when_used="always")
     def _serialize_secrets(
         self, value: Mapping[str, SecretValue] | None, info
@@ -148,7 +176,14 @@ def _validate_skills(cls, v: list[Skill], _info):
 
     @model_validator(mode="after")
     def _load_auto_skills(self):
-        """Load user and/or public skills if enabled."""
+        """Load user and/or public skills if enabled.
+
+        Names of skills added here are tracked in
+        ``_auto_loaded_skill_names`` so the serializer can drop them
+        from the wire payload (this validator re-runs on every model
+        load, so the same skills repopulate without needing to be
+        persisted).
+        """
         if not self.load_user_skills and not self.load_public_skills:
             return self
 
@@ -164,6 +199,7 @@ def _load_auto_skills(self):
         for name, skill in auto_skills.items():
             if name not in existing_names:
                 self.skills.append(skill)
+                self._auto_loaded_skill_names.add(name)
             else:
                 logger.debug(
                     f"Skipping auto-loaded skill '{name}' (already in explicit skills)"
diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py
new file mode 100644
index 0000000000..ff0dac1a09
--- /dev/null
+++ b/tests/sdk/context/test_agent_context_skills_serialization.py
@@ -0,0 +1,141 @@
+"""Tests for the auto-loaded-skills serialization optimisation.
+
+When ``load_user_skills`` / ``load_public_skills`` are enabled,
+``AgentContext._load_auto_skills`` resolves every matching skill and
+appends it to ``self.skills`` at model-validation time. Persisting
+those skills on the wire is pure duplication — the validator runs
+again on every deserialization and rebuilds the same set deterministically.
+
+The serializer drops auto-loaded skills from ``model_dump`` output but
+keeps the in-memory ``self.skills`` intact so runtime prompt rendering
+behaves exactly as today. These tests pin both halves of that contract.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from openhands.sdk.context import AgentContext
+from openhands.sdk.skills import Skill
+
+
+def _make_skill(name: str, content: str = "auto skill body") -> Skill:
+    return Skill(
+        name=name,
+        content=content,
+        source=f"/fake/{name}.md",
+    )
+
+
+class TestAutoLoadedSkillsSerialization:
+    def test_auto_loaded_skills_drop_from_serialized_output(self):
+        """``model_dump`` omits auto-loaded skills (the headline payload win)."""
+        auto = {
+            "auto-1": _make_skill("auto-1"),
+            "auto-2": _make_skill("auto-2"),
+            "auto-3": _make_skill("auto-3"),
+        }
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True)
+
+        # In-memory: the validator populated all three.
+        assert {s.name for s in ctx.skills} == {"auto-1", "auto-2", "auto-3"}
+        # On the wire: dropped entirely (no caller passed explicit skills).
+        dumped = ctx.model_dump()
+        assert dumped["skills"] == []
+
+    def test_explicit_skills_survive_serialization(self):
+        """Caller-supplied skills are NOT dropped — only the auto-loaded ones."""
+        explicit = _make_skill("user-explicit", "this one is mine")
+        auto = {"auto-only": _make_skill("auto-only")}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True, skills=[explicit])
+
+        # In-memory: both present.
+        assert {s.name for s in ctx.skills} == {"user-explicit", "auto-only"}
+        # On the wire: only the explicit one.
+        dumped = ctx.model_dump()
+        assert [s["name"] for s in dumped["skills"]] == ["user-explicit"]
+
+    def test_explicit_skill_shadows_an_auto_one(self):
+        """When the explicit list collides with an auto-loaded name, the
+        explicit one wins in-memory AND on the wire (it was never marked
+        as auto-loaded — the auto-load step skipped it).
+        """
+        explicit = _make_skill("shared-name", "user version")
+        auto = {"shared-name": _make_skill("shared-name", "auto version")}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True, skills=[explicit])
+
+        assert len(ctx.skills) == 1
+        assert ctx.skills[0].content == "user version"
+        dumped = ctx.model_dump()
+        assert len(dumped["skills"]) == 1
+        assert dumped["skills"][0]["name"] == "shared-name"
+        assert dumped["skills"][0]["content"] == "user version"
+
+    def test_round_trip_via_serialized_output_re_resolves_auto_skills(self):
+        """Deserializing the trimmed payload + re-validating repopulates the
+        auto-loaded skills via the same auto-load path. This is what makes
+        dropping them on the wire safe: the receiver sees the same
+        in-memory shape as the sender.
+        """
+        auto = {
+            "auto-a": _make_skill("auto-a"),
+            "auto-b": _make_skill("auto-b"),
+        }
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True)
+            dumped = ctx.model_dump()
+            # Re-validating the trimmed payload should re-fire the auto-load
+            # validator and rebuild the in-memory list.
+            roundtripped = AgentContext.model_validate(dumped)
+
+        assert {s.name for s in roundtripped.skills} == {"auto-a", "auto-b"}
+
+    def test_disabled_auto_load_leaves_explicit_skills_on_the_wire(self):
+        """With both flags off, ``_load_auto_skills`` is a no-op — every
+        skill is treated as explicit and serialized normally.
+        """
+        explicit = [_make_skill("foo"), _make_skill("bar")]
+        ctx = AgentContext(skills=explicit)
+        assert {s.name for s in ctx.skills} == {"foo", "bar"}
+        dumped = ctx.model_dump()
+        assert {s["name"] for s in dumped["skills"]} == {"foo", "bar"}
+
+    def test_payload_shrinks_to_explicit_only(self):
+        """Concrete byte-count assertion: a 40-skill auto-load with a single
+        explicit skill should serialize approximately the size of the
+        single explicit skill, not 40+1.
+        """
+        import json
+
+        auto = {f"auto-{i}": _make_skill(f"auto-{i}", "x" * 1000) for i in range(40)}
+        explicit = _make_skill("user", "y" * 100)
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True, skills=[explicit])
+
+        dumped = ctx.model_dump()
+        skills_bytes = len(json.dumps(dumped["skills"]))
+        # The 40 auto skills total ~40 KB; the single explicit one is
+        # ~150 B. The serialized ``skills`` list must clearly be the
+        # explicit-only size, not the full set.
+        assert skills_bytes < 1000, (
+            f"serialized skills should be ~1 explicit skill (~150 B), "
+            f"got {skills_bytes} B — auto skills leaked into output"
+        )

From 15002de80c77371af0eeaa5700be1f4f6c93c45d Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Tue, 19 May 2026 16:46:11 +0200
Subject: [PATCH 2/7] perf(agent_context): migration path for old conversations
 with inlined skills
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stored conversations created before PR #3302 carry the resolved auto-
loaded skill list inlined on ``agent_context.skills``. The validator's
"skip if name in existing_names" branch hit those entries without
marking them as auto-loaded, so the new serializer kept them on the
wire — the bloat would persist until each conversation was recreated.

Detect them: when an ``existing`` skill matches a loader-produced one
by equality (full Pydantic model compare), mark it auto-loaded too.
That makes the wire payload shrink on the very next ``GET`` after the
SDK is bumped, with no conversation re-creation needed.

A persisted skill that no longer matches the loader's current output
(user edited the file, marketplace updated) stays treated as explicit
— on-disk content wins. Covered by
``test_migration_stored_skills_diverged_from_loader_stay_explicit``.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/context/agent_context.py    | 20 +++++++-
 ...test_agent_context_skills_serialization.py | 50 +++++++++++++++++++
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
index 17d2e73564..560387c033 100644
--- a/openhands-sdk/openhands/sdk/context/agent_context.py
+++ b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -183,6 +183,17 @@ def _load_auto_skills(self):
         from the wire payload (this validator re-runs on every model
         load, so the same skills repopulate without needing to be
         persisted).
+
+        Migration: stored conversations created before the serializer
+        change carry the resolved auto-loaded skill list inlined on
+        ``skills``. When such a conversation is loaded back, the names
+        match ``existing_names`` and the new-append branch is skipped
+        — but we still mark them as auto-loaded if the persisted skill
+        equals what the loader would produce now. Without this, every
+        old conversation would keep the bloated payload until rewritten.
+        A persisted skill that no longer matches the loader's current
+        output (user edited the file, marketplace updated, etc.) stays
+        treated as explicit so the on-disk content wins.
         """
         if not self.load_user_skills and not self.load_public_skills:
             return self
@@ -195,11 +206,16 @@ def _load_auto_skills(self):
             marketplace_path=self.marketplace_path,
         )
 
-        existing_names = {skill.name for skill in self.skills}
+        existing_by_name = {skill.name: skill for skill in self.skills}
         for name, skill in auto_skills.items():
-            if name not in existing_names:
+            existing = existing_by_name.get(name)
+            if existing is None:
                 self.skills.append(skill)
                 self._auto_loaded_skill_names.add(name)
+            elif existing == skill:
+                # Migration path for conversations stored before the
+                # serializer change — see the docstring.
+                self._auto_loaded_skill_names.add(name)
             else:
                 logger.debug(
                     f"Skipping auto-loaded skill '{name}' (already in explicit skills)"
diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py
index ff0dac1a09..c2560d074e 100644
--- a/tests/sdk/context/test_agent_context_skills_serialization.py
+++ b/tests/sdk/context/test_agent_context_skills_serialization.py
@@ -115,6 +115,56 @@ def test_disabled_auto_load_leaves_explicit_skills_on_the_wire(self):
         dumped = ctx.model_dump()
         assert {s["name"] for s in dumped["skills"]} == {"foo", "bar"}
 
+    def test_migration_stored_skills_matching_loader_are_marked_auto_loaded(self):
+        """Migration path: a conversation stored before this PR carries the
+        resolved auto-loaded skills inlined on ``skills``. On reload, the
+        validator must recognise those as auto-loaded (the persisted
+        skill equals what the loader produces) and drop them from the
+        next serialization — otherwise the bloat persists until the
+        conversation is recreated.
+        """
+        stored = [_make_skill("auto-x"), _make_skill("auto-y")]
+        # Loader returns the same skill objects (matches what was
+        # persisted under the old SDK).
+        loader_output = {
+            "auto-x": _make_skill("auto-x"),
+            "auto-y": _make_skill("auto-y"),
+        }
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=loader_output,
+        ):
+            ctx = AgentContext(load_public_skills=True, skills=stored)
+
+        # No duplicates: skills stayed at 2 (migration recognised them).
+        assert {s.name for s in ctx.skills} == {"auto-x", "auto-y"}
+        # And the serialized payload drops them.
+        dumped = ctx.model_dump()
+        assert dumped["skills"] == []
+
+    def test_migration_stored_skills_diverged_from_loader_stay_explicit(self):
+        """If the persisted skill content no longer matches what the
+        loader would produce (user edited the file, marketplace updated),
+        the on-disk version wins — treat it as explicit and keep it on
+        the wire. This is the safe default: we don't drop content
+        someone may have intentionally customised.
+        """
+        stored = [_make_skill("auto-x", "old persisted content")]
+        loader_output = {"auto-x": _make_skill("auto-x", "new loader content")}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=loader_output,
+        ):
+            ctx = AgentContext(load_public_skills=True, skills=stored)
+
+        # The stored ("old") version wins in-memory.
+        assert len(ctx.skills) == 1
+        assert ctx.skills[0].content == "old persisted content"
+        # And it stays on the wire — not treated as auto-loaded.
+        dumped = ctx.model_dump()
+        assert len(dumped["skills"]) == 1
+        assert dumped["skills"][0]["content"] == "old persisted content"
+
     def test_payload_shrinks_to_explicit_only(self):
         """Concrete byte-count assertion: a 40-skill auto-load with a single
         explicit skill should serialize approximately the size of the

From 67429edef0fa3593662fed89261f9808a04b9012 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Tue, 19 May 2026 16:58:08 +0200
Subject: [PATCH 3/7] perf(agent_context): equality-check filter so
 consumer-replaced skills survive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenHands' ``_create_agent_with_skills`` (and any caller that does
``agent_context.model_copy(update={'skills': merged})``) replaces
auto-loaded skills in place when its merged list carries the same
name with different content. A name-only ``_auto_loaded_skill_names``
filter would silently drop those replacements at serialize time and
the receiver would auto-reload the stock version on the next
deserialization — losing the customisation.

Switch the PrivateAttr to a ``dict[str, Skill]`` snapshot. Serializer
drops a skill iff ``_auto_loaded_skills.get(name) == s`` — equality,
not name match. A replacement has different field values, fails the
equality check, and survives on the wire as today.

Verified safe across known consumers:
- ``openhands-sdk/workspace/remote/base.py:920`` — explicit skills with
  ``load_public_skills=False``; auto-load short-circuits, serializer
  is a no-op.
- ``openhands-sdk/workspace/remote/base.py:923`` — empty explicit with
  ``load_public_skills=True``; serializer drops the auto-loaded
  skills (the win we want).
- ``OpenHands/openhands/app_server/.../live_status_app_conversation_service.py:1394``
  — constructs AgentContext WITHOUT either ``load_*_skills`` flag.
  Auto-load is a no-op, ``_auto_loaded_skills`` stays empty.
- ``OpenHands/openhands/app_server/.../app_conversation_service_base.py:159``
  — ``_create_agent_with_skills`` does the model_copy + skill-replace
  flow. Covered by the new regression test.

New test:
``test_replacing_auto_skill_via_model_copy_survives_serialization``
— pins the exact OpenHands flow: auto-load a "git-skill", merge with
a different "git-skill" (same name, different content), model_copy,
serialize. Asserts the replacement survives on the wire instead of
being dropped as auto-loaded.

296/296 tests pass on the affected suite.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/context/agent_context.py    | 39 ++++++++++++-------
 ...test_agent_context_skills_serialization.py | 37 ++++++++++++++++++
 2 files changed, 62 insertions(+), 14 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
index 560387c033..5077b17155 100644
--- a/openhands-sdk/openhands/sdk/context/agent_context.py
+++ b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -119,20 +119,24 @@ class AgentContext(BaseModel):
         json_schema_extra={"acp_compatible": True},
     )
 
-    # Names of skills that ``_load_auto_skills`` added on top of the
-    # caller-supplied list. Tracked so the serializer can drop them from
-    # the wire payload: the same auto-load will re-run on the next
-    # deserialization (this model_validator runs in ``mode="after"``), so
-    # persisting them is pure duplication. For a stock configuration that
-    # turns both flags on (~40 skills bundled under
+    # Snapshot of the skills that ``_load_auto_skills`` added on top of
+    # the caller-supplied list. The serializer drops only the ones still
+    # equal to this snapshot — if a downstream consumer replaces an
+    # auto-loaded skill via ``model_copy(update={'skills': merged})``
+    # (OpenHands' ``_create_agent_with_skills`` does this when it merges
+    # in sandbox / repo skills with overlapping names), the replacement
+    # has different field values and survives serialization. The same
+    # auto-load will re-run on the receiving end and rebuild the
+    # auto-loaded subset that wasn't replaced. For a stock configuration
+    # that turns both flags on (~40 skills bundled under
     # ``~/.openhands/skills``) the resolved list is ~260 KB per
-    # ``AgentContext`` instance — every ``GET`` on a stored conversation
-    # carried that. See software-agent-sdk#3301.
-    _auto_loaded_skill_names: set[str] = PrivateAttr(default_factory=set)
+    # ``AgentContext`` — every ``GET`` on a stored conversation carried
+    # that. See software-agent-sdk#3301.
+    _auto_loaded_skills: dict[str, Skill] = PrivateAttr(default_factory=dict)
 
     @field_serializer("skills", when_used="always")
     def _serialize_skills(self, value: list[Skill], info) -> list[Any]:
-        """Drop auto-loaded skills from the serialized output.
+        """Drop unmodified auto-loaded skills from the serialized output.
 
         The runtime keeps the full resolved list on ``self.skills`` so
         prompt rendering and downstream consumers behave exactly as
@@ -141,9 +145,16 @@ def _serialize_skills(self, value: list[Skill], info) -> list[Any]:
         the auto-loaded subset deterministically from the same
         ``load_user_skills`` / ``load_public_skills`` /
         ``marketplace_path`` configuration.
+
+        Equality (not just name match) is required because consumers
+        like OpenHands' ``_create_agent_with_skills`` replace
+        auto-loaded skills in-place with their own version under the
+        same name. A name-only filter would silently drop those
+        replacements and the receiver would auto-reload the stock
+        version on the next deserialization.
         """
-        auto_names = self._auto_loaded_skill_names
-        kept = [s for s in value if s.name not in auto_names]
+        auto = self._auto_loaded_skills
+        kept = [s for s in value if auto.get(s.name) != s]
         return [s.model_dump(mode=info.mode, context=info.context) for s in kept]
 
     @field_serializer("secrets", when_used="always")
@@ -211,11 +222,11 @@ def _load_auto_skills(self):
             existing = existing_by_name.get(name)
             if existing is None:
                 self.skills.append(skill)
-                self._auto_loaded_skill_names.add(name)
+                self._auto_loaded_skills[name] = skill
             elif existing == skill:
                 # Migration path for conversations stored before the
                 # serializer change — see the docstring.
-                self._auto_loaded_skill_names.add(name)
+                self._auto_loaded_skills[name] = skill
             else:
                 logger.debug(
                     f"Skipping auto-loaded skill '{name}' (already in explicit skills)"
diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py
index c2560d074e..7bb5865aba 100644
--- a/tests/sdk/context/test_agent_context_skills_serialization.py
+++ b/tests/sdk/context/test_agent_context_skills_serialization.py
@@ -165,6 +165,43 @@ def test_migration_stored_skills_diverged_from_loader_stay_explicit(self):
         assert len(dumped["skills"]) == 1
         assert dumped["skills"][0]["content"] == "old persisted content"
 
+    def test_replacing_auto_skill_via_model_copy_survives_serialization(self):
+        """Regression guard for OpenHands' ``_create_agent_with_skills`` flow.
+
+        That helper does ``agent_context.model_copy(update={'skills':
+        merged})`` where ``merged`` is the result of deduping
+        auto-loaded + sandbox/repo-loaded skills by name, with the
+        later list winning. If two lists carry the same name, the
+        merged result is the *replacement* — different content from
+        what ``_load_auto_skills`` produced.
+
+        A name-only serializer would drop the replacement (the name
+        matches an auto-loaded one) and the receiver would auto-load
+        the original stock version, silently dropping OpenHands'
+        customisations. The equality-based filter must keep the
+        replacement on the wire.
+        """
+        # Stock auto-loaded skill ("public version").
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value={"git-skill": _make_skill("git-skill", "public version")},
+        ):
+            ctx = AgentContext(load_public_skills=True)
+        # OpenHands merges in a different "git-skill" (same name).
+        replacement = _make_skill("git-skill", "OpenHands custom version")
+        # Drop the auto-loaded skill, add the replacement. This is what
+        # ``_merge_skills`` produces — later list (replacement) wins.
+        merged = [s for s in ctx.skills if s.name != "git-skill"] + [replacement]
+        new_ctx = ctx.model_copy(update={"skills": merged})
+
+        # In-memory: only the replacement is present.
+        assert len(new_ctx.skills) == 1
+        assert new_ctx.skills[0].content == "OpenHands custom version"
+        # On the wire: replacement survives (equality check, not name).
+        dumped = new_ctx.model_dump()
+        assert len(dumped["skills"]) == 1
+        assert dumped["skills"][0]["content"] == "OpenHands custom version"
+
     def test_payload_shrinks_to_explicit_only(self):
         """Concrete byte-count assertion: a 40-skill auto-load with a single
         explicit skill should serialize approximately the size of the

From 98f2c62375c692a565859579f9b2ec6d619e0554 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Tue, 19 May 2026 17:05:42 +0200
Subject: [PATCH 4/7] =?UTF-8?q?perf(agent=5Fcontext):=20address=20PR=20#33?=
 =?UTF-8?q?02=20review=20=E2=80=94=20persistence=20+=20nested=20options?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues from the bot review of PR #3302:

1. The serializer fired on EVERY ``model_dump``, including
   ``ConversationState._save_base_state`` (the persistence path).
   Without auto-loaded skills on the persisted snapshot, a resumed
   conversation would silently pick up *new* skill content from
   ``~/.openhands/skills`` / the public marketplace via
   ``_load_auto_skills`` — losing the freeze-at-create-time guarantee.

   Fix: opt-out via ``context={"preserve_full_skills": True}``.
   ``_save_base_state`` passes it; the API-response path doesn't —
   so persistence keeps the snapshot and the wire still gets the win.

2. The manual ``s.model_dump(mode=..., context=...)`` inside the
   serializer dropped the caller's ``exclude_none`` / ``exclude_defaults``
   / nested ``include`` options. ``AgentContext(skills=[...]).model_dump(
   exclude_none=True)`` would have leaked ``trigger: None``,
   ``source: None`` etc. — which also bloats the very payloads this
   PR optimises.

   Fix: switch to ``@field_serializer("skills", mode="wrap")`` and
   delegate the surviving list to ``handler``. Pydantic's default
   nested machinery then respects every caller option.

Tests:
- ``test_preserve_full_skills_context_flag_keeps_snapshot`` — flag
  reverts to full serialization for the snapshot path.
- ``test_wrap_serializer_propagates_exclude_none`` — pins the
  comment-2 regression with a concrete ``Skill.trigger`` canary.
- ``test_save_base_state_persists_full_skill_snapshot`` — end-to-end
  through ``ConversationState`` to ``InMemoryFileStore``; asserts the
  persisted JSON keeps all auto-loaded skill names.

299/299 tests pass on the affected suites.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/context/agent_context.py    |  23 +++-
 .../openhands/sdk/conversation/state.py       |  12 +-
 ...test_agent_context_skills_serialization.py | 103 ++++++++++++++++++
 3 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
index 5077b17155..7fa3089f80 100644
--- a/openhands-sdk/openhands/sdk/context/agent_context.py
+++ b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -134,8 +134,8 @@ class AgentContext(BaseModel):
     # that. See software-agent-sdk#3301.
     _auto_loaded_skills: dict[str, Skill] = PrivateAttr(default_factory=dict)
 
-    @field_serializer("skills", when_used="always")
-    def _serialize_skills(self, value: list[Skill], info) -> list[Any]:
+    @field_serializer("skills", when_used="always", mode="wrap")
+    def _serialize_skills(self, value: list[Skill], handler, info) -> Any:
         """Drop unmodified auto-loaded skills from the serialized output.
 
         The runtime keeps the full resolved list on ``self.skills`` so
@@ -152,10 +152,27 @@ def _serialize_skills(self, value: list[Skill], info) -> list[Any]:
         same name. A name-only filter would silently drop those
         replacements and the receiver would auto-reload the stock
         version on the next deserialization.
+
+        Opt-out via ``context={"preserve_full_skills": True}``: paths
+        that need a stable snapshot of the resolved skill catalog (the
+        most important one being ``ConversationState._save_base_state``
+        — persistence of the conversation to disk) pass this flag so
+        the serializer is a no-op. Without it, a paused conversation
+        resumed after the ``~/.openhands/skills`` directory or the
+        public marketplace updated would silently pick up the *new*
+        skill content via ``_load_auto_skills``. The API-response path
+        skips the flag and gets the byte-size win.
+
+        ``mode="wrap"`` + ``handler`` delegation preserves caller
+        options like ``exclude_none``, ``exclude_defaults``, and
+        nested ``include`` / ``exclude`` for the surviving skills —
+        manual ``s.model_dump(...)`` would have ignored them.
         """
+        if info.context and info.context.get("preserve_full_skills"):
+            return handler(value)
         auto = self._auto_loaded_skills
         kept = [s for s in value if auto.get(s.name) != s]
-        return [s.model_dump(mode=info.mode, context=info.context) for s in kept]
+        return handler(kept)
 
     @field_serializer("secrets", when_used="always")
     def _serialize_secrets(
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index 7e137929f2..6c078935e8 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -255,8 +255,18 @@ def _save_base_state(self, fs: FileStore) -> None:
 
         If a cipher is configured, secrets will be encrypted. Otherwise, they
         will be redacted (serialized as '**********').
+
+        ``preserve_full_skills`` is set so the persisted snapshot
+        includes auto-loaded skills inline — without this the conversation
+        would silently pick up *new* skill content from
+        ``~/.openhands/skills`` / the public marketplace on resume, since
+        ``AgentContext._load_auto_skills`` re-runs on deserialization.
+        Persistence needs the freeze-at-create-time snapshot; only the
+        API-response path opts into the trim. See software-agent-sdk#3301.
         """
-        context = {"cipher": self._cipher} if self._cipher else None
+        context: dict[str, Any] = {"preserve_full_skills": True}
+        if self._cipher:
+            context["cipher"] = self._cipher
         # Warn if secrets exist but no cipher is configured
         if not self._cipher and self.secret_registry.secret_sources:
             logger.warning(
diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py
index 7bb5865aba..556d8782b4 100644
--- a/tests/sdk/context/test_agent_context_skills_serialization.py
+++ b/tests/sdk/context/test_agent_context_skills_serialization.py
@@ -202,6 +202,109 @@ def test_replacing_auto_skill_via_model_copy_survives_serialization(self):
         assert len(dumped["skills"]) == 1
         assert dumped["skills"][0]["content"] == "OpenHands custom version"
 
+    def test_preserve_full_skills_context_flag_keeps_snapshot(self):
+        """``context={"preserve_full_skills": True}`` opts out of the trim.
+
+        This is the persistence path: ``ConversationState._save_base_state``
+        passes this flag so the snapshot of auto-loaded skills is frozen
+        at conversation-create time. Without it, a paused conversation
+        resumed after the skill source updates would silently pick up
+        the *new* content via ``_load_auto_skills``, breaking the
+        guarantee that the agent runs the same skills it started with.
+        """
+        auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(5)}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True)
+
+        # Default: trim.
+        dumped_default = ctx.model_dump()
+        assert dumped_default["skills"] == []
+
+        # Persistence opt-out: full snapshot.
+        dumped_persist = ctx.model_dump(context={"preserve_full_skills": True})
+        assert len(dumped_persist["skills"]) == 5
+        assert {s["name"] for s in dumped_persist["skills"]} == set(auto.keys())
+
+    def test_wrap_serializer_propagates_exclude_none(self):
+        """Caller-supplied ``exclude_none`` reaches nested ``Skill`` fields.
+
+        ``Skill`` has several optional fields that default to ``None``
+        (``trigger``, ``source``, ``mcp_tools``, …). A manual
+        ``s.model_dump(...)`` inside the serializer would have dropped
+        the caller's ``exclude_none`` setting. ``mode="wrap"`` +
+        ``handler`` delegation preserves it.
+        """
+        explicit = Skill(name="x", content="hello")  # trigger / mcp_tools / etc. → None
+        ctx = AgentContext(skills=[explicit])
+
+        # Without exclude_none, None fields appear.
+        dumped_keep_nones = ctx.model_dump()
+        skill_dict = dumped_keep_nones["skills"][0]
+        # ``trigger`` is one of the always-present None defaults; pick
+        # it as the canary.
+        assert "trigger" in skill_dict
+        assert skill_dict["trigger"] is None
+
+        # With exclude_none, the field is omitted from the nested skill.
+        dumped_drop_nones = ctx.model_dump(exclude_none=True)
+        skill_dict_pruned = dumped_drop_nones["skills"][0]
+        assert "trigger" not in skill_dict_pruned
+
+    def test_save_base_state_persists_full_skill_snapshot(self):
+        """End-to-end: ``ConversationState._save_base_state`` writes the
+        full skill list to disk, even though the in-memory ``model_dump``
+        default trims them.
+        """
+        import json
+        import uuid
+
+        from openhands.sdk.conversation.state import ConversationState
+        from openhands.sdk.io import InMemoryFileStore
+        from openhands.sdk.workspace.local import LocalWorkspace
+
+        auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(3)}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            agent_context = AgentContext(load_public_skills=True)
+            # Use a real lightweight agent to satisfy ConversationState.
+            from openhands.sdk.agent import Agent
+            from openhands.sdk.llm import LLM
+
+            agent = Agent(
+                llm=LLM(model="test", usage_id="test"),
+                agent_context=agent_context,
+            )
+
+            import tempfile
+
+            with tempfile.TemporaryDirectory() as tmp:
+                state = ConversationState.create(
+                    id=uuid.uuid4(),
+                    agent=agent,
+                    workspace=LocalWorkspace(working_dir=tmp),
+                )
+                fs = InMemoryFileStore()
+                state._save_base_state(fs)
+
+                persisted = json.loads(fs.read("base_state.json"))
+
+        # The persisted snapshot must include the auto-loaded skills so
+        # a resumed conversation gets the same content even if the
+        # skill source changed in the meantime.
+        persisted_skill_names = [
+            s["name"] for s in persisted["agent"]["agent_context"]["skills"]
+        ]
+        assert set(persisted_skill_names) == set(auto.keys()), (
+            "persistence path lost the auto-loaded skill snapshot — "
+            "without it, a resumed conversation could silently pick up "
+            "newer skill content from ~/.openhands/skills"
+        )
+
     def test_payload_shrinks_to_explicit_only(self):
         """Concrete byte-count assertion: a 40-skill auto-load with a single
         explicit skill should serialize approximately the size of the

From 713148b1475053eea1fc7df7eed03533dd6e6893 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Tue, 19 May 2026 17:16:10 +0200
Subject: [PATCH 5/7] =?UTF-8?q?perf(agent=5Fcontext):=20address=20PR=20#33?=
 =?UTF-8?q?02=20second-pass=20review=20=E2=80=94=20snapshot=20integrity?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two more edge cases from the bot review:

1. ``_auto_loaded_skills`` stored the same ``Skill`` reference that
   was appended to ``self.skills``. ``Skill`` is mutable, so an
   in-place edit (``ctx.skills[0].content = "custom"``) mutated the
   snapshot too; the equality check still succeeded and the
   customised skill silently disappeared from ``model_dump``.

   Fix: deep-copy the snapshot via ``skill.model_copy(deep=True)``.
   Pinned by ``test_in_place_skill_mutation_preserves_customisation_on_wire``.

2. The serializer used the snapshot even when the auto-load config
   changed after validation. ``ctx.model_copy(update={
   "load_public_skills": False})`` keeps the runtime auto-loaded
   skills (validators don't re-run on model_copy) but used to
   serialize them as ``[]``. On ``model_validate`` the receiver
   wouldn't auto-reload them (flag now False) — skills lost.
   Same shape for ``marketplace_path`` changes (receiver re-loads
   from a *different* catalog).

   Fix: snapshot ``(load_user_skills, load_public_skills,
   marketplace_path)`` alongside the skills. If the current config
   drifted, the serializer no-ops the trim and emits the full list,
   preserving the round-trip. Pinned by:
   - ``test_model_copy_toggling_load_flag_off_keeps_skills_on_wire``
   - ``test_model_copy_changing_marketplace_path_keeps_skills_on_wire``

302/302 tests pass on the affected suites.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/context/agent_context.py    | 51 +++++++++++-
 ...test_agent_context_skills_serialization.py | 80 +++++++++++++++++++
 2 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
index 7fa3089f80..2a61bccff0 100644
--- a/openhands-sdk/openhands/sdk/context/agent_context.py
+++ b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -132,7 +132,21 @@ class AgentContext(BaseModel):
     # ``~/.openhands/skills``) the resolved list is ~260 KB per
     # ``AgentContext`` — every ``GET`` on a stored conversation carried
     # that. See software-agent-sdk#3301.
+    #
+    # Values are deep-copied so an in-place caller mutation
+    # (``ctx.skills[0].content = "custom"``) doesn't also mutate the
+    # snapshot — without that, the equality check would still succeed
+    # and the customised skill would silently disappear from the wire.
     _auto_loaded_skills: dict[str, Skill] = PrivateAttr(default_factory=dict)
+    # The auto-load config that produced ``_auto_loaded_skills``.
+    # Tracked so the serializer can no-op the trim when the config
+    # changed after validation — e.g. ``model_copy(update={
+    # "load_public_skills": False})``. Without this, a copy with the
+    # flag flipped off would drop the auto-loaded skills on the wire
+    # AND fail to re-load them on the next ``model_validate`` (the
+    # flag is now off), losing them entirely. ``None`` when
+    # ``_load_auto_skills`` has not run yet.
+    _auto_load_config: tuple[bool, bool, str | None] | None = PrivateAttr(default=None)
 
     @field_serializer("skills", when_used="always", mode="wrap")
     def _serialize_skills(self, value: list[Skill], handler, info) -> Any:
@@ -163,6 +177,15 @@ def _serialize_skills(self, value: list[Skill], handler, info) -> Any:
         skill content via ``_load_auto_skills``. The API-response path
         skips the flag and gets the byte-size win.
 
+        Config-drift safety: if the auto-load config
+        (``load_user_skills`` / ``load_public_skills`` /
+        ``marketplace_path``) changed since the snapshot was taken —
+        typically via ``model_copy(update=...)`` flipping one of those
+        flags — the trim is skipped. Otherwise the receiver would
+        either re-load a *different* skill catalog (changed
+        ``marketplace_path``) or fail to re-load at all (flag turned
+        off), losing the auto-loaded skills entirely.
+
         ``mode="wrap"`` + ``handler`` delegation preserves caller
         options like ``exclude_none``, ``exclude_defaults``, and
         nested ``include`` / ``exclude`` for the surviving skills —
@@ -170,6 +193,15 @@ def _serialize_skills(self, value: list[Skill], handler, info) -> Any:
         """
         if info.context and info.context.get("preserve_full_skills"):
             return handler(value)
+        # Auto-load config drifted (or never ran) → can't trust the
+        # snapshot to round-trip. Serialize everything as explicit.
+        current_config = (
+            self.load_user_skills,
+            self.load_public_skills,
+            self.marketplace_path,
+        )
+        if self._auto_load_config != current_config:
+            return handler(value)
         auto = self._auto_loaded_skills
         kept = [s for s in value if auto.get(s.name) != s]
         return handler(kept)
@@ -234,16 +266,31 @@ def _load_auto_skills(self):
             marketplace_path=self.marketplace_path,
         )
 
+        # Record the config that produced this snapshot so the
+        # serializer can detect drift (e.g. ``model_copy(update={
+        # "load_public_skills": False})``) and degrade to full
+        # serialization.
+        self._auto_load_config = (
+            self.load_user_skills,
+            self.load_public_skills,
+            self.marketplace_path,
+        )
+
         existing_by_name = {skill.name: skill for skill in self.skills}
         for name, skill in auto_skills.items():
             existing = existing_by_name.get(name)
             if existing is None:
                 self.skills.append(skill)
-                self._auto_loaded_skills[name] = skill
+                # Deep-copy so an in-place caller mutation
+                # (``ctx.skills[0].content = "custom"``) doesn't also
+                # mutate the snapshot — without it the equality check
+                # would still succeed and the customised skill would
+                # silently disappear from the wire.
+                self._auto_loaded_skills[name] = skill.model_copy(deep=True)
             elif existing == skill:
                 # Migration path for conversations stored before the
                 # serializer change — see the docstring.
-                self._auto_loaded_skills[name] = skill
+                self._auto_loaded_skills[name] = skill.model_copy(deep=True)
             else:
                 logger.debug(
                     f"Skipping auto-loaded skill '{name}' (already in explicit skills)"
diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py
index 556d8782b4..4a025cc784 100644
--- a/tests/sdk/context/test_agent_context_skills_serialization.py
+++ b/tests/sdk/context/test_agent_context_skills_serialization.py
@@ -305,6 +305,86 @@ def test_save_base_state_persists_full_skill_snapshot(self):
             "newer skill content from ~/.openhands/skills"
         )
 
+    def test_in_place_skill_mutation_preserves_customisation_on_wire(self):
+        """Regression for the mutable-snapshot bug.
+
+        ``_auto_loaded_skills`` used to store the same ``Skill``
+        reference that landed in ``self.skills``. An in-place edit
+        like ``ctx.skills[0].content = "custom"`` mutated both copies,
+        the equality check still succeeded, and the customised skill
+        silently vanished from ``model_dump`` output. Deep-copying the
+        snapshot at auto-load time keeps the two references
+        independent so the equality check correctly reports the
+        in-memory skill as "modified" and keeps it on the wire.
+        """
+        auto = {"editable": _make_skill("editable", "original content")}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True)
+
+        # Caller mutates the skill in-place after the auto-load.
+        ctx.skills[0].content = "user customised content"
+
+        dumped = ctx.model_dump()
+        assert len(dumped["skills"]) == 1
+        assert dumped["skills"][0]["content"] == "user customised content"
+
+    def test_model_copy_toggling_load_flag_off_keeps_skills_on_wire(self):
+        """Regression for the config-drift bug.
+
+        ``ctx.model_copy(update={"load_public_skills": False})`` keeps
+        the runtime auto-loaded skills (the validator doesn't re-run
+        on model_copy) but used to serialize them as ``[]`` because
+        the snapshot still matched the in-memory names. On
+        ``model_validate(...)`` the receiver wouldn't auto-reload them
+        (flag is False), losing them entirely.
+
+        With the config-drift check, the serializer detects that the
+        current config doesn't match the snapshot config and emits
+        the full skill list, preserving the round-trip.
+        """
+        auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(3)}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True)
+
+        # Flip the flag off via model_copy — does NOT re-run the
+        # validator, so ctx.skills still has the 3 auto-loaded skills.
+        flipped = ctx.model_copy(update={"load_public_skills": False})
+        assert len(flipped.skills) == 3
+
+        # Serializer must NOT trim — the config drifted. Round-trip
+        # would otherwise lose them (flag is False on the receiver).
+        dumped = flipped.model_dump()
+        assert {s["name"] for s in dumped["skills"]} == set(auto.keys())
+
+        # And the round-trip preserves them in-memory.
+        roundtripped = AgentContext.model_validate(dumped)
+        assert {s.name for s in roundtripped.skills} == set(auto.keys())
+
+    def test_model_copy_changing_marketplace_path_keeps_skills_on_wire(self):
+        """Same config-drift concern as above but for ``marketplace_path``.
+
+        If the snapshot was taken with ``marketplace_path="A"`` and a
+        copy bumps it to ``"B"``, the receiver would auto-load from
+        ``"B"`` and get a *different* catalog. Preserve the original
+        list on the wire so the receiver can't silently swap.
+        """
+        auto = {"marketplace-skill": _make_skill("marketplace-skill")}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True, marketplace_path="path-A")
+
+        bumped = ctx.model_copy(update={"marketplace_path": "path-B"})
+        dumped = bumped.model_dump()
+        assert {s["name"] for s in dumped["skills"]} == {"marketplace-skill"}
+
     def test_payload_shrinks_to_explicit_only(self):
         """Concrete byte-count assertion: a 40-skill auto-load with a single
         explicit skill should serialize approximately the size of the

From d4bac354b8a84b0c184c4665ad97e2b787908387 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Tue, 19 May 2026 19:03:19 +0200
Subject: [PATCH 6/7] perf(agent_context): freeze snapshot on load + scope
 migration to snapshot loads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two more edge cases from the third bot review:

1. ``preserve_full_skills`` only prevented the trim on *save*. The
   load path still ran ``_load_auto_skills`` unconditionally and
   *appended* any newly-published skills the loader returned. A
   snapshot frozen as {a} would silently grow to {a, b} after `b`
   landed in the marketplace — and the next save would persist the
   drifted set.

   Fix: ``_load_auto_skills`` now accepts ``ValidationInfo``. When
   ``info.context["loading_from_snapshot"]`` is true (set by
   ``ConversationState`` factory on the resume path), the validator
   skips the append-new branch entirely — the persisted ``skills``
   list is treated as authoritative.

2. The migration branch (mark existing skills as auto-loaded when
   they equal the loader's output) also fired for fresh
   ``AgentContext(load_public_skills=True, skills=[explicit])``
   construction. A caller-pinned explicit skill that happened to
   equal the marketplace version would be snapshotted as auto-loaded,
   dropped from the wire, and silently swapped to a *new*
   marketplace version on the next round-trip.

   Fix: migration only fires under ``loading_from_snapshot``. Fresh
   construction keeps the "equal existing" branch as a no-op — the
   caller-supplied skill stays explicit on the wire.

The ``ConversationState`` resume path at ``state.py:354`` now passes
``context={"loading_from_snapshot": True}`` alongside the cipher.
API-response paths and direct caller construction don't pass the
flag, so they keep fresh-construction semantics — which is what we
want for the shrink-the-wire win.

New tests (5 → 8):
- ``test_fresh_construction_does_not_conflate_explicit_with_auto``
  — the comment-6 regression, fresh construction with a name
  collision.
- ``test_loading_from_snapshot_does_not_pick_up_new_auto_loaded_skills``
  — the comment-5 regression, snapshot {a} + loader returning
  {a, b} stays {a} in memory and on the wire.
- ``test_fresh_construction_still_appends_new_auto_skills`` — sanity
  that the default branch keeps growing for first-launch users.
- Existing migration tests updated to invoke the load path via
  ``model_validate(context={"loading_from_snapshot": True})``.

305/305 tests pass on the affected suites.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/context/agent_context.py    |  73 ++++++++----
 .../openhands/sdk/conversation/state.py       |  12 +-
 ...test_agent_context_skills_serialization.py | 104 ++++++++++++++++--
 3 files changed, 157 insertions(+), 32 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
index 2a61bccff0..6cce3d50ce 100644
--- a/openhands-sdk/openhands/sdk/context/agent_context.py
+++ b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -10,6 +10,7 @@
     Field,
     PrivateAttr,
     SecretStr,
+    ValidationInfo,
     field_serializer,
     field_validator,
     model_validator,
@@ -235,25 +236,46 @@ def _validate_skills(cls, v: list[Skill], _info):
         return v
 
     @model_validator(mode="after")
-    def _load_auto_skills(self):
+    def _load_auto_skills(self, info: ValidationInfo):
         """Load user and/or public skills if enabled.
 
         Names of skills added here are tracked in
-        ``_auto_loaded_skill_names`` so the serializer can drop them
-        from the wire payload (this validator re-runs on every model
-        load, so the same skills repopulate without needing to be
-        persisted).
-
-        Migration: stored conversations created before the serializer
-        change carry the resolved auto-loaded skill list inlined on
-        ``skills``. When such a conversation is loaded back, the names
-        match ``existing_names`` and the new-append branch is skipped
-        — but we still mark them as auto-loaded if the persisted skill
-        equals what the loader would produce now. Without this, every
-        old conversation would keep the bloated payload until rewritten.
-        A persisted skill that no longer matches the loader's current
-        output (user edited the file, marketplace updated, etc.) stays
-        treated as explicit so the on-disk content wins.
+        ``_auto_loaded_skills`` so the serializer can drop them from
+        the wire payload — this validator re-runs on every model load,
+        so the same skills repopulate without needing to be persisted.
+
+        Two behaviour modes, switched by
+        ``context={"loading_from_snapshot": True}``:
+
+        Fresh construction (default, no flag):
+            - Skills the loader returned that aren't already in
+              ``self.skills`` get appended and tracked as auto-loaded.
+            - Skills the loader returned that ARE already in
+              ``self.skills`` (caller-supplied explicit) are left
+              alone — even if they happen to equal the loader output.
+              The caller pinned that content deliberately; treating it
+              as auto-loaded would let a later marketplace update
+              silently swap it on the next round-trip.
+
+        Loading from a persisted snapshot (``loading_from_snapshot``):
+            - The persisted ``skills`` list IS authoritative. Don't
+              append new auto-loaded skills — that would let a
+              marketplace update or a fresh skill file silently
+              pollute the snapshot on the next save.
+            - Skills already in ``self.skills`` that equal the
+              loader's current output get marked as auto-loaded so
+              the next serialize trims them (migration path: old
+              persisted conversations shrink on first reload through
+              the new SDK).
+            - Skills in ``self.skills`` that DON'T match the loader's
+              current output (user edited the file, marketplace
+              updated since the snapshot was taken) stay explicit so
+              the on-disk content wins.
+
+        The persistence load path in
+        ``ConversationState`` factory sets the context flag. API-
+        response paths and direct caller construction don't, so they
+        get fresh-construction semantics.
         """
         if not self.load_user_skills and not self.load_public_skills:
             return self
@@ -276,10 +298,20 @@ def _load_auto_skills(self):
             self.marketplace_path,
         )
 
+        loading_from_snapshot = bool(
+            info.context and info.context.get("loading_from_snapshot")
+        )
+
         existing_by_name = {skill.name: skill for skill in self.skills}
         for name, skill in auto_skills.items():
             existing = existing_by_name.get(name)
             if existing is None:
+                if loading_from_snapshot:
+                    # Snapshot is authoritative — don't add a skill
+                    # the persisted state never had. This is what
+                    # freezes the auto-loaded set against marketplace
+                    # / user-skill churn between save and load.
+                    continue
                 self.skills.append(skill)
                 # Deep-copy so an in-place caller mutation
                 # (``ctx.skills[0].content = "custom"``) doesn't also
@@ -287,9 +319,12 @@ def _load_auto_skills(self):
                 # would still succeed and the customised skill would
                 # silently disappear from the wire.
                 self._auto_loaded_skills[name] = skill.model_copy(deep=True)
-            elif existing == skill:
-                # Migration path for conversations stored before the
-                # serializer change — see the docstring.
+            elif existing == skill and loading_from_snapshot:
+                # Migration path: existing matches loader → mark as
+                # auto-loaded so the next serialize can trim it.
+                # ONLY fires under loading_from_snapshot — for fresh
+                # construction this would conflate caller-pinned
+                # explicit skills with auto-loaded ones.
                 self._auto_loaded_skills[name] = skill.model_copy(deep=True)
             else:
                 logger.debug(
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index 6c078935e8..1435794204 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -349,8 +349,16 @@ def create(
 
         # ---- Resume path ----
         if base_text:
-            # Use cipher context for decrypting secrets if provided
-            context = {"cipher": cipher} if cipher else None
+            # ``loading_from_snapshot`` tells ``AgentContext._load_auto_skills``
+            # that the persisted ``skills`` list is authoritative and must
+            # NOT be augmented with newly published auto-loaded skills the
+            # loader picks up post-save. Without it, a snapshot frozen as
+            # {a} would silently grow to {a, b} after `b` lands in the
+            # public marketplace, breaking the freeze-at-create guarantee
+            # ``_save_base_state`` relied on. See software-agent-sdk#3301.
+            context: dict[str, Any] = {"loading_from_snapshot": True}
+            if cipher:
+                context["cipher"] = cipher
             state = cls.model_validate(json.loads(base_text), context=context)
 
             # Restore the conversation with the same id
diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py
index 4a025cc784..89b45c99ec 100644
--- a/tests/sdk/context/test_agent_context_skills_serialization.py
+++ b/tests/sdk/context/test_agent_context_skills_serialization.py
@@ -116,16 +116,14 @@ def test_disabled_auto_load_leaves_explicit_skills_on_the_wire(self):
         assert {s["name"] for s in dumped["skills"]} == {"foo", "bar"}
 
     def test_migration_stored_skills_matching_loader_are_marked_auto_loaded(self):
-        """Migration path: a conversation stored before this PR carries the
-        resolved auto-loaded skills inlined on ``skills``. On reload, the
-        validator must recognise those as auto-loaded (the persisted
-        skill equals what the loader produces) and drop them from the
-        next serialization — otherwise the bloat persists until the
-        conversation is recreated.
+        """Migration path (loading_from_snapshot only): a conversation
+        stored before this PR carries the resolved auto-loaded skills
+        inlined on ``skills``. When ``ConversationState`` loads it with
+        ``loading_from_snapshot=True``, the validator recognises matching
+        skills as auto-loaded and drops them from the next serialization —
+        otherwise the bloat persists until the conversation is recreated.
         """
         stored = [_make_skill("auto-x"), _make_skill("auto-y")]
-        # Loader returns the same skill objects (matches what was
-        # persisted under the old SDK).
         loader_output = {
             "auto-x": _make_skill("auto-x"),
             "auto-y": _make_skill("auto-y"),
@@ -134,7 +132,10 @@ def test_migration_stored_skills_matching_loader_are_marked_auto_loaded(self):
             "openhands.sdk.context.agent_context.load_available_skills",
             return_value=loader_output,
         ):
-            ctx = AgentContext(load_public_skills=True, skills=stored)
+            ctx = AgentContext.model_validate(
+                {"load_public_skills": True, "skills": stored},
+                context={"loading_from_snapshot": True},
+            )
 
         # No duplicates: skills stayed at 2 (migration recognised them).
         assert {s.name for s in ctx.skills} == {"auto-x", "auto-y"}
@@ -142,12 +143,38 @@ def test_migration_stored_skills_matching_loader_are_marked_auto_loaded(self):
         dumped = ctx.model_dump()
         assert dumped["skills"] == []
 
+    def test_fresh_construction_does_not_conflate_explicit_with_auto(self):
+        """Comment-6 regression: fresh ``AgentContext(skills=[explicit])``
+        construction where the explicit skill happens to equal what the
+        loader returns must NOT mark it as auto-loaded. Without this
+        guard, a later marketplace/user-skill edit would silently swap
+        the caller's pinned content on the next round-trip.
+        """
+        explicit = _make_skill("shared-name", "caller pinned this")
+        loader_output = {
+            "shared-name": _make_skill("shared-name", "caller pinned this")
+        }
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=loader_output,
+        ):
+            ctx = AgentContext(load_public_skills=True, skills=[explicit])
+
+        # In-memory: only the explicit (validator's "already in
+        # existing" branch hit, no append).
+        assert len(ctx.skills) == 1
+        # On the wire: stays as explicit — not snapshotted as auto.
+        dumped = ctx.model_dump()
+        assert len(dumped["skills"]) == 1
+        assert dumped["skills"][0]["content"] == "caller pinned this"
+
     def test_migration_stored_skills_diverged_from_loader_stay_explicit(self):
         """If the persisted skill content no longer matches what the
         loader would produce (user edited the file, marketplace updated),
         the on-disk version wins — treat it as explicit and keep it on
         the wire. This is the safe default: we don't drop content
-        someone may have intentionally customised.
+        someone may have intentionally customised. Applies under
+        ``loading_from_snapshot`` (same as the matching-migration path).
         """
         stored = [_make_skill("auto-x", "old persisted content")]
         loader_output = {"auto-x": _make_skill("auto-x", "new loader content")}
@@ -155,7 +182,10 @@ def test_migration_stored_skills_diverged_from_loader_stay_explicit(self):
             "openhands.sdk.context.agent_context.load_available_skills",
             return_value=loader_output,
         ):
-            ctx = AgentContext(load_public_skills=True, skills=stored)
+            ctx = AgentContext.model_validate(
+                {"load_public_skills": True, "skills": stored},
+                context={"loading_from_snapshot": True},
+            )
 
         # The stored ("old") version wins in-memory.
         assert len(ctx.skills) == 1
@@ -165,6 +195,58 @@ def test_migration_stored_skills_diverged_from_loader_stay_explicit(self):
         assert len(dumped["skills"]) == 1
         assert dumped["skills"][0]["content"] == "old persisted content"
 
+    def test_loading_from_snapshot_does_not_pick_up_new_auto_loaded_skills(self):
+        """Comment-5 regression: a persisted snapshot frozen with only
+        ``{a}`` must not silently grow to ``{a, b}`` if the loader now
+        also returns a new skill ``b`` (marketplace added it, user
+        added a file). The ``loading_from_snapshot`` context flag
+        tells the validator the persisted ``skills`` list is
+        authoritative.
+        """
+        stored = [_make_skill("a")]
+        # Loader sees a new ``b`` that wasn't in the persisted snapshot.
+        loader_output = {
+            "a": _make_skill("a"),
+            "b": _make_skill("b", "newly published content"),
+        }
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=loader_output,
+        ):
+            ctx = AgentContext.model_validate(
+                {"load_public_skills": True, "skills": stored},
+                context={"loading_from_snapshot": True},
+            )
+
+        # In-memory: snapshot is authoritative — ``b`` is NOT appended.
+        assert {s.name for s in ctx.skills} == {"a"}
+        # Next ``model_dump`` (default, no preserve flag) still trims
+        # ``a`` since it was matched as auto-loaded. Serialized list
+        # is empty — exactly what the original snapshot would have
+        # serialized.
+        dumped = ctx.model_dump()
+        assert dumped["skills"] == []
+        # And the preserve-full path also gives back just ``a`` — no
+        # marketplace pollution.
+        dumped_full = ctx.model_dump(context={"preserve_full_skills": True})
+        assert {s["name"] for s in dumped_full["skills"]} == {"a"}
+
+    def test_fresh_construction_still_appends_new_auto_skills(self):
+        """Sanity: without the ``loading_from_snapshot`` flag, fresh
+        construction keeps appending new auto-loaded skills as today.
+        This is what makes the registry hot for first-launch users.
+        """
+        loader_output = {
+            "auto-1": _make_skill("auto-1"),
+            "auto-2": _make_skill("auto-2"),
+        }
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=loader_output,
+        ):
+            ctx = AgentContext(load_public_skills=True)
+        assert {s.name for s in ctx.skills} == {"auto-1", "auto-2"}
+
     def test_replacing_auto_skill_via_model_copy_survives_serialization(self):
         """Regression guard for OpenHands' ``_create_agent_with_skills`` flow.
 

From d81d7c05e257908256176fc6884a313945ac73a7 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Wed, 20 May 2026 09:50:55 +0200
Subject: [PATCH 7/7] perf(agent_context): honour round_trip + preserve
 snapshot on full resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two edge cases from the fourth bot review:

1. ``model_dump(round_trip=True)`` is Pydantic's canonical signal
   for "this dump must rehydrate without semantic loss". The trim
   ran under it too, so a caller using it for an ``AgentContext``
   snapshot would silently reload whatever the loader returns now
   instead of the in-memory catalog they dumped.

   Fix: ``info.round_trip`` opts out of the trim, same as the
   explicit ``preserve_full_skills`` context flag. Pinned by
   ``test_round_trip_dump_preserves_full_skill_list``.

2. The persistence-resume guard (``loading_from_snapshot``)
   correctly populated ``state.agent.agent_context.skills`` from
   the snapshot — but the next line, ``state.agent = agent``,
   unconditionally overwrote the loaded agent with the runtime
   one. The runtime agent's ``agent_context`` carried a freshly-
   resolved (and potentially drifted) skill list from its own
   ``_load_auto_skills`` run. Autosave then rewrote ``base_state.json``
   with the drifted set. The reviewer reproduced this end-to-end:
   initial persisted ``['a']`` → resumed runtime ``['a', 'b']`` →
   autosave persisted ``['a', 'b']``.

   Fix: before ``state.agent = agent``, ``model_copy`` the runtime
   agent's ``agent_context`` with the loaded snapshot's skills. The
   snapshot stays authoritative for skills; runtime-only fields
   (LLM client, tool wiring, etc.) come from the runtime agent as
   today. Pinned by ``test_conversationstate_resume_preserves_snapshot_skills``
   — full create → save → resume cycle via ``ConversationState.create``
   and ``LocalFileStore``, not just ``_save_base_state`` in isolation.

793/793 tests pass on ``tests/sdk/context/`` +
``tests/sdk/conversation/`` + ``tests/cross/test_agent_secrets_integration.py``.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/context/agent_context.py    |  11 +-
 .../openhands/sdk/conversation/state.py       |  22 ++++
 ...test_agent_context_skills_serialization.py | 118 ++++++++++++++++++
 3 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
index 6cce3d50ce..c7c1c8a2b0 100644
--- a/openhands-sdk/openhands/sdk/context/agent_context.py
+++ b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -192,7 +192,16 @@ def _serialize_skills(self, value: list[Skill], handler, info) -> Any:
         nested ``include`` / ``exclude`` for the surviving skills —
         manual ``s.model_dump(...)`` would have ignored them.
         """
-        if info.context and info.context.get("preserve_full_skills"):
+        # ``round_trip=True`` is Pydantic's canonical signal for "this
+        # dump must rehydrate without semantic loss" (e.g. for caches,
+        # snapshots, or anything the caller will ``model_validate``
+        # later). Honour it the same way we honour the explicit
+        # opt-out flag — trimming under round-trip would let the
+        # reload silently pick up a *different* skill catalog from
+        # the loader, which is exactly the loss the flag warns against.
+        if info.round_trip or (
+            info.context and info.context.get("preserve_full_skills")
+        ):
             return handler(value)
         # Auto-load config drifted (or never ran) → can't trust the
         # snapshot to round-trip. Serialize everything as explicit.
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index 1435794204..c9faf54603 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -376,6 +376,28 @@ def create(
             # Verify compatibility (agent class + tools)
             agent.verify(state.agent, events=state._events)
 
+            # Preserve the loaded snapshot's skills onto the runtime
+            # agent's ``agent_context``. Without this, the runtime
+            # agent (constructed by the caller before resume) carries
+            # a freshly-resolved skill list from its own
+            # ``_load_auto_skills`` run — which may have drifted
+            # since the snapshot was taken (marketplace adds, user
+            # edits). Assigning ``state.agent = agent`` then loses
+            # the snapshot, and autosave rewrites ``base_state.json``
+            # with the drifted set. The snapshot is authoritative on
+            # resume; runtime-only fields (LLM client, tool wiring,
+            # etc.) come from the runtime agent. See
+            # software-agent-sdk#3301.
+            loaded_agent_context = state.agent.agent_context if state.agent else None
+            if loaded_agent_context is not None and agent.agent_context is not None:
+                agent = agent.model_copy(
+                    update={
+                        "agent_context": agent.agent_context.model_copy(
+                            update={"skills": loaded_agent_context.skills}
+                        )
+                    }
+                )
+
             # Commit runtime-provided values (may autosave)
             state._autosave_enabled = True
             state.agent = agent
diff --git a/tests/sdk/context/test_agent_context_skills_serialization.py b/tests/sdk/context/test_agent_context_skills_serialization.py
index 89b45c99ec..75a7816326 100644
--- a/tests/sdk/context/test_agent_context_skills_serialization.py
+++ b/tests/sdk/context/test_agent_context_skills_serialization.py
@@ -491,3 +491,121 @@ def test_payload_shrinks_to_explicit_only(self):
             f"serialized skills should be ~1 explicit skill (~150 B), "
             f"got {skills_bytes} B — auto skills leaked into output"
         )
+
+    def test_round_trip_dump_preserves_full_skill_list(self):
+        """Comment-7 regression: ``model_dump(round_trip=True)`` is
+        Pydantic's canonical "must reload without semantic loss"
+        signal. Honour it the same as ``preserve_full_skills`` —
+        otherwise a caller using it for an ``AgentContext`` snapshot
+        would reload whatever the current external skill source
+        returns instead of the in-memory catalog they dumped.
+        """
+        auto = {f"auto-{i}": _make_skill(f"auto-{i}") for i in range(3)}
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value=auto,
+        ):
+            ctx = AgentContext(load_public_skills=True)
+
+        # Default dump still trims (the API-wire win).
+        assert ctx.model_dump()["skills"] == []
+        # ``round_trip=True`` keeps the full snapshot.
+        dumped = ctx.model_dump(round_trip=True)
+        assert {s["name"] for s in dumped["skills"]} == set(auto.keys())
+
+    def test_conversationstate_resume_preserves_snapshot_skills(self):
+        """Comment-8 regression: full create→save→resume cycle.
+
+        The persistence load path used to call ``state.agent = agent``
+        unconditionally, overwriting the loaded snapshot's
+        ``agent_context.skills`` with the runtime agent's freshly-
+        loaded list. If the loader had drifted since save (marketplace
+        added a skill), the next autosave rewrote ``base_state.json``
+        with the drifted set — breaking the freeze-at-create guarantee.
+        ``ConversationState.create`` resume path now preserves the
+        snapshot's skills onto the runtime agent's ``agent_context``
+        before assignment.
+        """
+        import tempfile
+        import uuid
+
+        from openhands.sdk.agent import Agent
+        from openhands.sdk.conversation.state import ConversationState
+        from openhands.sdk.io import LocalFileStore
+        from openhands.sdk.llm import LLM
+        from openhands.sdk.workspace.local import LocalWorkspace
+
+        # ``a`` was around at save time. Snapshot freezes on it.
+        with patch(
+            "openhands.sdk.context.agent_context.load_available_skills",
+            return_value={"a": _make_skill("a", "original a content")},
+        ):
+            agent_at_save = Agent(
+                llm=LLM(model="test", usage_id="test"),
+                agent_context=AgentContext(load_public_skills=True),
+            )
+
+        with tempfile.TemporaryDirectory() as tmp:
+            workspace = LocalWorkspace(working_dir=tmp)
+            conv_id = uuid.uuid4()
+
+            # Save: create state, persist.
+            with patch(
+                "openhands.sdk.context.agent_context.load_available_skills",
+                return_value={"a": _make_skill("a", "original a content")},
+            ):
+                state_saved = ConversationState.create(
+                    id=conv_id,
+                    agent=agent_at_save,
+                    workspace=workspace,
+                    persistence_dir=tmp,
+                )
+                state_saved._save_base_state(LocalFileStore(tmp))
+
+            # Now the marketplace adds a new skill `b` and changes `a`.
+            with patch(
+                "openhands.sdk.context.agent_context.load_available_skills",
+                return_value={
+                    "a": _make_skill("a", "updated a content"),
+                    "b": _make_skill("b", "newly published"),
+                },
+            ):
+                # Runtime agent on resume — its agent_context auto-load
+                # picks up the drifted skill set.
+                runtime_agent = Agent(
+                    llm=LLM(model="test", usage_id="test"),
+                    agent_context=AgentContext(load_public_skills=True),
+                )
+                assert {s.name for s in runtime_agent.agent_context.skills} == {
+                    "a",
+                    "b",
+                }
+                # Resume the conversation. The fix must preserve the
+                # snapshot's skills (just `a`, with original content)
+                # rather than the runtime agent's drifted set.
+                state_resumed = ConversationState.create(
+                    id=conv_id,
+                    agent=runtime_agent,
+                    workspace=workspace,
+                    persistence_dir=tmp,
+                )
+
+            assert state_resumed.agent.agent_context is not None
+            resumed_skills = {s.name for s in state_resumed.agent.agent_context.skills}
+            assert resumed_skills == {"a"}, (
+                f"resume picked up drifted skills {resumed_skills} from the runtime "
+                f"agent's fresh auto-load; should have preserved the snapshot {{a}}"
+            )
+            # And the persisted-to-disk content also matches the
+            # snapshot — autosave didn't drift it.
+            with open(f"{tmp}/base_state.json") as f:
+                import json as _json
+
+                persisted = _json.load(f)
+            persisted_names = {
+                s["name"] for s in persisted["agent"]["agent_context"]["skills"]
+            }
+            assert persisted_names == {"a"}, (
+                f"autosave rewrote base_state.json with drifted skills "
+                f"{persisted_names} instead of the original snapshot"
+            )