OpenHands · simonrosenberg · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/openhands-sdk/openhands/sdk/context/agent_context.py b/openhands-sdk/openhands/sdk/context/agent_context.py
@@ -8,7 +8,9 @@
 from pydantic import (
     BaseModel,
     Field,
+    PrivateAttr,
     SecretStr,
+    ValidationInfo,
     field_serializer,
     field_validator,
     model_validator,
@@ -118,6 +120,102 @@ class AgentContext(BaseModel):
         json_schema_extra={"acp_compatible": True},
     )
 
+    # Snapshot of the skills that ``_load_auto_skills`` added on top of
+    # the caller-supplied list. The serializer drops only the ones still
+    # equal to this snapshot — if a downstream consumer replaces an
+    # auto-loaded skill via ``model_copy(update={'skills': merged})``
+    # (OpenHands' ``_create_agent_with_skills`` does this when it merges
+    # in sandbox / repo skills with overlapping names), the replacement
+    # has different field values and survives serialization. The same
+    # auto-load will re-run on the receiving end and rebuild the
+    # auto-loaded subset that wasn't replaced. For a stock configuration
+    # that turns both flags on (~40 skills bundled under
+    # ``~/.openhands/skills``) the resolved list is ~260 KB per
+    # ``AgentContext`` — every ``GET`` on a stored conversation carried
+    # that. See software-agent-sdk#3301.
+    #
+    # Values are deep-copied so an in-place caller mutation
+    # (``ctx.skills[0].content = "custom"``) doesn't also mutate the
+    # snapshot — without that, the equality check would still succeed
+    # and the customised skill would silently disappear from the wire.
+    _auto_loaded_skills: dict[str, Skill] = PrivateAttr(default_factory=dict)
+    # The auto-load config that produced ``_auto_loaded_skills``.
+    # Tracked so the serializer can no-op the trim when the config
+    # changed after validation — e.g. ``model_copy(update={
+    # "load_public_skills": False})``. Without this, a copy with the
+    # flag flipped off would drop the auto-loaded skills on the wire
+    # AND fail to re-load them on the next ``model_validate`` (the
+    # flag is now off), losing them entirely. ``None`` when
+    # ``_load_auto_skills`` has not run yet.
+    _auto_load_config: tuple[bool, bool, str | None] | None = PrivateAttr(default=None)
+
+    @field_serializer("skills", when_used="always", mode="wrap")
+    def _serialize_skills(self, value: list[Skill], handler, info) -> Any:
+        """Drop unmodified auto-loaded skills from the serialized output.
+
+        The runtime keeps the full resolved list on ``self.skills`` so
+        prompt rendering and downstream consumers behave exactly as
+        today. Only the wire payload changes: callers re-loading the
+        model will trigger ``_load_auto_skills`` again, which rebuilds
+        the auto-loaded subset deterministically from the same
+        ``load_user_skills`` / ``load_public_skills`` /
+        ``marketplace_path`` configuration.
+
+        Equality (not just name match) is required because consumers
+        like OpenHands' ``_create_agent_with_skills`` replace
+        auto-loaded skills in-place with their own version under the
+        same name. A name-only filter would silently drop those
+        replacements and the receiver would auto-reload the stock
+        version on the next deserialization.
+
+        Opt-out via ``context={"preserve_full_skills": True}``: paths
+        that need a stable snapshot of the resolved skill catalog (the
+        most important one being ``ConversationState._save_base_state``
+        — persistence of the conversation to disk) pass this flag so
+        the serializer is a no-op. Without it, a paused conversation
+        resumed after the ``~/.openhands/skills`` directory or the
+        public marketplace updated would silently pick up the *new*
+        skill content via ``_load_auto_skills``. The API-response path
+        skips the flag and gets the byte-size win.
+
+        Config-drift safety: if the auto-load config
+        (``load_user_skills`` / ``load_public_skills`` /
+        ``marketplace_path``) changed since the snapshot was taken —
+        typically via ``model_copy(update=...)`` flipping one of those
+        flags — the trim is skipped. Otherwise the receiver would
+        either re-load a *different* skill catalog (changed
+        ``marketplace_path``) or fail to re-load at all (flag turned
+        off), losing the auto-loaded skills entirely.
+
+        ``mode="wrap"`` + ``handler`` delegation preserves caller
+        options like ``exclude_none``, ``exclude_defaults``, and
+        nested ``include`` / ``exclude`` for the surviving skills —
+        manual ``s.model_dump(...)`` would have ignored them.
+        """
+        # ``round_trip=True`` is Pydantic's canonical signal for "this
+        # dump must rehydrate without semantic loss" (e.g. for caches,
+        # snapshots, or anything the caller will ``model_validate``
+        # later). Honour it the same way we honour the explicit
+        # opt-out flag — trimming under round-trip would let the
+        # reload silently pick up a *different* skill catalog from
+        # the loader, which is exactly the loss the flag warns against.
+        if info.round_trip or (
+            info.context and info.context.get("preserve_full_skills")
+        ):
+            return handler(value)
+        # Auto-load config drifted (or never ran) → can't trust the
+        # snapshot to round-trip. Serialize everything as explicit.
+        current_config = (
+            self.load_user_skills,
+            self.load_public_skills,
+            self.marketplace_path,
+        )
+        if self._auto_load_config != current_config:
+            return handler(value)
+        auto = self._auto_loaded_skills
+        kept = [s for s in value if auto.get(s.name) != s]
+        return handler(kept)
+
     @field_serializer("secrets", when_used="always")
     def _serialize_secrets(
         self, value: Mapping[str, SecretValue] | None, info
@@ -147,8 +245,47 @@ def _validate_skills(cls, v: list[Skill], _info):
         return v
 
     @model_validator(mode="after")
-    def _load_auto_skills(self):
-        """Load user and/or public skills if enabled."""
+    def _load_auto_skills(self, info: ValidationInfo):
+        """Load user and/or public skills if enabled.
+
+        Names of skills added here are tracked in
+        ``_auto_loaded_skills`` so the serializer can drop them from
+        the wire payload — this validator re-runs on every model load,
+        so the same skills repopulate without needing to be persisted.
+
+        Two behaviour modes, switched by
+        ``context={"loading_from_snapshot": True}``:
+
+        Fresh construction (default, no flag):
+            - Skills the loader returned that aren't already in
+              ``self.skills`` get appended and tracked as auto-loaded.
+            - Skills the loader returned that ARE already in
+              ``self.skills`` (caller-supplied explicit) are left
+              alone — even if they happen to equal the loader output.
+              The caller pinned that content deliberately; treating it
+              as auto-loaded would let a later marketplace update
+              silently swap it on the next round-trip.
+
+        Loading from a persisted snapshot (``loading_from_snapshot``):
+            - The persisted ``skills`` list IS authoritative. Don't
+              append new auto-loaded skills — that would let a
+              marketplace update or a fresh skill file silently
+              pollute the snapshot on the next save.
+            - Skills already in ``self.skills`` that equal the
+              loader's current output get marked as auto-loaded so
+              the next serialize trims them (migration path: old
+              persisted conversations shrink on first reload through
+              the new SDK).
+            - Skills in ``self.skills`` that DON'T match the loader's
+              current output (user edited the file, marketplace
+              updated since the snapshot was taken) stay explicit so
+              the on-disk content wins.
+
+        The persistence load path in
+        ``ConversationState`` factory sets the context flag. API-
+        response paths and direct caller construction don't, so they
+        get fresh-construction semantics.
+        """
         if not self.load_user_skills and not self.load_public_skills:
             return self
 
@@ -160,10 +297,44 @@ def _load_auto_skills(self):
             marketplace_path=self.marketplace_path,
         )
 
-        existing_names = {skill.name for skill in self.skills}
+        # Record the config that produced this snapshot so the
+        # serializer can detect drift (e.g. ``model_copy(update={
+        # "load_public_skills": False})``) and degrade to full
+        # serialization.
+        self._auto_load_config = (
+            self.load_user_skills,
+            self.load_public_skills,
+            self.marketplace_path,
+        )
+
+        loading_from_snapshot = bool(
+            info.context and info.context.get("loading_from_snapshot")
+        )
+
+        existing_by_name = {skill.name: skill for skill in self.skills}
         for name, skill in auto_skills.items():
-            if name not in existing_names:
+            existing = existing_by_name.get(name)
+            if existing is None:
+                if loading_from_snapshot:
+                    # Snapshot is authoritative — don't add a skill
+                    # the persisted state never had. This is what
+                    # freezes the auto-loaded set against marketplace
+                    # / user-skill churn between save and load.
+                    continue
                 self.skills.append(skill)
+                # Deep-copy so an in-place caller mutation
+                # (``ctx.skills[0].content = "custom"``) doesn't also
+                # mutate the snapshot — without it the equality check
+                # would still succeed and the customised skill would
+                # silently disappear from the wire.
+                self._auto_loaded_skills[name] = skill.model_copy(deep=True)
+            elif existing == skill and loading_from_snapshot:
+                # Migration path: existing matches loader → mark as
+                # auto-loaded so the next serialize can trim it.
+                # ONLY fires under loading_from_snapshot — for fresh
+                # construction this would conflate caller-pinned
+                # explicit skills with auto-loaded ones.
+                self._auto_loaded_skills[name] = skill.model_copy(deep=True)
             else:
                 logger.debug(
                     f"Skipping auto-loaded skill '{name}' (already in explicit skills)"

diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -255,8 +255,18 @@ def _save_base_state(self, fs: FileStore) -> None:
 
         If a cipher is configured, secrets will be encrypted. Otherwise, they
         will be redacted (serialized as '**********').
+
+        ``preserve_full_skills`` is set so the persisted snapshot
+        includes auto-loaded skills inline — without this the conversation
+        would silently pick up *new* skill content from
+        ``~/.openhands/skills`` / the public marketplace on resume, since
+        ``AgentContext._load_auto_skills`` re-runs on deserialization.
+        Persistence needs the freeze-at-create-time snapshot; only the
+        API-response path opts into the trim. See software-agent-sdk#3301.
         """
-        context = {"cipher": self._cipher} if self._cipher else None
+        context: dict[str, Any] = {"preserve_full_skills": True}
+        if self._cipher:
+            context["cipher"] = self._cipher
         # Warn if secrets exist but no cipher is configured
         if not self._cipher and self.secret_registry.secret_sources:
             logger.warning(
@@ -339,8 +349,16 @@ def create(
 
         # ---- Resume path ----
         if base_text:
-            # Use cipher context for decrypting secrets if provided
-            context = {"cipher": cipher} if cipher else None
+            # ``loading_from_snapshot`` tells ``AgentContext._load_auto_skills``
+            # that the persisted ``skills`` list is authoritative and must
+            # NOT be augmented with newly published auto-loaded skills the
+            # loader picks up post-save. Without it, a snapshot frozen as
+            # {a} would silently grow to {a, b} after `b` lands in the
+            # public marketplace, breaking the freeze-at-create guarantee
+            # ``_save_base_state`` relied on. See software-agent-sdk#3301.
+            context: dict[str, Any] = {"loading_from_snapshot": True}
+            if cipher:
+                context["cipher"] = cipher
             state = cls.model_validate(json.loads(base_text), context=context)
 
             # Restore the conversation with the same id
@@ -358,6 +376,28 @@ def create(
             # Verify compatibility (agent class + tools)
             agent.verify(state.agent, events=state._events)
 
+            # Preserve the loaded snapshot's skills onto the runtime
+            # agent's ``agent_context``. Without this, the runtime
+            # agent (constructed by the caller before resume) carries
+            # a freshly-resolved skill list from its own
+            # ``_load_auto_skills`` run — which may have drifted
+            # since the snapshot was taken (marketplace adds, user
+            # edits). Assigning ``state.agent = agent`` then loses
+            # the snapshot, and autosave rewrites ``base_state.json``
+            # with the drifted set. The snapshot is authoritative on
+            # resume; runtime-only fields (LLM client, tool wiring,
+            # etc.) come from the runtime agent. See
+            # software-agent-sdk#3301.
+            loaded_agent_context = state.agent.agent_context if state.agent else None
+            if loaded_agent_context is not None and agent.agent_context is not None:
+                agent = agent.model_copy(
+                    update={
+                        "agent_context": agent.agent_context.model_copy(
+                            update={"skills": loaded_agent_context.skills}
+                        )
+                    }
+                )
+
             # Commit runtime-provided values (may autosave)
             state._autosave_enabled = True
             state.agent = agent