Skip to content
Closed
179 changes: 175 additions & 4 deletions openhands-sdk/openhands/sdk/context/agent_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from pydantic import (
BaseModel,
Field,
PrivateAttr,
SecretStr,
ValidationInfo,
field_serializer,
field_validator,
model_validator,
Expand Down Expand Up @@ -118,6 +120,102 @@ class AgentContext(BaseModel):
json_schema_extra={"acp_compatible": True},
)

# Snapshot of the skills that ``_load_auto_skills`` added on top of
# the caller-supplied list. The serializer drops only the ones still
# equal to this snapshot — if a downstream consumer replaces an
# auto-loaded skill via ``model_copy(update={'skills': merged})``
# (OpenHands' ``_create_agent_with_skills`` does this when it merges
# in sandbox / repo skills with overlapping names), the replacement
# has different field values and survives serialization. The same
# auto-load will re-run on the receiving end and rebuild the
# auto-loaded subset that wasn't replaced. For a stock configuration
# that turns both flags on (~40 skills bundled under
# ``~/.openhands/skills``) the resolved list is ~260 KB per
# ``AgentContext`` — every ``GET`` on a stored conversation carried
# that. See software-agent-sdk#3301.
#
# Values are deep-copied so an in-place caller mutation
# (``ctx.skills[0].content = "custom"``) doesn't also mutate the
# snapshot — without that, the equality check would still succeed
# and the customised skill would silently disappear from the wire.
_auto_loaded_skills: dict[str, Skill] = PrivateAttr(default_factory=dict)
# The auto-load config that produced ``_auto_loaded_skills``.
# Tracked so the serializer can no-op the trim when the config
# changed after validation — e.g. ``model_copy(update={
# "load_public_skills": False})``. Without this, a copy with the
# flag flipped off would drop the auto-loaded skills on the wire
# AND fail to re-load them on the next ``model_validate`` (the
# flag is now off), losing them entirely. ``None`` when
# ``_load_auto_skills`` has not run yet.
_auto_load_config: tuple[bool, bool, str | None] | None = PrivateAttr(default=None)

@field_serializer("skills", when_used="always", mode="wrap")
def _serialize_skills(self, value: list[Skill], handler, info) -> Any:
"""Drop unmodified auto-loaded skills from the serialized output.

The runtime keeps the full resolved list on ``self.skills`` so
prompt rendering and downstream consumers behave exactly as
today. Only the wire payload changes: callers re-loading the
model will trigger ``_load_auto_skills`` again, which rebuilds
the auto-loaded subset deterministically from the same
``load_user_skills`` / ``load_public_skills`` /
``marketplace_path`` configuration.

Equality (not just name match) is required because consumers
like OpenHands' ``_create_agent_with_skills`` replace
auto-loaded skills in-place with their own version under the
same name. A name-only filter would silently drop those
replacements and the receiver would auto-reload the stock
version on the next deserialization.

Opt-out via ``context={"preserve_full_skills": True}``: paths
that need a stable snapshot of the resolved skill catalog (the
most important one being ``ConversationState._save_base_state``
— persistence of the conversation to disk) pass this flag so
the serializer is a no-op. Without it, a paused conversation
resumed after the ``~/.openhands/skills`` directory or the
public marketplace updated would silently pick up the *new*
skill content via ``_load_auto_skills``. The API-response path
skips the flag and gets the byte-size win.

Config-drift safety: if the auto-load config
(``load_user_skills`` / ``load_public_skills`` /
``marketplace_path``) changed since the snapshot was taken —
typically via ``model_copy(update=...)`` flipping one of those
flags — the trim is skipped. Otherwise the receiver would
either re-load a *different* skill catalog (changed
``marketplace_path``) or fail to re-load at all (flag turned
off), losing the auto-loaded skills entirely.

``mode="wrap"`` + ``handler`` delegation preserves caller
options like ``exclude_none``, ``exclude_defaults``, and
nested ``include`` / ``exclude`` for the surviving skills —
manual ``s.model_dump(...)`` would have ignored them.
"""
# ``round_trip=True`` is Pydantic's canonical signal for "this
# dump must rehydrate without semantic loss" (e.g. for caches,
# snapshots, or anything the caller will ``model_validate``
# later). Honour it the same way we honour the explicit
# opt-out flag — trimming under round-trip would let the
# reload silently pick up a *different* skill catalog from
# the loader, which is exactly the loss the flag warns against.
if info.round_trip or (
info.context and info.context.get("preserve_full_skills")
):
return handler(value)
# Auto-load config drifted (or never ran) → can't trust the
# snapshot to round-trip. Serialize everything as explicit.
current_config = (
self.load_user_skills,
self.load_public_skills,
self.marketplace_path,
)
if self._auto_load_config != current_config:
return handler(value)
auto = self._auto_loaded_skills
kept = [s for s in value if auto.get(s.name) != s]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: This filter still uses the copied private snapshot even if the auto-load config was changed after validation. For example, ctx.model_copy(update={"load_public_skills": False}) keeps the runtime auto-loaded skills but serializes them as []; on AgentContext.model_validate(...) they are not reloaded because the flag is now false. Changing marketplace_path has the same stale-snapshot problem, except the receiver may reload a different catalog. Please either tie the snapshot to the (load_user_skills, load_public_skills, marketplace_path) config and no-op when it no longer matches, or clear/recompute the snapshot when those fields are updated.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: filtering to kept before delegating to handler changes Pydantic's index-based nested include/exclude semantics for skills. Once auto-loaded entries are removed, the caller's indices are applied to the compacted list, so an include/exclude targeting an auto-loaded index can incorrectly serialize or drop an explicit skill. Skip trimming when nested include/exclude targets skills, or apply the trim after serialization while preserving the original index mapping.

return handler(kept)

@field_serializer("secrets", when_used="always")
def _serialize_secrets(
self, value: Mapping[str, SecretValue] | None, info
Expand Down Expand Up @@ -147,8 +245,47 @@ def _validate_skills(cls, v: list[Skill], _info):
return v

@model_validator(mode="after")
def _load_auto_skills(self):
"""Load user and/or public skills if enabled."""
def _load_auto_skills(self, info: ValidationInfo):
"""Load user and/or public skills if enabled.

Names of skills added here are tracked in
``_auto_loaded_skills`` so the serializer can drop them from
the wire payload — this validator re-runs on every model load,
so the same skills repopulate without needing to be persisted.

Two behaviour modes, switched by
``context={"loading_from_snapshot": True}``:

Fresh construction (default, no flag):
- Skills the loader returned that aren't already in
``self.skills`` get appended and tracked as auto-loaded.
- Skills the loader returned that ARE already in
``self.skills`` (caller-supplied explicit) are left
alone — even if they happen to equal the loader output.
The caller pinned that content deliberately; treating it
as auto-loaded would let a later marketplace update
silently swap it on the next round-trip.

Loading from a persisted snapshot (``loading_from_snapshot``):
- The persisted ``skills`` list IS authoritative. Don't
append new auto-loaded skills — that would let a
marketplace update or a fresh skill file silently
pollute the snapshot on the next save.
- Skills already in ``self.skills`` that equal the
loader's current output get marked as auto-loaded so
the next serialize trims them (migration path: old
persisted conversations shrink on first reload through
the new SDK).
- Skills in ``self.skills`` that DON'T match the loader's
current output (user edited the file, marketplace
updated since the snapshot was taken) stay explicit so
the on-disk content wins.

The persistence load path in
``ConversationState`` factory sets the context flag. API-
response paths and direct caller construction don't, so they
get fresh-construction semantics.
"""
if not self.load_user_skills and not self.load_public_skills:
return self

Expand All @@ -160,10 +297,44 @@ def _load_auto_skills(self):
marketplace_path=self.marketplace_path,
)

existing_names = {skill.name for skill in self.skills}
# Record the config that produced this snapshot so the
# serializer can detect drift (e.g. ``model_copy(update={
# "load_public_skills": False})``) and degrade to full
# serialization.
self._auto_load_config = (
self.load_user_skills,
self.load_public_skills,
self.marketplace_path,
)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: preserve_full_skills still doesn't freeze a saved snapshot on load. If the persisted payload contains auto skill a with load_public_skills=True, and the loader now returns a plus new b, this branch appends b during AgentContext.model_validate(...). The next persisted/full dump includes b, so the supposedly preserved snapshot can pick up newly published skills. The restore path needs a way to know a full snapshot is authoritative and skip appending missing auto skills (or disable auto-load for preserved snapshots).

loading_from_snapshot = bool(
info.context and info.context.get("loading_from_snapshot")
)

existing_by_name = {skill.name: skill for skill in self.skills}
for name, skill in auto_skills.items():
if name not in existing_names:
existing = existing_by_name.get(name)
if existing is None:
if loading_from_snapshot:
# Snapshot is authoritative — don't add a skill
# the persisted state never had. This is what
# freezes the auto-loaded set against marketplace
# / user-skill churn between save and load.
continue
self.skills.append(skill)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: appending auto-loaded skills in the after-validator doesn't mark skills as set. As a result, AgentContext(load_public_skills=True).model_dump(round_trip=True, exclude_unset=True) (and the preserve_full_skills variant) omits skills entirely before the serializer can preserve the snapshot, so reloading can pick up a different current skill catalog. If round-trip/preserve dumps are meant to be lossless, mark skills as set when the validator mutates it or avoid relying on a field serializer for this path.

# Deep-copy so an in-place caller mutation
# (``ctx.skills[0].content = "custom"``) doesn't also
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: This migration branch also catches caller-supplied explicit skills that happen to be equal to the current auto-loaded skill. In that case AgentContext(load_public_skills=True, skills=[explicit]) records the explicit skill in _auto_loaded_skills, model_dump() emits skills: [], and a later round-trip after the marketplace/user skill changes reloads the new content instead of the caller's pinned explicit skill. That contradicts the PR guarantee that explicit skills are unaffected. Please distinguish legacy persisted snapshots from direct caller-supplied skills, or keep equal existing skills explicit by default.

# mutate the snapshot — without it the equality check
# would still succeed and the customised skill would
# silently disappear from the wire.
self._auto_loaded_skills[name] = skill.model_copy(deep=True)
elif existing == skill and loading_from_snapshot:
# Migration path: existing matches loader → mark as
# auto-loaded so the next serialize can trim it.
# ONLY fires under loading_from_snapshot — for fresh
# construction this would conflate caller-pinned
# explicit skills with auto-loaded ones.
self._auto_loaded_skills[name] = skill.model_copy(deep=True)
else:
logger.debug(
f"Skipping auto-loaded skill '{name}' (already in explicit skills)"
Expand Down
46 changes: 43 additions & 3 deletions openhands-sdk/openhands/sdk/conversation/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,18 @@ def _save_base_state(self, fs: FileStore) -> None:

If a cipher is configured, secrets will be encrypted. Otherwise, they
will be redacted (serialized as '**********').

``preserve_full_skills`` is set so the persisted snapshot
includes auto-loaded skills inline — without this the conversation
would silently pick up *new* skill content from
``~/.openhands/skills`` / the public marketplace on resume, since
``AgentContext._load_auto_skills`` re-runs on deserialization.
Persistence needs the freeze-at-create-time snapshot; only the
API-response path opts into the trim. See software-agent-sdk#3301.
"""
context = {"cipher": self._cipher} if self._cipher else None
context: dict[str, Any] = {"preserve_full_skills": True}
if self._cipher:
context["cipher"] = self._cipher
# Warn if secrets exist but no cipher is configured
if not self._cipher and self.secret_registry.secret_sources:
logger.warning(
Expand Down Expand Up @@ -339,8 +349,16 @@ def create(

# ---- Resume path ----
if base_text:
# Use cipher context for decrypting secrets if provided
context = {"cipher": cipher} if cipher else None
# ``loading_from_snapshot`` tells ``AgentContext._load_auto_skills``
# that the persisted ``skills`` list is authoritative and must
# NOT be augmented with newly published auto-loaded skills the
# loader picks up post-save. Without it, a snapshot frozen as
# {a} would silently grow to {a, b} after `b` lands in the
# public marketplace, breaking the freeze-at-create guarantee
# ``_save_base_state`` relied on. See software-agent-sdk#3301.
context: dict[str, Any] = {"loading_from_snapshot": True}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: this loading_from_snapshot context is not enough to preserve the frozen snapshot, because the loaded state.agent is immediately replaced by the runtime agent below. That runtime agent is normally validated without this context; if the loader changed from {a} to {a,b} between save and resume, it already contains b, and autosave rewrites base_state.json with both skills. I reproduced this through ConversationState.create(..., persistence_dir=...): initial persisted skills ['a'], resumed/runtime skills ['a', 'b'], and the rewritten base state becomes ['a', 'b']. Please preserve/merge the loaded snapshot before autosaving the runtime agent (or adjust the stated freeze-at-create contract), and cover the real create→resume path rather than only calling _save_base_state directly.

if cipher:
context["cipher"] = cipher
state = cls.model_validate(json.loads(base_text), context=context)

# Restore the conversation with the same id
Expand All @@ -358,6 +376,28 @@ def create(
# Verify compatibility (agent class + tools)
agent.verify(state.agent, events=state._events)

# Preserve the loaded snapshot's skills onto the runtime
# agent's ``agent_context``. Without this, the runtime
# agent (constructed by the caller before resume) carries
# a freshly-resolved skill list from its own
# ``_load_auto_skills`` run — which may have drifted
# since the snapshot was taken (marketplace adds, user
# edits). Assigning ``state.agent = agent`` then loses
# the snapshot, and autosave rewrites ``base_state.json``
# with the drifted set. The snapshot is authoritative on
# resume; runtime-only fields (LLM client, tool wiring,
# etc.) come from the runtime agent. See
# software-agent-sdk#3301.
loaded_agent_context = state.agent.agent_context if state.agent else None
if loaded_agent_context is not None and agent.agent_context is not None:
agent = agent.model_copy(
update={
"agent_context": agent.agent_context.model_copy(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: this graft only replaces the public skills field; the copied context keeps the runtime context's private _auto_loaded_skills snapshot. If the persisted snapshot is {a} and the current loader now returns {a,b}, resume correctly sets ctx.skills == {a}, but default model_dump() emits skills: [] because it compares against the runtime {a,b} snapshot. A receiver revalidating that payload then auto-loads {a,b}, so the wire payload no longer represents the resumed conversation. Copy/reset the auto-load bookkeeping from the loaded context, or otherwise force these loaded skills to serialize, when preserving the snapshot.

update={"skills": loaded_agent_context.skills}
)
}
)

# Commit runtime-provided values (may autosave)
state._autosave_enabled = True
state.agent = agent
Expand Down
Loading
Loading