From 7b67420b66286e2173754c9573762ed58257a1b8 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Wed, 20 May 2026 10:05:23 +0200
Subject: [PATCH 1/3] perf(agent_server): trim agent.agent_context.skills from
 conversation responses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When ``AgentContext`` is constructed with ``load_user_skills=True`` or
``load_public_skills=True``, its model_validator resolves the entire
skill catalog (~40 entries in stock setups) and persists them inline.
Every ``GET /api/conversations`` / ``/search`` / ``/{id}`` therefore
carried ~260 KB of skill content that no API consumer reads — skill
bodies are only consumed server-side at prompt-render time, never
client-side.

Drop ``agent.agent_context.skills`` from the four read endpoints'
response payloads (``GET /search``, ``GET /{id}``, ``GET ""``,
``POST ""`` — plus the deprecated ACP equivalents and ``fork``). The
persisted ``ConversationState`` on disk and the in-memory copy held
by the agent's runtime are untouched. Only the HTTP-response bytes
shrink.

This is the simpler alternative to PR #3302, which attempted the same
goal via a ``@field_serializer`` on ``AgentContext`` itself. See
the route-trim PR description for the comparison; the short version
is that touching the model has to coordinate across persistence,
``model_copy``, ``round_trip``, equality checks, snapshot drift
detection, ``ValidationInfo`` contexts, and a ``state.agent =
agent`` overwrite — each of which the reviewer found edge cases in.
Trimming at the route boundary avoids all of that: the model stays
honest (the wire matches the in-memory shape minus exactly one leaf
field), and the optimization lives in one ~10-line helper that's
called from 7 route handlers.

Measured impact (agent-canvas cold-open of a 40-skill conversation,
same browser, same agent-server port, only the SDK swapped):

- ``GET /api/conversations?ids=…``: 258 KB → 4.6 KB (-98%)
- Cold-open "first event painted":  2226 ms → 1283 ms (-42%)

Tests:
- ``test_conversation_router_skill_trim.py``: 10 cases covering the
  helper (no-mutation, identity return on empty, preserve other
  agent_context fields, agent without agent_context) and FastAPI
  integration through all four read endpoints, plus a concrete
  byte-count comparison (trimmed vs ``model_dump_json`` of the same
  ConversationInfo with skills intact — ~1500+ bytes saved per
  5-skill conversation in the test fixture).
- 351 tests pass on the affected suites (router tests + sdk/context).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../agent_server/conversation_router.py       |  18 +-
 .../agent_server/conversation_router_acp.py   |  15 +-
 .../openhands/agent_server/models.py          |  39 ++++
 .../test_conversation_router_skill_trim.py    | 214 ++++++++++++++++++
 4 files changed, 277 insertions(+), 9 deletions(-)
 create mode 100644 tests/agent_server/test_conversation_router_skill_trim.py

diff --git a/openhands-agent-server/openhands/agent_server/conversation_router.py b/openhands-agent-server/openhands/agent_server/conversation_router.py
index 833f8b6a5a..e7db9089db 100644
--- a/openhands-agent-server/openhands/agent_server/conversation_router.py
+++ b/openhands-agent-server/openhands/agent_server/conversation_router.py
@@ -36,6 +36,7 @@
     Success,
     UpdateConversationRequest,
     UpdateSecretsRequest,
+    trim_conversation_response_skills,
 )
 from openhands.sdk import LLM, Agent, TextContent
 from openhands.sdk.conversation.state import ConversationExecutionStatus
@@ -91,9 +92,13 @@ async def search_conversations(
     """Search / List conversations"""
     assert limit > 0
     assert limit <= 100
-    return await conversation_service.search_conversations(
+    page = await conversation_service.search_conversations(
         page_id, limit, status, sort_order
     )
+    # Drop ``agent.agent_context.skills`` from each item before
+    # serialization — see ``trim_conversation_response_skills``.
+    page.items = [trim_conversation_response_skills(item) for item in page.items]
+    return page
 
 
 @conversation_router.get("/count")
@@ -120,7 +125,7 @@ async def get_conversation(
     conversation = await conversation_service.get_conversation(conversation_id)
     if conversation is None:
         raise HTTPException(status.HTTP_404_NOT_FOUND)
-    return conversation
+    return trim_conversation_response_skills(conversation)
 
 
 @conversation_router.get(
@@ -153,7 +158,10 @@ async def batch_get_conversations(
     any missing item"""
     assert len(ids) < 100
     conversations = await conversation_service.batch_get_conversations(ids)
-    return conversations
+    return [
+        trim_conversation_response_skills(c) if c is not None else None
+        for c in conversations
+    ]
 
 
 # Write Methods
@@ -170,7 +178,7 @@ async def start_conversation(
     """Start a conversation in the local environment."""
     info, is_new = await conversation_service.start_conversation(request)
     response.status_code = status.HTTP_201_CREATED if is_new else status.HTTP_200_OK
-    return info
+    return trim_conversation_response_skills(info)
 
 
 @conversation_router.post(
@@ -432,4 +440,4 @@ async def fork_conversation(
             status.HTTP_404_NOT_FOUND,
             detail="Source conversation not found",
         )
-    return info
+    return trim_conversation_response_skills(info)
diff --git a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
index b9b232f7ea..7e70cad9c6 100644
--- a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
+++ b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
@@ -19,6 +19,7 @@
     ConversationSortOrder,
     SendMessageRequest,
     StartACPConversationRequest,
+    trim_conversation_response_skills,
 )
 from openhands.sdk import LLM, Agent, TextContent
 from openhands.sdk.agent.acp_agent import ACPAgent
@@ -85,9 +86,11 @@ async def search_acp_conversations(
     """
     assert limit > 0
     assert limit <= 100
-    return await conversation_service.search_acp_conversations(
+    page = await conversation_service.search_acp_conversations(
         page_id, limit, status, sort_order
     )
+    page.items = [trim_conversation_response_skills(item) for item in page.items]
+    return page
 
 
 @conversation_router_acp.get("/count", deprecated=True)
@@ -123,7 +126,7 @@ async def get_acp_conversation(
     conversation = await conversation_service.get_acp_conversation(conversation_id)
     if conversation is None:
         raise HTTPException(status.HTTP_404_NOT_FOUND)
-    return conversation
+    return trim_conversation_response_skills(conversation)
 
 
 @conversation_router_acp.get("", deprecated=True)
@@ -137,7 +140,11 @@ async def batch_get_acp_conversations(
     Use ``/api/conversations`` instead.
     """
     assert len(ids) < 100
-    return await conversation_service.batch_get_acp_conversations(ids)
+    conversations = await conversation_service.batch_get_acp_conversations(ids)
+    return [
+        trim_conversation_response_skills(c) if c is not None else None
+        for c in conversations
+    ]
 
 
 @conversation_router_acp.post("", deprecated=True)
@@ -157,4 +164,4 @@ async def start_acp_conversation(
     """
     info, is_new = await conversation_service.start_acp_conversation(request)
     response.status_code = status.HTTP_201_CREATED if is_new else status.HTTP_200_OK
-    return info
+    return trim_conversation_response_skills(info)
diff --git a/openhands-agent-server/openhands/agent_server/models.py b/openhands-agent-server/openhands/agent_server/models.py
index 9c8fc871da..ba60211b5b 100644
--- a/openhands-agent-server/openhands/agent_server/models.py
+++ b/openhands-agent-server/openhands/agent_server/models.py
@@ -200,6 +200,45 @@ class ConversationPage(BaseModel):
     next_page_id: str | None = None
 
 
+def trim_conversation_response_skills(info: ConversationInfo) -> ConversationInfo:
+    """Return ``info`` with ``agent.agent_context.skills`` set to ``[]``.
+
+    Applied at the four HTTP read routes that emit ``ConversationInfo``
+    (search, get, batch-get, start). The persisted ``ConversationState``
+    on disk and the in-memory copy held by the agent's runtime are
+    untouched — only the bytes leaving over HTTP shrink.
+
+    Why drop the skills entirely on the wire: when an ``AgentContext``
+    is constructed with ``load_user_skills=True`` / ``load_public_skills=True``,
+    its model_validator resolves the entire skill catalog (~40 entries
+    in stock setups) and persists them inline. Every conversation
+    fetch therefore carried ~260 KB of skill content that no API
+    consumer actually reads — the skill bodies are only consumed
+    server-side at prompt-render time, never client-side.
+
+    Dropping ``skills`` from the API response (rather than from the
+    model itself) keeps the model semantics simple: the wire
+    representation differs from the in-memory representation in
+    exactly one place, here, instead of via a serializer / validator
+    pair that has to coordinate across persistence, ``model_copy``,
+    ``round_trip``, equality checks, snapshot drift detection, etc.
+
+    A ``model_copy`` chain is enough because ``BaseModel.model_copy``
+    is shallow on default — we replace the leaf ``skills`` list with
+    an empty list without touching any other field. The returned
+    object is a fresh ``ConversationInfo`` instance; callers that
+    hold the input reference observe no mutation.
+    """
+    agent_ctx = getattr(info.agent, "agent_context", None)
+    if agent_ctx is None or not agent_ctx.skills:
+        return info
+    trimmed_agent_context = agent_ctx.model_copy(update={"skills": []})
+    trimmed_agent = info.agent.model_copy(
+        update={"agent_context": trimmed_agent_context}
+    )
+    return info.model_copy(update={"agent": trimmed_agent})
+
+
 # Deprecated compatibility aliases for the old ACP-specific response names.
 # Keep runtime assignment aliases so existing imports still resolve to the
 # canonical Pydantic models; PEP 695 ``type`` aliases would not preserve that.
diff --git a/tests/agent_server/test_conversation_router_skill_trim.py b/tests/agent_server/test_conversation_router_skill_trim.py
new file mode 100644
index 0000000000..2f59aad149
--- /dev/null
+++ b/tests/agent_server/test_conversation_router_skill_trim.py
@@ -0,0 +1,214 @@
+"""Tests for the route-level ``agent.agent_context.skills`` trim.
+
+The four read endpoints (``GET /search``, ``GET /{id}``, ``GET ""``,
+``POST ""``) on the conversation router strip ``agent.agent_context.skills``
+from the response payload. The persisted ``ConversationState`` and the
+in-memory copy held by the agent's runtime are unaffected — only the
+bytes leaving over HTTP shrink.
+
+See the SDK PR description for why this lives at the route boundary
+rather than inside ``AgentContext`` itself.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+from uuid import uuid4
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+from pydantic import SecretStr
+
+from openhands.agent_server.config import Config
+from openhands.agent_server.conversation_router import conversation_router
+from openhands.agent_server.conversation_service import ConversationService
+from openhands.agent_server.dependencies import get_conversation_service
+from openhands.agent_server.models import (
+    ConversationInfo,
+    ConversationPage,
+    trim_conversation_response_skills,
+)
+from openhands.agent_server.utils import utc_now
+from openhands.sdk import LLM, Agent
+from openhands.sdk.context import AgentContext
+from openhands.sdk.conversation.state import ConversationExecutionStatus
+from openhands.sdk.skills import Skill
+from openhands.sdk.workspace import LocalWorkspace
+
+
+def _make_skill(name: str, content: str = "skill body bytes") -> Skill:
+    return Skill(name=name, content=content, source=f"/fake/{name}.md")
+
+
+def _make_conversation_with_skills(skills: list[Skill]) -> ConversationInfo:
+    """Build a ``ConversationInfo`` whose agent carries ``skills``.
+
+    The full ``AgentContext`` field set is otherwise empty so the
+    trimmed payload reflects only the skills delta.
+    """
+    now = utc_now()
+    return ConversationInfo(
+        id=uuid4(),
+        agent=Agent(
+            llm=LLM(model="gpt-4o", api_key=SecretStr("k"), usage_id="test-llm"),
+            tools=[],
+            agent_context=AgentContext(skills=skills),
+        ),
+        workspace=LocalWorkspace(working_dir="/tmp/test"),
+        execution_status=ConversationExecutionStatus.IDLE,
+        title="Test",
+        created_at=now,
+        updated_at=now,
+    )
+
+
+class TestTrimHelper:
+    """Unit tests for the pure-function helper."""
+
+    def test_strips_skills_when_present(self):
+        info = _make_conversation_with_skills(
+            [_make_skill("a"), _make_skill("b"), _make_skill("c")]
+        )
+        trimmed = trim_conversation_response_skills(info)
+        assert trimmed.agent.agent_context is not None
+        assert trimmed.agent.agent_context.skills == []
+
+    def test_returns_same_instance_when_nothing_to_strip(self):
+        # Empty skill list → identity return (no needless model_copy).
+        info = _make_conversation_with_skills([])
+        trimmed = trim_conversation_response_skills(info)
+        assert trimmed is info
+
+    def test_does_not_touch_other_agent_context_fields(self):
+        info = _make_conversation_with_skills([_make_skill("a")])
+        # Mutate a non-skill field so we can assert it survives.
+        assert info.agent.agent_context is not None
+        info = info.model_copy(
+            update={
+                "agent": info.agent.model_copy(
+                    update={
+                        "agent_context": info.agent.agent_context.model_copy(
+                            update={"system_message_suffix": "carry me through"}
+                        )
+                    }
+                )
+            }
+        )
+        trimmed = trim_conversation_response_skills(info)
+        assert trimmed.agent.agent_context is not None
+        assert trimmed.agent.agent_context.skills == []
+        assert trimmed.agent.agent_context.system_message_suffix == "carry me through"
+
+    def test_does_not_mutate_input(self):
+        info = _make_conversation_with_skills([_make_skill("a"), _make_skill("b")])
+        trim_conversation_response_skills(info)
+        # Caller's reference still sees the full skills — model_copy
+        # gave us a fresh instance, the input is untouched.
+        assert info.agent.agent_context is not None
+        assert {s.name for s in info.agent.agent_context.skills} == {"a", "b"}
+
+    def test_agent_without_agent_context_passes_through(self):
+        now = utc_now()
+        info = ConversationInfo(
+            id=uuid4(),
+            agent=Agent(
+                llm=LLM(model="gpt-4o", api_key=SecretStr("k"), usage_id="t"),
+                tools=[],
+            ),
+            workspace=LocalWorkspace(working_dir="/tmp/test"),
+            execution_status=ConversationExecutionStatus.IDLE,
+            title="Test",
+            created_at=now,
+            updated_at=now,
+        )
+        # No agent_context at all → helper is a no-op.
+        assert trim_conversation_response_skills(info) is info
+
+
+class TestRouteIntegration:
+    """Integration tests through the FastAPI router — proves the trim
+    actually fires at every read endpoint."""
+
+    @pytest.fixture
+    def heavy_conversation(self):
+        # 5 skills with non-trivial content — enough that the trim
+        # is visible in the serialized JSON byte count.
+        return _make_conversation_with_skills(
+            [_make_skill(f"skill-{i}", "x" * 500) for i in range(5)]
+        )
+
+    @pytest.fixture
+    def client(self, heavy_conversation):
+        service = AsyncMock(spec=ConversationService)
+        service.get_conversation.return_value = heavy_conversation
+        service.batch_get_conversations.return_value = [heavy_conversation]
+        service.search_conversations.return_value = ConversationPage(
+            items=[heavy_conversation], next_page_id=None
+        )
+
+        app = FastAPI()
+        app.include_router(conversation_router, prefix="/api")
+        app.state.config = Config(
+            static_files_path=None, session_api_keys=[], secret_key=None
+        )
+        app.dependency_overrides[get_conversation_service] = lambda: service
+        return TestClient(app), heavy_conversation
+
+    def test_get_conversation_trims_skills(self, client):
+        c, heavy = client
+        response = c.get(f"/api/conversations/{heavy.id}")
+        assert response.status_code == 200
+        body = response.json()
+        assert body["agent"]["agent_context"]["skills"] == []
+
+    def test_batch_get_conversations_trims_skills(self, client):
+        c, heavy = client
+        response = c.get(f"/api/conversations?ids={heavy.id}")
+        assert response.status_code == 200
+        body = response.json()
+        assert body[0]["agent"]["agent_context"]["skills"] == []
+
+    def test_batch_get_handles_null_items(self):
+        """Missing items return ``None`` and the trim doesn't crash on them."""
+        service = AsyncMock(spec=ConversationService)
+        service.batch_get_conversations.return_value = [None]
+        app = FastAPI()
+        app.include_router(conversation_router, prefix="/api")
+        app.state.config = Config(
+            static_files_path=None, session_api_keys=[], secret_key=None
+        )
+        app.dependency_overrides[get_conversation_service] = lambda: service
+        c = TestClient(app)
+        response = c.get(f"/api/conversations?ids={uuid4()}")
+        assert response.status_code == 200
+        assert response.json() == [None]
+
+    def test_search_conversations_trims_skills(self, client):
+        c, _heavy = client
+        response = c.get("/api/conversations/search")
+        assert response.status_code == 200
+        body = response.json()
+        assert body["items"][0]["agent"]["agent_context"]["skills"] == []
+
+    def test_response_size_drops_meaningfully(self, client):
+        """Compare trimmed (HTTP) vs untrimmed (model_dump_json) sizes.
+
+        The conversation has 5 skills × 500 chars of content = ~2500
+        bytes of skill bodies. The trimmed HTTP response should be at
+        least that much smaller than serializing the same conversation
+        with skills intact.
+        """
+        c, heavy = client
+        response = c.get("/api/conversations/search")
+        trimmed_bytes = len(response.content)
+        untrimmed_bytes = len(
+            ConversationPage(items=[heavy], next_page_id=None).model_dump_json()
+        )
+        # 5 × 500 chars of "x" skill content + per-skill metadata
+        # overhead. Conservatively require at least 1500 bytes shaved.
+        assert untrimmed_bytes - trimmed_bytes > 1500, (
+            f"trim should drop ~2500 B of skill content; got "
+            f"{untrimmed_bytes - trimmed_bytes} B saved "
+            f"({untrimmed_bytes} → {trimmed_bytes})"
+        )

From ef1598c1c9532fc302d052f22d4a19de800de405 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Wed, 20 May 2026 10:37:26 +0200
Subject: [PATCH 2/3] perf(agent_server): make skill trim opt-in via
 ?include_skills=false
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Default trim changes the public ``ConversationInfo`` shape that
external SDK consumers like ``RemoteConversation.agent.agent_context.skills``
already round-trip through. Reviewer flagged the silent-regression
risk; agreed.

Make it opt-in:

- ``include_skills: bool = True`` query param on every read endpoint
  that emits ``ConversationInfo`` (search, get, batch-get, start,
  fork, plus the deprecated ACP equivalents).
- Default behaviour unchanged — existing callers see the full payload.
- ``?include_skills=false`` opts into ``trim_conversation_response_skills``;
  the helper drops ``agent.agent_context.skills`` from the response
  while leaving the persisted state and the in-memory runtime copy
  untouched. Documented in the param title so it surfaces in OpenAPI.

Tests cover both branches at every endpoint:
- Default (no param) → full skills (5 in the fixture).
- ``include_skills=true`` (explicit) → full skills (same).
- ``include_skills=false`` → empty list.
- Concrete byte-count comparison (default vs opt-in) confirms the
  ~2500 B saved per 5-skill conversation in the test fixture.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../agent_server/conversation_router.py       | 33 +++++--
 .../agent_server/conversation_router_acp.py   | 26 +++--
 .../openhands/agent_server/models.py          | 44 +++++----
 .../test_conversation_router_skill_trim.py    | 96 +++++++++++++------
 4 files changed, 138 insertions(+), 61 deletions(-)

diff --git a/openhands-agent-server/openhands/agent_server/conversation_router.py b/openhands-agent-server/openhands/agent_server/conversation_router.py
index e7db9089db..760e7c7fa4 100644
--- a/openhands-agent-server/openhands/agent_server/conversation_router.py
+++ b/openhands-agent-server/openhands/agent_server/conversation_router.py
@@ -22,6 +22,7 @@
 from openhands.agent_server.conversation_service import ConversationService
 from openhands.agent_server.dependencies import get_conversation_service
 from openhands.agent_server.models import (
+    INCLUDE_SKILLS_PARAM_TITLE,
     AgentResponseResult,
     AskAgentRequest,
     AskAgentResponse,
@@ -87,6 +88,7 @@ async def search_conversations(
         ConversationSortOrder,
         Query(title="Sort order for conversations"),
     ] = ConversationSortOrder.CREATED_AT_DESC,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationPage:
     """Search / List conversations"""
@@ -95,9 +97,8 @@ async def search_conversations(
     page = await conversation_service.search_conversations(
         page_id, limit, status, sort_order
     )
-    # Drop ``agent.agent_context.skills`` from each item before
-    # serialization — see ``trim_conversation_response_skills``.
-    page.items = [trim_conversation_response_skills(item) for item in page.items]
+    if not include_skills:
+        page.items = [trim_conversation_response_skills(item) for item in page.items]
     return page
 
 
@@ -119,13 +120,16 @@ async def count_conversations(
 )
 async def get_conversation(
     conversation_id: UUID,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationInfo:
     """Given an id, get a conversation"""
     conversation = await conversation_service.get_conversation(conversation_id)
     if conversation is None:
         raise HTTPException(status.HTTP_404_NOT_FOUND)
-    return trim_conversation_response_skills(conversation)
+    if not include_skills:
+        conversation = trim_conversation_response_skills(conversation)
+    return conversation
 
 
 @conversation_router.get(
@@ -152,16 +156,19 @@ async def get_conversation_agent_final_response(
 @conversation_router.get("")
 async def batch_get_conversations(
     ids: Annotated[list[UUID], Query()],
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> list[ConversationInfo | None]:
     """Get a batch of conversations given their ids, returning null for
     any missing item"""
     assert len(ids) < 100
     conversations = await conversation_service.batch_get_conversations(ids)
-    return [
-        trim_conversation_response_skills(c) if c is not None else None
-        for c in conversations
-    ]
+    if not include_skills:
+        return [
+            trim_conversation_response_skills(c) if c is not None else None
+            for c in conversations
+        ]
+    return conversations
 
 
 # Write Methods
@@ -173,12 +180,15 @@ async def start_conversation(
         StartConversationRequest, Body(examples=START_CONVERSATION_EXAMPLES)
     ],
     response: Response,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationInfo:
     """Start a conversation in the local environment."""
     info, is_new = await conversation_service.start_conversation(request)
     response.status_code = status.HTTP_201_CREATED if is_new else status.HTTP_200_OK
-    return trim_conversation_response_skills(info)
+    if not include_skills:
+        info = trim_conversation_response_skills(info)
+    return info
 
 
 @conversation_router.post(
@@ -415,6 +425,7 @@ async def condense_conversation(
 async def fork_conversation(
     conversation_id: UUID,
     request: Annotated[ForkConversationRequest, Body()] = ForkConversationRequest(),  # noqa: B008
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationInfo:
     """Fork a conversation, deep-copying its event history.
@@ -440,4 +451,6 @@ async def fork_conversation(
             status.HTTP_404_NOT_FOUND,
             detail="Source conversation not found",
         )
-    return trim_conversation_response_skills(info)
+    if not include_skills:
+        info = trim_conversation_response_skills(info)
+    return info
diff --git a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
index 7e70cad9c6..10768e6b71 100644
--- a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
+++ b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
@@ -14,6 +14,7 @@
 from openhands.agent_server.conversation_service import ConversationService
 from openhands.agent_server.dependencies import get_conversation_service
 from openhands.agent_server.models import (
+    INCLUDE_SKILLS_PARAM_TITLE,
     ACPConversationInfo,
     ACPConversationPage,
     ConversationSortOrder,
@@ -77,6 +78,7 @@ async def search_acp_conversations(
         ConversationSortOrder,
         Query(title="Sort order for conversations"),
     ] = ConversationSortOrder.CREATED_AT_DESC,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ACPConversationPage:
     """Search conversations using the ACP-capable contract.
@@ -89,7 +91,8 @@ async def search_acp_conversations(
     page = await conversation_service.search_acp_conversations(
         page_id, limit, status, sort_order
     )
-    page.items = [trim_conversation_response_skills(item) for item in page.items]
+    if not include_skills:
+        page.items = [trim_conversation_response_skills(item) for item in page.items]
     return page
 
 
@@ -116,6 +119,7 @@ async def count_acp_conversations(
 )
 async def get_acp_conversation(
     conversation_id: UUID,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ACPConversationInfo:
     """Get a conversation using the ACP-capable contract.
@@ -126,12 +130,15 @@ async def get_acp_conversation(
     conversation = await conversation_service.get_acp_conversation(conversation_id)
     if conversation is None:
         raise HTTPException(status.HTTP_404_NOT_FOUND)
-    return trim_conversation_response_skills(conversation)
+    if not include_skills:
+        conversation = trim_conversation_response_skills(conversation)
+    return conversation
 
 
 @conversation_router_acp.get("", deprecated=True)
 async def batch_get_acp_conversations(
     ids: Annotated[list[UUID], Query()],
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> list[ACPConversationInfo | None]:
     """Batch get conversations using the ACP-capable contract.
@@ -141,10 +148,12 @@ async def batch_get_acp_conversations(
     """
     assert len(ids) < 100
     conversations = await conversation_service.batch_get_acp_conversations(ids)
-    return [
-        trim_conversation_response_skills(c) if c is not None else None
-        for c in conversations
-    ]
+    if not include_skills:
+        return [
+            trim_conversation_response_skills(c) if c is not None else None
+            for c in conversations
+        ]
+    return conversations
 
 
 @conversation_router_acp.post("", deprecated=True)
@@ -154,6 +163,7 @@ async def start_acp_conversation(
         Body(examples=START_ACP_CONVERSATION_EXAMPLES),
     ],
     response: Response,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ACPConversationInfo:
     """Start a conversation using the ACP-capable contract.
@@ -164,4 +174,6 @@ async def start_acp_conversation(
     """
     info, is_new = await conversation_service.start_acp_conversation(request)
     response.status_code = status.HTTP_201_CREATED if is_new else status.HTTP_200_OK
-    return trim_conversation_response_skills(info)
+    if not include_skills:
+        info = trim_conversation_response_skills(info)
+    return info
diff --git a/openhands-agent-server/openhands/agent_server/models.py b/openhands-agent-server/openhands/agent_server/models.py
index ba60211b5b..84a50d81d7 100644
--- a/openhands-agent-server/openhands/agent_server/models.py
+++ b/openhands-agent-server/openhands/agent_server/models.py
@@ -200,28 +200,40 @@ class ConversationPage(BaseModel):
     next_page_id: str | None = None
 
 
+INCLUDE_SKILLS_PARAM_TITLE = (
+    "Whether to include ``agent.agent_context.skills`` in the response. "
+    "Default ``true`` preserves the full payload (no behaviour change for "
+    "existing consumers). Pass ``false`` to drop skills from the wire — "
+    "useful when the caller doesn't read skill bodies and wants to avoid "
+    "the ~260 KB of inlined skill content that ``load_user_skills=true`` / "
+    "``load_public_skills=true`` agents accumulate. The persisted "
+    "conversation state on disk and the in-memory runtime copy are "
+    "untouched either way."
+)
+
+
 def trim_conversation_response_skills(info: ConversationInfo) -> ConversationInfo:
     """Return ``info`` with ``agent.agent_context.skills`` set to ``[]``.
 
-    Applied at the four HTTP read routes that emit ``ConversationInfo``
-    (search, get, batch-get, start). The persisted ``ConversationState``
-    on disk and the in-memory copy held by the agent's runtime are
-    untouched — only the bytes leaving over HTTP shrink.
+    **Opt-in** via the ``include_skills=false`` query parameter on the
+    routes that emit ``ConversationInfo`` (search, get, batch-get,
+    start, fork, and the deprecated ACP equivalents). The default
+    response shape on every endpoint still includes the full skills
+    list — clients that don't opt in see the same payload they always
+    saw, so ``RemoteConversation.agent.agent_context.skills`` and any
+    other external consumer continue to round-trip correctly.
 
-    Why drop the skills entirely on the wire: when an ``AgentContext``
-    is constructed with ``load_user_skills=True`` / ``load_public_skills=True``,
+    The opt-in shape exists because when an ``AgentContext`` is
+    constructed with ``load_user_skills=True`` / ``load_public_skills=True``,
     its model_validator resolves the entire skill catalog (~40 entries
     in stock setups) and persists them inline. Every conversation
-    fetch therefore carried ~260 KB of skill content that no API
-    consumer actually reads — the skill bodies are only consumed
-    server-side at prompt-render time, never client-side.
-
-    Dropping ``skills`` from the API response (rather than from the
-    model itself) keeps the model semantics simple: the wire
-    representation differs from the in-memory representation in
-    exactly one place, here, instead of via a serializer / validator
-    pair that has to coordinate across persistence, ``model_copy``,
-    ``round_trip``, equality checks, snapshot drift detection, etc.
+    fetch therefore carries ~260 KB of skill content. Clients that
+    don't read skill bodies (agent-canvas's conversation list, for
+    example — see ``useUserConversation``) can pass
+    ``?include_skills=false`` to shave that off the wire.
+
+    The persisted ``ConversationState`` on disk and the in-memory copy
+    held by the agent's runtime are untouched regardless.
 
     A ``model_copy`` chain is enough because ``BaseModel.model_copy``
     is shallow on default — we replace the leaf ``skills`` list with
diff --git a/tests/agent_server/test_conversation_router_skill_trim.py b/tests/agent_server/test_conversation_router_skill_trim.py
index 2f59aad149..d64dbb0a5d 100644
--- a/tests/agent_server/test_conversation_router_skill_trim.py
+++ b/tests/agent_server/test_conversation_router_skill_trim.py
@@ -127,8 +127,10 @@ def test_agent_without_agent_context_passes_through(self):
 
 
 class TestRouteIntegration:
-    """Integration tests through the FastAPI router — proves the trim
-    actually fires at every read endpoint."""
+    """Integration tests through the FastAPI router — proves the
+    ``include_skills`` query param works at every read endpoint:
+    default (omitted or ``true``) keeps the full payload, ``false``
+    opts into the trim."""
 
     @pytest.fixture
     def heavy_conversation(self):
@@ -155,20 +157,67 @@ def client(self, heavy_conversation):
         app.dependency_overrides[get_conversation_service] = lambda: service
         return TestClient(app), heavy_conversation
 
-    def test_get_conversation_trims_skills(self, client):
+    # --- Default (no query param) → full payload, backward compatible ---
+
+    def test_get_conversation_default_includes_skills(self, client):
+        """Backward-compat: the default response shape is unchanged.
+
+        ``RemoteConversation.agent.agent_context.skills`` (and any other
+        external SDK consumer) must keep round-tripping correctly when
+        the caller doesn't opt into the trim.
+        """
         c, heavy = client
         response = c.get(f"/api/conversations/{heavy.id}")
         assert response.status_code == 200
         body = response.json()
-        assert body["agent"]["agent_context"]["skills"] == []
+        assert len(body["agent"]["agent_context"]["skills"]) == 5
 
-    def test_batch_get_conversations_trims_skills(self, client):
+    def test_batch_get_default_includes_skills(self, client):
         c, heavy = client
         response = c.get(f"/api/conversations?ids={heavy.id}")
         assert response.status_code == 200
         body = response.json()
+        assert len(body[0]["agent"]["agent_context"]["skills"]) == 5
+
+    def test_search_default_includes_skills(self, client):
+        c, _heavy = client
+        response = c.get("/api/conversations/search")
+        assert response.status_code == 200
+        body = response.json()
+        assert len(body["items"][0]["agent"]["agent_context"]["skills"]) == 5
+
+    # --- include_skills=true (explicit) → same as default ---
+
+    def test_get_conversation_explicit_true_includes_skills(self, client):
+        c, heavy = client
+        response = c.get(f"/api/conversations/{heavy.id}?include_skills=true")
+        assert response.status_code == 200
+        body = response.json()
+        assert len(body["agent"]["agent_context"]["skills"]) == 5
+
+    # --- include_skills=false → opt into the trim ---
+
+    def test_get_conversation_opt_in_trims_skills(self, client):
+        c, heavy = client
+        response = c.get(f"/api/conversations/{heavy.id}?include_skills=false")
+        assert response.status_code == 200
+        body = response.json()
+        assert body["agent"]["agent_context"]["skills"] == []
+
+    def test_batch_get_opt_in_trims_skills(self, client):
+        c, heavy = client
+        response = c.get(f"/api/conversations?ids={heavy.id}&include_skills=false")
+        assert response.status_code == 200
+        body = response.json()
         assert body[0]["agent"]["agent_context"]["skills"] == []
 
+    def test_search_opt_in_trims_skills(self, client):
+        c, _heavy = client
+        response = c.get("/api/conversations/search?include_skills=false")
+        assert response.status_code == 200
+        body = response.json()
+        assert body["items"][0]["agent"]["agent_context"]["skills"] == []
+
     def test_batch_get_handles_null_items(self):
         """Missing items return ``None`` and the trim doesn't crash on them."""
         service = AsyncMock(spec=ConversationService)
@@ -180,35 +229,26 @@ def test_batch_get_handles_null_items(self):
         )
         app.dependency_overrides[get_conversation_service] = lambda: service
         c = TestClient(app)
-        response = c.get(f"/api/conversations?ids={uuid4()}")
+        response = c.get(f"/api/conversations?ids={uuid4()}&include_skills=false")
         assert response.status_code == 200
         assert response.json() == [None]
 
-    def test_search_conversations_trims_skills(self, client):
-        c, _heavy = client
-        response = c.get("/api/conversations/search")
-        assert response.status_code == 200
-        body = response.json()
-        assert body["items"][0]["agent"]["agent_context"]["skills"] == []
+    def test_opt_in_response_size_drops_meaningfully(self, client):
+        """Compare default (full) vs opt-in (trimmed) HTTP responses.
 
-    def test_response_size_drops_meaningfully(self, client):
-        """Compare trimmed (HTTP) vs untrimmed (model_dump_json) sizes.
-
-        The conversation has 5 skills × 500 chars of content = ~2500
-        bytes of skill bodies. The trimmed HTTP response should be at
-        least that much smaller than serializing the same conversation
-        with skills intact.
+        The conversation has 5 skills × 500 chars of content. The
+        opt-in response should be at least that much smaller than the
+        default response.
         """
-        c, heavy = client
-        response = c.get("/api/conversations/search")
-        trimmed_bytes = len(response.content)
-        untrimmed_bytes = len(
-            ConversationPage(items=[heavy], next_page_id=None).model_dump_json()
+        c, _heavy = client
+        full_bytes = len(c.get("/api/conversations/search").content)
+        trimmed_bytes = len(
+            c.get("/api/conversations/search?include_skills=false").content
         )
         # 5 × 500 chars of "x" skill content + per-skill metadata
         # overhead. Conservatively require at least 1500 bytes shaved.
-        assert untrimmed_bytes - trimmed_bytes > 1500, (
-            f"trim should drop ~2500 B of skill content; got "
-            f"{untrimmed_bytes - trimmed_bytes} B saved "
-            f"({untrimmed_bytes} → {trimmed_bytes})"
+        assert full_bytes - trimmed_bytes > 1500, (
+            f"opt-in trim should drop ~2500 B of skill content; got "
+            f"{full_bytes - trimmed_bytes} B saved "
+            f"({full_bytes} → {trimmed_bytes})"
         )

From 23f0d9fc7a2b6433879a4b7cdd4f22a0abb1cec9 Mon Sep 17 00:00:00 2001
From: Debug Agent <debug@example.com>
Date: Wed, 20 May 2026 12:02:08 +0200
Subject: [PATCH 3/3] perf(agent_server)!: flip skill trim to default-on
 (include_skills=false)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BREAKING CHANGE: ``GET /conversations*`` and the deprecated ACP
equivalents now return ``agent.agent_context.skills == []`` by default.
Callers that still need the legacy full-payload shape can opt back in
with ``?include_skills=true``.

The trim was previously opt-in (``?include_skills=false``) to keep the
default response backward-compatible. Audit of known consumers
(agent-canvas, OpenHands app-server, SDK examples) showed none read
``agent.agent_context.skills`` from HTTP responses — they either use
in-process ``LocalConversation`` directly or only read fields like
``agent.llm.model``. Default-trim is therefore safe to ship as the
steady-state shape, and reverses the cost/benefit: ~260 KB shaved off
every conversation fetch unless a caller explicitly asks for it.

The persisted ``ConversationState`` on disk and the in-memory copy held
by the agent's runtime are unaffected — only the bytes leaving over
HTTP shrink.

Also makes both ``search`` route handlers non-destructive: they build a
fresh ``ConversationPage`` via ``model_copy`` instead of mutating
``page.items`` in place, so an upstream service caching its return
value (e.g. ``AsyncMock`` in tests) sees stable behaviour across calls.

Drive-by: migrates ``ConversationSortOrder`` and ``EventSortOrder`` to
``StrEnum`` to clear pre-existing ``UP042`` lint errors in
``models.py`` that the pre-commit hook now blocks any further edit to
this file on.
---
 .../agent_server/conversation_router.py       |  22 +++-
 .../agent_server/conversation_router_acp.py   |  16 ++-
 .../openhands/agent_server/models.py          |  54 ++++-----
 .../test_conversation_router_skill_trim.py    | 108 ++++++++++--------
 4 files changed, 115 insertions(+), 85 deletions(-)

diff --git a/openhands-agent-server/openhands/agent_server/conversation_router.py b/openhands-agent-server/openhands/agent_server/conversation_router.py
index 760e7c7fa4..2a4349f66a 100644
--- a/openhands-agent-server/openhands/agent_server/conversation_router.py
+++ b/openhands-agent-server/openhands/agent_server/conversation_router.py
@@ -88,7 +88,7 @@ async def search_conversations(
         ConversationSortOrder,
         Query(title="Sort order for conversations"),
     ] = ConversationSortOrder.CREATED_AT_DESC,
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationPage:
     """Search / List conversations"""
@@ -98,7 +98,17 @@ async def search_conversations(
         page_id, limit, status, sort_order
     )
     if not include_skills:
-        page.items = [trim_conversation_response_skills(item) for item in page.items]
+        # ``model_copy`` rather than in-place mutation so we never
+        # write back into whatever the upstream service handed us
+        # (matters for services that cache their return value,
+        # including the ``AsyncMock`` used in route tests).
+        page = page.model_copy(
+            update={
+                "items": [
+                    trim_conversation_response_skills(item) for item in page.items
+                ]
+            }
+        )
     return page
 
 
@@ -120,7 +130,7 @@ async def count_conversations(
 )
 async def get_conversation(
     conversation_id: UUID,
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationInfo:
     """Given an id, get a conversation"""
@@ -156,7 +166,7 @@ async def get_conversation_agent_final_response(
 @conversation_router.get("")
 async def batch_get_conversations(
     ids: Annotated[list[UUID], Query()],
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> list[ConversationInfo | None]:
     """Get a batch of conversations given their ids, returning null for
@@ -180,7 +190,7 @@ async def start_conversation(
         StartConversationRequest, Body(examples=START_CONVERSATION_EXAMPLES)
     ],
     response: Response,
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationInfo:
     """Start a conversation in the local environment."""
@@ -425,7 +435,7 @@ async def condense_conversation(
 async def fork_conversation(
     conversation_id: UUID,
     request: Annotated[ForkConversationRequest, Body()] = ForkConversationRequest(),  # noqa: B008
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ConversationInfo:
     """Fork a conversation, deep-copying its event history.
diff --git a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
index 10768e6b71..d1e48df1d6 100644
--- a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
+++ b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
@@ -78,7 +78,7 @@ async def search_acp_conversations(
         ConversationSortOrder,
         Query(title="Sort order for conversations"),
     ] = ConversationSortOrder.CREATED_AT_DESC,
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ACPConversationPage:
     """Search conversations using the ACP-capable contract.
@@ -92,7 +92,13 @@ async def search_acp_conversations(
         page_id, limit, status, sort_order
     )
     if not include_skills:
-        page.items = [trim_conversation_response_skills(item) for item in page.items]
+        page = page.model_copy(
+            update={
+                "items": [
+                    trim_conversation_response_skills(item) for item in page.items
+                ]
+            }
+        )
     return page
 
 
@@ -119,7 +125,7 @@ async def count_acp_conversations(
 )
 async def get_acp_conversation(
     conversation_id: UUID,
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ACPConversationInfo:
     """Get a conversation using the ACP-capable contract.
@@ -138,7 +144,7 @@ async def get_acp_conversation(
 @conversation_router_acp.get("", deprecated=True)
 async def batch_get_acp_conversations(
     ids: Annotated[list[UUID], Query()],
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> list[ACPConversationInfo | None]:
     """Batch get conversations using the ACP-capable contract.
@@ -163,7 +169,7 @@ async def start_acp_conversation(
         Body(examples=START_ACP_CONVERSATION_EXAMPLES),
     ],
     response: Response,
-    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = True,
+    include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False,
     conversation_service: ConversationService = Depends(get_conversation_service),
 ) -> ACPConversationInfo:
     """Start a conversation using the ACP-capable contract.
diff --git a/openhands-agent-server/openhands/agent_server/models.py b/openhands-agent-server/openhands/agent_server/models.py
index 84a50d81d7..b681caa4c2 100644
--- a/openhands-agent-server/openhands/agent_server/models.py
+++ b/openhands-agent-server/openhands/agent_server/models.py
@@ -2,7 +2,7 @@
 
 from abc import ABC
 from datetime import datetime
-from enum import Enum
+from enum import Enum, StrEnum
 from typing import Any, TypeAlias
 from uuid import UUID, uuid4
 
@@ -54,7 +54,7 @@ class ServerErrorEvent(Event):
     detail: str = Field(description="Details about the error")
 
 
-class ConversationSortOrder(str, Enum):
+class ConversationSortOrder(StrEnum):
     """Enum for conversation sorting options."""
 
     CREATED_AT = "CREATED_AT"
@@ -63,7 +63,7 @@ class ConversationSortOrder(str, Enum):
     UPDATED_AT_DESC = "UPDATED_AT_DESC"
 
 
-class EventSortOrder(str, Enum):
+class EventSortOrder(StrEnum):
     """Enum for event sorting options."""
 
     TIMESTAMP = "TIMESTAMP"
@@ -202,38 +202,38 @@ class ConversationPage(BaseModel):
 
 INCLUDE_SKILLS_PARAM_TITLE = (
     "Whether to include ``agent.agent_context.skills`` in the response. "
-    "Default ``true`` preserves the full payload (no behaviour change for "
-    "existing consumers). Pass ``false`` to drop skills from the wire — "
-    "useful when the caller doesn't read skill bodies and wants to avoid "
-    "the ~260 KB of inlined skill content that ``load_user_skills=true`` / "
-    "``load_public_skills=true`` agents accumulate. The persisted "
-    "conversation state on disk and the in-memory runtime copy are "
-    "untouched either way."
+    "Default ``false`` (breaking change as of this release): skills are "
+    "trimmed to ``[]`` on the wire because no known consumer reads them "
+    "from HTTP responses, and a stock agent inlines ~260 KB of skill "
+    "content per fetch. Pass ``true`` to opt back into the legacy "
+    "full-payload shape — useful only for callers that still rely on "
+    "``RemoteConversation.agent.agent_context.skills`` round-tripping "
+    "over the wire. The persisted conversation state on disk and the "
+    "in-memory runtime copy are untouched either way."
 )
 
 
 def trim_conversation_response_skills(info: ConversationInfo) -> ConversationInfo:
     """Return ``info`` with ``agent.agent_context.skills`` set to ``[]``.
 
-    **Opt-in** via the ``include_skills=false`` query parameter on the
-    routes that emit ``ConversationInfo`` (search, get, batch-get,
-    start, fork, and the deprecated ACP equivalents). The default
-    response shape on every endpoint still includes the full skills
-    list — clients that don't opt in see the same payload they always
-    saw, so ``RemoteConversation.agent.agent_context.skills`` and any
-    other external consumer continue to round-trip correctly.
-
-    The opt-in shape exists because when an ``AgentContext`` is
-    constructed with ``load_user_skills=True`` / ``load_public_skills=True``,
-    its model_validator resolves the entire skill catalog (~40 entries
-    in stock setups) and persists them inline. Every conversation
-    fetch therefore carries ~260 KB of skill content. Clients that
-    don't read skill bodies (agent-canvas's conversation list, for
-    example — see ``useUserConversation``) can pass
-    ``?include_skills=false`` to shave that off the wire.
+    Applied **by default** on every route that emits ``ConversationInfo``
+    (search, get, batch-get, start, fork, and the deprecated ACP
+    equivalents). Callers that still need the legacy shape can opt in
+    with ``?include_skills=true``.
+
+    The trim exists because when an ``AgentContext`` is constructed
+    with ``load_user_skills=True`` / ``load_public_skills=True``, its
+    model_validator resolves the entire skill catalog (~40 entries in
+    stock setups) and persists them inline. Every conversation fetch
+    therefore carried ~260 KB of skill content that no known client
+    actually reads from the HTTP response (agent-canvas, OpenHands
+    app-server, SDK examples all ignore the field on
+    ``ConversationInfo`` — they either use the in-process
+    ``LocalConversation`` directly or read other fields like
+    ``agent.llm.model``).
 
     The persisted ``ConversationState`` on disk and the in-memory copy
-    held by the agent's runtime are untouched regardless.
+    held by the agent's runtime are untouched.
 
     A ``model_copy`` chain is enough because ``BaseModel.model_copy``
     is shallow on default — we replace the leaf ``skills`` list with
diff --git a/tests/agent_server/test_conversation_router_skill_trim.py b/tests/agent_server/test_conversation_router_skill_trim.py
index d64dbb0a5d..b910fb1a4d 100644
--- a/tests/agent_server/test_conversation_router_skill_trim.py
+++ b/tests/agent_server/test_conversation_router_skill_trim.py
@@ -1,10 +1,13 @@
 """Tests for the route-level ``agent.agent_context.skills`` trim.
 
-The four read endpoints (``GET /search``, ``GET /{id}``, ``GET ""``,
-``POST ""``) on the conversation router strip ``agent.agent_context.skills``
-from the response payload. The persisted ``ConversationState`` and the
-in-memory copy held by the agent's runtime are unaffected — only the
-bytes leaving over HTTP shrink.
+The conversation read endpoints (``GET /search``, ``GET /{id}``,
+``GET ""``, ``POST ""``, ``POST /{id}/fork``) on the conversation
+router strip ``agent.agent_context.skills`` from the response payload
+**by default** (breaking change as of this release). Callers that
+still need the legacy shape can pass ``?include_skills=true``. The
+persisted ``ConversationState`` and the in-memory copy held by the
+agent's runtime are unaffected — only the bytes leaving over HTTP
+shrink.
 
 See the SDK PR description for why this lives at the route boundary
 rather than inside ``AgentContext`` itself.
@@ -127,10 +130,12 @@ def test_agent_without_agent_context_passes_through(self):
 
 
 class TestRouteIntegration:
-    """Integration tests through the FastAPI router — proves the
-    ``include_skills`` query param works at every read endpoint:
-    default (omitted or ``true``) keeps the full payload, ``false``
-    opts into the trim."""
+    """Integration tests through the FastAPI router.
+
+    Proves the new default-trim semantics and the
+    ``?include_skills=true`` opt-in escape hatch on every read
+    endpoint that returns a ``ConversationInfo``.
+    """
 
     @pytest.fixture
     def heavy_conversation(self):
@@ -157,66 +162,77 @@ def client(self, heavy_conversation):
         app.dependency_overrides[get_conversation_service] = lambda: service
         return TestClient(app), heavy_conversation
 
-    # --- Default (no query param) → full payload, backward compatible ---
+    # --- Default (no query param) → trimmed (breaking-change behaviour) ---
 
-    def test_get_conversation_default_includes_skills(self, client):
-        """Backward-compat: the default response shape is unchanged.
+    def test_get_conversation_default_trims_skills(self, client):
+        """Default response trims ``agent.agent_context.skills`` to ``[]``.
 
-        ``RemoteConversation.agent.agent_context.skills`` (and any other
-        external SDK consumer) must keep round-tripping correctly when
-        the caller doesn't opt into the trim.
+        This is the breaking change: prior releases returned the full
+        skill list inline. Callers that read
+        ``conversation.agent.agent_context.skills`` (notably via
+        ``RemoteConversation``) now see ``[]`` unless they pass
+        ``?include_skills=true``. No known client (agent-canvas,
+        OpenHands app-server, SDK examples) reads this field from
+        HTTP responses, so the change is documentation + opt-in
+        rather than a coordinated migration.
         """
         c, heavy = client
         response = c.get(f"/api/conversations/{heavy.id}")
         assert response.status_code == 200
         body = response.json()
-        assert len(body["agent"]["agent_context"]["skills"]) == 5
+        assert body["agent"]["agent_context"]["skills"] == []
 
-    def test_batch_get_default_includes_skills(self, client):
+    def test_batch_get_default_trims_skills(self, client):
         c, heavy = client
         response = c.get(f"/api/conversations?ids={heavy.id}")
         assert response.status_code == 200
         body = response.json()
-        assert len(body[0]["agent"]["agent_context"]["skills"]) == 5
+        assert body[0]["agent"]["agent_context"]["skills"] == []
 
-    def test_search_default_includes_skills(self, client):
+    def test_search_default_trims_skills(self, client):
         c, _heavy = client
         response = c.get("/api/conversations/search")
         assert response.status_code == 200
         body = response.json()
-        assert len(body["items"][0]["agent"]["agent_context"]["skills"]) == 5
+        assert body["items"][0]["agent"]["agent_context"]["skills"] == []
 
-    # --- include_skills=true (explicit) → same as default ---
+    # --- include_skills=false (explicit) → same as default ---
 
-    def test_get_conversation_explicit_true_includes_skills(self, client):
+    def test_get_conversation_explicit_false_trims_skills(self, client):
         c, heavy = client
-        response = c.get(f"/api/conversations/{heavy.id}?include_skills=true")
+        response = c.get(f"/api/conversations/{heavy.id}?include_skills=false")
         assert response.status_code == 200
         body = response.json()
-        assert len(body["agent"]["agent_context"]["skills"]) == 5
+        assert body["agent"]["agent_context"]["skills"] == []
 
-    # --- include_skills=false → opt into the trim ---
+    # --- include_skills=true → opt into legacy full payload ---
 
-    def test_get_conversation_opt_in_trims_skills(self, client):
+    def test_get_conversation_opt_in_includes_skills(self, client):
+        """Legacy opt-in. Callers that still read
+        ``conversation.agent.agent_context.skills`` from an HTTP
+        response can pass ``?include_skills=true`` to keep the prior
+        shape. Documented as a deprecation escape hatch, not the
+        steady-state path.
+        """
         c, heavy = client
-        response = c.get(f"/api/conversations/{heavy.id}?include_skills=false")
+        response = c.get(f"/api/conversations/{heavy.id}?include_skills=true")
         assert response.status_code == 200
         body = response.json()
-        assert body["agent"]["agent_context"]["skills"] == []
+        assert len(body["agent"]["agent_context"]["skills"]) == 5
 
-    def test_batch_get_opt_in_trims_skills(self, client):
+    def test_batch_get_opt_in_includes_skills(self, client):
         c, heavy = client
-        response = c.get(f"/api/conversations?ids={heavy.id}&include_skills=false")
+        response = c.get(f"/api/conversations?ids={heavy.id}&include_skills=true")
         assert response.status_code == 200
         body = response.json()
-        assert body[0]["agent"]["agent_context"]["skills"] == []
+        assert len(body[0]["agent"]["agent_context"]["skills"]) == 5
 
-    def test_search_opt_in_trims_skills(self, client):
+    def test_search_opt_in_includes_skills(self, client):
         c, _heavy = client
-        response = c.get("/api/conversations/search?include_skills=false")
+        response = c.get("/api/conversations/search?include_skills=true")
         assert response.status_code == 200
         body = response.json()
-        assert body["items"][0]["agent"]["agent_context"]["skills"] == []
+        assert len(body["items"][0]["agent"]["agent_context"]["skills"]) == 5
 
     def test_batch_get_handles_null_items(self):
         """Missing items return ``None`` and the trim doesn't crash on them."""
@@ -229,26 +245,24 @@ def test_batch_get_handles_null_items(self):
         )
         app.dependency_overrides[get_conversation_service] = lambda: service
         c = TestClient(app)
-        response = c.get(f"/api/conversations?ids={uuid4()}&include_skills=false")
+        response = c.get(f"/api/conversations?ids={uuid4()}")
         assert response.status_code == 200
         assert response.json() == [None]
 
-    def test_opt_in_response_size_drops_meaningfully(self, client):
-        """Compare default (full) vs opt-in (trimmed) HTTP responses.
+    def test_default_response_size_drops_meaningfully(self, client):
+        """Compare default (trimmed) vs opt-in (full) HTTP responses.
 
         The conversation has 5 skills × 500 chars of content. The
-        opt-in response should be at least that much smaller than the
-        default response.
+        default response should be at least that much smaller than
+        the explicit ``?include_skills=true`` opt-in.
         """
         c, _heavy = client
-        full_bytes = len(c.get("/api/conversations/search").content)
-        trimmed_bytes = len(
-            c.get("/api/conversations/search?include_skills=false").content
-        )
+        default_bytes = len(c.get("/api/conversations/search").content)
+        full_bytes = len(c.get("/api/conversations/search?include_skills=true").content)
         # 5 × 500 chars of "x" skill content + per-skill metadata
         # overhead. Conservatively require at least 1500 bytes shaved.
-        assert full_bytes - trimmed_bytes > 1500, (
-            f"opt-in trim should drop ~2500 B of skill content; got "
-            f"{full_bytes - trimmed_bytes} B saved "
-            f"({full_bytes} → {trimmed_bytes})"
+        assert full_bytes - default_bytes > 1500, (
+            f"default trim should drop ~2500 B of skill content; got "
+            f"{full_bytes - default_bytes} B saved "
+            f"(full {full_bytes} → default {default_bytes})"
         )