diff --git a/openhands-agent-server/openhands/agent_server/conversation_router.py b/openhands-agent-server/openhands/agent_server/conversation_router.py index 833f8b6a5a..2a4349f66a 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_router.py +++ b/openhands-agent-server/openhands/agent_server/conversation_router.py @@ -22,6 +22,7 @@ from openhands.agent_server.conversation_service import ConversationService from openhands.agent_server.dependencies import get_conversation_service from openhands.agent_server.models import ( + INCLUDE_SKILLS_PARAM_TITLE, AgentResponseResult, AskAgentRequest, AskAgentResponse, @@ -36,6 +37,7 @@ Success, UpdateConversationRequest, UpdateSecretsRequest, + trim_conversation_response_skills, ) from openhands.sdk import LLM, Agent, TextContent from openhands.sdk.conversation.state import ConversationExecutionStatus @@ -86,14 +88,28 @@ async def search_conversations( ConversationSortOrder, Query(title="Sort order for conversations"), ] = ConversationSortOrder.CREATED_AT_DESC, + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> ConversationPage: """Search / List conversations""" assert limit > 0 assert limit <= 100 - return await conversation_service.search_conversations( + page = await conversation_service.search_conversations( page_id, limit, status, sort_order ) + if not include_skills: + # ``model_copy`` rather than in-place mutation so we never + # write back into whatever the upstream service handed us + # (matters for services that cache their return value, + # including the ``AsyncMock`` used in route tests). + page = page.model_copy( + update={ + "items": [ + trim_conversation_response_skills(item) for item in page.items + ] + } + ) + return page @conversation_router.get("/count") @@ -114,12 +130,15 @@ async def count_conversations( ) async def get_conversation( conversation_id: UUID, + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> ConversationInfo: """Given an id, get a conversation""" conversation = await conversation_service.get_conversation(conversation_id) if conversation is None: raise HTTPException(status.HTTP_404_NOT_FOUND) + if not include_skills: + conversation = trim_conversation_response_skills(conversation) return conversation @@ -147,12 +166,18 @@ async def get_conversation_agent_final_response( @conversation_router.get("") async def batch_get_conversations( ids: Annotated[list[UUID], Query()], + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> list[ConversationInfo | None]: """Get a batch of conversations given their ids, returning null for any missing item""" assert len(ids) < 100 conversations = await conversation_service.batch_get_conversations(ids) + if not include_skills: + return [ + trim_conversation_response_skills(c) if c is not None else None + for c in conversations + ] return conversations @@ -165,11 +190,14 @@ async def start_conversation( StartConversationRequest, Body(examples=START_CONVERSATION_EXAMPLES) ], response: Response, + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> ConversationInfo: """Start a conversation in the local environment.""" info, is_new = await conversation_service.start_conversation(request) response.status_code = status.HTTP_201_CREATED if is_new else status.HTTP_200_OK + if not include_skills: + info = trim_conversation_response_skills(info) return info @@ -407,6 +435,7 @@ async def condense_conversation( async def fork_conversation( conversation_id: UUID, request: Annotated[ForkConversationRequest, Body()] = ForkConversationRequest(), # noqa: B008 + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> ConversationInfo: """Fork a conversation, deep-copying its event history. @@ -432,4 +461,6 @@ async def fork_conversation( status.HTTP_404_NOT_FOUND, detail="Source conversation not found", ) + if not include_skills: + info = trim_conversation_response_skills(info) return info diff --git a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py index b9b232f7ea..d1e48df1d6 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py +++ b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py @@ -14,11 +14,13 @@ from openhands.agent_server.conversation_service import ConversationService from openhands.agent_server.dependencies import get_conversation_service from openhands.agent_server.models import ( + INCLUDE_SKILLS_PARAM_TITLE, ACPConversationInfo, ACPConversationPage, ConversationSortOrder, SendMessageRequest, StartACPConversationRequest, + trim_conversation_response_skills, ) from openhands.sdk import LLM, Agent, TextContent from openhands.sdk.agent.acp_agent import ACPAgent @@ -76,6 +78,7 @@ async def search_acp_conversations( ConversationSortOrder, Query(title="Sort order for conversations"), ] = ConversationSortOrder.CREATED_AT_DESC, + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> ACPConversationPage: """Search conversations using the ACP-capable contract. @@ -85,9 +88,18 @@ async def search_acp_conversations( """ assert limit > 0 assert limit <= 100 - return await conversation_service.search_acp_conversations( + page = await conversation_service.search_acp_conversations( page_id, limit, status, sort_order ) + if not include_skills: + page = page.model_copy( + update={ + "items": [ + trim_conversation_response_skills(item) for item in page.items + ] + } + ) + return page @conversation_router_acp.get("/count", deprecated=True) @@ -113,6 +125,7 @@ async def count_acp_conversations( ) async def get_acp_conversation( conversation_id: UUID, + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> ACPConversationInfo: """Get a conversation using the ACP-capable contract. @@ -123,12 +136,15 @@ async def get_acp_conversation( conversation = await conversation_service.get_acp_conversation(conversation_id) if conversation is None: raise HTTPException(status.HTTP_404_NOT_FOUND) + if not include_skills: + conversation = trim_conversation_response_skills(conversation) return conversation @conversation_router_acp.get("", deprecated=True) async def batch_get_acp_conversations( ids: Annotated[list[UUID], Query()], + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> list[ACPConversationInfo | None]: """Batch get conversations using the ACP-capable contract. @@ -137,7 +153,13 @@ async def batch_get_acp_conversations( Use ``/api/conversations`` instead. """ assert len(ids) < 100 - return await conversation_service.batch_get_acp_conversations(ids) + conversations = await conversation_service.batch_get_acp_conversations(ids) + if not include_skills: + return [ + trim_conversation_response_skills(c) if c is not None else None + for c in conversations + ] + return conversations @conversation_router_acp.post("", deprecated=True) @@ -147,6 +169,7 @@ async def start_acp_conversation( Body(examples=START_ACP_CONVERSATION_EXAMPLES), ], response: Response, + include_skills: Annotated[bool, Query(title=INCLUDE_SKILLS_PARAM_TITLE)] = False, conversation_service: ConversationService = Depends(get_conversation_service), ) -> ACPConversationInfo: """Start a conversation using the ACP-capable contract. @@ -157,4 +180,6 @@ async def start_acp_conversation( """ info, is_new = await conversation_service.start_acp_conversation(request) response.status_code = status.HTTP_201_CREATED if is_new else status.HTTP_200_OK + if not include_skills: + info = trim_conversation_response_skills(info) return info diff --git a/openhands-agent-server/openhands/agent_server/models.py b/openhands-agent-server/openhands/agent_server/models.py index 9c8fc871da..b681caa4c2 100644 --- a/openhands-agent-server/openhands/agent_server/models.py +++ b/openhands-agent-server/openhands/agent_server/models.py @@ -2,7 +2,7 @@ from abc import ABC from datetime import datetime -from enum import Enum +from enum import Enum, StrEnum from typing import Any, TypeAlias from uuid import UUID, uuid4 @@ -54,7 +54,7 @@ class ServerErrorEvent(Event): detail: str = Field(description="Details about the error") -class ConversationSortOrder(str, Enum): +class ConversationSortOrder(StrEnum): """Enum for conversation sorting options.""" CREATED_AT = "CREATED_AT" @@ -63,7 +63,7 @@ class ConversationSortOrder(str, Enum): UPDATED_AT_DESC = "UPDATED_AT_DESC" -class EventSortOrder(str, Enum): +class EventSortOrder(StrEnum): """Enum for event sorting options.""" TIMESTAMP = "TIMESTAMP" @@ -200,6 +200,57 @@ class ConversationPage(BaseModel): next_page_id: str | None = None +INCLUDE_SKILLS_PARAM_TITLE = ( + "Whether to include ``agent.agent_context.skills`` in the response. " + "Default ``false`` (breaking change as of this release): skills are " + "trimmed to ``[]`` on the wire because no known consumer reads them " + "from HTTP responses, and a stock agent inlines ~260 KB of skill " + "content per fetch. Pass ``true`` to opt back into the legacy " + "full-payload shape — useful only for callers that still rely on " + "``RemoteConversation.agent.agent_context.skills`` round-tripping " + "over the wire. The persisted conversation state on disk and the " + "in-memory runtime copy are untouched either way." +) + + +def trim_conversation_response_skills(info: ConversationInfo) -> ConversationInfo: + """Return ``info`` with ``agent.agent_context.skills`` set to ``[]``. + + Applied **by default** on every route that emits ``ConversationInfo`` + (search, get, batch-get, start, fork, and the deprecated ACP + equivalents). Callers that still need the legacy shape can opt in + with ``?include_skills=true``. + + The trim exists because when an ``AgentContext`` is constructed + with ``load_user_skills=True`` / ``load_public_skills=True``, its + model_validator resolves the entire skill catalog (~40 entries in + stock setups) and persists them inline. Every conversation fetch + therefore carried ~260 KB of skill content that no known client + actually reads from the HTTP response (agent-canvas, OpenHands + app-server, SDK examples all ignore the field on + ``ConversationInfo`` — they either use the in-process + ``LocalConversation`` directly or read other fields like + ``agent.llm.model``). + + The persisted ``ConversationState`` on disk and the in-memory copy + held by the agent's runtime are untouched. + + A ``model_copy`` chain is enough because ``BaseModel.model_copy`` + is shallow on default — we replace the leaf ``skills`` list with + an empty list without touching any other field. The returned + object is a fresh ``ConversationInfo`` instance; callers that + hold the input reference observe no mutation. + """ + agent_ctx = getattr(info.agent, "agent_context", None) + if agent_ctx is None or not agent_ctx.skills: + return info + trimmed_agent_context = agent_ctx.model_copy(update={"skills": []}) + trimmed_agent = info.agent.model_copy( + update={"agent_context": trimmed_agent_context} + ) + return info.model_copy(update={"agent": trimmed_agent}) + + # Deprecated compatibility aliases for the old ACP-specific response names. # Keep runtime assignment aliases so existing imports still resolve to the # canonical Pydantic models; PEP 695 ``type`` aliases would not preserve that. diff --git a/tests/agent_server/test_conversation_router_skill_trim.py b/tests/agent_server/test_conversation_router_skill_trim.py new file mode 100644 index 0000000000..b910fb1a4d --- /dev/null +++ b/tests/agent_server/test_conversation_router_skill_trim.py @@ -0,0 +1,268 @@ +"""Tests for the route-level ``agent.agent_context.skills`` trim. + +The conversation read endpoints (``GET /search``, ``GET /{id}``, +``GET ""``, ``POST ""``, ``POST /{id}/fork``) on the conversation +router strip ``agent.agent_context.skills`` from the response payload +**by default** (breaking change as of this release). Callers that +still need the legacy shape can pass ``?include_skills=true``. The +persisted ``ConversationState`` and the in-memory copy held by the +agent's runtime are unaffected — only the bytes leaving over HTTP +shrink. + +See the SDK PR description for why this lives at the route boundary +rather than inside ``AgentContext`` itself. +""" + +from __future__ import annotations + +from unittest.mock import AsyncMock +from uuid import uuid4 + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient +from pydantic import SecretStr + +from openhands.agent_server.config import Config +from openhands.agent_server.conversation_router import conversation_router +from openhands.agent_server.conversation_service import ConversationService +from openhands.agent_server.dependencies import get_conversation_service +from openhands.agent_server.models import ( + ConversationInfo, + ConversationPage, + trim_conversation_response_skills, +) +from openhands.agent_server.utils import utc_now +from openhands.sdk import LLM, Agent +from openhands.sdk.context import AgentContext +from openhands.sdk.conversation.state import ConversationExecutionStatus +from openhands.sdk.skills import Skill +from openhands.sdk.workspace import LocalWorkspace + + +def _make_skill(name: str, content: str = "skill body bytes") -> Skill: + return Skill(name=name, content=content, source=f"/fake/{name}.md") + + +def _make_conversation_with_skills(skills: list[Skill]) -> ConversationInfo: + """Build a ``ConversationInfo`` whose agent carries ``skills``. + + The full ``AgentContext`` field set is otherwise empty so the + trimmed payload reflects only the skills delta. + """ + now = utc_now() + return ConversationInfo( + id=uuid4(), + agent=Agent( + llm=LLM(model="gpt-4o", api_key=SecretStr("k"), usage_id="test-llm"), + tools=[], + agent_context=AgentContext(skills=skills), + ), + workspace=LocalWorkspace(working_dir="/tmp/test"), + execution_status=ConversationExecutionStatus.IDLE, + title="Test", + created_at=now, + updated_at=now, + ) + + +class TestTrimHelper: + """Unit tests for the pure-function helper.""" + + def test_strips_skills_when_present(self): + info = _make_conversation_with_skills( + [_make_skill("a"), _make_skill("b"), _make_skill("c")] + ) + trimmed = trim_conversation_response_skills(info) + assert trimmed.agent.agent_context is not None + assert trimmed.agent.agent_context.skills == [] + + def test_returns_same_instance_when_nothing_to_strip(self): + # Empty skill list → identity return (no needless model_copy). + info = _make_conversation_with_skills([]) + trimmed = trim_conversation_response_skills(info) + assert trimmed is info + + def test_does_not_touch_other_agent_context_fields(self): + info = _make_conversation_with_skills([_make_skill("a")]) + # Mutate a non-skill field so we can assert it survives. + assert info.agent.agent_context is not None + info = info.model_copy( + update={ + "agent": info.agent.model_copy( + update={ + "agent_context": info.agent.agent_context.model_copy( + update={"system_message_suffix": "carry me through"} + ) + } + ) + } + ) + trimmed = trim_conversation_response_skills(info) + assert trimmed.agent.agent_context is not None + assert trimmed.agent.agent_context.skills == [] + assert trimmed.agent.agent_context.system_message_suffix == "carry me through" + + def test_does_not_mutate_input(self): + info = _make_conversation_with_skills([_make_skill("a"), _make_skill("b")]) + trim_conversation_response_skills(info) + # Caller's reference still sees the full skills — model_copy + # gave us a fresh instance, the input is untouched. + assert info.agent.agent_context is not None + assert {s.name for s in info.agent.agent_context.skills} == {"a", "b"} + + def test_agent_without_agent_context_passes_through(self): + now = utc_now() + info = ConversationInfo( + id=uuid4(), + agent=Agent( + llm=LLM(model="gpt-4o", api_key=SecretStr("k"), usage_id="t"), + tools=[], + ), + workspace=LocalWorkspace(working_dir="/tmp/test"), + execution_status=ConversationExecutionStatus.IDLE, + title="Test", + created_at=now, + updated_at=now, + ) + # No agent_context at all → helper is a no-op. + assert trim_conversation_response_skills(info) is info + + +class TestRouteIntegration: + """Integration tests through the FastAPI router. + + Proves the new default-trim semantics and the + ``?include_skills=true`` opt-in escape hatch on every read + endpoint that returns a ``ConversationInfo``. + """ + + @pytest.fixture + def heavy_conversation(self): + # 5 skills with non-trivial content — enough that the trim + # is visible in the serialized JSON byte count. + return _make_conversation_with_skills( + [_make_skill(f"skill-{i}", "x" * 500) for i in range(5)] + ) + + @pytest.fixture + def client(self, heavy_conversation): + service = AsyncMock(spec=ConversationService) + service.get_conversation.return_value = heavy_conversation + service.batch_get_conversations.return_value = [heavy_conversation] + service.search_conversations.return_value = ConversationPage( + items=[heavy_conversation], next_page_id=None + ) + + app = FastAPI() + app.include_router(conversation_router, prefix="/api") + app.state.config = Config( + static_files_path=None, session_api_keys=[], secret_key=None + ) + app.dependency_overrides[get_conversation_service] = lambda: service + return TestClient(app), heavy_conversation + + # --- Default (no query param) → trimmed (breaking-change behaviour) --- + + def test_get_conversation_default_trims_skills(self, client): + """Default response trims ``agent.agent_context.skills`` to ``[]``. + + This is the breaking change: prior releases returned the full + skill list inline. Callers that read + ``conversation.agent.agent_context.skills`` (notably via + ``RemoteConversation``) now see ``[]`` unless they pass + ``?include_skills=true``. No known client (agent-canvas, + OpenHands app-server, SDK examples) reads this field from + HTTP responses, so the change is documentation + opt-in + rather than a coordinated migration. + """ + c, heavy = client + response = c.get(f"/api/conversations/{heavy.id}") + assert response.status_code == 200 + body = response.json() + assert body["agent"]["agent_context"]["skills"] == [] + + def test_batch_get_default_trims_skills(self, client): + c, heavy = client + response = c.get(f"/api/conversations?ids={heavy.id}") + assert response.status_code == 200 + body = response.json() + assert body[0]["agent"]["agent_context"]["skills"] == [] + + def test_search_default_trims_skills(self, client): + c, _heavy = client + response = c.get("/api/conversations/search") + assert response.status_code == 200 + body = response.json() + assert body["items"][0]["agent"]["agent_context"]["skills"] == [] + + # --- include_skills=false (explicit) → same as default --- + + def test_get_conversation_explicit_false_trims_skills(self, client): + c, heavy = client + response = c.get(f"/api/conversations/{heavy.id}?include_skills=false") + assert response.status_code == 200 + body = response.json() + assert body["agent"]["agent_context"]["skills"] == [] + + # --- include_skills=true → opt into legacy full payload --- + + def test_get_conversation_opt_in_includes_skills(self, client): + """Legacy opt-in. Callers that still read + ``conversation.agent.agent_context.skills`` from an HTTP + response can pass ``?include_skills=true`` to keep the prior + shape. Documented as a deprecation escape hatch, not the + steady-state path. + """ + c, heavy = client + response = c.get(f"/api/conversations/{heavy.id}?include_skills=true") + assert response.status_code == 200 + body = response.json() + assert len(body["agent"]["agent_context"]["skills"]) == 5 + + def test_batch_get_opt_in_includes_skills(self, client): + c, heavy = client + response = c.get(f"/api/conversations?ids={heavy.id}&include_skills=true") + assert response.status_code == 200 + body = response.json() + assert len(body[0]["agent"]["agent_context"]["skills"]) == 5 + + def test_search_opt_in_includes_skills(self, client): + c, _heavy = client + response = c.get("/api/conversations/search?include_skills=true") + assert response.status_code == 200 + body = response.json() + assert len(body["items"][0]["agent"]["agent_context"]["skills"]) == 5 + + def test_batch_get_handles_null_items(self): + """Missing items return ``None`` and the trim doesn't crash on them.""" + service = AsyncMock(spec=ConversationService) + service.batch_get_conversations.return_value = [None] + app = FastAPI() + app.include_router(conversation_router, prefix="/api") + app.state.config = Config( + static_files_path=None, session_api_keys=[], secret_key=None + ) + app.dependency_overrides[get_conversation_service] = lambda: service + c = TestClient(app) + response = c.get(f"/api/conversations?ids={uuid4()}") + assert response.status_code == 200 + assert response.json() == [None] + + def test_default_response_size_drops_meaningfully(self, client): + """Compare default (trimmed) vs opt-in (full) HTTP responses. + + The conversation has 5 skills × 500 chars of content. The + default response should be at least that much smaller than + the explicit ``?include_skills=true`` opt-in. + """ + c, _heavy = client + default_bytes = len(c.get("/api/conversations/search").content) + full_bytes = len(c.get("/api/conversations/search?include_skills=true").content) + # 5 × 500 chars of "x" skill content + per-skill metadata + # overhead. Conservatively require at least 1500 bytes shaved. + assert full_bytes - default_bytes > 1500, ( + f"default trim should drop ~2500 B of skill content; got " + f"{full_bytes - default_bytes} B saved " + f"(full {full_bytes} → default {default_bytes})" + )