# Tencent is pleased to support the open source community by making tRPC-Agent-Python available.
#
# Copyright (C) 2026 Tencent. All rights reserved.
#
# tRPC-Agent-Python is licensed under Apache-2.0.
"""FastAPI application factory and server entry point.
Usage (script)::
# In examples/fastapi_server/
python3 run_server.py --model_key sk-... --model_url https://api.openai.com/v1 --port 8080
Usage (programmatic)::
from _app import RunnerManager, create_app
import uvicorn
manager = RunnerManager(app_name="my-app", model_key="sk-...",
model_url="https://api.openai.com/v1",
model_name="gpt-4o-mini")
app = create_app(manager)
uvicorn.run(app, host="0.0.0.0", port=8080)
Endpoints
---------
GET /health - liveness check.
POST /v1/chat - synchronous, returns full reply in one response.
POST /v1/chat/stream - SSE streaming, yields chunks as they arrive.
"""

from __future__ import annotations

from contextlib import asynccontextmanager
from typing import AsyncGenerator
from typing import Optional

import uvicorn
from _runner_manager import RunnerManager
from _schemas import ChatRequest
from _schemas import ChatResponse
from _schemas import HealthResponse
from _schemas import StreamChunk
from _schemas import ToolEvent
from fastapi import FastAPI
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from trpc_agent_sdk.log import logger
from trpc_agent_sdk.types import Content
from trpc_agent_sdk.types import Part


def create_app(manager: RunnerManager) -> FastAPI:
    """Build and return a configured FastAPI application.

    Args:
        manager: A fully initialized :class:`RunnerManager` that will be
            shared across all requests for the lifetime of the server.

    Returns:
        A :class:`fastapi.FastAPI` instance ready to be served by uvicorn.
    """

    @asynccontextmanager
    async def _lifespan(app: FastAPI):  # noqa: ARG001
        """Startup / shutdown hook: close the runner on exit."""
        logger.info("TRPC Agent FastAPI server starting up.")
        yield
        logger.info("TRPC Agent FastAPI server shutting down.")
        await manager.close()

    app = FastAPI(
        title="TRPC Agent Server",
        description="HTTP API for TRPC Agent",
        version="1.0.0",
        lifespan=_lifespan,
    )

    # ------------------------------------------------------------------
    # GET /health
    # ------------------------------------------------------------------
    @app.get("/health", response_model=HealthResponse, tags=["meta"])
    async def health() -> HealthResponse:
        """Liveness check - always returns 200 while the server is up."""
        return HealthResponse(app_name=manager.app_name)

    # ------------------------------------------------------------------
    # POST /v1/chat (synchronous, full response)
    # ------------------------------------------------------------------
    @app.post("/v1/chat", response_model=ChatResponse, tags=["chat"])
    async def chat(req: ChatRequest) -> ChatResponse:  # pylint: disable=unused-variable
        """Send a message to the agent and receive the complete reply.

        If ``session_id`` is omitted, a new session is created automatically.
        Pass the returned ``session_id`` in follow-up requests to continue the
        same conversation.
        """
        session_id = req.session_id or manager.new_session_id()
        user_content = Content(parts=[Part.from_text(text=req.message)])
        reply_parts: list[str] = []
        tool_events: list[ToolEvent] = []
        try:
            async for event in manager.runner.run_async(
                user_id=req.user_id,
                session_id=session_id,
                new_message=user_content,
            ):
                if not event.content or not event.content.parts:
                    continue
                for part in event.content.parts:
                    if part.thought:
                        # Internal reasoning steps - not surfaced to the caller.
                        continue
                    if part.text:
                        reply_parts.append(part.text)
                    elif part.function_call:
                        tool_events.append(
                            ToolEvent(
                                type="tool_call",
                                name=part.function_call.name,
                                data=dict(part.function_call.args or {}),
                            ))
                    elif part.function_response:
                        tool_events.append(
                            ToolEvent(
                                type="tool_result",
                                name=part.function_response.name,
                                data=part.function_response.response,
                            ))
        except Exception as exc:
            logger.exception("Error during agent run (session=%s)", session_id)
            raise HTTPException(status_code=500, detail=str(exc)) from exc

        return ChatResponse(
            session_id=session_id,
            user_id=req.user_id,
            reply="".join(reply_parts),
            tool_events=tool_events,
        )
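
    # For reference, a successful reply serializes the ChatResponse above into
    # JSON roughly like the following (values are illustrative only):
    #
    #   {
    #       "session_id": "abc",
    #       "user_id": "u1",
    #       "reply": "Hello! How can I help?",
    #       "tool_events": [
    #           {"type": "tool_call", "name": "search", "data": {"query": "..."}}
    #       ]
    #   }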

    # ------------------------------------------------------------------
    # POST /v1/chat/stream (SSE streaming)
    # ------------------------------------------------------------------
    @app.post("/v1/chat/stream", tags=["chat"])
    async def chat_stream(req: ChatRequest) -> StreamingResponse:  # pylint: disable=unused-variable
        """Send a message and receive the agent reply as a Server-Sent Events stream.

        Each SSE event carries a JSON-serialized :class:`StreamChunk`.
        The stream is terminated by a ``done`` chunk (or an ``error`` chunk on failure).

        Example SSE payload::

            data: {"type":"text_delta","data":"Hello","session_id":"abc"}
            data: {"type":"done","data":null,"session_id":"abc"}
        """
        session_id = req.session_id or manager.new_session_id()
        user_content = Content(parts=[Part.from_text(text=req.message)])

        async def _event_generator() -> AsyncGenerator[str, None]:
            try:
                async for event in manager.runner.run_async(
                    user_id=req.user_id,
                    session_id=session_id,
                    new_message=user_content,
                ):
                    if not event.content or not event.content.parts:
                        continue
                    for part in event.content.parts:
                        if part.thought:
                            continue
                        if part.text:
                            yield _sse(StreamChunk(
                                type="text_delta",
                                data=part.text,
                                session_id=session_id,
                            ))
                        elif part.function_call:
                            yield _sse(
                                StreamChunk(
                                    type="tool_call",
                                    data={
                                        "name": part.function_call.name,
                                        "args": dict(part.function_call.args or {}),
                                    },
                                    session_id=session_id,
                                ))
                        elif part.function_response:
                            yield _sse(
                                StreamChunk(
                                    type="tool_result",
                                    data={
                                        "name": part.function_response.name,
                                        "response": part.function_response.response,
                                    },
                                    session_id=session_id,
                                ))
                # Signal normal completion.
                yield _sse(StreamChunk(type="done", session_id=session_id))
            except Exception as exc:
                logger.exception("Error during streaming run (session=%s)", session_id)
                yield _sse(StreamChunk(type="error", data=str(exc), session_id=session_id))

        return StreamingResponse(
            _event_generator(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                # Disable nginx/proxy buffering so chunks arrive in real time.
                "X-Accel-Buffering": "no",
            },
        )
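
    # A client-side sketch for consuming this stream, assuming the httpx
    # package (not a dependency of this file) and a server on localhost:8080;
    # each event arrives as a 'data: {...}' line per the _sse() helper below:
    #
    #   with httpx.stream("POST", "http://localhost:8080/v1/chat/stream",
    #                     json={"user_id": "u1", "message": "Hello"}) as resp:
    #       for line in resp.iter_lines():
    #           if line.startswith("data: "):
    #               print(line[len("data: "):])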

    return app


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _sse(chunk: StreamChunk) -> str:
    """Serialize *chunk* as a single SSE ``data:`` line."""
    return f"data: {chunk.model_dump_json()}\n\n"


# ---------------------------------------------------------------------------
# Server entry point (called by the CLI)
# ---------------------------------------------------------------------------
def run_server(
    app_name: str,
    model_key: str,
    model_url: Optional[str],
    model_name: str,
    host: str,
    port: int,
    agent_module: Optional[str] = None,
    instruction: Optional[str] = None,
) -> None:
    """Build the RunnerManager, create the FastAPI app, and start uvicorn.

    Args:
        app_name: Logical name of this agent application.
        model_key: API key for the LLM provider.
        model_url: Base URL of the LLM API endpoint.
        model_name: Model identifier (e.g. ``gpt-4o-mini``).
        host: Network interface to bind (e.g. ``0.0.0.0``).
        port: TCP port to listen on.
        agent_module: Optional Python module path that exports ``root_agent``
            or ``create_agent()``. When ``None``, a default assistant
            agent is created from the provided model credentials.
        instruction: Optional system instruction override for the default agent.
    """
    manager = RunnerManager(
        app_name=app_name,
        model_key=model_key,
        model_url=model_url or "",
        model_name=model_name,
        agent_module=agent_module,
        instruction=instruction,
    )
    app = create_app(manager)
    logger.info("Starting TRPC Agent FastAPI server on %s:%d", host, port)
    uvicorn.run(app, host=host, port=port)
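

# A programmatic launch sketch mirroring the CLI path; all values below are
# placeholders:
#
#   run_server(
#       app_name="my-app",
#       model_key="sk-...",
#       model_url="https://api.openai.com/v1",
#       model_name="gpt-4o-mini",
#       host="0.0.0.0",
#       port=8080,
#   )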