Skip to content

Commit 13417aa

Browse files
committed
docs(sampling): clarify client-side sampling context
1 parent 3d7b311 commit 13417aa

File tree

3 files changed

+42
-6
lines changed

3 files changed

+42
-6
lines changed

README.md

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -930,13 +930,16 @@ The `elicit()` method returns an `ElicitationResult` with:
930930

931931
### Sampling
932932

933-
Tools can interact with LLMs through sampling (generating text):
933+
Tools can interact with LLMs through sampling (generating text). The server does
934+
not call a model directly through the SDK here; instead it sends a
935+
`sampling/createMessage` request to the connected client, and the client handles
936+
that request via its registered sampling callback:
934937

935938
<!-- snippet-source examples/snippets/servers/sampling.py -->
936939
```python
937940
from mcp.server.fastmcp import Context, FastMCP
938941
from mcp.server.session import ServerSession
939-
from mcp.types import SamplingMessage, TextContent
942+
from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
940943

941944
mcp = FastMCP(name="Sampling Example")
942945

@@ -954,6 +957,12 @@ async def generate_poem(topic: str, ctx: Context[ServerSession, None]) -> str:
954957
)
955958
],
956959
max_tokens=100,
960+
model_preferences=ModelPreferences(
961+
hints=[ModelHint(name="claude-3")],
962+
intelligence_priority=0.8,
963+
speed_priority=0.2,
964+
),
965+
include_context="thisServer",
957966
)
958967

959968
# Since we're not passing tools param, result.content is single content
@@ -965,6 +974,23 @@ async def generate_poem(topic: str, ctx: Context[ServerSession, None]) -> str:
965974
_Full example: [examples/snippets/servers/sampling.py](https://github.com/modelcontextprotocol/python-sdk/blob/main/examples/snippets/servers/sampling.py)_
966975
<!-- /snippet-source -->
967976

977+
Sampling requests are routed through the client:
978+
979+
- `ctx.session.create_message(...)` asks the MCP client to perform sampling with
980+
whatever LLMs the client has available.
981+
- On the client side, handle that request with `sampling_callback` on
982+
`ClientSession` or `Client`.
983+
- `model_preferences` is advisory only. Use `hints` to express model-name
preferences, and use the numeric priorities to express tradeoffs such as
speed vs. intelligence.
986+
- `include_context` is also advisory and can be `"none"`, `"thisServer"`, or
987+
`"allServers"`. It only has an effect if the client advertises
988+
`sampling.context` capability.
989+
- `RequestContext`/`ClientRequestContext` is callback metadata, not prompt
990+
context. It gives your callback access to the client session plus request
991+
metadata such as `request_id` and `meta`; the actual prompt payload is in
992+
`CreateMessageRequestParams`.
993+
968994
### Logging and Notifications
969995

970996
Tools can send logs and notifications through the context:
@@ -2160,8 +2186,8 @@ import os
21602186
from pydantic import AnyUrl
21612187

21622188
from mcp import ClientSession, StdioServerParameters, types
2189+
from mcp.client.context import ClientRequestContext
21632190
from mcp.client.stdio import stdio_client
2164-
from mcp.shared.context import RequestContext
21652191

21662192
# Create server parameters for stdio connection
21672193
server_params = StdioServerParameters(
@@ -2173,17 +2199,19 @@ server_params = StdioServerParameters(
21732199

21742200
# Optional: create a sampling callback
21752201
async def handle_sampling_message(
2176-
context: RequestContext[ClientSession, None], params: types.CreateMessageRequestParams
2202+
context: ClientRequestContext, params: types.CreateMessageRequestParams
21772203
) -> types.CreateMessageResult:
21782204
print(f"Sampling request: {params.messages}")
2205+
print(f"Requested model preferences: {params.model_preferences}")
2206+
print(f"Requested include_context: {params.include_context}")
21792207
return types.CreateMessageResult(
21802208
role="assistant",
21812209
content=types.TextContent(
21822210
type="text",
21832211
text="Hello, world! from model",
21842212
),
21852213
model="gpt-3.5-turbo",
2186-
stopReason="endTurn",
2214+
stop_reason="endTurn",
21872215
)
21882216

21892217

examples/snippets/clients/stdio_client.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ async def handle_sampling_message(
2222
context: ClientRequestContext, params: types.CreateMessageRequestParams
2323
) -> types.CreateMessageResult:
2424
print(f"Sampling request: {params.messages}")
25+
print(f"Requested model preferences: {params.model_preferences}")
26+
print(f"Requested include_context: {params.include_context}")
2527
return types.CreateMessageResult(
2628
role="assistant",
2729
content=types.TextContent(

examples/snippets/servers/sampling.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from mcp.server.mcpserver import Context, MCPServer
2-
from mcp.types import SamplingMessage, TextContent
2+
from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
33

44
mcp = MCPServer(name="Sampling Example")
55

@@ -17,6 +17,12 @@ async def generate_poem(topic: str, ctx: Context) -> str:
1717
)
1818
],
1919
max_tokens=100,
20+
model_preferences=ModelPreferences(
21+
hints=[ModelHint(name="claude-3")],
22+
intelligence_priority=0.8,
23+
speed_priority=0.2,
24+
),
25+
include_context="thisServer",
2026
)
2127

2228
# Since we're not passing tools param, result.content is single content

0 commit comments

Comments (0)