@@ -930,13 +930,16 @@ The `elicit()` method returns an `ElicitationResult` with:
930930
### Sampling
932932
Tools can interact with LLMs through sampling (generating text). The server does
not call a model directly through the SDK here; instead it sends a
`sampling/createMessage` request to the connected client, and the client handles
that request via its registered sampling callback:
934937
935938<!-- snippet-source examples/snippets/servers/sampling.py -->
936939``` python
937940from mcp.server.fastmcp import Context, FastMCP
938941from mcp.server.session import ServerSession
939- from mcp.types import SamplingMessage, TextContent
942+ from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
940943
941944mcp = FastMCP(name = " Sampling Example" )
942945
@@ -954,6 +957,12 @@ async def generate_poem(topic: str, ctx: Context[ServerSession, None]) -> str:
954957 )
955958 ],
956959 max_tokens = 100 ,
960+ model_preferences = ModelPreferences(
961+ hints = [ModelHint(name = " claude-3" )],
962+ intelligence_priority = 0.8 ,
963+ speed_priority = 0.2 ,
964+ ),
965+ include_context = " thisServer" ,
957966 )
958967
959968 # Since we're not passing tools param, result.content is single content
@@ -965,6 +974,23 @@ async def generate_poem(topic: str, ctx: Context[ServerSession, None]) -> str:
_Full example: [examples/snippets/servers/sampling.py](https://github.com/modelcontextprotocol/python-sdk/blob/main/examples/snippets/servers/sampling.py)_
966975<!-- /snippet-source -->
967976
Sampling requests are routed through the client:

- `ctx.session.create_message(...)` asks the MCP client to perform sampling with
  whatever LLMs the client has available.
- On the client side, handle that request with `sampling_callback` on
  `ClientSession` or `Client`.
- `model_preferences` is advisory only. Use `hints` for model-name preferences
  and the numeric priorities to express tradeoffs such as speed vs.
  intelligence.
- `include_context` is also advisory and can be `"none"`, `"thisServer"`, or
  `"allServers"`. It only has an effect if the client advertises the
  `sampling.context` capability.
- `RequestContext`/`ClientRequestContext` is callback metadata, not prompt
  context. It gives your callback access to the client session plus request
  metadata such as `request_id` and `meta`; the actual prompt payload is in
  `CreateMessageRequestParams`.
### Logging and Notifications
969995
970996Tools can send logs and notifications through the context:
@@ -2160,8 +2186,8 @@ import os
21602186from pydantic import AnyUrl
21612187
21622188from mcp import ClientSession, StdioServerParameters, types
2189+ from mcp.client.context import ClientRequestContext
21632190from mcp.client.stdio import stdio_client
2164- from mcp.shared.context import RequestContext
21652191
21662192# Create server parameters for stdio connection
21672193server_params = StdioServerParameters(
@@ -2173,17 +2199,19 @@ server_params = StdioServerParameters(
21732199
21742200# Optional: create a sampling callback
21752201async def handle_sampling_message (
2176- context : RequestContext[ClientSession, None ] , params : types.CreateMessageRequestParams
2202+ context : ClientRequestContext , params : types.CreateMessageRequestParams
21772203) -> types.CreateMessageResult:
21782204 print (f " Sampling request: { params.messages} " )
2205+ print (f " Requested model preferences: { params.model_preferences} " )
2206+ print (f " Requested include_context: { params.include_context} " )
21792207 return types.CreateMessageResult(
21802208 role = " assistant" ,
21812209 content = types.TextContent(
21822210 type = " text" ,
21832211 text = " Hello, world! from model" ,
21842212 ),
21852213 model = " gpt-3.5-turbo" ,
2186- stopReason = " endTurn" ,
2214+ stop_reason = " endTurn" ,
21872215 )
21882216
21892217
0 commit comments