From 13417aa3736d9419327993cbcb4b72f4f830eb5d Mon Sep 17 00:00:00 2001
From: mukunda katta
Date: Tue, 21 Apr 2026 08:17:47 -0700
Subject: [PATCH] docs(sampling): clarify client-side sampling context

---
 README.md                                 | 38 ++++++++++++++++++++---
 examples/snippets/clients/stdio_client.py |  2 ++
 examples/snippets/servers/sampling.py     |  8 ++++-
 3 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 487d48bee..223131709 100644
--- a/README.md
+++ b/README.md
@@ -930,13 +930,16 @@ The `elicit()` method returns an `ElicitationResult` with:
 
 ### Sampling
 
-Tools can interact with LLMs through sampling (generating text):
+Tools can interact with LLMs through sampling (generating text). The server does
+not call a model directly through the SDK here; instead it sends a
+`sampling/createMessage` request to the connected client, and the client handles
+that request via its registered sampling callback:
 
 ```python
 from mcp.server.fastmcp import Context, FastMCP
 from mcp.server.session import ServerSession
-from mcp.types import SamplingMessage, TextContent
+from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
 
 mcp = FastMCP(name="Sampling Example")
 
@@ -954,6 +957,12 @@ async def generate_poem(topic: str, ctx: Context[ServerSession, None]) -> str:
             )
         ],
         max_tokens=100,
+        model_preferences=ModelPreferences(
+            hints=[ModelHint(name="claude-3")],
+            intelligence_priority=0.8,
+            speed_priority=0.2,
+        ),
+        include_context="thisServer",
     )
 
     # Since we're not passing tools param, result.content is single content
@@ -965,6 +974,23 @@
 
 _Full example: [examples/snippets/servers/sampling.py](https://github.com/modelcontextprotocol/python-sdk/blob/main/examples/snippets/servers/sampling.py)_
 
+Sampling requests are routed through the client:
+
+- `ctx.session.create_message(...)` asks the MCP client to perform sampling with
+  whatever LLMs the client has available.
+- On the client side, handle that request with `sampling_callback` on
+  `ClientSession` or `Client`.
+- `model_preferences` is advisory only. Use `hints` for model-name preferences
+  and the numeric priorities to express tradeoffs such as speed vs.
+  intelligence.
+- `include_context` is also advisory and can be `"none"`, `"thisServer"`, or
+  `"allServers"`. It only has an effect if the client advertises the
+  `sampling.context` capability.
+- `RequestContext`/`ClientRequestContext` is callback metadata, not prompt
+  context. It gives your callback access to the client session plus request
+  metadata such as `request_id` and `meta`; the actual prompt payload is in
+  `CreateMessageRequestParams`.
+
 ### Logging and Notifications
 
 Tools can send logs and notifications through the context:
@@ -2160,8 +2186,8 @@ import os
 from pydantic import AnyUrl
 
 from mcp import ClientSession, StdioServerParameters, types
+from mcp.client.context import ClientRequestContext
 from mcp.client.stdio import stdio_client
-from mcp.shared.context import RequestContext
 
 # Create server parameters for stdio connection
 server_params = StdioServerParameters(
@@ -2173,9 +2199,11 @@ server_params = StdioServerParameters(
 
 # Optional: create a sampling callback
 async def handle_sampling_message(
-    context: RequestContext[ClientSession, None], params: types.CreateMessageRequestParams
+    context: ClientRequestContext, params: types.CreateMessageRequestParams
 ) -> types.CreateMessageResult:
     print(f"Sampling request: {params.messages}")
+    print(f"Requested model preferences: {params.model_preferences}")
+    print(f"Requested include_context: {params.include_context}")
     return types.CreateMessageResult(
         role="assistant",
         content=types.TextContent(
@@ -2183,7 +2211,7 @@
             text="Hello, world! from model",
         ),
         model="gpt-3.5-turbo",
-        stopReason="endTurn",
+        stop_reason="endTurn",
     )
 
 
diff --git a/examples/snippets/clients/stdio_client.py b/examples/snippets/clients/stdio_client.py
index c1f85f42a..b65f06715 100644
--- a/examples/snippets/clients/stdio_client.py
+++ b/examples/snippets/clients/stdio_client.py
@@ -22,6 +22,8 @@ async def handle_sampling_message(
     context: ClientRequestContext, params: types.CreateMessageRequestParams
 ) -> types.CreateMessageResult:
     print(f"Sampling request: {params.messages}")
+    print(f"Requested model preferences: {params.model_preferences}")
+    print(f"Requested include_context: {params.include_context}")
     return types.CreateMessageResult(
         role="assistant",
         content=types.TextContent(
diff --git a/examples/snippets/servers/sampling.py b/examples/snippets/servers/sampling.py
index 43259589a..2f0086102 100644
--- a/examples/snippets/servers/sampling.py
+++ b/examples/snippets/servers/sampling.py
@@ -1,5 +1,5 @@
 from mcp.server.mcpserver import Context, MCPServer
-from mcp.types import SamplingMessage, TextContent
+from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
 
 mcp = MCPServer(name="Sampling Example")
 
@@ -17,6 +17,12 @@ async def generate_poem(topic: str, ctx: Context) -> str:
             )
         ],
         max_tokens=100,
+        model_preferences=ModelPreferences(
+            hints=[ModelHint(name="claude-3")],
+            intelligence_priority=0.8,
+            speed_priority=0.2,
+        ),
+        include_context="thisServer",
     )
 
     # Since we're not passing tools param, result.content is single content