posit-dev · cpsievert · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### New features
 
 * chatlas is now instrumented with [OpenTelemetry](https://opentelemetry.io/) (OTel) out of the box, making it much easier to see how your app behaves in production — where time goes, how many tokens you're spending, which tools run, and where things fail. Without writing any tracing code, you get spans that capture the full structure of a conversation as one connected trace: an `invoke_agent` span over the whole chat loop, a `chat` span per model call, and an `execute_tool` span per tool invocation, with attributes (token usage, response model/ID, tool errors) that follow the [OTel GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/). Because chatlas keeps its spans active during each call, HTTP spans from provider instrumentors and any spans your own tools emit nest underneath automatically. Point it at any OTel-compatible backend (Logfire, Datadog, Honeycomb, Jaeger, …); message content is omitted by default and opt-in via `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true`. See the [monitoring guide](https://posit-dev.github.io/chatlas/get-started/monitor.html) to get started. (#310)
+* Web search and fetch results now surface their citations across all three providers (OpenAI, Anthropic, Google), both progressively during streaming and on the final turn:
+  * When streaming with `content="all"`, `ContentCitation` objects are emitted as citations arrive — interleaved with text for OpenAI and Anthropic, at the end of the stream for Google. A `ContentCitation` carries a `url` and an optional `title`; its position in the stream (relative to surrounding text) is the placement signal for rendering footnote markers.
+  * On the final turn, `ContentCitation` items appear in the turn's `contents` list after the `ContentText` they ground, in the same order as during streaming. `ContentCitation` and `Source` are exported from `chatlas.types`.
+  * `ContentToolResponseFetch` gained a normalized `status` field, and web search results are now richer `Source` objects (see Breaking changes).
 * `Chat` gains a `model` property to get (or set) the model after the chat is created. Setting it does not validate the model name.
 * `ChatGoogle()`'s `reasoning` parameter now accepts a string thinking level (`"minimal"`, `"low"`, `"medium"`, or `"high"`) in addition to an integer token budget.
 * `ChatAnthropic()`'s `reasoning` parameter now accepts a string effort level (`"low"`, `"medium"`, `"high"`, `"xhigh"`, or `"max"`) to enable Claude's adaptive thinking, in addition to an integer token budget.
@@ -20,6 +24,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 * OpenAI-compatible providers (e.g., `ChatOllama()` with models like qwen3) now capture thinking content returned in a `reasoning` field, not just `reasoning_content`. Previously this thinking content was silently dropped.
 
+### Breaking changes
+
+* `ContentToolResponseSearch.urls` (a `list[str]`) has been replaced by `.sources` (a `list[Source]`), where each `Source` carries the result's `url`, `title`, and `domain`. Code reading `.urls` should switch to `[s.url for s in x.sources]`.
+
 ## [0.18.1] - 2026-05-21
 
 ### Improvements

diff --git a/chatlas/_chat.py b/chatlas/_chat.py
@@ -23,6 +23,7 @@
     Optional,
     Sequence,
     TypeVar,
+    Union,
     cast,
     overload,
 )
@@ -32,10 +33,16 @@
 from ._callbacks import CallbackManager
 from ._content import (
     Content,
+    ContentCitation,
     ContentJson,
     ContentText,
+    ContentThinking,
     ContentThinkingDelta,
     ContentToolRequest,
+    ContentToolRequestFetch,
+    ContentToolRequestSearch,
+    ContentToolResponseFetch,
+    ContentToolResponseSearch,
     ContentToolResult,
     ToolInfo,
 )
@@ -95,6 +102,20 @@ class TokensDict(TypedDict):
 
 EchoOptions = Literal["output", "all", "none", "text"]
 
+# The values yielded by `.stream()`/`.stream_async()`. Plain text is always
+# yielded; the richer content objects only appear when `content="all"`.
+StreamedContent = Union[
+    str,
+    ContentThinkingDelta,
+    ContentToolRequest,
+    ContentToolResult,
+    ContentToolRequestSearch,
+    ContentToolResponseSearch,
+    ContentToolRequestFetch,
+    ContentToolResponseFetch,
+    ContentCitation,
+]
+
 T = TypeVar("T")
 BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
 
@@ -103,6 +124,14 @@ def is_present(value: T | None | MISSING_TYPE) -> TypeGuard[T]:
     return value is not None and not isinstance(value, MISSING_TYPE)
 
 
+def _display_text(content: "Content") -> "Optional[str]":
+    if isinstance(content, ContentText):
+        return content.text
+    if isinstance(content, (ContentThinking, ContentThinkingDelta)):
+        return content.thinking
+    return None
+
+
 class Chat(Generic[SubmitInputArgsT, CompletionT]):
     """
     A chat object that can be used to interact with a language model.
@@ -1210,9 +1239,7 @@ def stream(
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
         controller: StreamController | None = None,
-    ) -> Generator[
-        str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
-    ]: ...
+    ) -> Generator[StreamedContent, None, None]: ...
 
     def stream(
         self,
@@ -1222,9 +1249,7 @@ def stream(
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
         controller: StreamController | None = None,
-    ) -> Generator[
-        str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
-    ]:
+    ) -> Generator[StreamedContent, None, None]:
         """
         Generate a response from the chat in a streaming fashion.
 
@@ -1298,11 +1323,7 @@ class Person(BaseModel):
             controller=controller,
         )
 
-        def wrapper() -> Generator[
-            str | ContentThinkingDelta | ContentToolRequest | ContentToolResult,
-            None,
-            None,
-        ]:
+        def wrapper() -> Generator[StreamedContent, None, None]:
             with display:
                 for chunk in generator:
                     yield chunk
@@ -1329,9 +1350,7 @@ async def stream_async(
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
         controller: StreamController | None = None,
-    ) -> AsyncGenerator[
-        str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
-    ]: ...
+    ) -> AsyncGenerator[StreamedContent, None]: ...
 
     async def stream_async(
         self,
@@ -1341,9 +1360,7 @@ async def stream_async(
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
         controller: StreamController | None = None,
-    ) -> AsyncGenerator[
-        str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
-    ]:
+    ) -> AsyncGenerator[StreamedContent, None]:
         """
         Generate a response from the chat in a streaming fashion asynchronously.
 
@@ -1422,9 +1439,7 @@ class Person(BaseModel):
             controller=controller,
         )
 
-        async def wrapper() -> AsyncGenerator[
-            str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
-        ]:
+        async def wrapper() -> AsyncGenerator[StreamedContent, None]:
             try:
                 with display:
                     async for chunk in generator:
@@ -2586,9 +2601,7 @@ def _chat_impl(
         data_model: Optional[type[BaseModel]] = None,
         *,
         controller: StreamController,
-    ) -> Generator[
-        str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
-    ]: ...
+    ) -> Generator[StreamedContent, None, None]: ...
 
     def _chat_impl(
         self,
@@ -2675,9 +2688,7 @@ def _chat_impl_async(
         data_model: Optional[type[BaseModel]] = None,
         *,
         controller: StreamController,
-    ) -> AsyncGenerator[
-        str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
-    ]: ...
+    ) -> AsyncGenerator[StreamedContent, None]: ...
 
     async def _chat_impl_async(
         self,
@@ -2834,11 +2845,9 @@ def emit(text: str | Content):
                             break
                         if controller.cancelled:
                             break
-                        content = self.provider.stream_content(chunk)
-                        if content is not None:
-                            text = self.provider.stream_text(chunk)
+                        for content in self.provider.stream_content(chunk):
                             yield from acc.process_content(
-                                content, text, content_mode, emit
+                                content, _display_text(content), content_mode, emit
                             )
                         result = self.provider.stream_merge_chunks(result, chunk)
 
@@ -2972,11 +2981,9 @@ def emit(text: str | Content):
                             break
                         if controller.cancelled:
                             break
-                        content = self.provider.stream_content(chunk)
-                        if content is not None:
-                            text = self.provider.stream_text(chunk)
+                        for content in self.provider.stream_content(chunk):
                             for item in acc.process_content(
-                                content, text, content_mode, emit
+                                content, _display_text(content), content_mode, emit
                             ):
                                 yield item
                         result = self.provider.stream_merge_chunks(result, chunk)

diff --git a/chatlas/_content.py b/chatlas/_content.py
@@ -146,6 +146,7 @@ def from_tool(cls, tool: "Tool | ToolBuiltIn") -> "ToolInfo":
     "web_search_results",
     "web_fetch_request",
     "web_fetch_results",
+    "citation",
 ]
 """
 A discriminated union of all content types.
@@ -170,6 +171,14 @@ def _repr_markdown_(self):
         return self.__str__()
 
 
+class Source(BaseModel):
+    """A page surfaced by a web search (not necessarily cited in the answer)."""
+
+    url: str
+    title: Optional[str] = None
+    domain: Optional[str] = None
+
+
 class ContentText(Content):
     """
     Text content for a [](`~chatlas.Turn`)
@@ -668,9 +677,7 @@ class ContentThinking(Content):
 
     @field_serializer("extra")
     @classmethod
-    def serialize_extra(
-        cls, v: Optional[dict[str, Any]]
-    ) -> Optional[dict[str, Any]]:
+    def serialize_extra(cls, v: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]:
         if v is None:
             return None
         return serialize_dict_with_bytes(v)
@@ -775,20 +782,20 @@ class ContentToolResponseSearch(Content):
 
     Parameters
     ----------
-    urls
-        The URLs returned by the search.
+    sources
+        The pages surfaced by the search.
     extra
         The raw provider-specific response data.
     """
 
-    urls: list[str]
+    sources: list[Source]
     extra: Optional[dict[str, Any]] = None
 
     content_type: ContentTypeEnum = "web_search_results"
 
     def __str__(self):
-        url_list = "\n".join(f"* {url}" for url in self.urls)
-        return f"[web search results]:\n{url_list}"
+        lines = "\n".join(f"* {s.url}" for s in self.sources)
+        return f"[web search results]:\n{lines}"
 
 
 class ContentToolRequestFetch(Content):
@@ -826,11 +833,18 @@ class ContentToolResponseFetch(Content):
     ----------
     url
         The URL that was fetched.
+    status
+        A normalized, cross-provider outcome: ``"success"`` if content was
+        retrieved, ``"error"`` if it was not, or ``None`` when the provider
+        doesn't report an outcome. Providers expose finer-grained, non-aligned
+        reasons (e.g. Anthropic's ``url_not_allowed``, Google's ``PAYWALL``);
+        those are not normalized here but remain available in ``extra``.
     extra
         The raw provider-specific response data.
     """
 
     url: str
+    status: Optional[Literal["success", "error"]] = None
     extra: Optional[dict[str, Any]] = None
 
     content_type: ContentTypeEnum = "web_fetch_results"
@@ -839,6 +853,23 @@ def __str__(self):
         return f"[web fetch result]: {self.url}"
 
 
+class ContentCitation(Content):
+    """
+    A citation emitted during streaming and stored on the final turn.
+
+    Its position relative to the surrounding ``ContentText`` items (in the
+    stream, and later in the turn's contents list) is the placement signal:
+    a consumer renders a citation marker after the text accumulated so far.
+    """
+
+    url: str
+    title: Optional[str] = None
+    content_type: ContentTypeEnum = "citation"
+
+    def __str__(self) -> str:
+        return f"[citation]: {self.url}"
+
+
 ContentUnion = Union[
     ContentText,
     ContentImageRemote,
@@ -852,6 +883,7 @@ def __str__(self):
     ContentToolResponseSearch,
     ContentToolRequestFetch,
     ContentToolResponseFetch,
+    ContentCitation,
 ]
 
 
@@ -917,6 +949,8 @@ def create_content(data: dict[str, Any]) -> ContentUnion:
         return ContentToolRequestFetch.model_validate(data)
     elif ct == "web_fetch_results":
         return ContentToolResponseFetch.model_validate(data)
+    elif ct == "citation":
+        return ContentCitation.model_validate(data)
     else:
         raise ValueError(f"Unknown content type: {ct}")
 

diff --git a/chatlas/_provider.py b/chatlas/_provider.py
@@ -9,13 +9,14 @@
     Iterable,
     Literal,
     Optional,
+    Sequence,
     TypeVar,
     overload,
 )
 
 from pydantic import BaseModel
 
-from ._content import Content, ContentText, ContentThinking
+from ._content import Content
 from ._tools import Tool, ToolBuiltIn
 from ._turn import AssistantTurn, Turn
 from ._typing_extensions import NotRequired, TypedDict
@@ -230,17 +231,7 @@ async def chat_perform_async(
     ) -> AsyncIterable[ChatCompletionChunkT] | ChatCompletionT: ...
 
     @abstractmethod
-    def stream_content(self, chunk: ChatCompletionChunkT) -> Optional["Content"]: ...
-
-    def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]:
-        content = self.stream_content(chunk)
-        if content is None:
-            return None
-        if isinstance(content, ContentThinking):
-            return content.thinking
-        if isinstance(content, ContentText):
-            return content.text
-        return str(content)
+    def stream_content(self, chunk: ChatCompletionChunkT) -> "Sequence[Content]": ...
 
     @abstractmethod
     def stream_merge_chunks(