Skip to content
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### New features

* chatlas is now instrumented with [OpenTelemetry](https://opentelemetry.io/) (OTel) out of the box, making it much easier to see how your app behaves in production — where time goes, how many tokens you're spending, which tools run, and where things fail. Without writing any tracing code, you get spans that capture the full structure of a conversation as one connected trace: an `invoke_agent` span over the whole chat loop, a `chat` span per model call, and an `execute_tool` span per tool invocation, with attributes (token usage, response model/ID, tool errors) that follow the [OTel GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/). Because chatlas keeps its spans active during each call, HTTP spans from provider instrumentors and any spans your own tools emit nest underneath automatically. Point it at any OTel-compatible backend (Logfire, Datadog, Honeycomb, Jaeger, …); message content is omitted by default and opt-in via `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true`. See the [monitoring guide](https://posit-dev.github.io/chatlas/get-started/monitor.html) to get started. (#310)
* Web search and fetch results now surface their citations across all three providers (OpenAI, Anthropic, Google), both progressively during streaming and on the final turn:
  * When streaming with `content="all"`, `ContentCitation` objects are emitted as citations arrive — interleaved with text for OpenAI and Anthropic, at stream-end for Google. `ContentCitation` carries `url` and optional `title`; its position in the stream (relative to surrounding text) is the placement signal for rendering footnote markers.
  * On the final turn, `ContentCitation` items appear in the turn's `contents` list after the `ContentText` they ground, in the same order as during streaming. `ContentCitation` and `Source` are exported from `chatlas.types`.
  * `ContentToolResponseFetch` gained a normalized `status` field, and web search results are now richer `Source` objects (see Breaking changes).
* `Chat` gains a `model` property to get (or set) the model after the chat is created. Setting it does not validate the model name.
* `ChatGoogle()`'s `reasoning` parameter now accepts a string thinking level (`"minimal"`, `"low"`, `"medium"`, or `"high"`) in addition to an integer token budget.
* `ChatAnthropic()`'s `reasoning` parameter now accepts a string effort level (`"low"`, `"medium"`, `"high"`, `"xhigh"`, or `"max"`) to enable Claude's adaptive thinking, in addition to an integer token budget.
Expand All @@ -20,6 +24,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

* OpenAI-compatible providers (e.g., `ChatOllama()` with models like qwen3) now capture thinking content returned in a `reasoning` field, not just `reasoning_content`. Previously this thinking content was silently dropped.

### Breaking changes

* `ContentToolResponseSearch.urls` (a `list[str]`) has been replaced by `.sources` (a `list[Source]`), where each `Source` carries the result's `url`, `title`, and `domain`. Code reading `.urls` should switch to `[s.url for s in x.sources]`.

## [0.18.1] - 2026-05-21

### Improvements
Expand Down
75 changes: 41 additions & 34 deletions chatlas/_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Optional,
Sequence,
TypeVar,
Union,
cast,
overload,
)
Expand All @@ -32,10 +33,16 @@
from ._callbacks import CallbackManager
from ._content import (
Content,
ContentCitation,
ContentJson,
ContentText,
ContentThinking,
ContentThinkingDelta,
ContentToolRequest,
ContentToolRequestFetch,
ContentToolRequestSearch,
ContentToolResponseFetch,
ContentToolResponseSearch,
ContentToolResult,
ToolInfo,
)
Expand Down Expand Up @@ -95,6 +102,20 @@ class TokensDict(TypedDict):

EchoOptions = Literal["output", "all", "none", "text"]

# The values yielded by `.stream()`/`.stream_async()`. Plain text is always
# yielded; the richer content objects only appear when `content="all"`.
#
# This alias is the yield type used in the return annotations of `.stream()`,
# `.stream_async()`, `_chat_impl()`, and `_chat_impl_async()`, so that adding a
# new streamable content type only requires touching this one definition.
StreamedContent = Union[
str,
ContentThinkingDelta,
ContentToolRequest,
ContentToolResult,
ContentToolRequestSearch,
ContentToolResponseSearch,
ContentToolRequestFetch,
ContentToolResponseFetch,
ContentCitation,
]

T = TypeVar("T")
BaseModelT = TypeVar("BaseModelT", bound=BaseModel)

Expand All @@ -103,6 +124,14 @@ def is_present(value: T | None | MISSING_TYPE) -> TypeGuard[T]:
return value is not None and not isinstance(value, MISSING_TYPE)


def _display_text(content: "Content") -> "Optional[str]":
    """
    Extract the displayable string carried by a piece of content.

    Parameters
    ----------
    content
        The content object to inspect.

    Returns
    -------
    Optional[str]
        ``content.text`` for a `ContentText`, ``content.thinking`` for a
        `ContentThinking` or `ContentThinkingDelta`, and ``None`` for any
        other content type.
    """
    if isinstance(content, ContentText):
        shown = content.text
    elif isinstance(content, (ContentThinking, ContentThinkingDelta)):
        # Both the complete and the delta thinking types expose `.thinking`.
        shown = content.thinking
    else:
        # Every other content type has no display text of its own.
        shown = None
    return shown


class Chat(Generic[SubmitInputArgsT, CompletionT]):
"""
A chat object that can be used to interact with a language model.
Expand Down Expand Up @@ -1210,9 +1239,7 @@ def stream(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
controller: StreamController | None = None,
) -> Generator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
]: ...
) -> Generator[StreamedContent, None, None]: ...

def stream(
self,
Expand All @@ -1222,9 +1249,7 @@ def stream(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
controller: StreamController | None = None,
) -> Generator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
]:
) -> Generator[StreamedContent, None, None]:
"""
Generate a response from the chat in a streaming fashion.

Expand Down Expand Up @@ -1298,11 +1323,7 @@ class Person(BaseModel):
controller=controller,
)

def wrapper() -> Generator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult,
None,
None,
]:
def wrapper() -> Generator[StreamedContent, None, None]:
with display:
for chunk in generator:
yield chunk
Expand All @@ -1329,9 +1350,7 @@ async def stream_async(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
controller: StreamController | None = None,
) -> AsyncGenerator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]: ...
) -> AsyncGenerator[StreamedContent, None]: ...

async def stream_async(
self,
Expand All @@ -1341,9 +1360,7 @@ async def stream_async(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
controller: StreamController | None = None,
) -> AsyncGenerator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]:
) -> AsyncGenerator[StreamedContent, None]:
"""
Generate a response from the chat in a streaming fashion asynchronously.

Expand Down Expand Up @@ -1422,9 +1439,7 @@ class Person(BaseModel):
controller=controller,
)

async def wrapper() -> AsyncGenerator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]:
async def wrapper() -> AsyncGenerator[StreamedContent, None]:
try:
with display:
async for chunk in generator:
Expand Down Expand Up @@ -2586,9 +2601,7 @@ def _chat_impl(
data_model: Optional[type[BaseModel]] = None,
*,
controller: StreamController,
) -> Generator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
]: ...
) -> Generator[StreamedContent, None, None]: ...

def _chat_impl(
self,
Expand Down Expand Up @@ -2675,9 +2688,7 @@ def _chat_impl_async(
data_model: Optional[type[BaseModel]] = None,
*,
controller: StreamController,
) -> AsyncGenerator[
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]: ...
) -> AsyncGenerator[StreamedContent, None]: ...

async def _chat_impl_async(
self,
Expand Down Expand Up @@ -2834,11 +2845,9 @@ def emit(text: str | Content):
break
if controller.cancelled:
break
content = self.provider.stream_content(chunk)
if content is not None:
text = self.provider.stream_text(chunk)
for content in self.provider.stream_content(chunk):
yield from acc.process_content(
content, text, content_mode, emit
content, _display_text(content), content_mode, emit
Comment thread
cpsievert marked this conversation as resolved.
)
result = self.provider.stream_merge_chunks(result, chunk)

Expand Down Expand Up @@ -2972,11 +2981,9 @@ def emit(text: str | Content):
break
if controller.cancelled:
break
content = self.provider.stream_content(chunk)
if content is not None:
text = self.provider.stream_text(chunk)
for content in self.provider.stream_content(chunk):
for item in acc.process_content(
content, text, content_mode, emit
content, _display_text(content), content_mode, emit
):
yield item
result = self.provider.stream_merge_chunks(result, chunk)
Expand Down
50 changes: 42 additions & 8 deletions chatlas/_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def from_tool(cls, tool: "Tool | ToolBuiltIn") -> "ToolInfo":
"web_search_results",
"web_fetch_request",
"web_fetch_results",
"citation",
]
"""
A discriminated union of all content types.
Expand All @@ -170,6 +171,14 @@ def _repr_markdown_(self):
return self.__str__()


class Source(BaseModel):
    """
    A page surfaced by a web search.

    A source is a search result; it is not necessarily cited in the answer.

    Parameters
    ----------
    url
        The URL of the page.
    title
        The title of the page, or ``None`` when it is not provided.
    domain
        The domain of the page, or ``None`` when it is not provided.
    """

    url: str
    title: Optional[str] = None
    domain: Optional[str] = None


class ContentText(Content):
"""
Text content for a [](`~chatlas.Turn`)
Expand Down Expand Up @@ -668,9 +677,7 @@ class ContentThinking(Content):

@field_serializer("extra")
@classmethod
def serialize_extra(
cls, v: Optional[dict[str, Any]]
) -> Optional[dict[str, Any]]:
def serialize_extra(cls, v: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]:
if v is None:
return None
return serialize_dict_with_bytes(v)
Expand Down Expand Up @@ -775,20 +782,20 @@ class ContentToolResponseSearch(Content):

Parameters
----------
urls
The URLs returned by the search.
sources
The pages surfaced by the search.
extra
The raw provider-specific response data.
"""

urls: list[str]
sources: list[Source]
extra: Optional[dict[str, Any]] = None

content_type: ContentTypeEnum = "web_search_results"

def __str__(self):
url_list = "\n".join(f"* {url}" for url in self.urls)
return f"[web search results]:\n{url_list}"
lines = "\n".join(f"* {s.url}" for s in self.sources)
return f"[web search results]:\n{lines}"


class ContentToolRequestFetch(Content):
Expand Down Expand Up @@ -826,11 +833,18 @@ class ContentToolResponseFetch(Content):
----------
url
The URL that was fetched.
status
A normalized, cross-provider outcome: ``"success"`` if content was
retrieved, ``"error"`` if it was not, or ``None`` when the provider
doesn't report an outcome. Providers expose finer-grained, non-aligned
reasons (e.g. Anthropic's ``url_not_allowed``, Google's ``PAYWALL``);
those are not normalized here but remain available in ``extra``.
extra
The raw provider-specific response data.
"""

url: str
status: Optional[Literal["success", "error"]] = None
extra: Optional[dict[str, Any]] = None

content_type: ContentTypeEnum = "web_fetch_results"
Expand All @@ -839,6 +853,23 @@ def __str__(self):
return f"[web fetch result]: {self.url}"


class ContentCitation(Content):
    """
    A citation emitted during streaming and stored on the final turn.

    A citation carries no placement offsets of its own. Its position in the
    turn's contents list, relative to the surrounding ``ContentText`` items,
    is the placement signal: a consumer renders a citation marker at the text
    accumulated so far.

    Parameters
    ----------
    url
        The URL being cited.
    title
        The title of the cited page, or ``None`` when it is not provided.
    """

    url: str
    title: Optional[str] = None

    content_type: ContentTypeEnum = "citation"

    def __str__(self) -> str:
        # Only the URL is shown; the title is not part of the string form.
        return f"[citation]: {self.url}"


ContentUnion = Union[
ContentText,
ContentImageRemote,
Expand All @@ -852,6 +883,7 @@ def __str__(self):
ContentToolResponseSearch,
ContentToolRequestFetch,
ContentToolResponseFetch,
ContentCitation,
]


Expand Down Expand Up @@ -917,6 +949,8 @@ def create_content(data: dict[str, Any]) -> ContentUnion:
return ContentToolRequestFetch.model_validate(data)
elif ct == "web_fetch_results":
return ContentToolResponseFetch.model_validate(data)
elif ct == "citation":
return ContentCitation.model_validate(data)
else:
raise ValueError(f"Unknown content type: {ct}")

Expand Down
15 changes: 3 additions & 12 deletions chatlas/_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
Iterable,
Literal,
Optional,
Sequence,
TypeVar,
overload,
)

from pydantic import BaseModel

from ._content import Content, ContentText, ContentThinking
from ._content import Content
from ._tools import Tool, ToolBuiltIn
from ._turn import AssistantTurn, Turn
from ._typing_extensions import NotRequired, TypedDict
Expand Down Expand Up @@ -230,17 +231,7 @@ async def chat_perform_async(
) -> AsyncIterable[ChatCompletionChunkT] | ChatCompletionT: ...

@abstractmethod
def stream_content(self, chunk: ChatCompletionChunkT) -> Optional["Content"]: ...

def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]:
content = self.stream_content(chunk)
if content is None:
return None
if isinstance(content, ContentThinking):
return content.thinking
if isinstance(content, ContentText):
return content.text
return str(content)
def stream_content(self, chunk: ChatCompletionChunkT) -> "Sequence[Content]": ...

@abstractmethod
def stream_merge_chunks(
Expand Down
Loading
Loading