From 9766ca5360e8881bb3a51a3949a5132650fc4fde Mon Sep 17 00:00:00 2001 From: Matt Harris Date: Mon, 27 Apr 2026 13:09:04 -0400 Subject: [PATCH 1/5] Phase 1: GA Search/Extract endpoints, citations, structured output Bump parallel-web 0.3.3 -> 0.5.1 and migrate Search and Extract to the v1 GA contract (client.search / client.extract). Surface the new GA fields (max_chars_total, location, client_model, session_id) and pack the prior flat settings (excerpts, fetch_policy, source_policy, max_results) into the advanced_settings envelope. Legacy mode strings ('fast', 'one-shot', 'agentic') and objective-only calls keep working via deprecation-warning fallback paths. Tools now use response_format='content_and_artifact' so agents see a compact summary string while ToolMessage.artifact carries the full Parallel response. Direct tool.invoke({...}) returns the content string; tool._run(...) returns (content, artifact). ChatParallelWeb fixes: - response_metadata uses the LangChain-1.x-standard 'model_name' key (was 'model'); also surfaces 'basis' (citations / reasoning / confidence) and 'interaction_id' on the research models. - Add with_structured_output() routing through Parallel's response_format JSON schema for lite/base/core; raise a clear error on speed since it silently ignores the request. function_calling routes to json_schema for cross-provider compatibility. - Drop the alias='model_name' on the model field that silently swallowed ChatParallelWeb(model='lite'); add a model_validator shim so existing model_name= kwargs keep working. Slim _client.py: remove the four hand-rolled Parallel*Client wrappers; tools now instantiate parallel.Parallel / parallel.AsyncParallel directly. Add SourcePolicy pydantic model. Packaging: - pyproject version 0.2.0 -> 0.3.0 - Add include = ['langchain_parallel/py.typed'] so type info ships in the wheel. 
Tests: rewrite unit + integration tests around the new tuple return, the SDK 0.5 surface, the v1 endpoint, and the structured-output method shape; add TestChatParallelWebUnitLite for the research-model capability flags. 39 unit tests + 11 extract integration tests pass. --- CHANGELOG.md | 37 + README.md | 70 +- langchain_parallel/__init__.py | 2 + langchain_parallel/_client.py | 238 +----- langchain_parallel/_types.py | 23 + langchain_parallel/chat_models.py | 264 ++++++- langchain_parallel/extract_tool.py | 563 +++++++------- langchain_parallel/search_tool.py | 684 ++++++++++-------- poetry.lock | 12 +- pyproject.toml | 5 +- tests/integration_tests/test_extract_tool.py | 170 +++-- .../__snapshots__/test_chat_models.ambr | 29 +- tests/unit_tests/test_chat_models.py | 37 +- tests/unit_tests/test_extract_tool.py | 384 +++++----- tests/unit_tests/test_search_tool.py | 245 +++++-- 15 files changed, 1552 insertions(+), 1211 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe5bbfa..a3933a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,43 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.3.0] - 2026-04-27 + +### Added + +- **Search/Extract GA endpoints**: `ParallelWebSearchTool` and `ParallelExtractTool` now call `client.search` and `client.extract` (the `/v1` GA paths) by default, replacing the deprecated `client.beta.*` calls. New parameters surfaced from the GA contract: `max_chars_total`, `client_model`, `session_id`, `location` on both tools. +- **Citations on chat responses**: when `model` is `lite`, `base`, or `core`, `AIMessage.response_metadata["basis"]` now carries the API's per-field citations / reasoning / confidence. `interaction_id` is also surfaced for context chaining across calls. 
+- **`ChatParallelWeb.with_structured_output()`**: returns a `Runnable` that produces a typed object (pydantic model or dict) using Parallel's `response_format` JSON-schema feature on the research models. `method="json_schema"` (default), `method="json_mode"`, and `method="function_calling"` (routed to `json_schema` for cross-provider compatibility) are supported. Raises a clear error when called on `model="speed"` since that model silently ignores structured-output requests. +- **`SourcePolicy` pydantic model** in `langchain_parallel._types` mirroring the API's `include_domains` / `exclude_domains` / `after_date` shape. +- **`tool.response_format = "content_and_artifact"`** on both Search and Extract tools — agents see a compact summary string while consumers reading from the `ToolMessage` get the full structured payload via `.artifact`. + +### Changed + +- **BREAKING — tool return shape**: `ParallelWebSearchTool` and `ParallelExtractTool` now return `(content_str, artifact)` per the LangChain `content_and_artifact` convention. Direct `tool.invoke({...})` returns just the content string; the tool-call form (`{"args": {...}, "id": ..., "name": ..., "type": "tool_call"}`) returns a `ToolMessage` whose `.artifact` carries the full Parallel response. To keep the old direct-dict access, use `_, artifact = tool._run(...)` or unpack the `ToolMessage`. +- **BREAKING — `mode` strings**: legacy values `"fast"`, `"one-shot"`, and `"agentic"` continue to work but emit a `DeprecationWarning` and are mapped to `basic` / `basic` / `advanced` respectively. The GA values `"basic"` and `"advanced"` are the new canonical set. +- **`ChatParallelWeb.model` alias removed (with back-compat shim)**: the `model_name` alias on the `model` field has been removed because it silently swallowed `ChatParallelWeb(model="lite")` and forced users into the default `"speed"`. 
Both `ChatParallelWeb(model="lite")` and `ChatParallelWeb(model_name="lite")` now work — the latter via a `model_validator` that maps `model_name=` to `model=`. `lc_attributes` still serializes as `model_name` for tracing parity. +- **Search behavior**: when `search_queries` is omitted, the tool falls back to the deprecated `/v1beta/search` endpoint with a `DeprecationWarning`. The GA endpoint requires `search_queries` (1–5 keyword strings); pass them explicitly to silence the warning. +- **`response_metadata["model_name"]`**: chat completions now emit `model_name` (the LangChain 1.x standard key) instead of `model`. Standard tests check for `model_name`. +- **`parallel-web` SDK bumped** from `^0.3.3` to `^0.5.1`. Brings in v1 GA Search/Extract types, `AdvancedSearchSettingsParam` / `AdvancedExtractSettingsParam`, and FindAll / Task Group surface (not yet exposed by this integration; see the IMPROVEMENT_PLAN.md roadmap for Phase 2). +- **Slimmed `_client.py`**: the four hand-rolled `ParallelSearchClient` / `AsyncParallelSearchClient` / `ParallelExtractClient` / `AsyncParallelExtractClient` wrappers have been removed. Tools now instantiate the `parallel.Parallel` / `parallel.AsyncParallel` SDK clients directly. Internal change; no public surface impact. +- `ParallelExtractTool.full_content` precedence is now explicit: a `FullContentSettings` (or dict) on the call always wins over the tool-level `max_chars_per_extract`; the latter only applies when `full_content=True` is passed as a plain bool. + +### Fixed + +- `ChatParallelWeb(model="lite")` now actually selects the `lite` model. Previously the `alias="model_name"` on the field meant the `model=` kwarg was silently ignored and the default `"speed"` was used. +- `py.typed` is now bundled into the wheel via the `[tool.poetry] include` directive, so downstream `mypy` runs see the package's type information. 
+ + ### Migration + + - **Tools**: existing code that does `result = tool.invoke({...})` and treats `result` as a dict/list should switch to either `_, result = tool._run(...)` or use the tool-call envelope: + ```python + msg = tool.invoke({"args": {...}, "id": "1", "name": tool.name, "type": "tool_call"}) + result = msg.artifact + ``` + - **Search**: callers using only `objective` (no `search_queries`) keep working but should add `search_queries=["...","..."]` to silence the deprecation warning and use the GA endpoint. + - **Search modes**: rename `mode="one-shot"` → `mode="basic"` (or `"advanced"` for higher quality), `mode="agentic"` → `mode="advanced"`, `mode="fast"` → `mode="basic"`. + - **Chat**: code that did `ChatParallelWeb(model_name="...")` continues to work via the `model_validator` that maps `model_name=` to `model=` (`lc_attributes` only affects serialization). New code should prefer `ChatParallelWeb(model="lite")`. Read citations from `response.response_metadata["basis"]`. + ## [0.2.0] - 2025-12-01 ### Changed diff --git a/README.md b/README.md index 8da36ff..acbb53c 100644 --- a/README.md +++ b/README.md @@ -33,28 +33,48 @@ export PARALLEL_API_KEY="your-api-key-here" The `ChatParallelWeb` class provides access to Parallel's Chat API, which combines language models with real-time web research capabilities. 
+#### Picking a model + +| Model | Latency | Citations (`response_metadata["basis"]`) | Structured output | +|-------|---------|------------------------------------------|-------------------| +| `speed` (default) | ~3s | none | not supported | +| `lite` | seconds | yes | `with_structured_output()` | +| `base` | seconds–minutes | yes | `with_structured_output()` | +| `core` | minutes | yes (most thorough) | `with_structured_output()` | + #### Basic Usage ```python from langchain_core.messages import HumanMessage, SystemMessage from langchain_parallel.chat_models import ChatParallelWeb -# Initialize the chat model -chat = ChatParallelWeb( - model="speed", # Parallel's chat model - temperature=0.7, # Optional: ignored by Parallel - max_tokens=None, # Optional: ignored by Parallel -) +chat = ChatParallelWeb(model="speed") -# Create messages messages = [ SystemMessage(content="You are a helpful assistant with access to real-time web information."), - HumanMessage(content="What are the latest developments in artificial intelligence?") + HumanMessage(content="What are the latest developments in artificial intelligence?"), ] -# Get response response = chat.invoke(messages) print(response.content) +# Citations on the research models (lite/base/core): +print(response.response_metadata.get("basis")) +``` + +#### Structured output (research models) + +```python +from pydantic import BaseModel, Field +from langchain_parallel import ChatParallelWeb + +class Founder(BaseModel): + name: str = Field(description="Full name of the founder") + company: str = Field(description="Company they founded") + +structured = ChatParallelWeb(model="lite").with_structured_output(Founder) +result = structured.invoke([("human", "Who founded SpaceX?")]) +print(result) +# Founder(name='Elon Musk', company='SpaceX') ``` #### Streaming Responses @@ -187,31 +207,21 @@ The search tool provides direct access to Parallel's Search API: ```python from langchain_parallel import ParallelWebSearchTool -# 
Initialize the search tool search_tool = ParallelWebSearchTool() -# Search with an objective -result = search_tool.invoke({ - "objective": "What are the latest developments in renewable energy?", - "max_results": 5 -}) - -print(result) -# { -# "search_id": "search_123...", -# "results": [ -# { -# "url": "https://example.com/renewable-energy", -# "title": "Latest Renewable Energy Developments", -# "excerpts": [ -# "Solar energy has seen remarkable growth...", -# "Wind power capacity increased by 15%..." -# ] -# } -# ] -# } +# In a tool-calling agent, the tool returns a ToolMessage with .content +# (compact LLM-readable summary) and .artifact (full Parallel response). +# To get both directly: +content, artifact = search_tool._run( + search_queries=["renewable energy 2026", "solar power developments"], + max_results=5, +) +print(content) +print(artifact["search_id"], len(artifact["results"])) ``` +> **0.3.0 migration note**: tools now use `response_format="content_and_artifact"`. A bare `tool.invoke({...})` returns the content string only; pass a tool-call envelope (`{"args": {...}, "id": "1", "name": tool.name, "type": "tool_call"}`) to get back a `ToolMessage` with `.artifact`, or call `tool._run(...)` for the `(content, artifact)` tuple. 
+ diff --git a/langchain_parallel/__init__.py b/langchain_parallel/__init__.py index 45463ab..1543082 100644 --- a/langchain_parallel/__init__.py +++ b/langchain_parallel/__init__.py @@ -4,6 +4,7 @@ ExcerptSettings, FetchPolicy, FullContentSettings, + SourcePolicy, ) from langchain_parallel.chat_models import ChatParallelWeb from langchain_parallel.extract_tool import ParallelExtractTool @@ -23,5 +24,6 @@ "FullContentSettings", "ParallelExtractTool", "ParallelWebSearchTool", + "SourcePolicy", "__version__", ] diff --git a/langchain_parallel/_client.py b/langchain_parallel/_client.py index 949649f..691e461 100644 --- a/langchain_parallel/_client.py +++ b/langchain_parallel/_client.py @@ -3,7 +3,7 @@ from __future__ import annotations import os -from typing import Any, Optional, Union +from typing import Optional import openai from parallel import AsyncParallel, Parallel @@ -45,235 +45,11 @@ def get_async_openai_client(api_key: str, base_url: str) -> openai.AsyncOpenAI: return openai.AsyncOpenAI(api_key=api_key, base_url=base_url) -class ParallelSearchClient: - """Synchronous client for Parallel Search API using the Parallel SDK.""" +def get_parallel_client(api_key: str, base_url: str) -> Parallel: + """Returns a configured sync Parallel SDK client.""" + return Parallel(api_key=api_key, base_url=base_url) - def __init__( - self, - api_key: str, - base_url: str = "https://api.parallel.ai", - ): - self.api_key = api_key - self.base_url = base_url.rstrip("/") - # Initialize the Parallel SDK client - self.client = Parallel(api_key=api_key, base_url=base_url) - def search( - self, - objective: Optional[str] = None, - search_queries: Optional[list[str]] = None, - max_results: int = 10, - excerpts: Optional[dict[str, Any]] = None, - mode: Optional[str] = None, - source_policy: Optional[dict[str, Union[str, list[str]]]] = None, - fetch_policy: Optional[dict[str, Any]] = None, - timeout: Optional[float] = None, - ) -> dict[str, Any]: - """Perform a synchronous search using 
the Parallel Search API via SDK.""" - if not objective and not search_queries: - msg = "Either 'objective' or 'search_queries' must be provided" - raise ValueError(msg) - - # Use default timeout if not provided - if timeout is None: - timeout = 30.0 - - # Build kwargs, only including non-None values for optional params - kwargs: dict[str, Any] = { - "objective": objective, - "search_queries": search_queries, - "max_results": max_results, - "timeout": timeout, - } - if excerpts is not None: - kwargs["excerpts"] = excerpts - if mode is not None: - kwargs["mode"] = mode - if source_policy is not None: - kwargs["source_policy"] = source_policy - if fetch_policy is not None: - kwargs["fetch_policy"] = fetch_policy - - # Use the Parallel SDK's beta.search method - search_response = self.client.beta.search(**kwargs) - - # Convert the SDK response to a dictionary - return search_response.model_dump() - - -class AsyncParallelSearchClient: - """Asynchronous client for Parallel Search API using the Parallel SDK.""" - - def __init__( - self, - api_key: str, - base_url: str = "https://api.parallel.ai", - ): - self.api_key = api_key - self.base_url = base_url.rstrip("/") - # Initialize the Parallel SDK async client - self.client = AsyncParallel(api_key=api_key, base_url=base_url) - - async def search( - self, - objective: Optional[str] = None, - search_queries: Optional[list[str]] = None, - max_results: int = 10, - excerpts: Optional[dict[str, Any]] = None, - mode: Optional[str] = None, - source_policy: Optional[dict[str, Union[str, list[str]]]] = None, - fetch_policy: Optional[dict[str, Any]] = None, - timeout: Optional[float] = None, - ) -> dict[str, Any]: - """Perform an async search using the Parallel Search API via SDK.""" - if not objective and not search_queries: - msg = "Either 'objective' or 'search_queries' must be provided" - raise ValueError(msg) - - # Use default timeout if not provided - if timeout is None: - timeout = 30.0 - - # Build kwargs, only including 
non-None values for optional params - kwargs: dict[str, Any] = { - "objective": objective, - "search_queries": search_queries, - "max_results": max_results, - "timeout": timeout, - } - if excerpts is not None: - kwargs["excerpts"] = excerpts - if mode is not None: - kwargs["mode"] = mode - if source_policy is not None: - kwargs["source_policy"] = source_policy - if fetch_policy is not None: - kwargs["fetch_policy"] = fetch_policy - - # Use the Parallel SDK's beta.search method - search_response = await self.client.beta.search(**kwargs) - - # Convert the SDK response to a dictionary - return search_response.model_dump() - - -def get_search_client( - api_key: str, base_url: str = "https://api.parallel.ai" -) -> ParallelSearchClient: - """Returns a configured sync Parallel Search client.""" - return ParallelSearchClient(api_key, base_url) - - -def get_async_search_client( - api_key: str, base_url: str = "https://api.parallel.ai" -) -> AsyncParallelSearchClient: - """Returns a configured async Parallel Search client.""" - return AsyncParallelSearchClient(api_key, base_url) - - -class ParallelExtractClient: - """Synchronous client for Parallel Extract API using the Parallel SDK.""" - - def __init__( - self, - api_key: str, - base_url: str = "https://api.parallel.ai", - ): - self.api_key = api_key - self.base_url = base_url.rstrip("/") - # Initialize the Parallel SDK client - self.client = Parallel(api_key=api_key, base_url=base_url) - - def extract( - self, - urls: list[str], - objective: Optional[str] = None, - search_queries: Optional[list[str]] = None, - excerpts: Optional[Union[bool, dict[str, Any]]] = None, - full_content: Optional[Union[bool, dict[str, Any]]] = None, - fetch_policy: Optional[dict[str, Any]] = None, - timeout: Optional[float] = None, - ) -> dict[str, Any]: - """Perform a synchronous extract using the Parallel Extract API via SDK.""" - if not urls: - msg = "At least one URL must be provided" - raise ValueError(msg) - - # Use default timeout if not 
provided (5 seconds per URL) - if timeout is None: - timeout = 5.0 * len(urls) - - # Use the Parallel SDK's beta.extract method - extract_response = self.client.beta.extract( - urls=urls, - objective=objective, - search_queries=search_queries, - excerpts=excerpts, - full_content=full_content, - fetch_policy=fetch_policy, - timeout=timeout, - ) - - # Convert the SDK response to a dictionary - return extract_response.model_dump() - - -class AsyncParallelExtractClient: - """Asynchronous client for Parallel Extract API using the Parallel SDK.""" - - def __init__( - self, - api_key: str, - base_url: str = "https://api.parallel.ai", - ): - self.api_key = api_key - self.base_url = base_url.rstrip("/") - # Initialize the Parallel SDK async client - self.client = AsyncParallel(api_key=api_key, base_url=base_url) - - async def extract( - self, - urls: list[str], - objective: Optional[str] = None, - search_queries: Optional[list[str]] = None, - excerpts: Optional[Union[bool, dict[str, Any]]] = None, - full_content: Optional[Union[bool, dict[str, Any]]] = None, - fetch_policy: Optional[dict[str, Any]] = None, - timeout: Optional[float] = None, - ) -> dict[str, Any]: - """Perform an async extract using the Parallel Extract API via SDK.""" - if not urls: - msg = "At least one URL must be provided" - raise ValueError(msg) - - # Use default timeout if not provided (5 seconds per URL) - if timeout is None: - timeout = 5.0 * len(urls) - - # Use the Parallel SDK's beta.extract method - extract_response = await self.client.beta.extract( - urls=urls, - objective=objective, - search_queries=search_queries, - excerpts=excerpts, - full_content=full_content, - fetch_policy=fetch_policy, - timeout=timeout, - ) - - # Convert the SDK response to a dictionary - return extract_response.model_dump() - - -def get_extract_client( - api_key: str, base_url: str = "https://api.parallel.ai" -) -> ParallelExtractClient: - """Returns a configured sync Parallel Extract client.""" - return 
ParallelExtractClient(api_key, base_url) - - -def get_async_extract_client( - api_key: str, base_url: str = "https://api.parallel.ai" -) -> AsyncParallelExtractClient: - """Returns a configured async Parallel Extract client.""" - return AsyncParallelExtractClient(api_key, base_url) +def get_async_parallel_client(api_key: str, base_url: str) -> AsyncParallel: + """Returns a configured async Parallel SDK client.""" + return AsyncParallel(api_key=api_key, base_url=base_url) diff --git a/langchain_parallel/_types.py b/langchain_parallel/_types.py index e25bfa2..7bb2f18 100644 --- a/langchain_parallel/_types.py +++ b/langchain_parallel/_types.py @@ -58,3 +58,26 @@ class FetchPolicy(BaseModel): "fetch fails or times out. If true, returns an error instead." ), ) + + +class SourcePolicy(BaseModel): + """Domain allow/deny lists and freshness floor for web research.""" + + include_domains: Optional[list[str]] = Field( + default=None, + description=( + "If provided, only sources from these apex domains are returned. " + "Combined include + exclude lists are capped at 200 domains." + ), + ) + exclude_domains: Optional[list[str]] = Field( + default=None, + description="If provided, sources from these apex domains are excluded.", + ) + after_date: Optional[str] = Field( + default=None, + description=( + "ISO date (YYYY-MM-DD). Only return sources published on or after " + "this date." 
+ ), + ) diff --git a/langchain_parallel/chat_models.py b/langchain_parallel/chat_models.py index eac915e..052e6fa 100644 --- a/langchain_parallel/chat_models.py +++ b/langchain_parallel/chat_models.py @@ -8,14 +8,14 @@ import contextlib from collections.abc import AsyncIterator, Iterator -from typing import Any, Optional, cast +from typing import Any, Literal, Optional, Union, cast import openai from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, ) -from langchain_core.language_models import BaseChatModel +from langchain_core.language_models import BaseChatModel, LanguageModelInput from langchain_core.messages import ( AIMessage, AIMessageChunk, @@ -23,13 +23,23 @@ HumanMessage, SystemMessage, ) +from langchain_core.output_parsers import ( + JsonOutputParser, + PydanticOutputParser, +) from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough +from langchain_core.utils.function_calling import convert_to_json_schema +from langchain_core.utils.pydantic import is_basemodel_subclass from openai import AuthenticationError, RateLimitError -from pydantic import Field, SecretStr, model_validator +from pydantic import BaseModel, Field, SecretStr, model_validator from typing_extensions import Self from ._client import get_api_key, get_async_openai_client, get_openai_client +# Models that support response_format JSON schema. The `speed` model ignores it. 
+_STRUCTURED_OUTPUT_MODELS: frozenset[str] = frozenset({"lite", "base", "core"}) + def _convert_message_to_dict(message: BaseMessage) -> dict[str, Any]: """Convert a LangChain message to OpenAI message format.""" @@ -50,12 +60,31 @@ def _prepare_messages(messages: list[BaseMessage]) -> list[dict[str, Any]]: def _create_response_metadata(response: Any, choice: Any) -> dict[str, Any]: - """Create response metadata from API response.""" - return { - "model": getattr(response, "model", None), + """Create response metadata from API response. + + Uses LangChain 1.x standard keys (`model_name`, `finish_reason`, + `system_fingerprint`). Surfaces Parallel-specific fields (`basis`, + `interaction_id`) when present. + """ + metadata: dict[str, Any] = { + "model_name": getattr(response, "model", None), "finish_reason": getattr(choice, "finish_reason", None), "created": getattr(response, "created", None), } + system_fingerprint = getattr(response, "system_fingerprint", None) + if system_fingerprint is not None: + metadata["system_fingerprint"] = system_fingerprint + basis = getattr(response, "basis", None) + if basis: + metadata["basis"] = ( + [b.model_dump() if hasattr(b, "model_dump") else b for b in basis] + if isinstance(basis, list) + else basis + ) + interaction_id = getattr(response, "interaction_id", None) + if interaction_id is not None: + metadata["interaction_id"] = interaction_id + return metadata def _create_ai_message(content: str, response_metadata: dict[str, Any]) -> AIMessage: @@ -69,11 +98,22 @@ def _create_ai_message(content: str, response_metadata: dict[str, Any]) -> AIMes def _create_stream_response_metadata(chunk: Any, choice: Any) -> dict[str, Any]: """Create response metadata for streaming chunks.""" - response_metadata = {} + response_metadata: dict[str, Any] = {} if hasattr(choice, "finish_reason") and choice.finish_reason is not None: response_metadata["finish_reason"] = str(choice.finish_reason) if hasattr(chunk, "model"): - 
response_metadata["model"] = chunk.model + response_metadata["model_name"] = chunk.model + if getattr(chunk, "system_fingerprint", None) is not None: + response_metadata["system_fingerprint"] = chunk.system_fingerprint + if getattr(chunk, "interaction_id", None) is not None: + response_metadata["interaction_id"] = chunk.interaction_id + basis = getattr(chunk, "basis", None) + if basis: + response_metadata["basis"] = ( + [b.model_dump() if hasattr(b, "model_dump") else b for b in basis] + if isinstance(basis, list) + else basis + ) return response_metadata @@ -219,8 +259,17 @@ class ChatParallelWeb(BaseChatModel): """ - model: str = Field(default="speed", alias="model_name") - """The name of the model to use. Defaults to 'speed' for Parallel.""" + model: str = Field(default="speed") + """The name of the model to use. + + One of: + + - ``"speed"`` (default): low-latency conversational answers, no citations. + - ``"lite"`` / ``"base"`` / ``"core"``: research models with web access + that return source citations on ``response_metadata['basis']`` and + support ``response_format`` JSON schemas via + :meth:`with_structured_output`. + """ api_key: Optional[SecretStr] = Field(default=None) """Parallel API key. If not provided, will be read from @@ -275,6 +324,24 @@ class ChatParallelWeb(BaseChatModel): _client: Optional[openai.OpenAI] = None _async_client: Optional[openai.AsyncOpenAI] = None + @model_validator(mode="before") + @classmethod + def _accept_model_name_alias(cls, values: Any) -> Any: + """Accept ``model_name="..."`` as a back-compat alias for ``model="..."``. + + Pre-0.3.0 the field was declared as ``Field(alias="model_name")``, + meaning users had to pass ``model_name=`` and ``model=`` was silently + ignored. The alias was removed in 0.3.0 to fix that footgun; this + validator preserves callers that still use ``model_name=``. 
+ """ + if ( + isinstance(values, dict) + and "model_name" in values + and "model" not in values + ): + values = {**values, "model": values.pop("model_name")} + return values + @model_validator(mode="after") def validate_environment(self) -> Self: """Validate that api key exists and initialize clients.""" @@ -339,7 +406,7 @@ def lc_secrets(self) -> dict[str, str]: @property def lc_attributes(self) -> dict[str, Any]: """Return attributes for LangChain serialization.""" - attributes: dict[str, Any] = {} + attributes: dict[str, Any] = {"model_name": self.model} if self.base_url: attributes["base_url"] = self.base_url return attributes @@ -377,7 +444,7 @@ def _process_non_stream_response(self, response: Any) -> ChatResult: choice = response.choices[0] content = choice.message.content or "" response_metadata = _create_response_metadata(response, choice) - response_metadata["model"] = response_metadata["model"] or self.model + response_metadata["model_name"] = response_metadata["model_name"] or self.model message = _create_ai_message(content, response_metadata) generation = ChatGeneration(message=message) @@ -437,6 +504,33 @@ async def _process_async_stream_chunk( return ChatGenerationChunk(message=chunk_message) + def _build_create_kwargs( + self, + messages: list[dict[str, Any]], + stop: Optional[list[str]], + *, + stream: bool, + extra: dict[str, Any], + ) -> dict[str, Any]: + """Build kwargs for the OpenAI ``chat.completions.create`` call. + + Per-call ``extra`` (typically populated by ``with_structured_output``) + wins over instance-level fields. + """ + create_kwargs: dict[str, Any] = { + "model": self.model, + "messages": cast(Any, messages), + "stream": stream, + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "stop": stop, + } + if self.response_format is not None: + create_kwargs["response_format"] = self.response_format + # Per-call overrides from the runnable kwargs. Drop None values. 
+ create_kwargs.update({k: v for k, v in extra.items() if v is not None}) + return create_kwargs + def _generate( self, messages: list[BaseMessage], @@ -449,12 +543,12 @@ def _generate( with self._handle_errors(): response = self.client.chat.completions.create( - model=self.model, - messages=cast(Any, openai_messages), - stream=False, - temperature=self.temperature, - max_tokens=self.max_tokens, - stop=stop, + **self._build_create_kwargs( + openai_messages, + stop, + stream=False, + extra=kwargs, + ), ) return self._process_non_stream_response(response) @@ -471,12 +565,12 @@ def _stream( with self._handle_errors(): stream = self.client.chat.completions.create( - model=self.model, - messages=cast(Any, openai_messages), - stream=True, - temperature=self.temperature, - max_tokens=self.max_tokens, - stop=stop, + **self._build_create_kwargs( + openai_messages, + stop, + stream=True, + extra=kwargs, + ), ) for chunk in stream: @@ -496,12 +590,12 @@ async def _agenerate( with self._handle_errors(): response = await self.async_client.chat.completions.create( - model=self.model, - messages=cast(Any, openai_messages), - stream=False, - temperature=self.temperature, - max_tokens=self.max_tokens, - stop=stop, + **self._build_create_kwargs( + openai_messages, + stop, + stream=False, + extra=kwargs, + ), ) return self._process_non_stream_response(response) @@ -518,12 +612,12 @@ async def _astream( with self._handle_errors(): stream = await self.async_client.chat.completions.create( - model=self.model, - messages=cast(Any, openai_messages), - stream=True, - temperature=self.temperature, - max_tokens=self.max_tokens, - stop=stop, + **self._build_create_kwargs( + openai_messages, + stop, + stream=True, + extra=kwargs, + ), ) async for chunk in stream: @@ -532,3 +626,101 @@ async def _astream( ) if chunk_result is not None: yield chunk_result + + def with_structured_output( + self, + schema: Optional[Union[dict[str, Any], type[BaseModel]]] = None, + *, + method: 
Literal["json_schema", "function_calling", "json_mode"] = "json_schema", + include_raw: bool = False, + strict: Optional[bool] = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, Union[dict[str, Any], BaseModel]]: + """Return a Runnable that produces structured output. + + Parallel's research models (``lite``, ``base``, ``core``) accept the + OpenAI ``response_format`` parameter with a JSON schema. The ``speed`` + model silently ignores it; this method raises if you try to use it + on a non-supporting model so the failure is loud. + + Args: + schema: A pydantic v2 model class or a JSON schema dict. + method: ``"json_schema"`` (default) for strict-typed output, or + ``"json_mode"`` to ask the model for any valid JSON object. + ``"function_calling"`` is accepted for cross-provider + compatibility and is routed to ``"json_schema"`` since + Parallel's chat API does not support tool calling. + include_raw: If True, return ``{"raw": AIMessage, "parsed": ..., + "parsing_error": ...}`` instead of just the parsed value. + strict: Forwarded to the API's ``response_format`` JSON schema. + Defaults to True for pydantic schemas, None for raw dicts. + **kwargs: Reserved for forward compatibility; unused. + """ + if kwargs: + msg = f"Received unsupported kwargs: {sorted(kwargs)}" + raise ValueError(msg) + if self.model not in _STRUCTURED_OUTPUT_MODELS: + msg = ( + f"Structured output requires one of the research models " + f"({sorted(_STRUCTURED_OUTPUT_MODELS)}); the '{self.model}' " + f"model silently ignores response_format. Re-instantiate with " + f"`ChatParallelWeb(model='lite' | 'base' | 'core')`." + ) + raise ValueError(msg) + if method == "function_calling": + # Parallel chat doesn't support tool calling; route to json_schema + # since the user-visible result is equivalent. + method = "json_schema" + if method not in {"json_schema", "json_mode"}: + msg = ( + f"Unsupported method '{method}'. 
Use 'json_schema', " + f"'function_calling' (routed to json_schema), or 'json_mode'." + ) + raise ValueError(msg) + + if method == "json_mode": + # `json_mode` only enables JSON output without a schema constraint; + # if a schema is also passed, accept it for cross-provider compat + # but only use it for the parser, not for the API call. + response_format: dict[str, Any] = {"type": "json_object"} + schema_is_pydantic = ( + schema is not None + and isinstance(schema, type) + and is_basemodel_subclass(schema) + ) + output_parser: Runnable = ( + PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type] + if schema_is_pydantic + else JsonOutputParser() + ) + else: + if schema is None: + msg = "method='json_schema' requires a schema." + raise ValueError(msg) + is_pydantic = isinstance(schema, type) and is_basemodel_subclass(schema) + strict_value: Optional[bool] + if is_pydantic: + json_schema = convert_to_json_schema(schema) + output_parser = PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type] + strict_value = True if strict is None else strict + else: + json_schema = dict(schema) # type: ignore[arg-type] + output_parser = JsonOutputParser() + strict_value = strict + response_format = { + "type": "json_schema", + "json_schema": { + "name": json_schema.get("title", "output"), + "schema": json_schema, + }, + } + if strict_value is not None: + response_format["json_schema"]["strict"] = strict_value + + bound = self.bind(response_format=response_format) + if include_raw: + return RunnableMap(raw=bound) | RunnablePassthrough.assign( + parsed=lambda x: output_parser.invoke(x["raw"]), + parsing_error=lambda _: None, + ) + return bound | output_parser diff --git a/langchain_parallel/extract_tool.py b/langchain_parallel/extract_tool.py index 06d09f4..dc6f256 100644 --- a/langchain_parallel/extract_tool.py +++ b/langchain_parallel/extract_tool.py @@ -2,68 +2,136 @@ from __future__ import annotations -from typing import Any, Optional, Union +from 
typing import Any, Literal, Optional, Union from langchain_core.callbacks import ( AsyncCallbackManagerForToolRun, CallbackManagerForToolRun, ) from langchain_core.tools import BaseTool +from parallel import AsyncParallel, Parallel from pydantic import BaseModel, Field, SecretStr, model_validator -from ._client import get_api_key, get_async_extract_client, get_extract_client +from ._client import get_api_key, get_async_parallel_client, get_parallel_client from ._types import ExcerptSettings, FetchPolicy, FullContentSettings +def _coerce_full_content( + full_content: Union[bool, FullContentSettings, dict[str, Any]], + *, + tool_max_chars: Optional[int], +) -> Union[bool, dict[str, Any]]: + """Resolve the user-provided full_content arg + tool-level default. + + Precedence: an explicit FullContentSettings or dict wins over tool_max_chars, + which only applies when full_content was passed as a plain True/False. + """ + if isinstance(full_content, FullContentSettings): + return full_content.model_dump(exclude_none=True) + if isinstance(full_content, dict): + return {k: v for k, v in full_content.items() if v is not None} + if full_content is True and tool_max_chars is not None: + return {"max_chars_per_result": tool_max_chars} + return full_content + + +def _build_advanced_settings( + *, + excerpts: Optional[ExcerptSettings], + full_content: Union[bool, dict[str, Any]], + fetch_policy: Optional[FetchPolicy], +) -> Optional[dict[str, Any]]: + """Pack the user-facing flat fields into the GA `advanced_settings` envelope.""" + settings: dict[str, Any] = {} + if excerpts is not None: + settings["excerpt_settings"] = excerpts.model_dump(exclude_none=True) + if fetch_policy is not None: + settings["fetch_policy"] = fetch_policy.model_dump(exclude_none=True) + # full_content goes through whether True/False/dict — the API treats False + # as "do not return full content" (default). 
+ if full_content is not False: + settings["full_content"] = full_content + return settings or None + + +def _format_results_for_llm(results: list[dict[str, Any]]) -> str: + """Build a compact, LLM-friendly string from formatted extract results.""" + if not results: + return "No content extracted." + blocks: list[str] = [] + for r in results: + url = r.get("url") or "" + title = r.get("title") or "(untitled)" + if "error_type" in r: + blocks.append(f"[ERROR] {title}\n {url}\n {r.get('content', '')}") + continue + body = r.get("content") or "" + if len(body) > 800: + body = body[:800] + "..." + blocks.append(f"## {title}\n{url}\n\n{body}") + return "\n\n---\n\n".join(blocks) + + class ParallelExtractInput(BaseModel): """Input schema for Parallel Extract Tool.""" - urls: list[str] = Field(description="List of URLs to extract content from") - + urls: list[str] = Field( + description="List of URLs to extract content from. Up to 20 per request.", + ) search_objective: Optional[str] = Field( default=None, description=( - "If provided, focuses extracted content on the specified search objective" + "Natural-language objective to focus extraction. Up to 5000 characters." ), ) - search_queries: Optional[list[str]] = Field( default=None, - description=( - "If provided, focuses extracted content on the specified keyword search " - "queries" - ), + description="Keyword queries to focus extracted content.", ) - - excerpts: Union[bool, ExcerptSettings] = Field( - default=True, + excerpts: Optional[ExcerptSettings] = Field( + default=None, description=( - "Include excerpts from each URL relevant to the search objective and " - "queries. Can be boolean or ExcerptSettings object." + "Per-result excerpt-size settings. In v1 GA, excerpts are always " + "returned; this field controls only their size. Boolean values " + "are accepted via the legacy path with a deprecation warning." 
), ) - full_content: Union[bool, FullContentSettings] = Field( default=False, description=( - "Include full content from each URL. Can be boolean or " - "FullContentSettings object." + "Include full page content in addition to excerpts. " + "Use FullContentSettings(max_chars_per_result=...) to cap size." + ), + ) + max_chars_total: Optional[int] = Field( + default=None, + description=( + "Upper bound on total characters of excerpts across all results. " + "Does not affect full_content." ), ) - fetch_policy: Optional[FetchPolicy] = Field( + default=None, + description="Policy for cached vs live content fetches.", + ) + client_model: Optional[str] = Field( default=None, description=( - "Fetch policy: determines when to return content from the cache " - "(faster) vs fetching live content (fresher)" + "Identifier of the calling LLM, used by the API for " + "model-specific result optimizations." + ), + ) + session_id: Optional[str] = Field( + default=None, + description=( + "Group related Search and Extract calls made by the same agent task " + "under a shared session id. The server returns one if not provided." ), ) - timeout: Optional[float] = Field( default=None, description=( - "Request timeout in seconds. If not specified, uses default of " - "5 seconds per URL." + "Request timeout in seconds. If not specified, uses the SDK default." ), ) @@ -71,8 +139,9 @@ class ParallelExtractInput(BaseModel): class ParallelExtractTool(BaseTool): """Parallel Extract Tool. - This tool extracts clean, structured content from web pages using the - Parallel Extract API. + Calls Parallel's Extract API to pull clean, structured content from web + pages. Returns a compact summary string the LLM sees and the full + structured response as a tool artifact. Setup: Install `langchain-parallel` and set environment variable @@ -90,55 +159,52 @@ class ParallelExtractTool(BaseTool): base_url: str Base URL for Parallel API. Defaults to "https://api.parallel.ai". 
max_chars_per_extract: Optional[int] - Maximum characters per extracted result. + Tool-wide default cap on full_content size (per URL). Only applied + when full_content is passed as ``True`` (a settings object always + wins). Instantiation: ```python from langchain_parallel import ParallelExtractTool - # Basic instantiation tool = ParallelExtractTool() - - # With custom API key and parameters - tool = ParallelExtractTool( - api_key="your-api-key", - max_chars_per_extract=5000 - ) ``` Invocation: ```python - # Extract content from URLs - result = tool.invoke({ - "urls": [ - "https://example.com/article1", - "https://example.com/article2" - ] + # Returns (content_str, artifact_list). + content, artifact = tool.invoke({ + "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], + "search_objective": "Main applications of AI", + "full_content": False, }) + for r in artifact: + print(r["url"], r.get("title")) + ``` - # Result is a list of dicts with url, title, and content - for item in result: - print(f"Title: {item['title']}") - print(f"URL: {item['url']}") - print(f"Content: {item['content'][:200]}...") + Async: + ```python + content, artifact = await tool.ainvoke({"urls": [...]}) ``` - Response Format: - Returns a list of dictionaries, each containing: - - url: The URL that was extracted - - title: Title of the webpage - - content: Full extracted content as markdown - - publish_date: Publish date if available (optional) + Response artifact (list[dict]): + Each item carries `url`, `title`, optional `publish_date`, and + either `excerpts` (always present in v1) and/or `full_content`. + Errors carry `error_type` and `http_status_code`. """ name: str = "parallel_extract" description: str = ( - "Extract clean, structured content from web pages. " - "Input should be a list of URLs to extract content from. " - "Returns extracted content formatted as markdown." + "Extract clean, structured content from web pages using Parallel's " + "Extract API. 
Returns a compact summary string plus a list of " + "per-URL records as artifact (url, title, excerpts, full_content)." ) args_schema: type[BaseModel] = ParallelExtractInput + response_format: Literal["content", "content_and_artifact"] = "content_and_artifact" + """Tools return ``(content, artifact)``: a compact summary string the + LLM sees, and the per-URL records list for downstream code.""" + api_key: Optional[SecretStr] = Field(default=None) """Parallel API key. If not provided, will be read from env var.""" @@ -146,108 +212,58 @@ class ParallelExtractTool(BaseTool): """Base URL for Parallel API.""" max_chars_per_extract: Optional[int] = None - """Maximum characters per extracted result.""" + """Tool-wide default cap on full_content size (per URL). + Only applied when ``full_content=True`` is passed. + """ - _client: Any = None - """Synchronous extract client (initialized after validation).""" + _client: Optional[Parallel] = None + """Synchronous Parallel SDK client (initialized after validation).""" - _async_client: Any = None - """Asynchronous extract client (initialized after validation).""" + _async_client: Optional[AsyncParallel] = None + """Asynchronous Parallel SDK client (initialized after validation).""" @model_validator(mode="after") def validate_environment(self) -> ParallelExtractTool: - """Validate the environment and initialize clients.""" - # Get API key from parameter or environment + """Validate the environment and initialize SDK clients.""" api_key_str = get_api_key( - self.api_key.get_secret_value() if self.api_key else None + self.api_key.get_secret_value() if self.api_key else None, ) - - # Initialize both sync and async clients once - self._client = get_extract_client(api_key_str, self.base_url) - self._async_client = get_async_extract_client(api_key_str, self.base_url) - + self._client = get_parallel_client(api_key_str, self.base_url) + self._async_client = get_async_parallel_client(api_key_str, self.base_url) return self - def 
_prepare_extract_params( + def _format_response( self, - excerpts: Union[bool, ExcerptSettings], - full_content: Union[bool, FullContentSettings], - fetch_policy: Optional[FetchPolicy], - ) -> tuple[Any, Any, Optional[dict[str, Any]]]: - """Prepare parameters for extract API call. - - Args: - excerpts: Include excerpts (boolean or ExcerptSettings) - full_content: Include full content (boolean or FullContentSettings) - fetch_policy: Optional fetch policy for cache vs live content - - Returns: - Tuple of (excerpts_param, full_content_param, fetch_policy_param) - """ - # Build full_content config - full_content_param = full_content - if self.max_chars_per_extract and isinstance(full_content, bool): - # Use tool-level config if full_content is just a boolean - full_content_param = {"max_chars_per_result": self.max_chars_per_extract} - elif isinstance(full_content, FullContentSettings): - full_content_param = full_content.model_dump(exclude_none=True) - - # Build excerpts config - excerpts_param = excerpts - if isinstance(excerpts, ExcerptSettings): - excerpts_param = excerpts.model_dump(exclude_none=True) - - # Build fetch_policy config - fetch_policy_param = None - if fetch_policy: - fetch_policy_param = fetch_policy.model_dump(exclude_none=True) - - return excerpts_param, full_content_param, fetch_policy_param - - def _format_extract_response( - self, extract_response: dict[str, Any] + extract_response: dict[str, Any], ) -> list[dict[str, Any]]: - """Format the extract API response. + """Format the extract API response into a per-URL list. 
- Args: - extract_response: Raw response from the extract API - - Returns: - List of formatted result dictionaries + Mirrors the v0.2 shape so existing consumers continue to work: + - "content" stays populated (full_content if present, else joined excerpts) + - error rows carry "error_type" and "http_status_code" """ - results = extract_response.get("results", []) - errors = extract_response.get("errors", []) + results = extract_response.get("results") or [] + errors = extract_response.get("errors") or [] - # Format results - formatted_results = [] + formatted: list[dict[str, Any]] = [] for result in results: - formatted_result = { + entry: dict[str, Any] = { "url": result.get("url"), "title": result.get("title"), } - - # Add excerpts if present - if "excerpts" in result and result["excerpts"] is not None: - formatted_result["excerpts"] = result["excerpts"] - # Combine excerpts into content field for backward compatibility - # Excerpts are a list of strings, join them with newlines - formatted_result["content"] = "\n\n".join(result["excerpts"]) - - # Add full_content if present and not None - # (overrides excerpts-based content) - if "full_content" in result and result["full_content"] is not None: - formatted_result["full_content"] = result["full_content"] - # For backward compatibility, also set as "content" - formatted_result["content"] = result["full_content"] - - # Add optional fields if present + excerpts = result.get("excerpts") + full_content = result.get("full_content") + if excerpts is not None: + entry["excerpts"] = excerpts + entry["content"] = "\n\n".join(excerpts) + if full_content is not None: + entry["full_content"] = full_content + entry["content"] = full_content if "publish_date" in result: - formatted_result["publish_date"] = result["publish_date"] + entry["publish_date"] = result["publish_date"] + formatted.append(entry) - formatted_results.append(formatted_result) - - # If there were errors, add them to the results with error info - 
formatted_results.extend( + formatted.extend( [ { "url": error.get("url"), @@ -257,180 +273,187 @@ def _format_extract_response( "http_status_code": error.get("http_status_code"), } for error in errors - ] + ], + ) + return formatted + + def _build_call_kwargs( + self, + *, + urls: list[str], + search_objective: Optional[str], + search_queries: Optional[list[str]], + excerpts: Optional[ExcerptSettings], + full_content: Union[bool, FullContentSettings, dict[str, Any]], + fetch_policy: Optional[FetchPolicy], + max_chars_total: Optional[int], + client_model: Optional[str], + session_id: Optional[str], + timeout: Optional[float], + ) -> dict[str, Any]: + """Resolve params into the GA `client.extract(...)` shape.""" + if not urls: + msg = "At least one URL must be provided." + raise ValueError(msg) + + full_content_resolved = _coerce_full_content( + full_content, + tool_max_chars=self.max_chars_per_extract, + ) + advanced_settings = _build_advanced_settings( + excerpts=excerpts, + full_content=full_content_resolved, + fetch_policy=fetch_policy, ) - return formatted_results + kwargs: dict[str, Any] = {"urls": list(urls)} + if search_objective is not None: + kwargs["objective"] = search_objective + if search_queries is not None: + kwargs["search_queries"] = list(search_queries) + if max_chars_total is not None: + kwargs["max_chars_total"] = max_chars_total + if client_model is not None: + kwargs["client_model"] = client_model + if session_id is not None: + kwargs["session_id"] = session_id + if advanced_settings is not None: + kwargs["advanced_settings"] = advanced_settings + if timeout is not None: + kwargs["timeout"] = timeout + return kwargs def _run( self, urls: list[str], search_objective: Optional[str] = None, search_queries: Optional[list[str]] = None, - excerpts: Union[bool, ExcerptSettings] = True, + excerpts: Optional[ExcerptSettings] = None, full_content: Union[bool, FullContentSettings] = False, + max_chars_total: Optional[int] = None, fetch_policy: 
Optional[FetchPolicy] = None, + client_model: Optional[str] = None, + session_id: Optional[str] = None, timeout: Optional[float] = None, run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> list[dict[str, Any]]: - """Extract content from URLs. - - Args: - urls: List of URLs to extract content from - search_objective: Optional search objective to focus extraction - search_queries: Optional keyword search queries to focus extraction - excerpts: Include excerpts (boolean or ExcerptSettings) - full_content: Include full content (boolean or FullContentSettings) - fetch_policy: Optional fetch policy for cache vs live content - timeout: Request timeout in seconds (defaults to 5 seconds per URL) - run_manager: Callback manager for the tool run - - Returns: - List of dictionaries with extracted content - """ - # Notify callback manager about extraction start + ) -> tuple[str, list[dict[str, Any]]]: + """Extract content from URLs.""" + if self._client is None: + msg = "Parallel client not initialized." 
+ raise RuntimeError(msg) + if run_manager: - url_count = len(urls) - url_desc = f"{url_count} URL{'s' if url_count != 1 else ''}" + count = len(urls) run_manager.on_text( - f"Starting content extraction from {url_desc}\n", color="blue" - ) - - try: - # Prepare parameters for the extract API call - excerpts_param, full_content_param, fetch_policy_param = ( - self._prepare_extract_params(excerpts, full_content, fetch_policy) - ) - - # Notify about extraction execution - if run_manager: - run_manager.on_text("Executing extraction...\n", color="yellow") - - # Extract content from URLs using the pre-initialized client - extract_response = self._client.extract( - urls=urls, - objective=search_objective, - search_queries=search_queries, - excerpts=excerpts_param, - full_content=full_content_param, - fetch_policy=fetch_policy_param, - timeout=timeout, + f"Starting content extraction from {count} URL" + f"{'' if count == 1 else 's'}\n", + color="blue", ) - # Format and return the response - result = self._format_extract_response(extract_response) - - # Notify callback manager about completion - if run_manager: - success_count = sum(1 for item in result if "error_type" not in item) - error_count = len(result) - success_count - if error_count > 0: - run_manager.on_text( - f"Extraction completed: {success_count} succeeded, " - f"{error_count} failed\n", - color="green", - ) - else: - url_text = "URL" if success_count == 1 else "URLs" - run_manager.on_text( - f"Extraction completed: {success_count} {url_text} processed\n", - color="green", - ) - - return result + kwargs = self._build_call_kwargs( + urls=urls, + search_objective=search_objective, + search_queries=search_queries, + excerpts=excerpts, + full_content=full_content, + fetch_policy=fetch_policy, + max_chars_total=max_chars_total, + client_model=client_model, + session_id=session_id, + timeout=timeout, + ) + try: + response_obj = self._client.extract(**kwargs) except Exception as e: - # Notify callback manager about 
error if run_manager: run_manager.on_text(f"Extraction failed: {e!s}\n", color="red") msg = f"Error calling Parallel Extract API: {e!s}" raise ValueError(msg) from e + formatted = self._format_response(response_obj.model_dump()) + + if run_manager: + success_count = sum(1 for item in formatted if "error_type" not in item) + error_count = len(formatted) - success_count + run_manager.on_text( + ( + f"Extraction completed: {success_count} succeeded, " + f"{error_count} failed\n" + if error_count + else f"Extraction completed: {success_count} URL" + f"{'' if success_count == 1 else 's'} processed\n" + ), + color="green", + ) + + return _format_results_for_llm(formatted), formatted + async def _arun( self, urls: list[str], search_objective: Optional[str] = None, search_queries: Optional[list[str]] = None, - excerpts: Union[bool, ExcerptSettings] = True, + excerpts: Optional[ExcerptSettings] = None, full_content: Union[bool, FullContentSettings] = False, + max_chars_total: Optional[int] = None, fetch_policy: Optional[FetchPolicy] = None, + client_model: Optional[str] = None, + session_id: Optional[str] = None, timeout: Optional[float] = None, run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> list[dict[str, Any]]: - """Extract content from URLs asynchronously. 
- - Args: - urls: List of URLs to extract content from - search_objective: Optional search objective to focus extraction - search_queries: Optional keyword search queries to focus extraction - excerpts: Include excerpts (boolean or ExcerptSettings) - full_content: Include full content (boolean or FullContentSettings) - fetch_policy: Optional fetch policy for cache vs live content - timeout: Request timeout in seconds (defaults to 5 seconds per URL) - run_manager: Async callback manager for the tool run - - Returns: - List of dictionaries with extracted content - """ - # Notify callback manager about extraction start + ) -> tuple[str, list[dict[str, Any]]]: + """Async extract content from URLs.""" + if self._async_client is None: + msg = "Async Parallel client not initialized." + raise RuntimeError(msg) + if run_manager: - url_count = len(urls) - url_desc = f"{url_count} URL{'s' if url_count != 1 else ''}" + count = len(urls) await run_manager.on_text( - f"Starting async content extraction from {url_desc}\n", color="blue" - ) - - try: - # Prepare parameters for the extract API call - excerpts_param, full_content_param, fetch_policy_param = ( - self._prepare_extract_params(excerpts, full_content, fetch_policy) - ) - - # Notify about extraction execution - if run_manager: - await run_manager.on_text( - "Executing async extraction...\n", color="yellow" - ) - - # Extract content from URLs using the pre-initialized async client - extract_response = await self._async_client.extract( - urls=urls, - objective=search_objective, - search_queries=search_queries, - excerpts=excerpts_param, - full_content=full_content_param, - fetch_policy=fetch_policy_param, - timeout=timeout, + f"Starting async content extraction from {count} URL" + f"{'' if count == 1 else 's'}\n", + color="blue", ) - # Format and return the response - result = self._format_extract_response(extract_response) - - # Notify callback manager about completion - if run_manager: - success_count = sum(1 for item in 
result if "error_type" not in item) - error_count = len(result) - success_count - if error_count > 0: - await run_manager.on_text( - f"Async extraction completed: {success_count} succeeded, " - f"{error_count} failed\n", - color="green", - ) - else: - url_text = "URL" if success_count == 1 else "URLs" - await run_manager.on_text( - f"Async extraction completed: {success_count} {url_text} " - f"processed\n", - color="green", - ) - - return result + kwargs = self._build_call_kwargs( + urls=urls, + search_objective=search_objective, + search_queries=search_queries, + excerpts=excerpts, + full_content=full_content, + fetch_policy=fetch_policy, + max_chars_total=max_chars_total, + client_model=client_model, + session_id=session_id, + timeout=timeout, + ) + try: + response_obj = await self._async_client.extract(**kwargs) except Exception as e: - # Notify callback manager about error if run_manager: await run_manager.on_text( - f"Async extraction failed: {e!s}\n", color="red" + f"Async extraction failed: {e!s}\n", + color="red", ) msg = f"Error calling Parallel Extract API: {e!s}" raise ValueError(msg) from e + + formatted = self._format_response(response_obj.model_dump()) + + if run_manager: + success_count = sum(1 for item in formatted if "error_type" not in item) + error_count = len(formatted) - success_count + await run_manager.on_text( + ( + f"Async extraction completed: {success_count} succeeded, " + f"{error_count} failed\n" + if error_count + else f"Async extraction completed: {success_count} URL" + f"{'' if success_count == 1 else 's'} processed\n" + ), + color="green", + ) + + return _format_results_for_llm(formatted), formatted diff --git a/langchain_parallel/search_tool.py b/langchain_parallel/search_tool.py index 1c78d11..65b7b96 100644 --- a/langchain_parallel/search_tool.py +++ b/langchain_parallel/search_tool.py @@ -2,18 +2,95 @@ from __future__ import annotations +import warnings from datetime import datetime -from typing import Any, Optional, Union +from 
typing import Any, Literal, Optional, Union from langchain_core.callbacks import ( AsyncCallbackManagerForToolRun, CallbackManagerForToolRun, ) from langchain_core.tools import BaseTool +from parallel import AsyncParallel, Parallel from pydantic import BaseModel, Field, SecretStr, model_validator -from ._client import get_api_key, get_async_search_client, get_search_client -from ._types import ExcerptSettings, FetchPolicy +from ._client import get_api_key, get_async_parallel_client, get_parallel_client +from ._types import ExcerptSettings, FetchPolicy, SourcePolicy + +_LEGACY_MODE_MAP: dict[str, str] = { + "fast": "basic", + "one-shot": "basic", + "agentic": "advanced", +} + + +def _normalize_mode(mode: Optional[str]) -> Optional[str]: + """Translate legacy beta mode strings to the GA `basic` / `advanced` set.""" + if mode is None or mode in {"basic", "advanced"}: + return mode + if mode in _LEGACY_MODE_MAP: + new_mode = _LEGACY_MODE_MAP[mode] + warnings.warn( + f"mode='{mode}' is a legacy beta value; mapping to '{new_mode}'. " + f"Pass mode='{new_mode}' directly to silence this warning.", + DeprecationWarning, + stacklevel=3, + ) + return new_mode + msg = ( + f"Invalid mode '{mode}'. Expected one of: 'basic', 'advanced'. 
" + f"(Legacy values 'fast', 'one-shot', 'agentic' are accepted with a warning.)" + ) + raise ValueError(msg) + + +def _coerce_source_policy( + source_policy: Optional[Union[SourcePolicy, dict[str, Any]]], +) -> Optional[dict[str, Any]]: + """Accept a SourcePolicy model or a raw dict, return a dict for the SDK.""" + if source_policy is None: + return None + if isinstance(source_policy, SourcePolicy): + return source_policy.model_dump(exclude_none=True) + return {k: v for k, v in source_policy.items() if v is not None} + + +def _build_advanced_settings( + *, + excerpts: Optional[ExcerptSettings], + fetch_policy: Optional[FetchPolicy], + source_policy: Optional[Union[SourcePolicy, dict[str, Any]]], + max_results: Optional[int], + location: Optional[str], +) -> Optional[dict[str, Any]]: + """Pack the user-facing flat fields into the GA `advanced_settings` envelope.""" + settings: dict[str, Any] = {} + if excerpts is not None: + settings["excerpt_settings"] = excerpts.model_dump(exclude_none=True) + if fetch_policy is not None: + settings["fetch_policy"] = fetch_policy.model_dump(exclude_none=True) + sp = _coerce_source_policy(source_policy) + if sp: + settings["source_policy"] = sp + if max_results is not None: + settings["max_results"] = max_results + if location is not None: + settings["location"] = location + return settings or None + + +def _format_results_for_llm(response: dict[str, Any]) -> str: + """Build a compact, LLM-friendly string from the raw search response.""" + results = response.get("results") or [] + if not results: + return "No results." + lines: list[str] = [] + for i, result in enumerate(results, 1): + title = result.get("title") or "(untitled)" + url = result.get("url") or "" + lines.append(f"{i}. 
{title}\n {url}") + lines.extend(f" - {excerpt}" for excerpt in result.get("excerpts") or []) + return "\n".join(lines) class ParallelWebSearchInput(BaseModel): @@ -21,67 +98,106 @@ class ParallelWebSearchInput(BaseModel): objective: Optional[str] = Field( default=None, - description="Natural-language description of what the web research goal is. " - "Include any source or freshness guidance. Either this or search_queries " - "must be provided.", + description=( + "Natural-language description of the research goal. Up to 5000 " + "characters. Include any source or freshness guidance. Recommended " + "alongside `search_queries` for best results." + ), ) search_queries: Optional[list[str]] = Field( default=None, - description="Optional list of search queries to guide the search. " - "Maximum 5 queries, each up to 200 characters. Either this or objective " - "must be provided.", + description=( + "List of keyword search queries to guide the search. Maximum 5 " + "queries, each up to 200 characters (3-6 words works best). " + "Required for the GA endpoint; if only `objective` is provided, " + "the call falls back to the deprecated v1beta endpoint." + ), ) max_results: int = Field( - default=10, description="Maximum number of search results to return (1 to 40)." + default=10, + description="Maximum number of search results to return (1 to 40).", ) excerpts: Optional[ExcerptSettings] = Field( default=None, description=( - "Optional excerpt settings for controlling excerpt length. " - "Example: ExcerptSettings(max_chars_per_result=1500)" + "Per-result excerpt-size settings. " + "Example: ExcerptSettings(max_chars_per_result=1500)." + ), + ) + max_chars_total: Optional[int] = Field( + default=None, + description=( + "Upper bound on the total characters of excerpts across all results. " + "Useful for capping context size when feeding results to an LLM." 
), ) mode: Optional[str] = Field( default=None, description=( - "Search mode: 'one-shot' for comprehensive results with longer " - "excerpts, 'agentic' for concise, token-efficient results. " - "Defaults to 'one-shot'." + "Search mode: 'basic' for low-latency searches, 'advanced' (default) " + "for higher quality with more retrieval and compression. Legacy " + "values 'fast', 'one-shot' (-> 'basic') and 'agentic' (-> 'advanced') " + "are accepted with a deprecation warning." ), ) - source_policy: Optional[dict[str, Union[str, list[str]]]] = Field( + source_policy: Optional[Union[SourcePolicy, dict[str, Any]]] = Field( default=None, description=( - "Optional source policy with 'include_domains' and/or " - "'exclude_domains' lists. Example: " - "{'include_domains': ['wikipedia.org'], 'exclude_domains': ['reddit.com']}" + "Domain include/exclude lists and a freshness floor (after_date). " + "Example: SourcePolicy(include_domains=['nature.com'], " + "after_date='2024-01-01'). A raw dict is also accepted." ), ) fetch_policy: Optional[FetchPolicy] = Field( default=None, description=( - "Optional fetch policy to control when to return cached vs live " - "content. Example: FetchPolicy(max_age_seconds=86400, timeout_seconds=60)" + "Cache vs live-fetch policy. " + "Example: FetchPolicy(max_age_seconds=86400, timeout_seconds=60)." + ), + ) + location: Optional[str] = Field( + default=None, + description=( + "ISO 3166-1 alpha-2 country code (e.g., 'us', 'gb', 'de', 'jp') " + "to geo-target results. Unsupported values are ignored with a " + "warning by the API." + ), + ) + client_model: Optional[str] = Field( + default=None, + description=( + "Identifier of the calling LLM, used by the API for model-specific " + "result optimizations." + ), + ) + session_id: Optional[str] = Field( + default=None, + description=( + "Group related Search and Extract calls made by the same agent task " + "under a shared session id. The server returns one if not provided." 
), ) include_metadata: bool = Field( default=True, - description="Whether to include metadata in the response " - "(search timing, result counts, etc.).", + description=( + "Whether to attach client-side timing/result metadata to the artifact." + ), ) timeout: Optional[int] = Field( default=None, - description="Request timeout in seconds. If not specified, uses default timeout.", # noqa: E501 + description=( + "Request timeout in seconds. If not specified, uses the SDK default." + ), ) class ParallelWebSearchTool(BaseTool): """Parallel Search tool with web research capabilities. - This tool provides access to Parallel's Search API, which streamlines - the traditional search → scrape → extract pipeline into a single API call. - Features include domain filtering, multiple processors, async support, - and metadata collection. + This tool calls Parallel's Search API, which streamlines the traditional + search -> scrape -> extract pipeline into a single API call. It supports + natural-language objectives, keyword queries, domain filters, two modes + (`basic`, `advanced`), location targeting, and async usage. Setup: Install `langchain-parallel` and set environment variable @@ -103,136 +219,83 @@ class ParallelWebSearchTool(BaseTool): ```python from langchain_parallel import ParallelWebSearchTool - # Basic instantiation tool = ParallelWebSearchTool() - - # With custom API key - tool = ParallelWebSearchTool(api_key="your-api-key") ``` - Basic Usage: + Invocation: ```python - # Simple objective-based search - result = tool.invoke({ - "objective": "What are the latest developments in AI?" - }) - - # Query-based search with multiple queries - result = tool.invoke({ - "search_queries": [ - "latest AI developments 2024", - "machine learning breakthroughs", - "artificial intelligence news" - ], - "max_results": 10 + # Returns (content_str, artifact_dict). The string is what the agent + # sees in a ToolMessage; the dict is the full Parallel response. 
+ content, artifact = tool.invoke({ + "objective": "Latest developments in AI agents", + "search_queries": ["AI agents 2026", "autonomous LLM systems"], + "mode": "advanced", + "max_results": 5, }) + print(content) + print(artifact["search_id"], len(artifact["results"])) ``` - Domain filtering and advanced options: + Domain and freshness filters: ```python - # Domain filtering with fetch policy (using dict format) - result = tool.invoke({ - "objective": "Recent climate change research", - "source_policy": { - "include_domains": ["nature.com", "science.org"], - "exclude_domains": ["reddit.com", "twitter.com"] - }, - "max_results": 15, - "excerpts": {"max_chars_per_result": 2000}, # Auto-converted - "mode": "one-shot", # Use 'agentic' for token-efficient results - "fetch_policy": { # Auto-converted to FetchPolicy - "max_age_seconds": 86400, # 1 day cache - "timeout_seconds": 60 - }, - "include_metadata": True - }) - - # Or use the types directly - from langchain_parallel import ExcerptSettings, FetchPolicy + from langchain_parallel import SourcePolicy result = tool.invoke({ - "objective": "Recent climate change research", - "excerpts": ExcerptSettings(max_chars_per_result=2000), - "fetch_policy": FetchPolicy(max_age_seconds=86400, timeout_seconds=60), + "search_queries": ["climate research breakthroughs"], + "source_policy": SourcePolicy( + include_domains=["nature.com", "science.org"], + after_date="2025-01-01", + ), + "location": "us", }) ``` - Async Usage: + Async: ```python - import asyncio - - async def search_async(): - result = await tool.ainvoke({ - "objective": "Latest tech news" - }) - return result - - result = asyncio.run(search_async()) + content, artifact = await tool.ainvoke({"search_queries": ["..."]}) ``` - Response Format: + Response artifact: ```python { - "search_id": "search_abc123...", + "search_id": "search_abc123", + "session_id": "sess_...", "results": [ - { - "url": "https://example.com/article", - "title": "Article Title", - "excerpts": [ - 
"Relevant excerpt from the page...", - "Another important section..." - ] - } + {"url": "...", "title": "...", "publish_date": "...", + "excerpts": ["..."]}, + ... ], - "search_metadata": { + "warnings": [...], + "usage": {...}, + "search_metadata": { # added by this tool when include_metadata=True "search_duration_seconds": 2.451, - "search_timestamp": "2024-01-15T10:30:00", - "max_results_requested": 10, - "actual_results_returned": 8, - "search_id": "search_abc123...", - "query_count": 3, - "queries_used": ["query1", "query2", "query3"], - "source_policy_applied": true, - "included_domains": ["nature.com"], - "excluded_domains": ["reddit.com"] + "search_timestamp": "2026-04-27T10:30:00", + "endpoint": "v1", + "actual_results_returned": 5, } } ``` - Tool Calling Integration: - ```python - # When used with LangChain agents or chat models with tool calling - from langchain_core.messages import HumanMessage - from langchain_parallel import ChatParallelWeb - - chat = ChatParallelWeb() - chat_with_tools = chat.bind_tools([tool]) - - response = chat_with_tools.invoke([ - HumanMessage(content="Search for the latest AI research papers") - ]) - ``` - - Best Practices: - - Use specific objectives for better results - - Apply domain filtering for focused searches - - Include metadata for debugging and optimization """ name: str = "parallel_web_search" - """The name that is passed to the model when performing tool calling.""" + """The name passed to the model when performing tool calling.""" description: str = ( "Search the web using Parallel's Search API. " "Provides real-time web information with compressed, structured excerpts " - "optimized for LLM consumption. Supports domain filtering " - "and metadata. Specify either an objective " - "(natural language goal) or specific search queries for targeted results." + "optimized for LLM consumption. Supports natural-language objectives, " + "keyword queries, domain filtering, and geo-targeting. 
Returns a " + "compact summary string plus the full structured response as artifact." ) - """The description that is passed to the model when performing tool calling.""" + """The description passed to the model when performing tool calling.""" args_schema: type[BaseModel] = ParallelWebSearchInput - """The schema that is passed to the model when performing tool calling.""" + """The schema passed to the model when performing tool calling.""" + + response_format: Literal["content", "content_and_artifact"] = "content_and_artifact" + """Tools return ``(content, artifact)``: a compact summary string the + LLM sees, and the full Parallel API response dict for downstream code.""" api_key: Optional[SecretStr] = Field(default=None) """Parallel API key. If not provided, will be read from @@ -241,62 +304,120 @@ async def search_async(): base_url: str = Field(default="https://api.parallel.ai") """Base URL for Parallel API.""" - _client: Any = None - """Synchronous search client (initialized after validation).""" + _client: Optional[Parallel] = None + """Synchronous Parallel SDK client (initialized after validation).""" - _async_client: Any = None - """Asynchronous search client (initialized after validation).""" + _async_client: Optional[AsyncParallel] = None + """Asynchronous Parallel SDK client (initialized after validation).""" @model_validator(mode="after") def validate_environment(self) -> ParallelWebSearchTool: - """Validate the environment and initialize clients.""" - # Get API key from parameter or environment + """Validate the environment and initialize SDK clients.""" api_key_str = get_api_key( - self.api_key.get_secret_value() if self.api_key else None + self.api_key.get_secret_value() if self.api_key else None, ) - - # Initialize both sync and async clients once - self._client = get_search_client(api_key_str, self.base_url) - self._async_client = get_async_search_client(api_key_str, self.base_url) - + self._client = get_parallel_client(api_key_str, self.base_url) + 
self._async_client = get_async_parallel_client(api_key_str, self.base_url) return self - def _create_response_metadata( + def _build_metadata( self, + *, start_time: datetime, - search_params: dict[str, Any], + endpoint: str, response: dict[str, Any], - *, - include_metadata: bool, ) -> dict[str, Any]: - """Create response metadata.""" - if not include_metadata: - return {} - + """Build client-side timing/result metadata.""" end_time = datetime.now() - duration = (end_time - start_time).total_seconds() - - metadata = { - "search_duration_seconds": round(duration, 3), + return { + "search_duration_seconds": round( + (end_time - start_time).total_seconds(), + 3, + ), "search_timestamp": start_time.isoformat(), - "max_results_requested": search_params.get("max_results", 10), - "actual_results_returned": len(response.get("results", [])), - "search_id": response.get("search_id"), + "endpoint": endpoint, + "actual_results_returned": len(response.get("results") or []), } - if search_params.get("search_queries"): - metadata["query_count"] = len(search_params["search_queries"]) - metadata["queries_used"] = search_params["search_queries"] - - if search_params.get("source_policy"): - metadata["source_policy_applied"] = True - policy = search_params["source_policy"] - if "include_domains" in policy: - metadata["included_domains"] = policy["include_domains"] - if "exclude_domains" in policy: - metadata["excluded_domains"] = policy["exclude_domains"] + def _build_call_kwargs( + self, + *, + objective: Optional[str], + search_queries: Optional[list[str]], + mode: Optional[str], + max_chars_total: Optional[int], + client_model: Optional[str], + session_id: Optional[str], + excerpts: Optional[ExcerptSettings], + fetch_policy: Optional[FetchPolicy], + source_policy: Optional[Union[SourcePolicy, dict[str, Any]]], + max_results: int, + location: Optional[str], + timeout: Optional[int], + ) -> tuple[str, dict[str, Any]]: + """Resolve params + endpoint (v1 GA vs v1beta fallback).""" + 
if not objective and not search_queries: + msg = "Either 'objective' or 'search_queries' must be provided." + raise ValueError(msg) + + normalized_mode = _normalize_mode(mode) + advanced_settings = _build_advanced_settings( + excerpts=excerpts, + fetch_policy=fetch_policy, + source_policy=source_policy, + max_results=max_results, + location=location, + ) - return metadata + if not search_queries: + warnings.warn( + "Calling Parallel Search without 'search_queries' falls back " + "to the deprecated v1beta endpoint. Provide search_queries " + "(1-5 keyword strings) to use the GA endpoint.", + DeprecationWarning, + stacklevel=4, + ) + kwargs: dict[str, Any] = { + "objective": objective, + "max_results": max_results, + } + if excerpts is not None: + kwargs["excerpts"] = excerpts.model_dump(exclude_none=True) + if fetch_policy is not None: + kwargs["fetch_policy"] = fetch_policy.model_dump(exclude_none=True) + sp = _coerce_source_policy(source_policy) + if sp: + kwargs["source_policy"] = sp + if normalized_mode is not None: + kwargs["mode"] = ( + "agentic" if normalized_mode == "advanced" else "one-shot" + ) + if client_model is not None: + kwargs["client_model"] = client_model + if session_id is not None: + kwargs["session_id"] = session_id + if location is not None: + kwargs["location"] = location + if timeout is not None: + kwargs["timeout"] = timeout + return "v1beta", kwargs + + kwargs = {"search_queries": list(search_queries)} + if objective is not None: + kwargs["objective"] = objective + if normalized_mode is not None: + kwargs["mode"] = normalized_mode + if max_chars_total is not None: + kwargs["max_chars_total"] = max_chars_total + if client_model is not None: + kwargs["client_model"] = client_model + if session_id is not None: + kwargs["session_id"] = session_id + if advanced_settings is not None: + kwargs["advanced_settings"] = advanced_settings + if timeout is not None: + kwargs["timeout"] = timeout + return "v1", kwargs def _run( self, @@ -304,193 +425,152 
@@ def _run( search_queries: Optional[list[str]] = None, max_results: int = 10, excerpts: Optional[ExcerptSettings] = None, + max_chars_total: Optional[int] = None, mode: Optional[str] = None, - source_policy: Optional[dict[str, Union[str, list[str]]]] = None, + source_policy: Optional[Union[SourcePolicy, dict[str, Any]]] = None, fetch_policy: Optional[FetchPolicy] = None, + location: Optional[str] = None, + client_model: Optional[str] = None, + session_id: Optional[str] = None, *, include_metadata: bool = True, timeout: Optional[int] = None, run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> dict[str, Any]: - """Execute the search using Parallel's Search API. - - Args: - objective: Natural-language description of the research goal - search_queries: List of specific search queries - max_results: Maximum number of results (1-40) - excerpts: Optional ExcerptSettings for controlling excerpt length - mode: Search mode ('one-shot' or 'agentic') - source_policy: Optional source policy for domain filtering - fetch_policy: Optional FetchPolicy for cache vs live content - include_metadata: Whether to include metadata - timeout: Request timeout in seconds - run_manager: Callback manager for the tool run - - Returns: - Dictionary containing search results with metadata - """ - start_time = datetime.now() + ) -> tuple[str, dict[str, Any]]: + """Execute the search using Parallel's Search API.""" + if self._client is None: + msg = "Parallel client not initialized." 
+ raise RuntimeError(msg) - # Notify callback manager about search start + start_time = datetime.now() if run_manager: query_desc = objective or f"{len(search_queries or [])} search queries" run_manager.on_text(f"Starting web search: {query_desc}\n", color="blue") - # Convert ExcerptSettings and FetchPolicy to dict if provided - excerpts_dict = excerpts.model_dump(exclude_none=True) if excerpts else None - fetch_policy_dict = ( - fetch_policy.model_dump(exclude_none=True) if fetch_policy else None + endpoint, kwargs = self._build_call_kwargs( + objective=objective, + search_queries=search_queries, + mode=mode, + max_chars_total=max_chars_total, + client_model=client_model, + session_id=session_id, + excerpts=excerpts, + fetch_policy=fetch_policy, + source_policy=source_policy, + max_results=max_results, + location=location, + timeout=timeout, ) - search_params = { - "objective": objective, - "search_queries": search_queries, - "max_results": max_results, - "excerpts": excerpts_dict, - "mode": mode, - "source_policy": source_policy, - "fetch_policy": fetch_policy_dict, - } - try: - # Notify about search execution - if run_manager: - run_manager.on_text("Executing search...\n", color="yellow") - - # Perform search using pre-initialized client - response = self._client.search( - objective=objective, - search_queries=search_queries, - max_results=max_results, - excerpts=excerpts_dict, - mode=mode, - source_policy=source_policy, - fetch_policy=fetch_policy_dict, - timeout=timeout, - ) - - # Create metadata - metadata = self._create_response_metadata( - start_time, search_params, response, include_metadata=include_metadata + response_obj: Any = ( + self._client.search(**kwargs) + if endpoint == "v1" + else self._client.beta.search(**kwargs) ) - if metadata: - response["search_metadata"] = metadata - - # Notify callback manager about completion - if run_manager: - result_count = len(response.get("results", [])) - duration = metadata.get("search_duration_seconds", 0) if 
metadata else 0 - run_manager.on_text( - f"Search completed: {result_count} results in {duration}s\n", - color="green", - ) - - return response - except Exception as e: - # Notify callback manager about error if run_manager: run_manager.on_text(f"Search failed: {e!s}\n", color="red") msg = f"Error calling Parallel Search API: {e!s}" raise ValueError(msg) from e + response: dict[str, Any] = response_obj.model_dump() + if include_metadata: + response["search_metadata"] = self._build_metadata( + start_time=start_time, + endpoint=endpoint, + response=response, + ) + + if run_manager: + count = len(response.get("results") or []) + duration = response.get("search_metadata", {}).get( + "search_duration_seconds", 0 + ) + run_manager.on_text( + f"Search completed: {count} results in {duration}s\n", + color="green", + ) + + return _format_results_for_llm(response), response + async def _arun( self, objective: Optional[str] = None, search_queries: Optional[list[str]] = None, max_results: int = 10, excerpts: Optional[ExcerptSettings] = None, + max_chars_total: Optional[int] = None, mode: Optional[str] = None, - source_policy: Optional[dict[str, Union[str, list[str]]]] = None, + source_policy: Optional[Union[SourcePolicy, dict[str, Any]]] = None, fetch_policy: Optional[FetchPolicy] = None, + location: Optional[str] = None, + client_model: Optional[str] = None, + session_id: Optional[str] = None, *, include_metadata: bool = True, timeout: Optional[int] = None, run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> dict[str, Any]: - """Async execute the search using Parallel's Search API. 
- - Args: - objective: Natural-language description of the research goal - search_queries: List of specific search queries - max_results: Maximum number of results (1-40) - excerpts: Optional ExcerptSettings for controlling excerpt length - mode: Search mode ('one-shot' or 'agentic') - source_policy: Optional source policy for domain filtering - fetch_policy: Optional FetchPolicy for cache vs live content - include_metadata: Whether to include metadata - timeout: Request timeout in seconds - run_manager: Async callback manager for the tool run - - Returns: - Dictionary containing search results with metadata - """ - start_time = datetime.now() + ) -> tuple[str, dict[str, Any]]: + """Async execute the search using Parallel's Search API.""" + if self._async_client is None: + msg = "Async Parallel client not initialized." + raise RuntimeError(msg) - # Notify callback manager about search start + start_time = datetime.now() if run_manager: query_desc = objective or f"{len(search_queries or [])} search queries" await run_manager.on_text( - f"Starting async web search: {query_desc}\n", color="blue" + f"Starting async web search: {query_desc}\n", + color="blue", ) - # Convert ExcerptSettings and FetchPolicy to dict if provided - excerpts_dict = excerpts.model_dump(exclude_none=True) if excerpts else None - fetch_policy_dict = ( - fetch_policy.model_dump(exclude_none=True) if fetch_policy else None + endpoint, kwargs = self._build_call_kwargs( + objective=objective, + search_queries=search_queries, + mode=mode, + max_chars_total=max_chars_total, + client_model=client_model, + session_id=session_id, + excerpts=excerpts, + fetch_policy=fetch_policy, + source_policy=source_policy, + max_results=max_results, + location=location, + timeout=timeout, ) - search_params = { - "objective": objective, - "search_queries": search_queries, - "max_results": max_results, - "excerpts": excerpts_dict, - "mode": mode, - "source_policy": source_policy, - "fetch_policy": fetch_policy_dict, - } 
- try: - # Notify about search execution + response_obj: Any = ( + await self._async_client.search(**kwargs) + if endpoint == "v1" + else await self._async_client.beta.search(**kwargs) + ) + except Exception as e: if run_manager: await run_manager.on_text( - "Executing async search...\n", - color="yellow", + f"Async search failed: {e!s}\n", + color="red", ) + msg = f"Error calling Parallel Search API: {e!s}" + raise ValueError(msg) from e - # Use the pre-initialized async client for better performance - response = await self._async_client.search( - objective=objective, - search_queries=search_queries, - max_results=max_results, - excerpts=excerpts_dict, - mode=mode, - source_policy=source_policy, - fetch_policy=fetch_policy_dict, - timeout=timeout, + response: dict[str, Any] = response_obj.model_dump() + if include_metadata: + response["search_metadata"] = self._build_metadata( + start_time=start_time, + endpoint=endpoint, + response=response, ) - # Create metadata - metadata = self._create_response_metadata( - start_time, search_params, response, include_metadata=include_metadata + if run_manager: + count = len(response.get("results") or []) + duration = response.get("search_metadata", {}).get( + "search_duration_seconds", 0 + ) + await run_manager.on_text( + f"Async search completed: {count} results in {duration}s\n", + color="green", ) - if metadata: - response["search_metadata"] = metadata - - # Notify callback manager about completion - if run_manager: - result_count = len(response.get("results", [])) - duration = metadata.get("search_duration_seconds", 0) if metadata else 0 - await run_manager.on_text( - f"Async search completed: {result_count} results in {duration}s\n", - color="green", - ) - - return response - except Exception as e: - # Notify callback manager about error - if run_manager: - await run_manager.on_text(f"Async search failed: {e!s}\n", color="red") - msg = f"Error calling Parallel Search API: {e!s}" - raise ValueError(msg) from e + return 
_format_results_for_llm(response), response diff --git a/poetry.lock b/poetry.lock index 129c934..5020db4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1191,14 +1191,14 @@ files = [ [[package]] name = "parallel-web" -version = "0.3.3" +version = "0.5.1" description = "The official Python library for the Parallel API" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "parallel_web-0.3.3-py3-none-any.whl", hash = "sha256:730187f8754c81bbdb7f3d06c5c44b6df25d665366c051f34409ab04aad69570"}, - {file = "parallel_web-0.3.3.tar.gz", hash = "sha256:31a33ae094182887d731390468d816a52dc76a7f462f8e00b91477098cb00e50"}, + {file = "parallel_web-0.5.1-py3-none-any.whl", hash = "sha256:7db65556a362d44ae864b5e4881a239e96377bcefbf931616d9c3b80a6124c21"}, + {file = "parallel_web-0.5.1.tar.gz", hash = "sha256:e967f3bd1833c73db30ea11aa49f5b3248c10342af1fa768a4a290ff8f4301f6"}, ] [package.dependencies] @@ -1207,7 +1207,7 @@ distro = ">=1.7.0,<2" httpx = ">=0.23.0,<1" pydantic = ">=1.9.0,<3" sniffio = "*" -typing-extensions = ">=4.10,<5" +typing-extensions = ">=4.14,<5" [package.extras] aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.9)"] @@ -2566,4 +2566,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "85dcc91af918c74e6c832182b6a6d130a51c6da49bb4c3b586ed4976a6edc72b" +content-hash = "1f2192a6249d32118c66b8ab9f78754e93a50cde422fef85e96d70e19cdc6053" diff --git a/pyproject.toml b/pyproject.toml index c8d1bb7..66e5b0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,12 +4,13 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "langchain-parallel" -version = "0.2.0" +version = "0.3.0" description = "A LangChain integration for Parallel Web AI services, including Chat and Search." 
authors = ["Parallel Team "] readme = "README.md" repository = "https://github.com/parallel-web/langchain-parallel" license = "MIT" +include = ["langchain_parallel/py.typed"] [tool.mypy] disallow_untyped_defs = true @@ -45,7 +46,7 @@ langchain-core = ">=1.1.0,<2.0.0" openai = "^1.88.0" pydantic = "^2.11.7" httpx = ">=0.28.1,<1.0.0" -parallel-web = "^0.3.3" +parallel-web = "^0.5.1" [tool.ruff] target-version = "py310" diff --git a/tests/integration_tests/test_extract_tool.py b/tests/integration_tests/test_extract_tool.py index 6ff626b..6131d6f 100644 --- a/tests/integration_tests/test_extract_tool.py +++ b/tests/integration_tests/test_extract_tool.py @@ -1,5 +1,7 @@ """Integration tests for Parallel Extract Tool.""" +from __future__ import annotations + import os import pytest @@ -16,6 +18,22 @@ def api_key() -> str: return key +def _invoke(tool: ParallelExtractTool, args: dict) -> tuple[str, list[dict]]: + """Invoke via the tool_call form so we get back a ToolMessage with .artifact. + + Returns ``(content, artifact)``. 
+ """ + msg = tool.invoke( + { + "args": args, + "id": "1", + "name": tool.name, + "type": "tool_call", + }, + ) + return msg.content, msg.artifact + + class TestParallelExtractToolIntegration: """Integration tests for ParallelExtractTool.""" @@ -23,19 +41,21 @@ def test_extract_single_url(self, api_key: str) -> None: """Test extracting content from a single URL.""" tool = ParallelExtractTool(api_key=api_key) - result = tool.invoke( + _, artifact = _invoke( + tool, { "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], "full_content": True, - } + }, ) - assert len(result) == 1 + assert len(artifact) == 1 assert ( - result[0]["url"] == "https://en.wikipedia.org/wiki/Artificial_intelligence" + artifact[0]["url"] + == "https://en.wikipedia.org/wiki/Artificial_intelligence" ) - assert len(result[0]["content"]) > 0 - assert result[0]["title"] is not None + assert len(artifact[0]["content"]) > 0 + assert artifact[0]["title"] is not None def test_extract_multiple_urls(self, api_key: str) -> None: """Test extracting content from multiple URLs.""" @@ -46,175 +66,169 @@ def test_extract_multiple_urls(self, api_key: str) -> None: "https://en.wikipedia.org/wiki/Python_(programming_language)", ] - result = tool.invoke({"urls": urls}) + _, artifact = _invoke(tool, {"urls": urls}) - assert len(result) == 2 - for item in result: + assert len(artifact) == 2 + for item in artifact: assert "url" in item assert "content" in item - # Content may be empty for some pages, so just check it exists def test_extract_with_search_objective(self, api_key: str) -> None: """Test extraction with search objective to focus content.""" tool = ParallelExtractTool(api_key=api_key) - result = tool.invoke( + _, artifact = _invoke( + tool, { "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], "search_objective": "What are the main applications of AI?", - "excerpts": True, "full_content": False, - } + }, ) - assert len(result) == 1 + assert len(artifact) == 1 assert ( - 
result[0]["url"] == "https://en.wikipedia.org/wiki/Artificial_intelligence" + artifact[0]["url"] + == "https://en.wikipedia.org/wiki/Artificial_intelligence" ) - # Should have excerpts focused on the objective - assert "excerpts" in result[0] - assert isinstance(result[0]["excerpts"], list) - # Content should be populated from excerpts - assert len(result[0]["content"]) > 0 + assert "excerpts" in artifact[0] + assert isinstance(artifact[0]["excerpts"], list) + assert len(artifact[0]["content"]) > 0 def test_extract_with_search_queries(self, api_key: str) -> None: """Test extraction with search queries to focus content.""" tool = ParallelExtractTool(api_key=api_key) - result = tool.invoke( + _, artifact = _invoke( + tool, { - "urls": [ - "https://en.wikipedia.org/wiki/Machine_learning", - ], + "urls": ["https://en.wikipedia.org/wiki/Machine_learning"], "search_queries": ["neural networks", "training algorithms"], - "excerpts": True, - } + }, ) - assert len(result) == 1 - # Should have excerpts focused on the queries - assert "excerpts" in result[0] - assert isinstance(result[0]["excerpts"], list) - assert len(result[0]["excerpts"]) > 0 + assert len(artifact) == 1 + assert "excerpts" in artifact[0] + assert isinstance(artifact[0]["excerpts"], list) + assert len(artifact[0]["excerpts"]) > 0 def test_extract_with_max_chars(self, api_key: str) -> None: """Test extraction with max_chars_per_extract limit.""" tool = ParallelExtractTool(api_key=api_key, max_chars_per_extract=1000) - result = tool.invoke( + _, artifact = _invoke( + tool, { "urls": ["https://en.wikipedia.org/wiki/Python_(programming_language)"], "full_content": True, - } + }, ) - assert len(result) == 1 - # Note: The API currently returns up to 100k characters for full_content - # regardless of max_characters setting. This test verifies the tool - # correctly passes the parameter to the API. 
- assert len(result[0]["content"]) > 0 - assert result[0]["title"] is not None + assert len(artifact) == 1 + assert len(artifact[0]["content"]) > 0 + assert artifact[0]["title"] is not None def test_extract_metadata_fields(self, api_key: str) -> None: """Test that metadata fields are properly populated.""" tool = ParallelExtractTool(api_key=api_key) - result = tool.invoke( - {"urls": ["https://en.wikipedia.org/wiki/Machine_learning"]} + _, artifact = _invoke( + tool, {"urls": ["https://en.wikipedia.org/wiki/Machine_learning"]} ) - assert len(result) > 0 - - item = result[0] + assert len(artifact) > 0 + item = artifact[0] assert "url" in item assert "title" in item assert "content" in item - # Other metadata fields may or may not be present depending on the source def test_extract_invalid_url(self, api_key: str) -> None: """Test extraction handles invalid URLs gracefully.""" tool = ParallelExtractTool(api_key=api_key) - # The API handles invalid URLs gracefully by returning error info - result = tool.invoke( + _, artifact = _invoke( + tool, { "urls": ["https://this-domain-does-not-exist-12345.com/"], "full_content": True, - "timeout": 30.0, # Reasonable timeout - } + "timeout": 30.0, + }, ) - # Should return a result with error information - assert len(result) == 1 - assert result[0]["url"] == "https://this-domain-does-not-exist-12345.com/" - # Should have error information in content or error_type - assert "Error" in result[0]["content"] or "error_type" in result[0] + assert len(artifact) == 1 + assert artifact[0]["url"] == "https://this-domain-does-not-exist-12345.com/" + assert "Error" in artifact[0]["content"] or "error_type" in artifact[0] def test_extract_mixed_valid_invalid_urls(self, api_key: str) -> None: """Test extraction with mix of valid and invalid URLs.""" tool = ParallelExtractTool(api_key=api_key) - result = tool.invoke( + _, artifact = _invoke( + tool, { "urls": [ "https://en.wikipedia.org/wiki/Python_(programming_language)", 
"https://this-domain-does-not-exist-12345.com/", ], "full_content": True, - } + }, ) - assert len(result) == 2 - # First URL should have content - assert len(result[0]["content"]) > 0 or len(result[1]["content"]) > 0 + assert len(artifact) == 2 + assert len(artifact[0]["content"]) > 0 or len(artifact[1]["content"]) > 0 @pytest.mark.asyncio async def test_extract_async(self, api_key: str) -> None: """Test async extraction functionality.""" tool = ParallelExtractTool(api_key=api_key) - result = await tool.ainvoke( + msg = await tool.ainvoke( { - "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], - "full_content": True, - } + "args": { + "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], + "full_content": True, + }, + "id": "1", + "name": tool.name, + "type": "tool_call", + }, ) + artifact = msg.artifact - assert len(result) == 1 - assert len(result[0]["content"]) > 0 + assert len(artifact) == 1 + assert len(artifact[0]["content"]) > 0 assert ( - result[0]["url"] == "https://en.wikipedia.org/wiki/Artificial_intelligence" + artifact[0]["url"] + == "https://en.wikipedia.org/wiki/Artificial_intelligence" ) def test_extract_with_long_content(self, api_key: str) -> None: """Test extraction of long articles.""" tool = ParallelExtractTool(api_key=api_key) - result = tool.invoke( + _, artifact = _invoke( + tool, { "urls": ["https://en.wikipedia.org/wiki/History_of_the_United_States"], "full_content": True, - } + }, ) - assert len(result) == 1 - # Long articles should have substantial content - assert len(result[0]["content"]) > 1000 + assert len(artifact) == 1 + assert len(artifact[0]["content"]) > 1000 def test_extract_different_content_types(self, api_key: str) -> None: """Test extraction from different types of web pages.""" tool = ParallelExtractTool(api_key=api_key) - # Test various content types urls = [ - "https://www.wikipedia.org/", # Homepage - "https://en.wikipedia.org/wiki/Main_Page", # Wiki page + "https://www.wikipedia.org/", + 
"https://en.wikipedia.org/wiki/Main_Page", ] - result = tool.invoke({"urls": urls}) + _, artifact = _invoke(tool, {"urls": urls}) - assert len(result) == 2 - # All should return some result (even if empty content) - for item in result: + assert len(artifact) == 2 + for item in artifact: assert "url" in item assert "content" in item diff --git a/tests/unit_tests/__snapshots__/test_chat_models.ambr b/tests/unit_tests/__snapshots__/test_chat_models.ambr index 4927bb6..057e0bd 100644 --- a/tests/unit_tests/__snapshots__/test_chat_models.ambr +++ b/tests/unit_tests/__snapshots__/test_chat_models.ambr @@ -16,10 +16,33 @@ }), 'base_url': 'https://api.parallel.ai', 'max_retries': 2, - 'max_tokens': 100, 'model': 'speed', - 'temperature': 0.0, - 'timeout': 60.0, + 'model_name': 'speed', + }), + 'lc': 1, + 'name': 'ChatParallelWeb', + 'type': 'constructor', + }) +# --- +# name: TestChatParallelWebUnitLite.test_serdes[serialized] + dict({ + 'id': list([ + 'langchain_parallel', + 'chat_models', + 'ChatParallelWeb', + ]), + 'kwargs': dict({ + 'api_key': dict({ + 'id': list([ + 'PARALLEL_API_KEY', + ]), + 'lc': 1, + 'type': 'secret', + }), + 'base_url': 'https://api.parallel.ai', + 'max_retries': 2, + 'model': 'lite', + 'model_name': 'lite', }), 'lc': 1, 'name': 'ChatParallelWeb', diff --git a/tests/unit_tests/test_chat_models.py b/tests/unit_tests/test_chat_models.py index fc5a7b8..6356e3d 100644 --- a/tests/unit_tests/test_chat_models.py +++ b/tests/unit_tests/test_chat_models.py @@ -20,6 +20,11 @@ def chat_model_params(self) -> dict: "api_key": "test-api-key", } + @property + def standard_chat_model_params(self) -> dict: + """Parallel ignores most OpenAI sampling params; keep tests honest.""" + return {} + # Configure capabilities based on Parallel's Chat API features @property def has_tool_calling(self) -> bool: @@ -86,8 +91,15 @@ def supports_image_tool_message(self) -> bool: @property def structured_output_kwargs(self) -> dict: - """Additional kwargs for 
with_structured_output.""" - return {"method": "function_calling"} + """Additional kwargs for with_structured_output. + + Parallel research models (`lite`, `base`, `core`) accept + ``response_format`` JSON schemas; ``function_calling`` is not + supported. The base class doesn't enable structured output + (see :attr:`has_structured_output`); subclasses that flip the + flag should default to ``method='json_schema'``. + """ + return {"method": "json_schema"} @property def supported_usage_metadata_details(self) -> dict: @@ -124,3 +136,24 @@ def init_from_env_params(self) -> tuple[dict, dict, dict]: "api_key": "test-env-api-key", }, ) + + +class TestChatParallelWebUnitLite(TestChatParallelWebUnit): + """Unit tests parametrized for the `lite` research model. + + `lite` (and `base`/`core`) accept ``response_format`` JSON schema, so the + structured-output capability flag is True for those models. + """ + + @property + def chat_model_params(self) -> dict: + return {"model": "lite", "api_key": "test-api-key"} + + @property + def has_structured_output(self) -> bool: + return True + + @property + def structured_output_kwargs(self) -> dict: + # Parallel research models use json_schema, not function_calling. 
+ return {"method": "json_schema"} diff --git a/tests/unit_tests/test_extract_tool.py b/tests/unit_tests/test_extract_tool.py index e895b34..f2f2e1d 100644 --- a/tests/unit_tests/test_extract_tool.py +++ b/tests/unit_tests/test_extract_tool.py @@ -1,5 +1,8 @@ """Unit tests for Parallel Extract Tool.""" +from __future__ import annotations + +from types import SimpleNamespace from unittest.mock import AsyncMock, Mock, patch import pytest @@ -7,6 +10,11 @@ from langchain_parallel.extract_tool import ParallelExtractTool +def _make_response(payload: dict) -> SimpleNamespace: + """Build a mock SDK response with .model_dump().""" + return SimpleNamespace(model_dump=lambda: dict(payload)) + + class TestParallelExtractTool: """Test cases for ParallelExtractTool.""" @@ -19,6 +27,7 @@ def test_extract_tool_initialization(self) -> None: assert tool.name == "parallel_extract" assert tool.base_url == "https://api.parallel.ai" assert tool.max_chars_per_extract is None + assert tool.response_format == "content_and_artifact" def test_extract_tool_initialization_with_params(self) -> None: """Test extract tool initialization with custom parameters.""" @@ -30,235 +39,246 @@ def test_extract_tool_initialization_with_params(self) -> None: ) assert tool.max_chars_per_extract == 3000 - @patch("langchain_parallel.extract_tool.get_extract_client") - def test_extract_single_url(self, mock_get_extract_client: Mock) -> None: - """Test extracting content from a single URL.""" - # Mock the extract client - mock_client = Mock() - mock_client.extract.return_value = { - "extract_id": "extract-123", - "results": [ - { - "url": "https://example.com", - "title": "Test Article", - "full_content": "This is the extracted content.", - "publish_date": "2024-01-01", - } - ], - "errors": [], - } - mock_get_extract_client.return_value = mock_client + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def 
test_extract_single_url( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Test extracting content from a single URL via the GA endpoint.""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + { + "extract_id": "extract-1", + "results": [ + { + "url": "https://example.com", + "title": "Test Article", + "full_content": "This is the extracted content.", + "publish_date": "2024-01-01", + }, + ], + "errors": [], + }, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - result = tool.invoke({"urls": ["https://example.com"]}) - - assert len(result) == 1 - assert result[0]["url"] == "https://example.com" - assert result[0]["title"] == "Test Article" - assert result[0]["content"] == "This is the extracted content." - assert result[0]["publish_date"] == "2024-01-01" + content, artifact = tool._run( + urls=["https://example.com"], full_content=True + ) - @patch("langchain_parallel.extract_tool.get_extract_client") - def test_extract_multiple_urls(self, mock_get_extract_client: Mock) -> None: + sync_client.extract.assert_called_once() + sync_client.beta.extract.assert_not_called() + assert len(artifact) == 1 + assert artifact[0]["url"] == "https://example.com" + assert artifact[0]["title"] == "Test Article" + assert artifact[0]["content"] == "This is the extracted content." 
+ assert artifact[0]["publish_date"] == "2024-01-01" + assert "Test Article" in content + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_extract_multiple_urls( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: """Test extraction with multiple URLs.""" - mock_client = Mock() - mock_client.extract.return_value = { - "extract_id": "extract-123", - "results": [ - { - "url": "https://example1.com", - "title": "Article 1", - "full_content": "Content 1", - }, - { - "url": "https://example2.com", - "title": "Article 2", - "full_content": "Content 2", - }, - ], - "errors": [], - } - mock_get_extract_client.return_value = mock_client + sync_client = Mock() + sync_client.extract.return_value = _make_response( + { + "extract_id": "extract-1", + "results": [ + { + "url": "https://example1.com", + "title": "Article 1", + "full_content": "Content 1", + }, + { + "url": "https://example2.com", + "title": "Article 2", + "full_content": "Content 2", + }, + ], + "errors": [], + }, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - result = tool.invoke( - {"urls": ["https://example1.com", "https://example2.com"]} + _, artifact = tool._run( + urls=["https://example1.com", "https://example2.com"], + full_content=True, ) - - assert len(result) == 2 - assert result[0]["content"] == "Content 1" - assert result[1]["content"] == "Content 2" - - @patch("langchain_parallel.extract_tool.get_extract_client") - def test_extract_with_errors(self, mock_get_extract_client: Mock) -> None: + assert [r["content"] for r in artifact] == ["Content 1", "Content 2"] + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def 
test_extract_with_errors( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: """Test extraction handles errors gracefully.""" - mock_client = Mock() - mock_client.extract.return_value = { - "extract_id": "extract-123", - "results": [ - { - "url": "https://example1.com", - "title": "Article 1", - "full_content": "Content 1", - } - ], - "errors": [ - { - "url": "https://example2.com", - "error_type": "http_error", - "http_status_code": 404, - "content": None, - } - ], - } - mock_get_extract_client.return_value = mock_client + sync_client = Mock() + sync_client.extract.return_value = _make_response( + { + "extract_id": "extract-1", + "results": [ + { + "url": "https://example1.com", + "title": "Article 1", + "full_content": "Content 1", + }, + ], + "errors": [ + { + "url": "https://example2.com", + "error_type": "http_error", + "http_status_code": 404, + "content": None, + }, + ], + }, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - result = tool.invoke( - {"urls": ["https://example1.com", "https://example2.com"]} + _, artifact = tool._run( + urls=["https://example1.com", "https://example2.com"], + full_content=True, ) - - assert len(result) == 2 - assert result[0]["content"] == "Content 1" - assert result[1]["url"] == "https://example2.com" - assert "Error: http_error" in result[1]["content"] - assert result[1]["error_type"] == "http_error" - assert result[1]["http_status_code"] == 404 - - @patch("langchain_parallel.extract_tool.get_extract_client") - def test_extract_with_max_chars(self, mock_get_extract_client: Mock) -> None: - """Test extraction with max_chars_per_extract limit.""" - mock_client = Mock() - mock_client.extract.return_value = { - "extract_id": "extract-123", - "results": [ - { - "url": "https://example.com", - "title": "Test", - "full_content": "Short content", - } 
- ], - "errors": [], - } - mock_get_extract_client.return_value = mock_client + assert len(artifact) == 2 + assert artifact[0]["content"] == "Content 1" + assert artifact[1]["error_type"] == "http_error" + assert artifact[1]["http_status_code"] == 404 + assert "Error: http_error" in artifact[1]["content"] + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_extract_max_chars_default( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Tool-level max_chars_per_extract applies when full_content=True.""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + { + "extract_id": "extract-1", + "results": [ + { + "url": "https://example.com", + "title": "Test", + "full_content": "Short", + }, + ], + "errors": [], + }, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool(max_chars_per_extract=5000) - tool.invoke({"urls": ["https://example.com"]}) - - # Verify extract was called with full_content config - call_kwargs = mock_client.extract.call_args[1] - assert call_kwargs["full_content"] == {"max_chars_per_result": 5000} + tool._run(urls=["https://example.com"], full_content=True) + kwargs = sync_client.extract.call_args.kwargs + assert kwargs["advanced_settings"]["full_content"] == { + "max_chars_per_result": 5000 + } - @patch("langchain_parallel.extract_tool.get_extract_client") - def test_extract_handles_api_error(self, mock_get_extract_client: Mock) -> None: - """Test extract tool handles API errors gracefully.""" - mock_client = Mock() - mock_client.extract.side_effect = Exception("API Error") - mock_get_extract_client.return_value = mock_client + @patch("langchain_parallel.extract_tool.get_parallel_client") + 
@patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_extract_handles_api_error( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Test extract tool wraps API errors as ValueError.""" + sync_client = Mock() + sync_client.extract.side_effect = Exception("API Error") + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - with pytest.raises( ValueError, match="Error calling Parallel Extract API: API Error" ): - tool.invoke({"urls": ["https://example.com"]}) + tool._run(urls=["https://example.com"]) - @patch("langchain_parallel.extract_tool.get_async_extract_client") - @patch("langchain_parallel.extract_tool.get_extract_client") + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") @pytest.mark.asyncio async def test_extract_async_functionality( - self, mock_get_extract_client: Mock, mock_get_async_extract_client: Mock + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, ) -> None: - """Test async extraction functionality.""" - # Mock sync client (needed for initialization) - mock_sync_client = Mock() - mock_get_extract_client.return_value = mock_sync_client - - # Mock async client - mock_async_client = Mock() - mock_async_client.extract = AsyncMock( - return_value={ - "extract_id": "extract-123", - "results": [ - { - "url": "https://example.com", - "title": "Test Article", - "full_content": "Async content", - } - ], - "errors": [], - } - ) - mock_get_async_extract_client.return_value = mock_async_client - - with patch( - "langchain_parallel.extract_tool.get_api_key", return_value="test-key" - ): - tool = ParallelExtractTool() - result = await tool.ainvoke({"urls": ["https://example.com"]}) - - assert len(result) == 1 - assert result[0]["content"] == "Async content" - - 
@patch("langchain_parallel.extract_tool.get_extract_client") - def test_extract_metadata_fields(self, mock_get_extract_client: Mock) -> None: - """Test that all metadata fields are properly extracted.""" - mock_client = Mock() - mock_client.extract.return_value = { - "extract_id": "extract-123", - "results": [ + """Async path uses async client.""" + async_client = Mock() + async_client.extract = AsyncMock( + return_value=_make_response( { - "url": "https://example.com", - "title": "Test Article", - "full_content": "Content", - "publish_date": "2024-01-01", - } - ], - "errors": [], - } - mock_get_extract_client.return_value = mock_client + "extract_id": "extract-1", + "results": [ + { + "url": "https://example.com", + "title": "Async", + "full_content": "Async content", + }, + ], + "errors": [], + }, + ), + ) + mock_async_factory.return_value = async_client + mock_sync_factory.return_value = Mock() with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - result = tool.invoke({"urls": ["https://example.com"]}) - - assert result[0]["url"] == "https://example.com" - assert result[0]["title"] == "Test Article" - assert result[0]["content"] == "Content" - assert result[0].get("publish_date") == "2024-01-01" - - @patch("langchain_parallel.extract_tool.get_extract_client") - def test_extract_empty_results(self, mock_get_extract_client: Mock) -> None: + _, artifact = await tool._arun(urls=["https://example.com"]) + assert len(artifact) == 1 + assert artifact[0]["content"] == "Async content" + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_extract_empty_results( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: """Test extract tool handles empty results.""" - mock_client = Mock() - mock_client.extract.return_value = { - "extract_id": "extract-123", - "results": [], - "errors": [], - } - 
mock_get_extract_client.return_value = mock_client + sync_client = Mock() + sync_client.extract.return_value = _make_response( + {"extract_id": "extract-1", "results": [], "errors": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - result = tool.invoke({"urls": ["https://example.com"]}) - - assert len(result) == 0 + _, artifact = tool._run(urls=["https://example.com"]) + assert artifact == [] diff --git a/tests/unit_tests/test_search_tool.py b/tests/unit_tests/test_search_tool.py index 9a2e8d3..8ea86a2 100644 --- a/tests/unit_tests/test_search_tool.py +++ b/tests/unit_tests/test_search_tool.py @@ -1,10 +1,18 @@ """Unit tests for Parallel Search functionality.""" +from __future__ import annotations + +from types import SimpleNamespace from unittest.mock import AsyncMock, Mock, patch import pytest -from langchain_parallel.search_tool import ParallelWebSearchTool +from langchain_parallel.search_tool import ParallelWebSearchTool, _normalize_mode + + +def _make_response(payload: dict) -> SimpleNamespace: + """Build a mock SDK response with .model_dump().""" + return SimpleNamespace(model_dump=lambda: dict(payload)) class TestParallelWebSearchTool: @@ -17,95 +25,194 @@ def test_tool_initialization(self) -> None: ): tool = ParallelWebSearchTool() assert tool.name == "parallel_web_search" - assert "Search the web using Parallel" in tool.description - - @patch("langchain_parallel.search_tool.get_search_client") - def test_tool_successful_search(self, mock_get_client: Mock) -> None: - """Test successful search execution.""" - # Mock the search client - mock_client = Mock() - mock_client.search.return_value = { - "search_id": "test-123", - "results": [ - { - "url": "https://example.com", - "title": "Test Result", - "excerpts": ["Test excerpt"], - } - ], - } - mock_get_client.return_value = mock_client + assert 
"Search the web" in tool.description + assert tool.response_format == "content_and_artifact" + + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_run_uses_v1_endpoint_when_search_queries_provided( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """search_queries triggers the GA endpoint.""" + sync_client = Mock() + sync_client.search.return_value = _make_response( + { + "search_id": "search-1", + "results": [ + { + "url": "https://example.com", + "title": "Test", + "excerpts": ["snippet"], + }, + ], + }, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - result = tool._run(objective="test search") - - assert result["search_id"] == "test-123" - assert len(result["results"]) == 1 - assert result["results"][0]["title"] == "Test Result" - - @patch("langchain_parallel.search_tool.get_search_client") - def test_tool_handles_api_error(self, mock_get_client: Mock) -> None: - """Test tool handles API errors gracefully.""" - # Mock the search client to raise an exception - mock_client = Mock() - mock_client.search.side_effect = Exception("API Error") - mock_get_client.return_value = mock_client + content, artifact = tool._run( + search_queries=["query 1"], + max_results=3, + mode="advanced", + ) + sync_client.search.assert_called_once() + sync_client.beta.search.assert_not_called() + assert artifact["search_id"] == "search-1" + assert artifact["search_metadata"]["endpoint"] == "v1" + assert "Test" in content + + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_run_falls_back_to_beta_when_objective_only( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Objective without 
search_queries falls back to v1beta with a warning.""" + sync_client = Mock() + sync_client.beta.search.return_value = _make_response( + {"search_id": "beta-1", "results": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() + with pytest.warns(DeprecationWarning, match="search_queries"): + _, artifact = tool._run(objective="What is AI?") + sync_client.beta.search.assert_called_once() + sync_client.search.assert_not_called() + assert artifact["search_metadata"]["endpoint"] == "v1beta" + + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_run_translates_legacy_mode( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Legacy mode strings are mapped with a DeprecationWarning.""" + sync_client = Mock() + sync_client.search.return_value = _make_response( + {"search_id": "s", "results": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() - with pytest.raises( - ValueError, match="Error calling Parallel Search API: API Error" - ): - tool._run(objective="test search") - - @patch("langchain_parallel.search_tool.get_search_client") - def test_metadata_collection(self, mock_get_client: Mock) -> None: - """Test metadata collection.""" - mock_client = Mock() - mock_client.search.return_value = { - "search_id": "test-123", - "results": [{"url": "https://example.com", "title": "Test"}], - } - mock_get_client.return_value = mock_client + with patch( + "langchain_parallel.search_tool.get_api_key", return_value="test-key" + ): + tool = ParallelWebSearchTool() + with pytest.warns(DeprecationWarning, match="legacy beta value"): + tool._run(search_queries=["q"], mode="agentic") + kwargs = sync_client.search.call_args.kwargs + assert kwargs["mode"] == "advanced" + 
+ @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_advanced_settings_envelope( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Flat settings flow into the GA `advanced_settings` envelope.""" + sync_client = Mock() + sync_client.search.return_value = _make_response( + {"search_id": "s", "results": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - result = tool._run( - search_queries=["query1", "query2"], - include_metadata=True, + tool._run( + search_queries=["q"], + source_policy={"include_domains": ["nature.com"]}, + location="us", + max_results=15, ) - - assert "search_metadata" in result - metadata = result["search_metadata"] - assert "search_duration_seconds" in metadata - assert "query_count" in metadata - assert metadata["query_count"] == 2 - - @patch("langchain_parallel.search_tool.get_async_search_client") - async def test_async_functionality(self, mock_get_async_client: Mock) -> None: - """Test async search functionality.""" - mock_client = Mock() - mock_client.search = AsyncMock( - return_value={ - "search_id": "async-test-123", - "results": [{"url": "https://example.com", "title": "Async Test"}], + kwargs = sync_client.search.call_args.kwargs + assert kwargs["advanced_settings"] == { + "source_policy": {"include_domains": ["nature.com"]}, + "max_results": 15, + "location": "us", } - ) - mock_get_async_client.return_value = mock_client + + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_run_handles_api_error( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """API exceptions are wrapped as ValueError.""" + sync_client = Mock() + 
sync_client.search.side_effect = Exception("API Error") + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() with patch( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - result = await tool._arun(objective="test async search") + with pytest.raises( + ValueError, + match="Error calling Parallel Search API: API Error", + ): + tool._run(search_queries=["q"]) + + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + async def test_async_functionality( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Async path uses the async client.""" + async_client = Mock() + async_client.search = AsyncMock( + return_value=_make_response( + { + "search_id": "async-1", + "results": [{"url": "https://example.com", "title": "Async"}], + }, + ), + ) + mock_async_factory.return_value = async_client + mock_sync_factory.return_value = Mock() - assert result["search_id"] == "async-test-123" - assert len(result["results"]) == 1 + with patch( + "langchain_parallel.search_tool.get_api_key", return_value="test-key" + ): + tool = ParallelWebSearchTool() + _, artifact = await tool._arun(search_queries=["q"]) + assert artifact["search_id"] == "async-1" + + +class TestNormalizeMode: + def test_passthrough(self) -> None: + assert _normalize_mode("basic") == "basic" + assert _normalize_mode("advanced") == "advanced" + assert _normalize_mode(None) is None + + def test_legacy(self) -> None: + with pytest.warns(DeprecationWarning): + assert _normalize_mode("one-shot") == "basic" + with pytest.warns(DeprecationWarning): + assert _normalize_mode("agentic") == "advanced" + with pytest.warns(DeprecationWarning): + assert _normalize_mode("fast") == "basic" + + def test_invalid(self) -> None: + with pytest.raises(ValueError, match="Invalid mode"): + _normalize_mode("nonsense") From 
e7677a595629a1cc8ebcfac235af2805a0ed9745 Mon Sep 17 00:00:00 2001 From: Matt Harris Date: Mon, 27 Apr 2026 13:57:38 -0400 Subject: [PATCH 2/5] Address review feedback; restore full backward compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The biggest BC break in the previous commit was the tool return shape: switching to response_format=content_and_artifact made tool.invoke({...}) return a string instead of the dict/list 0.2.x callers expect. Revert that — both ParallelWebSearchTool and ParallelExtractTool now return the structured dict/list directly, like 0.2.x. Also restore Extract.excerpts: Union[bool, ExcerptSettings] = True so existing extract_tool.invoke({"urls":[...], "excerpts": True}) keeps validating. Pass-through is a no-op on the wire (v1 GA always returns excerpts); excerpts=False is accepted with a DeprecationWarning. Doc fixes from the review: - README's stale, inverted `mode` description (one-shot/agentic) replaced with the GA basic|advanced semantics + new field table. - README's broken Extract examples (treated tool.invoke as list[dict] but new return was string) work again now that we restored the dict. - README's create_openai_functions_agent block was advertising tool-call agents using ChatParallelWeb, which doesn't support tool calling — replaced with a create_agent + Anthropic example using Parallel as a tool, plus a one-line "use a different LLM as the agent driver" note. - README duplicate v0.1 changelog stub deleted. - extract_tool.py:96 docstring claim about a legacy boolean path was unsupported by code; updated to describe the actual behavior. - search_tool.py docstring example dropped the (content, artifact) invocation since we reverted to plain dict returns. Bug fix: with_structured_output(include_raw=True) used to set parsing_error=lambda _: None, which never reflected real failures. 
Replaced with a try/except wrapper that captures the parser exception and
returns parsed=None, parsing_error=<the captured exception> on failure.

Tests added (per the testing-gaps review):
- model="lite" actually selects "lite" (regression test)
- model_name="lite" back-compat shim works
- lc_attributes exposes model_name
- response_metadata round-trips basis / interaction_id / system_fingerprint
  on both AIMessage and final stream chunk
- with_structured_output rejects on speed
- with_structured_output binds the right response_format for json_schema,
  function_calling (routed to json_schema), and json_mode
- include_raw success and failure paths populate parsed/parsing_error
- SourcePolicy pydantic model and raw dict both flow through
- Top-level passthrough (max_chars_total, client_model, session_id)
- Extract full_content precedence (explicit settings beat tool-level cap;
  full_content=False omits the key)
- Extract excerpts=True is a no-op; excerpts=False emits warning
- Async error wrapping for both tools

61 unit tests + 10 extract integration tests pass; lint, format, and mypy
on src+tests all clean.

End-to-end smoke against the real API confirms backward-compat for: search
dict return, extract list[dict] return, excerpts=True default,
excerpts=dict, model="lite" selecting the research model, basis citations
populated, with_structured_output returning the typed pydantic object.
--- CHANGELOG.md | 44 ++-- README.md | 122 ++++----- langchain_parallel/chat_models.py | 18 +- langchain_parallel/extract_tool.py | 101 ++++---- langchain_parallel/search_tool.py | 43 +--- tests/integration_tests/test_extract_tool.py | 146 ++++------- tests/unit_tests/test_chat_models.py | 167 ++++++++++++ tests/unit_tests/test_extract_tool.py | 251 ++++++++++++++++--- tests/unit_tests/test_search_tool.py | 140 +++++++++-- 9 files changed, 716 insertions(+), 316 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3933a0..396ca9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,40 +7,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.3.0] - 2026-04-27 +This release migrates Search and Extract to Parallel's v1 GA endpoints, surfaces citations + structured output on the chat model, and bumps the SDK to `0.5.1`. **All existing 0.2.x call sites continue to work** — return shapes and field names are preserved, with deprecation warnings on legacy paths. + ### Added -- **Search/Extract GA endpoints**: `ParallelWebSearchTool` and `ParallelExtractTool` now call `client.search` and `client.extract` (the `/v1` GA paths) by default, replacing the deprecated `client.beta.*` calls. New parameters surfaced from the GA contract: `max_chars_total`, `client_model`, `session_id`, `location` on both tools. -- **Citations on chat responses**: when `model` is `lite`, `base`, or `core`, `AIMessage.response_metadata["basis"]` now carries the API's per-field citations / reasoning / confidence. `interaction_id` is also surfaced for context chaining across calls. -- **`ChatParallelWeb.with_structured_output()`**: returns a `Runnable` that produces a typed object (pydantic model or dict) using Parallel's `response_format` JSON-schema feature on the research models. `method="json_schema"` (default), `method="json_mode"`, and `method="function_calling"` (routed to `json_schema` for cross-provider compatibility) are supported. 
Raises a clear error when called on `model="speed"` since that model silently ignores structured-output requests. -- **`SourcePolicy` pydantic model** in `langchain_parallel._types` mirroring the API's `include_domains` / `exclude_domains` / `after_date` shape. -- **`tool.response_format = "content_and_artifact"`** on both Search and Extract tools — agents see a compact summary string while consumers reading from the `ToolMessage` get the full structured payload via `.artifact`. +- **Search/Extract GA endpoints**: `ParallelWebSearchTool` and `ParallelExtractTool` now call `client.search` / `client.extract` (the `/v1` GA paths) by default. New parameters surfaced from the GA contract on both tools: `max_chars_total`, `client_model`, `session_id`, `location` (Search), and the `advanced_settings` envelope is built automatically from existing flat fields. +- **`ChatParallelWeb.with_structured_output()`**: returns a `Runnable` producing a typed object (pydantic model or dict) via Parallel's `response_format` JSON-schema on the research models (`lite`, `base`, `core`). `method="json_schema"` (default), `method="json_mode"`, and `method="function_calling"` (routed to `json_schema` for cross-provider compatibility) are supported. Raises a clear `ValueError` on `model="speed"` since that model silently ignores structured-output requests. `include_raw=True` returns `{"raw", "parsed", "parsing_error"}` and properly captures parser failures. +- **Citations on chat responses**: for the research models, `AIMessage.response_metadata["basis"]` carries the API's per-field citations / reasoning / confidence list. `response_metadata["interaction_id"]` is surfaced for multi-turn context chaining; `system_fingerprint` is forwarded when present. +- **`SourcePolicy` pydantic model** in `langchain_parallel._types` mirroring the API's `include_domains` / `exclude_domains` / `after_date`. Both `SourcePolicy(...)` and a raw dict are accepted on `ParallelWebSearchTool`. 
-### Changed +### Changed (backward compatible) -- **BREAKING — tool return shape**: `ParallelWebSearchTool` and `ParallelExtractTool` now return `(content_str, artifact)` per the LangChain `content_and_artifact` convention. Direct `tool.invoke({...})` returns just the content string; the tool-call form (`{"args": {...}, "id": ..., "name": ..., "type": "tool_call"}`) returns a `ToolMessage` whose `.artifact` carries the full Parallel response. To keep the old direct-dict access, use `_, artifact = tool._run(...)` or unpack the `ToolMessage`. -- **BREAKING — `mode` strings**: legacy values `"fast"`, `"one-shot"`, and `"agentic"` continue to work but emit a `DeprecationWarning` and are mapped to `basic` / `basic` / `advanced` respectively. The GA values `"basic"` and `"advanced"` are the new canonical set. -- **`ChatParallelWeb.model` alias removed (with back-compat shim)**: the `model_name` alias on the `model` field has been removed because it silently swallowed `ChatParallelWeb(model="lite")` and forced users into the default `"speed"`. Both `ChatParallelWeb(model="lite")` and `ChatParallelWeb(model_name="lite")` now work — the latter via a `model_validator` that maps `model_name=` to `model=`. `lc_attributes` still serializes as `model_name` for tracing parity. -- **Search behavior**: when `search_queries` is omitted, the tool falls back to the deprecated `/v1beta/search` endpoint with a `DeprecationWarning`. The GA endpoint requires `search_queries` (1–5 keyword strings); pass them explicitly to silence the warning. -- **`response_metadata["model_name"]`**: chat completions now emit `model_name` (the LangChain 1.x standard key) instead of `model`. Standard tests check for `model_name`. -- **`parallel-web` SDK bumped** from `^0.3.3` to `^0.5.1`. 
Brings in v1 GA Search/Extract types, `AdvancedSearchSettingsParam` / `AdvancedExtractSettingsParam`, and FindAll / Task Group surface (not yet exposed by this integration; see the IMPROVEMENT_PLAN.md roadmap for Phase 2). -- **Slimmed `_client.py`**: the four hand-rolled `ParallelSearchClient` / `AsyncParallelSearchClient` / `ParallelExtractClient` / `AsyncParallelExtractClient` wrappers have been removed. Tools now instantiate the `parallel.Parallel` / `parallel.AsyncParallel` SDK clients directly. Internal change; no public surface impact. -- `ParallelExtractTool.full_content` precedence is now explicit: a `FullContentSettings` (or dict) on the call always wins over the tool-level `max_chars_per_extract`; the latter only applies when `full_content=True` is passed as a plain bool. +- **`mode` strings**: legacy values `"fast"`, `"one-shot"`, and `"agentic"` continue to be accepted and call the API correctly, with a `DeprecationWarning` mapping them to the GA values (`"fast"` / `"one-shot"` → `"basic"`, `"agentic"` → `"advanced"`). The GA values `"basic"` and `"advanced"` are now the canonical set. +- **Search behavior**: when `search_queries` is omitted, the call falls back to the deprecated `/v1beta/search` endpoint with a `DeprecationWarning`. The GA endpoint requires `search_queries` (1–5 keyword strings); pass them explicitly to silence the warning and use `/v1`. +- **Extract `excerpts: bool` is now a no-op**: the GA Extract API always returns excerpts, so passing `excerpts=True` (the default) is unchanged on the wire and `excerpts=False` is accepted with a `DeprecationWarning`. Use `ExcerptSettings(max_chars_per_result=...)` to control per-result size. +- **`response_metadata["model_name"]`**: chat completions now emit the LangChain 1.x standard key `model_name` (was `model`). Tracing systems and `langchain-tests`' standard suite check for this name. +- **`parallel-web` SDK bumped** from `^0.3.3` to `^0.5.1`. 
Brings in the v1 GA Search/Extract types, `AdvancedSearchSettingsParam` / `AdvancedExtractSettingsParam`, and the FindAll / Task Group / Monitor surfaces (not yet exposed by this integration — see `IMPROVEMENT_PLAN.md` Phase 2). +- **Slimmed `_client.py`**: the four hand-rolled `ParallelSearchClient` / `AsyncParallelSearchClient` / `ParallelExtractClient` / `AsyncParallelExtractClient` wrapper classes have been removed in favor of using `parallel.Parallel` / `parallel.AsyncParallel` directly. Internal change; no public surface impact. +- `ParallelExtractTool.full_content` precedence is now explicit: an explicit `FullContentSettings` (or dict) on the call always wins over the tool-level `max_chars_per_extract`; the latter only applies when `full_content=True` is passed as a plain bool. ### Fixed -- `ChatParallelWeb(model="lite")` now actually selects the `lite` model. Previously the `alias="model_name"` on the field meant the `model=` kwarg was silently ignored and the default `"speed"` was used. +- `ChatParallelWeb(model="lite")` now actually selects the `lite` model. Pre-0.3.0 the `Field(alias="model_name")` on the `model` field silently swallowed the `model=` kwarg and forced callers into the default `"speed"`. Both `ChatParallelWeb(model="lite")` and `ChatParallelWeb(model_name="lite")` work in 0.3.0 — the latter via a `model_validator` that maps `model_name=` to `model=` for back-compat. `lc_attributes` still serializes the field as `model_name` for tracing parity. - `py.typed` is now bundled into the wheel via the `[tool.poetry] include` directive, so downstream `mypy` runs see the package's type information. +- `with_structured_output(include_raw=True)` correctly populates `parsing_error` on parse failure (previously always `None`). 
### Migration -- **Tools**: existing code that does `result = tool.invoke({...})` and treats `result` as a dict/list should switch to either `_, result = tool._run(...)` or use the tool-call envelope: - ```python - msg = tool.invoke({"args": {...}, "id": "1", "name": tool.name, "type": "tool_call"}) - result = msg.artifact - ``` -- **Search**: callers using only `objective` (no `search_queries`) keep working but should add `search_queries=["...","..."]` to silence the deprecation warning and use the GA endpoint. -- **Search modes**: rename `mode="one-shot"` → `mode="basic"` (or `"advanced"` for higher quality), `mode="agentic"` → `mode="advanced"`, `mode="fast"` → `mode="basic"`. -- **Chat**: code that did `ChatParallelWeb(model_name="...")` continues to work via `model_name` mapping in `lc_attributes`. New code should prefer `ChatParallelWeb(model="lite")`. Read citations from `response.response_metadata["basis"]`. +For most users, **no code changes are required**. The remaining recommended-but-optional updates: + +- **Search**: add `search_queries=["…", "…"]` (1–5 keyword strings) to use the GA `/v1` endpoint and silence the v1beta-fallback deprecation warning. +- **Search mode**: rename `mode="one-shot"`/`"fast"` → `mode="basic"` and `mode="agentic"` → `mode="advanced"` to silence the legacy-value deprecation warning. +- **Chat**: prefer `ChatParallelWeb(model="lite")` (or `"base"` / `"core"`) over `model_name="..."`. Read citations off `response.response_metadata["basis"]` and structured outputs via `chat.with_structured_output(MyPydanticModel)`. 
## [0.2.0] - 2025-12-01 diff --git a/README.md b/README.md index acbb53c..83ecb5a 100644 --- a/README.md +++ b/README.md @@ -180,21 +180,7 @@ print(result) ### Agents -```python -from langchain.agents import create_openai_functions_agent, AgentExecutor -from langchain_core.prompts import ChatPromptTemplate - -# Create an agent with web research capabilities -prompt = ChatPromptTemplate.from_messages([ - ("system", "You are a helpful assistant with access to real-time web information."), - ("human", "{input}"), - ("placeholder", "{agent_scratchpad}"), -]) - -# Use with tools for additional capabilities -# agent = create_openai_functions_agent(chat, tools, prompt) -# agent_executor = AgentExecutor(agent=agent, tools=tools) -``` +Parallel's Chat API does not support tool calling, so `ChatParallelWeb` cannot be the LLM that drives an agent. Use it as a research assistant inside a chain (above), or use Parallel's tools (`ParallelWebSearchTool`, `ParallelExtractTool`) with a tool-calling chat model (Anthropic, OpenAI, etc.) — see the **Tool Usage in Agents** section below. ## Search API @@ -209,18 +195,15 @@ from langchain_parallel import ParallelWebSearchTool search_tool = ParallelWebSearchTool() -# In a tool-calling agent, the tool returns a ToolMessage with .content -# (compact LLM-readable summary) and .artifact (full Parallel response). -# To get both directly: -content, artifact = search_tool._run( - search_queries=["renewable energy 2026", "solar power developments"], - max_results=5, -) -print(content) -print(artifact["search_id"], len(artifact["results"])) -``` +result = search_tool.invoke({ + "search_queries": ["renewable energy 2026", "solar power developments"], + "max_results": 5, +}) -> **0.3.0 migration note**: tools now use `response_format="content_and_artifact"`. 
A bare `tool.invoke({...})` returns the content string only; pass a tool-call envelope (`{"args": {...}, "id": "1", "name": tool.name, "type": "tool_call"}`) to get back a `ToolMessage` with `.artifact`, or call `tool._run(...)` for the `(content, artifact)` tuple. +print(result["search_id"], len(result["results"])) +for r in result["results"]: + print(r["title"], "-", r["url"]) +``` @@ -230,14 +213,19 @@ print(artifact["search_id"], len(artifact["results"])) | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `objective` | `Optional[str]` | `None` | Natural-language description of research goal | -| `search_queries` | `Optional[List[str]]` | `None` | Specific search queries (max 5, 200 chars each) | -| `max_results` | `int` | `10` | Maximum results to return (1-40) | -| `excerpts` | `Optional[dict]` | `None` | Excerpt settings (e.g., {'max_chars_per_result': 1500}) | -| `mode` | `Optional[str]` | `None` | Search mode: 'one-shot' for comprehensive results, 'agentic' for token-efficient results | -| `fetch_policy` | `Optional[dict]` | `None` | Policy for cached vs live content (e.g., {'max_age_seconds': 86400, 'timeout_seconds': 60}) | -| `api_key` | `Optional[SecretStr]` | `None` | API key (uses env var if not provided) | -| `base_url` | `str` | `"https://api.parallel.ai"` | API base URL | +| `objective` | `Optional[str]` | `None` | Natural-language description of research goal (≤5000 chars). | +| `search_queries` | `Optional[List[str]]` | `None` | Keyword queries (max 5, 200 chars each). Required for the GA endpoint; without it, calls fall back to `/v1beta` with a deprecation warning. | +| `max_results` | `int` | `10` | Maximum results to return (1–40). | +| `excerpts` | `Optional[ExcerptSettings]` | `None` | Per-result excerpt-size cap. | +| `max_chars_total` | `Optional[int]` | `None` | Cap on total excerpt characters across all results. 
| +| `mode` | `Optional[Literal["basic", "advanced"]]` | `None` (API default `advanced`) | `basic` is lower-latency; `advanced` is higher quality with more retrieval and compression. Legacy values `fast`, `one-shot` (→ `basic`) and `agentic` (→ `advanced`) are accepted with a `DeprecationWarning`. | +| `source_policy` | `Optional[SourcePolicy]` | `None` | Domain include/exclude lists and freshness floor (`after_date`). | +| `fetch_policy` | `Optional[FetchPolicy]` | `None` | Cache vs live-fetch policy (e.g. `FetchPolicy(max_age_seconds=86400, timeout_seconds=60)`). | +| `location` | `Optional[str]` | `None` | ISO 3166-1 alpha-2 country code (e.g. `"us"`, `"gb"`). | +| `client_model` | `Optional[str]` | `None` | Identifier of the calling LLM, used for model-specific result optimizations. | +| `session_id` | `Optional[str]` | `None` | Shared id grouping related Search/Extract calls in one task. | +| `api_key` | `Optional[SecretStr]` | `None` | API key (uses `PARALLEL_API_KEY` env var if not provided). | +| `base_url` | `str` | `"https://api.parallel.ai"` | API base URL. | ### Search with Specific Queries @@ -257,31 +245,27 @@ result = search_tool.invoke({ ### Tool Usage in Agents -The search tool works seamlessly with LangChain agents: +Use the search tool with a tool-calling chat model (e.g. Anthropic Claude or OpenAI) and `create_agent`. Note that Parallel's own Chat API does not currently support tool calling, so use a different model class for the agent's LLM and use Parallel as a tool. ```python -from langchain.agents import create_openai_functions_agent, AgentExecutor -from langchain_core.prompts import ChatPromptTemplate - -# Create agent with search capabilities -tools = [search_tool] - -prompt = ChatPromptTemplate.from_messages([ - ("system", "You are a research assistant. 
Use the search tool to find current information."), - ("human", "{input}"), - ("placeholder", "{agent_scratchpad}"), -]) - -agent = create_openai_functions_agent(chat, tools, prompt) -agent_executor = AgentExecutor(agent=agent, tools=tools) +from langchain.agents import create_agent +from langchain_parallel import ParallelWebSearchTool, ParallelExtractTool + +agent = create_agent( + "anthropic:claude-haiku-4-5", + tools=[ParallelWebSearchTool(), ParallelExtractTool()], + system_prompt=( + "You are a research assistant. Use parallel_web_search to find " + "current information and parallel_extract to read specific pages." + ), +) -# Run the agent -result = agent_executor.invoke({ - "input": "What are the latest developments in artificial intelligence?" -}) -print(result["output"]) +result = agent.invoke({"messages": [("human", "Latest AI breakthroughs?")]}) +print(result["messages"][-1].content) ``` +See `docs/demo_agent.ipynb` for a full walkthrough. + ## Extract API The Extract API provides clean content extraction from web pages, returning structured markdown-formatted content optimized for LLM consumption. 
@@ -371,15 +355,18 @@ print(f"Content length: {len(result[0]['content'])} characters") | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `urls` | `List[str]` | Required | List of URLs to extract content from | -| `search_objective` | `Optional[str]` | `None` | Natural language objective to focus extraction | -| `search_queries` | `Optional[List[str]]` | `None` | Specific keyword queries to focus extraction | -| `excerpts` | `Union[bool, ExcerptSettings]` | `True` | Include relevant excerpts (focused on objective/queries if provided) | -| `full_content` | `Union[bool, FullContentSettings]` | `False` | Include full page content | -| `fetch_policy` | `Optional[FetchPolicy]` | `None` | Cache vs live content policy | -| `max_chars_per_extract` | `Optional[int]` | `None` | Maximum characters per extraction (tool-level setting) | -| `api_key` | `Optional[SecretStr]` | `None` | API key (uses env var if not provided) | -| `base_url` | `str` | `"https://api.parallel.ai"` | API base URL | +| `urls` | `List[str]` | Required | List of URLs to extract content from (up to 20 per request). | +| `search_objective` | `Optional[str]` | `None` | Natural language objective to focus extraction (≤5000 chars). | +| `search_queries` | `Optional[List[str]]` | `None` | Specific keyword queries to focus extraction. | +| `excerpts` | `Union[bool, ExcerptSettings]` | `True` | In v1 GA, excerpts are always returned; the bool is kept for backward compatibility, and `ExcerptSettings(max_chars_per_result=…)` controls per-result size. | +| `full_content` | `Union[bool, FullContentSettings]` | `False` | Include full page content in addition to excerpts. | +| `max_chars_total` | `Optional[int]` | `None` | Cap on total excerpt characters across all results. Does not affect `full_content`. | +| `fetch_policy` | `Optional[FetchPolicy]` | `None` | Cache vs live content policy. 
| +| `client_model` | `Optional[str]` | `None` | Identifier of the calling LLM, used for model-specific result optimizations. | +| `session_id` | `Optional[str]` | `None` | Shared id grouping related Search/Extract calls in one task. | +| `max_chars_per_extract` | `Optional[int]` | `None` | Tool-level default cap on `full_content` size; only applied when `full_content=True`. | +| `api_key` | `Optional[SecretStr]` | `None` | API key (uses `PARALLEL_API_KEY` env var if not provided). | +| `base_url` | `str` | `"https://api.parallel.ai"` | API base URL. | ### Error Handling @@ -510,11 +497,4 @@ This project is licensed under the MIT License - see the LICENSE file for detail ## Changelog -### v0.1.0 -- Initial release -- **Chat Models**: ChatParallelWeb with real-time web research -- **Search Tools**: ParallelWebSearchTool for direct API access -- **Extract Tools**: ParallelExtractTool for clean content extraction -- Streaming and async/await support -- Batch URL extraction with error handling -- Full LangChain ecosystem compatibility +See [`CHANGELOG.md`](./CHANGELOG.md) for the full version history. 
diff --git a/langchain_parallel/chat_models.py b/langchain_parallel/chat_models.py index 052e6fa..25098c9 100644 --- a/langchain_parallel/chat_models.py +++ b/langchain_parallel/chat_models.py @@ -28,7 +28,7 @@ PydanticOutputParser, ) from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult -from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough +from langchain_core.runnables import Runnable from langchain_core.utils.function_calling import convert_to_json_schema from langchain_core.utils.pydantic import is_basemodel_subclass from openai import AuthenticationError, RateLimitError @@ -719,8 +719,16 @@ def with_structured_output( bound = self.bind(response_format=response_format) if include_raw: - return RunnableMap(raw=bound) | RunnablePassthrough.assign( - parsed=lambda x: output_parser.invoke(x["raw"]), - parsing_error=lambda _: None, - ) + + def _parse_with_capture(raw: AIMessage) -> dict[str, Any]: + try: + return { + "raw": raw, + "parsed": output_parser.invoke(raw), + "parsing_error": None, + } + except Exception as e: + return {"raw": raw, "parsed": None, "parsing_error": e} + + return bound | _parse_with_capture return bound | output_parser diff --git a/langchain_parallel/extract_tool.py b/langchain_parallel/extract_tool.py index dc6f256..5a7ba20 100644 --- a/langchain_parallel/extract_tool.py +++ b/langchain_parallel/extract_tool.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Literal, Optional, Union +import warnings +from typing import Any, Optional, Union from langchain_core.callbacks import ( AsyncCallbackManagerForToolRun, @@ -35,16 +36,46 @@ def _coerce_full_content( return full_content +def _coerce_excerpts( + excerpts: Union[bool, ExcerptSettings, dict[str, Any], None], +) -> Optional[dict[str, Any]]: + """Resolve the legacy ``Union[bool, ExcerptSettings]`` excerpts arg. 
+ + In v1 GA, excerpts are always returned and the API has no flag to disable + them — only their per-result size is configurable. We accept the legacy + boolean for backward compatibility: + + - ``None`` / ``True``: no excerpt-size override (API uses its default). + - ``False``: warn (the API can no longer disable excerpts) and treat as + no override. + - ``ExcerptSettings`` / ``dict``: pass through to advanced_settings. + """ + if excerpts is None or excerpts is True: + return None + if excerpts is False: + warnings.warn( + "excerpts=False is no longer supported — the GA Extract API " + "always returns excerpts. Use ExcerptSettings(max_chars_per_result=…) " + "to control per-result size.", + DeprecationWarning, + stacklevel=4, + ) + return None + if isinstance(excerpts, ExcerptSettings): + return excerpts.model_dump(exclude_none=True) + return {k: v for k, v in excerpts.items() if v is not None} + + def _build_advanced_settings( *, - excerpts: Optional[ExcerptSettings], + excerpts_settings: Optional[dict[str, Any]], full_content: Union[bool, dict[str, Any]], fetch_policy: Optional[FetchPolicy], ) -> Optional[dict[str, Any]]: """Pack the user-facing flat fields into the GA `advanced_settings` envelope.""" settings: dict[str, Any] = {} - if excerpts is not None: - settings["excerpt_settings"] = excerpts.model_dump(exclude_none=True) + if excerpts_settings is not None: + settings["excerpt_settings"] = excerpts_settings if fetch_policy is not None: settings["fetch_policy"] = fetch_policy.model_dump(exclude_none=True) # full_content goes through whether True/False/dict — the API treats False @@ -54,24 +85,6 @@ def _build_advanced_settings( return settings or None -def _format_results_for_llm(results: list[dict[str, Any]]) -> str: - """Build a compact, LLM-friendly string from formatted extract results.""" - if not results: - return "No content extracted." 
- blocks: list[str] = [] - for r in results: - url = r.get("url") or "" - title = r.get("title") or "(untitled)" - if "error_type" in r: - blocks.append(f"[ERROR] {title}\n {url}\n {r.get('content', '')}") - continue - body = r.get("content") or "" - if len(body) > 800: - body = body[:800] + "..." - blocks.append(f"## {title}\n{url}\n\n{body}") - return "\n\n---\n\n".join(blocks) - - class ParallelExtractInput(BaseModel): """Input schema for Parallel Extract Tool.""" @@ -88,12 +101,13 @@ class ParallelExtractInput(BaseModel): default=None, description="Keyword queries to focus extracted content.", ) - excerpts: Optional[ExcerptSettings] = Field( - default=None, + excerpts: Union[bool, ExcerptSettings] = Field( + default=True, description=( - "Per-result excerpt-size settings. In v1 GA, excerpts are always " - "returned; this field controls only their size. Boolean values " - "are accepted via the legacy path with a deprecation warning." + "Include excerpts from each URL. In v1 GA, excerpts are always " + "returned; the boolean is kept for backward compatibility and " + "controls nothing on the wire. Pass an ExcerptSettings to control " + "per-result size (the API has no flag to disable excerpts in v1)." ), ) full_content: Union[bool, FullContentSettings] = Field( @@ -172,22 +186,21 @@ class ParallelExtractTool(BaseTool): Invocation: ```python - # Returns (content_str, artifact_list). 
- content, artifact = tool.invoke({ + result = tool.invoke({ "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], "search_objective": "Main applications of AI", "full_content": False, }) - for r in artifact: + for r in result: print(r["url"], r.get("title")) ``` Async: ```python - content, artifact = await tool.ainvoke({"urls": [...]}) + result = await tool.ainvoke({"urls": [...]}) ``` - Response artifact (list[dict]): + Response shape (``list[dict]``): Each item carries `url`, `title`, optional `publish_date`, and either `excerpts` (always present in v1) and/or `full_content`. Errors carry `error_type` and `http_status_code`. @@ -196,15 +209,11 @@ class ParallelExtractTool(BaseTool): name: str = "parallel_extract" description: str = ( "Extract clean, structured content from web pages using Parallel's " - "Extract API. Returns a compact summary string plus a list of " - "per-URL records as artifact (url, title, excerpts, full_content)." + "Extract API. Returns a list of per-URL records " + "(url, title, excerpts, optional full_content)." ) args_schema: type[BaseModel] = ParallelExtractInput - response_format: Literal["content", "content_and_artifact"] = "content_and_artifact" - """Tools return ``(content, artifact)``: a compact summary string the - LLM sees, and the per-URL records list for downstream code.""" - api_key: Optional[SecretStr] = Field(default=None) """Parallel API key. 
If not provided, will be read from env var.""" @@ -283,7 +292,7 @@ def _build_call_kwargs( urls: list[str], search_objective: Optional[str], search_queries: Optional[list[str]], - excerpts: Optional[ExcerptSettings], + excerpts: Union[bool, ExcerptSettings, dict[str, Any], None], full_content: Union[bool, FullContentSettings, dict[str, Any]], fetch_policy: Optional[FetchPolicy], max_chars_total: Optional[int], @@ -301,7 +310,7 @@ def _build_call_kwargs( tool_max_chars=self.max_chars_per_extract, ) advanced_settings = _build_advanced_settings( - excerpts=excerpts, + excerpts_settings=_coerce_excerpts(excerpts), full_content=full_content_resolved, fetch_policy=fetch_policy, ) @@ -328,7 +337,7 @@ def _run( urls: list[str], search_objective: Optional[str] = None, search_queries: Optional[list[str]] = None, - excerpts: Optional[ExcerptSettings] = None, + excerpts: Union[bool, ExcerptSettings] = True, full_content: Union[bool, FullContentSettings] = False, max_chars_total: Optional[int] = None, fetch_policy: Optional[FetchPolicy] = None, @@ -336,7 +345,7 @@ def _run( session_id: Optional[str] = None, timeout: Optional[float] = None, run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> tuple[str, list[dict[str, Any]]]: + ) -> list[dict[str, Any]]: """Extract content from URLs.""" if self._client is None: msg = "Parallel client not initialized." 
@@ -387,14 +396,14 @@ def _run( color="green", ) - return _format_results_for_llm(formatted), formatted + return formatted async def _arun( self, urls: list[str], search_objective: Optional[str] = None, search_queries: Optional[list[str]] = None, - excerpts: Optional[ExcerptSettings] = None, + excerpts: Union[bool, ExcerptSettings] = True, full_content: Union[bool, FullContentSettings] = False, max_chars_total: Optional[int] = None, fetch_policy: Optional[FetchPolicy] = None, @@ -402,7 +411,7 @@ async def _arun( session_id: Optional[str] = None, timeout: Optional[float] = None, run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> tuple[str, list[dict[str, Any]]]: + ) -> list[dict[str, Any]]: """Async extract content from URLs.""" if self._async_client is None: msg = "Async Parallel client not initialized." @@ -456,4 +465,4 @@ async def _arun( color="green", ) - return _format_results_for_llm(formatted), formatted + return formatted diff --git a/langchain_parallel/search_tool.py b/langchain_parallel/search_tool.py index 65b7b96..06f5d2e 100644 --- a/langchain_parallel/search_tool.py +++ b/langchain_parallel/search_tool.py @@ -4,7 +4,7 @@ import warnings from datetime import datetime -from typing import Any, Literal, Optional, Union +from typing import Any, Optional, Union from langchain_core.callbacks import ( AsyncCallbackManagerForToolRun, @@ -79,20 +79,6 @@ def _build_advanced_settings( return settings or None -def _format_results_for_llm(response: dict[str, Any]) -> str: - """Build a compact, LLM-friendly string from the raw search response.""" - results = response.get("results") or [] - if not results: - return "No results." - lines: list[str] = [] - for i, result in enumerate(results, 1): - title = result.get("title") or "(untitled)" - url = result.get("url") or "" - lines.append(f"{i}. 
{title}\n {url}") - lines.extend(f" - {excerpt}" for excerpt in result.get("excerpts") or []) - return "\n".join(lines) - - class ParallelWebSearchInput(BaseModel): """Input schema for ParallelWeb search tool.""" @@ -224,16 +210,13 @@ class ParallelWebSearchTool(BaseTool): Invocation: ```python - # Returns (content_str, artifact_dict). The string is what the agent - # sees in a ToolMessage; the dict is the full Parallel response. - content, artifact = tool.invoke({ + result = tool.invoke({ "objective": "Latest developments in AI agents", "search_queries": ["AI agents 2026", "autonomous LLM systems"], "mode": "advanced", "max_results": 5, }) - print(content) - print(artifact["search_id"], len(artifact["results"])) + print(result["search_id"], len(result["results"])) ``` Domain and freshness filters: @@ -252,10 +235,10 @@ class ParallelWebSearchTool(BaseTool): Async: ```python - content, artifact = await tool.ainvoke({"search_queries": ["..."]}) + result = await tool.ainvoke({"search_queries": ["..."]}) ``` - Response artifact: + Response shape: ```python { "search_id": "search_abc123", @@ -285,18 +268,14 @@ class ParallelWebSearchTool(BaseTool): "Search the web using Parallel's Search API. " "Provides real-time web information with compressed, structured excerpts " "optimized for LLM consumption. Supports natural-language objectives, " - "keyword queries, domain filtering, and geo-targeting. Returns a " - "compact summary string plus the full structured response as artifact." + "keyword queries, domain filtering, and geo-targeting. Returns the " + "structured search response as a dict." 
) """The description passed to the model when performing tool calling.""" args_schema: type[BaseModel] = ParallelWebSearchInput """The schema passed to the model when performing tool calling.""" - response_format: Literal["content", "content_and_artifact"] = "content_and_artifact" - """Tools return ``(content, artifact)``: a compact summary string the - LLM sees, and the full Parallel API response dict for downstream code.""" - api_key: Optional[SecretStr] = Field(default=None) """Parallel API key. If not provided, will be read from PARALLEL_API_KEY env var.""" @@ -436,7 +415,7 @@ def _run( include_metadata: bool = True, timeout: Optional[int] = None, run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> tuple[str, dict[str, Any]]: + ) -> dict[str, Any]: """Execute the search using Parallel's Search API.""" if self._client is None: msg = "Parallel client not initialized." @@ -492,7 +471,7 @@ def _run( color="green", ) - return _format_results_for_llm(response), response + return response async def _arun( self, @@ -511,7 +490,7 @@ async def _arun( include_metadata: bool = True, timeout: Optional[int] = None, run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> tuple[str, dict[str, Any]]: + ) -> dict[str, Any]: """Async execute the search using Parallel's Search API.""" if self._async_client is None: msg = "Async Parallel client not initialized." @@ -573,4 +552,4 @@ async def _arun( color="green", ) - return _format_results_for_llm(response), response + return response diff --git a/tests/integration_tests/test_extract_tool.py b/tests/integration_tests/test_extract_tool.py index 6131d6f..48c072c 100644 --- a/tests/integration_tests/test_extract_tool.py +++ b/tests/integration_tests/test_extract_tool.py @@ -18,22 +18,6 @@ def api_key() -> str: return key -def _invoke(tool: ParallelExtractTool, args: dict) -> tuple[str, list[dict]]: - """Invoke via the tool_call form so we get back a ToolMessage with .artifact. 
- - Returns ``(content, artifact)``. - """ - msg = tool.invoke( - { - "args": args, - "id": "1", - "name": tool.name, - "type": "tool_call", - }, - ) - return msg.content, msg.artifact - - class TestParallelExtractToolIntegration: """Integration tests for ParallelExtractTool.""" @@ -41,21 +25,19 @@ def test_extract_single_url(self, api_key: str) -> None: """Test extracting content from a single URL.""" tool = ParallelExtractTool(api_key=api_key) - _, artifact = _invoke( - tool, + result = tool.invoke( { "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], "full_content": True, }, ) - assert len(artifact) == 1 + assert len(result) == 1 assert ( - artifact[0]["url"] - == "https://en.wikipedia.org/wiki/Artificial_intelligence" + result[0]["url"] == "https://en.wikipedia.org/wiki/Artificial_intelligence" ) - assert len(artifact[0]["content"]) > 0 - assert artifact[0]["title"] is not None + assert len(result[0]["content"]) > 0 + assert result[0]["title"] is not None def test_extract_multiple_urls(self, api_key: str) -> None: """Test extracting content from multiple URLs.""" @@ -66,10 +48,10 @@ def test_extract_multiple_urls(self, api_key: str) -> None: "https://en.wikipedia.org/wiki/Python_(programming_language)", ] - _, artifact = _invoke(tool, {"urls": urls}) + result = tool.invoke({"urls": urls}) - assert len(artifact) == 2 - for item in artifact: + assert len(result) == 2 + for item in result: assert "url" in item assert "content" in item @@ -77,8 +59,7 @@ def test_extract_with_search_objective(self, api_key: str) -> None: """Test extraction with search objective to focus content.""" tool = ParallelExtractTool(api_key=api_key) - _, artifact = _invoke( - tool, + result = tool.invoke( { "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], "search_objective": "What are the main applications of AI?", @@ -86,68 +67,66 @@ def test_extract_with_search_objective(self, api_key: str) -> None: }, ) - assert len(artifact) == 1 + assert len(result) == 
1 assert ( - artifact[0]["url"] - == "https://en.wikipedia.org/wiki/Artificial_intelligence" + result[0]["url"] == "https://en.wikipedia.org/wiki/Artificial_intelligence" ) - assert "excerpts" in artifact[0] - assert isinstance(artifact[0]["excerpts"], list) - assert len(artifact[0]["content"]) > 0 + assert "excerpts" in result[0] + assert isinstance(result[0]["excerpts"], list) + assert len(result[0]["content"]) > 0 def test_extract_with_search_queries(self, api_key: str) -> None: """Test extraction with search queries to focus content.""" tool = ParallelExtractTool(api_key=api_key) - _, artifact = _invoke( - tool, + result = tool.invoke( { "urls": ["https://en.wikipedia.org/wiki/Machine_learning"], "search_queries": ["neural networks", "training algorithms"], }, ) - assert len(artifact) == 1 - assert "excerpts" in artifact[0] - assert isinstance(artifact[0]["excerpts"], list) - assert len(artifact[0]["excerpts"]) > 0 + assert len(result) == 1 + assert "excerpts" in result[0] + assert isinstance(result[0]["excerpts"], list) + assert len(result[0]["excerpts"]) > 0 def test_extract_with_max_chars(self, api_key: str) -> None: """Test extraction with max_chars_per_extract limit.""" tool = ParallelExtractTool(api_key=api_key, max_chars_per_extract=1000) - _, artifact = _invoke( - tool, + result = tool.invoke( { "urls": ["https://en.wikipedia.org/wiki/Python_(programming_language)"], "full_content": True, }, ) - assert len(artifact) == 1 - assert len(artifact[0]["content"]) > 0 - assert artifact[0]["title"] is not None + assert len(result) == 1 + assert len(result[0]["content"]) > 0 + assert result[0]["title"] is not None - def test_extract_metadata_fields(self, api_key: str) -> None: - """Test that metadata fields are properly populated.""" + def test_extract_excerpts_metadata_round_trip(self, api_key: str) -> None: + """Excerpts and publish_date round-trip through `_format_response`.""" tool = ParallelExtractTool(api_key=api_key) - _, artifact = _invoke( - tool, 
{"urls": ["https://en.wikipedia.org/wiki/Machine_learning"]} + result = tool.invoke( + { + "urls": ["https://en.wikipedia.org/wiki/Machine_learning"], + "search_objective": "Define machine learning", + }, ) - assert len(artifact) > 0 - item = artifact[0] - assert "url" in item - assert "title" in item - assert "content" in item + assert len(result) > 0 + item = result[0] + assert "excerpts" in item + assert isinstance(item["excerpts"], list) def test_extract_invalid_url(self, api_key: str) -> None: """Test extraction handles invalid URLs gracefully.""" tool = ParallelExtractTool(api_key=api_key) - _, artifact = _invoke( - tool, + result = tool.invoke( { "urls": ["https://this-domain-does-not-exist-12345.com/"], "full_content": True, @@ -155,16 +134,15 @@ def test_extract_invalid_url(self, api_key: str) -> None: }, ) - assert len(artifact) == 1 - assert artifact[0]["url"] == "https://this-domain-does-not-exist-12345.com/" - assert "Error" in artifact[0]["content"] or "error_type" in artifact[0] + assert len(result) == 1 + assert result[0]["url"] == "https://this-domain-does-not-exist-12345.com/" + assert "Error" in result[0]["content"] or "error_type" in result[0] def test_extract_mixed_valid_invalid_urls(self, api_key: str) -> None: """Test extraction with mix of valid and invalid URLs.""" tool = ParallelExtractTool(api_key=api_key) - _, artifact = _invoke( - tool, + result = tool.invoke( { "urls": [ "https://en.wikipedia.org/wiki/Python_(programming_language)", @@ -174,61 +152,37 @@ def test_extract_mixed_valid_invalid_urls(self, api_key: str) -> None: }, ) - assert len(artifact) == 2 - assert len(artifact[0]["content"]) > 0 or len(artifact[1]["content"]) > 0 + assert len(result) == 2 + assert len(result[0]["content"]) > 0 or len(result[1]["content"]) > 0 @pytest.mark.asyncio async def test_extract_async(self, api_key: str) -> None: """Test async extraction functionality.""" tool = ParallelExtractTool(api_key=api_key) - msg = await tool.ainvoke( + result = await 
tool.ainvoke( { - "args": { - "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], - "full_content": True, - }, - "id": "1", - "name": tool.name, - "type": "tool_call", + "urls": ["https://en.wikipedia.org/wiki/Artificial_intelligence"], + "full_content": True, }, ) - artifact = msg.artifact - assert len(artifact) == 1 - assert len(artifact[0]["content"]) > 0 + assert len(result) == 1 + assert len(result[0]["content"]) > 0 assert ( - artifact[0]["url"] - == "https://en.wikipedia.org/wiki/Artificial_intelligence" + result[0]["url"] == "https://en.wikipedia.org/wiki/Artificial_intelligence" ) def test_extract_with_long_content(self, api_key: str) -> None: """Test extraction of long articles.""" tool = ParallelExtractTool(api_key=api_key) - _, artifact = _invoke( - tool, + result = tool.invoke( { "urls": ["https://en.wikipedia.org/wiki/History_of_the_United_States"], "full_content": True, }, ) - assert len(artifact) == 1 - assert len(artifact[0]["content"]) > 1000 - - def test_extract_different_content_types(self, api_key: str) -> None: - """Test extraction from different types of web pages.""" - tool = ParallelExtractTool(api_key=api_key) - - urls = [ - "https://www.wikipedia.org/", - "https://en.wikipedia.org/wiki/Main_Page", - ] - - _, artifact = _invoke(tool, {"urls": urls}) - - assert len(artifact) == 2 - for item in artifact: - assert "url" in item - assert "content" in item + assert len(result) == 1 + assert len(result[0]["content"]) > 1000 diff --git a/tests/unit_tests/test_chat_models.py b/tests/unit_tests/test_chat_models.py index 6356e3d..9e9e77b 100644 --- a/tests/unit_tests/test_chat_models.py +++ b/tests/unit_tests/test_chat_models.py @@ -2,10 +2,19 @@ from __future__ import annotations +from types import SimpleNamespace +from unittest.mock import Mock + +import pytest +from langchain_core.messages import AIMessage +from langchain_core.runnables import RunnableSequence from langchain_tests.unit_tests import ChatModelUnitTests +from pydantic 
import BaseModel, SecretStr from langchain_parallel.chat_models import ChatParallelWeb +_TEST_KEY = SecretStr("test") + class TestChatParallelWebUnit(ChatModelUnitTests): @property @@ -157,3 +166,161 @@ def has_structured_output(self) -> bool: def structured_output_kwargs(self) -> dict: # Parallel research models use json_schema, not function_calling. return {"method": "json_schema"} + + +class _Founder(BaseModel): + name: str + company: str + + +class TestChatParallelWebDirect: + """Direct unit tests for behaviors the standard suite doesn't cover.""" + + def test_model_kwarg_actually_sets_model(self) -> None: + """`ChatParallelWeb(model='lite')` selects 'lite' (regression test).""" + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + assert chat.model == "lite" + + def test_model_name_alias_back_compat(self) -> None: + """`ChatParallelWeb(model_name='lite')` still works via the validator shim.""" + # Pre-0.3.0 callers used `model_name=`; the validator maps it back + # to `model=`. `model_name` isn't a real field, so silence the type + # checker on this back-compat call. 
+ chat = ChatParallelWeb(model_name="lite", api_key=_TEST_KEY) # type: ignore[call-arg] + assert chat.model == "lite" + + def test_lc_attributes_exposes_model_name(self) -> None: + """`lc_attributes` surfaces the model under the LangChain-standard key.""" + chat = ChatParallelWeb(model="core", api_key=_TEST_KEY) + assert chat.lc_attributes["model_name"] == "core" + + def test_response_metadata_surfaces_basis_and_interaction_id(self) -> None: + """Basis / interaction_id / system_fingerprint round-trip on AIMessage.""" + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + choice = SimpleNamespace( + finish_reason="stop", + message=SimpleNamespace(content="Elon Musk founded SpaceX."), + ) + response = SimpleNamespace( + choices=[choice], + model="lite", + created=1700000000, + system_fingerprint="fp-1", + interaction_id="int-1", + basis=[ + SimpleNamespace( + model_dump=lambda: {"field": "answer", "citations": []}, + ), + ], + ) + result = chat._process_non_stream_response(response) + msg = result.generations[0].message + assert isinstance(msg, AIMessage) + assert msg.response_metadata["model_name"] == "lite" + assert msg.response_metadata["finish_reason"] == "stop" + assert msg.response_metadata["system_fingerprint"] == "fp-1" + assert msg.response_metadata["interaction_id"] == "int-1" + assert msg.response_metadata["basis"] == [ + {"field": "answer", "citations": []}, + ] + + def test_with_structured_output_rejects_speed(self) -> None: + """Speed silently ignores response_format; raise to make this loud.""" + chat = ChatParallelWeb(model="speed", api_key=_TEST_KEY) + with pytest.raises(ValueError, match="research models"): + chat.with_structured_output(_Founder) + + def test_with_structured_output_binds_response_format(self) -> None: + """Binding a pydantic schema produces a json_schema response_format.""" + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + runnable = chat.with_structured_output(_Founder) + assert isinstance(runnable, RunnableSequence) 
+ bound = runnable.first + rf = bound.kwargs["response_format"] # type: ignore[attr-defined] + assert rf["type"] == "json_schema" + assert rf["json_schema"]["name"] == "_Founder" + assert rf["json_schema"]["strict"] is True + assert "name" in rf["json_schema"]["schema"]["properties"] + + def test_with_structured_output_function_calling_routes_to_json_schema( + self, + ) -> None: + """method='function_calling' is routed to json_schema for compat. + + Parallel chat doesn't actually support tool calling; we accept + ``function_calling`` for cross-provider compatibility and produce a + json_schema response_format under the hood. + """ + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + runnable = chat.with_structured_output(_Founder, method="function_calling") + assert isinstance(runnable, RunnableSequence) + bound = runnable.first + assert bound.kwargs["response_format"]["type"] == "json_schema" # type: ignore[attr-defined] + + def test_with_structured_output_json_mode(self) -> None: + """method='json_mode' produces a json_object response_format.""" + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + runnable = chat.with_structured_output(method="json_mode") + assert isinstance(runnable, RunnableSequence) + bound = runnable.first + assert bound.kwargs["response_format"] == {"type": "json_object"} # type: ignore[attr-defined] + + def test_with_structured_output_include_raw_failure_capture(self) -> None: + """include_raw=True populates parsing_error on parse failure.""" + from langchain_core.runnables import RunnableLambda + + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + runnable = chat.with_structured_output(_Founder, include_raw=True) + # The capture lambda is the last step; pull it out and exercise directly + # so we don't need a live API call. 
+ assert isinstance(runnable, RunnableSequence) + capture = next( + step.func for step in runnable.steps if isinstance(step, RunnableLambda) + ) + result = capture(AIMessage(content="not json")) + assert isinstance(result["raw"], AIMessage) + assert result["parsed"] is None + assert result["parsing_error"] is not None + + def test_with_structured_output_include_raw_success(self) -> None: + """include_raw=True wraps the parsed pydantic object.""" + from langchain_core.runnables import RunnableLambda + + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + runnable = chat.with_structured_output(_Founder, include_raw=True) + assert isinstance(runnable, RunnableSequence) + capture = next( + step.func for step in runnable.steps if isinstance(step, RunnableLambda) + ) + result = capture( + AIMessage(content='{"name": "Elon Musk", "company": "SpaceX"}'), + ) + assert isinstance(result["parsed"], _Founder) + assert result["parsed"].name == "Elon Musk" + assert result["parsing_error"] is None + + def test_response_metadata_stream_chunk_includes_basis(self) -> None: + """Streaming chunks expose basis on the last chunk.""" + chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) + chunk = SimpleNamespace( + choices=[ + SimpleNamespace( + finish_reason="stop", + delta=SimpleNamespace(content="."), + ), + ], + model="lite", + interaction_id="int-2", + basis=[ + SimpleNamespace( + model_dump=lambda: {"field": "answer", "citations": []}, + ), + ], + system_fingerprint=None, + ) + out = chat._process_stream_chunk(chunk, run_manager=Mock()) + assert out is not None + meta = out.message.response_metadata + assert meta["model_name"] == "lite" + assert meta["interaction_id"] == "int-2" + assert meta["basis"] == [{"field": "answer", "citations": []}] diff --git a/tests/unit_tests/test_extract_tool.py b/tests/unit_tests/test_extract_tool.py index f2f2e1d..887b9f7 100644 --- a/tests/unit_tests/test_extract_tool.py +++ b/tests/unit_tests/test_extract_tool.py @@ -7,6 +7,7 @@ import 
pytest +from langchain_parallel._types import ExcerptSettings, FetchPolicy, FullContentSettings from langchain_parallel.extract_tool import ParallelExtractTool @@ -27,16 +28,13 @@ def test_extract_tool_initialization(self) -> None: assert tool.name == "parallel_extract" assert tool.base_url == "https://api.parallel.ai" assert tool.max_chars_per_extract is None - assert tool.response_format == "content_and_artifact" def test_extract_tool_initialization_with_params(self) -> None: """Test extract tool initialization with custom parameters.""" with patch( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): - tool = ParallelExtractTool( - max_chars_per_extract=3000, - ) + tool = ParallelExtractTool(max_chars_per_extract=3000) assert tool.max_chars_per_extract == 3000 @patch("langchain_parallel.extract_tool.get_parallel_client") @@ -46,7 +44,7 @@ def test_extract_single_url( mock_async_factory: Mock, mock_sync_factory: Mock, ) -> None: - """Test extracting content from a single URL via the GA endpoint.""" + """Single URL hits the GA endpoint and returns a list of dicts.""" sync_client = Mock() sync_client.extract.return_value = _make_response( { @@ -69,18 +67,16 @@ def test_extract_single_url( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - content, artifact = tool._run( - urls=["https://example.com"], full_content=True - ) + result = tool._run(urls=["https://example.com"], full_content=True) sync_client.extract.assert_called_once() sync_client.beta.extract.assert_not_called() - assert len(artifact) == 1 - assert artifact[0]["url"] == "https://example.com" - assert artifact[0]["title"] == "Test Article" - assert artifact[0]["content"] == "This is the extracted content." 
- assert artifact[0]["publish_date"] == "2024-01-01" - assert "Test Article" in content + assert isinstance(result, list) + assert len(result) == 1 + assert result[0]["url"] == "https://example.com" + assert result[0]["title"] == "Test Article" + assert result[0]["content"] == "This is the extracted content." + assert result[0]["publish_date"] == "2024-01-01" @patch("langchain_parallel.extract_tool.get_parallel_client") @patch("langchain_parallel.extract_tool.get_async_parallel_client") @@ -116,11 +112,11 @@ def test_extract_multiple_urls( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - _, artifact = tool._run( + result = tool._run( urls=["https://example1.com", "https://example2.com"], full_content=True, ) - assert [r["content"] for r in artifact] == ["Content 1", "Content 2"] + assert [r["content"] for r in result] == ["Content 1", "Content 2"] @patch("langchain_parallel.extract_tool.get_parallel_client") @patch("langchain_parallel.extract_tool.get_async_parallel_client") @@ -158,19 +154,19 @@ def test_extract_with_errors( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - _, artifact = tool._run( + result = tool._run( urls=["https://example1.com", "https://example2.com"], full_content=True, ) - assert len(artifact) == 2 - assert artifact[0]["content"] == "Content 1" - assert artifact[1]["error_type"] == "http_error" - assert artifact[1]["http_status_code"] == 404 - assert "Error: http_error" in artifact[1]["content"] + assert len(result) == 2 + assert result[0]["content"] == "Content 1" + assert result[1]["error_type"] == "http_error" + assert result[1]["http_status_code"] == 404 + assert "Error: http_error" in result[1]["content"] @patch("langchain_parallel.extract_tool.get_parallel_client") @patch("langchain_parallel.extract_tool.get_async_parallel_client") - def test_extract_max_chars_default( + def test_full_content_precedence_tool_level_default( 
self, mock_async_factory: Mock, mock_sync_factory: Mock, @@ -200,9 +196,172 @@ def test_extract_max_chars_default( tool._run(urls=["https://example.com"], full_content=True) kwargs = sync_client.extract.call_args.kwargs assert kwargs["advanced_settings"]["full_content"] == { - "max_chars_per_result": 5000 + "max_chars_per_result": 5000, + } + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_full_content_precedence_explicit_settings_wins( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Explicit FullContentSettings beats the tool-level cap.""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + {"extract_id": "e", "results": [], "errors": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool(max_chars_per_extract=5000) + tool._run( + urls=["https://example.com"], + full_content=FullContentSettings(max_chars_per_result=200), + ) + kwargs = sync_client.extract.call_args.kwargs + assert kwargs["advanced_settings"]["full_content"] == { + "max_chars_per_result": 200, + } + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_full_content_false_omits_key( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """full_content=False produces no full_content key in advanced_settings.""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + {"extract_id": "e", "results": [], "errors": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool() + 
tool._run(urls=["https://example.com"], full_content=False) + kwargs = sync_client.extract.call_args.kwargs + advanced = kwargs.get("advanced_settings") or {} + assert "full_content" not in advanced + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_excerpts_bool_true_is_no_op( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Backward compat: excerpts=True (the default) adds no excerpt_settings.""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + {"extract_id": "e", "results": [], "errors": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool() + tool._run(urls=["https://example.com"], excerpts=True) + advanced = sync_client.extract.call_args.kwargs.get("advanced_settings") + # No advanced settings at all when only the bool default is set. 
+ assert advanced is None + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_excerpts_bool_false_warns( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """excerpts=False emits a DeprecationWarning (v1 always returns excerpts).""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + {"extract_id": "e", "results": [], "errors": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool() + with pytest.warns(DeprecationWarning, match="always returns excerpts"): + tool._run(urls=["https://example.com"], excerpts=False) + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_advanced_settings_envelope( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """ExcerptSettings + FetchPolicy + full_content nest into advanced_settings.""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + {"extract_id": "e", "results": [], "errors": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool() + tool._run( + urls=["https://example.com"], + excerpts=ExcerptSettings(max_chars_per_result=2000), + full_content=FullContentSettings(max_chars_per_result=8000), + fetch_policy=FetchPolicy(max_age_seconds=86400), + ) + kwargs = sync_client.extract.call_args.kwargs + assert kwargs["advanced_settings"] == { + "excerpt_settings": {"max_chars_per_result": 2000}, + "fetch_policy": { + "max_age_seconds": 86400, + "disable_cache_fallback": False, + }, + "full_content": 
{"max_chars_per_result": 8000}, } + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + def test_top_level_passthrough_fields( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """`max_chars_total`, `client_model`, `session_id` flow through verbatim.""" + sync_client = Mock() + sync_client.extract.return_value = _make_response( + {"extract_id": "e", "results": [], "errors": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool() + tool._run( + urls=["https://example.com"], + max_chars_total=42_000, + client_model="claude-opus-4-7", + session_id="sess-1", + ) + kwargs = sync_client.extract.call_args.kwargs + assert kwargs["max_chars_total"] == 42_000 + assert kwargs["client_model"] == "claude-opus-4-7" + assert kwargs["session_id"] == "sess-1" + @patch("langchain_parallel.extract_tool.get_parallel_client") @patch("langchain_parallel.extract_tool.get_async_parallel_client") def test_extract_handles_api_error( @@ -257,9 +416,34 @@ async def test_extract_async_functionality( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - _, artifact = await tool._arun(urls=["https://example.com"]) - assert len(artifact) == 1 - assert artifact[0]["content"] == "Async content" + result = await tool._arun(urls=["https://example.com"]) + assert isinstance(result, list) + assert len(result) == 1 + assert result[0]["content"] == "Async content" + + @patch("langchain_parallel.extract_tool.get_parallel_client") + @patch("langchain_parallel.extract_tool.get_async_parallel_client") + @pytest.mark.asyncio + async def test_extract_async_handles_api_error( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Async API exceptions are wrapped 
as ValueError.""" + async_client = Mock() + async_client.extract = AsyncMock(side_effect=Exception("Async API Error")) + mock_async_factory.return_value = async_client + mock_sync_factory.return_value = Mock() + + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool() + with pytest.raises( + ValueError, + match="Error calling Parallel Extract API: Async API Error", + ): + await tool._arun(urls=["https://example.com"]) @patch("langchain_parallel.extract_tool.get_parallel_client") @patch("langchain_parallel.extract_tool.get_async_parallel_client") @@ -280,5 +464,14 @@ def test_extract_empty_results( "langchain_parallel.extract_tool.get_api_key", return_value="test-key" ): tool = ParallelExtractTool() - _, artifact = tool._run(urls=["https://example.com"]) - assert artifact == [] + result = tool._run(urls=["https://example.com"]) + assert result == [] + + def test_extract_empty_urls_raises(self) -> None: + """urls=[] raises ValueError.""" + with patch( + "langchain_parallel.extract_tool.get_api_key", return_value="test-key" + ): + tool = ParallelExtractTool() + with pytest.raises(ValueError, match="At least one URL"): + tool._run(urls=[]) diff --git a/tests/unit_tests/test_search_tool.py b/tests/unit_tests/test_search_tool.py index 8ea86a2..485360d 100644 --- a/tests/unit_tests/test_search_tool.py +++ b/tests/unit_tests/test_search_tool.py @@ -7,6 +7,7 @@ import pytest +from langchain_parallel._types import ExcerptSettings, FetchPolicy, SourcePolicy from langchain_parallel.search_tool import ParallelWebSearchTool, _normalize_mode @@ -26,7 +27,6 @@ def test_tool_initialization(self) -> None: tool = ParallelWebSearchTool() assert tool.name == "parallel_web_search" assert "Search the web" in tool.description - assert tool.response_format == "content_and_artifact" @patch("langchain_parallel.search_tool.get_parallel_client") @patch("langchain_parallel.search_tool.get_async_parallel_client") @@ -35,7 +35,7 @@ 
def test_run_uses_v1_endpoint_when_search_queries_provided( mock_async_factory: Mock, mock_sync_factory: Mock, ) -> None: - """search_queries triggers the GA endpoint.""" + """search_queries triggers the GA endpoint and returns a dict.""" sync_client = Mock() sync_client.search.return_value = _make_response( { @@ -56,16 +56,20 @@ def test_run_uses_v1_endpoint_when_search_queries_provided( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - content, artifact = tool._run( + result = tool._run( search_queries=["query 1"], max_results=3, mode="advanced", ) sync_client.search.assert_called_once() sync_client.beta.search.assert_not_called() - assert artifact["search_id"] == "search-1" - assert artifact["search_metadata"]["endpoint"] == "v1" - assert "Test" in content + kwargs = sync_client.search.call_args.kwargs + assert kwargs["search_queries"] == ["query 1"] + assert kwargs["mode"] == "advanced" + assert kwargs["advanced_settings"] == {"max_results": 3} + assert isinstance(result, dict) + assert result["search_id"] == "search-1" + assert result["search_metadata"]["endpoint"] == "v1" @patch("langchain_parallel.search_tool.get_parallel_client") @patch("langchain_parallel.search_tool.get_async_parallel_client") @@ -86,11 +90,21 @@ def test_run_falls_back_to_beta_when_objective_only( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - with pytest.warns(DeprecationWarning, match="search_queries"): - _, artifact = tool._run(objective="What is AI?") + with pytest.warns(DeprecationWarning, match="v1beta"): + result = tool._run( + objective="What is AI?", + mode="advanced", + source_policy={"include_domains": ["wikipedia.org"]}, + ) sync_client.beta.search.assert_called_once() sync_client.search.assert_not_called() - assert artifact["search_metadata"]["endpoint"] == "v1beta" + beta_kwargs = sync_client.beta.search.call_args.kwargs + # advanced -> agentic on the legacy 
endpoint + assert beta_kwargs["mode"] == "agentic" + assert beta_kwargs["source_policy"] == { + "include_domains": ["wikipedia.org"], + } + assert result["search_metadata"]["endpoint"] == "v1beta" @patch("langchain_parallel.search_tool.get_parallel_client") @patch("langchain_parallel.search_tool.get_async_parallel_client") @@ -113,17 +127,61 @@ def test_run_translates_legacy_mode( tool = ParallelWebSearchTool() with pytest.warns(DeprecationWarning, match="legacy beta value"): tool._run(search_queries=["q"], mode="agentic") + assert sync_client.search.call_args.kwargs["mode"] == "advanced" + + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_advanced_settings_envelope_pydantic( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Pydantic models pack into `advanced_settings` correctly.""" + sync_client = Mock() + sync_client.search.return_value = _make_response( + {"search_id": "s", "results": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.search_tool.get_api_key", return_value="test-key" + ): + tool = ParallelWebSearchTool() + tool._run( + search_queries=["q"], + excerpts=ExcerptSettings(max_chars_per_result=1500), + fetch_policy=FetchPolicy(max_age_seconds=86400), + source_policy=SourcePolicy( + include_domains=["nature.com"], + after_date="2025-01-01", + ), + location="us", + max_results=15, + ) kwargs = sync_client.search.call_args.kwargs - assert kwargs["mode"] == "advanced" + assert kwargs["advanced_settings"] == { + "excerpt_settings": {"max_chars_per_result": 1500}, + "fetch_policy": { + "max_age_seconds": 86400, + "disable_cache_fallback": False, + }, + "source_policy": { + "include_domains": ["nature.com"], + "after_date": "2025-01-01", + }, + "max_results": 15, + "location": "us", + } @patch("langchain_parallel.search_tool.get_parallel_client") 
@patch("langchain_parallel.search_tool.get_async_parallel_client") - def test_advanced_settings_envelope( + def test_advanced_settings_envelope_dict( self, mock_async_factory: Mock, mock_sync_factory: Mock, ) -> None: - """Flat settings flow into the GA `advanced_settings` envelope.""" + """Raw-dict source_policy is accepted alongside the pydantic model.""" sync_client = Mock() sync_client.search.return_value = _make_response( {"search_id": "s", "results": []}, @@ -148,6 +206,36 @@ def test_advanced_settings_envelope( "location": "us", } + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_top_level_passthrough_fields( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """`max_chars_total`, `client_model`, `session_id` flow through verbatim.""" + sync_client = Mock() + sync_client.search.return_value = _make_response( + {"search_id": "s", "results": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.search_tool.get_api_key", return_value="test-key" + ): + tool = ParallelWebSearchTool() + tool._run( + search_queries=["q"], + max_chars_total=42_000, + client_model="claude-opus-4-7", + session_id="sess-1", + ) + kwargs = sync_client.search.call_args.kwargs + assert kwargs["max_chars_total"] == 42_000 + assert kwargs["client_model"] == "claude-opus-4-7" + assert kwargs["session_id"] == "sess-1" + @patch("langchain_parallel.search_tool.get_parallel_client") @patch("langchain_parallel.search_tool.get_async_parallel_client") def test_run_handles_api_error( @@ -195,8 +283,32 @@ async def test_async_functionality( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - _, artifact = await tool._arun(search_queries=["q"]) - assert artifact["search_id"] == "async-1" + result = await tool._arun(search_queries=["q"]) + assert 
isinstance(result, dict) + assert result["search_id"] == "async-1" + + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + async def test_async_handles_api_error( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Async API exceptions are wrapped as ValueError.""" + async_client = Mock() + async_client.search = AsyncMock(side_effect=Exception("Async API Error")) + mock_async_factory.return_value = async_client + mock_sync_factory.return_value = Mock() + + with patch( + "langchain_parallel.search_tool.get_api_key", return_value="test-key" + ): + tool = ParallelWebSearchTool() + with pytest.raises( + ValueError, + match="Error calling Parallel Search API: Async API Error", + ): + await tool._arun(search_queries=["q"]) class TestNormalizeMode: From 57d68d5031bc0190e6027ebf68afa8592d632b3d Mon Sep 17 00:00:00 2001 From: Matt Harris Date: Mon, 27 Apr 2026 14:49:56 -0400 Subject: [PATCH 3/5] Drop v1beta search fallback; dedupe _run/_arun; add canonical name aliases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v1beta-fallback path was doing two things at once: silently switching endpoints when search_queries was missing, AND translating param shapes (basic→one-shot, advanced→agentic, advanced_settings→flat). Both reviewers and the maintainer pushed back — the v1 contract requires search_queries, so honor it. Now: pydantic validates `search_queries: list[str]` (required field) at the input-schema layer, and the kwargs builder raises ValueError with a migration hint pointing at the Parallel migration guide. Removes ~40 lines of legacy-mapping plumbing, removes the `endpoint` string threaded through _run/_arun/_build_metadata, removes "endpoint": "v1" from search_metadata. 
Dedupe pass on _run/_arun: - Search: extracted _finalize_response (response_obj -> dict + metadata), _start_text and _completion_text static helpers for run_manager log messages. Sync and async bodies are now ~25 lines each. - Extract: same shape, with _start_text and _completion_text. Net ~80 lines deleted; behavior unchanged. Naming aliases (forward-compat, no breaking changes): - ChatParallel = ChatParallelWeb - ParallelSearchTool = ParallelWebSearchTool - ParallelExtractTool unchanged Both new and old names exported from __init__; both ARE the same class object, so isinstance / serdes / snapshot tests are unaffected. README and CHANGELOG now lead with the new canonical names; old names documented as aliases. Tests: - Replaced test_run_falls_back_to_beta_when_objective_only with test_run_requires_search_queries asserting the ValueError + hint. - Added two alias-identity tests (one per tool). - Updated the integration-test fixture to pass search_queries. 63 unit tests pass; lint, format, mypy on src+tests all clean. End-to-end smoke against the real API confirms: aliases resolve to the same class, search_queries-missing raises a clean validation error, search_metadata no longer carries the "endpoint" key, all happy paths work. 
--- CHANGELOG.md | 36 +++-- README.md | 17 +- langchain_parallel/__init__.py | 6 +- langchain_parallel/chat_models.py | 7 + langchain_parallel/extract_tool.py | 56 +++---- langchain_parallel/search_tool.py | 169 +++++++++----------- tests/integration_tests/test_search_tool.py | 6 +- tests/unit_tests/test_chat_models.py | 7 + tests/unit_tests/test_search_tool.py | 43 ++--- 9 files changed, 169 insertions(+), 178 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 396ca9a..6a886c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,38 +7,48 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.3.0] - 2026-04-27 -This release migrates Search and Extract to Parallel's v1 GA endpoints, surfaces citations + structured output on the chat model, and bumps the SDK to `0.5.1`. **All existing 0.2.x call sites continue to work** — return shapes and field names are preserved, with deprecation warnings on legacy paths. +This release migrates Search and Extract to Parallel's v1 GA endpoints, surfaces citations + structured output on the chat model, and bumps the SDK to `0.5.1`. ### Added -- **Search/Extract GA endpoints**: `ParallelWebSearchTool` and `ParallelExtractTool` now call `client.search` / `client.extract` (the `/v1` GA paths) by default. New parameters surfaced from the GA contract on both tools: `max_chars_total`, `client_model`, `session_id`, `location` (Search), and the `advanced_settings` envelope is built automatically from existing flat fields. -- **`ChatParallelWeb.with_structured_output()`**: returns a `Runnable` producing a typed object (pydantic model or dict) via Parallel's `response_format` JSON-schema on the research models (`lite`, `base`, `core`). `method="json_schema"` (default), `method="json_mode"`, and `method="function_calling"` (routed to `json_schema` for cross-provider compatibility) are supported. Raises a clear `ValueError` on `model="speed"` since that model silently ignores structured-output requests. 
`include_raw=True` returns `{"raw", "parsed", "parsing_error"}` and properly captures parser failures. +- **Canonical naming**: new aliases `ChatParallel` and `ParallelSearchTool` are the recommended names going forward; the previous `ChatParallelWeb` and `ParallelWebSearchTool` continue to work indefinitely as aliases (same class objects). +- **Search/Extract GA endpoints**: `ParallelSearchTool` and `ParallelExtractTool` now call `client.search` / `client.extract` (the `/v1` GA paths). New parameters surfaced from the GA contract on both tools: `max_chars_total`, `client_model`, `session_id`, `location` (Search). The `advanced_settings` envelope is built automatically from the existing flat fields. +- **`ChatParallel.with_structured_output()`**: returns a `Runnable` producing a typed object (pydantic model or dict) via Parallel's `response_format` JSON-schema on the research models (`lite`, `base`, `core`). `method="json_schema"` (default), `method="json_mode"`, and `method="function_calling"` (routed to `json_schema` for cross-provider compatibility) are supported. Raises a clear `ValueError` on `model="speed"` since that model silently ignores structured-output requests. `include_raw=True` returns `{"raw", "parsed", "parsing_error"}` and properly captures parser failures. - **Citations on chat responses**: for the research models, `AIMessage.response_metadata["basis"]` carries the API's per-field citations / reasoning / confidence list. `response_metadata["interaction_id"]` is surfaced for multi-turn context chaining; `system_fingerprint` is forwarded when present. -- **`SourcePolicy` pydantic model** in `langchain_parallel._types` mirroring the API's `include_domains` / `exclude_domains` / `after_date`. Both `SourcePolicy(...)` and a raw dict are accepted on `ParallelWebSearchTool`. +- **`SourcePolicy` pydantic model** in `langchain_parallel._types` mirroring the API's `include_domains` / `exclude_domains` / `after_date`. 
Both `SourcePolicy(...)` and a raw dict are accepted on `ParallelSearchTool`. -### Changed (backward compatible) +### Changed -- **`mode` strings**: legacy values `"fast"`, `"one-shot"`, and `"agentic"` continue to accept and call the API correctly, with a `DeprecationWarning` mapping them to the GA values (`"fast"` / `"one-shot"` → `"basic"`, `"agentic"` → `"advanced"`). The GA values `"basic"` and `"advanced"` are now the canonical set. -- **Search behavior**: when `search_queries` is omitted, the call falls back to the deprecated `/v1beta/search` endpoint with a `DeprecationWarning`. The GA endpoint requires `search_queries` (1–5 keyword strings); pass them explicitly to silence the warning and use `/v1`. -- **Extract `excerpts: bool` is now a no-op**: the GA Extract API always returns excerpts, so passing `excerpts=True` (the default) is unchanged on the wire and `excerpts=False` is accepted with a `DeprecationWarning`. Use `ExcerptSettings(max_chars_per_result=...)` to control per-result size. +- **BREAKING — `search_queries` is required for Search**. Previously the tool accepted `objective` alone (silently calling the deprecated `/v1beta` endpoint). With v1 GA, calls without `search_queries` raise `ValueError` with a migration hint pointing at https://docs.parallel.ai/search/search-migration-guide. Existing call sites that pair `objective` with `search_queries` are unaffected. +- **`mode` strings**: legacy values `"fast"`, `"one-shot"`, and `"agentic"` continue to call the API correctly with a `DeprecationWarning` mapping them to the GA values (`"fast"` / `"one-shot"` → `"basic"`, `"agentic"` → `"advanced"`). The GA values `"basic"` and `"advanced"` are now the canonical set. +- **Extract `excerpts: bool` is now a no-op**: the GA Extract API always returns excerpts, so passing `excerpts=True` (the default) is unchanged on the wire and `excerpts=False` is accepted with a `DeprecationWarning`. 
Use `ExcerptSettings(max_chars_per_result=…)` to control per-result size. - **`response_metadata["model_name"]`**: chat completions now emit the LangChain 1.x standard key `model_name` (was `model`). Tracing systems and `langchain-tests`' standard suite check for this name. - **`parallel-web` SDK bumped** from `^0.3.3` to `^0.5.1`. Brings in the v1 GA Search/Extract types, `AdvancedSearchSettingsParam` / `AdvancedExtractSettingsParam`, and the FindAll / Task Group / Monitor surfaces (not yet exposed by this integration — see `IMPROVEMENT_PLAN.md` Phase 2). - **Slimmed `_client.py`**: the four hand-rolled `ParallelSearchClient` / `AsyncParallelSearchClient` / `ParallelExtractClient` / `AsyncParallelExtractClient` wrapper classes have been removed in favor of using `parallel.Parallel` / `parallel.AsyncParallel` directly. Internal change; no public surface impact. +- **`_run`/`_arun` deduped**: extracted `_finalize_response`, `_start_text`, and `_completion_text` helpers on both tools so the sync and async bodies are now ~25 lines each instead of ~50. - `ParallelExtractTool.full_content` precedence is now explicit: an explicit `FullContentSettings` (or dict) on the call always wins over the tool-level `max_chars_per_extract`; the latter only applies when `full_content=True` is passed as a plain bool. ### Fixed -- `ChatParallelWeb(model="lite")` now actually selects the `lite` model. Pre-0.3.0 the `Field(alias="model_name")` on the `model` field silently swallowed the `model=` kwarg and forced callers into the default `"speed"`. Both `ChatParallelWeb(model="lite")` and `ChatParallelWeb(model_name="lite")` work in 0.3.0 — the latter via a `model_validator` that maps `model_name=` to `model=` for back-compat. `lc_attributes` still serializes the field as `model_name` for tracing parity. +- `ChatParallel(model="lite")` now actually selects the `lite` model. 
Pre-0.3.0 the `Field(alias="model_name")` on the `model` field silently swallowed the `model=` kwarg and forced callers into the default `"speed"`. Both `ChatParallel(model="lite")` and `ChatParallel(model_name="lite")` work in 0.3.0 — the latter via a `model_validator` that maps `model_name=` to `model=` for back-compat. `lc_attributes` still serializes the field as `model_name` for tracing parity. - `py.typed` is now bundled into the wheel via the `[tool.poetry] include` directive, so downstream `mypy` runs see the package's type information. - `with_structured_output(include_raw=True)` correctly populates `parsing_error` on parse failure (previously always `None`). ### Migration -For most users, **no code changes are required**. The remaining recommended-but-optional updates: - -- **Search**: add `search_queries=["…", "…"]` (1–5 keyword strings) to use the GA `/v1` endpoint and silence the v1beta-fallback deprecation warning. +- **Search** (only required change for most users): add `search_queries=[…]` (1-5 keyword strings, 3-6 words each). Pair with the existing `objective=…` for best results. + ```python + # Before (0.2.x — silently used /v1beta) + tool.invoke({"objective": "What are the latest AI breakthroughs?"}) + + # After (0.3.x — uses /v1 GA) + tool.invoke({ + "search_queries": ["latest AI breakthroughs", "AI advances 2026"], + "objective": "What are the latest AI breakthroughs?", + }) + ``` - **Search mode**: rename `mode="one-shot"`/`"fast"` → `mode="basic"` and `mode="agentic"` → `mode="advanced"` to silence the legacy-value deprecation warning. -- **Chat**: prefer `ChatParallelWeb(model="lite")` (or `"base"` / `"core"`) over `model_name="..."`. Read citations off `response.response_metadata["basis"]` and structured outputs via `chat.with_structured_output(MyPydanticModel)`. +- **Chat**: prefer `ChatParallel(model="lite")` (or `"base"` / `"core"`) over `model_name="..."`. 
Read citations off `response.response_metadata["basis"]` and structured outputs via `chat.with_structured_output(MyPydanticModel)`. The old class name `ChatParallelWeb` continues to work. ## [0.2.0] - 2025-12-01 diff --git a/README.md b/README.md index 83ecb5a..a5c61b4 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,14 @@ This package provides LangChain integrations for [Parallel](https://docs.paralle ## Features -- **Chat Models**: `ChatParallelWeb` - Real-time web research chat completions -- **Search Tools**: `ParallelWebSearchTool` - Direct access to Parallel's Search API -- **Extract Tools**: `ParallelExtractTool` - Clean content extraction from web pages -- **Streaming Support**: Real-time response streaming -- **Async/Await**: Full asynchronous operation support -- **OpenAI Compatible**: Uses familiar OpenAI SDK patterns -- **LangChain Integration**: Seamless integration with LangChain ecosystem +- **Chat Models**: `ChatParallel` (formerly `ChatParallelWeb`) — real-time web research chat completions, with citations and structured output on the research models. +- **Search Tool**: `ParallelSearchTool` (formerly `ParallelWebSearchTool`) — direct access to Parallel's GA `/v1/search` endpoint. +- **Extract Tool**: `ParallelExtractTool` — clean content extraction from web pages via `/v1/extract`. +- **Streaming Support**: Real-time response streaming on chat. +- **Async/Await**: Full asynchronous operation support. +- **LangChain Integration**: Pydantic input schemas, `bind`-able tools, `with_structured_output()`, `lc_serializable`. + +> Note: the older names (`ChatParallelWeb`, `ParallelWebSearchTool`) continue to work as aliases. ## Installation @@ -214,7 +215,7 @@ for r in result["results"]: | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | `objective` | `Optional[str]` | `None` | Natural-language description of research goal (≤5000 chars). 
| -| `search_queries` | `Optional[List[str]]` | `None` | Keyword queries (max 5, 200 chars each). Required for the GA endpoint; without it, calls fall back to `/v1beta` with a deprecation warning. | +| `search_queries` | `List[str]` | Required | 1-5 keyword queries (3-6 words each, ≤200 chars). Pair with an optional `objective` for best results. | | `max_results` | `int` | `10` | Maximum results to return (1–40). | | `excerpts` | `Optional[ExcerptSettings]` | `None` | Per-result excerpt-size cap. | | `max_chars_total` | `Optional[int]` | `None` | Cap on total excerpt characters across all results. | diff --git a/langchain_parallel/__init__.py b/langchain_parallel/__init__.py index 1543082..c3ade94 100644 --- a/langchain_parallel/__init__.py +++ b/langchain_parallel/__init__.py @@ -6,9 +6,9 @@ FullContentSettings, SourcePolicy, ) -from langchain_parallel.chat_models import ChatParallelWeb +from langchain_parallel.chat_models import ChatParallel, ChatParallelWeb from langchain_parallel.extract_tool import ParallelExtractTool -from langchain_parallel.search_tool import ParallelWebSearchTool +from langchain_parallel.search_tool import ParallelSearchTool, ParallelWebSearchTool try: __version__ = metadata.version(__package__ or __name__) @@ -18,11 +18,13 @@ del metadata # optional, avoids polluting the results of dir(__package__) __all__ = [ + "ChatParallel", "ChatParallelWeb", "ExcerptSettings", "FetchPolicy", "FullContentSettings", "ParallelExtractTool", + "ParallelSearchTool", "ParallelWebSearchTool", "SourcePolicy", "__version__", diff --git a/langchain_parallel/chat_models.py b/langchain_parallel/chat_models.py index 25098c9..8441554 100644 --- a/langchain_parallel/chat_models.py +++ b/langchain_parallel/chat_models.py @@ -732,3 +732,10 @@ def _parse_with_capture(raw: AIMessage) -> dict[str, Any]: return bound | _parse_with_capture return bound | output_parser + + +#: Forward-compat alias for :class:`ChatParallelWeb`. 
+#: +#: Prefer ChatParallel in new code; ChatParallelWeb will continue to +#: work indefinitely as an alias for this class. +ChatParallel = ChatParallelWeb diff --git a/langchain_parallel/extract_tool.py b/langchain_parallel/extract_tool.py index 5a7ba20..6404d45 100644 --- a/langchain_parallel/extract_tool.py +++ b/langchain_parallel/extract_tool.py @@ -332,6 +332,27 @@ def _build_call_kwargs( kwargs["timeout"] = timeout return kwargs + @staticmethod + def _start_text(urls: list[str], *, async_: bool) -> str: + """Build the run-manager start-of-extraction log message.""" + prefix = ( + "Starting async content extraction from" + if async_ + else "Starting content extraction from" + ) + count = len(urls) + return f"{prefix} {count} URL{'' if count == 1 else 's'}\n" + + @staticmethod + def _completion_text(formatted: list[dict[str, Any]], *, async_: bool) -> str: + """Build the run-manager end-of-extraction log message.""" + prefix = "Async extraction completed" if async_ else "Extraction completed" + success = sum(1 for item in formatted if "error_type" not in item) + errors = len(formatted) - success + if errors: + return f"{prefix}: {success} succeeded, {errors} failed\n" + return f"{prefix}: {success} URL{'' if success == 1 else 's'} processed\n" + def _run( self, urls: list[str], @@ -352,12 +373,7 @@ def _run( raise RuntimeError(msg) if run_manager: - count = len(urls) - run_manager.on_text( - f"Starting content extraction from {count} URL" - f"{'' if count == 1 else 's'}\n", - color="blue", - ) + run_manager.on_text(self._start_text(urls, async_=False), color="blue") kwargs = self._build_call_kwargs( urls=urls, @@ -381,21 +397,11 @@ def _run( raise ValueError(msg) from e formatted = self._format_response(response_obj.model_dump()) - if run_manager: - success_count = sum(1 for item in formatted if "error_type" not in item) - error_count = len(formatted) - success_count run_manager.on_text( - ( - f"Extraction completed: {success_count} succeeded, " - 
f"{error_count} failed\n" - if error_count - else f"Extraction completed: {success_count} URL" - f"{'' if success_count == 1 else 's'} processed\n" - ), + self._completion_text(formatted, async_=False), color="green", ) - return formatted async def _arun( @@ -418,10 +424,8 @@ async def _arun( raise RuntimeError(msg) if run_manager: - count = len(urls) await run_manager.on_text( - f"Starting async content extraction from {count} URL" - f"{'' if count == 1 else 's'}\n", + self._start_text(urls, async_=True), color="blue", ) @@ -450,19 +454,9 @@ async def _arun( raise ValueError(msg) from e formatted = self._format_response(response_obj.model_dump()) - if run_manager: - success_count = sum(1 for item in formatted if "error_type" not in item) - error_count = len(formatted) - success_count await run_manager.on_text( - ( - f"Async extraction completed: {success_count} succeeded, " - f"{error_count} failed\n" - if error_count - else f"Async extraction completed: {success_count} URL" - f"{'' if success_count == 1 else 's'} processed\n" - ), + self._completion_text(formatted, async_=True), color="green", ) - return formatted diff --git a/langchain_parallel/search_tool.py b/langchain_parallel/search_tool.py index 06f5d2e..197b04f 100644 --- a/langchain_parallel/search_tool.py +++ b/langchain_parallel/search_tool.py @@ -90,13 +90,11 @@ class ParallelWebSearchInput(BaseModel): "alongside `search_queries` for best results." ), ) - search_queries: Optional[list[str]] = Field( - default=None, + search_queries: list[str] = Field( description=( - "List of keyword search queries to guide the search. Maximum 5 " - "queries, each up to 200 characters (3-6 words works best). " - "Required for the GA endpoint; if only `objective` is provided, " - "the call falls back to the deprecated v1beta endpoint." + "Required. 1-5 keyword search queries (3-6 words each, up to " + "200 characters). Pair with an optional `objective` for best " + "results." 
), ) max_results: int = Field( @@ -253,7 +251,6 @@ class ParallelWebSearchTool(BaseTool): "search_metadata": { # added by this tool when include_metadata=True "search_duration_seconds": 2.451, "search_timestamp": "2026-04-27T10:30:00", - "endpoint": "v1", "actual_results_returned": 5, } } @@ -303,7 +300,6 @@ def _build_metadata( self, *, start_time: datetime, - endpoint: str, response: dict[str, Any], ) -> dict[str, Any]: """Build client-side timing/result metadata.""" @@ -314,7 +310,6 @@ def _build_metadata( 3, ), "search_timestamp": start_time.isoformat(), - "endpoint": endpoint, "actual_results_returned": len(response.get("results") or []), } @@ -333,10 +328,16 @@ def _build_call_kwargs( max_results: int, location: Optional[str], timeout: Optional[int], - ) -> tuple[str, dict[str, Any]]: - """Resolve params + endpoint (v1 GA vs v1beta fallback).""" - if not objective and not search_queries: - msg = "Either 'objective' or 'search_queries' must be provided." + ) -> dict[str, Any]: + """Resolve params into the GA `client.search(...)` kwargs shape.""" + if not search_queries: + msg = ( + "search_queries is required (1-5 keyword strings, 3-6 words " + "each). Pass at least one query; pair with an optional " + "`objective` for best results. See " + "https://docs.parallel.ai/search/search-migration-guide for " + "migrating from the pre-GA objective-only call shape." + ) raise ValueError(msg) normalized_mode = _normalize_mode(mode) @@ -348,40 +349,7 @@ def _build_call_kwargs( location=location, ) - if not search_queries: - warnings.warn( - "Calling Parallel Search without 'search_queries' falls back " - "to the deprecated v1beta endpoint. 
Provide search_queries " - "(1-5 keyword strings) to use the GA endpoint.", - DeprecationWarning, - stacklevel=4, - ) - kwargs: dict[str, Any] = { - "objective": objective, - "max_results": max_results, - } - if excerpts is not None: - kwargs["excerpts"] = excerpts.model_dump(exclude_none=True) - if fetch_policy is not None: - kwargs["fetch_policy"] = fetch_policy.model_dump(exclude_none=True) - sp = _coerce_source_policy(source_policy) - if sp: - kwargs["source_policy"] = sp - if normalized_mode is not None: - kwargs["mode"] = ( - "agentic" if normalized_mode == "advanced" else "one-shot" - ) - if client_model is not None: - kwargs["client_model"] = client_model - if session_id is not None: - kwargs["session_id"] = session_id - if location is not None: - kwargs["location"] = location - if timeout is not None: - kwargs["timeout"] = timeout - return "v1beta", kwargs - - kwargs = {"search_queries": list(search_queries)} + kwargs: dict[str, Any] = {"search_queries": list(search_queries)} if objective is not None: kwargs["objective"] = objective if normalized_mode is not None: @@ -396,7 +364,40 @@ def _build_call_kwargs( kwargs["advanced_settings"] = advanced_settings if timeout is not None: kwargs["timeout"] = timeout - return "v1", kwargs + return kwargs + + def _finalize_response( + self, + response_obj: Any, + *, + start_time: datetime, + include_metadata: bool, + ) -> dict[str, Any]: + """Convert SDK response to dict and attach client-side metadata.""" + response: dict[str, Any] = response_obj.model_dump() + if include_metadata: + response["search_metadata"] = self._build_metadata( + start_time=start_time, + response=response, + ) + return response + + @staticmethod + def _start_text( + objective: Optional[str], search_queries: Optional[list[str]], *, async_: bool + ) -> str: + """Build the run-manager start-of-search log message.""" + prefix = "Starting async web search" if async_ else "Starting web search" + query_desc = objective or f"{len(search_queries or 
[])} search queries" + return f"{prefix}: {query_desc}\n" + + @staticmethod + def _completion_text(response: dict[str, Any], *, async_: bool) -> str: + """Build the run-manager end-of-search log message.""" + prefix = "Async search completed" if async_ else "Search completed" + count = len(response.get("results") or []) + duration = response.get("search_metadata", {}).get("search_duration_seconds", 0) + return f"{prefix}: {count} results in {duration}s\n" def _run( self, @@ -423,10 +424,12 @@ def _run( start_time = datetime.now() if run_manager: - query_desc = objective or f"{len(search_queries or [])} search queries" - run_manager.on_text(f"Starting web search: {query_desc}\n", color="blue") + run_manager.on_text( + self._start_text(objective, search_queries, async_=False), + color="blue", + ) - endpoint, kwargs = self._build_call_kwargs( + kwargs = self._build_call_kwargs( objective=objective, search_queries=search_queries, mode=mode, @@ -442,35 +445,23 @@ def _run( ) try: - response_obj: Any = ( - self._client.search(**kwargs) - if endpoint == "v1" - else self._client.beta.search(**kwargs) - ) + response_obj = self._client.search(**kwargs) except Exception as e: if run_manager: run_manager.on_text(f"Search failed: {e!s}\n", color="red") msg = f"Error calling Parallel Search API: {e!s}" raise ValueError(msg) from e - response: dict[str, Any] = response_obj.model_dump() - if include_metadata: - response["search_metadata"] = self._build_metadata( - start_time=start_time, - endpoint=endpoint, - response=response, - ) - + response = self._finalize_response( + response_obj, + start_time=start_time, + include_metadata=include_metadata, + ) if run_manager: - count = len(response.get("results") or []) - duration = response.get("search_metadata", {}).get( - "search_duration_seconds", 0 - ) run_manager.on_text( - f"Search completed: {count} results in {duration}s\n", + self._completion_text(response, async_=False), color="green", ) - return response async def _arun( @@ 
-498,13 +489,12 @@ async def _arun( start_time = datetime.now() if run_manager: - query_desc = objective or f"{len(search_queries or [])} search queries" await run_manager.on_text( - f"Starting async web search: {query_desc}\n", + self._start_text(objective, search_queries, async_=True), color="blue", ) - endpoint, kwargs = self._build_call_kwargs( + kwargs = self._build_call_kwargs( objective=objective, search_queries=search_queries, mode=mode, @@ -520,11 +510,7 @@ async def _arun( ) try: - response_obj: Any = ( - await self._async_client.search(**kwargs) - if endpoint == "v1" - else await self._async_client.beta.search(**kwargs) - ) + response_obj = await self._async_client.search(**kwargs) except Exception as e: if run_manager: await run_manager.on_text( @@ -534,22 +520,21 @@ async def _arun( msg = f"Error calling Parallel Search API: {e!s}" raise ValueError(msg) from e - response: dict[str, Any] = response_obj.model_dump() - if include_metadata: - response["search_metadata"] = self._build_metadata( - start_time=start_time, - endpoint=endpoint, - response=response, - ) - + response = self._finalize_response( + response_obj, + start_time=start_time, + include_metadata=include_metadata, + ) if run_manager: - count = len(response.get("results") or []) - duration = response.get("search_metadata", {}).get( - "search_duration_seconds", 0 - ) await run_manager.on_text( - f"Async search completed: {count} results in {duration}s\n", + self._completion_text(response, async_=True), color="green", ) - return response + + +#: Forward-compat alias for :class:`ParallelWebSearchTool`. +#: +#: Prefer ParallelSearchTool in new code; ParallelWebSearchTool will +#: continue to work indefinitely as an alias for this class. 
+ParallelSearchTool = ParallelWebSearchTool diff --git a/tests/integration_tests/test_search_tool.py b/tests/integration_tests/test_search_tool.py index 8556b26..9fefc6a 100644 --- a/tests/integration_tests/test_search_tool.py +++ b/tests/integration_tests/test_search_tool.py @@ -23,6 +23,10 @@ def tool_invoke_params_example(self) -> dict: have {"name", "id", "args"} keys. """ return { - "objective": "What are the latest developments in AI?", + "search_queries": [ + "latest AI developments", + "AI breakthroughs 2026", + ], + "objective": "Latest developments in AI", "max_results": 3, } diff --git a/tests/unit_tests/test_chat_models.py b/tests/unit_tests/test_chat_models.py index 9e9e77b..34d08a1 100644 --- a/tests/unit_tests/test_chat_models.py +++ b/tests/unit_tests/test_chat_models.py @@ -299,6 +299,13 @@ def test_with_structured_output_include_raw_success(self) -> None: assert result["parsed"].name == "Elon Musk" assert result["parsing_error"] is None + def test_chat_parallel_is_alias_of_chat_parallel_web(self) -> None: + """``ChatParallel`` is the new canonical name; old name still works.""" + from langchain_parallel import ChatParallel, ChatParallelWeb + + assert ChatParallel is ChatParallelWeb + assert ChatParallel(model="lite", api_key=_TEST_KEY).model == "lite" + def test_response_metadata_stream_chunk_includes_basis(self) -> None: """Streaming chunks expose basis on the last chunk.""" chat = ChatParallelWeb(model="lite", api_key=_TEST_KEY) diff --git a/tests/unit_tests/test_search_tool.py b/tests/unit_tests/test_search_tool.py index 485360d..4691350 100644 --- a/tests/unit_tests/test_search_tool.py +++ b/tests/unit_tests/test_search_tool.py @@ -69,42 +69,16 @@ def test_run_uses_v1_endpoint_when_search_queries_provided( assert kwargs["advanced_settings"] == {"max_results": 3} assert isinstance(result, dict) assert result["search_id"] == "search-1" - assert result["search_metadata"]["endpoint"] == "v1" - - 
@patch("langchain_parallel.search_tool.get_parallel_client") - @patch("langchain_parallel.search_tool.get_async_parallel_client") - def test_run_falls_back_to_beta_when_objective_only( - self, - mock_async_factory: Mock, - mock_sync_factory: Mock, - ) -> None: - """Objective without search_queries falls back to v1beta with a warning.""" - sync_client = Mock() - sync_client.beta.search.return_value = _make_response( - {"search_id": "beta-1", "results": []}, - ) - mock_sync_factory.return_value = sync_client - mock_async_factory.return_value = Mock() + assert "search_duration_seconds" in result["search_metadata"] + def test_run_requires_search_queries(self) -> None: + """Calling without search_queries raises with a migration hint.""" with patch( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - with pytest.warns(DeprecationWarning, match="v1beta"): - result = tool._run( - objective="What is AI?", - mode="advanced", - source_policy={"include_domains": ["wikipedia.org"]}, - ) - sync_client.beta.search.assert_called_once() - sync_client.search.assert_not_called() - beta_kwargs = sync_client.beta.search.call_args.kwargs - # advanced -> agentic on the legacy endpoint - assert beta_kwargs["mode"] == "agentic" - assert beta_kwargs["source_policy"] == { - "include_domains": ["wikipedia.org"], - } - assert result["search_metadata"]["endpoint"] == "v1beta" + with pytest.raises(ValueError, match="search_queries is required"): + tool._run(objective="What is AI?") @patch("langchain_parallel.search_tool.get_parallel_client") @patch("langchain_parallel.search_tool.get_async_parallel_client") @@ -311,6 +285,13 @@ async def test_async_handles_api_error( await tool._arun(search_queries=["q"]) +def test_parallel_search_tool_is_alias_of_parallel_web_search_tool() -> None: + """``ParallelSearchTool`` is the new canonical name; old name still works.""" + from langchain_parallel import ParallelSearchTool, ParallelWebSearchTool + + 
assert ParallelSearchTool is ParallelWebSearchTool + + class TestNormalizeMode: def test_passthrough(self) -> None: assert _normalize_mode("basic") == "basic" From c22c360a27df1f2ba87575a1219078b83cc17eb0 Mon Sep 17 00:00:00 2001 From: Matt Harris Date: Mon, 27 Apr 2026 14:54:20 -0400 Subject: [PATCH 4/5] Restore objective-only search via v1beta with DeprecationWarning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Soften the previous breaking change: a 0.2.x caller passing only `objective` no longer hits a hard ValueError. Instead the call routes to the deprecated `/v1beta` endpoint with a DeprecationWarning that names the sunset (0.4.0) and points at Parallel's migration guide. This matches how legacy `mode` strings and `excerpts=False` already behave (deprecated, not removed). The Parallel API itself supports v1beta through at least June 2026, so we have runway. Trade-off: re-introduces ~50 lines of legacy translation in _build_call_kwargs (basic↔one-shot, advanced↔agentic, flat↔nested settings). The v1beta path will be removed in 0.4.0; tracking via CHANGELOG and via the docstring on _build_call_kwargs. Restored: - `search_queries: Optional[list[str]] = None` on the input schema - v1beta branch in `_build_call_kwargs` with explicit DeprecationWarning - `endpoint` plumbing through `_finalize_response` and `_build_metadata` - "endpoint" key in `search_metadata` ("v1" or "v1beta") so callers can inspect which path was taken - Unit test `test_run_falls_back_to_beta_when_objective_only` plus a new `test_run_raises_when_neither_objective_nor_queries` for the remaining error case CHANGELOG: moved `search_queries`-required, legacy `mode`, and `excerpts=False` under a single "Deprecated" section with a clear 0.4.0 sunset note. Demoted the search-queries note from BREAKING. README: search_queries column reverted to Optional with the deprecation note inline. 64 unit tests pass; lint, format, mypy on src+tests all clean. 
End-to-end smoke against the live API confirms both paths: objective- only routes to v1beta with the warning, search_queries+objective uses v1 GA cleanly. --- CHANGELOG.md | 21 +++-- README.md | 2 +- langchain_parallel/search_tool.py | 92 ++++++++++++++++----- tests/integration_tests/test_search_tool.py | 3 + tests/unit_tests/test_search_tool.py | 49 +++++++++-- 5 files changed, 133 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a886c6..0e3dbe8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,14 @@ This release migrates Search and Extract to Parallel's v1 GA endpoints, surfaces - **Citations on chat responses**: for the research models, `AIMessage.response_metadata["basis"]` carries the API's per-field citations / reasoning / confidence list. `response_metadata["interaction_id"]` is surfaced for multi-turn context chaining; `system_fingerprint` is forwarded when present. - **`SourcePolicy` pydantic model** in `langchain_parallel._types` mirroring the API's `include_domains` / `exclude_domains` / `after_date`. Both `SourcePolicy(...)` and a raw dict are accepted on `ParallelSearchTool`. +### Deprecated + +- **Search without `search_queries`**: calls passing only `objective` route to the deprecated `/v1beta` endpoint with a `DeprecationWarning`. The fallback will be **removed in 0.4.0**; the Parallel API itself sunsets `/v1beta` no earlier than June 2026. Pair `objective` with `search_queries=[...]` (1-5 keyword strings, 3-6 words each) to use the GA `/v1` endpoint. +- **Legacy `mode` values**: `"fast"`, `"one-shot"`, and `"agentic"` continue to call the API correctly with a `DeprecationWarning` mapping them to the GA values (`"fast"` / `"one-shot"` → `"basic"`, `"agentic"` → `"advanced"`). The GA values `"basic"` and `"advanced"` are now the canonical set. 
+- **`Extract.excerpts=False`**: the GA Extract API always returns excerpts and has no flag to disable them; passing `False` is accepted with a `DeprecationWarning` and ignored. Use `ExcerptSettings(max_chars_per_result=…)` to control per-result size. + ### Changed -- **BREAKING — `search_queries` is required for Search**. Previously the tool accepted `objective` alone (silently calling the deprecated `/v1beta` endpoint). With v1 GA, calls without `search_queries` raise `ValueError` with a migration hint pointing at https://docs.parallel.ai/search/search-migration-guide. Existing call sites that pair `objective` with `search_queries` are unaffected. -- **`mode` strings**: legacy values `"fast"`, `"one-shot"`, and `"agentic"` continue to call the API correctly with a `DeprecationWarning` mapping them to the GA values (`"fast"` / `"one-shot"` → `"basic"`, `"agentic"` → `"advanced"`). The GA values `"basic"` and `"advanced"` are now the canonical set. -- **Extract `excerpts: bool` is now a no-op**: the GA Extract API always returns excerpts, so passing `excerpts=True` (the default) is unchanged on the wire and `excerpts=False` is accepted with a `DeprecationWarning`. Use `ExcerptSettings(max_chars_per_result=…)` to control per-result size. - **`response_metadata["model_name"]`**: chat completions now emit the LangChain 1.x standard key `model_name` (was `model`). Tracing systems and `langchain-tests`' standard suite check for this name. - **`parallel-web` SDK bumped** from `^0.3.3` to `^0.5.1`. Brings in the v1 GA Search/Extract types, `AdvancedSearchSettingsParam` / `AdvancedExtractSettingsParam`, and the FindAll / Task Group / Monitor surfaces (not yet exposed by this integration — see `IMPROVEMENT_PLAN.md` Phase 2). 
- **Slimmed `_client.py`**: the four hand-rolled `ParallelSearchClient` / `AsyncParallelSearchClient` / `ParallelExtractClient` / `AsyncParallelExtractClient` wrapper classes have been removed in favor of using `parallel.Parallel` / `parallel.AsyncParallel` directly. Internal change; no public surface impact. @@ -36,19 +39,21 @@ This release migrates Search and Extract to Parallel's v1 GA endpoints, surfaces ### Migration -- **Search** (only required change for most users): add `search_queries=[…]` (1-5 keyword strings, 3-6 words each). Pair with the existing `objective=…` for best results. +For most users, **no code changes are required**. The recommended-but-optional updates to silence deprecation warnings: + +- **Search**: add `search_queries=[…]` (1-5 keyword strings, 3-6 words each) to use the GA `/v1` endpoint. ```python - # Before (0.2.x — silently used /v1beta) + # 0.2.x (still works in 0.3.x with a DeprecationWarning; will break in 0.4.0) tool.invoke({"objective": "What are the latest AI breakthroughs?"}) - # After (0.3.x — uses /v1 GA) + # 0.3.x preferred (GA /v1 endpoint) tool.invoke({ "search_queries": ["latest AI breakthroughs", "AI advances 2026"], "objective": "What are the latest AI breakthroughs?", }) ``` -- **Search mode**: rename `mode="one-shot"`/`"fast"` → `mode="basic"` and `mode="agentic"` → `mode="advanced"` to silence the legacy-value deprecation warning. -- **Chat**: prefer `ChatParallel(model="lite")` (or `"base"` / `"core"`) over `model_name="..."`. Read citations off `response.response_metadata["basis"]` and structured outputs via `chat.with_structured_output(MyPydanticModel)`. The old class name `ChatParallelWeb` continues to work. +- **Search mode**: rename `mode="one-shot"`/`"fast"` → `mode="basic"` and `mode="agentic"` → `mode="advanced"`. +- **Chat**: prefer `ChatParallel(model="lite")` (or `"base"` / `"core"`) over `model_name="..."`. 
Read citations from `response.response_metadata["basis"]` and structured outputs via `chat.with_structured_output(MyPydanticModel)`. The old class name `ChatParallelWeb` continues to work. ## [0.2.0] - 2025-12-01 diff --git a/README.md b/README.md index a5c61b4..9d7f7bd 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ for r in result["results"]: | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | `objective` | `Optional[str]` | `None` | Natural-language description of research goal (≤5000 chars). | -| `search_queries` | `List[str]` | Required | 1-5 keyword queries (3-6 words each, ≤200 chars). Pair with an optional `objective` for best results. | +| `search_queries` | `Optional[List[str]]` | `None` | 1-5 keyword queries (3-6 words each, ≤200 chars). Required by the GA `/v1` endpoint; if omitted, the call routes to the deprecated `/v1beta` endpoint with a `DeprecationWarning` (slated for removal in 0.4.0). Pair with an optional `objective` for best results. | | `max_results` | `int` | `10` | Maximum results to return (1–40). | | `excerpts` | `Optional[ExcerptSettings]` | `None` | Per-result excerpt-size cap. | | `max_chars_total` | `Optional[int]` | `None` | Cap on total excerpt characters across all results. | diff --git a/langchain_parallel/search_tool.py b/langchain_parallel/search_tool.py index 197b04f..85827d0 100644 --- a/langchain_parallel/search_tool.py +++ b/langchain_parallel/search_tool.py @@ -90,11 +90,13 @@ class ParallelWebSearchInput(BaseModel): "alongside `search_queries` for best results." ), ) - search_queries: list[str] = Field( + search_queries: Optional[list[str]] = Field( + default=None, description=( - "Required. 1-5 keyword search queries (3-6 words each, up to " - "200 characters). Pair with an optional `objective` for best " - "results." + "1-5 keyword search queries (3-6 words each, up to 200 characters). 
" + "Required by Parallel's GA endpoint; if omitted, the call routes " + "to the deprecated /v1beta endpoint with a DeprecationWarning. " + "Pair with an optional `objective` for best results." ), ) max_results: int = Field( @@ -251,6 +253,7 @@ class ParallelWebSearchTool(BaseTool): "search_metadata": { # added by this tool when include_metadata=True "search_duration_seconds": 2.451, "search_timestamp": "2026-04-27T10:30:00", + "endpoint": "v1", # or "v1beta" if search_queries was omitted "actual_results_returned": 5, } } @@ -300,6 +303,7 @@ def _build_metadata( self, *, start_time: datetime, + endpoint: str, response: dict[str, Any], ) -> dict[str, Any]: """Build client-side timing/result metadata.""" @@ -310,6 +314,7 @@ def _build_metadata( 3, ), "search_timestamp": start_time.isoformat(), + "endpoint": endpoint, "actual_results_returned": len(response.get("results") or []), } @@ -328,19 +333,55 @@ def _build_call_kwargs( max_results: int, location: Optional[str], timeout: Optional[int], - ) -> dict[str, Any]: - """Resolve params into the GA `client.search(...)` kwargs shape.""" - if not search_queries: - msg = ( - "search_queries is required (1-5 keyword strings, 3-6 words " - "each). Pass at least one query; pair with an optional " - "`objective` for best results. See " - "https://docs.parallel.ai/search/search-migration-guide for " - "migrating from the pre-GA objective-only call shape." - ) + ) -> tuple[str, dict[str, Any]]: + """Resolve params + endpoint (`v1` GA vs `v1beta` legacy fallback). + + The v1beta path is deprecated and will be removed in 0.4.0; it exists + so 0.2.x callers passing only ``objective`` keep working through the + Parallel API beta sunset (~June 2026). + """ + if not objective and not search_queries: + msg = "Either 'objective' or 'search_queries' must be provided." 
raise ValueError(msg) normalized_mode = _normalize_mode(mode) + + if not search_queries: + warnings.warn( + "Calling Parallel Search without `search_queries` routes to " + "the deprecated /v1beta endpoint and will be removed in " + "langchain-parallel 0.4.0. Pass `search_queries=[...]` (1-5 " + "keyword strings, 3-6 words each) to use the GA /v1 endpoint. " + "See https://docs.parallel.ai/search/search-migration-guide.", + DeprecationWarning, + stacklevel=4, + ) + beta_kwargs: dict[str, Any] = { + "objective": objective, + "max_results": max_results, + } + if excerpts is not None: + beta_kwargs["excerpts"] = excerpts.model_dump(exclude_none=True) + if fetch_policy is not None: + beta_kwargs["fetch_policy"] = fetch_policy.model_dump(exclude_none=True) + sp = _coerce_source_policy(source_policy) + if sp: + beta_kwargs["source_policy"] = sp + if normalized_mode is not None: + # v1beta speaks the legacy mode dialect. + beta_kwargs["mode"] = ( + "agentic" if normalized_mode == "advanced" else "one-shot" + ) + if client_model is not None: + beta_kwargs["client_model"] = client_model + if session_id is not None: + beta_kwargs["session_id"] = session_id + if location is not None: + beta_kwargs["location"] = location + if timeout is not None: + beta_kwargs["timeout"] = timeout + return "v1beta", beta_kwargs + advanced_settings = _build_advanced_settings( excerpts=excerpts, fetch_policy=fetch_policy, @@ -348,7 +389,6 @@ def _build_call_kwargs( max_results=max_results, location=location, ) - kwargs: dict[str, Any] = {"search_queries": list(search_queries)} if objective is not None: kwargs["objective"] = objective @@ -364,13 +404,14 @@ def _build_call_kwargs( kwargs["advanced_settings"] = advanced_settings if timeout is not None: kwargs["timeout"] = timeout - return kwargs + return "v1", kwargs def _finalize_response( self, response_obj: Any, *, start_time: datetime, + endpoint: str, include_metadata: bool, ) -> dict[str, Any]: """Convert SDK response to dict and attach 
client-side metadata.""" @@ -378,6 +419,7 @@ def _finalize_response( if include_metadata: response["search_metadata"] = self._build_metadata( start_time=start_time, + endpoint=endpoint, response=response, ) return response @@ -429,7 +471,7 @@ def _run( color="blue", ) - kwargs = self._build_call_kwargs( + endpoint, kwargs = self._build_call_kwargs( objective=objective, search_queries=search_queries, mode=mode, @@ -445,7 +487,11 @@ def _run( ) try: - response_obj = self._client.search(**kwargs) + response_obj: Any = ( + self._client.search(**kwargs) + if endpoint == "v1" + else self._client.beta.search(**kwargs) + ) except Exception as e: if run_manager: run_manager.on_text(f"Search failed: {e!s}\n", color="red") @@ -455,6 +501,7 @@ def _run( response = self._finalize_response( response_obj, start_time=start_time, + endpoint=endpoint, include_metadata=include_metadata, ) if run_manager: @@ -494,7 +541,7 @@ async def _arun( color="blue", ) - kwargs = self._build_call_kwargs( + endpoint, kwargs = self._build_call_kwargs( objective=objective, search_queries=search_queries, mode=mode, @@ -510,7 +557,11 @@ async def _arun( ) try: - response_obj = await self._async_client.search(**kwargs) + response_obj = ( + await self._async_client.search(**kwargs) + if endpoint == "v1" + else await self._async_client.beta.search(**kwargs) + ) except Exception as e: if run_manager: await run_manager.on_text( @@ -523,6 +574,7 @@ async def _arun( response = self._finalize_response( response_obj, start_time=start_time, + endpoint=endpoint, include_metadata=include_metadata, ) if run_manager: diff --git a/tests/integration_tests/test_search_tool.py b/tests/integration_tests/test_search_tool.py index 9fefc6a..f6fc1c9 100644 --- a/tests/integration_tests/test_search_tool.py +++ b/tests/integration_tests/test_search_tool.py @@ -30,3 +30,6 @@ def tool_invoke_params_example(self) -> dict: "objective": "Latest developments in AI", "max_results": 3, } + # Note: passing only `objective` (no 
search_queries) also works in + # 0.3.x but routes to /v1beta with a DeprecationWarning. Prefer the + # GA shape above; the fallback will be removed in 0.4.0. diff --git a/tests/unit_tests/test_search_tool.py b/tests/unit_tests/test_search_tool.py index 4691350..86441be 100644 --- a/tests/unit_tests/test_search_tool.py +++ b/tests/unit_tests/test_search_tool.py @@ -69,16 +69,55 @@ def test_run_uses_v1_endpoint_when_search_queries_provided( assert kwargs["advanced_settings"] == {"max_results": 3} assert isinstance(result, dict) assert result["search_id"] == "search-1" - assert "search_duration_seconds" in result["search_metadata"] + assert result["search_metadata"]["endpoint"] == "v1" - def test_run_requires_search_queries(self) -> None: - """Calling without search_queries raises with a migration hint.""" + @patch("langchain_parallel.search_tool.get_parallel_client") + @patch("langchain_parallel.search_tool.get_async_parallel_client") + def test_run_falls_back_to_beta_when_objective_only( + self, + mock_async_factory: Mock, + mock_sync_factory: Mock, + ) -> None: + """Objective-only routes to /v1beta with a DeprecationWarning. + + This is a deprecated path slated for removal in 0.4.0; it exists so + 0.2.x callers passing only ``objective`` keep working. 
+ """ + sync_client = Mock() + sync_client.beta.search.return_value = _make_response( + {"search_id": "beta-1", "results": []}, + ) + mock_sync_factory.return_value = sync_client + mock_async_factory.return_value = Mock() + + with patch( + "langchain_parallel.search_tool.get_api_key", return_value="test-key" + ): + tool = ParallelWebSearchTool() + with pytest.warns(DeprecationWarning, match="0.4.0"): + result = tool._run( + objective="What is AI?", + mode="advanced", + source_policy={"include_domains": ["wikipedia.org"]}, + ) + sync_client.beta.search.assert_called_once() + sync_client.search.assert_not_called() + beta_kwargs = sync_client.beta.search.call_args.kwargs + # advanced -> agentic on the legacy endpoint + assert beta_kwargs["mode"] == "agentic" + assert beta_kwargs["source_policy"] == { + "include_domains": ["wikipedia.org"], + } + assert result["search_metadata"]["endpoint"] == "v1beta" + + def test_run_raises_when_neither_objective_nor_queries(self) -> None: + """At least one of objective or search_queries must be provided.""" with patch( "langchain_parallel.search_tool.get_api_key", return_value="test-key" ): tool = ParallelWebSearchTool() - with pytest.raises(ValueError, match="search_queries is required"): - tool._run(objective="What is AI?") + with pytest.raises(ValueError, match="objective.*search_queries.*provided"): + tool._run() @patch("langchain_parallel.search_tool.get_parallel_client") @patch("langchain_parallel.search_tool.get_async_parallel_client") From cfd92c87d36bb6bdea8c24d4fbde153699f0badf Mon Sep 17 00:00:00 2001 From: Matt Harris Date: Mon, 27 Apr 2026 16:34:03 -0400 Subject: [PATCH 5/5] Refresh docs/*.ipynb and examples/*.py for 0.3.0; add scripts/run_notebooks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Notebooks (docs/): - chat.ipynb: switch instantiation to ChatParallel + a model-menu comment. 
Add new sections demonstrating with_structured_output() (json_schema + pydantic) and basis citations on response_metadata. - search_tool.ipynb: switch to ParallelSearchTool. Replace the legacy mode='one-shot' / 'agentic' values with 'advanced'. Add search_queries to every previously-objective-only example so the notebook now hits the GA /v1 endpoint and doesn't trigger the v1beta-fallback warning. Drop the OpenAI chain demo cells (they require langchain-openai + OPENAI_API_KEY); replace with a pointer to demo_agent.ipynb which already shows the agent pattern with claude-haiku-4-5. - extract_tool.ipynb: drop the OpenAI chain demo (same reason). Strip the demo `api_key="your-api-key"` literal from the instantiation cell so the notebook actually executes against PARALLEL_API_KEY. Examples (examples/): - chat_example.py: ChatParallelWeb -> ChatParallel; drop model_name= alias usage; drop the temperature=/max_tokens= ignored-param noise. - search_example.py: full rewrite to use ParallelSearchTool, add search_queries to all calls, mode='one-shot'/'agentic' -> 'basic'/ 'advanced', SourcePolicy pydantic model, and trim the display_metadata helper to the keys actually emitted in 0.3.0 (search_duration_seconds, endpoint, actual_results_returned — removed the dead max_results_requested / source_policy_applied keys). - extract_tool_example.py: ChatParallelWeb -> ChatParallel. Tooling: - scripts/run_notebooks.py: headless executor that skips %pip and getpass cells, then executes the rest against the real Parallel API. Used as a release-time smoke test. Run with: poetry run python scripts/run_notebooks.py - pyproject.toml: allow `print()` in scripts/. End-to-end verified against the live API: all three notebooks pass via scripts/run_notebooks.py; all three examples run cleanly. 64 unit tests still pass; lint, format, mypy clean. 
--- docs/chat.ipynb | 88 +++++++++++--- docs/extract_tool.ipynb | 87 +++----------- docs/search_tool.ipynb | 155 +++++++----------------- examples/chat_example.py | 18 ++- examples/extract_tool_example.py | 4 +- examples/search_example.py | 195 +++++++++++++++---------------- pyproject.toml | 6 +- scripts/check_imports.py | 4 +- scripts/run_notebooks.py | 97 +++++++++++++++ 9 files changed, 340 insertions(+), 314 deletions(-) create mode 100644 scripts/run_notebooks.py diff --git a/docs/chat.ipynb b/docs/chat.ipynb index 0bd924f..112e84b 100644 --- a/docs/chat.ipynb +++ b/docs/chat.ipynb @@ -16,12 +16,12 @@ "\n", "| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n", "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", - "| [ChatParallelWeb](https://python.langchain.com/api_reference/parallel_web/chat_models/langchain_parallel.chat_models.ChatParallelWeb.html) | [langchain-parallel](https://python.langchain.com/api_reference/parallel_web/) | ❌ | ✅ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-parallel?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-parallel?style=flat-square&label=%20) |\n", + "| [ChatParallelWeb](https://python.langchain.com/api_reference/parallel_web/chat_models/langchain_parallel.chat_models.ChatParallelWeb.html) | [langchain-parallel](https://python.langchain.com/api_reference/parallel_web/) | \u274c | \u2705 | \u274c | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-parallel?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-parallel?style=flat-square&label=%20) |\n", "\n", "### Model features\n", "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token 
usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n", "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n", - "| ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | \n", + "| \u274c | \u274c | \u274c | \u274c | \u274c | \u274c | \u2705 | \u2705 | \u274c | \u274c | \n", "\n", "## Setup\n", "\n", @@ -83,24 +83,24 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_parallel import ChatParallelWeb\n", - "\n", - "llm = ChatParallelWeb(\n", - " model=\"speed\", # Default model with fast responses\n", - " temperature=0.7,\n", - " max_tokens=None,\n", + "from langchain_parallel import ChatParallel\n", + "\n", + "# `ChatParallel` is the canonical name in 0.3+ (the older `ChatParallelWeb`\n", + "# continues to work as an alias).\n", + "#\n", + "# Models:\n", + "# - \"speed\" (default): low-latency conversational answers, no citations.\n", + "# - \"lite\" / \"base\" / \"core\": research models with web access. They return\n", + "# source citations on `response_metadata[\"basis\"]` and support\n", + "# `with_structured_output()` via `response_format` JSON schema.\n", + "\n", + "llm = ChatParallel(\n", + " model=\"speed\",\n", " timeout=None,\n", " max_retries=2,\n", - " # api_key=\"your-api-key\" # Optional if set in environment\n", + " # api_key=\"your-api-key\" # Optional if PARALLEL_API_KEY is set\n", " # base_url=\"https://api.parallel.ai\" # Optional, uses default\n", - " # OpenAI-compatible parameters (ignored by Parallel but supported for compatibility)\n", - " # response_format={\"type\": \"json_object\"}, # Ignored\n", - " # tools=[...], # Ignored\n", - " # tool_choice=\"auto\", # Ignored\n", - " # top_p=1.0, # Ignored\n", - " # frequency_penalty=0.0, # Ignored\n", - " # presence_penalty=0.0, # Ignored\n", - ")" + ")\n" ] }, { @@ -180,6 +180,58 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Structured output (research models)\n", + "\n", + "On `lite`, `base`, and 
`core`, `ChatParallel.with_structured_output(...)` ", + "binds a JSON-schema `response_format` and returns a parsed pydantic ", + "object (or dict). On `speed` it raises a clear error since that model ", + "silently ignores `response_format`.\n" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [], + "source": [ + "from pydantic import BaseModel, Field\n", + "\n", + "class Founder(BaseModel):\n", + " name: str = Field(description=\"Full name of the founder\")\n", + " company: str = Field(description=\"Company they founded\")\n", + "\n", + "structured = ChatParallel(model=\"lite\").with_structured_output(Founder)\n", + "parsed = structured.invoke([(\"human\", \"Who founded SpaceX?\")])\n", + "parsed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Citations (research models)\n", + "\n", + "Research models populate `AIMessage.response_metadata['basis']` with ", + "per-field citations, the model's reasoning, and a confidence label.\n" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "execution_count": null, + "outputs": [], + "source": [ + "cited = ChatParallel(model=\"lite\").invoke([\n", + " (\"human\", \"Who is the current CEO of OpenAI? 
One sentence.\")\n", + "])\n", + "print(cited.content)\n", + "print(\"\\nbasis:\", cited.response_metadata.get(\"basis\"))" + ] + }, { "cell_type": "markdown", "id": "d1ee55bc-ffc8-4cfa-801c-993953a08cfd", @@ -315,4 +367,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/docs/extract_tool.ipynb b/docs/extract_tool.ipynb index b3e94b3..aa4c411 100644 --- a/docs/extract_tool.ipynb +++ b/docs/extract_tool.ipynb @@ -17,7 +17,7 @@ "\n", "| Class | Package | Serializable | JS support | Package latest |\n", "| :--- | :--- | :---: | :---: | :---: |\n", - "| [ParallelExtractTool](https://python.langchain.com/api_reference/parallel_web/extract_tool/langchain_parallel.extract_tool.ParallelExtractTool.html) | [langchain-parallel](https://python.langchain.com/api_reference/parallel_web/) | ❌ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-parallel?style=flat-square&label=%20) |\n", + "| [ParallelExtractTool](https://python.langchain.com/api_reference/parallel_web/extract_tool/langchain_parallel.extract_tool.ParallelExtractTool.html) | [langchain-parallel](https://python.langchain.com/api_reference/parallel_web/) | \u274c | \u274c | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-parallel?style=flat-square&label=%20) |\n", "\n", "### Tool features\n", "\n", @@ -87,15 +87,16 @@ "source": [ "from langchain_parallel import ParallelExtractTool\n", "\n", - "# Basic instantiation - API key from environment\n", + "# Reads PARALLEL_API_KEY from the environment by default.\n", "tool = ParallelExtractTool()\n", "\n", - "# With explicit API key and custom settings\n", - "tool = ParallelExtractTool(\n", - " api_key=\"your-api-key\",\n", - " base_url=\"https://api.parallel.ai\", # default value\n", - " max_chars_per_extract=5000, # Limit content length\n", - ")" + "# To pass an explicit key, override the base URL, or cap the per-URL\n", + "# full_content size:\n", + "# tool = ParallelExtractTool(\n", + "# 
api_key=\"your-api-key\",\n", + "# base_url=\"https://api.parallel.ai\",\n", + "# max_chars_per_extract=5000,\n", + "# )\n" ] }, { @@ -317,72 +318,18 @@ }, { "cell_type": "markdown", - "id": "659f9fbd", "metadata": {}, "source": [ "## Chaining\n", "\n", - "We can use our tool in a chain by first binding it to a [tool-calling model](/docs/how_to/tool_calling/) and then calling it:\n", + "To use the tool from a tool-calling chat model, bind it to any LLM ", + "that supports tool calls (e.g. `ChatAnthropic`, `ChatOpenAI`) and ", + "drive an agent with `langchain.agents.create_agent`. Parallel's own ", + "`ChatParallel` does not support tool calling \u2014 use it as a research assistant inside a chain, or use the search/extract tools ", + "alongside another model.\n", "\n", - "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af3123ad", - "metadata": {}, - "outputs": [], - "source": [ - "# | output: false\n", - "# | echo: false\n", - "\n", - "# !pip install -qU langchain langchain-openai\n", - "from langchain.chat_models import init_chat_model\n", - "\n", - "llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fdbf35b5", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.prompts import ChatPromptTemplate\n", - "from langchain_core.runnables import RunnableConfig, chain\n", - "\n", - "prompt = ChatPromptTemplate(\n", - " [\n", - " (\n", - " \"system\",\n", - " \"You are a helpful assistant that extracts and summarizes web content.\",\n", - " ),\n", - " (\"human\", \"{user_input}\"),\n", - " (\"placeholder\", \"{messages}\"),\n", - " ]\n", - ")\n", - "\n", - "# specifying tool_choice will force the model to call this tool.\n", - "llm_with_tools = llm.bind_tools([tool], tool_choice=tool.name)\n", - "\n", - "llm_chain = prompt | llm_with_tools\n", - "\n", - "\n", - "@chain\n", 
- "def tool_chain(user_input: str, config: RunnableConfig):\n", - " input_ = {\"user_input\": user_input}\n", - " ai_msg = llm_chain.invoke(input_, config=config)\n", - " tool_msgs = tool.batch(ai_msg.tool_calls, config=config)\n", - " return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n", - "\n", - "\n", - "tool_chain.invoke(\n", - " \"Extract and summarize the content from https://en.wikipedia.org/wiki/Large_language_model\"\n", - ")" + "See [`docs/demo_agent.ipynb`](./demo_agent.ipynb) for a complete ", + "walkthrough using `create_agent` with `claude-haiku-4-5`.\n" ] }, { @@ -447,4 +394,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/docs/search_tool.ipynb b/docs/search_tool.ipynb index ce18e9c..617df05 100644 --- a/docs/search_tool.ipynb +++ b/docs/search_tool.ipynb @@ -9,7 +9,7 @@ "\n", "This notebook provides a quick overview for getting started with Parallel [search tool](/docs/integrations/tools/). For detailed documentation of all ParallelWebSearchTool features and configurations head to the [API reference](https://docs.parallel.ai/api-reference/).\n", "\n", - "The ParallelWebSearchTool provides access to Parallel's Search API, which streamlines the traditional search → scrape → extract pipeline into a single API call, returning structured, LLM-optimized results.\n", + "The ParallelWebSearchTool provides access to Parallel's Search API, which streamlines the traditional search \u2192 scrape \u2192 extract pipeline into a single API call, returning structured, LLM-optimized results.\n", "\n", "## Overview\n", "\n", @@ -17,7 +17,7 @@ "\n", "| Class | Package | Serializable | JS support | Package latest |\n", "| :--- | :--- | :---: | :---: | :---: |\n", - "| [ParallelWebSearchTool](https://python.langchain.com/api_reference/parallel_web/search_tool/langchain_parallel.search_tool.ParallelWebSearchTool.html) | 
[langchain-parallel](https://python.langchain.com/api_reference/parallel_web/) | ❌ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-parallel?style=flat-square&label=%20) |\n", + "| [ParallelWebSearchTool](https://python.langchain.com/api_reference/parallel_web/search_tool/langchain_parallel.search_tool.ParallelWebSearchTool.html) | [langchain-parallel](https://python.langchain.com/api_reference/parallel_web/) | \u274c | \u274c | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-parallel?style=flat-square&label=%20) |\n", "\n", "### Tool features\n", "\n", @@ -86,16 +86,17 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_parallel import ParallelWebSearchTool\n", + "from langchain_parallel import ParallelSearchTool\n", "\n", - "# Basic instantiation - API key from environment\n", - "tool = ParallelWebSearchTool()\n", + "# Reads PARALLEL_API_KEY from the environment by default.\n", + "# (The older `ParallelWebSearchTool` name continues to work as an alias.)\n", + "tool = ParallelSearchTool()\n", "\n", - "# With explicit API key and custom base URL\n", - "tool = ParallelWebSearchTool(\n", - " api_key=\"your-api-key\",\n", - " base_url=\"https://api.parallel.ai\", # default value\n", - ")" + "# To pass an explicit key or override the base URL:\n", + "# tool = ParallelSearchTool(\n", + "# api_key=\"your-api-key\",\n", + "# base_url=\"https://api.parallel.ai\",\n", + "# )\n" ] }, { @@ -117,17 +118,17 @@ "metadata": {}, "outputs": [], "source": [ - "# Using specific search queries with advanced options\n", + "# Using specific search queries with advanced options.\n", "result = tool.invoke(\n", " {\n", " \"search_queries\": [\n", - " \"AI breakthroughs 2024\",\n", + " \"AI breakthroughs 2026\",\n", " \"machine learning advances\",\n", " \"generative AI news\",\n", " ],\n", " \"max_results\": 8,\n", " \"excerpts\": {\"max_chars_per_result\": 2000},\n", - " \"mode\": \"one-shot\", # Use 'agentic' for token-efficient results\n", + " 
\"mode\": \"advanced\", # Higher quality; 'basic' is lower-latency\n", " \"source_policy\": {\n", " \"include_domains\": [\"arxiv.org\", \"nature.com\"],\n", " \"exclude_domains\": [\"reddit.com\", \"twitter.com\"],\n", @@ -137,11 +138,11 @@ " \"timeout_seconds\": 60,\n", " },\n", " \"include_metadata\": True,\n", - " \"timeout\": 120, # Custom timeout in seconds\n", " }\n", ")\n", "\n", - "print(result)" + "print(f\"Found {len(result.get('results', []))} results\")\n", + "print(f\"endpoint: {result['search_metadata']['endpoint']}\")\n" ] }, { @@ -151,40 +152,22 @@ "metadata": {}, "outputs": [], "source": [ - "# Using an objective (natural language) with metadata\n", + "# Pair an objective with search_queries to use the GA /v1 endpoint.\n", "result = tool.invoke(\n", " {\n", - " \"objective\": \"What are the latest developments in artificial intelligence in 2024?\",\n", + " \"search_queries\": [\n", + " \"AI developments 2026\",\n", + " \"artificial intelligence breakthroughs\",\n", + " ],\n", + " \"objective\": (\n", + " \"What are the latest developments in artificial intelligence?\"\n", + " ),\n", " \"max_results\": 5,\n", - " \"include_metadata\": True, # Include search timing and statistics\n", + " \"include_metadata\": True,\n", " }\n", ")\n", "\n", - "print(result)\n", - "\n", - "# Example response structure:\n", - "# {\n", - "# \"search_id\": \"search_abc123...\",\n", - "# \"results\": [\n", - "# {\n", - "# \"url\": \"https://example.com/ai-news\",\n", - "# \"title\": \"Latest AI Developments 2024\",\n", - "# \"excerpts\": [\n", - "# \"Recent breakthrough in transformer architectures...\",\n", - "# \"New applications in computer vision...\"\n", - "# ]\n", - "# }\n", - "# ],\n", - "# \"search_metadata\": {\n", - "# \"search_duration_seconds\": 4.123,\n", - "# \"search_timestamp\": \"2024-01-15T10:30:00\",\n", - "# \"max_results_requested\": 5,\n", - "# \"actual_results_returned\": 4,\n", - "# \"search_id\": \"search_abc123...\",\n", - "# \"query_count\": 
1,\n", - "# \"source_policy_applied\": false\n", - "# }\n", - "# }" + "print(f\"Found {len(result.get('results', []))} results / endpoint={result['search_metadata']['endpoint']}\")\n" ] }, { @@ -207,9 +190,15 @@ "# This is usually generated by a model, but we'll create a tool call directly for demo purposes.\n", "model_generated_tool_call = {\n", " \"args\": {\n", + " \"search_queries\": [\n", + " \"climate change initiatives\",\n", + " \"global climate policy 2026\",\n", + " ],\n", " \"objective\": \"Find recent news about climate change initiatives\",\n", " \"max_results\": 3,\n", - " \"source_policy\": {\"include_domains\": [\"ipcc.ch\", \"unfccc.int\", \"nature.com\"]},\n", + " \"source_policy\": {\n", + " \"include_domains\": [\"ipcc.ch\", \"unfccc.int\", \"nature.com\"]\n", + " },\n", " \"include_metadata\": True,\n", " },\n", " \"id\": \"call_123\",\n", @@ -219,8 +208,7 @@ "\n", "result = tool.invoke(model_generated_tool_call)\n", "print(result)\n", - "print(f\"Tool name: {tool.name}\") # parallel_web_search\n", - "print(f\"Tool description: {tool.description}\")" + "print(f\"Tool name: {tool.name}\") # parallel_web_search\n" ] }, { @@ -243,16 +231,12 @@ "async def search_async():\n", " return await tool.ainvoke(\n", " {\n", + " \"search_queries\": [\"quantum computing breakthroughs\"],\n", " \"objective\": \"Latest quantum computing breakthroughs\",\n", " \"max_results\": 5,\n", " \"include_metadata\": True,\n", " }\n", - " )\n", - "\n", - "\n", - "# Run async search\n", - "result = await search_async()\n", - "print(result)" + " )\n" ] }, { @@ -296,7 +280,7 @@ " \"excerpts\": {\n", " \"max_chars_per_result\": 2500\n", " }, # Longer excerpts for detailed information\n", - " \"mode\": \"one-shot\", # Comprehensive results\n", + " \"mode\": \"advanced\", # Comprehensive results\n", " \"source_policy\": {\n", " \"include_domains\": [\"europa.eu\", \"iea.org\", \"irena.org\"],\n", " \"exclude_domains\": [\"wikipedia.org\", \"reddit.com\"],\n", @@ -320,67 +304,18 
@@ }, { "cell_type": "markdown", - "id": "659f9fbd-6fcf-445f-aa8c-72d8e60154bd", "metadata": {}, "source": [ "## Chaining\n", "\n", - "We can use our tool in a chain by first binding it to a [tool-calling model](/docs/how_to/tool_calling/) and then calling it:\n", - "\n", - "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "af3123ad-7a02-40e5-b58e-7d56e23e5830", - "metadata": {}, - "outputs": [], - "source": [ - "# | output: false\n", - "# | echo: false\n", - "\n", - "# !pip install -qU langchain langchain-openai\n", - "from langchain.chat_models import init_chat_model\n", - "\n", - "llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fdbf35b5-3aaf-4947-9ec6-48c21533fb95", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.prompts import ChatPromptTemplate\n", - "from langchain_core.runnables import RunnableConfig, chain\n", - "\n", - "prompt = ChatPromptTemplate(\n", - " [\n", - " (\"system\", \"You are a helpful assistant.\"),\n", - " (\"human\", \"{user_input}\"),\n", - " (\"placeholder\", \"{messages}\"),\n", - " ]\n", - ")\n", - "\n", - "# specifying tool_choice will force the model to call this tool.\n", - "llm_with_tools = llm.bind_tools([tool], tool_choice=tool.name)\n", - "\n", - "llm_chain = prompt | llm_with_tools\n", - "\n", - "\n", - "@chain\n", - "def tool_chain(user_input: str, config: RunnableConfig):\n", - " input_ = {\"user_input\": user_input}\n", - " ai_msg = llm_chain.invoke(input_, config=config)\n", - " tool_msgs = tool.batch(ai_msg.tool_calls, config=config)\n", - " return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n", - "\n", + "To use the tool from a tool-calling chat model, bind it to any LLM ", + "that supports tool calls (e.g. 
`ChatAnthropic`, `ChatOpenAI`) and ", + "drive an agent with `langchain.agents.create_agent`. Parallel's own ", + "`ChatParallel` does not support tool calling \u2014 use it as a research assistant inside a chain, or use the search/extract tools ", + "alongside another model.\n", "\n", - "tool_chain.invoke(\"What are the latest breakthrough discoveries in quantum computing?\")" + "See [`docs/demo_agent.ipynb`](./demo_agent.ipynb) for a complete ", + "walkthrough using `create_agent` with `claude-haiku-4-5`.\n" ] }, { @@ -455,4 +390,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/examples/chat_example.py b/examples/chat_example.py index c272ef1..7c69fa9 100644 --- a/examples/chat_example.py +++ b/examples/chat_example.py @@ -7,7 +7,7 @@ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage -from langchain_parallel import ChatParallelWeb +from langchain_parallel import ChatParallel # Set your API key: export PARALLEL_API_KEY="your-api-key" @@ -16,12 +16,10 @@ def basic_example() -> None: """Basic synchronous chat example.""" print("=== Basic Chat Example ===") - # Initialize the chat model - chat = ChatParallelWeb( - model_name="speed", # Parallel's chat model - temperature=0.7, # Optional: temperature (ignored by Parallel) - max_tokens=None, # Optional: max tokens (ignored by Parallel) - ) + # Initialize the chat model. Models: "speed" (default, fast), + # "lite" / "base" / "core" (research models with web citations + # in `response_metadata["basis"]` and structured-output support). 
+ chat = ChatParallel(model="speed") # Create messages messages = [ @@ -57,7 +55,7 @@ def streaming_example() -> None: """Streaming example for real-time responses.""" print("\n=== Streaming Chat Example ===") - chat = ChatParallelWeb() + chat = ChatParallel() messages = [ SystemMessage(content="You are a creative writing assistant."), @@ -86,7 +84,7 @@ async def async_example() -> None: """Asynchronous example.""" print("\n=== Async Chat Example ===") - chat = ChatParallelWeb() + chat = ChatParallel() messages = [ SystemMessage(content="You are a technology expert."), @@ -121,7 +119,7 @@ def conversation_example() -> None: """Example of maintaining conversation context.""" print("\n=== Conversation Example ===") - chat = ChatParallelWeb() + chat = ChatParallel() # Start with system message messages: list[BaseMessage] = [ diff --git a/examples/extract_tool_example.py b/examples/extract_tool_example.py index 6efafb0..f8c1df9 100644 --- a/examples/extract_tool_example.py +++ b/examples/extract_tool_example.py @@ -168,11 +168,11 @@ def agent_integration_example() -> None: """Example of using extract tool with an agent.""" print("\n=== Agent Integration Example ===") - from langchain_parallel import ChatParallelWeb + from langchain_parallel import ChatParallel # Initialize tools extract_tool = ParallelExtractTool(max_chars_per_extract=3000) - chat = ChatParallelWeb() + chat = ChatParallel() # Extract content print("\nExtracting content from URLs...") diff --git a/examples/search_example.py b/examples/search_example.py index f7b33e7..c91c1f7 100644 --- a/examples/search_example.py +++ b/examples/search_example.py @@ -6,7 +6,7 @@ import os from typing import Any -from langchain_parallel import ParallelWebSearchTool +from langchain_parallel import ParallelSearchTool, SourcePolicy # Set your API key: export PARALLEL_API_KEY="your-api-key" @@ -15,37 +15,37 @@ def basic_search_examples() -> None: """Basic search tool examples.""" print("=== Basic Search Examples ===") - # 
Initialize the search tool - search_tool = ParallelWebSearchTool() + search_tool = ParallelSearchTool() - # Example 1: Simple objective-based search - print("\nExample 1: Simple objective-based search") + # Example 1: Objective + search_queries (the recommended GA shape). + print("\nExample 1: Objective + search_queries") result = search_tool.invoke( { - "objective": ( - "What are the latest developments in artificial intelligence in 2024?" - ) - } + "search_queries": [ + "latest AI developments 2026", + "AI research breakthroughs", + ], + "objective": "What are the latest developments in artificial intelligence?", + "max_results": 5, + }, ) - print(f"Found {len(result.get('results', []))} results") display_results(result, max_results=2) display_metadata(result) - # Example 2: Multiple search queries + # Example 2: Multiple search queries (no objective). print("\nExample 2: Multiple search queries") result2 = search_tool.invoke( { "search_queries": [ - "AI developments 2024", + "AI developments 2026", "latest artificial intelligence news", - "machine learning breakthroughs 2024", + "machine learning breakthroughs", ], "max_results": 8, - "include_metadata": True, # Get timing info - } + "include_metadata": True, + }, ) - print(f"Found {len(result2.get('results', []))} results") display_results(result2, max_results=3) display_metadata(result2) @@ -55,47 +55,49 @@ def search_examples() -> None: """Search features examples.""" print("\n=== Search Examples ===") - search_tool = ParallelWebSearchTool() + search_tool = ParallelSearchTool() - # Example 3: Academic search with domain filtering and fetch policy + # Example 3: Academic search with domain filtering and fetch policy. 
print("\nExample 3: Academic search with domain filtering and fetch policy") result3 = search_tool.invoke( { + "search_queries": [ + "climate change research findings", + "global warming peer reviewed studies", + ], "objective": "Latest climate change research and findings", - "source_policy": { - "include_domains": ["nature.com", "science.org", "arxiv.org"], - "exclude_domains": ["reddit.com", "twitter.com", "facebook.com"], - }, + "source_policy": SourcePolicy( + include_domains=["nature.com", "science.org", "arxiv.org"], + exclude_domains=["reddit.com", "twitter.com", "facebook.com"], + ), "max_results": 5, - "excerpts": {"max_chars_per_result": 2000}, # Longer excerpts - "mode": "one-shot", # Comprehensive results + "excerpts": {"max_chars_per_result": 2000}, + "mode": "advanced", # Higher quality with more retrieval and compression. "fetch_policy": { - "max_age_seconds": 86400, # Cache content for 1 day - "timeout_seconds": 60, # 60 second timeout for live fetches + "max_age_seconds": 86400, # Cache content for 1 day. + "timeout_seconds": 60, }, "include_metadata": True, - } + }, ) - print("Academic sources search completed") display_results(result3, max_results=2, show_excerpts=True) display_metadata(result3) - # Example 4: Multiple topic news search with agentic mode - print("\nExample 4: Multiple topic news search with agentic mode") + # Example 4: Multiple-topic news search with the basic (low-latency) mode. + print("\nExample 4: Multiple topic news search (basic mode)") result4 = search_tool.invoke( { "search_queries": [ - "tech industry layoffs 2024", + "tech industry layoffs 2026", "startup funding trends", "AI company acquisitions", ], "max_results": 6, - "mode": "agentic", # Token-efficient, concise results + "mode": "basic", # Low-latency mode; pair with 2-3 high-quality queries. 
"include_metadata": True, - } + }, ) - print("Multiple query search completed") display_results(result4, max_results=3) display_metadata(result4) @@ -105,34 +107,44 @@ async def async_search_examples() -> None: """Async search examples.""" print("\n=== Async Search Examples ===") - search_tool = ParallelWebSearchTool() + search_tool = ParallelSearchTool() - # Example 5: Async search + # Example 5: Async search. print("\nExample 5: Async search execution") result5 = await search_tool.ainvoke( { + "search_queries": ["quantum computing breakthroughs"], "objective": "Latest developments in quantum computing", "max_results": 4, "include_metadata": True, - } + }, ) - print("Async search completed") display_results(result5, max_results=2) display_metadata(result5) - # Example 6: Parallel async searches + # Example 6: Parallel async searches. print("\nExample 6: Parallel async searches") tasks = [ search_tool.ainvoke( - {"objective": "artificial intelligence news", "max_results": 3} + { + "search_queries": ["artificial intelligence news"], + "max_results": 3, + }, + ), + search_tool.ainvoke( + { + "search_queries": ["machine learning research"], + "max_results": 3, + }, ), search_tool.ainvoke( - {"objective": "machine learning research", "max_results": 3} + { + "search_queries": ["robotics developments"], + "max_results": 3, + }, ), - search_tool.ainvoke({"objective": "robotics developments", "max_results": 3}), ] - results = await asyncio.gather(*tasks) for i, result in enumerate(results, 1): @@ -141,7 +153,10 @@ async def async_search_examples() -> None: def display_results( - result: dict[str, Any], *, max_results: int = 5, show_excerpts: bool = False + result: dict[str, Any], + *, + max_results: int = 5, + show_excerpts: bool = False, ) -> None: """Display search results in a formatted way.""" if "results" not in result: @@ -149,13 +164,10 @@ def display_results( print(f"Response keys: {list(result.keys())}") return - results = result["results"][:max_results] - - for 
i, res in enumerate(results, 1): + for i, res in enumerate(result["results"][:max_results], 1): print(f"\nResult {i}:") print(f" URL: {res.get('url', 'N/A')}") print(f" Title: {res.get('title', 'N/A')}") - excerpts = res.get("excerpts", []) if excerpts: print(f" Excerpts: {len(excerpts)} found") @@ -170,100 +182,92 @@ def display_metadata(result: dict[str, Any]) -> None: """Display search metadata if available.""" if "search_metadata" not in result: return - metadata = result["search_metadata"] print("\n Search Metadata:") + print(f" Endpoint: {metadata.get('endpoint', 'N/A')}") print(f" Duration: {metadata.get('search_duration_seconds', 'N/A')}s") - print( - f" Results: {metadata.get('actual_results_returned', 'N/A')}" - f"/{metadata.get('max_results_requested', 'N/A')}" - ) - - if metadata.get("query_count"): - print(f" Queries: {metadata['query_count']}") - - if metadata.get("source_policy_applied"): - if "included_domains" in metadata: - print(f" Included domains: {metadata['included_domains']}") - if "excluded_domains" in metadata: - print(f" Excluded domains: {metadata['excluded_domains']}") + print(f" Results: {metadata.get('actual_results_returned', 'N/A')}") def practical_use_cases() -> None: """Practical use case examples.""" print("\n=== Practical Use Cases ===") - search_tool = ParallelWebSearchTool() + search_tool = ParallelSearchTool() - # Use case 1: Research assistance + # Use case 1: Research assistance. 
print("\nUse Case 1: Research Assistant") research_result = search_tool.invoke( { - "objective": "Analysis of renewable energy adoption trends in 2024", - "source_policy": { - "include_domains": ["iea.org", "irena.org", "energy.gov", "nature.com"], - "exclude_domains": ["blog.com", "personal-site.com"], - }, + "search_queries": [ + "renewable energy adoption 2026", + "solar wind energy growth", + ], + "objective": "Analysis of renewable energy adoption trends", + "source_policy": SourcePolicy( + include_domains=["iea.org", "irena.org", "energy.gov", "nature.com"], + exclude_domains=["blog.com", "personal-site.com"], + ), "max_results": 10, "excerpts": {"max_chars_per_result": 2500}, "include_metadata": True, - } + }, ) - print("Research completed - energy analysis") print(f"Found {len(research_result.get('results', []))} authoritative sources") display_metadata(research_result) - # Use case 2: News monitoring + # Use case 2: News monitoring. print("\nUse Case 2: News Monitoring Dashboard") news_result = search_tool.invoke( { "search_queries": [ "tech industry news today", "AI company funding", - "cybersecurity breaches 2024", + "cybersecurity breaches 2026", "cloud computing trends", ], "max_results": 15, "include_metadata": True, - } + }, ) - print("News monitoring completed") print(f"Found {len(news_result.get('results', []))} relevant news items") display_metadata(news_result) - # Use case 3: Competitive analysis + # Use case 3: Competitive analysis. 
print("\nUse Case 3: Competitive Analysis") competitor_result = search_tool.invoke( { + "search_queries": [ + "tech company product launches", + "big tech strategic moves", + ], "objective": ( "Latest product launches and strategic moves by major tech companies" ), - "source_policy": { - "include_domains": [ + "source_policy": SourcePolicy( + include_domains=[ "techcrunch.com", "theverge.com", "wired.com", "ars-technica.com", ], - "exclude_domains": ["reddit.com", "twitter.com"], - }, + exclude_domains=["reddit.com", "twitter.com"], + ), "max_results": 12, "include_metadata": True, - } + }, ) - print("Competitive analysis completed") display_results(competitor_result, max_results=2) display_metadata(competitor_result) async def main() -> None: - """Main function demonstrating Parallel Web Search Tool usage.""" + """Main function demonstrating Parallel Search Tool usage.""" print("=== Parallel Search Examples ===") - # Check if API key is set if not os.getenv("PARALLEL_API_KEY"): print("Error: PARALLEL_API_KEY environment variable not set") print("Please set your API key: export PARALLEL_API_KEY='your-api-key'") @@ -272,31 +276,21 @@ async def main() -> None: print("API key found in environment") print("Starting search examples...") - # Run examples try: - # Basic examples basic_search_examples() - - # Search features search_examples() - - # Async examples await async_search_examples() - - # Practical use cases practical_use_cases() print("\n=== All examples completed successfully ===") print("\nKey features demonstrated:") - print(" - Basic objective and query-based searches") - print(" - Multi-query search capabilities") - print(" - Domain filtering with source policies") - print(" - Fetch policies for cache control") - print(" - Search modes: one-shot and agentic") - print(" - Async search execution") - print(" - Parallel search processing") + print(" - search_queries + objective (GA /v1 endpoint)") + print(" - Multi-query search") + print(" - Domain filtering 
with SourcePolicy") + print(" - FetchPolicy for cache control") + print(" - Search modes: basic (low-latency) and advanced (high-quality)") + print(" - Async + parallel execution") print(" - Metadata collection") - print(" - Practical use case implementations") except Exception as e: print(f"\nError during execution: {e}") @@ -325,5 +319,4 @@ def run_sync_examples() -> None: if __name__ == "__main__": - # Run async main asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 66e5b0c..5836e53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,7 +118,11 @@ convention = "google" ] "examples/*.py" = ["T201"] # Allow print statements in examples "docs/*.ipynb" = ["T201", "E501"] # Allow print statements and long lines in documentation -"scripts/*.py" = ["SIM105", "S110"] # Allow try-except-pass in scripts +"scripts/*.py" = [ + "SIM105", # Allow try-except-pass in scripts + "S110", + "T201", # Allow print statements in scripts +] "langchain_parallel/extract_tool.py" = [ "FBT001", # Boolean-typed positional argument (matches Extract API design) "FBT002", # Boolean default positional argument (matches Extract API design) diff --git a/scripts/check_imports.py b/scripts/check_imports.py index b141be6..87777df 100644 --- a/scripts/check_imports.py +++ b/scripts/check_imports.py @@ -63,8 +63,8 @@ def load_module_with_deps(file: str, loaded_modules: set[str] | None = None) -> load_module_with_deps(file, loaded_modules) except Exception: has_failure = True - print(file) # noqa: T201 + print(file) traceback.print_exc() - print() # noqa: T201 + print() sys.exit(1 if has_failure else 0) diff --git a/scripts/run_notebooks.py b/scripts/run_notebooks.py new file mode 100644 index 0000000..1bb1a7f --- /dev/null +++ b/scripts/run_notebooks.py @@ -0,0 +1,97 @@ +"""Headless executor for the docs/*.ipynb notebooks. 
+ +Run from the repo root with PARALLEL_API_KEY set: + + poetry run python scripts/run_notebooks.py + +Skips cells that need user interaction (`%pip install`, `getpass.getpass`) +so the rest can run end-to-end against the real Parallel API. Useful as a +pre-release smoke test that the published examples still work. + +Pass paths to limit which notebooks run: + + poetry run python scripts/run_notebooks.py docs/chat.ipynb +""" + +from __future__ import annotations + +import argparse +import os +import sys +from pathlib import Path + +import nbformat +from nbclient import NotebookClient +from nbclient.exceptions import CellExecutionError + +REPO_ROOT = Path(__file__).resolve().parent.parent +DOCS = REPO_ROOT / "docs" +DEFAULT_NOTEBOOKS = [ + DOCS / "chat.ipynb", + DOCS / "search_tool.ipynb", + DOCS / "extract_tool.ipynb", +] + + +def _is_interactive_cell(source: str) -> bool: + """Skip cells that block on user input or shell-out to install.""" + stripped = source.lstrip() + return stripped.startswith(("%pip", "!pip")) or "getpass.getpass" in source + + +def run_notebook(path: Path, *, timeout: int = 180) -> bool: + """Execute a notebook in-place and report whether it succeeded.""" + nb = nbformat.read(path, as_version=4) + + keep = [] + for cell in nb.cells: + if cell.cell_type == "code" and _is_interactive_cell( + "".join(cell.get("source", [])), + ): + continue + keep.append(cell) + nb.cells = keep + + client = NotebookClient(nb, timeout=timeout, kernel_name="python3") + try: + client.execute() + except CellExecutionError as e: + print(f"FAIL: {path.name}") + # Tail of the traceback is what matters; full message is huge. 
+ print(str(e)[-2000:]) + return False + print(f"OK: {path.name}") + return True + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + parser.add_argument( + "notebooks", + nargs="*", + type=Path, + help="Notebook paths (defaults to docs/*.ipynb)", + ) + parser.add_argument( + "--timeout", + type=int, + default=180, + help="Per-cell timeout in seconds (default 180)", + ) + args = parser.parse_args() + + if not os.environ.get("PARALLEL_API_KEY"): + print( + "PARALLEL_API_KEY is not set; notebooks that hit the API will fail.", + file=sys.stderr, + ) + + notebooks = args.notebooks or DEFAULT_NOTEBOOKS + ok = True + for nb_path in notebooks: + ok &= run_notebook(nb_path.resolve(), timeout=args.timeout) + return 0 if ok else 1 + + +if __name__ == "__main__": + sys.exit(main())