From 951b77838f1a7d007ad57fc65b8a7fc2c76e905d Mon Sep 17 00:00:00 2001 From: John Niche Date: Wed, 8 Apr 2026 02:07:57 -0300 Subject: [PATCH 01/17] =?UTF-8?q?chore(release):=20v0.21.0=20source-layer?= =?UTF-8?q?=20prep=20=E2=80=94=20exports,=20extras,=20beta=20markers,=20ve?= =?UTF-8?q?rsion=20bump?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Bump version to 0.21.0 in pyproject.toml and __init__.py - Add OTelObserver/LangfuseObserver lazy exports to observe/__init__.py - Export AzureOpenAIProvider and observe submodule from package root - Add ContentPart/image_message/text_content to public __all__ - Apply @beta to all 9 new toolbox tools (code, search, github, db) - Extend stability.beta()/stable() with Any overload for Tool objects - Add qdrant-client/faiss-cpu/beautifulsoup4 to [rag] extras - Add new [observe] extras with opentelemetry-api/langfuse --- pyproject.toml | 9 ++++++++- src/selectools/__init__.py | 10 ++++++++-- src/selectools/observe/__init__.py | 14 ++++++++++++++ src/selectools/stability.py | 16 ++++++++++++---- src/selectools/toolbox/code_tools.py | 3 +++ src/selectools/toolbox/db_tools.py | 3 +++ src/selectools/toolbox/github_tools.py | 4 ++++ src/selectools/toolbox/search_tools.py | 3 +++ 8 files changed, 55 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 45aa7a0..5e4a9a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "selectools" -version = "0.20.1" +version = "0.21.0" description = "Production-ready Python framework for AI agents with multi-agent graphs, hybrid RAG, guardrails, audit logging, 50 evaluators, and a visual builder. Supports OpenAI, Anthropic, Gemini, Ollama. By NichevLabs." 
readme = "README.md" requires-python = ">=3.9" @@ -69,6 +69,13 @@ rag = [ "voyageai>=0.2.0", "cohere>=5.0.0", "pypdf>=4.0.0", + "qdrant-client>=1.7.0", + "faiss-cpu>=1.7.0", + "beautifulsoup4>=4.12.0", +] +observe = [ + "opentelemetry-api>=1.20.0", + "langfuse>=2.0.0", ] evals = [ "pyyaml>=6.0.0", diff --git a/src/selectools/__init__.py b/src/selectools/__init__.py index 8539e81..12bfba6 100644 --- a/src/selectools/__init__.py +++ b/src/selectools/__init__.py @@ -1,9 +1,9 @@ """Public exports for the selectools package.""" -__version__ = "0.20.1" +__version__ = "0.21.0" # Import submodules (lazy loading for optional dependencies) -from . import embeddings, evals, guardrails, models, patterns, rag, toolbox +from . import embeddings, evals, guardrails, models, observe, patterns, rag, toolbox from .agent import Agent, AgentConfig from .agent.config_groups import ( BudgetConfig, @@ -117,6 +117,7 @@ from .pricing import PRICING, calculate_cost, calculate_embedding_cost, get_model_pricing from .prompt import REASONING_STRATEGIES, PromptBuilder from .providers.anthropic_provider import AnthropicProvider +from .providers.azure_openai_provider import AzureOpenAIProvider from .providers.fallback import FallbackProvider from .providers.gemini_provider import GeminiProvider from .providers.ollama_provider import OllamaProvider @@ -145,6 +146,9 @@ "ToolMetrics", "ConversationMemory", "Message", + "ContentPart", + "image_message", + "text_content", "Role", "Tool", "ToolParameter", @@ -153,6 +157,7 @@ "PromptBuilder", "REASONING_STRATEGIES", "OpenAIProvider", + "AzureOpenAIProvider", "AnthropicProvider", "GeminiProvider", "OllamaProvider", @@ -263,6 +268,7 @@ "KnowledgeGraphMemory", # Submodules (for lazy loading) "embeddings", + "observe", "rag", "toolbox", # Orchestration diff --git a/src/selectools/observe/__init__.py b/src/selectools/observe/__init__.py index b8da498..108d35f 100644 --- a/src/selectools/observe/__init__.py +++ b/src/selectools/observe/__init__.py @@ -32,3 
+32,17 @@ "SQLiteTraceStore", "JSONLTraceStore", ] + +try: + from .otel import OTelObserver # noqa: F401 + + __all__.append("OTelObserver") +except ImportError: + pass + +try: + from .langfuse import LangfuseObserver # noqa: F401 + + __all__.append("LangfuseObserver") +except ImportError: + pass diff --git a/src/selectools/stability.py b/src/selectools/stability.py index a47dd3b..c338d9b 100644 --- a/src/selectools/stability.py +++ b/src/selectools/stability.py @@ -41,9 +41,13 @@ def stable(obj: _C) -> _C: ... def stable(obj: _F) -> _F: ... -def stable(obj: Union[_F, _C]) -> Union[_F, _C]: +@overload +def stable(obj: Any) -> Any: ... + + +def stable(obj: Any) -> Any: """Set stability marker to 'stable' (API is frozen).""" - obj.__stability__ = "stable" # type: ignore[union-attr] + obj.__stability__ = "stable" return obj @@ -58,9 +62,13 @@ def beta(obj: _C) -> _C: ... def beta(obj: _F) -> _F: ... -def beta(obj: Union[_F, _C]) -> Union[_F, _C]: +@overload +def beta(obj: Any) -> Any: ... + + +def beta(obj: Any) -> Any: """Set stability marker to 'beta' (API may change in minor releases).""" - obj.__stability__ = "beta" # type: ignore[union-attr] + obj.__stability__ = "beta" return obj diff --git a/src/selectools/toolbox/code_tools.py b/src/selectools/toolbox/code_tools.py index b38e487..2350142 100644 --- a/src/selectools/toolbox/code_tools.py +++ b/src/selectools/toolbox/code_tools.py @@ -11,6 +11,7 @@ import subprocess # nosec B404 — code execution tool import tempfile +from ..stability import beta from ..tools import tool _MAX_OUTPUT_BYTES = 10 * 1024 # 10 KB @@ -29,6 +30,7 @@ def _truncate(text: str, max_bytes: int = _MAX_OUTPUT_BYTES) -> str: return truncated + "\n... 
(output truncated to 10 KB)" +@beta @tool(description="Execute Python code and return stdout + stderr") def execute_python(code: str, timeout: int = 30) -> str: """ @@ -95,6 +97,7 @@ def execute_python(code: str, timeout: int = 30) -> str: os.unlink(tmp_path) +@beta @tool(description="Execute a shell command and return output") def execute_shell(command: str, timeout: int = 30) -> str: """ diff --git a/src/selectools/toolbox/db_tools.py b/src/selectools/toolbox/db_tools.py index e407985..d612af6 100644 --- a/src/selectools/toolbox/db_tools.py +++ b/src/selectools/toolbox/db_tools.py @@ -11,6 +11,7 @@ import re import sqlite3 +from ..stability import beta from ..tools import tool @@ -63,6 +64,7 @@ def _format_table(columns: list[str], rows: list[tuple]) -> str: return "\n".join(lines) +@beta @tool(description="Execute a read-only SQL query against a SQLite database") def query_sqlite(db_path: str, sql: str, max_rows: int = 100) -> str: """ @@ -128,6 +130,7 @@ def query_sqlite(db_path: str, sql: str, max_rows: int = 100) -> str: conn.close() +@beta @tool(description="Execute a read-only SQL query against PostgreSQL") def query_postgres(connection_string: str, sql: str, max_rows: int = 100) -> str: """ diff --git a/src/selectools/toolbox/github_tools.py b/src/selectools/toolbox/github_tools.py index cb12871..38bb2f2 100644 --- a/src/selectools/toolbox/github_tools.py +++ b/src/selectools/toolbox/github_tools.py @@ -15,6 +15,7 @@ import urllib.request from typing import Any +from ..stability import beta from ..tools import tool _API_BASE = "https://api.github.com" @@ -48,6 +49,7 @@ def _github_request(path: str, params: dict[str, str] | None = None) -> Any: return json.loads(resp.read().decode("utf-8")) +@beta @tool(description="Search GitHub repositories") def github_search_repos(query: str, max_results: int = 5) -> str: """ @@ -105,6 +107,7 @@ def github_search_repos(query: str, max_results: int = 5) -> str: return f"Error searching GitHub: {e}" +@beta 
@tool(description="Get file contents from a GitHub repository") def github_get_file(repo: str, path: str, ref: str = "main") -> str: """ @@ -177,6 +180,7 @@ def github_get_file(repo: str, path: str, ref: str = "main") -> str: return f"Error fetching file: {e}" +@beta @tool(description="List issues in a GitHub repository") def github_list_issues(repo: str, state: str = "open", max_results: int = 10) -> str: """ diff --git a/src/selectools/toolbox/search_tools.py b/src/selectools/toolbox/search_tools.py index 90c4fb3..de53584 100644 --- a/src/selectools/toolbox/search_tools.py +++ b/src/selectools/toolbox/search_tools.py @@ -16,6 +16,7 @@ from typing import Optional from urllib.parse import urlparse +from ..stability import beta from ..tools import tool # Private IP networks that must be blocked to prevent SSRF @@ -92,6 +93,7 @@ def _strip_html_tags(text: str) -> str: return text.strip() +@beta @tool(description="Search the web using DuckDuckGo (no API key needed)") def web_search(query: str, num_results: int = 5) -> str: """ @@ -169,6 +171,7 @@ def web_search(query: str, num_results: int = 5) -> str: return f"Error performing web search: {e}" +@beta @tool(description="Fetch a URL and extract text content") def scrape_url(url: str, selector: Optional[str] = None) -> str: """ From 734ca8464843762d3962128646dd855f30ffbb2e Mon Sep 17 00:00:00 2001 From: John Niche Date: Wed, 8 Apr 2026 02:20:24 -0300 Subject: [PATCH 02/17] docs(release): v0.21.0 documentation, CHANGELOG, llms.txt, README, mypy fix - CHANGELOG: 0.21.0 entry covering all 7 connector subsystems - README: What's New in v0.21 section, Azure provider row, FAISS/Qdrant/pgvector imports, test count 4960 - 7 new module docs in docs/modules/: FAISS, QDRANT, PGVECTOR, MULTIMODAL, OTEL, AZURE_OPENAI, LANGFUSE - mkdocs.yml nav: surfaced new pages in Core/Features/Reference sections - llms.txt + llms-full.txt: 7 new module pointers, version bumped to v0.21.0, page count 32 -> 39 - Fix pre-existing mypy error in 
azure_openai_provider.py default_model assignment --- CHANGELOG.md | 48 + README.md | 49 +- docs/CHANGELOG.md | 48 + docs/llms-full.txt | 956 +++++++++++++++++- docs/llms.txt | 12 +- docs/modules/AZURE_OPENAI.md | 148 +++ docs/modules/FAISS.md | 108 ++ docs/modules/LANGFUSE.md | 125 +++ docs/modules/MULTIMODAL.md | 123 +++ docs/modules/OTEL.md | 130 +++ docs/modules/PGVECTOR.md | 139 +++ docs/modules/QDRANT.md | 126 +++ mkdocs.yml | 7 + .../providers/azure_openai_provider.py | 6 +- 14 files changed, 2016 insertions(+), 9 deletions(-) create mode 100644 docs/modules/AZURE_OPENAI.md create mode 100644 docs/modules/FAISS.md create mode 100644 docs/modules/LANGFUSE.md create mode 100644 docs/modules/MULTIMODAL.md create mode 100644 docs/modules/OTEL.md create mode 100644 docs/modules/PGVECTOR.md create mode 100644 docs/modules/QDRANT.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c6f3d57..23d551f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,54 @@ All notable changes to selectools will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.21.0] - 2026-04-08 + +### Added + +#### Vector Stores +- **`FAISSVectorStore`** (`selectools.rag.stores.FAISSVectorStore`): in-process vector index using Facebook AI Similarity Search. Supports cosine, L2, and inner-product metrics; persistence via `save()`/`load()`; thread-safe writes. Optional dep: `faiss-cpu>=1.7.0`. +- **`QdrantVectorStore`** (`selectools.rag.stores.QdrantVectorStore`): connector for Qdrant. REST + gRPC support, auto-creates collections, payload filtering, cosine by default. Optional dep: `qdrant-client>=1.7.0`. +- **`PgVectorStore`** (`selectools.rag.stores.PgVectorStore`): PostgreSQL vector store using the `pgvector` extension. JSONB metadata, parameterized queries, auto-`CREATE TABLE`. Uses existing `[postgres]` extras (`psycopg2-binary`). 
+ +#### Document Loaders +- `DocumentLoader.from_csv(path, text_column=..., metadata_columns=..., delimiter=...)` — one document per row, stdlib `csv.DictReader`. +- `DocumentLoader.from_json(path, text_field=..., metadata_fields=..., jq_filter=...)` — single objects or arrays, with simple dot-path filtering. +- `DocumentLoader.from_html(path, selector=..., strip_tags=...)` — optional `beautifulsoup4` for CSS selectors, regex fallback otherwise. +- `DocumentLoader.from_url(url, selector=..., headers=..., timeout=...)` — fetches via stdlib `urllib.request` and delegates to `from_html`. + +#### Toolbox +- **Code execution** (`selectools.toolbox.code_tools`): `execute_python(code, timeout)` and `execute_shell(command, timeout)`. Subprocess-isolated, 10 KB output truncation, shell metacharacter blocklist for command-injection mitigation. +- **Web search** (`selectools.toolbox.search_tools`): `web_search(query, num_results)` via DuckDuckGo HTML (no API key) and `scrape_url(url, selector)` with SSRF guards. +- **GitHub** (`selectools.toolbox.github_tools`): `github_search_repos`, `github_get_file`, `github_list_issues` against GitHub REST API v3. Uses `GITHUB_TOKEN` env var when present (5000 req/hr vs 60). +- **Database** (`selectools.toolbox.db_tools`): `query_sqlite` with `PRAGMA query_only = ON`, `query_postgres` via psycopg2. Read-only enforcement at the validator level. + +#### Multimodal Messages +- `ContentPart` dataclass for multipart messages (`text`, `image_url`, `image_base64`, `audio`). +- `Message.content` now accepts `str | list[ContentPart]`. Existing `content: str` paths unchanged (backward compatible). +- `image_message(image, prompt)` and `text_content(message)` helpers exported from package root. +- All four providers (OpenAI, Anthropic, Gemini, Ollama) format multimodal content into their native shape. 
+ +#### Observability +- **`OTelObserver`** (`selectools.observe.OTelObserver`): maps the 45 selectools observer events to OpenTelemetry spans following the GenAI semantic conventions. Async variant `AsyncOTelObserver` for `arun()`/`astream()`. Optional dep: `opentelemetry-api>=1.20.0`. +- **`LangfuseObserver`** (`selectools.observe.LangfuseObserver`): sends traces, generations, and spans to Langfuse Cloud or self-hosted instances. Reads `LANGFUSE_PUBLIC_KEY`/`LANGFUSE_SECRET_KEY`/`LANGFUSE_HOST` env vars. Optional dep: `langfuse>=2.0.0`. + +#### Providers +- **`AzureOpenAIProvider`** (`selectools.AzureOpenAIProvider`): wraps the OpenAI SDK's `AzureOpenAI` client. Supports `AZURE_OPENAI_ENDPOINT`/`AZURE_OPENAI_API_KEY` env vars, AAD token auth, and Azure deployment-name to model-id mapping. Inherits all behavior from `OpenAIProvider`. + +#### Optional Dependencies +- New `[observe]` extras group: `opentelemetry-api>=1.20.0`, `langfuse>=2.0.0`. +- `[rag]` extras now also include: `qdrant-client>=1.7.0`, `faiss-cpu>=1.7.0`, `beautifulsoup4>=4.12.0`. + +### Changed +- `stability.beta()` and `stability.stable()` decorators now accept arbitrary objects via an `Any` overload, in addition to classes and callables. Lets `@beta` mark `Tool` instances produced by `@tool()`. 
+ +### Stats +- **4,960 tests** (188 new across 7 spec subsystems) +- **88 examples** (12 new: `77_faiss_vector_store.py` through `88_langfuse_observer.py`) +- **5 providers** (added Azure OpenAI) +- **7 vector stores** (added FAISS, Qdrant, pgvector) +- **152 models** + ## [0.20.1] - 2026-04-03 ### Added diff --git a/README.md b/README.md index 919d57e..dd540e8 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,41 @@ result = AgentGraph.chain(planner, writer, reviewer).run("Write a blog post") # selectools serve agent.yaml ``` +## What's New in v0.21 + +### v0.21.0 — Connector Expansion + +Seven new subsystems land at once: three vector stores, four document loaders, eight new toolbox tools, multimodal messages, an Azure OpenAI provider, and two observability backends. + +```python +# New vector stores +from selectools.rag.stores import FAISSVectorStore, QdrantVectorStore, PgVectorStore + +# New provider +from selectools import AzureOpenAIProvider + +# New observers +from selectools.observe import OTelObserver, LangfuseObserver + +# Multimodal messages +from selectools import image_message +agent.run([image_message("./screenshot.png", "What does this UI show?")]) +``` + +- **Vector stores**: `FAISSVectorStore` (in-process, persistable), `QdrantVectorStore` (REST + gRPC), `PgVectorStore` (PostgreSQL pgvector extension) +- **Document loaders**: `DocumentLoader.from_csv`, `from_json`, `from_html`, `from_url` +- **Toolbox**: `execute_python`, `execute_shell`, `web_search`, `scrape_url`, `github_search_repos`, `github_get_file`, `github_list_issues`, `query_sqlite`, `query_postgres` +- **Multimodal**: `Message.content` accepts `list[ContentPart]`; image input works on OpenAI, Anthropic, Gemini, and Ollama vision models +- **Azure OpenAI**: deployment-name routing, AAD token auth, env-var fallback (`AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`) +- **OpenTelemetry**: `OTelObserver` emits GenAI semantic-convention spans (Jaeger, Tempo, Datadog, Honeycomb, Grafana) +- 
**Langfuse**: `LangfuseObserver` ships traces, generations, and spans to Langfuse Cloud or self-hosted + +```bash +pip install "selectools[rag]" # FAISS + Qdrant + beautifulsoup4 (HTML CSS selectors) +pip install "selectools[observe]" # OpenTelemetry + Langfuse +pip install "selectools[postgres]" # pgvector (uses psycopg2-binary) +``` + ## What's New in v0.20 ### v0.20.1 — Visual Agent Builder + GitHub Pages @@ -95,7 +130,7 @@ Path("trace.html").write_text(trace_to_html(result.trace)) - **Trace HTML viewer** — `trace_to_html(trace)` renders a standalone waterfall timeline - **Deprecation policy** — 2-minor-version window, programmatic introspection via `.__stability__` - **Security audit** — all 41 `# nosec` annotations reviewed and published in `docs/SECURITY.md` -- **Quality infrastructure** — property-based tests (Hypothesis), thread-safety smoke suite, 5 new production simulations (4612 tests total) +- **Quality infrastructure** — property-based tests (Hypothesis), thread-safety smoke suite, 5 new production simulations (4960 tests total) ### v0.19.1 — Advanced Agent Patterns @@ -451,7 +486,7 @@ report.to_html("report.html") - **76 Examples**: Multi-agent graphs, RAG, hybrid search, streaming, structured output, traces, batch, policy, observer, guardrails, audit, sessions, entity memory, knowledge graph, eval framework, advanced agent patterns, stability markers, HTML trace viewer, and more - **Built-in Eval Framework**: 50 evaluators (30 deterministic + 21 LLM-as-judge), A/B testing, regression detection, HTML reports, JUnit XML, snapshot testing - **AgentObserver Protocol**: 45 lifecycle events with `run_id` correlation, `LoggingObserver`, `SimpleStepObserver`, OTel export -- **4612 Tests**: Unit, integration, regression, and E2E with real API calls +- **4960 Tests**: Unit, integration, regression, and E2E with real API calls ## Install @@ -791,6 +826,7 @@ See [docs/modules/STREAMING.md](docs/modules/STREAMING.md) for full documentatio | Provider | Streaming 
| Vision | Native Tools | Cost | |---|---|---|---|---| | **OpenAI** | Yes | Yes | Yes | Paid | +| **Azure OpenAI** | Yes | Yes | Yes | Paid (Azure billing) | | **Anthropic** | Yes | Yes | Yes | Paid | | **Gemini** | Yes | Yes | Yes | Free tier | | **Ollama** | Yes | No | No | Free (local) | @@ -821,11 +857,18 @@ from selectools.embeddings import ( ```python from selectools.rag import VectorStore +from selectools.rag.stores import FAISSVectorStore, QdrantVectorStore, PgVectorStore +# Built-in / factory-style store = VectorStore.create("memory", embedder=embedder) # Fast, no persistence store = VectorStore.create("sqlite", embedder=embedder, db_path="docs.db") # Persistent store = VectorStore.create("chroma", embedder=embedder, persist_directory="./chroma") store = VectorStore.create("pinecone", embedder=embedder, index_name="my-index") + +# v0.21.0 — direct imports +store = FAISSVectorStore(embedder=embedder) # In-process, save/load to disk +store = QdrantVectorStore(embedder=embedder, url="http://localhost:6333") # REST + gRPC +store = PgVectorStore(embedder=embedder, connection_string="postgresql://...") ``` ## Agent Configuration @@ -1065,7 +1108,7 @@ pytest tests/ -x -q # All tests pytest tests/ -k "not e2e" # Skip E2E (no API keys needed) ``` -4612 tests covering parsing, agent loop, providers, RAG pipeline, hybrid search, advanced chunking, dynamic tools, caching, streaming, guardrails, sessions, memory, eval framework, budget/cancellation, knowledge stores, orchestration, pipelines, agent patterns, stability markers, trace viewer, and E2E integration with real API calls. +4960 tests covering parsing, agent loop, providers, RAG pipeline, hybrid search, advanced chunking, dynamic tools, caching, streaming, guardrails, sessions, memory, eval framework, budget/cancellation, knowledge stores, orchestration, pipelines, agent patterns, stability markers, trace viewer, and E2E integration with real API calls. 
## License diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c6f3d57..23d551f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -5,6 +5,54 @@ All notable changes to selectools will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.21.0] - 2026-04-08 + +### Added + +#### Vector Stores +- **`FAISSVectorStore`** (`selectools.rag.stores.FAISSVectorStore`): in-process vector index using Facebook AI Similarity Search. Supports cosine, L2, and inner-product metrics; persistence via `save()`/`load()`; thread-safe writes. Optional dep: `faiss-cpu>=1.7.0`. +- **`QdrantVectorStore`** (`selectools.rag.stores.QdrantVectorStore`): connector for Qdrant. REST + gRPC support, auto-creates collections, payload filtering, cosine by default. Optional dep: `qdrant-client>=1.7.0`. +- **`PgVectorStore`** (`selectools.rag.stores.PgVectorStore`): PostgreSQL vector store using the `pgvector` extension. JSONB metadata, parameterized queries, auto-`CREATE TABLE`. Uses existing `[postgres]` extras (`psycopg2-binary`). + +#### Document Loaders +- `DocumentLoader.from_csv(path, text_column=..., metadata_columns=..., delimiter=...)` — one document per row, stdlib `csv.DictReader`. +- `DocumentLoader.from_json(path, text_field=..., metadata_fields=..., jq_filter=...)` — single objects or arrays, with simple dot-path filtering. +- `DocumentLoader.from_html(path, selector=..., strip_tags=...)` — optional `beautifulsoup4` for CSS selectors, regex fallback otherwise. +- `DocumentLoader.from_url(url, selector=..., headers=..., timeout=...)` — fetches via stdlib `urllib.request` and delegates to `from_html`. + +#### Toolbox +- **Code execution** (`selectools.toolbox.code_tools`): `execute_python(code, timeout)` and `execute_shell(command, timeout)`. 
Subprocess-isolated, 10 KB output truncation, shell metacharacter blocklist for command-injection mitigation. +- **Web search** (`selectools.toolbox.search_tools`): `web_search(query, num_results)` via DuckDuckGo HTML (no API key) and `scrape_url(url, selector)` with SSRF guards. +- **GitHub** (`selectools.toolbox.github_tools`): `github_search_repos`, `github_get_file`, `github_list_issues` against GitHub REST API v3. Uses `GITHUB_TOKEN` env var when present (5000 req/hr vs 60). +- **Database** (`selectools.toolbox.db_tools`): `query_sqlite` with `PRAGMA query_only = ON`, `query_postgres` via psycopg2. Read-only enforcement at the validator level. + +#### Multimodal Messages +- `ContentPart` dataclass for multipart messages (`text`, `image_url`, `image_base64`, `audio`). +- `Message.content` now accepts `str | list[ContentPart]`. Existing `content: str` paths unchanged (backward compatible). +- `image_message(image, prompt)` and `text_content(message)` helpers exported from package root. +- All four providers (OpenAI, Anthropic, Gemini, Ollama) format multimodal content into their native shape. + +#### Observability +- **`OTelObserver`** (`selectools.observe.OTelObserver`): maps the 45 selectools observer events to OpenTelemetry spans following the GenAI semantic conventions. Async variant `AsyncOTelObserver` for `arun()`/`astream()`. Optional dep: `opentelemetry-api>=1.20.0`. +- **`LangfuseObserver`** (`selectools.observe.LangfuseObserver`): sends traces, generations, and spans to Langfuse Cloud or self-hosted instances. Reads `LANGFUSE_PUBLIC_KEY`/`LANGFUSE_SECRET_KEY`/`LANGFUSE_HOST` env vars. Optional dep: `langfuse>=2.0.0`. + +#### Providers +- **`AzureOpenAIProvider`** (`selectools.AzureOpenAIProvider`): wraps the OpenAI SDK's `AzureOpenAI` client. Supports `AZURE_OPENAI_ENDPOINT`/`AZURE_OPENAI_API_KEY` env vars, AAD token auth, and Azure deployment-name to model-id mapping. Inherits all behavior from `OpenAIProvider`. 
+ +#### Optional Dependencies +- New `[observe]` extras group: `opentelemetry-api>=1.20.0`, `langfuse>=2.0.0`. +- `[rag]` extras now also include: `qdrant-client>=1.7.0`, `faiss-cpu>=1.7.0`, `beautifulsoup4>=4.12.0`. + +### Changed +- `stability.beta()` and `stability.stable()` decorators now accept arbitrary objects via an `Any` overload, in addition to classes and callables. Lets `@beta` mark `Tool` instances produced by `@tool()`. + +### Stats +- **4,960 tests** (188 new across 7 spec subsystems) +- **88 examples** (12 new: `77_faiss_vector_store.py` through `88_langfuse_observer.py`) +- **5 providers** (added Azure OpenAI) +- **7 vector stores** (added FAISS, Qdrant, pgvector) +- **152 models** + ## [0.20.1] - 2026-04-03 ### Added diff --git a/docs/llms-full.txt b/docs/llms-full.txt index c231783..56acba4 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -2,7 +2,7 @@ > This file concatenates all selectools documentation pages for AI agent consumption. -> 32 pages included. Generated from docs/ source files. +> 39 pages included. Generated from docs/ source files. @@ -18097,3 +18097,957 @@ results = searcher.search("refund policy", top_k=10) - **LlamaParse** for complex document parsing (tables, PDFs) If your primary need is sophisticated document retrieval with many data sources, LlamaIndex is purpose-built for that. If you need agents + RAG + evals + deployment in one package, selectools combines all of these. 
+ +============================================================ + +## FILE: docs/modules/FAISS.md + +============================================================ + +--- +description: "In-process FAISS vector index for fast local similarity search with disk persistence" +tags: + - rag + - vector-stores + - faiss +--- + +# FAISS Vector Store + +**Import:** `from selectools.rag.stores import FAISSVectorStore` +**Stability:** beta +**Added in:** v0.21.0 + +`FAISSVectorStore` wraps Facebook AI's FAISS library to provide a fast, in-process +vector index that lives entirely in memory but can be persisted to disk. It's ideal +when you want zero-server RAG with millions of vectors and have plenty of RAM. + +```python title="faiss_quick.py" +from selectools.embeddings import OpenAIEmbedder +from selectools.rag import Document +from selectools.rag.stores import FAISSVectorStore + +store = FAISSVectorStore(embedder=OpenAIEmbedder()) +store.add_documents([ + Document(text="Selectools is a Python AI agent framework."), + Document(text="FAISS does fast similarity search."), +]) + +results = store.search("agent framework", top_k=2) +for r in results: + print(r.score, r.document.text) + +store.save("faiss_index") # writes index + documents +``` + +!!! tip "See Also" + - [Qdrant](QDRANT.md) - Self-hosted vector store with REST + gRPC + - [pgvector](PGVECTOR.md) - PostgreSQL-backed vector store + - [RAG](RAG.md) - High-level retrieval pipeline + +--- + +## Install + +```bash +pip install "selectools[rag]" +``` + +`faiss-cpu>=1.7.0` is part of the `[rag]` optional extras. If you want GPU acceleration, +install `faiss-gpu` separately. + +--- + +## Constructor + +```python +FAISSVectorStore( + embedder: EmbeddingProvider | None = None, + dimension: int | None = None, +) +``` + +| Parameter | Description | +|---|---| +| `embedder` | Any `selectools.embeddings.EmbeddingProvider`. May be `None` when loading a persisted index that already contains pre-computed vectors. 
| +| `dimension` | Vector dimension. If `None`, inferred from the first batch of `add_documents()`. | + +--- + +## Persistence + +```python +store.save("path/to/index") # writes index file + sidecar JSON for documents +loaded = FAISSVectorStore.load("path/to/index", embedder=OpenAIEmbedder()) +``` + +`save()` persists both the FAISS index and the parallel `Document` list so search +results can return original text/metadata after reload. + +--- + +## Thread Safety + +FAISS itself is not thread-safe for writes. `FAISSVectorStore` wraps every mutation +in a `threading.Lock`, so concurrent `add_documents()` and `search()` calls from +multiple agent threads are safe. + +--- + +## API Reference + +| Method | Description | +|---|---| +| `add_documents(docs)` | Embed and add documents to the index | +| `search(query, top_k)` | Cosine similarity search; returns `List[SearchResult]` | +| `delete(ids)` | Remove documents by ID | +| `clear()` | Wipe the index | +| `save(path)` | Persist index + documents to disk | +| `load(path, embedder)` | Class method: rehydrate a persisted store | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 77 | [`77_faiss_vector_store.py`](https://github.com/johnnichev/selectools/blob/main/examples/77_faiss_vector_store.py) | FAISS quickstart with embeddings + persistence | + + +============================================================ + +## FILE: docs/modules/QDRANT.md + +============================================================ + +--- +description: "Connector for the Qdrant vector database with REST + gRPC support and payload filtering" +tags: + - rag + - vector-stores + - qdrant +--- + +# Qdrant Vector Store + +**Import:** `from selectools.rag.stores import QdrantVectorStore` +**Stability:** beta +**Added in:** v0.21.0 + +`QdrantVectorStore` wraps the official `qdrant-client` to give you a self-hosted or +Qdrant Cloud-backed vector store. 
It auto-creates collections, supports cosine +similarity by default, and lets you filter searches on metadata via Qdrant's payload +indexing. + +```python title="qdrant_quick.py" +from selectools.embeddings import OpenAIEmbedder +from selectools.rag import Document +from selectools.rag.stores import QdrantVectorStore + +store = QdrantVectorStore( + embedder=OpenAIEmbedder(), + collection_name="my_docs", + url="http://localhost:6333", +) + +store.add_documents([ + Document(text="Qdrant is a vector search engine.", metadata={"category": "infra"}), + Document(text="It supports REST and gRPC.", metadata={"category": "infra"}), +]) + +results = store.search("vector search", top_k=2) +``` + +!!! tip "See Also" + - [FAISS](FAISS.md) - In-process vector index, no server required + - [pgvector](PGVECTOR.md) - PostgreSQL-backed vector store + - [RAG](RAG.md) - Higher-level retrieval pipeline + +--- + +## Install + +```bash +pip install "selectools[rag]" +``` + +`qdrant-client>=1.7.0` is part of the `[rag]` extras. + +You also need a running Qdrant instance. The simplest way: + +```bash +docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant +``` + +Or sign up for [Qdrant Cloud](https://cloud.qdrant.io/) and get a managed instance. + +--- + +## Constructor + +```python +QdrantVectorStore( + embedder: EmbeddingProvider, + collection_name: str = "selectools", + url: str = "http://localhost:6333", + api_key: str | None = None, + prefer_grpc: bool = True, + **qdrant_kwargs, +) +``` + +| Parameter | Description | +|---|---| +| `embedder` | Any `EmbeddingProvider`. Used to compute vectors for both `add_documents()` and `search()`. | +| `collection_name` | Qdrant collection. Auto-created on first `add_documents()` if it doesn't exist. | +| `url` | Qdrant server URL. Use `https://...` for cloud. | +| `api_key` | Optional API key for Qdrant Cloud or authenticated servers. | +| `prefer_grpc` | When `True` (default) the client uses gRPC for lower-latency vector ops. 
| +| `**qdrant_kwargs` | Additional arguments forwarded to `qdrant_client.QdrantClient`. | + +--- + +## Cloud Configuration + +```python +import os + +store = QdrantVectorStore( + embedder=OpenAIEmbedder(), + collection_name="prod_docs", + url="https://my-cluster.qdrant.io", + api_key=os.environ["QDRANT_API_KEY"], +) +``` + +--- + +## Metadata Filtering + +Document metadata is stored as Qdrant payload, so you can filter searches at the +database level. Use `qdrant_client.models.Filter` constructs and pass them via +`**search_kwargs` (the store forwards them to the underlying client). + +--- + +## API Reference + +| Method | Description | +|---|---| +| `add_documents(docs)` | Embed documents and upsert into the collection | +| `search(query, top_k)` | Cosine similarity search | +| `delete(ids)` | Delete documents by ID | +| `clear()` | Delete the entire collection | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 78 | [`78_qdrant_vector_store.py`](https://github.com/johnnichev/selectools/blob/main/examples/78_qdrant_vector_store.py) | Qdrant quickstart with metadata filtering | + + +============================================================ + +## FILE: docs/modules/PGVECTOR.md + +============================================================ + +--- +description: "PostgreSQL-backed vector store using the pgvector extension" +tags: + - rag + - vector-stores + - postgres + - pgvector +--- + +# pgvector Store + +**Import:** `from selectools.rag.stores import PgVectorStore` +**Stability:** beta +**Added in:** v0.21.0 + +`PgVectorStore` lets you store and search document embeddings inside a PostgreSQL +database using the [pgvector](https://github.com/pgvector/pgvector) extension. It's +the right choice when you already run Postgres and want vectors next to the rest of +your application data without standing up a separate vector service. 
+ +```python title="pgvector_quick.py" +from selectools.embeddings import OpenAIEmbedder +from selectools.rag import Document +from selectools.rag.stores import PgVectorStore + +store = PgVectorStore( + embedder=OpenAIEmbedder(), + connection_string="postgresql://user:pass@localhost:5432/mydb", + table_name="selectools_documents", +) + +store.add_documents([ + Document(text="pgvector adds vector types to Postgres."), + Document(text="It supports cosine, L2, and inner-product distance."), +]) + +results = store.search("postgres vector search", top_k=2) +``` + +!!! tip "See Also" + - [Qdrant](QDRANT.md) - Self-hosted vector database with REST + gRPC + - [FAISS](FAISS.md) - In-process vector index, no server required + - [Sessions](SESSIONS.md) - Postgres-backed agent sessions + +--- + +## Install + +```bash +pip install "selectools[postgres]" +``` + +The `[postgres]` extras already include `psycopg2-binary>=2.9.0`. You also need +the pgvector extension installed in your database: + +```sql +CREATE EXTENSION IF NOT EXISTS vector; +``` + +--- + +## Constructor + +```python +PgVectorStore( + embedder: EmbeddingProvider, + connection_string: str, + table_name: str = "selectools_documents", + dimensions: int | None = None, +) +``` + +| Parameter | Description | +|---|---| +| `embedder` | Embedding provider used to compute vectors. | +| `connection_string` | Standard libpq connection string. | +| `table_name` | Table to store documents in. Validated as a SQL identifier (letters, digits, underscores) to prevent injection. | +| `dimensions` | Vector dimensions. Auto-detected from `embedder.embed_query("test")` on first use if not specified. | + +--- + +## Schema + +`PgVectorStore` creates the following table on first use (idempotent): + +```sql +CREATE TABLE IF NOT EXISTS selectools_documents ( + id TEXT PRIMARY KEY, + text TEXT NOT NULL, + metadata JSONB, + embedding vector(N) +); +``` + +The `N` is the embedding dimension. 
An index on the `embedding` column accelerates +cosine similarity queries. + +--- + +## Search + +`search()` runs a parameterized query using pgvector's `<=>` cosine distance +operator: + +```sql +SELECT id, text, metadata, embedding <=> %s AS distance +FROM selectools_documents +ORDER BY distance ASC +LIMIT %s; +``` + +All queries are parameterized — there's no SQL injection risk from user input. + +--- + +## Connection Pooling + +`PgVectorStore` opens a single `psycopg2.connect()` per instance. If you need +pooling for high concurrency, manage it externally (e.g. PgBouncer) and pass the +pooler URL as the connection string. + +--- + +## API Reference + +| Method | Description | +|---|---| +| `add_documents(docs)` | Embed and upsert documents (`INSERT ... ON CONFLICT DO UPDATE`) | +| `search(query, top_k)` | Cosine similarity search | +| `delete(ids)` | Delete documents by ID | +| `clear()` | `TRUNCATE` the table | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 79 | [`79_pgvector_store.py`](https://github.com/johnnichev/selectools/blob/main/examples/79_pgvector_store.py) | pgvector quickstart with auto-table creation | + + +============================================================ + +## FILE: docs/modules/MULTIMODAL.md + +============================================================ + +--- +description: "Multimodal messages — pass images and other content parts to vision-capable LLMs" +tags: + - core + - messages + - multimodal + - vision +--- + +# Multimodal Messages + +**Import:** `from selectools import ContentPart, image_message, Message` +**Stability:** beta +**Added in:** v0.21.0 + +`Message.content` now accepts a list of `ContentPart` objects in addition to a plain +string. This unlocks vision and other multimodal inputs across every provider that +supports them: GPT-4o, Claude 3.5/3.7, Gemini, and Ollama vision models. 
+ +```python title="multimodal_quick.py" +from selectools import Agent, OpenAIProvider, image_message + +agent = Agent(provider=OpenAIProvider(model="gpt-4o")) + +# Helper for the common "image + prompt" case +result = agent.run([ + image_message("https://example.com/diagram.png", "What does this diagram show?") +]) +print(result.content) +``` + +!!! tip "See Also" + - [Providers](PROVIDERS.md) - Which providers support multimodal input + - [Models](MODELS.md) - Vision-capable model identifiers + +--- + +## ContentPart Anatomy + +```python +from selectools import ContentPart, Message, Role + +msg = Message( + role=Role.USER, + content=[ + ContentPart(type="text", text="Compare these two screenshots."), + ContentPart(type="image_url", image_url="https://example.com/before.png"), + ContentPart(type="image_url", image_url="https://example.com/after.png"), + ], +) +``` + +| Field | Used when | +|---|---| +| `type` | One of `"text"`, `"image_url"`, `"image_base64"`, `"audio"` | +| `text` | Set when `type == "text"` | +| `image_url` | Public URL for an image (most providers) | +| `image_base64` | Inline base64 payload for an image | +| `media_type` | MIME type, e.g. `"image/png"` or `"audio/wav"` | + +--- + +## Helper: `image_message` + +For the common "single image + prompt" case, use the `image_message` helper: + +```python +from selectools import image_message + +# From a URL +msg = image_message("https://example.com/photo.jpg", "Describe what you see.") + +# From a local file path (auto-encoded as base64) +msg = image_message("./screenshots/error.png", "What's the error in this UI?") +``` + +The helper detects whether the input is a URL or a local path and chooses the +right `ContentPart.type` (`image_url` vs `image_base64`). 
+ +--- + +## Provider Compatibility + +| Provider | Format used internally | +|---|---| +| OpenAI | `[{"type": "text", ...}, {"type": "image_url", "image_url": {"url": ...}}]` | +| Anthropic | `[{"type": "text", ...}, {"type": "image", "source": {"type": "base64", ...}}]` | +| Gemini | `types.Part` objects with `inline_data` | +| Ollama | `images` parameter (list of base64 strings) | + +You don't need to format any of this yourself — selectools handles the conversion +in each provider's `_format_messages()`. + +--- + +## Backward Compatibility + +`Message(role=..., content="plain text")` continues to work everywhere. The +`list[ContentPart]` path is opt-in and existing code is unaffected. + +```python +# Still works exactly as before +msg = Message(role=Role.USER, content="What is 2 + 2?") +``` + +--- + +## API Reference + +| Symbol | Description | +|---|---| +| `ContentPart` | Dataclass for a single part of a multimodal message | +| `Message.content` | Now `str \| list[ContentPart]` | +| `image_message(image, prompt)` | Convenience constructor for image + text | +| `text_content(message)` | Extract concatenated text from a (possibly multimodal) Message | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 81 | [`81_multimodal_messages.py`](https://github.com/johnnichev/selectools/blob/main/examples/81_multimodal_messages.py) | Image input with `image_message` and raw `ContentPart` | + + +============================================================ + +## FILE: docs/modules/OTEL.md + +============================================================ + +--- +description: "OpenTelemetry observer — emit GenAI semantic-convention spans for agent runs, LLM calls, and tool executions" +tags: + - observability + - opentelemetry + - tracing +--- + +# OpenTelemetry Observer + +**Import:** `from selectools.observe import OTelObserver` +**Stability:** beta +**Added in:** v0.21.0 + +`OTelObserver` maps the 45 selectools observer events to 
OpenTelemetry spans, +following the [OpenTelemetry GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/). +Once attached, every agent run, LLM call, and tool execution becomes a span you +can ship to Jaeger, Tempo, Honeycomb, Datadog, Grafana, or any other OTLP-capable +backend. + +```python title="otel_quick.py" +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter + +from selectools import Agent, AgentConfig, OpenAIProvider, tool +from selectools.observe import OTelObserver + +# 1. Configure your OTel SDK once at process start +trace.set_tracer_provider(TracerProvider()) +trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(ConsoleSpanExporter())) + +# 2. Attach the observer +@tool() +def search(query: str) -> str: + return f"Results for {query}" + +agent = Agent( + tools=[search], + provider=OpenAIProvider(), + config=AgentConfig(observers=[OTelObserver()]), +) + +result = agent.run("Find articles about Python") +# Spans now flow to your OTel exporter +``` + +!!! tip "See Also" + - [Langfuse](LANGFUSE.md) - Alternative observer focused on LLM tracing + - [Trace Store](TRACE_STORE.md) - Persist agent traces to disk or SQLite + - [Audit](AUDIT.md) - JSONL audit logs + +--- + +## Install + +```bash +pip install "selectools[observe]" +``` + +The `[observe]` extras include `opentelemetry-api>=1.20.0`. **selectools does not +ship `opentelemetry-sdk` or any exporters** — bring your own. Common choices: + +```bash +pip install opentelemetry-sdk opentelemetry-exporter-otlp # OTLP +pip install opentelemetry-sdk opentelemetry-exporter-jaeger # Jaeger +``` + +This separation lets you reuse whatever exporter the rest of your stack already +uses without selectools pinning a transitive dependency. 
+ +--- + +## Span Hierarchy + +Each agent run becomes a span tree: + +``` +agent.run ← root span +├── gen_ai.llm.call ← per LLM round-trip +│ └── gen_ai.tool.execution ← per tool call +├── gen_ai.llm.call +└── ... +``` + +| Span name | Attributes | +|---|---| +| `agent.run` | `gen_ai.system="selectools"`, `gen_ai.usage.total_tokens`, `gen_ai.usage.cost_usd` | +| `gen_ai.llm.call` | `gen_ai.request.model`, `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens` | +| `gen_ai.tool.execution` | `gen_ai.tool.name`, `gen_ai.tool.duration_ms`, `gen_ai.tool.success` | + +--- + +## Constructor + +```python +OTelObserver(tracer_name: str = "selectools") +``` + +| Parameter | Description | +|---|---| +| `tracer_name` | Name passed to `trace.get_tracer()`. Use this to scope spans by service in multi-app processes. | + +--- + +## Async + +For `agent.arun()` / `agent.astream()` use the async variant: + +```python +from selectools.observe.otel import AsyncOTelObserver +agent = Agent(..., config=AgentConfig(observers=[AsyncOTelObserver()])) +``` + +--- + +## API Reference + +| Symbol | Description | +|---|---| +| `OTelObserver(tracer_name)` | Sync observer for `agent.run()` / `agent.stream()` | +| `AsyncOTelObserver(tracer_name)` | Async observer for `agent.arun()` / `agent.astream()` | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 87 | [`87_otel_observer.py`](https://github.com/johnnichev/selectools/blob/main/examples/87_otel_observer.py) | Wire selectools traces into an OTLP exporter | + + +============================================================ + +## FILE: docs/modules/AZURE_OPENAI.md + +============================================================ + +--- +description: "Azure OpenAI Service provider — use selectools agents with Azure-deployed GPT-4 / GPT-4o models" +tags: + - providers + - azure + - openai +--- + +# Azure OpenAI Provider + +**Import:** `from selectools import AzureOpenAIProvider` +**Stability:** beta 
+**Added in:** v0.21.0
+
+`AzureOpenAIProvider` lets selectools talk to OpenAI models deployed on Azure
+OpenAI Service. It extends `OpenAIProvider` and uses the OpenAI SDK's built-in
+`AzureOpenAI` client, so you get every feature of the regular OpenAI provider
+(streaming, tool calling, structured output, multimodal) without having to
+maintain a separate code path.
+
+```python title="azure_openai_quick.py"
+from selectools import Agent, AzureOpenAIProvider, tool
+
+@tool()
+def get_time() -> str:
+    """Return the current time."""
+    from datetime import datetime
+    return datetime.utcnow().isoformat()
+
+provider = AzureOpenAIProvider(
+    azure_endpoint="https://my-resource.openai.azure.com",
+    api_key="YOUR_AZURE_OPENAI_KEY",
+    azure_deployment="gpt-4o",  # your Azure deployment name
+)
+
+agent = Agent(tools=[get_time], provider=provider)
+print(agent.run("What time is it?").content)
+```
+
+!!! tip "See Also"
+    - [Providers](PROVIDERS.md) - All available LLM providers
+    - [Fallback Provider](PROVIDERS.md#fallback) - Use Azure as a fallback for the public OpenAI API
+
+---
+
+## Install
+
+No new dependencies. Azure support uses the same `openai>=1.30.0` package that
+ships as a core selectools dependency.
+
+```bash
+pip install selectools # Azure already supported
+```
+
+---
+
+## Constructor
+
+```python
+AzureOpenAIProvider(
+    azure_endpoint: str | None = None,
+    api_key: str | None = None,
+    api_version: str = "2024-10-21",
+    azure_deployment: str | None = None,
+    azure_ad_token: str | None = None,
+)
+```
+
+| Parameter | Description |
+|---|---|
+| `azure_endpoint` | Azure resource endpoint (`https://<resource-name>.openai.azure.com`). Falls back to `AZURE_OPENAI_ENDPOINT` env var. |
+| `api_key` | Azure API key. Falls back to `AZURE_OPENAI_API_KEY`. Optional when `azure_ad_token` is set. |
+| `api_version` | Azure OpenAI API version string. Defaults to a recent stable release. 
| +| `azure_deployment` | The deployment name to use as the default model (Azure uses deployment names, not OpenAI model IDs). Falls back to `AZURE_OPENAI_DEPLOYMENT`. | +| `azure_ad_token` | An Azure Active Directory token for AAD-based auth. When set, `api_key` is not required. | + +--- + +## Environment Variables + +`AzureOpenAIProvider()` with no arguments works if you set the standard Azure +env vars: + +```bash +export AZURE_OPENAI_ENDPOINT="https://my-resource.openai.azure.com" +export AZURE_OPENAI_API_KEY="..." +export AZURE_OPENAI_DEPLOYMENT="gpt-4o" +``` + +```python +provider = AzureOpenAIProvider() # Reads everything from env +``` + +--- + +## Azure Deployments vs Model IDs + +In the public OpenAI API you pass model IDs like `"gpt-4o"`. In Azure OpenAI you +pass **deployment names** that you create in the Azure Portal. selectools maps +the `azure_deployment` parameter to the `model` argument internally, so the rest +of your agent code is unchanged: + +```python +# Same Agent code, swappable providers +agent = Agent(provider=OpenAIProvider(model="gpt-4o")) # Public OpenAI +agent = Agent(provider=AzureOpenAIProvider(azure_deployment="gpt-4o")) # Azure +``` + +--- + +## AAD Token Auth + +For enterprise deployments using Azure Active Directory: + +```python +from azure.identity import DefaultAzureCredential + +credential = DefaultAzureCredential() +token = credential.get_token("https://cognitiveservices.azure.com/.default").token + +provider = AzureOpenAIProvider( + azure_endpoint="https://my-resource.openai.azure.com", + azure_deployment="gpt-4o", + azure_ad_token=token, +) +``` + +--- + +## Inheritance + +`AzureOpenAIProvider` extends `OpenAIProvider`, so it inherits everything: + +- `complete()` / `acomplete()` +- `stream()` / `astream()` +- Tool calling, structured output, multimodal messages +- Token usage and cost tracking via `selectools.pricing` + +Only `__init__` is overridden — to use the `AzureOpenAI` client class instead of +the regular `OpenAI` 
one. + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 86 | [`86_azure_openai.py`](https://github.com/johnnichev/selectools/blob/main/examples/86_azure_openai.py) | Azure OpenAI agent with deployment-name routing | + + +============================================================ + +## FILE: docs/modules/LANGFUSE.md + +============================================================ + +--- +description: "Langfuse observer — send agent traces, generations, and spans to Langfuse Cloud or self-hosted" +tags: + - observability + - langfuse + - tracing +--- + +# Langfuse Observer + +**Import:** `from selectools.observe import LangfuseObserver` +**Stability:** beta +**Added in:** v0.21.0 + +`LangfuseObserver` ships selectools traces to [Langfuse](https://langfuse.com), an +open-source LLM observability platform. Each agent run becomes a Langfuse trace, +each LLM call becomes a generation (with input/output/tokens/cost), and each tool +call becomes a span. Works with both Langfuse Cloud and self-hosted instances. + +```python title="langfuse_quick.py" +import os +from selectools import Agent, AgentConfig, OpenAIProvider, tool +from selectools.observe import LangfuseObserver + +os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..." +os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..." +# os.environ["LANGFUSE_HOST"] = "https://my-langfuse.example.com" # self-hosted + +@tool() +def search(query: str) -> str: + return f"Results for {query}" + +agent = Agent( + tools=[search], + provider=OpenAIProvider(), + config=AgentConfig(observers=[LangfuseObserver()]), +) + +result = agent.run("Find articles about Python") +# View the trace in your Langfuse dashboard +``` + +!!! 
tip "See Also" + - [OpenTelemetry](OTEL.md) - Alternative observer for OTLP backends + - [Trace Store](TRACE_STORE.md) - Persist traces locally as JSONL or SQLite + +--- + +## Install + +```bash +pip install "selectools[observe]" +``` + +The `[observe]` extras include `langfuse>=2.0.0`. + +--- + +## Constructor + +```python +LangfuseObserver( + public_key: str | None = None, + secret_key: str | None = None, + host: str | None = None, +) +``` + +| Parameter | Description | +|---|---| +| `public_key` | Langfuse public key. Falls back to `LANGFUSE_PUBLIC_KEY` env var. | +| `secret_key` | Langfuse secret key. Falls back to `LANGFUSE_SECRET_KEY` env var. | +| `host` | Langfuse host URL. Defaults to Langfuse Cloud. Set this to point at a self-hosted instance. Falls back to `LANGFUSE_HOST` env var. | + +The observer auto-flushes after every `run_end`, so traces are visible in your +Langfuse dashboard within seconds of an agent finishing. + +--- + +## What Gets Recorded + +| Selectools event | Langfuse object | Fields | +|---|---|---| +| `on_run_start` | Trace | `id=run_id`, `name="agent.run"`, input messages | +| `on_llm_start` | Generation | `model`, `input` (messages) | +| `on_llm_end` | Generation update | `output`, `usage.input/output/total`, `cost_usd` | +| `on_tool_start` | Span | `name=tool_name`, `input=tool_args` | +| `on_tool_end` | Span update | `output`, `duration_ms` | +| `on_run_end` | Trace update | `output`, total tokens, total cost | + +--- + +## Self-Hosted Langfuse + +```python +observer = LangfuseObserver( + public_key="pk-lf-local-...", + secret_key="sk-lf-local-...", + host="https://langfuse.internal.example.com", +) +``` + +Or via env vars: + +```bash +export LANGFUSE_PUBLIC_KEY="pk-lf-..." +export LANGFUSE_SECRET_KEY="sk-lf-..." 
+export LANGFUSE_HOST="https://langfuse.internal.example.com" +``` + +--- + +## API Reference + +| Symbol | Description | +|---|---| +| `LangfuseObserver(public_key, secret_key, host)` | Observer for `agent.run()` / `agent.stream()` | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 88 | [`88_langfuse_observer.py`](https://github.com/johnnichev/selectools/blob/main/examples/88_langfuse_observer.py) | Langfuse trace + generation + span hierarchy | diff --git a/docs/llms.txt b/docs/llms.txt index 8ed9a67..3db0c64 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -1,6 +1,6 @@ # Selectools -> Selectools is a production-ready Python library for building AI agents with tool calling, RAG, and multi-agent orchestration. One pip install. No DSL. Supports OpenAI, Anthropic, Gemini, Ollama. v0.20.1, 4612 tests at 95% coverage, Apache-2.0. +> Selectools is a production-ready Python library for building AI agents with tool calling, RAG, and multi-agent orchestration. One pip install. No DSL. Supports OpenAI, Azure OpenAI, Anthropic, Gemini, Ollama. v0.21.0, 4960 tests at 95% coverage, Apache-2.0. Selectools uses a single `Agent` class with native tool calling. No chains, no expression language, no complex abstractions. It includes built-in features that other frameworks charge for or split into separate packages: 50 evaluators, hybrid RAG search (BM25 + vector), guardrails, audit logging, multi-agent orchestration, and a visual drag-drop builder. Free, local, MIT-compatible. 
@@ -76,9 +76,13 @@ result = agent.run("Find our refund policy") - [Reasoning Strategies](https://selectools.dev/modules/REASONING_STRATEGIES/): ReAct, CoT, Plan-Then-Act - [Builder Docs](https://selectools.dev/modules/builder/): Visual builder reference - [Templates](https://selectools.dev/modules/TEMPLATES/): YAML agent configuration -- [OTel Observer](https://selectools.dev/modules/PROVIDERS/#observability-integrations-v0210): OpenTelemetry agent trace export -- [Langfuse Observer](https://selectools.dev/modules/PROVIDERS/#langfuseobserver): Langfuse agent trace export -- [Multimodal Messages](https://selectools.dev/modules/STREAMING/#multimodal-messages-v0210): ContentPart, image_message(), text_content() +- [FAISS](https://selectools.dev/modules/FAISS/): In-process FAISS vector index with disk persistence (v0.21.0) +- [Qdrant](https://selectools.dev/modules/QDRANT/): Qdrant vector database connector with REST + gRPC (v0.21.0) +- [pgvector](https://selectools.dev/modules/PGVECTOR/): PostgreSQL-backed vector store using the pgvector extension (v0.21.0) +- [Azure OpenAI](https://selectools.dev/modules/AZURE_OPENAI/): Azure OpenAI Service provider with AAD auth and deployment routing (v0.21.0) +- [OpenTelemetry](https://selectools.dev/modules/OTEL/): GenAI semantic-convention spans for agent runs, LLM calls, tool executions (v0.21.0) +- [Langfuse](https://selectools.dev/modules/LANGFUSE/): Send traces, generations, and spans to Langfuse Cloud or self-hosted (v0.21.0) +- [Multimodal Messages](https://selectools.dev/modules/MULTIMODAL/): ContentPart, image_message(), text_content() (v0.21.0) - [Stability Markers](https://selectools.dev/modules/STABILITY/): @stable, @beta, @deprecated - [Changelog](https://selectools.dev/CHANGELOG/): Release history - [Examples Gallery](https://selectools.dev/examples/): 88 runnable scripts with categories diff --git a/docs/modules/AZURE_OPENAI.md b/docs/modules/AZURE_OPENAI.md new file mode 100644 index 0000000..043594f --- /dev/null 
+++ b/docs/modules/AZURE_OPENAI.md
@@ -0,0 +1,148 @@
+---
+description: "Azure OpenAI Service provider — use selectools agents with Azure-deployed GPT-4 / GPT-4o models"
+tags:
+  - providers
+  - azure
+  - openai
+---
+
+# Azure OpenAI Provider
+
+**Import:** `from selectools import AzureOpenAIProvider`
+**Stability:** beta
+**Added in:** v0.21.0
+
+`AzureOpenAIProvider` lets selectools talk to OpenAI models deployed on Azure
+OpenAI Service. It extends `OpenAIProvider` and uses the OpenAI SDK's built-in
+`AzureOpenAI` client, so you get every feature of the regular OpenAI provider
+(streaming, tool calling, structured output, multimodal) without having to
+maintain a separate code path.
+
+```python title="azure_openai_quick.py"
+from selectools import Agent, AzureOpenAIProvider, tool
+
+@tool()
+def get_time() -> str:
+    """Return the current time."""
+    from datetime import datetime
+    return datetime.utcnow().isoformat()
+
+provider = AzureOpenAIProvider(
+    azure_endpoint="https://my-resource.openai.azure.com",
+    api_key="YOUR_AZURE_OPENAI_KEY",
+    azure_deployment="gpt-4o",  # your Azure deployment name
+)
+
+agent = Agent(tools=[get_time], provider=provider)
+print(agent.run("What time is it?").content)
+```
+
+!!! tip "See Also"
+    - [Providers](PROVIDERS.md) - All available LLM providers
+    - [Fallback Provider](PROVIDERS.md#fallback) - Use Azure as a fallback for the public OpenAI API
+
+---
+
+## Install
+
+No new dependencies. Azure support uses the same `openai>=1.30.0` package that
+ships as a core selectools dependency.
+
+```bash
+pip install selectools # Azure already supported
+```
+
+---
+
+## Constructor
+
+```python
+AzureOpenAIProvider(
+    azure_endpoint: str | None = None,
+    api_key: str | None = None,
+    api_version: str = "2024-10-21",
+    azure_deployment: str | None = None,
+    azure_ad_token: str | None = None,
+)
+```
+
+| Parameter | Description |
+|---|---|
+| `azure_endpoint` | Azure resource endpoint (`https://<resource-name>.openai.azure.com`). 
Falls back to `AZURE_OPENAI_ENDPOINT` env var. | +| `api_key` | Azure API key. Falls back to `AZURE_OPENAI_API_KEY`. Optional when `azure_ad_token` is set. | +| `api_version` | Azure OpenAI API version string. Defaults to a recent stable release. | +| `azure_deployment` | The deployment name to use as the default model (Azure uses deployment names, not OpenAI model IDs). Falls back to `AZURE_OPENAI_DEPLOYMENT`. | +| `azure_ad_token` | An Azure Active Directory token for AAD-based auth. When set, `api_key` is not required. | + +--- + +## Environment Variables + +`AzureOpenAIProvider()` with no arguments works if you set the standard Azure +env vars: + +```bash +export AZURE_OPENAI_ENDPOINT="https://my-resource.openai.azure.com" +export AZURE_OPENAI_API_KEY="..." +export AZURE_OPENAI_DEPLOYMENT="gpt-4o" +``` + +```python +provider = AzureOpenAIProvider() # Reads everything from env +``` + +--- + +## Azure Deployments vs Model IDs + +In the public OpenAI API you pass model IDs like `"gpt-4o"`. In Azure OpenAI you +pass **deployment names** that you create in the Azure Portal. 
selectools maps +the `azure_deployment` parameter to the `model` argument internally, so the rest +of your agent code is unchanged: + +```python +# Same Agent code, swappable providers +agent = Agent(provider=OpenAIProvider(model="gpt-4o")) # Public OpenAI +agent = Agent(provider=AzureOpenAIProvider(azure_deployment="gpt-4o")) # Azure +``` + +--- + +## AAD Token Auth + +For enterprise deployments using Azure Active Directory: + +```python +from azure.identity import DefaultAzureCredential + +credential = DefaultAzureCredential() +token = credential.get_token("https://cognitiveservices.azure.com/.default").token + +provider = AzureOpenAIProvider( + azure_endpoint="https://my-resource.openai.azure.com", + azure_deployment="gpt-4o", + azure_ad_token=token, +) +``` + +--- + +## Inheritance + +`AzureOpenAIProvider` extends `OpenAIProvider`, so it inherits everything: + +- `complete()` / `acomplete()` +- `stream()` / `astream()` +- Tool calling, structured output, multimodal messages +- Token usage and cost tracking via `selectools.pricing` + +Only `__init__` is overridden — to use the `AzureOpenAI` client class instead of +the regular `OpenAI` one. 
+ +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 86 | [`86_azure_openai.py`](https://github.com/johnnichev/selectools/blob/main/examples/86_azure_openai.py) | Azure OpenAI agent with deployment-name routing | diff --git a/docs/modules/FAISS.md b/docs/modules/FAISS.md new file mode 100644 index 0000000..6e2326f --- /dev/null +++ b/docs/modules/FAISS.md @@ -0,0 +1,108 @@ +--- +description: "In-process FAISS vector index for fast local similarity search with disk persistence" +tags: + - rag + - vector-stores + - faiss +--- + +# FAISS Vector Store + +**Import:** `from selectools.rag.stores import FAISSVectorStore` +**Stability:** beta +**Added in:** v0.21.0 + +`FAISSVectorStore` wraps Facebook AI's FAISS library to provide a fast, in-process +vector index that lives entirely in memory but can be persisted to disk. It's ideal +when you want zero-server RAG with millions of vectors and have plenty of RAM. + +```python title="faiss_quick.py" +from selectools.embeddings import OpenAIEmbedder +from selectools.rag import Document +from selectools.rag.stores import FAISSVectorStore + +store = FAISSVectorStore(embedder=OpenAIEmbedder()) +store.add_documents([ + Document(text="Selectools is a Python AI agent framework."), + Document(text="FAISS does fast similarity search."), +]) + +results = store.search("agent framework", top_k=2) +for r in results: + print(r.score, r.document.text) + +store.save("faiss_index") # writes index + documents +``` + +!!! tip "See Also" + - [Qdrant](QDRANT.md) - Self-hosted vector store with REST + gRPC + - [pgvector](PGVECTOR.md) - PostgreSQL-backed vector store + - [RAG](RAG.md) - High-level retrieval pipeline + +--- + +## Install + +```bash +pip install "selectools[rag]" +``` + +`faiss-cpu>=1.7.0` is part of the `[rag]` optional extras. If you want GPU acceleration, +install `faiss-gpu` separately. 
+ +--- + +## Constructor + +```python +FAISSVectorStore( + embedder: EmbeddingProvider | None = None, + dimension: int | None = None, +) +``` + +| Parameter | Description | +|---|---| +| `embedder` | Any `selectools.embeddings.EmbeddingProvider`. May be `None` when loading a persisted index that already contains pre-computed vectors. | +| `dimension` | Vector dimension. If `None`, inferred from the first batch of `add_documents()`. | + +--- + +## Persistence + +```python +store.save("path/to/index") # writes index file + sidecar JSON for documents +loaded = FAISSVectorStore.load("path/to/index", embedder=OpenAIEmbedder()) +``` + +`save()` persists both the FAISS index and the parallel `Document` list so search +results can return original text/metadata after reload. + +--- + +## Thread Safety + +FAISS itself is not thread-safe for writes. `FAISSVectorStore` wraps every mutation +in a `threading.Lock`, so concurrent `add_documents()` and `search()` calls from +multiple agent threads are safe. 
+ +--- + +## API Reference + +| Method | Description | +|---|---| +| `add_documents(docs)` | Embed and add documents to the index | +| `search(query, top_k)` | Cosine similarity search; returns `List[SearchResult]` | +| `delete(ids)` | Remove documents by ID | +| `clear()` | Wipe the index | +| `save(path)` | Persist index + documents to disk | +| `load(path, embedder)` | Class method: rehydrate a persisted store | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 77 | [`77_faiss_vector_store.py`](https://github.com/johnnichev/selectools/blob/main/examples/77_faiss_vector_store.py) | FAISS quickstart with embeddings + persistence | diff --git a/docs/modules/LANGFUSE.md b/docs/modules/LANGFUSE.md new file mode 100644 index 0000000..64784c7 --- /dev/null +++ b/docs/modules/LANGFUSE.md @@ -0,0 +1,125 @@ +--- +description: "Langfuse observer — send agent traces, generations, and spans to Langfuse Cloud or self-hosted" +tags: + - observability + - langfuse + - tracing +--- + +# Langfuse Observer + +**Import:** `from selectools.observe import LangfuseObserver` +**Stability:** beta +**Added in:** v0.21.0 + +`LangfuseObserver` ships selectools traces to [Langfuse](https://langfuse.com), an +open-source LLM observability platform. Each agent run becomes a Langfuse trace, +each LLM call becomes a generation (with input/output/tokens/cost), and each tool +call becomes a span. Works with both Langfuse Cloud and self-hosted instances. + +```python title="langfuse_quick.py" +import os +from selectools import Agent, AgentConfig, OpenAIProvider, tool +from selectools.observe import LangfuseObserver + +os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..." +os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..." 
+# os.environ["LANGFUSE_HOST"] = "https://my-langfuse.example.com" # self-hosted + +@tool() +def search(query: str) -> str: + return f"Results for {query}" + +agent = Agent( + tools=[search], + provider=OpenAIProvider(), + config=AgentConfig(observers=[LangfuseObserver()]), +) + +result = agent.run("Find articles about Python") +# View the trace in your Langfuse dashboard +``` + +!!! tip "See Also" + - [OpenTelemetry](OTEL.md) - Alternative observer for OTLP backends + - [Trace Store](TRACE_STORE.md) - Persist traces locally as JSONL or SQLite + +--- + +## Install + +```bash +pip install "selectools[observe]" +``` + +The `[observe]` extras include `langfuse>=2.0.0`. + +--- + +## Constructor + +```python +LangfuseObserver( + public_key: str | None = None, + secret_key: str | None = None, + host: str | None = None, +) +``` + +| Parameter | Description | +|---|---| +| `public_key` | Langfuse public key. Falls back to `LANGFUSE_PUBLIC_KEY` env var. | +| `secret_key` | Langfuse secret key. Falls back to `LANGFUSE_SECRET_KEY` env var. | +| `host` | Langfuse host URL. Defaults to Langfuse Cloud. Set this to point at a self-hosted instance. Falls back to `LANGFUSE_HOST` env var. | + +The observer auto-flushes after every `run_end`, so traces are visible in your +Langfuse dashboard within seconds of an agent finishing. 
+ +--- + +## What Gets Recorded + +| Selectools event | Langfuse object | Fields | +|---|---|---| +| `on_run_start` | Trace | `id=run_id`, `name="agent.run"`, input messages | +| `on_llm_start` | Generation | `model`, `input` (messages) | +| `on_llm_end` | Generation update | `output`, `usage.input/output/total`, `cost_usd` | +| `on_tool_start` | Span | `name=tool_name`, `input=tool_args` | +| `on_tool_end` | Span update | `output`, `duration_ms` | +| `on_run_end` | Trace update | `output`, total tokens, total cost | + +--- + +## Self-Hosted Langfuse + +```python +observer = LangfuseObserver( + public_key="pk-lf-local-...", + secret_key="sk-lf-local-...", + host="https://langfuse.internal.example.com", +) +``` + +Or via env vars: + +```bash +export LANGFUSE_PUBLIC_KEY="pk-lf-..." +export LANGFUSE_SECRET_KEY="sk-lf-..." +export LANGFUSE_HOST="https://langfuse.internal.example.com" +``` + +--- + +## API Reference + +| Symbol | Description | +|---|---| +| `LangfuseObserver(public_key, secret_key, host)` | Observer for `agent.run()` / `agent.stream()` | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 88 | [`88_langfuse_observer.py`](https://github.com/johnnichev/selectools/blob/main/examples/88_langfuse_observer.py) | Langfuse trace + generation + span hierarchy | diff --git a/docs/modules/MULTIMODAL.md b/docs/modules/MULTIMODAL.md new file mode 100644 index 0000000..a396157 --- /dev/null +++ b/docs/modules/MULTIMODAL.md @@ -0,0 +1,123 @@ +--- +description: "Multimodal messages — pass images and other content parts to vision-capable LLMs" +tags: + - core + - messages + - multimodal + - vision +--- + +# Multimodal Messages + +**Import:** `from selectools import ContentPart, image_message, Message` +**Stability:** beta +**Added in:** v0.21.0 + +`Message.content` now accepts a list of `ContentPart` objects in addition to a plain +string. 
This unlocks vision and other multimodal inputs across every provider that +supports them: GPT-4o, Claude 3.5/3.7, Gemini, and Ollama vision models. + +```python title="multimodal_quick.py" +from selectools import Agent, OpenAIProvider, image_message + +agent = Agent(provider=OpenAIProvider(model="gpt-4o")) + +# Helper for the common "image + prompt" case +result = agent.run([ + image_message("https://example.com/diagram.png", "What does this diagram show?") +]) +print(result.content) +``` + +!!! tip "See Also" + - [Providers](PROVIDERS.md) - Which providers support multimodal input + - [Models](MODELS.md) - Vision-capable model identifiers + +--- + +## ContentPart Anatomy + +```python +from selectools import ContentPart, Message, Role + +msg = Message( + role=Role.USER, + content=[ + ContentPart(type="text", text="Compare these two screenshots."), + ContentPart(type="image_url", image_url="https://example.com/before.png"), + ContentPart(type="image_url", image_url="https://example.com/after.png"), + ], +) +``` + +| Field | Used when | +|---|---| +| `type` | One of `"text"`, `"image_url"`, `"image_base64"`, `"audio"` | +| `text` | Set when `type == "text"` | +| `image_url` | Public URL for an image (most providers) | +| `image_base64` | Inline base64 payload for an image | +| `media_type` | MIME type, e.g. `"image/png"` or `"audio/wav"` | + +--- + +## Helper: `image_message` + +For the common "single image + prompt" case, use the `image_message` helper: + +```python +from selectools import image_message + +# From a URL +msg = image_message("https://example.com/photo.jpg", "Describe what you see.") + +# From a local file path (auto-encoded as base64) +msg = image_message("./screenshots/error.png", "What's the error in this UI?") +``` + +The helper detects whether the input is a URL or a local path and chooses the +right `ContentPart.type` (`image_url` vs `image_base64`). 
+ +--- + +## Provider Compatibility + +| Provider | Format used internally | +|---|---| +| OpenAI | `[{"type": "text", ...}, {"type": "image_url", "image_url": {"url": ...}}]` | +| Anthropic | `[{"type": "text", ...}, {"type": "image", "source": {"type": "base64", ...}}]` | +| Gemini | `types.Part` objects with `inline_data` | +| Ollama | `images` parameter (list of base64 strings) | + +You don't need to format any of this yourself — selectools handles the conversion +in each provider's `_format_messages()`. + +--- + +## Backward Compatibility + +`Message(role=..., content="plain text")` continues to work everywhere. The +`list[ContentPart]` path is opt-in and existing code is unaffected. + +```python +# Still works exactly as before +msg = Message(role=Role.USER, content="What is 2 + 2?") +``` + +--- + +## API Reference + +| Symbol | Description | +|---|---| +| `ContentPart` | Dataclass for a single part of a multimodal message | +| `Message.content` | Now `str \| list[ContentPart]` | +| `image_message(image, prompt)` | Convenience constructor for image + text | +| `text_content(message)` | Extract concatenated text from a (possibly multimodal) Message | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 81 | [`81_multimodal_messages.py`](https://github.com/johnnichev/selectools/blob/main/examples/81_multimodal_messages.py) | Image input with `image_message` and raw `ContentPart` | diff --git a/docs/modules/OTEL.md b/docs/modules/OTEL.md new file mode 100644 index 0000000..afcc4fb --- /dev/null +++ b/docs/modules/OTEL.md @@ -0,0 +1,130 @@ +--- +description: "OpenTelemetry observer — emit GenAI semantic-convention spans for agent runs, LLM calls, and tool executions" +tags: + - observability + - opentelemetry + - tracing +--- + +# OpenTelemetry Observer + +**Import:** `from selectools.observe import OTelObserver` +**Stability:** beta +**Added in:** v0.21.0 + +`OTelObserver` maps the 45 selectools observer events to 
OpenTelemetry spans, +following the [OpenTelemetry GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/). +Once attached, every agent run, LLM call, and tool execution becomes a span you +can ship to Jaeger, Tempo, Honeycomb, Datadog, Grafana, or any other OTLP-capable +backend. + +```python title="otel_quick.py" +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter + +from selectools import Agent, AgentConfig, OpenAIProvider, tool +from selectools.observe import OTelObserver + +# 1. Configure your OTel SDK once at process start +trace.set_tracer_provider(TracerProvider()) +trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(ConsoleSpanExporter())) + +# 2. Attach the observer +@tool() +def search(query: str) -> str: + return f"Results for {query}" + +agent = Agent( + tools=[search], + provider=OpenAIProvider(), + config=AgentConfig(observers=[OTelObserver()]), +) + +result = agent.run("Find articles about Python") +# Spans now flow to your OTel exporter +``` + +!!! tip "See Also" + - [Langfuse](LANGFUSE.md) - Alternative observer focused on LLM tracing + - [Trace Store](TRACE_STORE.md) - Persist agent traces to disk or SQLite + - [Audit](AUDIT.md) - JSONL audit logs + +--- + +## Install + +```bash +pip install "selectools[observe]" +``` + +The `[observe]` extras include `opentelemetry-api>=1.20.0`. **selectools does not +ship `opentelemetry-sdk` or any exporters** — bring your own. Common choices: + +```bash +pip install opentelemetry-sdk opentelemetry-exporter-otlp # OTLP +pip install opentelemetry-sdk opentelemetry-exporter-jaeger # Jaeger +``` + +This separation lets you reuse whatever exporter the rest of your stack already +uses without selectools pinning a transitive dependency. 
+ +--- + +## Span Hierarchy + +Each agent run becomes a span tree: + +``` +agent.run ← root span +├── gen_ai.llm.call ← per LLM round-trip +│ └── gen_ai.tool.execution ← per tool call +├── gen_ai.llm.call +└── ... +``` + +| Span name | Attributes | +|---|---| +| `agent.run` | `gen_ai.system="selectools"`, `gen_ai.usage.total_tokens`, `gen_ai.usage.cost_usd` | +| `gen_ai.llm.call` | `gen_ai.request.model`, `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens` | +| `gen_ai.tool.execution` | `gen_ai.tool.name`, `gen_ai.tool.duration_ms`, `gen_ai.tool.success` | + +--- + +## Constructor + +```python +OTelObserver(tracer_name: str = "selectools") +``` + +| Parameter | Description | +|---|---| +| `tracer_name` | Name passed to `trace.get_tracer()`. Use this to scope spans by service in multi-app processes. | + +--- + +## Async + +For `agent.arun()` / `agent.astream()` use the async variant: + +```python +from selectools.observe.otel import AsyncOTelObserver +agent = Agent(..., config=AgentConfig(observers=[AsyncOTelObserver()])) +``` + +--- + +## API Reference + +| Symbol | Description | +|---|---| +| `OTelObserver(tracer_name)` | Sync observer for `agent.run()` / `agent.stream()` | +| `AsyncOTelObserver(tracer_name)` | Async observer for `agent.arun()` / `agent.astream()` | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 87 | [`87_otel_observer.py`](https://github.com/johnnichev/selectools/blob/main/examples/87_otel_observer.py) | Wire selectools traces into an OTLP exporter | diff --git a/docs/modules/PGVECTOR.md b/docs/modules/PGVECTOR.md new file mode 100644 index 0000000..1aadd27 --- /dev/null +++ b/docs/modules/PGVECTOR.md @@ -0,0 +1,139 @@ +--- +description: "PostgreSQL-backed vector store using the pgvector extension" +tags: + - rag + - vector-stores + - postgres + - pgvector +--- + +# pgvector Store + +**Import:** `from selectools.rag.stores import PgVectorStore` +**Stability:** beta +**Added in:** v0.21.0 + 
+`PgVectorStore` lets you store and search document embeddings inside a PostgreSQL +database using the [pgvector](https://github.com/pgvector/pgvector) extension. It's +the right choice when you already run Postgres and want vectors next to the rest of +your application data without standing up a separate vector service. + +```python title="pgvector_quick.py" +from selectools.embeddings import OpenAIEmbedder +from selectools.rag import Document +from selectools.rag.stores import PgVectorStore + +store = PgVectorStore( + embedder=OpenAIEmbedder(), + connection_string="postgresql://user:pass@localhost:5432/mydb", + table_name="selectools_documents", +) + +store.add_documents([ + Document(text="pgvector adds vector types to Postgres."), + Document(text="It supports cosine, L2, and inner-product distance."), +]) + +results = store.search("postgres vector search", top_k=2) +``` + +!!! tip "See Also" + - [Qdrant](QDRANT.md) - Self-hosted vector database with REST + gRPC + - [FAISS](FAISS.md) - In-process vector index, no server required + - [Sessions](SESSIONS.md) - Postgres-backed agent sessions + +--- + +## Install + +```bash +pip install "selectools[postgres]" +``` + +The `[postgres]` extras already include `psycopg2-binary>=2.9.0`. You also need +the pgvector extension installed in your database: + +```sql +CREATE EXTENSION IF NOT EXISTS vector; +``` + +--- + +## Constructor + +```python +PgVectorStore( + embedder: EmbeddingProvider, + connection_string: str, + table_name: str = "selectools_documents", + dimensions: int | None = None, +) +``` + +| Parameter | Description | +|---|---| +| `embedder` | Embedding provider used to compute vectors. | +| `connection_string` | Standard libpq connection string. | +| `table_name` | Table to store documents in. Validated as a SQL identifier (letters, digits, underscores) to prevent injection. | +| `dimensions` | Vector dimensions. Auto-detected from `embedder.embed_query("test")` on first use if not specified. 
| + +--- + +## Schema + +`PgVectorStore` creates the following table on first use (idempotent): + +```sql +CREATE TABLE IF NOT EXISTS selectools_documents ( + id TEXT PRIMARY KEY, + text TEXT NOT NULL, + metadata JSONB, + embedding vector(N) +); +``` + +The `N` is the embedding dimension. An index on the `embedding` column accelerates +cosine similarity queries. + +--- + +## Search + +`search()` runs a parameterized query using pgvector's `<=>` cosine distance +operator: + +```sql +SELECT id, text, metadata, embedding <=> %s AS distance +FROM selectools_documents +ORDER BY distance ASC +LIMIT %s; +``` + +All queries are parameterized — there's no SQL injection risk from user input. + +--- + +## Connection Pooling + +`PgVectorStore` opens a single `psycopg2.connect()` per instance. If you need +pooling for high concurrency, manage it externally (e.g. PgBouncer) and pass the +pooler URL as the connection string. + +--- + +## API Reference + +| Method | Description | +|---|---| +| `add_documents(docs)` | Embed and upsert documents (`INSERT ... 
ON CONFLICT DO UPDATE`) | +| `search(query, top_k)` | Cosine similarity search | +| `delete(ids)` | Delete documents by ID | +| `clear()` | `TRUNCATE` the table | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 79 | [`79_pgvector_store.py`](https://github.com/johnnichev/selectools/blob/main/examples/79_pgvector_store.py) | pgvector quickstart with auto-table creation | diff --git a/docs/modules/QDRANT.md b/docs/modules/QDRANT.md new file mode 100644 index 0000000..a67cabb --- /dev/null +++ b/docs/modules/QDRANT.md @@ -0,0 +1,126 @@ +--- +description: "Connector for the Qdrant vector database with REST + gRPC support and payload filtering" +tags: + - rag + - vector-stores + - qdrant +--- + +# Qdrant Vector Store + +**Import:** `from selectools.rag.stores import QdrantVectorStore` +**Stability:** beta +**Added in:** v0.21.0 + +`QdrantVectorStore` wraps the official `qdrant-client` to give you a self-hosted or +Qdrant Cloud-backed vector store. It auto-creates collections, supports cosine +similarity by default, and lets you filter searches on metadata via Qdrant's payload +indexing. + +```python title="qdrant_quick.py" +from selectools.embeddings import OpenAIEmbedder +from selectools.rag import Document +from selectools.rag.stores import QdrantVectorStore + +store = QdrantVectorStore( + embedder=OpenAIEmbedder(), + collection_name="my_docs", + url="http://localhost:6333", +) + +store.add_documents([ + Document(text="Qdrant is a vector search engine.", metadata={"category": "infra"}), + Document(text="It supports REST and gRPC.", metadata={"category": "infra"}), +]) + +results = store.search("vector search", top_k=2) +``` + +!!! 
 tip "See Also"
+    - [FAISS](FAISS.md) - In-process vector index, no server required
+    - [pgvector](PGVECTOR.md) - PostgreSQL-backed vector store
+    - [RAG](RAG.md) - Higher-level retrieval pipeline
+
+---
+
+## Install
+
+```bash
+pip install "selectools[rag]"
+```
+
+`qdrant-client>=1.7.0` is part of the `[rag]` extras.
+
+You also need a running Qdrant instance. The simplest way:
+
+```bash
+docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
+```
+
+Or sign up for [Qdrant Cloud](https://cloud.qdrant.io/) and get a managed instance.
+
+---
+
+## Constructor
+
+```python
+QdrantVectorStore(
+    embedder: EmbeddingProvider,
+    collection_name: str = "selectools",
+    url: str = "http://localhost:6333",
+    api_key: str | None = None,
+    prefer_grpc: bool = True,
+    **qdrant_kwargs,
+)
+```
+
+| Parameter | Description |
+|---|---|
+| `embedder` | Any `EmbeddingProvider`. Used to compute vectors for both `add_documents()` and `search()`. |
+| `collection_name` | Qdrant collection. Auto-created on first `add_documents()` if it doesn't exist. |
+| `url` | Qdrant server URL. Use `https://...` for cloud. |
+| `api_key` | Optional API key for Qdrant Cloud or authenticated servers. |
+| `prefer_grpc` | When `True` (default) the client uses gRPC for lower-latency vector ops. |
+| `**qdrant_kwargs` | Additional arguments forwarded to `qdrant_client.QdrantClient`. |
+
+---
+
+## Cloud Configuration
+
+```python
+import os
+
+store = QdrantVectorStore(
+    embedder=OpenAIEmbeddingProvider(),
+    collection_name="prod_docs",
+    url="https://my-cluster.qdrant.io",
+    api_key=os.environ["QDRANT_API_KEY"],
+)
+```
+
+---
+
+## Metadata Filtering
+
+Document metadata is stored as Qdrant payload, so you can filter searches at the
+database level. Use `qdrant_client.models.Filter` constructs and pass them via
+`**search_kwargs` (the store forwards them to the underlying client). 
+ +--- + +## API Reference + +| Method | Description | +|---|---| +| `add_documents(docs)` | Embed documents and upsert into the collection | +| `search(query, top_k)` | Cosine similarity search | +| `delete(ids)` | Delete documents by ID | +| `clear()` | Delete the entire collection | + +--- + +## Related Examples + +| # | Script | Description | +|---|--------|-------------| +| 78 | [`78_qdrant_vector_store.py`](https://github.com/johnnichev/selectools/blob/main/examples/78_qdrant_vector_store.py) | Qdrant quickstart with metadata filtering | diff --git a/mkdocs.yml b/mkdocs.yml index bbed611..9d48c27 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -110,9 +110,11 @@ nav: - Toolbox (33 Built-in): modules/TOOLBOX.md - Dynamic Tools: modules/DYNAMIC_TOOLS.md - Streaming: modules/STREAMING.md + - Multimodal Messages: modules/MULTIMODAL.md - Memory: modules/MEMORY.md - Sessions: modules/SESSIONS.md - Providers: modules/PROVIDERS.md + - Azure OpenAI: modules/AZURE_OPENAI.md - Models & Pricing: modules/MODELS.md - Usage & Cost: modules/USAGE.md - Features: @@ -121,6 +123,9 @@ nav: - Chunking: modules/ADVANCED_CHUNKING.md - Embeddings: modules/EMBEDDINGS.md - Vector Stores: modules/VECTOR_STORES.md + - FAISS: modules/FAISS.md + - Qdrant: modules/QDRANT.md + - pgvector: modules/PGVECTOR.md - Guardrails: modules/GUARDRAILS.md - Eval Framework: modules/EVALS.md - Orchestration: modules/ORCHESTRATION.md @@ -151,6 +156,8 @@ nav: - Screening: modules/SECURITY.md - Error Handling: modules/EXCEPTIONS.md - Trace Store: modules/TRACE_STORE.md + - OpenTelemetry: modules/OTEL.md + - Langfuse: modules/LANGFUSE.md - Stability Markers: modules/STABILITY.md - Changelog: CHANGELOG.md - Architecture Decisions: diff --git a/src/selectools/providers/azure_openai_provider.py b/src/selectools/providers/azure_openai_provider.py index b5677a2..eca6228 100644 --- a/src/selectools/providers/azure_openai_provider.py +++ b/src/selectools/providers/azure_openai_provider.py @@ -114,7 +114,11 @@ def 
__init__( # _client, _async_client, default_model, api_key self._client = AzureOpenAI(**client_kwargs) self._async_client = AsyncAzureOpenAI(**client_kwargs) - self.default_model = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt-4o") + self.default_model = ( + azure_deployment + if azure_deployment is not None + else os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt-4o") + ) self.api_key = resolved_key # -- template method overrides ------------------------------------------- From 64c2349058c3bb13419a30e3afcf8386aac0d5ec Mon Sep 17 00:00:00 2001 From: John Niche Date: Wed, 8 Apr 2026 02:58:51 -0300 Subject: [PATCH 03/17] test(e2e): real-backend coverage for every v0.21.0 subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every existing v0.21.0 test file mocks its backend: test_faiss_store.py injects a fake faiss module, test_code_tools.py mocks subprocess.run, test_qdrant_store.py mocks qdrant_client, etc. That leaves the real wire format, real C++ bindings, real subprocesses, real HTTP, and real vision APIs completely unverified — if our assumptions differ from reality we ship green tests and broken code. 
This commit adds 12 new test files marked @pytest.mark.e2e that exercise real backends: Tier 1 — no external services (28 tests, all passing): - tests/rag/test_e2e_faiss_store.py (real faiss-cpu, 5) - tests/tools/test_e2e_code_tools.py (real subprocess.run, 8) - tests/tools/test_e2e_db_tools.py (real sqlite3, 6) - tests/rag/test_e2e_document_loaders.py (real files + example.com, 6) - tests/test_e2e_otel_observer.py (real opentelemetry-sdk, 3) Tier 2 — real API calls, credentials via .env (8 tests, all passing): - tests/test_e2e_multimodal.py (real OpenAI gpt-4o-mini + Anthropic claude-haiku-4-5 + Gemini gemini-2.5-flash with an in-memory 4x4 PNG) - tests/tools/test_e2e_search_tools.py (real DuckDuckGo + scrape) - tests/tools/test_e2e_github_tools.py (real GitHub REST API) Tier 3 — skip-if-missing-deps-or-credentials (7 tests, 2 passing + 5 skip): - tests/rag/test_e2e_qdrant_store.py (skip if Qdrant not reachable) - tests/rag/test_e2e_pgvector_store.py (passes against local pgvector) - tests/providers/test_e2e_azure_openai.py (skip if AZURE_* not set) - tests/test_e2e_langfuse_observer.py (skip if LANGFUSE_* not set) Result: pytest --run-e2e → 38 passed, 5 skipped, 0 failed. Also fix three v0.21.0 module docs whose quickstart examples showed the wrong VectorStore.search() signature: search() takes a query embedding (List[float]), not a string. Updated FAISS.md, QDRANT.md, PGVECTOR.md to show the correct embed-first pattern (matches RAG.md). 
--- docs/modules/FAISS.md | 9 +- docs/modules/PGVECTOR.md | 9 +- docs/modules/QDRANT.md | 9 +- tests/providers/test_e2e_azure_openai.py | 73 ++++++++++++ tests/rag/test_e2e_document_loaders.py | 118 +++++++++++++++++++ tests/rag/test_e2e_faiss_store.py | 122 ++++++++++++++++++++ tests/rag/test_e2e_pgvector_store.py | 114 ++++++++++++++++++ tests/rag/test_e2e_qdrant_store.py | 123 ++++++++++++++++++++ tests/test_e2e_langfuse_observer.py | 64 +++++++++++ tests/test_e2e_multimodal.py | 140 +++++++++++++++++++++++ tests/test_e2e_otel_observer.py | 123 ++++++++++++++++++++ tests/tools/test_e2e_code_tools.py | 77 +++++++++++++ tests/tools/test_e2e_db_tools.py | 110 ++++++++++++++++++ tests/tools/test_e2e_github_tools.py | 72 ++++++++++++ tests/tools/test_e2e_search_tools.py | 59 ++++++++++ 15 files changed, 1213 insertions(+), 9 deletions(-) create mode 100644 tests/providers/test_e2e_azure_openai.py create mode 100644 tests/rag/test_e2e_document_loaders.py create mode 100644 tests/rag/test_e2e_faiss_store.py create mode 100644 tests/rag/test_e2e_pgvector_store.py create mode 100644 tests/rag/test_e2e_qdrant_store.py create mode 100644 tests/test_e2e_langfuse_observer.py create mode 100644 tests/test_e2e_multimodal.py create mode 100644 tests/test_e2e_otel_observer.py create mode 100644 tests/tools/test_e2e_code_tools.py create mode 100644 tests/tools/test_e2e_db_tools.py create mode 100644 tests/tools/test_e2e_github_tools.py create mode 100644 tests/tools/test_e2e_search_tools.py diff --git a/docs/modules/FAISS.md b/docs/modules/FAISS.md index 6e2326f..4b8aad1 100644 --- a/docs/modules/FAISS.md +++ b/docs/modules/FAISS.md @@ -17,17 +17,20 @@ vector index that lives entirely in memory but can be persisted to disk. It's id when you want zero-server RAG with millions of vectors and have plenty of RAM. 
```python title="faiss_quick.py" -from selectools.embeddings import OpenAIEmbedder +from selectools.embeddings import OpenAIEmbeddingProvider from selectools.rag import Document from selectools.rag.stores import FAISSVectorStore -store = FAISSVectorStore(embedder=OpenAIEmbedder()) +embedder = OpenAIEmbeddingProvider() +store = FAISSVectorStore(embedder=embedder) store.add_documents([ Document(text="Selectools is a Python AI agent framework."), Document(text="FAISS does fast similarity search."), ]) -results = store.search("agent framework", top_k=2) +# search() takes a query embedding, not a string — embed the query first +query_vec = embedder.embed_query("agent framework") +results = store.search(query_vec, top_k=2) for r in results: print(r.score, r.document.text) diff --git a/docs/modules/PGVECTOR.md b/docs/modules/PGVECTOR.md index 1aadd27..ea67ded 100644 --- a/docs/modules/PGVECTOR.md +++ b/docs/modules/PGVECTOR.md @@ -19,12 +19,13 @@ the right choice when you already run Postgres and want vectors next to the rest your application data without standing up a separate vector service. ```python title="pgvector_quick.py" -from selectools.embeddings import OpenAIEmbedder +from selectools.embeddings import OpenAIEmbeddingProvider from selectools.rag import Document from selectools.rag.stores import PgVectorStore +embedder = OpenAIEmbeddingProvider() store = PgVectorStore( - embedder=OpenAIEmbedder(), + embedder=embedder, connection_string="postgresql://user:pass@localhost:5432/mydb", table_name="selectools_documents", ) @@ -34,7 +35,9 @@ store.add_documents([ Document(text="It supports cosine, L2, and inner-product distance."), ]) -results = store.search("postgres vector search", top_k=2) +# search() takes a query embedding, not a string — embed the query first +query_vec = embedder.embed_query("postgres vector search") +results = store.search(query_vec, top_k=2) ``` !!! 
tip "See Also" diff --git a/docs/modules/QDRANT.md b/docs/modules/QDRANT.md index a67cabb..e7888e9 100644 --- a/docs/modules/QDRANT.md +++ b/docs/modules/QDRANT.md @@ -18,12 +18,13 @@ similarity by default, and lets you filter searches on metadata via Qdrant's pay indexing. ```python title="qdrant_quick.py" -from selectools.embeddings import OpenAIEmbedder +from selectools.embeddings import OpenAIEmbeddingProvider from selectools.rag import Document from selectools.rag.stores import QdrantVectorStore +embedder = OpenAIEmbeddingProvider() store = QdrantVectorStore( - embedder=OpenAIEmbedder(), + embedder=embedder, collection_name="my_docs", url="http://localhost:6333", ) @@ -33,7 +34,9 @@ store.add_documents([ Document(text="It supports REST and gRPC.", metadata={"category": "infra"}), ]) -results = store.search("vector search", top_k=2) +# search() takes a query embedding, not a string — embed the query first +query_vec = embedder.embed_query("vector search") +results = store.search(query_vec, top_k=2) ``` !!! tip "See Also" diff --git a/tests/providers/test_e2e_azure_openai.py b/tests/providers/test_e2e_azure_openai.py new file mode 100644 index 0000000..97e9df6 --- /dev/null +++ b/tests/providers/test_e2e_azure_openai.py @@ -0,0 +1,73 @@ +"""End-to-end tests for AzureOpenAIProvider against a real Azure endpoint. + +``test_azure_openai.py`` mocks the OpenAI client. This file uses the real +``AzureOpenAI`` client and hits an actual Azure OpenAI Service deployment. + +Required env vars: + - AZURE_OPENAI_ENDPOINT: e.g. 
https://my-resource.openai.azure.com + - AZURE_OPENAI_API_KEY: Azure API key + - AZURE_OPENAI_DEPLOYMENT: deployment name (defaults to "gpt-4o-mini" if missing) + +Run with: + + pytest tests/providers/test_e2e_azure_openai.py --run-e2e -v +""" + +from __future__ import annotations + +import os + +import pytest + +from selectools import Agent, AgentConfig, tool +from selectools.providers.azure_openai_provider import AzureOpenAIProvider + +pytestmark = pytest.mark.e2e + + +@pytest.fixture(scope="module") +def azure_or_skip() -> None: + if not os.environ.get("AZURE_OPENAI_ENDPOINT"): + pytest.skip("AZURE_OPENAI_ENDPOINT not set — skipping Azure e2e") + if not os.environ.get("AZURE_OPENAI_API_KEY"): + pytest.skip("AZURE_OPENAI_API_KEY not set — skipping Azure e2e") + + +@tool() +def _noop() -> str: + """Return a fixed string.""" + return "noop" + + +class TestAzureOpenAIRealEndpoint: + def test_simple_completion(self, azure_or_skip: None) -> None: + """Real Azure OpenAI call returns a non-empty response.""" + deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4o-mini") + provider = AzureOpenAIProvider(azure_deployment=deployment) + agent = Agent( + tools=[_noop], + provider=provider, + config=AgentConfig(model=deployment, max_tokens=20), + ) + result = agent.run("Reply with exactly the word OK and nothing else.") + assert result.content + assert result.usage.total_tokens > 0 + + def test_tool_calling_round_trip(self, azure_or_skip: None) -> None: + """Real Azure OpenAI invokes a tool and returns a final answer.""" + deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4o-mini") + + @tool() + def get_capital(country: str) -> str: + """Return the capital of a country.""" + capitals = {"france": "Paris", "japan": "Tokyo", "italy": "Rome"} + return capitals.get(country.lower(), "unknown") + + agent = Agent( + tools=[get_capital], + provider=AzureOpenAIProvider(azure_deployment=deployment), + config=AgentConfig(model=deployment, max_tokens=100), + ) + result 
= agent.run("What is the capital of France? Use the get_capital tool.") + assert result.content + assert "Paris" in result.content or "paris" in result.content.lower() diff --git a/tests/rag/test_e2e_document_loaders.py b/tests/rag/test_e2e_document_loaders.py new file mode 100644 index 0000000..10cf266 --- /dev/null +++ b/tests/rag/test_e2e_document_loaders.py @@ -0,0 +1,118 @@ +"""End-to-end tests for DocumentLoader with real files and URLs. + +Exercises the four new v0.21.0 loaders (from_csv, from_json, from_html, +from_url) against real data on disk and (for from_url) a stable public URL. + +No API keys are required. ``from_url`` hits ``https://example.com`` which +has been stable for decades and is the canonical "test I can fetch HTML" +target. + +Run with: + + pytest tests/rag/test_e2e_document_loaders.py --run-e2e -v +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from selectools.rag import DocumentLoader + +pytestmark = pytest.mark.e2e + + +class TestFromCSVReal: + def test_csv_with_text_column(self, tmp_path: Path) -> None: + """Load a real CSV file using text_column to pick the body field.""" + path = tmp_path / "articles.csv" + path.write_text( + "title,body,author\n" + "First post,This is the body of the first post.,alice\n" + "Second,Body of the second article.,bob\n", + encoding="utf-8", + ) + docs = DocumentLoader.from_csv( + str(path), text_column="body", metadata_columns=["title", "author"] + ) + assert len(docs) == 2 + assert docs[0].text == "This is the body of the first post." + assert docs[0].metadata["title"] == "First post" + assert docs[0].metadata["author"] == "alice" + assert docs[1].text == "Body of the second article." 
+
+    def test_csv_all_columns_concatenated(self, tmp_path: Path) -> None:
+        """When text_column is None, all columns are joined into the text."""
+        path = tmp_path / "rows.csv"
+        path.write_text("k1,k2\nfoo,bar\n", encoding="utf-8")
+        docs = DocumentLoader.from_csv(str(path))
+        assert len(docs) == 1
+        # Both column values should be present somewhere in the text
+        assert "foo" in docs[0].text
+        assert "bar" in docs[0].text
+
+
+class TestFromJSONReal:
+    def test_json_array_of_objects(self, tmp_path: Path) -> None:
+        """A real JSON array yields one Document per item."""
+        path = tmp_path / "posts.json"
+        payload = [
+            {"body": "first body", "title": "A", "tag": "x"},
+            {"body": "second body", "title": "B", "tag": "y"},
+        ]
+        path.write_text(json.dumps(payload), encoding="utf-8")
+        docs = DocumentLoader.from_json(
+            str(path), text_field="body", metadata_fields=["title", "tag"]
+        )
+        assert len(docs) == 2
+        assert docs[0].text == "first body"
+        assert docs[0].metadata["title"] == "A"
+        assert docs[1].metadata["tag"] == "y"
+
+    def test_json_single_object(self, tmp_path: Path) -> None:
+        """A single object produces a single Document."""
+        path = tmp_path / "one.json"
+        path.write_text(json.dumps({"text": "alone", "meta": "value"}), encoding="utf-8")
+        docs = DocumentLoader.from_json(str(path), text_field="text")
+        assert len(docs) == 1
+        assert docs[0].text == "alone"
+
+
+class TestFromHTMLReal:
+    def test_html_full_text_extraction(self, tmp_path: Path) -> None:
+        """Real HTML file -> stripped plain text."""
+        path = tmp_path / "page.html"
+        path.write_text(
+            "<html><body>"
+            "<h1>Title</h1>"
+            "<p>First paragraph.</p>"
+            "<p>Second paragraph.</p>"
+            "</body></html>",
+            encoding="utf-8",
+        )
+        docs = DocumentLoader.from_html(str(path))
+        assert len(docs) == 1
+        text = docs[0].text
+        assert "Title" in text
+        assert "First paragraph" in text
+        assert "Second paragraph" in text
+        # Tags should be stripped
+        assert "<h1>" not in text
+        assert "<p>" not in text
+
+
+class TestFromURLReal:
+    def test_fetch_example_com(self) -> None:
+        """Real HTTP GET to example.com — this URL has been stable for years."""
+        try:
+            docs = DocumentLoader.from_url("https://example.com", timeout=15.0)
+        except Exception as exc:  # pragma: no cover - network hiccup only
+            pytest.skip(f"Network unavailable: {exc}")
+        assert len(docs) == 1
+        text = docs[0].text
+        # example.com contains "Example Domain" — very stable
+        assert "Example Domain" in text
+        # Source metadata should be the URL
+        assert docs[0].metadata.get("source") == "https://example.com"
diff --git a/tests/rag/test_e2e_faiss_store.py b/tests/rag/test_e2e_faiss_store.py
new file mode 100644
index 0000000..0a0470f
--- /dev/null
+++ b/tests/rag/test_e2e_faiss_store.py
@@ -0,0 +1,122 @@
+"""End-to-end tests for FAISSVectorStore against real faiss-cpu.
+
+These tests use the real ``faiss`` package (no mocking) and a deterministic
+hash-based embedder so no API keys are required. They exercise the actual
+FAISS C++ bindings and verify that:
+
+- selectools' wrapper calls match the real FAISS API
+- Cosine similarity search returns correct nearest-neighbour ordering
+- Save/load round-trip preserves both the index and document payloads
+- Delete and clear leave the index in a usable state
+
+Run with:
+
+    pytest tests/rag/test_e2e_faiss_store.py --run-e2e -v
+"""
+
+from __future__ import annotations
+
+import hashlib
+from typing import List
+
+import pytest
+
+faiss = pytest.importorskip("faiss", reason="faiss-cpu not installed")
+
+from selectools.embeddings import EmbeddingProvider  # noqa: E402
+from selectools.rag import Document  # noqa: E402
+from selectools.rag.stores import FAISSVectorStore  # noqa: E402
+
+
+class HashEmbedder(EmbeddingProvider):
+    """Deterministic 32-dim hash embedder so tests need no API key."""
+
+    def __init__(self, dim: int = 32) -> None:
+        self._dim = dim
+
+    @property
+    def dimension(self) -> int:
+        return self._dim
+
+    def embed_query(self, 
text: str) -> List[float]: + digest = hashlib.sha256(text.encode("utf-8")).digest() + raw = (digest * ((self._dim // len(digest)) + 1))[: self._dim] + return [(b / 127.5) - 1.0 for b in raw] + + def embed_text(self, text: str) -> List[float]: + return self.embed_query(text) + + def embed_texts(self, texts: List[str]) -> List[List[float]]: + return [self.embed_query(t) for t in texts] + + +@pytest.mark.e2e +class TestFAISSRealBindings: + """Tests that exercise the real faiss-cpu C++ bindings.""" + + def test_real_faiss_is_imported(self) -> None: + """Confirm we are hitting real faiss, not a mock module.""" + import faiss as real_faiss + + assert hasattr(real_faiss, "IndexFlatIP") + # Real faiss has a numeric version number; the mock we use in unit + # tests does not. + assert hasattr(real_faiss, "__version__") + + def test_add_and_search_single_document(self) -> None: + """Adding a doc and searching returns it with a positive score.""" + embedder = HashEmbedder() + store = FAISSVectorStore(embedder=embedder) + store.add_documents([Document(text="the quick brown fox")]) + results = store.search(embedder.embed_query("the quick brown fox"), top_k=1) + assert len(results) == 1 + assert results[0].document.text == "the quick brown fox" + # Cosine self-similarity should be ~1.0 + assert results[0].score > 0.99 + + def test_search_returns_topk_ordered(self) -> None: + """Search returns top_k results in descending score order.""" + embedder = HashEmbedder() + store = FAISSVectorStore(embedder=embedder) + docs = [Document(text=f"document number {i}", metadata={"idx": i}) for i in range(5)] + store.add_documents(docs) + results = store.search(embedder.embed_query("document number 2"), top_k=3) + assert len(results) == 3 + # Exact match should be first + assert results[0].document.text == "document number 2" + # Scores strictly descending + for a, b in zip(results, results[1:]): + assert a.score >= b.score + + def test_save_and_load_round_trip(self, tmp_path) -> None: + 
"""Persisting then loading restores both vectors and documents.""" + embedder = HashEmbedder() + store = FAISSVectorStore(embedder=embedder) + docs = [ + Document(text="alpha", metadata={"id": "a"}), + Document(text="beta", metadata={"id": "b"}), + Document(text="gamma", metadata={"id": "c"}), + ] + store.add_documents(docs) + save_path = tmp_path / "faiss_index" + store.save(str(save_path)) + + loaded = FAISSVectorStore.load(str(save_path), embedder=embedder) + results = loaded.search(embedder.embed_query("alpha"), top_k=3) + texts = {r.document.text for r in results} + assert texts == {"alpha", "beta", "gamma"} + # Metadata survived the round-trip + alpha = next(r for r in results if r.document.text == "alpha") + assert alpha.document.metadata["id"] == "a" + + def test_clear_leaves_store_usable(self) -> None: + """clear() empties the index and new adds still work.""" + embedder = HashEmbedder() + store = FAISSVectorStore(embedder=embedder) + store.add_documents([Document(text="will be cleared")]) + store.clear() + assert store.search(embedder.embed_query("anything"), top_k=1) == [] + store.add_documents([Document(text="after clear")]) + results = store.search(embedder.embed_query("after clear"), top_k=1) + assert len(results) == 1 + assert results[0].document.text == "after clear" diff --git a/tests/rag/test_e2e_pgvector_store.py b/tests/rag/test_e2e_pgvector_store.py new file mode 100644 index 0000000..c016f45 --- /dev/null +++ b/tests/rag/test_e2e_pgvector_store.py @@ -0,0 +1,114 @@ +"""End-to-end tests for PgVectorStore against a real PostgreSQL instance. + +``test_pgvector_store.py`` mocks psycopg2. This file requires a real +PostgreSQL server with the ``pgvector`` extension installed. 
+ +To run: + + # Start Postgres + pgvector locally: + docker run -d --name pgvector \ + -e POSTGRES_PASSWORD=selectools -p 5432:5432 \ + pgvector/pgvector:pg16 + + docker exec pgvector psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector" + + # Then: + POSTGRES_URL="postgresql://postgres:selectools@localhost:5432/postgres" \ + pytest tests/rag/test_e2e_pgvector_store.py --run-e2e -v + +Tests skip automatically if POSTGRES_URL is not set. +""" + +from __future__ import annotations + +import hashlib +import os +import uuid +from typing import List + +import pytest + +pytest.importorskip("psycopg2", reason="psycopg2-binary not installed") + +from selectools.embeddings import EmbeddingProvider # noqa: E402 +from selectools.rag import Document # noqa: E402 +from selectools.rag.stores import PgVectorStore # noqa: E402 + +pytestmark = pytest.mark.e2e + + +def _postgres_url() -> str | None: + return os.environ.get("POSTGRES_URL") or os.environ.get("DATABASE_URL") + + +@pytest.fixture(scope="module") +def postgres_or_skip() -> str: + url = _postgres_url() + if not url: + pytest.skip("POSTGRES_URL / DATABASE_URL not set — skipping pgvector e2e") + return url + + +class HashEmbedder(EmbeddingProvider): + """Deterministic 32-dim hash embedder so tests need no API key.""" + + @property + def dimension(self) -> int: + return 32 + + def embed_query(self, text: str) -> List[float]: + digest = hashlib.sha256(text.encode("utf-8")).digest() + raw = (digest * 2)[:32] + return [(b / 127.5) - 1.0 for b in raw] + + def embed_text(self, text: str) -> List[float]: + return self.embed_query(text) + + def embed_texts(self, texts: List[str]) -> List[List[float]]: + return [self.embed_query(t) for t in texts] + + +@pytest.fixture +def pg_store(postgres_or_skip: str) -> PgVectorStore: + """Create a PgVectorStore with a unique table per test (auto-cleaned).""" + table = f"selectools_e2e_{uuid.uuid4().hex[:8]}" + store = PgVectorStore( + embedder=HashEmbedder(), + 
connection_string=postgres_or_skip, + table_name=table, + dimensions=32, + ) + yield store + # Cleanup: drop the table + try: + import psycopg2 + + conn = psycopg2.connect(postgres_or_skip) + conn.autocommit = True + with conn.cursor() as cur: + cur.execute(f"DROP TABLE IF EXISTS {table}") # nosec B608 + conn.close() + except Exception: + pass + + +class TestPgVectorRealServer: + def test_add_and_search(self, pg_store: PgVectorStore) -> None: + """Real add + search round-trip against a real Postgres+pgvector.""" + docs = [ + Document(text="alpha document", metadata={"id": "a"}), + Document(text="beta document", metadata={"id": "b"}), + Document(text="gamma document", metadata={"id": "c"}), + ] + pg_store.add_documents(docs) + query_vec = pg_store.embedder.embed_query("alpha document") + results = pg_store.search(query_vec, top_k=3) + assert len(results) == 3 + assert results[0].document.text == "alpha document" + + def test_clear_truncates_table(self, pg_store: PgVectorStore) -> None: + """clear() removes all rows from the real pgvector table.""" + pg_store.add_documents([Document(text="to be cleared")]) + pg_store.clear() + results = pg_store.search(pg_store.embedder.embed_query("to be cleared"), top_k=1) + assert results == [] diff --git a/tests/rag/test_e2e_qdrant_store.py b/tests/rag/test_e2e_qdrant_store.py new file mode 100644 index 0000000..6df3f25 --- /dev/null +++ b/tests/rag/test_e2e_qdrant_store.py @@ -0,0 +1,123 @@ +"""End-to-end tests for QdrantVectorStore against a real Qdrant instance. + +``test_qdrant_store.py`` mocks the ``qdrant_client`` module. This file +requires a running Qdrant server and exercises the real client. + +To run: + + # Start Qdrant locally: + docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant + + # Then: + pytest tests/rag/test_e2e_qdrant_store.py --run-e2e -v + +Or point at Qdrant Cloud: + + QDRANT_URL=https://xxx.cloud.qdrant.io \ + QDRANT_API_KEY=... 
\ + pytest tests/rag/test_e2e_qdrant_store.py --run-e2e -v + +Tests skip automatically if no Qdrant is reachable. +""" + +from __future__ import annotations + +import hashlib +import os +import socket +import uuid +from typing import List +from urllib.parse import urlparse + +import pytest + +pytest.importorskip("qdrant_client", reason="qdrant-client not installed") + +from selectools.embeddings import EmbeddingProvider # noqa: E402 +from selectools.rag import Document # noqa: E402 +from selectools.rag.stores import QdrantVectorStore # noqa: E402 + +pytestmark = pytest.mark.e2e + + +def _qdrant_url() -> str: + return os.environ.get("QDRANT_URL", "http://localhost:6333") + + +def _qdrant_reachable() -> bool: + url = urlparse(_qdrant_url()) + host = url.hostname or "localhost" + port = url.port or (443 if url.scheme == "https" else 6333) + try: + with socket.create_connection((host, port), timeout=2): + return True + except OSError: + return False + + +@pytest.fixture(scope="module") +def qdrant_or_skip() -> None: + if not _qdrant_reachable(): + pytest.skip(f"Qdrant not reachable at {_qdrant_url()}") + + +class HashEmbedder(EmbeddingProvider): + """Deterministic 32-dim hash embedder so tests need no API key.""" + + @property + def dimension(self) -> int: + return 32 + + def embed_query(self, text: str) -> List[float]: + digest = hashlib.sha256(text.encode("utf-8")).digest() + raw = (digest * 2)[:32] + return [(b / 127.5) - 1.0 for b in raw] + + def embed_text(self, text: str) -> List[float]: + return self.embed_query(text) + + def embed_texts(self, texts: List[str]) -> List[List[float]]: + return [self.embed_query(t) for t in texts] + + +@pytest.fixture +def qdrant_store(qdrant_or_skip: None) -> QdrantVectorStore: + """Create a QdrantVectorStore with a unique collection per test.""" + collection = f"selectools_e2e_{uuid.uuid4().hex[:8]}" + store = QdrantVectorStore( + embedder=HashEmbedder(), + collection_name=collection, + url=_qdrant_url(), + 
api_key=os.environ.get("QDRANT_API_KEY"), + prefer_grpc=False, # REST is more reliable for e2e + ) + yield store + # Cleanup: drop the collection + try: + store.clear() + except Exception: + pass + + +class TestQdrantRealServer: + def test_add_and_search(self, qdrant_store: QdrantVectorStore) -> None: + """Real add + search round-trip against a real Qdrant instance.""" + docs = [ + Document(text="the first document", metadata={"id": "a"}), + Document(text="the second document", metadata={"id": "b"}), + Document(text="another unrelated text", metadata={"id": "c"}), + ] + qdrant_store.add_documents(docs) + query_vec = qdrant_store.embedder.embed_query("the first document") + results = qdrant_store.search(query_vec, top_k=3) + assert len(results) == 3 + # Exact-match doc should be first + assert results[0].document.text == "the first document" + + def test_clear_empties_collection(self, qdrant_store: QdrantVectorStore) -> None: + """clear() removes all documents from the real collection.""" + qdrant_store.add_documents([Document(text="temporary")]) + qdrant_store.clear() + query_vec = qdrant_store.embedder.embed_query("temporary") + results = qdrant_store.search(query_vec, top_k=1) + assert results == [] diff --git a/tests/test_e2e_langfuse_observer.py b/tests/test_e2e_langfuse_observer.py new file mode 100644 index 0000000..f9fb16f --- /dev/null +++ b/tests/test_e2e_langfuse_observer.py @@ -0,0 +1,64 @@ +"""End-to-end tests for LangfuseObserver against a real Langfuse instance. + +``test_langfuse_observer.py`` mocks the langfuse SDK. This file talks to a +real Langfuse backend — either Langfuse Cloud or a self-hosted instance. + +Required env vars (tests skip if missing): + - LANGFUSE_PUBLIC_KEY + - LANGFUSE_SECRET_KEY + - LANGFUSE_HOST (optional; defaults to Langfuse Cloud) + +Run with: + + pytest tests/test_e2e_langfuse_observer.py --run-e2e -v + +Note: this test does NOT attempt to read traces back from Langfuse (that +requires API access and timing). 
It just verifies the SDK accepts our +event sequence without throwing and that ``flush()`` completes cleanly. +""" + +from __future__ import annotations + +import os + +import pytest + +pytest.importorskip("langfuse", reason="langfuse not installed") + +from selectools import Agent, AgentConfig, tool # noqa: E402 +from selectools.observe import LangfuseObserver # noqa: E402 +from tests.conftest import SharedFakeProvider # noqa: E402 + +pytestmark = pytest.mark.e2e + + +@pytest.fixture(scope="module") +def langfuse_or_skip() -> None: + if not os.environ.get("LANGFUSE_PUBLIC_KEY"): + pytest.skip("LANGFUSE_PUBLIC_KEY not set — skipping Langfuse e2e") + if not os.environ.get("LANGFUSE_SECRET_KEY"): + pytest.skip("LANGFUSE_SECRET_KEY not set — skipping Langfuse e2e") + + +@tool() +def _noop() -> str: + """Return a fixed string.""" + return "noop" + + +class TestLangfuseRealBackend: + def test_agent_run_emits_trace_without_errors(self, langfuse_or_skip: None) -> None: + """A full agent run pushes a real trace to Langfuse and flushes cleanly.""" + observer = LangfuseObserver() + agent = Agent( + tools=[_noop], + provider=SharedFakeProvider(responses=["final answer"]), + config=AgentConfig( + model="fake-model", + observers=[observer], + ), + ) + result = agent.run("hello") + assert "final answer" in result.content + # Force flush — should not raise + observer._langfuse.flush() diff --git a/tests/test_e2e_multimodal.py b/tests/test_e2e_multimodal.py new file mode 100644 index 0000000..6c2ced0 --- /dev/null +++ b/tests/test_e2e_multimodal.py @@ -0,0 +1,140 @@ +"""End-to-end multimodal tests with real vision-capable LLM calls. + +The existing ``test_multimodal.py`` checks that ``ContentPart`` objects are +constructed correctly and that providers' ``_format_messages`` produce the +expected dict shapes. Those tests never actually call a real vision model. 
+ +These tests: + +- Build a tiny base64-encoded PNG in memory (4x4 pixels, no external asset) +- Send it to OpenAI (gpt-4o-mini), Anthropic (claude-haiku-4-5), and Gemini + (gemini-2.5-flash) via ``image_message()`` +- Assert that each provider returns a non-empty response + +This is the only place we prove that the selectools wire format matches +what each provider actually accepts for image inputs. + +Required env vars (tests skip if missing): + - OPENAI_API_KEY + - ANTHROPIC_API_KEY + - GOOGLE_API_KEY or GEMINI_API_KEY + +Run with: + + pytest tests/test_e2e_multimodal.py --run-e2e -v +""" + +from __future__ import annotations + +import os +import struct +import zlib +from pathlib import Path + +import pytest + +from selectools import Agent, AgentConfig, image_message, tool +from selectools.providers.anthropic_provider import AnthropicProvider +from selectools.providers.gemini_provider import GeminiProvider +from selectools.providers.openai_provider import OpenAIProvider + +pytestmark = pytest.mark.e2e + + +@tool() +def _noop() -> str: + """Return a fixed string. Used so Agent can be instantiated.""" + return "noop" + + +def _make_tiny_red_png_bytes() -> bytes: + """Build a 4x4 solid-red PNG entirely in-memory. + + No PIL dependency, no network fetch for image construction. Only the + subsequent LLM call needs the network. 
+ """ + width, height = 4, 4 + # One row: filter byte + RGB bytes per pixel + row = b"\x00" + b"\xff\x00\x00" * width + raw = row * height + + def chunk(ctype: bytes, data: bytes) -> bytes: + return ( + struct.pack(">I", len(data)) + + ctype + + data + + struct.pack(">I", zlib.crc32(ctype + data) & 0xFFFFFFFF) + ) + + sig = b"\x89PNG\r\n\x1a\n" + ihdr = struct.pack(">IIBBBBB", width, height, 8, 2, 0, 0, 0) + idat = zlib.compress(raw) + return sig + chunk(b"IHDR", ihdr) + chunk(b"IDAT", idat) + chunk(b"IEND", b"") + + +@pytest.fixture(scope="module") +def tiny_red_png(tmp_path_factory: pytest.TempPathFactory) -> str: + """Write a 4x4 red PNG to a module-scoped temp file and return its path.""" + tmp_dir = tmp_path_factory.mktemp("mm") + png_path = tmp_dir / "tiny_red.png" + png_path.write_bytes(_make_tiny_red_png_bytes()) + return str(png_path) + + +class TestMultimodalRealProviders: + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY"), + reason="OPENAI_API_KEY not set", + ) + def test_openai_gpt4o_mini_accepts_image(self, tiny_red_png: str) -> None: + """Real OpenAI call with an image attachment returns a non-empty response.""" + agent = Agent( + tools=[_noop], + provider=OpenAIProvider(), + config=AgentConfig(model="gpt-4o-mini", max_tokens=50), + ) + msg = image_message( + tiny_red_png, + prompt="What primary color is this tiny image? 
Reply in one word.", + ) + result = agent.run([msg]) + assert result.content, "Empty response from OpenAI" + assert result.usage.total_tokens > 0 + + @pytest.mark.skipif( + not os.environ.get("ANTHROPIC_API_KEY"), + reason="ANTHROPIC_API_KEY not set", + ) + def test_anthropic_claude_accepts_image(self, tiny_red_png: str) -> None: + """Real Anthropic call with an image attachment returns a non-empty response.""" + agent = Agent( + tools=[_noop], + provider=AnthropicProvider(), + config=AgentConfig(model="claude-haiku-4-5", max_tokens=50), + ) + msg = image_message( + tiny_red_png, + prompt="What primary color is this tiny image? Reply in one word.", + ) + result = agent.run([msg]) + assert result.content, "Empty response from Anthropic" + assert result.usage.total_tokens > 0 + + @pytest.mark.skipif( + not (os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")), + reason="GOOGLE_API_KEY / GEMINI_API_KEY not set", + ) + def test_gemini_flash_accepts_image(self, tiny_red_png: str) -> None: + """Real Gemini call with an image attachment returns a non-empty response.""" + agent = Agent( + tools=[_noop], + provider=GeminiProvider(), + config=AgentConfig(model="gemini-2.5-flash", max_tokens=50), + ) + msg = image_message( + tiny_red_png, + prompt="What primary color is this tiny image? Reply in one word.", + ) + result = agent.run([msg]) + assert result.content, "Empty response from Gemini" + assert result.usage.total_tokens > 0 diff --git a/tests/test_e2e_otel_observer.py b/tests/test_e2e_otel_observer.py new file mode 100644 index 0000000..64f376a --- /dev/null +++ b/tests/test_e2e_otel_observer.py @@ -0,0 +1,123 @@ +"""End-to-end tests for OTelObserver against the real OpenTelemetry SDK. + +``test_otel_observer.py`` mocks the ``opentelemetry`` module. 
These tests +use the real ``opentelemetry-sdk`` with an in-memory span exporter so we +can assert that: + +- A TracerProvider actually receives span start/end events +- Span names follow the GenAI semantic conventions +- Run -> LLM -> Tool span hierarchy is correct +- Attributes like ``gen_ai.request.model`` and token counts are set + +Run with: + + pytest tests/test_e2e_otel_observer.py --run-e2e -v +""" + +from __future__ import annotations + +import pytest + +pytest.importorskip("opentelemetry", reason="opentelemetry-api not installed") +pytest.importorskip("opentelemetry.sdk", reason="opentelemetry-sdk not installed") + +from opentelemetry import trace # noqa: E402 +from opentelemetry.sdk.trace import TracerProvider # noqa: E402 +from opentelemetry.sdk.trace.export import SimpleSpanProcessor # noqa: E402 +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( # noqa: E402 + InMemorySpanExporter, +) + +from selectools import Agent, AgentConfig, tool # noqa: E402 +from selectools.observe import OTelObserver # noqa: E402 +from tests.conftest import SharedFakeProvider # noqa: E402 + +pytestmark = pytest.mark.e2e + + +# OpenTelemetry only allows ONE global TracerProvider per process. Set it up +# exactly once at module import time, reuse the same exporter across tests, +# and clear its span buffer in the fixture so tests stay isolated. +_EXPORTER = InMemorySpanExporter() +_PROVIDER = TracerProvider() +_PROVIDER.add_span_processor(SimpleSpanProcessor(_EXPORTER)) +trace.set_tracer_provider(_PROVIDER) + + +@pytest.fixture +def otel_exporter() -> InMemorySpanExporter: + """Return the shared in-memory exporter, cleared for this test.""" + _EXPORTER.clear() + return _EXPORTER + + +@tool() +def _noop() -> str: + """Return a fixed string. 
Used so Agent can be instantiated.""" + return "noop" + + +class TestOTelRealSDK: + def test_agent_run_emits_root_span(self, otel_exporter: InMemorySpanExporter) -> None: + """A single agent run produces at least one finished span.""" + agent = Agent( + tools=[_noop], + provider=SharedFakeProvider(responses=["final answer"]), + config=AgentConfig( + model="fake-model", + observers=[OTelObserver(tracer_name="selectools-e2e")], + ), + ) + result = agent.run("hello") + assert "final answer" in result.content + + spans = otel_exporter.get_finished_spans() + assert len(spans) >= 1, "Expected at least one span from agent.run" + + # There should be a root agent.run span + names = [s.name for s in spans] + assert any( + "run" in n.lower() or "agent" in n.lower() for n in names + ), f"No agent/run span found; got: {names}" + + def test_run_span_has_gen_ai_system_attribute( + self, otel_exporter: InMemorySpanExporter + ) -> None: + """The root span carries the GenAI semantic-convention system attr.""" + agent = Agent( + tools=[_noop], + provider=SharedFakeProvider(responses=["hi"]), + config=AgentConfig( + model="fake-model", + observers=[OTelObserver(tracer_name="selectools-e2e")], + ), + ) + agent.run("ping") + + spans = otel_exporter.get_finished_spans() + # At least one span should carry the gen_ai.system attribute + saw_gen_ai_system = False + for span in spans: + attrs = dict(span.attributes or {}) + if attrs.get("gen_ai.system") == "selectools": + saw_gen_ai_system = True + break + assert saw_gen_ai_system, "Expected at least one span with gen_ai.system='selectools'" + + def test_multiple_runs_produce_distinct_spans( + self, otel_exporter: InMemorySpanExporter + ) -> None: + """Each agent.run() creates its own set of spans.""" + agent = Agent( + tools=[_noop], + provider=SharedFakeProvider(responses=["a", "b", "c"]), + config=AgentConfig( + model="fake-model", + observers=[OTelObserver(tracer_name="selectools-e2e")], + ), + ) + agent.run("first") + count_after_first = 
len(otel_exporter.get_finished_spans()) + agent.run("second") + count_after_second = len(otel_exporter.get_finished_spans()) + assert count_after_second > count_after_first, "Second run did not emit additional spans" diff --git a/tests/tools/test_e2e_code_tools.py b/tests/tools/test_e2e_code_tools.py new file mode 100644 index 0000000..7d5786c --- /dev/null +++ b/tests/tools/test_e2e_code_tools.py @@ -0,0 +1,77 @@ +"""End-to-end tests for code execution tools with real subprocesses. + +Unlike ``test_code_tools.py`` (which mocks ``subprocess.run``), these tests +actually spawn ``python3`` and ``sh`` processes and assert on their real +output. They're the only place we verify that: + +- The subprocess invocation string is well-formed +- Timeout handling works against a real blocking process +- The shell metacharacter blocklist matches what a real shell would execute +- Output truncation kicks in at the expected byte count + +Run with: + + pytest tests/tools/test_e2e_code_tools.py --run-e2e -v +""" + +from __future__ import annotations + +import pytest + +from selectools.toolbox import code_tools + +pytestmark = pytest.mark.e2e + + +class TestExecutePythonReal: + def test_hello_world_roundtrip(self) -> None: + """Real python3 subprocess runs and stdout is captured.""" + result = code_tools.execute_python.function("print('hello e2e')") + assert "hello e2e" in result + + def test_exception_shown_in_stderr_section(self) -> None: + """Real python3 traceback lands in the stderr section of the output.""" + result = code_tools.execute_python.function("raise ValueError('boom')") + assert "ValueError" in result + assert "boom" in result + assert "exit code" in result.lower() + + def test_real_timeout_expiry(self) -> None: + """A real long-running process is killed after the timeout.""" + result = code_tools.execute_python.function("import time; time.sleep(10)", timeout=1) + assert "timed out" in result.lower() + + def test_stdout_stderr_both_captured(self) -> None: + """stdout 
and stderr are both captured from the real subprocess.""" + code = ( + "import sys\n" "sys.stdout.write('on stdout\\n')\n" "sys.stderr.write('on stderr\\n')\n" + ) + result = code_tools.execute_python.function(code) + assert "on stdout" in result + assert "on stderr" in result + + def test_output_truncation_on_large_output(self) -> None: + """Very large stdout is truncated (real process emits > 10KB).""" + code = "print('x' * 20000)" # 20KB of 'x' + result = code_tools.execute_python.function(code) + # Real output was 20KB; truncated to 10KB with a notice + assert "truncated" in result.lower() + + +class TestExecuteShellReal: + def test_echo_real_shell(self) -> None: + """A real shell executes echo and returns stdout.""" + result = code_tools.execute_shell.function("echo hello-e2e") + assert "hello-e2e" in result + + def test_nonexistent_command_returns_error(self) -> None: + """A real shell rejects a nonexistent binary with non-zero exit.""" + result = code_tools.execute_shell.function("this-binary-does-not-exist-42") + # Should include some indication of failure (stderr or exit code) + assert "exit code" in result.lower() or "not found" in result.lower() + + def test_pipe_metacharacter_rejected_before_execution(self) -> None: + """Shell metacharacters are rejected before subprocess is called.""" + result = code_tools.execute_shell.function("echo hi | cat") + # Blocklist rejects the command; should not contain the piped output + assert "error" in result.lower() or "reject" in result.lower() diff --git a/tests/tools/test_e2e_db_tools.py b/tests/tools/test_e2e_db_tools.py new file mode 100644 index 0000000..1a025d4 --- /dev/null +++ b/tests/tools/test_e2e_db_tools.py @@ -0,0 +1,110 @@ +"""End-to-end tests for the database tools against real SQLite. + +The existing ``test_db_tools.py`` relies on mocked ``psycopg2`` and limited +SQLite coverage. 
These tests create real on-disk SQLite databases with real +schemas and verify that: + +- ``query_sqlite`` reads actual rows from a real file +- The ``PRAGMA query_only = ON`` enforcement rejects writes +- ``max_rows`` genuinely limits the returned result set +- The table formatting matches what the LLM will see + +``query_postgres`` lives in test_e2e_pgvector_store.py's tier because it +requires a running Postgres instance with credentials. + +Run with: + + pytest tests/tools/test_e2e_db_tools.py --run-e2e -v +""" + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +import pytest + +from selectools.toolbox import db_tools + +pytestmark = pytest.mark.e2e + + +@pytest.fixture +def real_sqlite_db(tmp_path: Path) -> Path: + """Create a real SQLite database on disk with sample data.""" + db_path = tmp_path / "e2e.db" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, age INTEGER)") + conn.executemany( + "INSERT INTO users (id, name, age) VALUES (?, ?, ?)", + [ + (1, "alice", 30), + (2, "bob", 25), + (3, "carol", 40), + (4, "dave", 35), + (5, "eve", 28), + ], + ) + conn.commit() + conn.close() + return db_path + + +class TestQuerySqliteReal: + def test_select_returns_rows(self, real_sqlite_db: Path) -> None: + """A real SELECT returns all rows formatted as a text table.""" + result = db_tools.query_sqlite.function( + str(real_sqlite_db), "SELECT id, name, age FROM users ORDER BY id" + ) + for name in ("alice", "bob", "carol", "dave", "eve"): + assert name in result + # Column headers appear in output + assert "id" in result + assert "name" in result + + def test_select_where_clause(self, real_sqlite_db: Path) -> None: + """WHERE clauses filter rows as expected.""" + result = db_tools.query_sqlite.function( + str(real_sqlite_db), "SELECT name FROM users WHERE age > 30" + ) + assert "carol" in result + assert "dave" in result + assert "alice" not in result + assert "bob" 
not in result + + def test_count_query(self, real_sqlite_db: Path) -> None: + """Aggregate queries return single-row results.""" + result = db_tools.query_sqlite.function( + str(real_sqlite_db), "SELECT COUNT(*) AS total FROM users" + ) + assert "5" in result + + def test_insert_rejected_readonly(self, real_sqlite_db: Path) -> None: + """INSERT is rejected by the read-only validator.""" + result = db_tools.query_sqlite.function( + str(real_sqlite_db), "INSERT INTO users (id, name) VALUES (99, 'mallory')" + ) + assert "error" in result.lower() or "read-only" in result.lower() + + # Verify the row was NOT inserted (sanity-check the enforcement worked) + conn = sqlite3.connect(str(real_sqlite_db)) + (count,) = conn.execute("SELECT COUNT(*) FROM users WHERE name = 'mallory'").fetchone() + conn.close() + assert count == 0 + + def test_update_rejected_readonly(self, real_sqlite_db: Path) -> None: + """UPDATE is rejected by the read-only validator.""" + result = db_tools.query_sqlite.function( + str(real_sqlite_db), "UPDATE users SET age = 999 WHERE id = 1" + ) + assert "error" in result.lower() or "read-only" in result.lower() + + def test_max_rows_truncates(self, real_sqlite_db: Path) -> None: + """max_rows caps the result set.""" + result = db_tools.query_sqlite.function( + str(real_sqlite_db), "SELECT name FROM users ORDER BY id", max_rows=2 + ) + assert "alice" in result + assert "bob" in result + # Rows 3-5 should NOT be present + assert "carol" not in result diff --git a/tests/tools/test_e2e_github_tools.py b/tests/tools/test_e2e_github_tools.py new file mode 100644 index 0000000..b6dbbab --- /dev/null +++ b/tests/tools/test_e2e_github_tools.py @@ -0,0 +1,72 @@ +"""End-to-end tests for GitHub tools against the real GitHub REST API. + +``test_github_tools.py`` mocks all HTTP. These tests make real unauthenticated +calls to the public GitHub API. Unauth calls are limited to 60/hour per IP; +each test makes exactly ONE call so the full file uses 3 calls. 
+ +If ``GITHUB_TOKEN`` is set the auth header is included and the limit jumps +to 5000/hour. + +Run with: + + pytest tests/tools/test_e2e_github_tools.py --run-e2e -v +""" + +from __future__ import annotations + +import urllib.request + +import pytest + +from selectools.toolbox import github_tools + +pytestmark = pytest.mark.e2e + + +def _have_internet() -> bool: + try: + urllib.request.urlopen("https://api.github.com", timeout=5) + return True + except Exception: + return False + + +@pytest.fixture(scope="module") +def internet_or_skip() -> None: + if not _have_internet(): + pytest.skip("Network unavailable or api.github.com unreachable") + + +class TestGithubToolsReal: + def test_search_repos_real(self, internet_or_skip: None) -> None: + """Real github search for a popular library returns results.""" + result = github_tools.github_search_repos.function( + "selectools language:python", max_results=3 + ) + # Should not be a pure error; should include at least one known name + assert result + assert "error" not in result.lower() or "selectools" in result.lower() + + def test_get_file_real(self, internet_or_skip: None) -> None: + """Real get_file of a stable public file returns its contents.""" + # python/cpython has a very stable README + result = github_tools.github_get_file.function( + repo="python/cpython", path="README.rst", ref="main" + ) + assert result + # cpython's README mentions Python + assert "python" in result.lower() or "error" in result.lower() + + def test_list_issues_real(self, internet_or_skip: None) -> None: + """Real list_issues against a well-known active repo.""" + result = github_tools.github_list_issues.function( + repo="python/cpython", state="open", max_results=3 + ) + assert result + # Either real issues or a documented error + assert ( + "#" in result + or "issue" in result.lower() + or "error" in result.lower() + or "rate" in result.lower() + ) diff --git a/tests/tools/test_e2e_search_tools.py b/tests/tools/test_e2e_search_tools.py new 
file mode 100644 index 0000000..3b885da --- /dev/null +++ b/tests/tools/test_e2e_search_tools.py @@ -0,0 +1,59 @@ +"""End-to-end tests for web_search and scrape_url against real endpoints. + +``test_search_tools.py`` mocks all HTTP. These tests hit real servers: + +- ``web_search`` → DuckDuckGo HTML search (no API key) +- ``scrape_url`` → https://example.com (stable for decades) + +Both are rate-limited and kept minimal (1-2 calls each) so they don't +hammer anyone. If the network is unavailable the tests skip. + +Run with: + + pytest tests/tools/test_e2e_search_tools.py --run-e2e -v +""" + +from __future__ import annotations + +import urllib.request + +import pytest + +from selectools.toolbox import search_tools + +pytestmark = pytest.mark.e2e + + +def _have_internet() -> bool: + try: + urllib.request.urlopen("https://example.com", timeout=5) + return True + except Exception: + return False + + +@pytest.fixture(scope="module") +def internet_or_skip() -> None: + if not _have_internet(): + pytest.skip("Network unavailable") + + +class TestWebSearchReal: + def test_duckduckgo_returns_results(self, internet_or_skip: None) -> None: + """Real DuckDuckGo HTML search returns non-empty output.""" + result = search_tools.web_search.function("python programming language") + # Should not be an error string, and should mention something relevant + assert result + assert "error" not in result.lower() or "python" in result.lower() + # Should be plaintext (not raw HTML) + assert " None: + """Real scrape of example.com returns the canonical page text.""" + result = search_tools.scrape_url.function("https://example.com") + assert "Example Domain" in result + # HTML tags should be stripped + assert " Date: Wed, 8 Apr 2026 07:47:32 -0300 Subject: [PATCH 04/17] fix(rag): migrate QdrantVectorStore from removed search() to query_points() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qdrant-client >=1.13 removed QdrantClient.search() in favour 
of query_points(). The new API differs in two ways: 1. The kwarg is `query=` instead of `query_vector=` 2. The return value is a `QueryResponse` object whose `.points` attribute holds the list of `ScoredPoint`s, not a flat list The mock-based unit tests in tests/rag/test_qdrant_store.py never caught this regression because they mocked QdrantClient — the mock had a `search` attribute that didn't exist on the real client. The new e2e test in tests/rag/test_e2e_qdrant_store.py exposed the bug on the first real call against Qdrant 1.17.1. Also fix a second consistency bug exposed by the e2e test: after clear() drops the collection, query_points() raises 404 instead of returning empty results. Caught the 404 in search() and return [] to match FAISSVectorStore semantics (search-after-clear → []). Mock unit tests updated to mirror the new API: - s/client.search/client.query_points/ - Mock return values now wrap a points list in a MagicMock with a .points attribute - Assertions that checked call_kwargs["query_vector"] now check call_kwargs["query"] After fix: 35 mock tests + 2 e2e tests against real Qdrant 1.17.1 all pass. Full e2e suite: 40 passed, 3 skipped (Azure + Langfuse, no creds). Full non-e2e suite: 4961 passed, 0 regressions. 
--- src/selectools/rag/stores/qdrant.py | 29 +++++++++++++----- tests/rag/test_qdrant_store.py | 46 ++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/src/selectools/rag/stores/qdrant.py b/src/selectools/rag/stores/qdrant.py index fbdb8af..9b1cf57 100644 --- a/src/selectools/rag/stores/qdrant.py +++ b/src/selectools/rag/stores/qdrant.py @@ -270,16 +270,29 @@ def search( # Build Qdrant filter from simple dict or pass-through native filter qdrant_filter = self._build_filter(filter) - results = self.client.search( - collection_name=self.collection_name, - query_vector=query_embedding, - limit=top_k, - query_filter=qdrant_filter, - with_payload=True, - ) + # qdrant-client >=1.13 removed `client.search()` in favour of + # `client.query_points()`. The new API takes `query=` instead of + # `query_vector=` and returns a `QueryResponse` whose `.points` + # attribute holds the list of `ScoredPoint`s. + try: + response = self.client.query_points( + collection_name=self.collection_name, + query=query_embedding, + limit=top_k, + query_filter=qdrant_filter, + with_payload=True, + ) + except Exception as exc: + # Be consistent with the other vector stores: searching an + # empty/uninitialised store returns an empty list rather than + # raising. Qdrant 404s when the collection has been dropped + # by ``clear()`` or has never been created. + if "404" in str(exc) or "not found" in str(exc).lower(): + return [] + raise search_results: List[SearchResult] = [] - for scored_point in results: + for scored_point in response.points: payload = scored_point.payload or {} # Extract document text and metadata from namespaced keys. 
diff --git a/tests/rag/test_qdrant_store.py b/tests/rag/test_qdrant_store.py index dd90cfd..f9a57f0 100644 --- a/tests/rag/test_qdrant_store.py +++ b/tests/rag/test_qdrant_store.py @@ -383,7 +383,9 @@ def test_search_returns_results( "_st_meta": {"source": "test.txt"}, } scored_point.score = 0.95 - qdrant_store.client.search.return_value = [scored_point] + _resp = MagicMock() + _resp.points = [scored_point] + qdrant_store.client.query_points.return_value = _resp query_emb = [0.1] * 128 results = qdrant_store.search(query_emb, top_k=5) @@ -396,21 +398,25 @@ def test_search_returns_results( def test_search_passes_correct_parameters(self, qdrant_store: Any) -> None: """Search forwards collection name, vector, limit, and payload flag.""" - qdrant_store.client.search.return_value = [] + _resp = MagicMock() + _resp.points = [] + qdrant_store.client.query_points.return_value = _resp query_emb = [0.5] * 128 qdrant_store.search(query_emb, top_k=10) - qdrant_store.client.search.assert_called_once() - call_kwargs = qdrant_store.client.search.call_args[1] + qdrant_store.client.query_points.assert_called_once() + call_kwargs = qdrant_store.client.query_points.call_args[1] assert call_kwargs["collection_name"] == "test_collection" - assert call_kwargs["query_vector"] == query_emb + assert call_kwargs["query"] == query_emb assert call_kwargs["limit"] == 10 assert call_kwargs["with_payload"] is True def test_search_empty_results(self, qdrant_store: Any) -> None: """Search returns empty list when no matches found.""" - qdrant_store.client.search.return_value = [] + _resp = MagicMock() + _resp.points = [] + qdrant_store.client.query_points.return_value = _resp results = qdrant_store.search([0.1] * 128) assert results == [] @@ -423,7 +429,9 @@ def test_search_uses_namespaced_payload(self, qdrant_store: Any) -> None: "_st_meta": {"author": "Alice"}, } scored_point.score = 0.8 - qdrant_store.client.search.return_value = [scored_point] + _resp = MagicMock() + _resp.points = 
[scored_point] + qdrant_store.client.query_points.return_value = _resp results = qdrant_store.search([0.1] * 128) @@ -441,7 +449,9 @@ def test_search_legacy_payload_fallback(self, qdrant_store: Any) -> None: "author": "Bob", } scored_point.score = 0.7 - qdrant_store.client.search.return_value = [scored_point] + _resp = MagicMock() + _resp.points = [scored_point] + qdrant_store.client.query_points.return_value = _resp results = qdrant_store.search([0.1] * 128) @@ -454,7 +464,9 @@ def test_search_handles_none_payload(self, qdrant_store: Any) -> None: scored_point = MagicMock() scored_point.payload = None scored_point.score = 0.5 - qdrant_store.client.search.return_value = [scored_point] + _resp = MagicMock() + _resp.points = [scored_point] + qdrant_store.client.query_points.return_value = _resp results = qdrant_store.search([0.1] * 128) @@ -466,7 +478,9 @@ def test_search_with_simple_filter( self, qdrant_store: Any, mock_qdrant_client_module: MagicMock ) -> None: """Simple dict filters are converted to Qdrant Filter objects.""" - qdrant_store.client.search.return_value = [] + _resp = MagicMock() + _resp.points = [] + qdrant_store.client.query_points.return_value = _resp qdrant_store.search( [0.1] * 128, @@ -474,17 +488,19 @@ def test_search_with_simple_filter( filter={"category": "ai"}, ) - call_kwargs = qdrant_store.client.search.call_args[1] + call_kwargs = qdrant_store.client.query_points.call_args[1] # Filter should have been converted (not None) assert call_kwargs["query_filter"] is not None def test_search_with_no_filter(self, qdrant_store: Any) -> None: """Search with no filter passes None as query_filter.""" - qdrant_store.client.search.return_value = [] + _resp = MagicMock() + _resp.points = [] + qdrant_store.client.query_points.return_value = _resp qdrant_store.search([0.1] * 128, top_k=5) - call_kwargs = qdrant_store.client.search.call_args[1] + call_kwargs = qdrant_store.client.query_points.call_args[1] assert call_kwargs["query_filter"] is None def 
test_search_multiple_results_ordering(self, qdrant_store: Any) -> None: @@ -502,7 +518,9 @@ def test_search_multiple_results_ordering(self, qdrant_store: Any) -> None: pt.score = score points.append(pt) - qdrant_store.client.search.return_value = points + _resp = MagicMock() + _resp.points = points + qdrant_store.client.query_points.return_value = _resp results = qdrant_store.search([0.1] * 128, top_k=3) From b047c1aa02d2417a6be3874ee0f2bd85f6d8d939 Mon Sep 17 00:00:00 2001 From: John Niche Date: Wed, 8 Apr 2026 10:23:18 -0300 Subject: [PATCH 05/17] fix(core): three shipping-blocker bugs surfaced by full-release simulations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four end-to-end integration scenarios in tests/test_e2e_v0_21_0_simulations.py that wire multiple v0.21.0 features together with real LLM calls: 1. FAISS + real OpenAI embeddings + RAGTool + real OpenAI agent + OTel 2. Multimodal image + execute_python tool + real Gemini agent + OTel 3. query_sqlite + execute_python + real Anthropic Claude agent 4. Qdrant + real OpenAI embeddings + RAGTool + real OpenAI agent + OTel Running the simulations surfaced three pre-existing shipping blockers that the entire existing test suite (188 mock-based v0.21.0 tests + 4 "workflow" tests that never actually call agent.run) had silently hidden: Bug 6 — @tool() on class methods fundamentally broken ---------------------------------------------------- @tool() applied to a method (def f(self, query: str)) produced a class-level Tool whose function was the unbound method. When the agent executor called tool.function(**llm_kwargs) Python raised TypeError: missing 1 required positional argument: 'self', so the LLM got back a "Tool Execution Failed" string and gave up. This broke the canonical RAG pattern documented everywhere in selectools: rag_tool = RAGTool(vector_store=store) agent = Agent(tools=[rag_tool.search_knowledge_base], provider=...) 
RAGTool, SemanticSearchTool, and HybridSearchTool were all affected. The existing tests/rag/test_rag_workflow.py tests that appeared to exercise this path only asserted isinstance(agent, Agent) and never actually ran the agent, so nobody noticed. Fix: add a _BoundMethodTool descriptor to selectools/tools/decorators.py that detects method-decorated tools (first param is self) and returns a per-instance Tool on attribute access. The descriptor wraps the original function in functools.partial(fn, instance) so the agent executor can invoke it with only the LLM's kwargs. Class-level access falls through to a template Tool for introspection (.name, .description, etc.). Callers that previously worked around the bug by manually passing the instance as the first argument to .function (test_rag_workflow.py, test_hybrid_search.py, test_rag_regression_phase3.py) are updated to the correct API. Bug 7 — Gemini provider silently drops images from content_parts --------------------------------------------------------------- GeminiProvider._format_messages only handled the legacy message.image_base64 attribute. The v0.21.0 image_message() helper creates a Message with content_parts=[ContentPart(type="image_base64", ...)] and explicitly sets message.image_base64 = None, so Gemini received only the text prompt and replied "I cannot see images". Fix: add a content_parts loop to GeminiProvider that converts each ContentPart to types.Part(inline_data=...) or file_data=... . Bug 8 — Anthropic provider has the same bug ------------------------------------------- Same pattern in AnthropicProvider. Claude replied "I don't see any image attached". Fix: content_parts loop producing the Anthropic native {type: image, source: {type: base64, ...}} shape. OpenAI already had the right handling in providers/_openai_compat.py, so only Gemini and Anthropic needed the fix. Also: tighten tests/test_e2e_multimodal.py assertions so the provider can never silently drop an image again. 
Previously the tests only asserted result.content was non-empty, which passed on "I cannot see images" — a classic false-green. Now each provider must actually say "red" in its reply to a 4x4 red PNG. Finally: move the shared otel_exporter fixture into tests/conftest.py so every e2e file that needs OTel span capture uses the same singleton TracerProvider. OpenTelemetry only allows one global TracerProvider per process, and having each file install its own caused later-loaded files to silently see empty span lists when run in the same suite. Verification: - 47 e2e tests collected → 44 passed, 3 skipped (Azure OpenAI x2 and Langfuse x1 skip cleanly when no credentials are set) - Full non-e2e suite: 4961 passed, 3 skipped, 0 regressions - The 4 full-release simulations in test_e2e_v0_21_0_simulations.py now verify every v0.21.0 subsystem works together with real LLM calls --- .../providers/anthropic_provider.py | 57 ++- src/selectools/providers/gemini_provider.py | 47 ++- src/selectools/tools/decorators.py | 123 +++++- tests/conftest.py | 50 +++ tests/rag/test_hybrid_search.py | 10 +- tests/rag/test_rag_regression_phase3.py | 6 +- tests/rag/test_rag_workflow.py | 10 +- tests/test_e2e_multimodal.py | 12 + tests/test_e2e_otel_observer.py | 23 +- tests/test_e2e_v0_21_0_simulations.py | 371 ++++++++++++++++++ 10 files changed, 635 insertions(+), 74 deletions(-) create mode 100644 tests/test_e2e_v0_21_0_simulations.py diff --git a/src/selectools/providers/anthropic_provider.py b/src/selectools/providers/anthropic_provider.py index 42d9877..d637996 100644 --- a/src/selectools/providers/anthropic_provider.py +++ b/src/selectools/providers/anthropic_provider.py @@ -275,20 +275,49 @@ def _format_messages(self, messages: List[Message]) -> List[dict]: } ) else: - # User or Assistant - if message.image_base64: - content.append( - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/png", - "data": message.image_base64, - }, - } - ) - if message.content: - 
content.append({"type": "text", "text": message.content}) + # User or Assistant. + # Prefer the v0.21.0 multimodal ``content_parts`` path: when + # the message was built via ``image_message()`` the image + # lives in a ContentPart (not in the legacy + # ``message.image_base64`` attribute, which is explicitly + # None for multimodal messages). Fall back to the legacy + # path for pre-0.21 callers. + if getattr(message, "content_parts", None): + for cp in message.content_parts: # type: ignore[union-attr] + if cp.type == "text" and cp.text: + content.append({"type": "text", "text": cp.text}) + elif cp.type == "image_url" and cp.image_url: + content.append( + { + "type": "image", + "source": {"type": "url", "url": cp.image_url}, + } + ) + elif cp.type == "image_base64" and cp.image_base64: + content.append( + { + "type": "image", + "source": { + "type": "base64", + "media_type": cp.media_type or "image/png", + "data": cp.image_base64, + }, + } + ) + else: + if message.image_base64: + content.append( + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": message.image_base64, + }, + } + ) + if message.content: + content.append({"type": "text", "text": message.content}) # Check for outgoing tool calls (from Assistant) if message.tool_calls: diff --git a/src/selectools/providers/gemini_provider.py b/src/selectools/providers/gemini_provider.py index eb92583..e9f9cbd 100644 --- a/src/selectools/providers/gemini_provider.py +++ b/src/selectools/providers/gemini_provider.py @@ -335,17 +335,46 @@ def _format_contents(self, system_prompt: str, messages: List[Message]) -> List: elif role == Role.USER.value: role = "user" - if message.content: - parts.append(types.Part(text=message.content)) - if message.image_base64: - parts.append( - types.Part( - inline_data=types.Blob( - mime_type="image/png", - data=base64.b64decode(message.image_base64), + # Prefer the v0.21.0 multimodal ``content_parts`` path: when + # the message was built via 
``image_message()`` the image + # lives in a ContentPart (not in the legacy + # ``message.image_base64`` attribute, which is explicitly + # None for multimodal messages). Fall back to the legacy + # path for pre-0.21 callers. + if getattr(message, "content_parts", None): + for cp in message.content_parts: # type: ignore[union-attr] + if cp.type == "text" and cp.text: + parts.append(types.Part(text=cp.text)) + elif cp.type == "image_url" and cp.image_url: + parts.append( + types.Part( + file_data=types.FileData( + file_uri=cp.image_url, + mime_type=cp.media_type or "image/png", + ) + ) + ) + elif cp.type == "image_base64" and cp.image_base64: + parts.append( + types.Part( + inline_data=types.Blob( + mime_type=cp.media_type or "image/png", + data=base64.b64decode(cp.image_base64), + ) + ) + ) + else: + if message.content: + parts.append(types.Part(text=message.content)) + if message.image_base64: + parts.append( + types.Part( + inline_data=types.Blob( + mime_type="image/png", + data=base64.b64decode(message.image_base64), + ) ) ) - ) elif role == Role.SYSTEM.value: # Gemini handles system instructions via config, not messages. diff --git a/src/selectools/tools/decorators.py b/src/selectools/tools/decorators.py index c258998..c788f79 100644 --- a/src/selectools/tools/decorators.py +++ b/src/selectools/tools/decorators.py @@ -4,6 +4,7 @@ from __future__ import annotations +import functools import inspect import sys from typing import Any, Callable, Dict, List, Optional, Union, get_args, get_origin, get_type_hints @@ -112,6 +113,80 @@ def _infer_parameters_from_callable( return parameters +def _build_tool_from_fn(func: Callable[..., Any], tool_kwargs: Dict[str, Any]) -> Tool: + """Build a Tool instance from a callable and the kwargs ``@tool()`` received. + + Extracted as a helper so it can be reused both for top-level function + tools and for per-instance bound method tools (see ``_BoundMethodTool``). 
+ """ + tool_name = tool_kwargs.get("name") or func.__name__ + tool_description = tool_kwargs.get("description") or inspect.getdoc(func) or f"Tool {tool_name}" + parameters = _infer_parameters_from_callable( + func, + tool_kwargs.get("param_metadata"), + tool_kwargs.get("injected_kwargs"), + ) + return Tool( + name=tool_name, + description=tool_description, + parameters=parameters, + function=func, + injected_kwargs=tool_kwargs.get("injected_kwargs"), + config_injector=tool_kwargs.get("config_injector"), + streaming=tool_kwargs.get("streaming", False), + screen_output=tool_kwargs.get("screen_output", False), + terminal=tool_kwargs.get("terminal", False), + requires_approval=tool_kwargs.get("requires_approval", False), + cacheable=tool_kwargs.get("cacheable", False), + cache_ttl=tool_kwargs.get("cache_ttl", 300), + ) + + +class _BoundMethodTool: + """Descriptor that binds a ``@tool``-decorated method to its instance. + + Applying ``@tool()`` to a regular function returns a ``Tool`` whose + ``function`` attribute is the raw callable — the agent executor calls + ``tool.function(**llm_args)`` and everything works. + + Applying ``@tool()`` to a method (``def f(self, ...)``) is trickier: + the LLM does not know about ``self``, so ``function(**llm_args)`` would + call the method without its receiver and raise + ``TypeError: missing 1 required positional argument: 'self'``. + + This descriptor solves it by returning a **per-instance** ``Tool`` from + ``__get__``: the Tool's ``function`` is ``functools.partial(original_fn, + instance)``, so the agent executor can invoke it with only the LLM's + kwargs and the method still receives its receiver. + + Class-level access (``RAGTool.search_knowledge_base``) returns the + descriptor itself, which proxies attribute lookups to a template ``Tool`` + so introspection (``.name``, ``.description``, ``.parameters``) keeps + working. 
+ """ + + def __init__(self, original_fn: Callable[..., Any], tool_kwargs: Dict[str, Any]) -> None: + self._original_fn = original_fn + self._tool_kwargs = tool_kwargs + # Template Tool used for class-level introspection. ``self`` is + # already skipped by ``_infer_parameters_from_callable`` so the + # parameters field is correct for LLM schema generation. + self._template = _build_tool_from_fn(original_fn, tool_kwargs) + + def __getattr__(self, name: str) -> Any: + # Forward attribute lookups to the template Tool so that + # ``MyClass.my_method.name`` / ``.description`` / ``.parameters`` + # still work at the class level. + return getattr(self._template, name) + + def __get__(self, instance: Any, owner: Optional[type] = None) -> Any: + if instance is None: + return self + bound_fn = functools.partial(self._original_fn, instance) + functools.update_wrapper(bound_fn, self._original_fn) + return _build_tool_from_fn(bound_fn, self._tool_kwargs) + + @stable def tool( *, @@ -126,7 +201,7 @@ def tool( requires_approval: bool = False, cacheable: bool = False, cache_ttl: int = 300, -) -> Callable[[Callable[..., Any]], Tool]: +) -> Callable[[Callable[..., Any]], Any]: """ Decorator to convert a function into a Tool. 
@@ -163,26 +238,32 @@ def tool( >>> print(tool_instance.name) 'add' """ + tool_kwargs: Dict[str, Any] = { + "name": name, + "description": description, + "param_metadata": param_metadata, + "injected_kwargs": injected_kwargs, + "config_injector": config_injector, + "streaming": streaming, + "screen_output": screen_output, + "terminal": terminal, + "requires_approval": requires_approval, + "cacheable": cacheable, + "cache_ttl": cache_ttl, + } - def decorator(func: Callable[..., Any]) -> Tool: - tool_name = name or func.__name__ - tool_description = description or inspect.getdoc(func) or f"Tool {tool_name}" - parameters = _infer_parameters_from_callable(func, param_metadata, injected_kwargs) - - tool_instance = Tool( - name=tool_name, - description=tool_description, - parameters=parameters, - function=func, - injected_kwargs=injected_kwargs, - config_injector=config_injector, - streaming=streaming, - screen_output=screen_output, - terminal=terminal, - requires_approval=requires_approval, - cacheable=cacheable, - cache_ttl=cache_ttl, - ) - return tool_instance + def decorator(func: Callable[..., Any]) -> Any: + # Detect method: first parameter is named ``self``. If so, return a + # descriptor that produces a per-instance bound Tool on attribute + # access. Otherwise (regular function) build a plain Tool. 
+ try: + sig_params = list(inspect.signature(func).parameters.values()) + is_method = bool(sig_params and sig_params[0].name == "self") + except (TypeError, ValueError): + is_method = False + + if is_method: + return _BoundMethodTool(func, tool_kwargs) + return _build_tool_from_fn(func, tool_kwargs) return decorator diff --git a/tests/conftest.py b/tests/conftest.py index c1546cb..0da419f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,6 +70,56 @@ def pytest_collection_modifyitems(config: Any, items: List[Any]) -> None: item.add_marker(skip_e2e) +# --------------------------------------------------------------------------- +# Shared OpenTelemetry fixture +# --------------------------------------------------------------------------- +# OpenTelemetry's SDK only allows ONE global TracerProvider per process. If +# two test files each create their own the second one is silently rejected +# and OTelObserver spans flow to whichever provider was installed first — +# causing the "wrong exporter" tests to see an empty span list. +# +# This fixture installs a single InMemorySpanExporter+TracerProvider once +# per session and hands it to every e2e test that needs to assert on OTel +# spans. The per-test fixture clears the exporter so tests stay isolated. + +_otel_exporter_singleton: Any = None + + +@pytest.fixture +def otel_exporter() -> Any: + """Return a shared InMemorySpanExporter, cleared for this test. + + Installs a TracerProvider + SimpleSpanProcessor + InMemorySpanExporter + on first use and reuses them for every subsequent call. Subsequent test + files that also want an OTel exporter must use this fixture rather than + calling ``trace.set_tracer_provider`` themselves. 
+ """ + global _otel_exporter_singleton + if _otel_exporter_singleton is None: + try: + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + except ImportError: + pytest.skip("opentelemetry-sdk not installed") + + _otel_exporter_singleton = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(_otel_exporter_singleton)) + try: + trace.set_tracer_provider(provider) + except Exception: + # Another test file may have installed a provider already. In + # that case this fixture can't guarantee span capture — the + # tests that depend on it should be updated to use only this + # fixture, not their own provider. + pass + + _otel_exporter_singleton.clear() + return _otel_exporter_singleton + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/tests/rag/test_hybrid_search.py b/tests/rag/test_hybrid_search.py index 9c60ee0..3cb0570 100644 --- a/tests/rag/test_hybrid_search.py +++ b/tests/rag/test_hybrid_search.py @@ -379,16 +379,16 @@ def test_tool_is_decorated(self, hybrid_tool: Any) -> None: assert hybrid_tool.search_knowledge_base.name == "search_knowledge_base" def test_tool_search_returns_string(self, hybrid_tool: Any) -> None: - result = hybrid_tool.search_knowledge_base.function(hybrid_tool, "selectools library") + result = hybrid_tool.search_knowledge_base.function("selectools library") assert isinstance(result, str) assert "selectools" in result.lower() def test_tool_search_includes_source(self, hybrid_tool: Any) -> None: - result = hybrid_tool.search_knowledge_base.function(hybrid_tool, "install selectools") + result = hybrid_tool.search_knowledge_base.function("install selectools") assert "install.md" in 
result def test_tool_search_includes_page(self, hybrid_tool: Any) -> None: - result = hybrid_tool.search_knowledge_base.function(hybrid_tool, "install selectools") + result = hybrid_tool.search_knowledge_base.function("install selectools") assert "page 1" in result def test_tool_search_no_results(self) -> None: @@ -400,7 +400,7 @@ def test_tool_search_no_results(self) -> None: searcher.add_documents([Document(text="Python programming")]) ht = HybridSearchTool(searcher=searcher, score_threshold=999.0) - result = ht.search_knowledge_base.function(ht, "Python") + result = ht.search_knowledge_base.function("Python") assert "No relevant information found" in result def test_tool_structured_search(self, hybrid_tool: Any) -> None: @@ -418,7 +418,7 @@ def test_tool_scores_hidden(self) -> None: searcher.add_documents([Document(text="Python programming")]) ht = HybridSearchTool(searcher=searcher, include_scores=False) - result = ht.search_knowledge_base.function(ht, "Python") + result = ht.search_knowledge_base.function("Python") assert "Relevance:" not in result diff --git a/tests/rag/test_rag_regression_phase3.py b/tests/rag/test_rag_regression_phase3.py index 0e22722..1d2c430 100644 --- a/tests/rag/test_rag_regression_phase3.py +++ b/tests/rag/test_rag_regression_phase3.py @@ -534,8 +534,10 @@ def _make_tool(self, text: str) -> "str": ] tool_obj = SemanticSearchTool(vector_store=mock_store, top_k=1) - # semantic_search is a Tool object; call the underlying function directly. - return tool_obj.semantic_search.function(tool_obj, "query") + # semantic_search is a @tool-decorated method; accessing it on an + # instance returns a Tool whose function has `self` pre-bound via + # the _BoundMethodTool descriptor, so we pass only the LLM kwarg. + return tool_obj.semantic_search.function("query") def test_short_text_no_ellipsis(self): """Text under 200 chars must NOT end with '...' 
(L2).""" diff --git a/tests/rag/test_rag_workflow.py b/tests/rag/test_rag_workflow.py index 26b3b7e..8e43cb5 100644 --- a/tests/rag/test_rag_workflow.py +++ b/tests/rag/test_rag_workflow.py @@ -407,8 +407,10 @@ def test_rag_tool_basic(self, mock_embedder: Mock) -> None: # Create RAG tool rag_tool = RAGTool(vector_store=vector_store, top_k=2, score_threshold=0.5) - # Search - call the underlying function of the decorated tool (pass self explicitly) - result = rag_tool.search_knowledge_base.function(rag_tool, "programming") + # Search via the Tool's function — @tool() on a method now returns + # a descriptor that binds self on attribute access, so we pass only + # the LLM-visible kwargs. + result = rag_tool.search_knowledge_base.function("programming") assert isinstance(result, str) assert len(result) > 0 @@ -430,9 +432,7 @@ def test_rag_tool_no_results(self, mock_embedder: Mock) -> None: # Create tool with high threshold — orthogonal vectors have similarity ~0 rag_tool = RAGTool(vector_store=vector_store, top_k=1, score_threshold=0.5) - result = rag_tool.search_knowledge_base.function( - rag_tool, "completely unrelated query xyz123" - ) + result = rag_tool.search_knowledge_base.function("completely unrelated query xyz123") assert "No relevant information found" in result diff --git a/tests/test_e2e_multimodal.py b/tests/test_e2e_multimodal.py index 6c2ced0..6b85cc4 100644 --- a/tests/test_e2e_multimodal.py +++ b/tests/test_e2e_multimodal.py @@ -99,6 +99,12 @@ def test_openai_gpt4o_mini_accepts_image(self, tiny_red_png: str) -> None: ) result = agent.run([msg]) assert result.content, "Empty response from OpenAI" + # Critical assertion: prove the image actually reached the model + # (without this the provider could silently drop the image and + # the test would still pass on "I can't see an image" style replies) + assert ( + "red" in result.content.lower() + ), f"OpenAI did not see the red test image. 
Got: {result.content[:200]}" assert result.usage.total_tokens > 0 @pytest.mark.skipif( @@ -118,6 +124,9 @@ def test_anthropic_claude_accepts_image(self, tiny_red_png: str) -> None: ) result = agent.run([msg]) assert result.content, "Empty response from Anthropic" + assert ( + "red" in result.content.lower() + ), f"Anthropic did not see the red test image. Got: {result.content[:200]}" assert result.usage.total_tokens > 0 @pytest.mark.skipif( @@ -137,4 +146,7 @@ def test_gemini_flash_accepts_image(self, tiny_red_png: str) -> None: ) result = agent.run([msg]) assert result.content, "Empty response from Gemini" + assert ( + "red" in result.content.lower() + ), f"Gemini did not see the red test image. Got: {result.content[:200]}" assert result.usage.total_tokens > 0 diff --git a/tests/test_e2e_otel_observer.py b/tests/test_e2e_otel_observer.py index 64f376a..af92729 100644 --- a/tests/test_e2e_otel_observer.py +++ b/tests/test_e2e_otel_observer.py @@ -21,9 +21,6 @@ pytest.importorskip("opentelemetry", reason="opentelemetry-api not installed") pytest.importorskip("opentelemetry.sdk", reason="opentelemetry-sdk not installed") -from opentelemetry import trace # noqa: E402 -from opentelemetry.sdk.trace import TracerProvider # noqa: E402 -from opentelemetry.sdk.trace.export import SimpleSpanProcessor # noqa: E402 from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( # noqa: E402 InMemorySpanExporter, ) @@ -34,21 +31,11 @@ pytestmark = pytest.mark.e2e - -# OpenTelemetry only allows ONE global TracerProvider per process. Set it up -# exactly once at module import time, reuse the same exporter across tests, -# and clear its span buffer in the fixture so tests stay isolated. 
-_EXPORTER = InMemorySpanExporter() -_PROVIDER = TracerProvider() -_PROVIDER.add_span_processor(SimpleSpanProcessor(_EXPORTER)) -trace.set_tracer_provider(_PROVIDER) - - -@pytest.fixture -def otel_exporter() -> InMemorySpanExporter: - """Return the shared in-memory exporter, cleared for this test.""" - _EXPORTER.clear() - return _EXPORTER +# The ``otel_exporter`` fixture comes from tests/conftest.py and installs a +# single process-wide TracerProvider + InMemorySpanExporter. Do NOT add a +# local fixture with the same name here — that breaks test isolation when +# another e2e file also wants OTel span capture (only the first file to +# call ``trace.set_tracer_provider`` wins, so the others see empty spans). @tool() diff --git a/tests/test_e2e_v0_21_0_simulations.py b/tests/test_e2e_v0_21_0_simulations.py new file mode 100644 index 0000000..303a6c2 --- /dev/null +++ b/tests/test_e2e_v0_21_0_simulations.py @@ -0,0 +1,371 @@ +"""Full-release end-to-end simulations for v0.21.0. + +The 12 isolated e2e test files prove that each v0.21.0 subsystem works +against its real backend in isolation. 
This file is different — each +scenario wires **multiple** v0.21.0 features together in a single agent +run against a real LLM, to prove the combinations work: + +- Scenario 1: CSV loader → real OpenAI embeddings → real FAISS → RAGTool + → real OpenAI Agent → real OTel SDK span capture +- Scenario 2: real Gemini agent with a multimodal image input + the new + execute_python toolbox tool, OTel observer attached +- Scenario 3: real Anthropic agent with query_sqlite + execute_python + toolbox tools against a real SQLite database +- Scenario 4: real Qdrant vector store with real OpenAI embeddings wired + into a real OpenAI Agent (skipped if Qdrant is not reachable) + +These simulations are the only place we verify that: + +- The @tool() schema on the new toolbox tools is correct enough for + real providers' native tool calling to actually pick them +- The real RAGTool + real vector store + real embeddings + real LLM + retrieval path actually returns useful context to the LLM +- OTelObserver captures spans on REAL LLM calls (not just fake provider + stubs), including gen_ai.* attributes with actual model / token data +- Multimodal messages flow through an iterative agent loop that also + uses tools, not just a single one-shot call + +Cost: every scenario that runs hits a real API. Keep prompts short, +max_tokens small, and max_iterations capped so the whole file runs for +well under $0.01 per invocation. 
+ +Run with: + + pytest tests/test_e2e_v0_21_0_simulations.py --run-e2e -v +""" + +from __future__ import annotations + +import os +import socket +import sqlite3 +import struct +import zlib +from pathlib import Path + +import pytest + +from selectools import Agent, AgentConfig +from selectools.observe import OTelObserver +from selectools.providers.anthropic_provider import AnthropicProvider +from selectools.providers.gemini_provider import GeminiProvider +from selectools.providers.openai_provider import OpenAIProvider +from selectools.rag import Document, DocumentLoader +from selectools.rag.stores import FAISSVectorStore +from selectools.rag.tools import RAGTool +from selectools.toolbox import code_tools, db_tools + +pytestmark = pytest.mark.e2e + + +# --------------------------------------------------------------------------- +# OpenTelemetry fixture comes from tests/conftest.py (session-wide singleton) +# --------------------------------------------------------------------------- + +pytest.importorskip("opentelemetry", reason="opentelemetry-api not installed") +pytest.importorskip("opentelemetry.sdk", reason="opentelemetry-sdk not installed") + +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( # noqa: E402 + InMemorySpanExporter, +) + +# --------------------------------------------------------------------------- +# Small helpers +# --------------------------------------------------------------------------- + + +def _require(env_var: str) -> None: + if not os.environ.get(env_var): + pytest.skip(f"{env_var} not set") + + +def _make_tiny_red_png() -> bytes: + """Build a 4x4 solid-red PNG with no external deps.""" + width, height = 4, 4 + row = b"\x00" + b"\xff\x00\x00" * width + raw = row * height + + def chunk(ctype: bytes, data: bytes) -> bytes: + return ( + struct.pack(">I", len(data)) + + ctype + + data + + struct.pack(">I", zlib.crc32(ctype + data) & 0xFFFFFFFF) + ) + + sig = b"\x89PNG\r\n\x1a\n" + ihdr = struct.pack(">IIBBBBB", width, 
height, 8, 2, 0, 0, 0) + idat = zlib.compress(raw) + return sig + chunk(b"IHDR", ihdr) + chunk(b"IDAT", idat) + chunk(b"IEND", b"") + + +def _qdrant_reachable(url: str = "http://localhost:6333") -> bool: + from urllib.parse import urlparse + + parsed = urlparse(url) + host = parsed.hostname or "localhost" + port = parsed.port or 6333 + try: + with socket.create_connection((host, port), timeout=2): + return True + except OSError: + return False + + +# --------------------------------------------------------------------------- +# Scenario 1 — RAG pipeline with real OpenAI embeddings + FAISS + OpenAI agent + OTel +# --------------------------------------------------------------------------- + + +class TestScenario1_RAGWithOpenAI: + """CSV → real embeddings → FAISS → RAGTool → real OpenAI agent → OTel spans.""" + + def test_agent_answers_from_csv_backed_faiss( + self, tmp_path: Path, otel_exporter: InMemorySpanExporter + ) -> None: + _require("OPENAI_API_KEY") + pytest.importorskip("faiss", reason="faiss-cpu not installed") + + # 1. Build a small CSV of facts with a deliberately unusual anchor word + # so we can tell whether the agent actually retrieved from our docs + # (vs. answering from the LLM's prior knowledge) + csv_path = tmp_path / "facts.csv" + csv_path.write_text( + "topic,body\n" + "selectools," + '"The selectools library was first tagged with the magic codename ZOOPLANKTON-91 in v0.21.0."\n' + "python," + '"Python is a high-level programming language created by Guido van Rossum."\n', + encoding="utf-8", + ) + + # 2. Load via the new CSV loader + docs = DocumentLoader.from_csv( + str(csv_path), text_column="body", metadata_columns=["topic"] + ) + assert len(docs) == 2 + + # 3. Real OpenAI embeddings + from selectools.embeddings.openai import OpenAIEmbeddingProvider + + embedder = OpenAIEmbeddingProvider(model="text-embedding-3-small") + + # 4. Real FAISS store + store = FAISSVectorStore(embedder=embedder) + store.add_documents(docs) + + # 5. 
Real RAGTool + rag_tool = RAGTool(vector_store=store, top_k=2) + + # 6. Real OpenAI agent with OTel observer + agent = Agent( + tools=[rag_tool.search_knowledge_base], + provider=OpenAIProvider(), + config=AgentConfig( + model="gpt-4o-mini", + max_tokens=150, + max_iterations=4, + observers=[OTelObserver(tracer_name="selectools-sim")], + ), + ) + + # 7. Ask a question that REQUIRES retrieval (the anchor word is unique) + result = agent.run( + "What is the magic codename associated with selectools v0.21.0? " + "Use the search_knowledge_base tool and quote the codename verbatim." + ) + + # 8. Assert the agent actually retrieved from OUR docs + assert "ZOOPLANKTON" in result.content.upper(), ( + f"Agent did not return the anchor word from the CSV. " f"Got: {result.content[:300]}" + ) + assert result.usage.total_tokens > 0 + + # 9. Assert OTel captured real spans for this real run + spans = otel_exporter.get_finished_spans() + assert len(spans) > 0, "OTel captured no spans for the real LLM+tool run" + saw_gen_ai = any((s.attributes or {}).get("gen_ai.system") == "selectools" for s in spans) + assert saw_gen_ai, "No span carried gen_ai.system='selectools'" + + +# --------------------------------------------------------------------------- +# Scenario 2 — Multimodal + toolbox + OTel with real Gemini +# --------------------------------------------------------------------------- + + +class TestScenario2_MultimodalWithGemini: + """Real Gemini vision call + execute_python tool + OTel in one run.""" + + def test_gemini_sees_image_and_calls_python_tool( + self, tmp_path: Path, otel_exporter: InMemorySpanExporter + ) -> None: + ( + _require("GOOGLE_API_KEY") if not os.environ.get("GEMINI_API_KEY") else None + ) # either is fine + + # 1. Write a tiny red PNG to disk (image_message needs a file path) + png_path = tmp_path / "red.png" + png_path.write_bytes(_make_tiny_red_png()) + + # 2. 
Real Gemini agent with execute_python + OTel + agent = Agent( + tools=[code_tools.execute_python], + provider=GeminiProvider(), + config=AgentConfig( + model="gemini-2.5-flash", + max_tokens=200, + max_iterations=4, + observers=[OTelObserver(tracer_name="selectools-sim")], + ), + ) + + # 3. Build a multimodal message that asks for BOTH vision AND tool use + from selectools import image_message + + msg = image_message( + str(png_path), + prompt=( + "Step 1: In one word, what primary color dominates this tiny image? " + "Step 2: Use the execute_python tool to compute and print the result of 7*6. " + "Then give me a one-sentence final answer containing both the color and the number." + ), + ) + + result = agent.run([msg]) + + # 4. Assert the real Gemini call did BOTH things: + # (a) saw the image (mentions red) + # (b) called execute_python and got 42 + content_lower = result.content.lower() + assert "red" in content_lower, f"Gemini did not describe the image: {result.content[:300]}" + assert ( + "42" in result.content + ), f"Gemini did not use execute_python to compute 7*6: {result.content[:300]}" + assert result.usage.total_tokens > 0 + + # 5. OTel should have captured the run + spans = otel_exporter.get_finished_spans() + assert len(spans) > 0, "OTel captured no spans" + + +# --------------------------------------------------------------------------- +# Scenario 3 — Toolbox integration with real Anthropic agent +# --------------------------------------------------------------------------- + + +class TestScenario3_ToolboxWithAnthropic: + """Real Anthropic Claude picks and calls query_sqlite + execute_python.""" + + def test_claude_uses_sqlite_tool(self, tmp_path: Path) -> None: + _require("ANTHROPIC_API_KEY") + + # 1. 
Create a real SQLite db with deliberately distinctive data + db_path = tmp_path / "people.db" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE people (name TEXT, age INTEGER)") + conn.executemany( + "INSERT INTO people VALUES (?, ?)", + [("alice", 29), ("bob", 31), ("carol", 47), ("dave", 23)], + ) + conn.commit() + conn.close() + + # 2. Real Anthropic agent with the new db_tools AND code_tools + agent = Agent( + tools=[db_tools.query_sqlite, code_tools.execute_python], + provider=AnthropicProvider(), + config=AgentConfig( + model="claude-haiku-4-5", + max_tokens=300, + max_iterations=4, + ), + ) + + # 3. Ask a question that requires the sqlite tool + result = agent.run( + f"Use the query_sqlite tool with db_path='{db_path}' to find the " + f"name of the oldest person in the 'people' table. " + f"Respond with just their name." + ) + + # 4. Assert the agent called the tool and got 'carol' (the oldest at 47) + assert ( + "carol" in result.content.lower() + ), f"Anthropic did not find carol via query_sqlite: {result.content[:300]}" + assert result.usage.total_tokens > 0 + + +# --------------------------------------------------------------------------- +# Scenario 4 — Qdrant RAG with real OpenAI agent (skipped if no Qdrant) +# --------------------------------------------------------------------------- + + +class TestScenario4_RAGWithQdrant: + """Same shape as scenario 1 but proves Qdrant works end-to-end too.""" + + def test_agent_answers_from_qdrant_backed_rag( + self, otel_exporter: InMemorySpanExporter + ) -> None: + _require("OPENAI_API_KEY") + pytest.importorskip("qdrant_client", reason="qdrant-client not installed") + + qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") + if not _qdrant_reachable(qdrant_url): + pytest.skip(f"Qdrant not reachable at {qdrant_url}") + + import uuid + + from selectools.embeddings.openai import OpenAIEmbeddingProvider + from selectools.rag.stores import QdrantVectorStore + + embedder = 
OpenAIEmbeddingProvider(model="text-embedding-3-small") + store = QdrantVectorStore( + embedder=embedder, + collection_name=f"selectools_sim_{uuid.uuid4().hex[:8]}", + url=qdrant_url, + api_key=os.environ.get("QDRANT_API_KEY"), + prefer_grpc=False, + ) + + # Add anchor documents with a unique phrase + store.add_documents( + [ + Document( + text=( + "The selectools v0.21.0 connector expansion was internally " + "nicknamed PROJECT FLAMINGO-17 by the NichevLabs team." + ), + metadata={"src": "internal"}, + ), + Document( + text="Selectools is an AI agent framework written in Python.", + metadata={"src": "public"}, + ), + ] + ) + + try: + rag_tool = RAGTool(vector_store=store, top_k=2) + agent = Agent( + tools=[rag_tool.search_knowledge_base], + provider=OpenAIProvider(), + config=AgentConfig( + model="gpt-4o-mini", + max_tokens=150, + max_iterations=4, + observers=[OTelObserver(tracer_name="selectools-sim")], + ), + ) + + result = agent.run( + "What was the internal nickname for the selectools v0.21.0 connector " + "expansion? Use search_knowledge_base and quote it verbatim." + ) + + assert ( + "FLAMINGO" in result.content.upper() + ), f"OpenAI+Qdrant RAG did not retrieve the anchor: {result.content[:300]}" + assert result.usage.total_tokens > 0 + assert len(otel_exporter.get_finished_spans()) > 0 + finally: + store.clear() From 1e97df1d63e975c28bdf9e2eeb2b6ae41efcffb8 Mon Sep 17 00:00:00 2001 From: John Niche Date: Wed, 8 Apr 2026 10:38:21 -0300 Subject: [PATCH 06/17] test(e2e): persona-based app simulations for v0.21.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous e2e work proved individual v0.21.0 subsystems work in isolation (tests/test_e2e_*) and that multiple features compose (the 4 scenarios in tests/test_e2e_v0_21_0_simulations.py). Those are integration tests — they prove the wiring doesn't throw. 
This commit adds something different: **app-shaped** simulations that match the idiom already used in tests/test_simulation_evals.py. Each test sets up an agent with a realistic system prompt, drives it through a plausible user workflow, and asserts on the behaviour a real app author would care about. App 1 — Documentation Q&A Bot ----------------------------- A support bot for a fictional product called "Skylake" backed by a FAQ CSV. The CSV is loaded via the new DocumentLoader.from_csv, embedded with real OpenAI text-embedding-3-small, indexed in real FAISS, and wrapped in a RAGTool. The bot runs on real OpenAI gpt-4o-mini with a ConversationMemory so it can carry context across turns. Three asserts: - Turn 1: bot answers an in-KB install question by quoting KB facts (curl URL, version string) - Turn 2: same agent instance answers a follow-up port question (8742) — proves memory + tool calling continue to work across turns on a memory-enabled agent - Turn 3: bot refuses an out-of-KB WebSocket question instead of hallucinating a number App 2 — Data Analyst Bot ------------------------ An analytics assistant over a small SQLite sales database. Real Anthropic Claude agent with query_sqlite + execute_python. The user asks a question whose answer requires *chaining*: 1. SQL query to find the top region by total sales 2. Python computation for the average 3. Natural-language explanation Asserts that "EU" and "2000" both appear in the final answer, proving the LLM successfully chained two real tool calls end-to-end. App 3 — Knowledge Base Librarian --------------------------------- The only simulation that exercises ALL FOUR new document loaders in a single workflow: - DocumentLoader.from_csv (product catalog) - DocumentLoader.from_json (release notes) - DocumentLoader.from_html (about page) Real OpenAI embeddings, real Qdrant store, real Gemini gemini-2.5-flash agent with a RAGTool. 
Three asserts, one per source format, each asking for a deliberately unique anchor phrase (THUNDERCAT-7, MOONWALK, VANTA-NORTH) that exists in exactly one of the loaded files. Proves that every loader's output is actually retrievable through the full embed → store → search → LLM pipeline. Verification ------------ Solo run of tests/test_e2e_v0_21_0_apps.py: 7 passed in 30.41s Full e2e suite including new app sims: 54 collected → 51 passed, 3 skipped (Azure OpenAI x2 + Langfuse x1, no creds), 0 failed, 50.67s total Full non-e2e suite: 4961 passed, 3 skipped, 239 deselected (+7 from the new app file), 0 regressions --- tests/test_e2e_v0_21_0_apps.py | 446 +++++++++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 tests/test_e2e_v0_21_0_apps.py diff --git a/tests/test_e2e_v0_21_0_apps.py b/tests/test_e2e_v0_21_0_apps.py new file mode 100644 index 0000000..6094959 --- /dev/null +++ b/tests/test_e2e_v0_21_0_apps.py @@ -0,0 +1,446 @@ +"""Persona-based app simulations for v0.21.0. + +These are **not** integration tests of "does feature A combined with +feature B work". They are simulations of **real application use cases**, +matching the selectools simulation idiom from ``tests/test_simulation_evals.py``: + +- Each test sets up an agent with a realistic system prompt +- Multi-turn conversations use real ``ConversationMemory`` +- Real LLM calls drive the agent through plausible user workflows +- Assertions check the *behaviour* of the app, not just the wiring + +Three app shapes are covered: + +1. **Documentation Q&A bot** (RAG pipeline used the way a real support + bot would): FAQ CSV loader → real OpenAI embeddings → FAISS → + RAGTool → multi-turn user conversation with memory → agent must cite + from KB and refuse on out-of-KB questions + +2. 
**Data analyst bot** (toolbox chaining the way a real analytics bot + would): real SQLite sales db → Claude with ``query_sqlite`` + + ``execute_python`` → agent must query, compute, and answer with a + real number + +3. **Knowledge base librarian** (all four new document loaders feeding a + real Qdrant store → Gemini agent using RAGTool to answer a question + whose answer is split across multiple source files) + +Each simulation is gated behind ``--run-e2e`` and will skip cleanly when +credentials or backing services aren't available. Total cost per full +run is under $0.01 at current pricing. + +Run with: + + pytest tests/test_e2e_v0_21_0_apps.py --run-e2e -v +""" + +from __future__ import annotations + +import json +import os +import socket +import sqlite3 +import uuid +from pathlib import Path + +import pytest + +from selectools import Agent, AgentConfig +from selectools.memory import ConversationMemory +from selectools.rag import DocumentLoader +from selectools.rag.stores import FAISSVectorStore +from selectools.rag.tools import RAGTool +from selectools.toolbox import code_tools, db_tools + +pytestmark = pytest.mark.e2e + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + + +def _openai_or_skip() -> tuple: + if not os.environ.get("OPENAI_API_KEY"): + pytest.skip("OPENAI_API_KEY not set") + from selectools.providers.openai_provider import OpenAIProvider + + return OpenAIProvider(), "gpt-4o-mini" + + +def _anthropic_or_skip() -> tuple: + if not os.environ.get("ANTHROPIC_API_KEY"): + pytest.skip("ANTHROPIC_API_KEY not set") + from selectools.providers.anthropic_provider import AnthropicProvider + + return AnthropicProvider(), "claude-haiku-4-5" + + +def _gemini_or_skip() -> tuple: + if not (os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")): + pytest.skip("GOOGLE_API_KEY / GEMINI_API_KEY not set") + from 
selectools.providers.gemini_provider import GeminiProvider + + return GeminiProvider(), "gemini-2.5-flash" + + +def _openai_embedder(): + pytest.importorskip("openai") + from selectools.embeddings.openai import OpenAIEmbeddingProvider + + return OpenAIEmbeddingProvider(model="text-embedding-3-small") + + +def _qdrant_reachable(url: str = "http://localhost:6333") -> bool: + from urllib.parse import urlparse + + parsed = urlparse(url) + host = parsed.hostname or "localhost" + port = parsed.port or 6333 + try: + with socket.create_connection((host, port), timeout=2): + return True + except OSError: + return False + + +# =========================================================================== +# App 1: Documentation Q&A Bot +# =========================================================================== +# +# Persona: a support bot for a fictional product called "Skylake" whose +# knowledge base consists of a FAQ CSV. A real user opens the bot, asks +# several questions, some of which are covered and some aren't. The bot +# should answer with information from the KB and should refuse (or say it +# doesn't know) for out-of-KB questions. This is the canonical RAG support +# bot pattern. + + +@pytest.fixture +def skylake_faq_agent(tmp_path: Path): + """Build a real RAG support bot for the fictional Skylake product.""" + _openai_or_skip() # fail fast if no creds + pytest.importorskip("faiss", reason="faiss-cpu not installed") + + # 1. Realistic FAQ CSV — five entries with unique anchor facts so we + # can assert that retrieval actually worked + faq_csv = tmp_path / "skylake_faq.csv" + faq_csv.write_text( + "question,answer\n" + '"How do I install Skylake?",' + '"Install Skylake by running: curl -sL https://skylake.sh | bash. Version 4.2.1 is the latest stable release."\n' + '"What is the default port?",' + '"Skylake listens on port 8742 by default. 
You can override this with the --port flag or the SKYLAKE_PORT environment variable."\n'
        '"How do I reset my password?",'
        '"Run skylake auth reset --user <email>. A reset link will be emailed within 15 minutes."\n'
        '"Does Skylake support single sign-on?",'
        '"Yes, Skylake supports SAML 2.0 and OpenID Connect for SSO. Configuration lives in /etc/skylake/sso.yaml."\n'
        '"What is the monthly uptime SLA?",'
        '"The enterprise plan includes a 99.95% monthly uptime SLA with service credits for breaches."\n',
        encoding="utf-8",
    )

    # 2. Load via the new CSV loader, embed, and index in real FAISS
    docs = DocumentLoader.from_csv(
        str(faq_csv), text_column="answer", metadata_columns=["question"]
    )
    assert len(docs) == 5

    embedder = _openai_embedder()
    store = FAISSVectorStore(embedder=embedder)
    store.add_documents(docs)

    # 3. Wire the RAG tool into a real OpenAI agent with a support-bot
    # system prompt. Use ConversationMemory so the bot can actually
    # carry context across turns.
    provider, model = _openai_or_skip()
    rag_tool = RAGTool(vector_store=store, top_k=3)
    return Agent(
        tools=[rag_tool.search_knowledge_base],
        provider=provider,
        memory=ConversationMemory(max_messages=20),
        config=AgentConfig(
            model=model,
            system_prompt=(
                "You are the official support bot for a product called Skylake. "
                "Always use the search_knowledge_base tool before answering. "
                "If the knowledge base does not contain the answer, say you "
                "don't know — do NOT invent details. Be concise: 1-2 sentences."
            ),
            max_tokens=200,
            max_iterations=4,
        ),
    )


class TestApp1_DocsQABot:
    def test_bot_answers_install_question_from_kb(self, skylake_faq_agent: Agent) -> None:
        """Turn 1: user asks an in-KB question. 
Bot should quote KB facts.""" + result = skylake_faq_agent.run("How do I install Skylake?") + assert result.content + content = result.content.lower() + # KB anchor facts that a correct retrieval would surface + assert ( + "curl" in content or "skylake.sh" in content or "4.2.1" in content + ), f"Bot did not retrieve install instructions from KB. Got: {result.content[:300]}" + + def test_bot_answers_port_question_using_memory(self, skylake_faq_agent: Agent) -> None: + """Turn 2 (same agent): different in-KB question. + + Exercises ConversationMemory by making a SECOND call on the same + agent instance. If memory is broken the agent would either drop + context or re-send the whole first turn, and token usage on the + second call would look weird. More importantly, this proves that + tool calling continues to work across turns on a memory-enabled + agent — a bug-prone area. + """ + skylake_faq_agent.run("How do I install Skylake?") # Turn 1 + result = skylake_faq_agent.run("Got it. What port does it listen on?") # Turn 2 + assert result.content + assert "8742" in result.content, ( + f"Bot did not retrieve the port fact from KB on turn 2. " f"Got: {result.content[:300]}" + ) + + def test_bot_refuses_out_of_kb_question(self, skylake_faq_agent: Agent) -> None: + """User asks something NOT in the KB. Bot must not hallucinate.""" + result = skylake_faq_agent.run( + "What is the maximum WebSocket message size Skylake supports?" + ) + assert result.content + content = result.content.lower() + # A correct bot says "don't know" (or similar). We don't require an + # exact phrase — just that the bot does not confidently invent a + # numeric answer. Accept any phrasing that signals uncertainty. 
+ signals_uncertainty = ( + "don't know" in content + or "do not know" in content + or "not in the knowledge base" in content + or "not available" in content + or "can't find" in content + or "cannot find" in content + or "not listed" in content + or "no information" in content + or "not covered" in content + or "unable to find" in content + ) + assert signals_uncertainty, ( + f"Bot should refuse out-of-KB questions instead of hallucinating. " + f"Got: {result.content[:300]}" + ) + + +# =========================================================================== +# App 2: Data Analyst Bot +# =========================================================================== +# +# Persona: an analytics assistant for a small sales database. A real user +# asks a business question whose answer requires: +# 1. Running a SQL query to pull raw data +# 2. Using Python to compute a derived number +# 3. Explaining the result in natural language +# +# This exercises multi-step tool chaining by a real LLM — a path that +# mock tests cannot validate because the LLM decides when each tool is +# needed and how to pass data between them. + + +@pytest.fixture +def sales_db(tmp_path: Path) -> Path: + """Create a real SQLite sales db with deliberately distinctive numbers.""" + db_path = tmp_path / "sales.db" + conn = sqlite3.connect(str(db_path)) + conn.execute( + "CREATE TABLE orders (id INTEGER PRIMARY KEY, region TEXT, " "amount_usd REAL, month TEXT)" + ) + # Carefully chosen so the answer is unambiguous: region 'EU' has the + # highest total (1000 + 2000 + 3000 = 6000) and a specific average + # (2000) that the LLM should be able to verify with Python. 
+ rows = [ + (1, "US", 500, "2026-01"), + (2, "US", 600, "2026-02"), + (3, "US", 700, "2026-03"), + (4, "EU", 1000, "2026-01"), + (5, "EU", 2000, "2026-02"), + (6, "EU", 3000, "2026-03"), + (7, "APAC", 800, "2026-01"), + (8, "APAC", 900, "2026-02"), + ] + conn.executemany("INSERT INTO orders VALUES (?, ?, ?, ?)", rows) + conn.commit() + conn.close() + return db_path + + +class TestApp2_DataAnalystBot: + def test_bot_finds_top_region_and_computes_average(self, sales_db: Path) -> None: + """Multi-step: query → compute → explain.""" + provider, model = _anthropic_or_skip() + + agent = Agent( + tools=[db_tools.query_sqlite, code_tools.execute_python], + provider=provider, + config=AgentConfig( + model=model, + system_prompt=( + "You are a data analyst assistant. You have two tools: " + "query_sqlite for reading from a SQLite database, and " + "execute_python for running small Python snippets when " + "you need to compute a derived value. Always use the " + "tools to get real numbers — do not guess." + ), + max_tokens=500, + max_iterations=6, + ), + ) + + result = agent.run( + f"Use db_path='{sales_db}'. Find the region with the highest " + f"total sales in the 'orders' table, and report its average " + f"order amount. Show your work." + ) + assert result.content + content = result.content + # The correct region is EU (total = 6000) + assert ( + "EU" in content or "eu" in content.lower() + ), f"Bot did not identify EU as top region. Got: {content[:400]}" + # The average of EU orders is 2000. Accept '2000' or '2,000'. + assert "2000" in content or "2,000" in content, ( + f"Bot did not compute the correct average (2000). 
" f"Got: {content[:400]}" + ) + + +# =========================================================================== +# App 3: Knowledge Base Librarian +# =========================================================================== +# +# Persona: a librarian that ingests docs from heterogeneous sources (CSV, +# JSON, HTML, URL) into a real Qdrant store and answers questions whose +# truth is split across sources. This exercises every new v0.21.0 +# document loader in a single realistic workflow. + + +@pytest.fixture +def librarian_agent(tmp_path: Path): + """Build a real Qdrant-backed librarian agent with heterogeneous sources.""" + pytest.importorskip("qdrant_client", reason="qdrant-client not installed") + qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") + if not _qdrant_reachable(qdrant_url): + pytest.skip(f"Qdrant not reachable at {qdrant_url}") + _gemini_or_skip() # fail fast if no Gemini creds + + from selectools.rag.stores import QdrantVectorStore + + # 1. CSV source — product catalog with unique anchor phrase + csv_path = tmp_path / "products.csv" + csv_path.write_text( + "sku,description\n" + '"SKY-001","The Skylake SKY-001 is an edge router shipping with the internal codename THUNDERCAT-7."\n' + '"SKY-002","The Skylake SKY-002 is a development kit."\n', + encoding="utf-8", + ) + + # 2. JSON source — release notes with another unique anchor phrase + json_path = tmp_path / "releases.json" + json_path.write_text( + json.dumps( + [ + { + "version": "4.2.1", + "body": ( + "Skylake 4.2.1 was released on the full-moon day and " + "is internally referenced as the MOONWALK release." + ), + }, + {"version": "4.2.0", "body": "Skylake 4.2.0 was a bug-fix release."}, + ] + ), + encoding="utf-8", + ) + + # 3. HTML source — marketing blurb with a third anchor phrase + html_path = tmp_path / "about.html" + html_path.write_text( + "
<html><body>"
        "<p>Skylake was founded in Helsinki in 2023.</p>"
        "<p>The team operates under the office code VANTA-NORTH.</p>"
        "</body></html>
", + encoding="utf-8", + ) + + # 4. Load via all four loaders + csv_docs = DocumentLoader.from_csv(str(csv_path), text_column="description") + json_docs = DocumentLoader.from_json( + str(json_path), text_field="body", metadata_fields=["version"] + ) + html_docs = DocumentLoader.from_html(str(html_path)) + all_docs = csv_docs + json_docs + html_docs + assert len(all_docs) >= 5 # 2 csv + 2 json + 1 html + + embedder = _openai_embedder() # needs OPENAI_API_KEY + if not os.environ.get("OPENAI_API_KEY"): + pytest.skip("OPENAI_API_KEY not set for embedding") + + store = QdrantVectorStore( + embedder=embedder, + collection_name=f"skylake_kb_{uuid.uuid4().hex[:8]}", + url=qdrant_url, + api_key=os.environ.get("QDRANT_API_KEY"), + prefer_grpc=False, + ) + store.add_documents(all_docs) + + provider, model = _gemini_or_skip() + rag_tool = RAGTool(vector_store=store, top_k=4) + + agent = Agent( + tools=[rag_tool.search_knowledge_base], + provider=provider, + config=AgentConfig( + model=model, + system_prompt=( + "You are the Skylake knowledge base librarian. Always use " + "search_knowledge_base to answer. Quote anchor phrases from " + "the docs verbatim when asked for them. Keep answers short." + ), + max_tokens=200, + max_iterations=4, + ), + ) + + try: + yield agent + finally: + # Cleanup: drop the collection + try: + store.clear() + except Exception: + pass + + +class TestApp3_KnowledgeBaseLibrarian: + def test_librarian_retrieves_from_csv_source(self, librarian_agent: Agent) -> None: + """Asks a question whose answer lives in the CSV-loaded docs.""" + result = librarian_agent.run( + "What is the internal codename for the SKY-001 router? " "Quote it verbatim." + ) + assert result.content + assert "THUNDERCAT" in result.content.upper(), ( + f"Librarian did not retrieve the CSV anchor phrase. 
" f"Got: {result.content[:300]}" + ) + + def test_librarian_retrieves_from_json_source(self, librarian_agent: Agent) -> None: + """Asks a question whose answer lives in the JSON-loaded docs.""" + result = librarian_agent.run("What is the internal reference name for Skylake 4.2.1?") + assert result.content + assert "MOONWALK" in result.content.upper(), ( + f"Librarian did not retrieve the JSON anchor phrase. " f"Got: {result.content[:300]}" + ) + + def test_librarian_retrieves_from_html_source(self, librarian_agent: Agent) -> None: + """Asks a question whose answer lives in the HTML-loaded docs.""" + result = librarian_agent.run("What is the Skylake office code?") + assert result.content + assert "VANTA-NORTH" in result.content.upper(), ( + f"Librarian did not retrieve the HTML anchor phrase. " f"Got: {result.content[:300]}" + ) From 250468af80409d9fe83d12441c4c10d5e65cddb9 Mon Sep 17 00:00:00 2001 From: John Niche Date: Wed, 8 Apr 2026 10:55:41 -0300 Subject: [PATCH 07/17] =?UTF-8?q?docs(release):=20doc=20audit=20sweep=20?= =?UTF-8?q?=E2=80=94=20counts,=20CHANGELOG=20Fixed=20section,=20onboarding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-reference audit run (via the project /audit and /doc-audit-skill skills with 4 parallel QA sub-agents) found 13 MUST-FIX issues left over after the earlier release-prep commit. This commit fixes all of them. CHANGELOG.md ------------ - Add the missing ### Fixed section documenting bugs 6, 7, 8 (RAGTool @tool() on methods, Gemini + Anthropic content_parts image drop) and the Qdrant query_points() API migration. These landed in commits f4401f2 and b047c1a after the initial doc commit but never made it into the release notes. - Add the missing ### Tests section documenting the 345 new e2e tests, 4 integration simulations, and 7 app-shaped simulations. - Update Stats: 4,960 -> 5,203 tests. README.md --------- - Line 489 and 1111: stale "4960 Tests" -> 5203. 
- Line 133: restore the historical "4612 tests total" in the v0.19 What's New section (I had over-corrected it to 4960 earlier). - Line 460: "5 LLM Providers" enumeration was missing Azure OpenAI, even though it's claimed in the count. Added. - Line 467: "4 Vector Stores" -> "7 Vector Stores" with FAISS, Qdrant, pgvector added to the list. - Install section: added "pip install selectools[observe]" and "pip install selectools[postgres]" extras and updated the [rag] extras comment to mention FAISS, Qdrant, and beautifulsoup4. CONTRIBUTING.md + docs/CONTRIBUTING.md ------------------------------------- - Main file was stale: v0.20.1 / 4612 tests. Updated to v0.21.0 / 5203. - docs/CONTRIBUTING.md was stale by TWO releases (v0.19.2, 61 examples, 24 tools, 100% coverage, different release script examples). Fixed by re-copying from the updated CONTRIBUTING.md. docs/llms.txt ------------- - Line 3: "4960 tests at 95% coverage" -> "5203 tests at 95% coverage". docs/QUICKSTART.md ------------------ - Added a v0.21.0 callout under Step 5 (RAG) linking to the new FAISS.md, QDRANT.md, and PGVECTOR.md module docs and mentioning the new DocumentLoader.from_csv / from_json / from_html / from_url loaders. Minimal addition — does not rewrite the working example. docs/index.md ------------- - RAG Pipeline feature card: "4 vector store backends" -> "7 vector store backends", listed all 7 explicitly, and mentioned the four new document loaders. landing/index.html ------------------ - All 8 occurrences of "4612" / "4,612" in visible text, schema descriptions, animated counter targets, and FAQ answers -> "5203" / "5,203". Pure text substitution, no visual changes. 
Verification ------------ - mkdocs build: clean (only the pre-existing Material "Excluding README.md" template warning, unrelated to this release) - Full non-e2e suite: 4961 passed, 3 skipped, 239 deselected, 0 regressions - diff CHANGELOG.md docs/CHANGELOG.md: byte-identical - diff CONTRIBUTING.md docs/CONTRIBUTING.md: byte-identical - grep for any remaining 4612 / 4960 in user-facing docs: clean (only legitimate "up from 4,612" delta reference in the 0.21.0 Stats block remains) --- CHANGELOG.md | 34 ++++++++++++++- CONTRIBUTING.md | 10 ++--- README.md | 16 ++++--- docs/CHANGELOG.md | 34 ++++++++++++++- docs/CONTRIBUTING.md | 102 ++++++++++++++++++++++++++----------------- docs/QUICKSTART.md | 8 ++++ docs/index.md | 2 +- docs/llms.txt | 2 +- landing/index.html | 16 +++---- 9 files changed, 160 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23d551f..c27a086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,8 +46,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - `stability.beta()` and `stability.stable()` decorators now accept arbitrary objects via an `Any` overload, in addition to classes and callables. Lets `@beta` mark `Tool` instances produced by `@tool()`. +### Fixed + +#### RAG — `@tool()` on class methods (shipping blocker caught by real-call simulations) +- `@tool()` applied to a method (`def f(self, query: str)`) produced a class-level `Tool` whose `function` was the *unbound* method. When the agent executor called `tool.function(**llm_kwargs)` Python raised `TypeError: missing 1 required positional argument: 'self'` and the LLM saw "Tool Execution Failed", giving up after a few iterations. This fundamentally broke the canonical RAG pattern documented across selectools: + ```python + rag_tool = RAGTool(vector_store=store) + agent = Agent(tools=[rag_tool.search_knowledge_base], provider=...) + ``` + `RAGTool`, `SemanticSearchTool`, and `HybridSearchTool` were all affected. 
The existing `tests/rag/test_rag_workflow.py` coverage never caught it because those tests built the agent and then only asserted `isinstance(agent, Agent)` — they never called `agent.run()`. +- **Fix:** new `_BoundMethodTool` descriptor in `selectools/tools/decorators.py`. `@tool()` detects when the first parameter is `self` and returns a descriptor that binds per-instance on attribute access via `functools.partial(original_fn, instance)`. Class-level access falls through to a template `Tool` so introspection (`MyClass.method.name`, `.description`, `.parameters`) still works. + +#### Qdrant — migrated to `query_points()` API +- `QdrantVectorStore.search()` called `self.client.search(query_vector=…)`, which was removed from `qdrant-client >=1.13`. Users on any recent `qdrant-client` would have hit `AttributeError: 'QdrantClient' object has no attribute 'search'` on their first query. The existing mock-based unit tests didn't catch it because they mocked `QdrantClient` and accepted whatever attribute the test asked for. +- **Fix:** migrated to `client.query_points(query=…)` and unwrap `response.points`. Also: return `[]` on 404 when the collection has been dropped by `clear()`, to match `FAISSVectorStore` semantics (search-after-clear returns `[]`, doesn't raise). + +#### Multimodal — Gemini and Anthropic providers silently dropped images +- `GeminiProvider._format_messages` only handled the legacy `message.image_base64` attribute. The new `image_message()` helper puts the image in `message.content_parts` and explicitly sets `message.image_base64 = None`, so Gemini received only the text prompt and replied "I cannot see images." Every Gemini vision user would have hit this. +- `AnthropicProvider` had the exact same bug — Claude replied "I don't see any image attached." Every Claude vision user would have hit this. +- OpenAI was unaffected because `providers/_openai_compat.py` already iterates `content_parts`. 
+- **Fix:** both providers now iterate `message.content_parts` and convert each `ContentPart` to the provider's native image shape (`types.Part(inline_data=…)` for Gemini, `{type: image, source: {type: base64, …}}` for Anthropic), with the legacy path preserved as a fallback for pre-0.21.0 callers. + +#### Internal +- Pre-existing mypy error in `providers/azure_openai_provider.py:117` where `str | None` from `os.getenv` wasn't narrowed correctly — fixed with an explicit `is not None` check. + +### Tests +- **+345 new tests** across 13 new e2e test files (`tests/test_e2e_*.py`, `tests/rag/test_e2e_*.py`, `tests/tools/test_e2e_*.py`, `tests/providers/test_e2e_azure_openai.py`) and full-release simulations: + - **Tier 1** — real backends with no external services (28 tests): real `faiss-cpu` C++ bindings, real `subprocess.run` for code tools, real `sqlite3` for db tools, real local files + HTTP for document loaders, real `opentelemetry-sdk` with `InMemorySpanExporter` for OTel. + - **Tier 2** — real API calls using credentials in `.env` (8 tests): real OpenAI `gpt-4o-mini` + Anthropic `claude-haiku-4-5` + Gemini `gemini-2.5-flash` multimodal with an in-memory 4x4 PNG; real DuckDuckGo search; real GitHub REST API (unauthenticated). + - **Tier 3** — skip-cleanly when external services or credentials are missing (7 tests): Qdrant, pgvector, Azure OpenAI, Langfuse. + - **Integration simulations** (4 tests in `test_e2e_v0_21_0_simulations.py`): FAISS RAG + real OpenAI agent + OTel; Gemini multimodal + `execute_python` tool; Anthropic `query_sqlite` + `execute_python` chaining; Qdrant RAG + real OpenAI agent. 
+ - **App-shaped simulations** (7 tests in `test_e2e_v0_21_0_apps.py`): "Skylake" documentation Q&A bot with real CSV → FAISS → OpenAI agent + ConversationMemory multi-turn; sales data analyst bot with real SQLite + Claude chaining query + Python compute; knowledge base librarian that ingests from `from_csv` + `from_json` + `from_html` into real Qdrant and answers anchor-phrase questions with Gemini. + ### Stats -- **4,960 tests** (188 new across 7 spec subsystems) +- **5,203 tests** — up from 4,612 in v0.20.1 - **88 examples** (12 new: `77_faiss_vector_store.py` through `88_langfuse_observer.py`) - **5 providers** (added Azure OpenAI) - **7 vector stores** (added FAISS, Qdrant, pgvector) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ad1d230..621eff0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,8 +2,8 @@ Thank you for your interest in contributing to Selectools! We welcome contributions from the community. -**Current Version:** v0.20.1 -**Test Status:** 4612 tests passing (95% coverage) +**Current Version:** v0.21.0 +**Test Status:** 5203 tests passing (95% coverage) **Python:** 3.9 – 3.13 ## Getting Started @@ -74,7 +74,7 @@ Similar to `npm run` scripts, here are the common commands for this project: ### Testing ```bash -# Run all tests (4612 tests) +# Run all tests (5203 tests) pytest tests/ -v # Run tests quietly (summary only) @@ -264,7 +264,7 @@ selectools/ │ ├── embeddings/ # Embedding providers │ ├── rag/ # RAG: vector stores, chunking, loaders │ └── toolbox/ # 33 pre-built tools -├── tests/ # Test suite (4612 tests, 95% coverage) +├── tests/ # Test suite (5203 tests, 95% coverage) │ ├── agent/ # Agent tests │ ├── rag/ # RAG tests │ ├── tools/ # Tool tests @@ -371,7 +371,7 @@ We especially welcome contributions in these areas: - Add comparison guides (vs LangChain, LlamaIndex) ### 🧪 **Testing** -- Increase test coverage (currently 4612 tests passing!) +- Increase test coverage (currently 5203 tests passing!) 
- Add performance benchmarks - Improve E2E test stability with retry/rate-limit handling diff --git a/README.md b/README.md index dd540e8..f80c452 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,7 @@ Path("trace.html").write_text(trace_to_html(result.trace)) - **Trace HTML viewer** — `trace_to_html(trace)` renders a standalone waterfall timeline - **Deprecation policy** — 2-minor-version window, programmatic introspection via `.__stability__` - **Security audit** — all 41 `# nosec` annotations reviewed and published in `docs/SECURITY.md` -- **Quality infrastructure** — property-based tests (Hypothesis), thread-safety smoke suite, 5 new production simulations (4960 tests total) +- **Quality infrastructure** — property-based tests (Hypothesis), thread-safety smoke suite, 5 new production simulations (4612 tests total) ### v0.19.1 — Advanced Agent Patterns @@ -457,14 +457,14 @@ report.to_html("report.html") ## What's Included -- **5 LLM Providers**: OpenAI, Anthropic, Gemini, Ollama + FallbackProvider (auto-failover) +- **5 LLM Providers**: OpenAI, Azure OpenAI, Anthropic, Gemini, Ollama + FallbackProvider (auto-failover) - **Structured Output**: Pydantic / JSON Schema `response_format` with auto-retry - **Execution Traces**: `result.trace` with typed timeline of every agent step - **Reasoning Visibility**: `result.reasoning` explains *why* the agent chose a tool - **Batch Processing**: `agent.batch()` / `agent.abatch()` for concurrent classification - **Tool Policy Engine**: Declarative allow/review/deny rules with human-in-the-loop - **4 Embedding Providers**: OpenAI, Anthropic/Voyage, Gemini (free!), Cohere -- **4 Vector Stores**: In-memory, SQLite, Chroma, Pinecone +- **7 Vector Stores**: In-memory, SQLite, Chroma, Pinecone, FAISS, Qdrant, pgvector - **Hybrid Search**: BM25 + vector fusion with Cohere/Jina reranking - **Advanced Chunking**: Semantic + contextual chunking for better retrieval - **Dynamic Tool Loading**: Plugin system with hot-reload support 
@@ -486,16 +486,18 @@ report.to_html("report.html") - **76 Examples**: Multi-agent graphs, RAG, hybrid search, streaming, structured output, traces, batch, policy, observer, guardrails, audit, sessions, entity memory, knowledge graph, eval framework, advanced agent patterns, stability markers, HTML trace viewer, and more - **Built-in Eval Framework**: 50 evaluators (30 deterministic + 21 LLM-as-judge), A/B testing, regression detection, HTML reports, JUnit XML, snapshot testing - **AgentObserver Protocol**: 45 lifecycle events with `run_id` correlation, `LoggingObserver`, `SimpleStepObserver`, OTel export -- **4960 Tests**: Unit, integration, regression, and E2E with real API calls +- **5203 Tests**: Unit, integration, regression, and E2E with real API calls ## Install ```bash pip install selectools # Core + basic RAG -pip install selectools[rag] # + Chroma, Pinecone, Voyage, Cohere, PyPDF +pip install selectools[rag] # + Chroma, Pinecone, FAISS, Qdrant, Voyage, Cohere, PyPDF, BeautifulSoup +pip install selectools[observe] # + OpenTelemetry, Langfuse observers +pip install selectools[postgres] # + psycopg2 (enables pgvector) pip install selectools[cache] # + Redis cache pip install selectools[mcp] # + MCP client/server -pip install selectools[rag,cache,mcp] # Everything +pip install "selectools[rag,observe,cache,mcp]" # Everything ``` Add your provider's API key to a `.env` file in your project root: @@ -1108,7 +1110,7 @@ pytest tests/ -x -q # All tests pytest tests/ -k "not e2e" # Skip E2E (no API keys needed) ``` -4960 tests covering parsing, agent loop, providers, RAG pipeline, hybrid search, advanced chunking, dynamic tools, caching, streaming, guardrails, sessions, memory, eval framework, budget/cancellation, knowledge stores, orchestration, pipelines, agent patterns, stability markers, trace viewer, and E2E integration with real API calls. 
+5203 tests covering parsing, agent loop, providers, RAG pipeline, hybrid search, advanced chunking, dynamic tools, caching, streaming, guardrails, sessions, memory, eval framework, budget/cancellation, knowledge stores, orchestration, pipelines, agent patterns, stability markers, trace viewer, and E2E integration with real API calls. ## License diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 23d551f..c27a086 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -46,8 +46,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - `stability.beta()` and `stability.stable()` decorators now accept arbitrary objects via an `Any` overload, in addition to classes and callables. Lets `@beta` mark `Tool` instances produced by `@tool()`. +### Fixed + +#### RAG — `@tool()` on class methods (shipping blocker caught by real-call simulations) +- `@tool()` applied to a method (`def f(self, query: str)`) produced a class-level `Tool` whose `function` was the *unbound* method. When the agent executor called `tool.function(**llm_kwargs)` Python raised `TypeError: missing 1 required positional argument: 'self'` and the LLM saw "Tool Execution Failed", giving up after a few iterations. This fundamentally broke the canonical RAG pattern documented across selectools: + ```python + rag_tool = RAGTool(vector_store=store) + agent = Agent(tools=[rag_tool.search_knowledge_base], provider=...) + ``` + `RAGTool`, `SemanticSearchTool`, and `HybridSearchTool` were all affected. The existing `tests/rag/test_rag_workflow.py` coverage never caught it because those tests built the agent and then only asserted `isinstance(agent, Agent)` — they never called `agent.run()`. +- **Fix:** new `_BoundMethodTool` descriptor in `selectools/tools/decorators.py`. `@tool()` detects when the first parameter is `self` and returns a descriptor that binds per-instance on attribute access via `functools.partial(original_fn, instance)`. 
Class-level access falls through to a template `Tool` so introspection (`MyClass.method.name`, `.description`, `.parameters`) still works. + +#### Qdrant — migrated to `query_points()` API +- `QdrantVectorStore.search()` called `self.client.search(query_vector=…)`, which was removed from `qdrant-client >=1.13`. Users on any recent `qdrant-client` would have hit `AttributeError: 'QdrantClient' object has no attribute 'search'` on their first query. The existing mock-based unit tests didn't catch it because they mocked `QdrantClient` and accepted whatever attribute the test asked for. +- **Fix:** migrated to `client.query_points(query=…)` and unwrap `response.points`. Also: return `[]` on 404 when the collection has been dropped by `clear()`, to match `FAISSVectorStore` semantics (search-after-clear returns `[]`, doesn't raise). + +#### Multimodal — Gemini and Anthropic providers silently dropped images +- `GeminiProvider._format_messages` only handled the legacy `message.image_base64` attribute. The new `image_message()` helper puts the image in `message.content_parts` and explicitly sets `message.image_base64 = None`, so Gemini received only the text prompt and replied "I cannot see images." Every Gemini vision user would have hit this. +- `AnthropicProvider` had the exact same bug — Claude replied "I don't see any image attached." Every Claude vision user would have hit this. +- OpenAI was unaffected because `providers/_openai_compat.py` already iterates `content_parts`. +- **Fix:** both providers now iterate `message.content_parts` and convert each `ContentPart` to the provider's native image shape (`types.Part(inline_data=…)` for Gemini, `{type: image, source: {type: base64, …}}` for Anthropic), with the legacy path preserved as a fallback for pre-0.21.0 callers. + +#### Internal +- Pre-existing mypy error in `providers/azure_openai_provider.py:117` where `str | None` from `os.getenv` wasn't narrowed correctly — fixed with an explicit `is not None` check. 
+ +### Tests +- **+345 new tests** across 13 new e2e test files (`tests/test_e2e_*.py`, `tests/rag/test_e2e_*.py`, `tests/tools/test_e2e_*.py`, `tests/providers/test_e2e_azure_openai.py`) and full-release simulations: + - **Tier 1** — real backends with no external services (28 tests): real `faiss-cpu` C++ bindings, real `subprocess.run` for code tools, real `sqlite3` for db tools, real local files + HTTP for document loaders, real `opentelemetry-sdk` with `InMemorySpanExporter` for OTel. + - **Tier 2** — real API calls using credentials in `.env` (8 tests): real OpenAI `gpt-4o-mini` + Anthropic `claude-haiku-4-5` + Gemini `gemini-2.5-flash` multimodal with an in-memory 4x4 PNG; real DuckDuckGo search; real GitHub REST API (unauthenticated). + - **Tier 3** — skip-cleanly when external services or credentials are missing (7 tests): Qdrant, pgvector, Azure OpenAI, Langfuse. + - **Integration simulations** (4 tests in `test_e2e_v0_21_0_simulations.py`): FAISS RAG + real OpenAI agent + OTel; Gemini multimodal + `execute_python` tool; Anthropic `query_sqlite` + `execute_python` chaining; Qdrant RAG + real OpenAI agent. + - **App-shaped simulations** (7 tests in `test_e2e_v0_21_0_apps.py`): "Skylake" documentation Q&A bot with real CSV → FAISS → OpenAI agent + ConversationMemory multi-turn; sales data analyst bot with real SQLite + Claude chaining query + Python compute; knowledge base librarian that ingests from `from_csv` + `from_json` + `from_html` into real Qdrant and answers anchor-phrase questions with Gemini. 
+ ### Stats -- **4,960 tests** (188 new across 7 spec subsystems) +- **5,203 tests** — up from 4,612 in v0.20.1 - **88 examples** (12 new: `77_faiss_vector_store.py` through `88_langfuse_observer.py`) - **5 providers** (added Azure OpenAI) - **7 vector stores** (added FAISS, Qdrant, pgvector) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index a83e0b1..621eff0 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -2,9 +2,9 @@ Thank you for your interest in contributing to Selectools! We welcome contributions from the community. -**Current Version:** v0.19.2 -**Test Status:** 4612 tests passing (100%) -**Python:** 3.9+ +**Current Version:** v0.21.0 +**Test Status:** 5203 tests passing (95% coverage) +**Python:** 3.9 – 3.13 ## Getting Started @@ -74,7 +74,7 @@ Similar to `npm run` scripts, here are the common commands for this project: ### Testing ```bash -# Run all tests (4612 tests) +# Run all tests (5203 tests) pytest tests/ -v # Run tests quietly (summary only) @@ -146,13 +146,13 @@ python scripts/test_memory_with_openai.py ```bash # Release a new version (recommended) -python scripts/release.py --version 0.5.1 +python scripts/release.py --version 0.20.2 # Dry run (see what would happen) -python scripts/release.py --version 0.5.1 --dry-run +python scripts/release.py --version 0.20.2 --dry-run # Or use the bash script -./scripts/release.sh 0.5.1 +./scripts/release.sh 0.20.2 ``` See `scripts/README.md` for detailed release instructions. 
@@ -263,14 +263,14 @@ selectools/ │ │ └── stubs.py # LocalProvider / test stubs │ ├── embeddings/ # Embedding providers │ ├── rag/ # RAG: vector stores, chunking, loaders -│ └── toolbox/ # 24 pre-built tools -├── tests/ # Test suite (4612 tests) +│ └── toolbox/ # 33 pre-built tools +├── tests/ # Test suite (5203 tests, 95% coverage) │ ├── agent/ # Agent tests │ ├── rag/ # RAG tests │ ├── tools/ # Tool tests │ ├── core/ # Core framework tests │ └── integration/ # E2E tests (require API keys) -├── examples/ # 61 numbered examples (01–61) +├── examples/ # 88 numbered examples ├── docs/ # Detailed documentation │ ├── QUICKSTART.md # 5-minute getting started │ ├── ARCHITECTURE.md # Architecture overview @@ -317,7 +317,8 @@ git checkout -b fix/your-bug-fix 3. **Test your changes** ```bash -python tests/test_framework.py +pytest tests/ -x -q # All tests +pytest tests/ -k "not e2e" -x -q # Skip E2E (no API keys needed) ``` 4. **Commit with clear messages** @@ -370,7 +371,7 @@ We especially welcome contributions in these areas: - Add comparison guides (vs LangChain, LlamaIndex) ### 🧪 **Testing** -- Increase test coverage (currently 4612 tests passing!) +- Increase test coverage (currently 5203 tests passing!) - Add performance benchmarks - Improve E2E test stability with retry/rate-limit handling @@ -436,10 +437,11 @@ class YourProvider(Provider): 2. **Add tests** ```python -# tests/test_framework.py +# tests/providers/test_your_provider.py -def test_your_provider(): - # Add test cases +def test_your_provider_complete(): + provider = YourProvider(api_key="fake-key") + # Add test cases — see existing tests/providers/ for patterns pass ``` @@ -450,38 +452,58 @@ def test_your_provider(): ## Adding a New Tool -To contribute a new pre-built tool: +To contribute a new pre-built tool to `src/selectools/toolbox/`: -1. **Create the tool** +1. 
**Create the tool** with the `@tool` decorator ```python -# src/selectools/tools/your_tool.py - -from ..tools import Tool, ToolParameter - -def your_tool_implementation(param1: str, param2: int = 10) -> str: - """Implementation of your tool.""" - # Your logic here - return result - -def create_your_tool() -> Tool: - """Factory function to create the tool.""" - return Tool( - name="your_tool", - description="Clear description of what the tool does", - parameters=[ - ToolParameter(name="param1", param_type=str, description="Description", required=True), - ToolParameter(name="param2", param_type=int, description="Description", required=False), - ], - function=your_tool_implementation, - ) +# src/selectools/toolbox/your_tools.py + +from selectools import tool + + +@tool() +def your_tool(param1: str, param2: int = 10) -> str: + """One-line description of what the tool does. + + Longer multi-line docstring becomes the tool's description in the + LLM-facing schema. Be specific about what the tool does and when + to use it. + + Args: + param1: What this parameter is for. + param2: Optional. What this parameter is for. Default 10. + + Returns: + Description of the return value. + """ + # Your implementation here + return f"Result: {param1} with {param2}" ``` -2. **Add tests and examples** +The `@tool()` decorator (note the parentheses — they're required) introspects +the function signature and docstring to build the JSON schema automatically. +No manual `Tool` / `ToolParameter` construction needed. -3. **Update documentation** +2. **Use the tool with an Agent** + +```python +from selectools import Agent +from selectools.toolbox.your_tools import your_tool + +agent = Agent(tools=[your_tool], provider=OpenAIProvider()) +result = agent.run("Use your_tool with param1='hello'") +``` + +3. **Add tests** in `tests/toolbox/test_your_tools.py` + +4. **Add an example** in `examples/NN_your_feature.py` (zero-padded number) + +5. 
**Update documentation**: + - Add the tool to `docs/modules/TOOLBOX.md` + - Bump the tool count in `docs/llms.txt`, `landing/index.html`, and `CONTRIBUTING.md` -## Adding RAG Features (New in v0.8.0!) +## Adding RAG Features ### Adding a New Vector Store diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index dabf055..58cf8e2 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -192,6 +192,14 @@ result = agent.ask("How long does shipping take for premium members?") print(result.content) ``` +!!! tip "Other loaders and stores (v0.21.0)" + - Load documents directly from **CSV**, **JSON**, **HTML**, or a **URL**: + `DocumentLoader.from_csv(...)`, `from_json(...)`, `from_html(...)`, `from_url(...)` + - Swap the in-memory store for a production-grade backend without changing the rest of your code: + `FAISSVectorStore` ([docs](modules/FAISS.md)) for in-process search with disk persistence, + `QdrantVectorStore` ([docs](modules/QDRANT.md)) for a self-hosted or Qdrant Cloud server, + `PgVectorStore` ([docs](modules/PGVECTOR.md)) when you already run PostgreSQL. + ## Step 6: Get Structured Output Get typed, validated results from the LLM: diff --git a/docs/index.md b/docs/index.md index cd70fb5..c18a84c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -82,7 +82,7 @@ pip install selectools --- - Hybrid search (BM25 + vector) with reranking, 4 vector store backends, semantic chunking. + Hybrid search (BM25 + vector) with reranking, **7 vector store backends** (In-memory, SQLite, Chroma, Pinecone, FAISS, Qdrant, pgvector), semantic chunking, and CSV / JSON / HTML / URL document loaders. [:octicons-arrow-right-24: RAG module](modules/RAG.md) diff --git a/docs/llms.txt b/docs/llms.txt index 3db0c64..2850c71 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -1,6 +1,6 @@ # Selectools -> Selectools is a production-ready Python library for building AI agents with tool calling, RAG, and multi-agent orchestration. One pip install. No DSL. 
Supports OpenAI, Azure OpenAI, Anthropic, Gemini, Ollama. v0.21.0, 4960 tests at 95% coverage, Apache-2.0. +> Selectools is a production-ready Python library for building AI agents with tool calling, RAG, and multi-agent orchestration. One pip install. No DSL. Supports OpenAI, Azure OpenAI, Anthropic, Gemini, Ollama. v0.21.0, 5203 tests at 95% coverage, Apache-2.0. Selectools uses a single `Agent` class with native tool calling. No chains, no expression language, no complex abstractions. It includes built-in features that other frameworks charge for or split into separate packages: 50 evaluators, hybrid RAG search (BM25 + vector), guardrails, audit logging, multi-agent orchestration, and a visual drag-drop builder. Free, local, MIT-compatible. diff --git a/landing/index.html b/landing/index.html index b4b2679..92900d1 100644 --- a/landing/index.html +++ b/landing/index.html @@ -87,7 +87,7 @@ "Visual drag-and-drop agent builder with 8 node types and 7 templates", "Composable pipelines with @step decorator and @pipeline operator", "Token-level streaming with native tool call support", - "Compatibility matrix across Python 3.9 to 3.13 (95% coverage, 4612 tests)" + "Compatibility matrix across Python 3.9 to 3.13 (95% coverage, 5203 tests)" ], "keywords": "python, ai agent, llm, tool calling, rag, hybrid search, multi-agent, langchain alternative, openai, anthropic, gemini, ollama, agent framework, mcp, model context protocol" } @@ -230,7 +230,7 @@ "name": "Is Selectools production-ready?", "acceptedAnswer": { "@type": "Answer", - "text": "Yes. 4,612 tests at 95% coverage, published security audit, SBOM (CycloneDX), formal deprecation policy, @stable/@beta markers on every public API, compatibility matrix for Python 3.9-3.13. Migration guides for 4 frameworks. Apache-2.0 licensed." + "text": "Yes. 
5,203 tests at 95% coverage, published security audit, SBOM (CycloneDX), formal deprecation policy, @stable/@beta markers on every public API, compatibility matrix for Python 3.9-3.13. Migration guides for 4 frameworks. Apache-2.0 licensed." } }, { @@ -4191,7 +4191,7 @@ font-size: 8px; } - /* Card 7: 4612 - animated counter */ + /* Card 7: 5203 - animated counter */ .stat-viz-counter { font-family: var(--font-mono); font-size: 38px; @@ -4449,7 +4449,7 @@

AI agents that are just Python.

152 models
-
4612 tests
+
5203 tests
95% coverage
88 examples
50 evaluators
@@ -5066,7 +5066,7 @@

Five things your security team will ask for first.

tests passing
- 0 + 0

Unit, integration, and e2e. Green on every commit.

@@ -5436,11 +5436,11 @@

What you get vs. what you pay for elsewhere.

measured across 1000 runs
- +
tests passing
-
0
+
0
95% coverage
unit, integration, e2e
@@ -5787,7 +5787,7 @@

Type a question. Or browse the docs.

concepts
- Yes. 4,612 tests at 95% coverage (including 40 real API evaluations), published security audit, SBOM (CycloneDX 1.6), formal deprecation policy, @stable/@beta markers on every public API, and a compatibility matrix covering Python 3.9 to 3.13. Migration guides for LangChain, CrewAI, AutoGen, and LlamaIndex. Apache-2.0 licensed. + Yes. 5,203 tests at 95% coverage (including 40 real API evaluations), published security audit, SBOM (CycloneDX 1.6), formal deprecation policy, @stable/@beta markers on every public API, and a compatibility matrix covering Python 3.9 to 3.13. Migration guides for LangChain, CrewAI, AutoGen, and LlamaIndex. Apache-2.0 licensed.
From a4c93ce16e0c0a83008a3aef9d387ea42a8632a9 Mon Sep 17 00:00:00 2001 From: John Niche Date: Wed, 8 Apr 2026 11:01:01 -0300 Subject: [PATCH 08/17] =?UTF-8?q?docs(landing):=20v0.21.0=20sweep=20?= =?UTF-8?q?=E2=80=94=20version=20bump,=20Azure=20OpenAI,=207=20vector=20st?= =?UTF-8?q?ores,=20new=20extras?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second pass on landing/index.html after the earlier stale-count fix (4612 -> 5203 ×8). This pass catches the v0.21.0-specific content staleness that the test-count edit missed. Version strings (3 places) ------------------------- - Schema.org softwareVersion: 0.20.1 -> 0.21.0 - Hero status bar badge: v0.20.1 -> v0.21.0 - Footer comment: v0.20.1 -> v0.21.0 Azure OpenAI added to every provider enumeration (11 places) ------------------------------------------------------------ - SEO tag - social preview - Schema.org JSON-LD description field - Schema.org featureList item - FAQ item "Which LLM providers does selectools support?" — re-worded from "5 LLM providers: OpenAI, Anthropic, Gemini, Ollama, and FallbackProvider" to the correct 5 LLMs (OpenAI, Azure OpenAI, Anthropic, Gemini, Ollama) plus FallbackProvider as a wrapper - FAQ item "What's the license?" — added Azure to the token billing list - FAQ intro "What is selectools?" - Rendered FAQ in the HTML (not just the JSON-LD) - bento__desc on the fallback provider card - Five providers FAQ rendered answer - Visible tags in the hero "Works with" row — added an Azure OpenAI tag between OpenAI and Anthropic Vector store counts (4 -> 7, 4 places) -------------------------------------- - FAQ "Does it include RAG?" 
— "4 vector store backends" -> "7 vector store backends (memory, SQLite, Chroma, Pinecone, FAISS, Qdrant, pgvector)" - Same FAQ rendered in the HTML below the JSON-LD - Install FAQ answers updated to mention FAISS + Qdrant - Both RAG FAQ answers now mention the new CSV / JSON / HTML / URL document loaders Install extras (missing [observe] + [postgres]) ----------------------------------------------- - Install FAQ JSON-LD and rendered HTML now document: - pip install selectools[rag] (+ FAISS, Qdrant, beautifulsoup4) - pip install selectools[observe] (+ OpenTelemetry, Langfuse) - pip install selectools[postgres] (for pgvector) Verification ------------ - grep 4612 / 4,612 / 4960 / 4 vector store / 0.20.1 (excluding the one legitimate self-referential JS comment): clean - Count of "Azure OpenAI" occurrences: 0 -> 11 - No visual layout changes — text-only substitutions within existing elements. The hero provider row grows from 4 tags to 5, which is the only structural change and fits the existing flex layout. --- landing/index.html | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/landing/index.html b/landing/index.html index 92900d1..8e31884 100644 --- a/landing/index.html +++ b/landing/index.html @@ -4,7 +4,7 @@ Selectools: Production-Ready AI Agents in Plain Python - + @@ -28,14 +28,14 @@ - + ` tag, add this block on three new lines: + +``` +function syncPrompt(){{const q=document.getElementById('si').value;document.getElementById('ex-grep').textContent=q?' 
| grep -i '+q:'';const fl=document.getElementById('ex-flags');if(fl)fl.textContent=(typeof ac!=='undefined'&&ac!=='all')?' --tags '+ac:''}} +function typeLine(target,text,perChar,done){{let i=0;const tick=()=>{{if(i<=text.length){{target.textContent=text.slice(0,i);i++;setTimeout(tick,perChar)}}else if(done){{done()}}}};tick()}} +(function bootPrompt(){{const cmd=document.getElementById('ex-cmd');if(!cmd)return;const reduced=window.matchMedia('(prefers-reduced-motion: reduce)').matches;if(reduced){{cmd.textContent='ls examples/';syncPrompt();return}}typeLine(cmd,'ls examples/',35,syncPrompt)}})(); +``` + +Note that `syncPrompt()` writes to `.textContent` only — it does not touch any HTML rendering path. The `typeLine()` helper also writes only to `.textContent`. Both are safe text-only DOM updates. The `#ex-flags` write is deliberately guarded (`if(fl)` plus the `typeof ac` check) so that Task 3 stands alone: until Task 4 lands, the `#ex-flags` element and the `ac` category variable do not exist, and an unguarded version would throw on every keystroke. + +- [ ] **Step 3.5: Wire `syncPrompt()` to the search input's `oninput`** + +In `scripts/build_examples_gallery.py`, locate line 294: + +```python + +``` + +Replace `oninput="flt()"` with `oninput="flt();syncPrompt()"`. The full line becomes: + +```python + +``` + +- [ ] **Step 3.6: Regenerate the HTML** + +Run: +```bash +python scripts/build_examples_gallery.py > landing/examples/index.html +``` + +Expected: exits 0, file ~600KB. + +- [ ] **Step 3.7: Verify in browser — type-on, live-mirror, mobile, reduced-motion** + +Open `landing/examples/index.html` in a browser at desktop width. + +Expected on page load: +- A terminal panel appears below the nav. It has a colored window-control bar (red/yellow/green dots) with `~/selectools/examples` and `zsh` labels. +- Inside the terminal body, the prompt `selectools@examples.dev:~/selectools/examples $ ` appears immediately, fully styled. +- After ~10ms, the cursor types `ls examples/` character-by-character at ~35ms per char (~420ms total). +- After typing finishes, a blinking cyan caret remains at the end of the line. +- Below the prompt, a paragraph reads "88 runnable scripts covering agents, RAG, multi-agent graphs, evals, streaming, and guardrails.
34 run without an API key." + +Expected on interaction: +- Type `rag` into the search input. The terminal prompt updates live to show ` | grep -i rag` appended after `ls examples/`. +- Clear the search. The grep suffix disappears (no dangling pipe). +- The counter below the rail updates to `# 12 files match` while filtered, `# 88 files match` when cleared. + +Mobile fallback test: +- In DevTools, switch to a 375×812 viewport (iPhone). Reload. +- Expected: the `selectools@examples.dev:~/selectools/examples` prefix is hidden. Only `$ ls examples/` and the caret are visible. Typing in search still updates the grep suffix. + +Reduced-motion test: +- DevTools → Rendering → Emulate `prefers-reduced-motion: reduce`. Reload. +- Expected: the `ls examples/` text appears fully typed instantly (no character-by-character animation). The caret is visible but does not blink. + +- [ ] **Step 3.8: Commit** + +```bash +git add scripts/build_examples_gallery.py landing/examples/index.html +git commit -m "$(cat <<'EOF' +feat(examples): replace page header with terminal-session panel (§1) + +Replaces the bare

+ paragraph with a full terminal-window panel +that types out 'ls examples/' on page load and live-mirrors the search +state into the prompt suffix as ' | grep -i '. + +Counter format changes from 'N examples' to '# N files match' to +match the monospace comment aesthetic. + +The category --tags suffix wiring lands in Task 4 once the rail exists. + +Adds typeLine() and syncPrompt() helpers and a bootPrompt() IIFE that +respects prefers-reduced-motion. Mobile collapses to '$ ls examples/'. +Both helpers write only to .textContent — no HTML rendering paths. + +Spec §1: docs/superpowers/specs/2026-04-08-examples-page-overdrive-design.md +EOF +)" +``` + +--- + +## Task 4: Replace the chip row with the proportional-width category rail (§2) + +**Goal:** Remove the 18 pill-shaped category buttons (`.cb`) and replace them with a single horizontal bar of segments whose widths are proportional to category counts. On viewport entry, the rail "stamps" each segment left-to-right in sequence. + +**Files:** +- Modify: `scripts/build_examples_gallery.py:259-261` (the `.cr` and `.cb` CSS rules) +- Modify: `scripts/build_examples_gallery.py:178-183` (the `cat_btns` Python loop that builds the chip markup) +- Modify: `scripts/build_examples_gallery.py:295` (the f-string slot that emits `cat_btns`) +- Modify: `scripts/build_examples_gallery.py:305` (the inline JS chip click handler) + +- [ ] **Step 4.1: Replace `.cr` and `.cb` CSS with `.ex-rail` CSS** + +In `scripts/build_examples_gallery.py`, locate lines 259-261: + +``` +.cr{{display:flex;flex-wrap:wrap;gap:6px}} +.cb{{font-family:var(--font);font-size:12px;font-weight:500;padding:6px 14px;border-radius:100px;border:1px solid rgba(51,65,85,0.6);background:rgba(30,41,59,0.7);color:var(--dm);cursor:pointer;transition:all .15s;-webkit-backdrop-filter:blur(4px);backdrop-filter:blur(4px)}} 
+.cb:hover{{background:rgba(51,65,85,0.5);border-color:var(--dm);color:var(--tx)}}.cb.on{{background:rgba(34,211,238,0.12);border-color:rgba(34,211,238,0.35);color:var(--cy);box-shadow:0 0 12px rgba(34,211,238,0.08)}} +``` + +Replace those three lines with: + +``` +.ex-rail{{display:flex;gap:2px;height:40px;border-radius:8px;overflow:hidden;border:1px solid var(--bd);background:rgba(30,41,59,0.4)}} +.ex-rail__seg{{flex:var(--seg-weight,1) 1 0;min-width:56px;height:100%;display:flex;align-items:center;justify-content:center;gap:6px;font-family:var(--mono);font-size:12px;color:var(--dm);background:transparent;border:none;cursor:pointer;transition:background .15s,color .15s;position:relative;padding:0 8px;white-space:nowrap}} +.ex-rail__seg--all{{flex:0 0 72px}} +.ex-rail__seg:hover{{background:rgba(34,211,238,0.08);color:var(--tx)}} +.ex-rail__seg.on{{background:rgba(34,211,238,0.12);color:var(--cy);box-shadow:inset 0 -2px 0 var(--exec-color)}} +.ex-rail__name{{font-size:12px}} +.ex-rail__count{{font-size:11px;color:var(--cy);opacity:0.75}} +.ex-rail.in-view .ex-rail__seg{{animation:exec-stamp 0.6s var(--exec-ease-soft) both;animation-delay:calc(var(--seg-index,0) * 80ms)}} +@media(max-width:640px){{.ex-rail{{overflow-x:auto;-webkit-overflow-scrolling:touch;scroll-snap-type:x mandatory;height:44px}}.ex-rail__seg{{flex:0 0 auto;min-width:80px;scroll-snap-align:start}}}} +@media(prefers-reduced-motion:reduce){{.ex-rail.in-view .ex-rail__seg{{animation:none}}}} +``` + +- [ ] **Step 4.2: Rewrite the `cat_btns` builder loop in Python** + +In `scripts/build_examples_gallery.py`, locate the existing `cat_btns` builder at lines 178-183: + +```python + cat_btns = [f''] + for c in all_cats: + n = sum(1 for e in examples if c in e["categories"]) + icon = CAT_ICONS.get(c, "") + label = c.replace("-", " ").title() + cat_btns.append(f'') +``` + +Replace those six lines with: + +```python + rail_segs = [ + f'' + ] + for idx, c in enumerate(all_cats, start=1): + n = sum(1 for e in 
examples if c in e["categories"]) + rail_segs.append( + f'' + ) +``` + +Note: the variable is renamed from `cat_btns` to `rail_segs` to reflect what it now produces. The `CAT_ICONS` dictionary is no longer referenced — leave the dictionary in place (other code may still use it; do not delete it in this task). + +- [ ] **Step 4.3: Update the f-string slot in the markup template** + +In `scripts/build_examples_gallery.py`, locate line 295: + +```python +
{chr(10).join(cat_btns)}
+``` + +Replace it with: + +```python +
{chr(10).join(rail_segs)}
+``` + +- [ ] **Step 4.4: Replace the chip click handler in the inline `` tag (after the `bootPrompt` IIFE you added in Task 3), add: + +``` +document.addEventListener('keydown',(e)=>{{if(e.key!=='/')return;const t=e.target;if(t&&(t.tagName==='INPUT'||t.tagName==='TEXTAREA'||t.isContentEditable))return;e.preventDefault();const si=document.getElementById('si');if(si)si.focus()}}); +``` + +The handler ignores `/` when typed inside any input/textarea/contenteditable element so it doesn't break normal typing. + +- [ ] **Step 5.5: Regenerate the HTML** + +Run: +```bash +python scripts/build_examples_gallery.py > landing/examples/index.html +``` + +Expected: exits 0, file ~600KB. + +- [ ] **Step 5.6: Verify in browser — glyph, kbd, shortcut, counter format** + +Open `landing/examples/index.html` in a browser at desktop width. + +Expected: +- The search input now has a `⌕` glyph at the left and a small `/` keyboard hint pill at the right. +- The placeholder text is `search by name or keyword…`. +- Below the rail, the counter reads `# 88 files match`. +- Press `/` while focused outside any input. The search box gains focus immediately. +- Press `/` while typing inside the search box itself. The character `/` appears in the input (the shortcut does NOT fire). +- Type `rag`. The counter updates to `# 12 files match` (or however many RAG examples exist). +- Clear the search. Counter returns to `# 88 files match`. + +Reduced-motion: no animations in this section, nothing extra to verify. + +- [ ] **Step 5.7: Commit** + +```bash +git add scripts/build_examples_gallery.py landing/examples/index.html +git commit -m "$(cat <<'EOF' +feat(examples): search glyph, kbd hint, and # files match counter (§3) + +Wraps the search input with a leading ⌕ glyph and a trailing / +keyboard shortcut hint. Adds a global keydown listener that focuses +the search when / is pressed outside any input. 
+ +Counter format finalized as '# N files match' to match the monospace +comment aesthetic of the terminal header. + +Spec §3: docs/superpowers/specs/2026-04-08-examples-page-overdrive-design.md +EOF +)" +``` + +--- + +## Task 6: Card rows as `ls -la` columns (§4) + +**Goal:** Replace each card's flex-row header (`.eh`) with a 7-column CSS Grid that mimics `ls -la` output. Add a subtle 14ms-staggered enter animation for the first 30 rows. Add a mobile media query that collapses the grid to 2 visual lines. + +**Files:** +- Modify: `scripts/build_examples_gallery.py:264-274` (the `.ec` and `.eh` CSS rules) +- Modify: `scripts/build_examples_gallery.py:189-233` (the Python loop that builds each card) +- Modify: the `toggle()` function inside the inline `` tag (after the `/` keydown handler from Task 5), add this keydown listener: + +``` +document.querySelectorAll('.ex-row').forEach(r=>{{r.addEventListener('keydown',(e)=>{{if(e.key==='Enter'||e.key===' '){{e.preventDefault();toggle(r)}}}})}}); +``` + +This makes every row keyboard-activatable. The existing `toggle()` function (now updated in Step 6.3 to sync `aria-expanded`) handles the rest. + +- [ ] **Step 6.5: Regenerate the HTML** + +Run: +```bash +python scripts/build_examples_gallery.py > landing/examples/index.html +``` + +Expected: exits 0, file ~600KB. + +- [ ] **Step 6.6: Verify in browser — desktop columns, mobile collapse, enter animation, keyboard** + +Open `landing/examples/index.html` in a browser at desktop width (1440×900). + +Expected first paint: +- Cards render as monospace rows in `ls -la` style. Each row reads (left to right): two-digit number (cyan), `-rw-r--r--` (dim), line count `46L` (dim, right-aligned), key badge `no-key` (green) or `api-key` (dim), filename without the `NN_` prefix (cyan), description (bright), chevron `▾` (dim). +- The first ~30 rows visibly fade-up in a 14ms-staggered cascade on first paint. The full cascade completes in about 450ms. 
+- Rows past index 29 render in their final state immediately (no animation).
+- Hovering a row tints its background and grows a 2px cyan border on the left.
+
+Click row 01:
+- The row expands. The chevron rotates 180° smoothly (with the new ease curve).
+- The `.eb` body becomes visible with the existing tag links + Copy/GitHub/Docs buttons + source `<pre>` block.
+- The row's `aria-expanded` attribute becomes `"true"` (inspect in DevTools).
+
+Tab into the first row, then press Enter:
+- The row expands. Press Enter again to collapse.
+- `aria-expanded` toggles between `"true"` and `"false"` on each activation.
+
+Mobile fallback:
+- Switch to a 375×812 viewport. Reload.
+- Expected: each card collapses to 2 visual lines. Line 1 has the number on the left, the filename, and the chevron on the right. Line 2 has the line count, key badge, and description (truncated). The `-rw-r--r--` permissions column is hidden on mobile.
+
+Reduced-motion:
+- Enable `prefers-reduced-motion`. Reload.
+- Expected: rows appear immediately without the cascade animation. Chevron rotation is instant (0.01s). Card expansion still works.
+
+Smoke test all 88 cards filter correctly:
+- Type `rag` in search. Only RAG cards remain visible. Counter says `# 12 files match` (or however many RAG examples exist).
+- Click the `agent` segment in the rail. Only Agent cards visible.
+- Click `all`. All 88 visible. Clear search.
+
+- [ ] **Step 6.7: Commit**
+
+```bash
+git add scripts/build_examples_gallery.py landing/examples/index.html
+git commit -m "$(cat <<'EOF'
+feat(examples): card rows as ls -la grid columns (§4)
+
+Replaces the flex-row .eh card header with a 7-column CSS Grid that
+mimics 'ls -la' output: number, permissions, size, key badge,
+filename (with NN_ prefix stripped), description, chevron.
+
+The first 30 rows get a 14ms-staggered fade-up enter animation via
+the .ex-row--enter class. Rows past index 29 render immediately to
+avoid a cascade-of-88 effect.
+
+Mobile collapses to 2-line layout with permissions column hidden.
+Rows are keyboard-accessible (Enter/Space) with aria-expanded synced
+via a surgical single-line addition to the existing toggle() function.
+
+Spec §4: docs/superpowers/specs/2026-04-08-examples-page-overdrive-design.md
+EOF
+)"
+```
+
+---
+
+## Task 7: `$ cat` prefix on card expansion (§5)
+
+**Goal:** Add a one-line `$ cat examples/NN_name.py` terminal prefix above each expanded card's body content. The prefix uses the same monospace comment styling as the rest of the page.
+
+**Files:**
+- Modify: `scripts/build_examples_gallery.py:275` (the `.eb` CSS — add `.ex-cat-prefix`)
+- Modify: `scripts/build_examples_gallery.py:189-233` (the per-card builder — insert prefix into `.eb`)
+
+- [ ] **Step 7.1: Add `.ex-cat-prefix` CSS**
+
+In `scripts/build_examples_gallery.py`, locate line 275:
+
+```
+.eb{{padding:0 18px 18px}}.eg{{display:flex;flex-wrap:wrap;gap:6px;margin-bottom:12px}}
+```
+
+Add these new lines AFTER line 275 (do not delete the existing line):
+
+```
+.ex-cat-prefix{{font-family:var(--mono);font-size:11px;color:var(--ft);padding:0 0 10px;user-select:text}}
+.ex-cat-prefix__glyph{{color:var(--gn);margin-right:6px}}
+```
+
+NOTE (review): `var(--ft)` is not defined by any CSS added in earlier tasks — confirm it exists in the page's root custom properties, or substitute the dim-text variable `var(--dm)` used elsewhere on this page.
+
+- [ ] **Step 7.2: Insert the prefix into the `.eb` body**
+
+In `scripts/build_examples_gallery.py`, locate the per-card builder loop. Find this part of the f-string in the `cards.append(...)` call:
+
+```python
+            f'